Hi Bruno,
On Sun, 2020 Jan 19 19:13-05:00, Bruno Haible wrote:
>
> OK, then we'll need
> a) for the short-term: in lib/iconv_open.c, apply an EBCDIC -> ASCII
> conversion to the 'from' and the 'to' strings. Can you implement that?
> And also a rule that removes the anti-EBCDIC guard from the gperf
> generated output (in modules/iconv_open).
Please see the attached patch to iconv_open.c. I'll leave the makefile
rule to you, as that is less straightforward for me. The patch, plus a
disabled #error in iconv_open-zos.h, gets test-iconv to build and pass.
However, the following test failures are new to me:
$ /tmp/testdir/gltests/test-btoc32-1.sh
/tmp/testdir/gltests/test-btoc32.c:49: assertion 'btoc32 (c) == c' failed
CEE5207E The signal SIGABRT was received.
$ /tmp/testdir/gltests/test-mbrtoc32-1.sh
/tmp/testdir/gltests/test-mbrtoc32.c:108: assertion 'wc == c' failed
CEE5207E The signal SIGABRT was received.
$ /tmp/testdir/gltests/test-mbrtoc32-5.sh
/tmp/testdir/gltests/test-mbrtoc32.c:115: assertion 'mbsinit (&state)' failed
CEE5207E The signal SIGABRT was received.
I tested using Git 49c6f78c. Poking a bit into the
test-btoc32-1.sh failure, I saw that it occurred when btoc32(4) yielded
156, which seems consistent with an IBM-1047-to-ASCII mapping. (Per
Wikipedia, 0-3 are the same as in ASCII, but 4 is a "SEL" character. And
btoc32(5) returns 9.)
> b) a feature request for the 'gperf' program, to generate two code
> bodies, one for ASCII and one for EBCDIC.
What about generating a translation table at compile/run time, that is
used if ASCII is unavailable? Something like
xlate['A'] = 65;
xlate['B'] = 66;
...
xlate['Z'] = 90;
...
c = xlate[c];
As I recall, there are EBCDIC variants with minor differences in the
positions of certain punctuation marks, and while they may or may not be
commonly used on z/OS, it would be desirable to remain robust against
that possibility.
--Daniel
--
Daniel Richard G. || sk...@iskunk.org
My ASCII-art .sig got a bad case of Times New Roman.
diff --git a/lib/iconv_open.c b/lib/iconv_open.c
index 989bd9d57..72276b1c3 100644
--- a/lib/iconv_open.c
+++ b/lib/iconv_open.c
@@ -38,10 +38,25 @@
#define ICONV_FLAVOR_SOLARIS "iconv_open-solaris.h"
#define ICONV_FLAVOR_ZOS "iconv_open-zos.h"
+#if defined __MVS__ && defined __IBMC__ && 'A' != 0x41
+/* On IBM z/OS, the encoding names are in EBCDIC, but the gperf source still
+ expects and returns ASCII. We need to convert between the two. */
+# define EBCDIC_CONVERT
+#endif
+
+#ifdef EBCDIC_CONVERT
+/* Ensure that the gperf source is compiled as ASCII. */
+# pragma convert("ISO8859-1")
+#endif
+
#ifdef ICONV_FLAVOR
# include ICONV_FLAVOR
#endif
+#ifdef EBCDIC_CONVERT
+# pragma convert(pop)
+#endif
+
iconv_t
rpl_iconv_open (const char *tocode, const char *fromcode)
#undef iconv_open
@@ -50,6 +65,10 @@ rpl_iconv_open (const char *tocode, const char *fromcode)
char tocode_upper[32];
char *fromcode_upper_end;
char *tocode_upper_end;
+#ifdef EBCDIC_CONVERT
+ char fromcode_ae[32];
+ char tocode_ae[32];
+#endif
#if REPLACE_ICONV_UTF
/* Special handling of conversion between UTF-8 and UTF-{16,32}{BE,LE}.
@@ -150,6 +169,15 @@ rpl_iconv_open (const char *tocode, const char *fromcode)
tocode_upper_end = q;
}
+#ifdef EBCDIC_CONVERT
+ /* Convert the encodings from EBCDIC to ASCII, as gperf expects the latter.
*/
+ if (__etoa (fromcode_upper) < 0 || __etoa (tocode_upper) < 0)
+ {
+ errno = EINVAL;
+ return (iconv_t)(-1);
+ }
+#endif
+
#ifdef ICONV_FLAVOR
/* Apply the mappings. */
{
@@ -169,5 +197,20 @@ rpl_iconv_open (const char *tocode, const char *fromcode)
tocode = tocode_upper;
#endif
+#ifdef EBCDIC_CONVERT
+ /* Convert the encodings back to EBCDIC for iconv_open(). */
+ strncpy (fromcode_ae, fromcode, sizeof(fromcode_ae));
+ strncpy (tocode_ae, tocode, sizeof(tocode_ae));
+ fromcode_ae[SIZEOF (fromcode_ae) - 1] = '\0';
+ tocode_ae[SIZEOF (tocode_ae) - 1] = '\0';
+ if (__atoe (fromcode_ae) < 0 || __atoe (tocode_ae) < 0)
+ {
+ errno = EINVAL;
+ return (iconv_t)(-1);
+ }
+ fromcode = fromcode_ae;
+ tocode = tocode_ae;
+#endif
+
return iconv_open (tocode, fromcode);
}