Thanks for explaining. I still see a problem with the proposed patch,
though, in that (if I'm understanding it correctly) it would cause
c_isalpha (120) to succeed, even though EBCDIC 120 corresponds to U+00CC
LATIN CAPITAL LETTER I WITH GRAVE, and that is not supposed to be an
alphabetic character in the stripped-down C locale. Code that uses
c-ctype wants only ASCII letters, and departing from this would likely
break things.
Worse, the C expression "c_ispunct ('[')" might return false, as the
library may be in a locale that's incompatible with the mode the
compiler was in when it compiled the '['.
Looking at the web page you mentioned, it appears that one approach is
to assume EBCDIC 1047 (this seems to be the default and typical setting
for C programs) at both compile-time and run-time. We can check the
compile-time assumption without any code overhead. The proposed patch
does that. If someone really wants to use a different code page, either
at compile-time or at run-time, more code will need to be written (most
likely by the poor soul who actually needs that feature).
Yes, all control characters appear to be in [\x00-\x3F], but not
everything in that range is a control character. (I remember 0x04 was
not.) I tried making c_iscntrl() a simple range check at first, but
that did not agree with the system iscntrl().
Thanks, this should be fixed in the attached patch, which I've installed.
From a92ab221b5cad8a5c1a5ca1fc1823d1f3fe4a24b Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Tue, 22 Sep 2015 14:47:06 -0700
Subject: [PATCH] c-ctype: assume EBCDIC 1047 for c_iscntrl
* lib/c-ctype.c (c_iscntrl): When EBCDIC, assume code page 1047 at
both compile-time and at run-time. Check it at compile-time. We can
worry about other code pages later, if the topic ever comes up.
Fix typo in C_CTYPE_EBCDIC.
---
lib/c-ctype.c | 38 +++++++++++++++++++++++++++++---------
1 file changed, 29 insertions(+), 9 deletions(-)
diff --git a/lib/c-ctype.c b/lib/c-ctype.c
index 916d46e..558c4af 100644
--- a/lib/c-ctype.c
+++ b/lib/c-ctype.c
@@ -131,17 +131,37 @@ c_isblank (int c)
bool
c_iscntrl (int c)
{
- enum { C_CTYPE_EBCDIC = (' ' == 64 && '0' == 240
- && 'A' == 193 && 'J' == 209 && 'S' == 226
- && 'A' == 129 && 'J' == 145 && 'S' == 162) };
- verify (C_CTYPE_ASCII || C_CTYPE_EBCDIC);
-
- if (0 <= c && c < ' ')
- return true;
+ enum { C_CTYPE_EBCDIC = (' ' == '\x40' && '0' == '\xf0'
+ && 'A' == '\xc1' && 'J' == '\xd1' && 'S' == '\xe2'
+ && 'a' == '\x81' && 'j' == '\x91' && 's' == '\xa2') };
if (C_CTYPE_ASCII)
- return c == 0x7f;
+ return (0 <= c && c < ' ') || c == 0x7f;
else
- return c == 0xff || c == -1;
+ {
+ /* Return true if C corresponds to an ASCII control character.
+ Assume EBCDIC code page 1047, and verify that the compiler
+ agrees with this. */
+ verify (C_CTYPE_ASCII
+ || (C_CTYPE_EBCDIC
+ && '!' == '\x5a' && '#' == '\x7b' && '$' == '\x5b'
+ && '@' == '\x7c' && '[' == '\xad' && '\\' == '\xe0'
+ && ']' == '\xbd' && '^' == '\x5f' && '_' == '\x6d'
+ && '`' == '\x79'));
+ switch (c)
+ {
+ case '\x00': case '\x01': case '\x02': case '\x03': case '\x05':
+ case '\x0b': case '\x0c': case '\x0d': case '\x0e': case '\x0f':
+ case '\x10': case '\x11': case '\x12': case '\x13': case '\x15':
+ case '\x16': case '\x18': case '\x19': case '\x1c': case '\x1d':
+ case '\x1e': case '\x1f': case '\x26': case '\x27': case '\x2d':
+ case '\x2e': case '\x2f': case '\x32': case '\x37': case '\x3c':
+ case '\x3d': case '\x3f': case '\xff':
+ case '\xff' < 0 ? 0xff : -1:
+ return true;
+ default:
+ return false;
+ }
+ }
}
bool
--
2.1.0