On 1/28/2011 5:12 PM, Bruno Haible wrote: > Please CC the bug-gnu-libiconv mailing list when discussing possible > bugs in GNU libiconv.
I hadn't intended to involve bug-gnu-libiconv until we had a working fix, and a consensus here on @cygwin. But, in any case, here is the portion of Corinna's patch dealing with the iconv issues, stripped down to the minimum necessary to correct the "problem". As pointed out in the @cygwin thread, there are still some open questions, which I had hoped to avoid by waiting until cygwin-1.7.8 was released. 1) On cygwin-1.7.8, __STDC_ISO_10646__ is defined, so this change will allow "correct" behavior *if compiled on cygwin-1.7.8*. -#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) +#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ However, cygwin-1.7.8 isn't out yet. With this change (and the "don't include windows.h" change), libiconv will still compile properly on cygwin-1.5 -- which does not support wide chars, and does NOT define __STDC_ISO_10646__. However, it WON'T compile properly on cygwin-1.7.x up to 1.7.7. 2) From cygwin-1.7.2 to cygwin-1.7.7, the following change could be used instead (there's an issue with 1.7.1 which doesn't bear exploration here): -#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) +#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ But arguably, then it would break on "old" cygwin like 1.5. Perhaps this is ok, since 1.7 has been "out" for over a year, and maybe bug-gnu-libiconv doesn't care about old, unsupported-by-the-cygwin-project versions of cygwin. In any case, the attached patch goes with option 1 above. It is completely orthogonal to, and independent of, the other "relocation" patch, that I posted to the gnulib list. 2011-01-28 Corinna Vinschen <...> Correct wchar handling on cygwin-1.7.x * lib/iconv.c (iconv_canonicalize): Allow __STDC_ISO_10646__ to control, rather than using __CYGWIN__ to veto. * lib/iconv_open1.h: Ditto. * libcharset/lib/localcharset.c: Don't include windows.h if __CYGWIN__. 
(get_charset_aliases): Remove cygwin workaround; rely on generic implementation. Be sure to copy result of nl_langinfo into local buffer. -- Chuck
--- libiconv-1.13.1.orig/lib/iconv.c 2009-06-21 13:17:33.000000000 +0200 +++ libiconv-1.13.1/lib/iconv.c 2011-01-27 12:46:21.544296281 +0100 @@ -550,7 +550,7 @@ const char * iconv_canonicalize (const c if (ap->encoding_index == ei_local_wchar_t) { /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. This is also the case on native Woe32 systems. */ -#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) +#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ if (sizeof(wchar_t) == 4) { index = ei_ucs4internal; break; --- libiconv-1.13.1.orig/lib/iconv_open1.h 2009-06-21 13:17:33.000000000 +0200 +++ libiconv-1.13.1/lib/iconv_open1.h 2011-01-27 12:47:03.119371056 +0100 @@ -98,7 +98,7 @@ if (ap->encoding_index == ei_local_wchar_t) { /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. This is also the case on native Woe32 systems. */ -#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) +#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ if (sizeof(wchar_t) == 4) { to_index = ei_ucs4internal; break; @@ -174,7 +174,7 @@ if (ap->encoding_index == ei_local_wchar_t) { /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. This is also the case on native Woe32 systems. 
*/ -#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) +#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ if (sizeof(wchar_t) == 4) { from_index = ei_ucs4internal; break; --- libiconv-1.13.1.orig/libcharset/lib/localcharset.c 2009-06-21 13:17:33.000000000 +0200 +++ libiconv-1.13.1/libcharset/lib/localcharset.c 2011-01-27 11:53:33.201852883 +0100 @@ -52,10 +52,6 @@ # include <locale.h> # endif # endif -# ifdef __CYGWIN__ -# define WIN32_LEAN_AND_MEAN -# include <windows.h> -# endif #elif defined WIN32_NATIVE # define WIN32_LEAN_AND_MEAN # include <windows.h> @@ -117,7 +113,7 @@ get_charset_aliases (void) cp = charset_aliases; if (cp == NULL) { -#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__) +#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE) FILE *fp; const char *dir; const char *base = "charset.alias"; @@ -276,7 +272,7 @@ get_charset_aliases (void) "DECKOREAN" "\0" "EUC-KR" "\0"; # endif -# if defined WIN32_NATIVE || defined __CYGWIN__ +# if defined WIN32_NATIVE /* To avoid the troubles of installing a separate file in the same directory as the DLL and of retrieving the DLL's directory at runtime, simply inline the aliases here. */ @@ -332,55 +328,14 @@ locale_charset (void) # if HAVE_LANGINFO_CODESET - /* Most systems support nl_langinfo (CODESET) nowadays. */ - codeset = nl_langinfo (CODESET); - -# ifdef __CYGWIN__ - /* Cygwin 2006 does not have locales. nl_langinfo (CODESET) always - returns "US-ASCII". As long as this is not fixed, return the suffix - of the locale name from the environment variables (if present) or - the codepage as a number. */ - if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0) - { - const char *locale; - static char buf[2 + 10 + 1]; + /* Most systems support nl_langinfo (CODESET) nowadays. + + POSIX allows that the returned pointer may point to a static area that + may be overwritten by subsequent calls to setlocale or nl_langinfo. 
*/ + static char codeset_buf[64]; - locale = getenv ("LC_ALL"); - if (locale == NULL || locale[0] == '\0') - { - locale = getenv ("LC_CTYPE"); - if (locale == NULL || locale[0] == '\0') - locale = getenv ("LANG"); - } - if (locale != NULL && locale[0] != '\0') - { - /* If the locale name contains an encoding after the dot, return - it. */ - const char *dot = strchr (locale, '.'); - - if (dot != NULL) - { - const char *modifier; - - dot++; - /* Look for the possible @... trailer and remove it, if any. */ - modifier = strchr (dot, '@'); - if (modifier == NULL) - return dot; - if (modifier - dot < sizeof (buf)) - { - memcpy (buf, dot, modifier - dot); - buf [modifier - dot] = '\0'; - return buf; - } - } - } - - /* Woe32 has a function returning the locale's codepage as a number. */ - sprintf (buf, "CP%u", GetACP ()); - codeset = buf; - } -# endif + codeset_buf[0] = '\0'; + codeset = strncat (codeset_buf, nl_langinfo (CODESET), sizeof (codeset_buf)); # else
-- Problem reports: http://cygwin.com/problems.html FAQ: http://cygwin.com/faq/ Documentation: http://cygwin.com/docs.html Unsubscribe info: http://cygwin.com/ml/#unsubscribe-simple