On Tue, Sep 27, 2011 at 3:48 PM, Ozkan Sezer <seze...@gmail.com> wrote:
> On Tue, Sep 27, 2011 at 2:01 PM, Ruben Van Boxem
> <vanboxem.ru...@gmail.com> wrote:
>> 2011/9/26 Kai Tietz <ktiet...@googlemail.com>
>>>
>>> 2011/9/26 Ruben Van Boxem <vanboxem.ru...@gmail.com>:
>>> > Why does mingw-w64 not have a uchar.h header file? There's only a few
>>> > typedefs and some functions (which can easily be implemented through the
>>> > Win32 API IMHO).
>>> >
>>> > See the draft proposal here:
>>> > http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1040.pdf
>>> >
>>> > The reason I ask is because the C++11 standard mentions this header and
>>> > its
>>> > C++ counterpart <cuchar>. Although the functions aren't available in
>>> > msvcrt,
>>> > I'm sure I can come up with an implementation if necessary (based on
>>> > WideCharToMultiByte and some other code to get as far as utf32... hmm
>>> > I'll
>>> > have to think about that). For the UTF-32 bits there's code floating
>>> > around
>>> > all over the internet, like here:
>>> > http://bytes.com/topic/c/answers/517850-converting-utf-16-utf-32-a which
>>> > would definitely work with a bit of testing and refactoring.
>>> >
>>> > Please tell me what you think about this idea.
>>> >
>>> > Ruben
>>>
>>> Sounds interesting.  WideCharToMultiByte/MultiByteToWideChar API could
>>> handle this.
>>> As we don't have here API-conflicts to msvcrt's API, I would give it a
>>> try.
>>>
>>> Patches are welcome for this.
>>
>> Attached is a simple implementation that falls back to C's wcrtomb and
>> mbrtowc for char16_t (should indirectly be a wchar_t), as these handle the
>> necessary error conditions already.
>>
>> The char32_t bits are manual bit checking. Each case is handled
>> individually, and the result is built up if all conditions are met.
>>
>> I copied and adapted a disclaimer and some more information from stdint.h.
>> The typedefs are not allowed for C++11, as these must be distinct types
>> (implemented in the compiler). Clang had this fixed in r117038, GCC in 4.4.
>> This made me think that ifdef __cplusplus was enough (GCC 4.4 is the first
>> real mingw-w64 capable GCC, and Clang is still quite far from usable IMHO).
>>
>> Ruben
>
> Something is clearly wrong with the gcc version checking:  if you
> want to typedef when gcc < 4.4, that line in uchar.h should read
>  || (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
> ... I guess.
>
> There are also some other errors, which I fixed partially. See attached.
> gcc44 compiles it. g++44 does _NOT_ compile it _unless_ I use
> -std=c++0x or gnu++0x
>
> I didn't inspect the functions' code for correctness.
>
> --
> O.S.
>

Attached new ones with slightly better gcc check

--
O.S.
/**
 * This file has no copyright assigned and is placed in the Public Domain.
 * This file is part of the w64 mingw-runtime package.
 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
 */
/* ISO C1x Unicode utilities
 * Based on ISO/IEC SC22/WG14 9899 TR 19769 (SC22 N1326)
 *
 *  THIS SOFTWARE IS NOT COPYRIGHTED
 *
 *  This source code is offered for use in the public domain. You may
 *  use, modify or distribute it freely.
 *
 *  This code is distributed in the hope that it will be useful but
 *  WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESS OR IMPLIED ARE HEREBY
 *  DISCLAIMED. This includes but is not limited to warranties of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 *  Date: 2011-09-27
 */

#ifndef __UCHAR_H
#define __UCHAR_H

#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uint_leastXX_t */
#include <wchar.h>	/* mbstate_t */

/* char16_t and char32_t are distinct built-in types in C++11, so they must
 * only be typedef'ed for C, and for C++ compilers that are not running in
 * C++11 (C++0x) mode.  GCC 4.4 - 4.6 signal C++0x mode through
 * __GXX_EXPERIMENTAL_CXX0X__ instead of through the value of __cplusplus,
 * hence both are tested. */
#if !defined(__cplusplus) || \
   (__cplusplus < 201103L && !defined(__GXX_EXPERIMENTAL_CXX0X__))
typedef uint_least16_t char16_t;
typedef uint_least32_t char32_t;
#endif

/* Advertise that char16_t and char32_t values are UTF-16 / UTF-32 encoded.
 * C11 compilers predefine these macros themselves and the standard forbids
 * redefining them, so only define them when the compiler has not. */
#ifndef __STDC_UTF_16__
#define __STDC_UTF_16__ 1
#endif
#ifndef __STDC_UTF_32__
#define __STDC_UTF_32__ 1
#endif

/* The conversion functions are implemented in C; give them C linkage so
 * that C++ translation units including this header link against them. */
#ifdef __cplusplus
extern "C" {
#endif

/* Convert the multibyte character starting at s (at most n bytes) into a
 * char16_t stored through pc16.  Returns the number of bytes consumed, or
 * (size_t)-1 / (size_t)-2 on an invalid / incomplete sequence. */
size_t mbrtoc16 (char16_t *__restrict__ pc16,
		 const char *__restrict__ s,
		 size_t n,
		 mbstate_t *__restrict__ ps);

/* Convert the char16_t value c16 into its multibyte representation stored
 * at s.  Returns the number of bytes written, or (size_t)-1 on error. */
size_t c16rtomb (char *__restrict__ s,
		 char16_t c16,
		 mbstate_t *__restrict__ ps);

/* Convert the multibyte character starting at s (at most n bytes) into a
 * char32_t stored through pc32.  Returns the number of bytes consumed, or
 * (size_t)-1 / (size_t)-2 on an invalid / incomplete sequence. */
size_t mbrtoc32 (char32_t *__restrict__ pc32,
		 const char *__restrict__ s,
		 size_t n,
		 mbstate_t *__restrict__ ps);

/* Convert the char32_t value c32 into its multibyte representation stored
 * at s.  Returns the number of bytes written, or (size_t)-1 on error. */
size_t c32rtomb (char *__restrict__ s,
		 char32_t c32,
		 mbstate_t *__restrict__ ps);

#ifdef __cplusplus
}
#endif

#endif /* __UCHAR_H */

/**
 * This file has no copyright assigned and is placed in the Public Domain.
 * This file is part of the w64 mingw-runtime package.
 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
 */
/* ISO C1x Unicode utilities
 * Based on ISO/IEC SC22/WG14 9899 TR 19769 (SC22 N1326)
 *
 *  THIS SOFTWARE IS NOT COPYRIGHTED
 *
 *  This source code is offered for use in the public domain. You may
 *  use, modify or distribute it freely.
 *
 *  This code is distributed in the hope that it will be useful but
 *  WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESS OR IMPLIED ARE HEREBY
 *  DISCLAIMED. This includes but is not limited to warranties of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 *  Date: 2011-09-27
 */

#include <errno.h>
#include <uchar.h>

/* Convert one multibyte character to a char16_t by delegating to mbrtowc().
 *
 * pc16   receives the converted character; may be NULL to discard it.
 * s      multibyte input; NULL is forwarded to mbrtowc() unchanged.
 * n      maximum number of bytes of s to examine.
 * state  conversion state, forwarded to mbrtowc().
 *
 * Returns whatever mbrtowc() returns.  In addition, if the converted wide
 * character does not fit into a single 16-bit unit (only possible where
 * wchar_t is wider than 16 bits), errno is set to EILSEQ and (size_t)-1 is
 * returned, because surrogate pairs are not generated here.
 */
size_t mbrtoc16 (char16_t *__restrict__ pc16,
		 const char *__restrict__ s,
		 size_t n,
		 mbstate_t *__restrict__ state)
{
    /* Convert into a real wchar_t instead of casting pc16 to wchar_t *:
     * the cast overruns *pc16 wherever wchar_t is wider than char16_t. */
    wchar_t wc = 0;
    size_t ret = mbrtowc(&wc, s, n, state);

    if (ret == (size_t)-1 || ret == (size_t)-2)
    {
        return ret;
    }

    if ((unsigned long)wc > 0xFFFFul)
    {
        errno = EILSEQ;
        return (size_t)-1;
    }

    /* With s == NULL mbrtowc() stores nothing, so neither do we. */
    if (pc16 != NULL && s != NULL)
    {
        *pc16 = (char16_t)wc;
    }
    return ret;
}

/* Convert a single char16_t to its multibyte form by delegating to wcrtomb().
 *
 * s      destination buffer, forwarded to wcrtomb().
 * c16    the 16-bit character to convert.
 * state  conversion state, forwarded to wcrtomb().
 *
 * Returns whatever wcrtomb() returns.  This relies on wchar_t being
 * compatible with char16_t, as it is expected to be on Windows.
 */
size_t c16rtomb (char *__restrict__ s,
		 char16_t c16,
		 mbstate_t *__restrict__ state)
{
    const wchar_t wide = (wchar_t)c16;
    size_t written = wcrtomb(s, wide, state);

    return written;
}

/* Decode one UTF-8 encoded character into a char32_t.
 *
 * pc32  receives the decoded code point; may be NULL to discard it.
 * s     UTF-8 input; NULL is treated like an empty string (returns 0).
 * n     maximum number of bytes of s to examine.
 * ps    unused: no partial-sequence state is kept, so after a (size_t)-2
 *       return the caller has to present the whole sequence again.
 *
 * The input is always interpreted as UTF-8, regardless of the locale.
 *
 * Returns 0 for the null character, the number of bytes consumed (1..4)
 * for any other valid character, (size_t)-2 if the first n bytes are a
 * valid but incomplete sequence, or (size_t)-1 with errno set to EILSEQ
 * for an invalid sequence (bad lead or continuation byte, overlong form,
 * surrogate, or value above U+10FFFF).
 */
size_t mbrtoc32 (char32_t *__restrict__ pc32,
		 const char *__restrict__ s,
		 size_t n,
		 mbstate_t *__restrict__ ps)
{
    /* Work on unsigned bytes so that masks and comparisons do not depend
     * on the signedness of plain char. */
    const unsigned char *p = (const unsigned char *)s;
    char32_t c;
    char32_t min;	/* smallest code point that needs `len` bytes */
    size_t len;
    size_t i;

    (void)ps;

    if (s == NULL)
    {
        return 0;
    }
    if (n == 0)
    {
        return (size_t)-2;
    }

    if (p[0] < 0x80)			/* 0xxxxxxx: ASCII, 1 byte */
    {
        if (pc32 != NULL)
        {
            *pc32 = p[0];
        }
        return (p[0] != 0) ? 1 : 0;
    }
    else if ((p[0] & 0xE0) == 0xC0)	/* 110xxxxx needs 2 bytes */
    {
        len = 2;
        c = p[0] & 0x1F;
        min = 0x80;
    }
    else if ((p[0] & 0xF0) == 0xE0)	/* 1110xxxx needs 3 bytes */
    {
        len = 3;
        c = p[0] & 0x0F;
        min = 0x800;
    }
    else if ((p[0] & 0xF8) == 0xF0)	/* 11110xxx needs 4 bytes */
    {
        len = 4;
        c = p[0] & 0x07;
        min = 0x10000;
    }
    else				/* stray continuation or invalid lead */
    {
        errno = EILSEQ;
        return (size_t)-1;
    }

    /* Accumulate the continuation bytes that are available; each one must
     * have the form 10xxxxxx. */
    for (i = 1; i < len && i < n; i++)
    {
        if ((p[i] & 0xC0) != 0x80)
        {
            errno = EILSEQ;
            return (size_t)-1;
        }
        c = (c << 6) | (char32_t)(p[i] & 0x3F);
    }

    if (i < len)			/* ran out of input: incomplete */
    {
        return (size_t)-2;
    }

    /* Reject overlong encodings, UTF-16 surrogates and out-of-range values. */
    if (c < min || c > 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF))
    {
        errno = EILSEQ;
        return (size_t)-1;
    }

    if (pc32 != NULL)
    {
        *pc32 = c;
    }
    return len;
}

/* Encode one char32_t code point as UTF-8.
 *
 * s    destination buffer with room for up to 4 bytes; if NULL nothing is
 *      written and 1 is returned (the length of an encoded null character).
 * c32  the code point to encode.
 * ps   unused: the UTF-8 encoding is stateless.
 *
 * The output is always UTF-8, regardless of the locale.
 *
 * Returns the number of bytes written (1..4), or (size_t)-1 with errno set
 * to EILSEQ if c32 is a UTF-16 surrogate or lies above U+10FFFF.
 */
size_t c32rtomb (char *__restrict__ s,
		 char32_t c32,
		 mbstate_t *__restrict__ ps)
{
    /* Store through unsigned char so that values >= 0x80 are written
     * without relying on the signedness of plain char. */
    unsigned char *out = (unsigned char *)s;

    (void)ps;

    if (s == NULL)
    {
        return 1;
    }

    /* Surrogates and values past U+10FFFF are not valid code points. */
    if (c32 > 0x10FFFF || (c32 >= 0xD800 && c32 <= 0xDFFF))
    {
        errno = EILSEQ;
        return (size_t)-1;
    }

    if (c32 <= 0x7F)			/* 7 bits needs 1 byte */
    {
        out[0] = (unsigned char)c32;
        return 1;
    }
    if (c32 <= 0x7FF)			/* 11 bits needs 2 bytes */
    {
        out[0] = (unsigned char)(0xC0 | (c32 >> 6));
        out[1] = (unsigned char)(0x80 | (c32 & 0x3F));
        return 2;
    }
    if (c32 <= 0xFFFF)			/* 16 bits needs 3 bytes */
    {
        out[0] = (unsigned char)(0xE0 | (c32 >> 12));
        out[1] = (unsigned char)(0x80 | ((c32 >> 6) & 0x3F));
        out[2] = (unsigned char)(0x80 | (c32 & 0x3F));
        return 3;
    }

    /* 21 bits needs 4 bytes */
    out[0] = (unsigned char)(0xF0 | (c32 >> 18));
    out[1] = (unsigned char)(0x80 | ((c32 >> 12) & 0x3F));
    out[2] = (unsigned char)(0x80 | ((c32 >> 6) & 0x3F));
    out[3] = (unsigned char)(0x80 | (c32 & 0x3F));
    return 4;
}

------------------------------------------------------------------------------
All the data continuously generated in your IT infrastructure contains a
definitive record of customers, application performance, security
threats, fraudulent activity and more. Splunk takes this data and makes
sense of it. Business sense. IT sense. Common sense.
http://p.sf.net/sfu/splunk-d2dcopy1
_______________________________________________
Mingw-w64-public mailing list
Mingw-w64-public@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to