https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79280

Martin Liška <marxin at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |RESOLVED
                 CC|                            |marxin at gcc dot gnu.org
         Resolution|---                         |INVALID

--- Comment #1 from Martin Liška <marxin at gcc dot gnu.org> ---
You have to call setlocale() before using mbtowc(), because the conversion depends on the current locale. Please take a look here:
https://sillymon.ch/posts/unicodeandc.html

This works for me:
#include <stdio.h>      /* printf */
#include <stdlib.h>     /* mbtowc, wchar_t(C) */
#include <string.h>
#include <locale.h>

/*
 * Decode the first UTF-8 encoded character of a NUL-terminated string.
 *
 * str: points at the lead byte of the sequence to decode.
 *
 * Returns the decoded code point (0 for an empty string), or -1 when the
 * sequence is malformed: a stray continuation byte in lead position, a lead
 * byte of 0xFE/0xFF, or a sequence cut short before all of its continuation
 * bytes were seen.
 *
 * Sequences of up to six bytes (the historical 31-bit form) are accepted.
 * Overlong encodings and surrogate values are not rejected.
 */
int u8toint(const char* str) {
  /* Work on unsigned bytes so the result does not depend on char signedness. */
  const unsigned char *s = (const unsigned char *)str;
  const unsigned char lead = s[0];

  if (!(lead & 0x80)) return lead;  /* single-byte (ASCII) character */

  /* Each 1 bit following the top bit of the lead byte announces one
     continuation byte. */
  int extra = 0;
  while (extra < 7 && (lead & (0x40 >> extra))) ++extra;

  /* extra == 0: continuation byte in lead position.
     extra  > 5: 0xFE/0xFF, which never start a sequence. */
  if (extra == 0 || extra > 5) return -1;

  /* The payload bits of the lead byte are the ones below its length marker. */
  unsigned int result = lead & (0x3Fu >> extra);

  for (int i = 1; i <= extra; ++i) {
    /* Every continuation byte must look like 10xxxxxx. Checking in order
       also stops at the terminating NUL, so a truncated sequence is never
       read past the end of the string. */
    if ((s[i] & 0xC0) != 0x80) return -1;
    result = (result << 6) | (s[i] & 0x3Fu);
  }

  /* At most 31 payload bits were accumulated, so the value fits in int. */
  return (int)result;
}

/* Overlays a wide character with the first two bytes of its storage so
   those bytes can be read back individually. */
union data {
  wchar_t w;           /* the wide character being inspected */
  struct {
    unsigned char b1;  /* first byte of the union's storage */
    unsigned char b2;  /* second byte of the union's storage */
  } bytes;
};

/* File-scope instances, zero-initialized at program start. */
union data a;
union data b;
union data c;

/*
 * Obtains the wide-character value of the same character in three ways:
 * mbtowc(), the hand-written u8toint() decoder, and a wide character
 * literal. For each result it prints bytes b2 and b1 of the storage in hex.
 *
 * Returns 0 on success, EXIT_FAILURE if the locale cannot be set or
 * mbtowc() rejects the input.
 */
int main(void)
{
  /* mbtowc() converts according to the current locale, so adopt the locale
     from the environment before calling it. */
  if (setlocale(LC_ALL, "") == NULL) {
    fprintf(stderr, "setlocale(LC_ALL, \"\") failed\n");
    return EXIT_FAILURE;
  }

  const char *str = "řaaaaaaaa";

  /* A negative return means the bytes are not a valid multibyte character
     in the current locale; a.w would then not hold a decoded value. */
  if (mbtowc(&(a.w), str, 10) < 0) {
    fprintf(stderr, "mbtowc failed: input is not valid in the current locale\n");
    return EXIT_FAILURE;
  }
  b.w = u8toint("ř");
  c.w = L'ř';

  /* Expected output for all three is 159, assuming a UTF-8 locale and
     source encoding. "0c5" is the wrong value from the original report. */
  printf("\na = %hhx%hhx", a.bytes.b2, a.bytes.b1); // a = 159 expected (not 0c5)
  printf("\nb = %hhx%hhx", b.bytes.b2, b.bytes.b1); // b = 159 right
  printf("\nc = %hhx%hhx", c.bytes.b2, c.bytes.b1); // c = 159 right

  return 0;
}

Reply via email to