https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79280
/*
 * Martin Liška <marxin at gcc dot gnu.org> changed:
 *
 *            What    |Removed                     |Added
 * ----------------------------------------------------------------------------
 *              Status|UNCONFIRMED                 |RESOLVED
 *                  CC|                            |marxin at gcc dot gnu.org
 *          Resolution|---                         |INVALID
 *
 * --- Comment #1 from Martin Liška <marxin at gcc dot gnu.org> ---
 * You have to call setlocale before using the function. Please take a look
 * here: https://sillymon.ch/posts/unicodeandc.html
 *
 * This works for me:
 *
 * NOTE(review): the program below was collapsed onto a single line in this
 * copy of the message. It has been re-flowed, and u8toint()/main() have been
 * hardened against malformed input and failed library calls; output for
 * well-formed input is unchanged.
 */

#include <stdio.h>  /* printf, fputs */
#include <stdlib.h> /* mbtowc, wchar_t, EXIT_FAILURE */
#include <string.h> /* strlen */
#include <locale.h> /* setlocale */

/*
 * Decode the first UTF-8 encoded character of `str` into its code point.
 *
 * str: NUL-terminated byte string; must not be NULL.
 *
 * Returns the decoded value (>= 0), or -1 when the sequence is malformed:
 * the lead byte is a bare continuation byte, announces more than five
 * continuation bytes, or is followed by a byte that is not of the form
 * 10xxxxxx (which includes the terminating NUL of a truncated string).
 *
 * Lead bytes announcing four or five continuation bytes (the pre-RFC 3629
 * long forms) are still decoded, as the posted version did.
 */
int u8toint(const char *str)
{
    /* Largest continuation count whose shifts still fit in a 32-bit int. */
    enum { MAX_TRAILING = 5 };

    /* Work on unsigned bytes: plain char may be signed. */
    const unsigned char *s = (const unsigned char *)str;
    const unsigned char lead = s[0];

    if (!(lead & 0x80)) {
        return lead; /* plain ASCII */
    }

    /* The run of 1-bits after the top bit is the number of trailing bytes. */
    int trailing = 0;
    for (unsigned bit = 0x40; bit != 0 && (lead & bit); bit >>= 1) {
        ++trailing;
    }

    /*
     * trailing == 0 is a stray continuation byte; anything above MAX_TRAILING
     * (lead bytes 0xFE/0xFF) would shift past the width of int below.
     */
    if (trailing == 0 || trailing > MAX_TRAILING) {
        return -1;
    }

    int result = 0;

    /*
     * Walk forward so that the first non-continuation byte -- in particular
     * the terminating NUL -- stops the scan before anything beyond it is read.
     */
    for (int i = 1; i <= trailing; ++i) {
        if ((s[i] & 0xC0) != 0x80) {
            return -1;
        }
        result |= (s[i] & 0x3F) << (6 * (trailing - i));
    }

    /* The lead byte carries the remaining (6 - trailing) high payload bits. */
    const int lead_mask = (1 << (6 - trailing)) - 1;
    result |= (lead & lead_mask) << (6 * trailing);

    return result;
}

/*
 * Byte-level view of a wide character: b1 and b2 overlay the first two bytes
 * of w's object representation.
 * NOTE(review): printing b2 before b1 yields the low 16 bits of w only on a
 * little-endian host -- confirm before relying on this elsewhere.
 */
union data {
    wchar_t w;
    struct {
        unsigned char b1, b2;
    } bytes;
} a, b, c;

/*
 * Convert the same character three ways -- mbtowc(), u8toint() and a wide
 * character literal -- and print the two low bytes of each result.
 */
int main(void)
{
    /* mbtowc() decodes according to LC_CTYPE, so adopt the environment's locale. */
    if (setlocale(LC_ALL, "") == NULL) {
        fputs("setlocale(LC_ALL, \"\") failed\n", stderr);
        return EXIT_FAILURE;
    }

    const char *str = "řaaaaaaaa";

    /* A negative return means str is not a valid multibyte sequence in this locale. */
    if (mbtowc(&a.w, str, strlen(str)) < 0) {
        fputs("mbtowc: invalid multibyte sequence for the current locale\n", stderr);
        return EXIT_FAILURE;
    }
    b.w = (wchar_t)u8toint("ř");
    c.w = L'ř';

    /*
     * The posted code annotated these lines "a = 0c5 wrong", "b = 159 right"
     * and "c = 159 right"; the 0c5 result presumably came from a run without
     * the setlocale() call above.
     */
    printf("\na = %hhx%hhx", a.bytes.b2, a.bytes.b1);
    printf("\nb = %hhx%hhx", b.bytes.b2, b.bytes.b1);
    printf("\nc = %hhx%hhx", c.bytes.b2, c.bytes.b1);

    return 0;
}