https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79280
Bug ID: 79280
Summary: mbtowc converts only one byte
Product: gcc
Version: 4.8.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: libgcc
Assignee: unassigned at gcc dot gnu.org
Reporter: janturon at email dot cz
Target Milestone: ---
mbtowc doesn't seem to work with characters longer than one byte; see the
following snippet:
/**
 * Decode the first UTF-8 encoded character of a NUL-terminated string.
 *
 * Sequences of one to six bytes are accepted (the 5- and 6-byte forms are
 * not part of modern UTF-8 but are kept because earlier versions of this
 * function decoded them). Overlong encodings and surrogate values are not
 * rejected.
 *
 * @param str  NUL-terminated byte string; must not be NULL.
 * @return     the decoded code point (0 for an empty string), or -1 when
 *             the string does not start with a well-formed sequence: a
 *             stray continuation byte, a 0xFE/0xFF lead byte, or a
 *             sequence cut short by the terminator or by a byte that is
 *             not a continuation byte.
 */
int u8toint(const char *str)
{
    /* Work on unsigned bytes: plain char may be signed, which would turn
       every non-ASCII byte negative. */
    const unsigned char *s = (const unsigned char *)str;
    unsigned char lead = s[0];

    if (lead < 0x80) {
        return lead;                /* plain ASCII, including the terminator */
    }

    /* The number of 1-bits following the top bit of the lead byte is the
       number of continuation bytes that must follow it. */
    int extra = 0;
    while (lead & (0x40 >> extra)) {
        ++extra;
    }

    /* extra == 0: lead is itself a continuation byte (10xxxxxx).
       extra  > 5: 0xFE / 0xFF, which would need more than 31 payload bits
       and made the shift count exceed the width of int. */
    if (extra == 0 || extra > 5) {
        return -1;
    }

    /* Payload bits of the lead byte are those below its length marker. */
    unsigned int code = lead & (0x3Fu >> extra);

    for (int i = 1; i <= extra; ++i) {
        unsigned char cont = s[i];

        /* Every following byte must look like 10xxxxxx. The terminator
           fails this check too, so a truncated sequence stops the loop
           before anything beyond the end of the string is read. */
        if ((cont & 0xC0) != 0x80) {
            return -1;
        }
        code = (code << 6) | (cont & 0x3Fu);
    }

    /* At most 1 + 5 * 6 = 31 bits were accumulated, so this fits in int. */
    return (int)code;
}
/* Overlays a wide character with its first two bytes of storage so that
   the stored value can be dumped byte by byte. Which of the two bytes
   holds the low-order bits depends on the byte order of the target. */
union data {
    wchar_t w;                  /* the wide character being inspected */
    struct {
        unsigned char b1;       /* byte at offset 0 of the storage */
        unsigned char b2;       /* byte at offset 1 of the storage */
    } bytes;
};

/* One object per conversion method compared in the report. */
union data a;
union data b;
union data c;
mbtowc(&(a.w),"ř",6);
b.w = u8toint("ř");
c.w = L'ř';
printf("\na = %hhx%hhx", a.bytes.b2, a.bytes.b1); // a = 0c5 wrong
printf("\nb = %hhx%hhx", b.bytes.b2, b.bytes.b1); // b = 159 right
printf("\nc = %hhx%hhx", c.bytes.b2, c.bytes.b1); // c = 159 right