https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70893
--- Comment #6 from Кирилл ---
(In reply to Jonathan Wakely from comment #4)
> If you think there's a bug here please provide a testcase that compiles and
> produces an incorrect result.
Here:
#include
#include
#include
#include
using namespace std;
//-
/** Extensively tested and works.*/
template string my_utf16_to_utf8(const char *s, size_t sz)
{
uint32_t ucs=0; sz &= ~1; //odd sizes are rounded down
string rtv; rtv.resize(sz+sz/2);
char* o=&rtv[0];
for(const uint8_t *i=(uint8_t*)s, *e=i+sz; i>18|0xF0; ++o; *o=(ucs>>12 & 0x3F)|0x80; ++o;
*o=(ucs>>6 & 0x3F)|0x80; ++o; *o=(ucs&0x3F)|0x80;
}
else
{ *o=ucs>>12|0xE0; ++o; *o=(ucs>>6&0x3F)|0x80; ++o; *o=(ucs&0x3F)|0x80;
}
}
else
{
if(ucs&(~0x7F)) { *o=ucs>>6|0xC0; ++o; *o=(ucs&0x3F)|0x80; }
else *o = ucs;
}
// if((o-&rtv[0]>=rtv.size()) throw range_error("utf16_to_utf8()"); //debug
}
rtv.resize(o-&rtv[0]); rtv.shrink_to_fit();
return rtv;
}
//-
template inline std::string std_utf16_to_utf8(const char *s, size_t
sz)
{
using namespace std; sz &= ~1;
wstring_convert, char16_t>
conv;
try
{ return conv.to_bytes((const char16_t*)s, (const char16_t*)(s+sz)); }
catch(...) { return string{}; }
}
//-
int main(int argc, const char** argv)
{
static constexpr const uint8_t txt_utf16be[] = {
0x01, 0x31, 0x00, 0x6e, 0x00, 0x74, 0x02, 0x59, 0x00, 0x67, 0x00, 0xe6,
0x00, 0x6c, 0x00, 0xe6, 0x00, 0x6b, 0x00, 0x74, 0x01, 0x31, 0x00, 0x63,
0x00, 0x2c, 0x00, 0x20, 0x00, 0x62, 0x00, 0x61, 0x01, 0x31, 0x00, 0x20,
0x00, 0xf0, 0x02, 0x59, 0x00, 0x20, 0x00, 0x62, 0x00, 0x69, 0x02, 0xd0,
0x00, 0x73, 0x00, 0x74, 0x00, 0x69, 0x00, 0x20, 0x00, 0x62, 0x02, 0x54,
0x01, 0x31, 0x00, 0x7a, 0x00, 0x0a, 0x00, 0x0a, 0x00, 0x6c, 0x01, 0x31,
0x00, 0x72, 0x01, 0x31, 0x00, 0x6b, 0x00, 0x73, 0x00, 0x20, 0x00, 0x74,
0x00, 0x72, 0x00, 0xe6, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x6b, 0x00, 0x72,
0x00, 0x61, 0x01, 0x31, 0x00, 0x62, 0x00, 0x64, 0x00, 0x20, 0x01, 0x31,
0x00, 0x6e, 0x00, 0x74, 0x00, 0x75, 0x00, 0x20, 0x00, 0x69, 0x00, 0x70,
0x00, 0x61, 0x00, 0x20, 0x00, 0x6a, 0x00, 0x75, 0x02, 0xd0, 0x00, 0x73,
0x01, 0x31, 0x00, 0x6e, 0x00, 0x67, 0x00, 0x20, 0x00, 0xe6, 0x00, 0x6e,
0x00, 0x20, 0x00, 0x69, 0x02, 0xd0, 0x00, 0x73, 0x00, 0x74, 0x00, 0x20,
0x00, 0x6d, 0x01, 0x31, 0x00, 0x64, 0x00, 0x6c, 0x02, 0x59, 0x00, 0x6e,
0x00, 0x64, 0x00, 0x73, 0x00, 0x20, 0x00, 0xe6, 0x00, 0x6b, 0x00, 0x73,
0x02, 0x59, 0x00, 0x6e, 0x00, 0x74, 0x00, 0x20, 0x00, 0x62, 0x00, 0x61,
0x01, 0x31, 0x00, 0x20, 0x00, 0x72, 0x02, 0x52, 0x00, 0x62, 0x02, 0x59,
0x00, 0x74, 0x00, 0x20, 0x00, 0x62, 0x00, 0x72, 0x00, 0x65, 0x01, 0x31,
0x00, 0x64, 0x00, 0x69, 0x00, 0x0a, 0x00, 0x0a, 0x00, 0x73, 0x02, 0x52,
0x00, 0x72, 0x00, 0x69, 0x00, 0x20, 0x02, 0x59, 0x00, 0x62, 0x00, 0x61,
0x02, 0x8a, 0x00, 0x74, 0x00, 0x20, 0x00, 0xf0, 0x02, 0x59, 0x00, 0x20,
0x00, 0x6c, 0x00, 0xe6, 0x00, 0x6b, 0x00, 0x20, 0x02, 0x59, 0x00, 0x76,
0x00, 0x20, 0x00, 0x73, 0x00, 0x74, 0x00, 0x72, 0x02, 0x5b, 0x00, 0x73,
0x00, 0x20, 0x01, 0x31, 0x00, 0x6e, 0x00, 0x66, 0x02, 0x54, 0x02, 0xd0,
0x00, 0x6d, 0x00, 0x65, 0x01, 0x31, 0x02, 0x83, 0x02, 0x59, 0x00, 0x6e,
0x00, 0x2c, 0x00, 0x20, 0x00, 0x62, 0x02, 0x8c, 0x00, 0x74, 0x00, 0x20,
0x00, 0x73, 0x01, 0x31, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x20, 0x00, 0xf0,
0x01, 0x31, 0x00, 0x73, 0x00, 0x20, 0x01, 0x31, 0x00, 0x7a, 0x00, 0x20,
0x02, 0x59, 0x00, 0x20, 0x00, 0x72, 0x00, 0xe6, 0x00, 0x70, 0x00, 0x20,
0x01, 0x31, 0x00, 0x74, 0x00, 0x20, 0x00, 0x77, 0x02, 0x8c, 0x00, 0x64,
0x02, 0x59, 0x00, 0x6e, 0x00, 0x74, 0x00, 0x20, 0x00, 0x62, 0x00, 0x69,
0x00, 0x20, 0x00, 0x0a, 0x00, 0x6d, 0x02, 0x8c, 0x02, 0xa7, 0x00, 0x20,
0x00, 0x6a, 0x00, 0x75, 0x02, 0xd0, 0x00, 0x73, 0x00, 0x0a, 0x00, 0x0a,
};
constexpr size_t txt_sz = (sizeof txt_utf16be)/(sizeof txt_utf16be[0]);
cout << "My conversion: " << endl;
cout << my_utf16_to_utf8((char*)txt_utf16be, txt_sz) << endl;
cout << "Codecvt conversion: " << endl;
cout << std_utf16_to_utf8((char*)txt_utf16be, txt_sz);
return 0;
}