https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70893
--- Comment #6 from Кирилл <kirillnow at gmail dot com> --- (In reply to Jonathan Wakely from comment #4) > If you think there's a bug here please provide a testcase that compiles and > produces an incorrect result. Here: #include <iostream> #include <string> #include <locale> #include <codecvt> using namespace std; //------------------------------------------------------------------------------------------------- /** Extensively tested and works.*/ template<bool big_e> string my_utf16_to_utf8(const char *s, size_t sz) { uint32_t ucs=0; sz &= ~1; //odd sizes are rounded down string rtv; rtv.resize(sz+sz/2); char* o=&rtv[0]; for(const uint8_t *i=(uint8_t*)s, *e=i+sz; i<e; i+=2, ++o) { ucs = uint32_t(i[!big_e])<<8 | i[big_e]; if((ucs&0xFC00)==0xD800 && i+2<e && (i[2+!big_e]&0xFC)==0xDC) { i+=2; ucs &= 0x03FF; ucs = (ucs<<10 | uint32_t(i[!big_e]&3)<<8 | i[big_e]) + 0x010000; } //pass standalone surrogates as-is if(ucs&(~0x7FF)) { if(ucs&(~0xFFFF)) { *o=ucs>>18|0xF0; ++o; *o=(ucs>>12 & 0x3F)|0x80; ++o; *o=(ucs>>6 & 0x3F)|0x80; ++o; *o=(ucs&0x3F)|0x80; } else { *o=ucs>>12|0xE0; ++o; *o=(ucs>>6&0x3F)|0x80; ++o; *o=(ucs&0x3F)|0x80; } } else { if(ucs&(~0x7F)) { *o=ucs>>6|0xC0; ++o; *o=(ucs&0x3F)|0x80; } else *o = ucs; } // if((o-&rtv[0]>=rtv.size()) throw range_error("utf16_to_utf8()"); //debug } rtv.resize(o-&rtv[0]); rtv.shrink_to_fit(); return rtv; } //------------------------------------------------------------------------------------------------- template<bool big_e> inline std::string std_utf16_to_utf8(const char *s, size_t sz) { using namespace std; sz &= ~1; wstring_convert<codecvt_utf8_utf16 <char16_t, 0x10ffff, (big_e)?(codecvt_mode)0:little_endian>, char16_t> conv; try { return conv.to_bytes((const char16_t*)s, (const char16_t*)(s+sz)); } catch(...) { return string{}; } } //------------------------------------------------------------------------------------------------- int main(int argc, const char** argv) { static constexpr const uint8_t txt_utf16be[] = { 0x01, 0x31, 0x00, 0x6e, 0x00, 0x74, 0x02, 0x59, 0x00, 0x67, 0x00, 0xe6, 0x00, 0x6c, 0x00, 0xe6, 0x00, 0x6b, 0x00, 0x74, 0x01, 0x31, 0x00, 0x63, 0x00, 0x2c, 0x00, 0x20, 0x00, 0x62, 0x00, 0x61, 0x01, 0x31, 0x00, 0x20, 0x00, 0xf0, 0x02, 0x59, 0x00, 0x20, 0x00, 0x62, 0x00, 0x69, 0x02, 0xd0, 0x00, 0x73, 0x00, 0x74, 0x00, 0x69, 0x00, 0x20, 0x00, 0x62, 0x02, 0x54, 0x01, 0x31, 0x00, 0x7a, 0x00, 0x0a, 0x00, 0x0a, 0x00, 0x6c, 0x01, 0x31, 0x00, 0x72, 0x01, 0x31, 0x00, 0x6b, 0x00, 0x73, 0x00, 0x20, 0x00, 0x74, 0x00, 0x72, 0x00, 0xe6, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x6b, 0x00, 0x72, 0x00, 0x61, 0x01, 0x31, 0x00, 0x62, 0x00, 0x64, 0x00, 0x20, 0x01, 0x31, 0x00, 0x6e, 0x00, 0x74, 0x00, 0x75, 0x00, 0x20, 0x00, 0x69, 0x00, 0x70, 0x00, 0x61, 0x00, 0x20, 0x00, 0x6a, 0x00, 0x75, 0x02, 0xd0, 0x00, 0x73, 0x01, 0x31, 0x00, 0x6e, 0x00, 0x67, 0x00, 0x20, 0x00, 0xe6, 0x00, 0x6e, 0x00, 0x20, 0x00, 0x69, 0x02, 0xd0, 0x00, 0x73, 0x00, 0x74, 0x00, 0x20, 0x00, 0x6d, 0x01, 0x31, 0x00, 0x64, 0x00, 0x6c, 0x02, 0x59, 0x00, 0x6e, 0x00, 0x64, 0x00, 0x73, 0x00, 0x20, 0x00, 0xe6, 0x00, 0x6b, 0x00, 0x73, 0x02, 0x59, 0x00, 0x6e, 0x00, 0x74, 0x00, 0x20, 0x00, 0x62, 0x00, 0x61, 0x01, 0x31, 0x00, 0x20, 0x00, 0x72, 0x02, 0x52, 0x00, 0x62, 0x02, 0x59, 0x00, 0x74, 0x00, 0x20, 0x00, 0x62, 0x00, 0x72, 0x00, 0x65, 0x01, 0x31, 0x00, 0x64, 0x00, 0x69, 0x00, 0x0a, 0x00, 0x0a, 0x00, 0x73, 0x02, 0x52, 0x00, 0x72, 0x00, 0x69, 0x00, 0x20, 0x02, 0x59, 0x00, 0x62, 0x00, 0x61, 0x02, 0x8a, 0x00, 0x74, 0x00, 0x20, 0x00, 0xf0, 0x02, 0x59, 0x00, 0x20, 0x00, 0x6c, 0x00, 0xe6, 0x00, 0x6b, 0x00, 0x20, 0x02, 0x59, 0x00, 0x76, 0x00, 0x20, 0x00, 0x73, 0x00, 0x74, 0x00, 0x72, 0x02, 0x5b, 0x00, 0x73, 0x00, 0x20, 0x01, 0x31, 0x00, 0x6e, 0x00, 0x66, 0x02, 0x54, 0x02, 0xd0, 0x00, 0x6d, 0x00, 0x65, 0x01, 0x31, 0x02, 0x83, 0x02, 0x59, 0x00, 0x6e, 0x00, 0x2c, 0x00, 0x20, 0x00, 0x62, 0x02, 0x8c, 0x00, 0x74, 0x00, 0x20, 0x00, 0x73, 0x01, 0x31, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x20, 0x00, 0xf0, 0x01, 0x31, 0x00, 0x73, 0x00, 0x20, 0x01, 0x31, 0x00, 0x7a, 0x00, 0x20, 0x02, 0x59, 0x00, 0x20, 0x00, 0x72, 0x00, 0xe6, 0x00, 0x70, 0x00, 0x20, 0x01, 0x31, 0x00, 0x74, 0x00, 0x20, 0x00, 0x77, 0x02, 0x8c, 0x00, 0x64, 0x02, 0x59, 0x00, 0x6e, 0x00, 0x74, 0x00, 0x20, 0x00, 0x62, 0x00, 0x69, 0x00, 0x20, 0x00, 0x0a, 0x00, 0x6d, 0x02, 0x8c, 0x02, 0xa7, 0x00, 0x20, 0x00, 0x6a, 0x00, 0x75, 0x02, 0xd0, 0x00, 0x73, 0x00, 0x0a, 0x00, 0x0a, }; constexpr size_t txt_sz = (sizeof txt_utf16be)/(sizeof txt_utf16be[0]); cout << "My conversion: " << endl; cout << my_utf16_to_utf8<true>((char*)txt_utf16be, txt_sz) << endl; cout << "Codecvt conversion: " << endl; cout << std_utf16_to_utf8<true>((char*)txt_utf16be, txt_sz); return 0; }