On 04/03/15 17:20 +0000, Jonathan Wakely wrote:
To fix the non-portable 22_locale/conversions/string/2.cc test, I
changed it to use char16_t and char32_t, where I can reliably create an
invalid sequence that causes a conversion error. That revealed some
more problems in the Unicode conversion utilities, fixed by this patch
and verified by the new tests.
Most of the changes in codecvt.cc are just defining convenience
constants and inline functions, but there are some minor bugs fixed in
UTF-16 error handling too.
[...]
* testsuite/22_locale/conversions/string/2.cc: Use char16_t and
char32_t instead of wchar_t.
* testsuite/22_locale/conversions/string/3.cc: New.
I changed the 22_locale/conversions/string/2.cc and
22_locale/conversions/string/3.cc tests to use UTF-8 as well as UTF-16
and UTF-32, and that revealed another problem in wstring_convert: I
wasn't handling the noconv case. Fixed by this patch.
Tested x86_64-linux, committed to trunk.
commit 1e6eeea711f42aa908e4da8064bc9f4e1859b6bd
Author: Jonathan Wakely <jwak...@redhat.com>
Date: Thu Mar 5 12:26:25 2015 +0000
* include/bits/locale_conv.h (wstring_convert::_M_conv): Handle
noconv result.
* testsuite/22_locale/conversions/string/2.cc: Also test UTF-8.
* testsuite/22_locale/conversions/string/3.cc: Likewise, and UTF-16.
diff --git a/libstdc++-v3/include/bits/locale_conv.h b/libstdc++-v3/include/bits/locale_conv.h
index b53754d..9b49617 100644
--- a/libstdc++-v3/include/bits/locale_conv.h
+++ b/libstdc++-v3/include/bits/locale_conv.h
@@ -213,6 +213,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
while (__result == codecvt_base::partial && __next != __last
&& (__outstr.size() - __outchars) < __maxlen);
+ if (__result == codecvt_base::noconv)
+ {
+ __outstr.assign(__first, __last);
+ _M_count = __outstr.size();
+ return __outstr;
+ }
+
__outstr.resize(__outchars);
_M_count = __next - __first;
diff --git a/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc b/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc
index 07d2b52..9341f892 100644
--- a/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc
+++ b/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc
@@ -37,6 +37,24 @@ using std::u32string;
void test01()
{
+ typedef str_conv<char> sc;
+
+ const sc::byte_string berr = "invalid wide string";
+ const sc::wide_string werr = u8"invalid byte string";
+
+ sc c(berr, werr);
+ string input = "Stop";
+ input += char(0xFF);
+ string woutput = c.from_bytes(input);
+ VERIFY( input == woutput ); // noconv case doesn't detect invalid input
+ string winput = u8"Stop";
+ winput += char(0xFF);
+ string output = c.to_bytes(winput);
+ VERIFY( winput == output ); // noconv case doesn't detect invalid input
+}
+
+void test02()
+{
typedef str_conv<char16_t> sc;
const sc::byte_string berr = "invalid wide string";
@@ -53,7 +71,7 @@ void test01()
VERIFY( berr == output );
}
-void test02()
+void test03()
{
typedef str_conv<char32_t> sc;
@@ -75,4 +93,5 @@ int main()
{
test01();
test02();
+ test03();
}
diff --git a/libstdc++-v3/testsuite/22_locale/conversions/string/3.cc b/libstdc++-v3/testsuite/22_locale/conversions/string/3.cc
index 7c4ac20..6afa62b 100644
--- a/libstdc++-v3/testsuite/22_locale/conversions/string/3.cc
+++ b/libstdc++-v3/testsuite/22_locale/conversions/string/3.cc
@@ -30,12 +30,55 @@ template<typename Elem>
using str_conv = std::wstring_convert<cvt<Elem>, Elem>;
using std::string;
+using std::u16string;
using std::u32string;
// test construction with state, for partial conversions
void test01()
{
+ typedef str_conv<char> wsc;
+
+ wsc c;
+ string input = u8"\u00a3 shillings pence";
+ string woutput = c.from_bytes(input.substr(0, 1));
+ auto partial_state = c.state();
+ auto partial_count = c.converted();
+
+ auto woutput2 = c.from_bytes(u8"state reset on next conversion");
+ VERIFY( woutput2 == u8"state reset on next conversion" );
+
+ wsc c2(new cvt<char>, partial_state);
+ woutput += c2.from_bytes(input.substr(partial_count));
+ VERIFY( u8"\u00a3 shillings pence" == woutput );
+
+ string roundtrip = c2.to_bytes(woutput);
+ VERIFY( input == roundtrip );
+}
+
+void test02()
+{
+ typedef str_conv<char16_t> wsc;
+
+ wsc c;
+ string input = u8"\u00a3 shillings pence";
+ u16string woutput = c.from_bytes(input.substr(0, 1));
+ auto partial_state = c.state();
+ auto partial_count = c.converted();
+
+ auto woutput2 = c.from_bytes(u8"state reset on next conversion");
+ VERIFY( woutput2 == u"state reset on next conversion" );
+
+ wsc c2(new cvt<char16_t>, partial_state);
+ woutput += c2.from_bytes(input.substr(partial_count));
+ VERIFY( u"\u00a3 shillings pence" == woutput );
+
+ string roundtrip = c2.to_bytes(woutput);
+ VERIFY( input == roundtrip );
+}
+
+void test03()
+{
typedef str_conv<char32_t> wsc;
wsc c;
@@ -44,7 +87,7 @@ void test01()
auto partial_state = c.state();
auto partial_count = c.converted();
- auto woutput2 = c.from_bytes("state reset on next conversion");
+ auto woutput2 = c.from_bytes(u8"state reset on next conversion");
VERIFY( woutput2 == U"state reset on next conversion" );
wsc c2(new cvt<char32_t>, partial_state);
@@ -55,7 +98,10 @@ void test01()
VERIFY( input == roundtrip );
}
+
int main()
{
test01();
+ test02();
+ test03();
}