This patch implements _Escaping_sink, which stores characters in a
local (stack) buffer and, when the buffer is full, writes the escaped
contents to the underlying sink.

To support that, we define the __write_escaped_unicode_part function,
which takes __str and __prev_esc by reference. The __prev_esc value is
updated based on the last written character. The __str is left
non-empty if the buffer ends with an incomplete code point sequence;
in that case those characters are not written. The _Escaping_sink
copies such characters to the front of the buffer, so that the full
representation can be reconstructed.

__formatter_str::_M_format_range now uses the _Escaping_sink to escape
any non-contiguous sequence of characters.

This addresses PR119820 by completely removing the code that
constructed the string.

        PR libstdc++/119820

libstdc++-v3/ChangeLog:

        * include/std/format (__format::__write_escape_seqs)
        (__format::_Escaping_sink): Define.
        (__format::__write_escaped_unicode_part): Extract from
        __format::__write_escaped_unicode.
        (__format::__write_escaped_unicode): Forward to
        __write_escaped_unicode_part.
        (__formatter_str::_M_format_range): Use _Escaping_sink.
        * testsuite/std/format/ranges/string.cc: New tests for
        characters whose code points will be split between buffers.
---
There is a Polish saying about "shooting a mosquito with a cannonball",
and using _Escaping_sink to resolve PR119820 feels that way.
However, I had already implemented _Escaping_sink some time ago; it
was sitting in my repository, waiting for additional tests where a
character is split between buffers to be written, and this issue
pushed me to add those tests.
And, continuing the analogy, the mosquitoes are eliminated, and
we got a very nice cannon. So it seems worth doing.

Testing on x86_64-linux. The std/format/* tests passed.
OK for trunk?

 libstdc++-v3/include/std/format               | 198 +++++++++++++-----
 .../testsuite/std/format/ranges/string.cc     |  34 +++
 2 files changed, 177 insertions(+), 55 deletions(-)

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index d584b81c78a..eea35df4835 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -105,6 +105,7 @@ namespace __format
   template<typename _CharT> class _Sink;
   template<typename _CharT> class _Fixedbuf_sink;
   template<typename _Out, typename _CharT> class _Padding_sink;
+  template<typename _Out, typename _CharT> class _Escaping_sink;
 
   // Output iterator that writes to a type-erase character sink.
   template<typename _CharT>
@@ -1062,6 +1063,17 @@ namespace __format
       return ++__out;
     }
 
+  template<typename _Out, typename _CharT>
+    _Out
+    __write_escape_seqs(_Out __out, basic_string_view<_CharT> __units)
+    {
+      using _UChar = make_unsigned_t<_CharT>;
+      for (_CharT __c : __units)
+       __out = __format::__write_escape_seq(
+                 __out, static_cast<_UChar>(__c), _Escapes<_CharT>::_S_x());
+      return __out;
+    }
+
   template<typename _Out, typename _CharT>
     _Out
     __write_escaped_char(_Out __out, _CharT __c)
@@ -1120,12 +1132,10 @@ namespace __format
 
   template<typename _CharT, typename _Out>
     _Out
-    __write_escaped_unicode(_Out __out,
-                           basic_string_view<_CharT> __str,
-                           _Term_char __term)
+    __write_escaped_unicode_part(_Out __out, basic_string_view<_CharT>& __str,
+                                bool& __prev_esc, _Term_char __term)
     {
       using _Str_view = basic_string_view<_CharT>;
-      using _UChar = make_unsigned_t<_CharT>;
       using _Esc = _Escapes<_CharT>;
 
       static constexpr char32_t __replace = U'\uFFFD';
@@ -1139,10 +1149,10 @@ namespace __format
        }();
 
       __unicode::_Utf_view<char32_t, _Str_view> __v(std::move(__str));
+      __str = {};
+
       auto __first = __v.begin();
       auto const __last = __v.end();
-
-      bool __prev_esc = true;
       while (__first != __last)
        {
          bool __esc_ascii = false;
@@ -1181,15 +1191,32 @@ namespace __format
            __out = __format::__write_escaped_char(__out, *__first.base());
          else if (__esc_unicode)
            __out = __format::__write_escape_seq(__out, *__first, _Esc::_S_u());
-         else // __esc_replace
-           for (_CharT __c : _Str_view(__first.base(), __first._M_units()))
-             __out = __format::__write_escape_seq(__out,
-                                                  static_cast<_UChar>(__c),
-                                                  _Esc::_S_x());
+         // __esc_replace
+         else if (_Str_view __units(__first.base(), __first._M_units());
+                  __units.end() != __last.base())
+           __out = __format::__write_escape_seqs(__out, __units);
+         else
+           {
+             __str = __units;
+             return __out;
+           }
+
          __prev_esc = true;
          ++__first;
-
        }
+
+      return __out;
+    }
+
+  template<typename _CharT, typename _Out>
+    _Out
+    __write_escaped_unicode(_Out __out, basic_string_view<_CharT> __str,
+                           _Term_char __term)
+    {
+      bool __prev_escape = true;
+      __out = __format::__write_escaped_unicode_part(__out, __str,
+                                                    __prev_escape, __term);
+      __out = __format::__write_escape_seqs(__out, __str);
       return __out;
     }
 
@@ -1408,55 +1435,28 @@ namespace __format
                                 size_t(ranges::distance(__rg)));
              return format(__str, __fc);
            }
-         else if (!_M_spec._M_debug)
+         else
            {
+             auto __handle_debug = [this, &__rg]<typename _NOut>(_NOut __nout)
+               {
+                  if (!_M_spec._M_debug)
+                   return ranges::copy(__rg, std::move(__nout)).out; 
+                    
+                   _Escaping_sink<_NOut, _CharT> 
+                    __sink(std::move(__nout), _Term_quote);
+                  ranges::copy(__rg, __sink.out());
+                  return __sink._M_finish();
+                };
+             
              const size_t __padwidth = _M_spec._M_get_width(__fc);
              if (__padwidth == 0 && _M_spec._M_prec_kind == _WP_none)
-               return ranges::copy(__rg, __fc.out()).out;
+               return __handle_debug(__fc.out());
 
-             _Padding_sink<_Out, _CharT> __sink(__fc.out(), __padwidth,
-                                                
_M_spec._M_get_precision(__fc));
-             ranges::copy(__rg, __sink.out());
+             _Padding_sink<_Out, _CharT> 
+               __sink(__fc.out(), __padwidth, _M_spec._M_get_precision(__fc));
+             __handle_debug(__sink.out());
              return __sink._M_finish(_M_spec._M_align, _M_spec._M_fill);
            }
-         else if constexpr (ranges::forward_range<_Rg> || 
ranges::sized_range<_Rg>)
-           {
-             const size_t __n(ranges::distance(__rg));
-             size_t __w = __n;
-             if constexpr (!__unicode::__literal_encoding_is_unicode<_CharT>())
-               if (size_t __max = _M_spec._M_get_precision(__fc); __n > __max)
-                 __w == __max;
-
-             if (__w <= __format::__stackbuf_size<_CharT>)
-               {
-                 _CharT __buf[__format::__stackbuf_size<_CharT>];
-                 ranges::copy_n(ranges::begin(__rg), __w, __buf);
-                 return _M_format_escaped(_String_view(__buf, __n), __fc);
-               }
-             else if constexpr (ranges::random_access_range<_Rg>)
-               {
-                 ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
-                 ranges::subrange __sub(__first, __first + __w);
-                 return _M_format_escaped(_String(from_range, __sub), __fc);
-               }
-             else if (__w <= __n)
-               {
-                 ranges::subrange __sub(
-                   counted_iterator(ranges::begin(__rg), __w),
-                   default_sentinel);
-                 return _M_format_escaped(_String(from_range, __sub), __fc);
-               }
-             else if constexpr (ranges::sized_range<_Rg>)
-               return _M_format_escaped(_String(from_range, __rg), __fc);
-             else
-               {
-                 // N.B. preserve the computed size
-                 ranges::subrange __sub(__rg, __n);
-                 return _M_format_escaped(_String(from_range, __sub), __fc);
-               }
-           }
-         else
-           return _M_format_escaped(_String(from_range, __rg), __fc);
        }
 
       constexpr void
@@ -3888,6 +3888,94 @@ namespace __format
       }
     };
 
+  template<typename _Out, typename _CharT>
+    class _Escaping_sink : public _Buf_sink<_CharT>
+    {
+      using _Esc = _Escapes<_CharT>;
+
+      _Out _M_out;
+      _Term_char _M_term : 2;
+      unsigned _M_prev_escape : 1;
+      unsigned _M_out_discards : 1;
+
+      void
+      _M_sync_discarding()
+      {
+       if constexpr (is_same_v<_Out, _Sink_iter<_CharT>>)
+         _M_out_discards = _M_out._M_discarding();
+      }
+
+      void
+      _M_write()
+      {
+       span<_CharT> __bytes = this->_M_used();
+       basic_string_view<_CharT> __str(__bytes.data(), __bytes.size());
+
+       size_t __rem = 0;
+        if constexpr (__unicode::__literal_encoding_is_unicode<_CharT>())
+         {
+           bool __prev_escape = _M_prev_escape; 
+           _M_out = __format::__write_escaped_unicode_part(
+                      std::move(_M_out), __str, __prev_escape, _M_term);
+            _M_prev_escape = __prev_escape;
+
+           __rem = __str.size();
+           if (__rem > 0 && __str.data() != this->_M_buf) [[unlikely]]
+             ranges::move(__str, this->_M_buf);
+         }
+        else
+          _M_out = __format::__write_escaped_ascii(
+                     std::move(_M_out), __str, _M_term);
+
+       this->_M_reset(this->_M_buf, __rem);
+        _M_sync_discarding();
+      }
+
+      void
+      _M_overflow() override
+      {
+       if (_M_out_discards)
+         this->_M_rewind();
+       else
+         _M_write();
+      }
+
+      bool
+      _M_discarding() const override
+      { return _M_out_discards; }
+
+    public:
+      [[__gnu__::__always_inline__]]
+      explicit
+      _Escaping_sink(_Out __out, _Term_char __term)
+      : _M_out(std::move(__out)), _M_term(__term),
+       _M_prev_escape(true), _M_out_discards(false)
+      { 
+        _M_out = __format::__write(std::move(_M_out), _Esc::_S_term(_M_term));
+       _M_sync_discarding();
+      }
+
+      _Out
+      _M_finish()
+      {
+       if (_M_out_discards)
+         return std::move(_M_out);
+
+       if (!this->_M_used().empty())
+       {
+         _M_write();
+          if constexpr (__unicode::__literal_encoding_is_unicode<_CharT>())
+           if (auto __rem = this->_M_used(); !__rem.empty())
+             {
+               basic_string_view<_CharT> __str(__rem.data(), __rem.size());
+               _M_out = __format::__write_escape_seqs(std::move(_M_out), 
__str);
+             }
+       }
+       return __format::__write(std::move(_M_out), _Esc::_S_term(_M_term));
+
+      }
+    };
+
   enum class _Arg_t : unsigned char {
     _Arg_none, _Arg_bool, _Arg_c, _Arg_i, _Arg_u, _Arg_ll, _Arg_ull,
     _Arg_flt, _Arg_dbl, _Arg_ldbl, _Arg_str, _Arg_sv, _Arg_ptr, _Arg_handle,
diff --git a/libstdc++-v3/testsuite/std/format/ranges/string.cc b/libstdc++-v3/testsuite/std/format/ranges/string.cc
index 99e5eaf411f..0e856b32d44 100644
--- a/libstdc++-v3/testsuite/std/format/ranges/string.cc
+++ b/libstdc++-v3/testsuite/std/format/ranges/string.cc
@@ -279,6 +279,39 @@ void test_padding()
   VERIFY( strip_prefix(resv, 46, '*') );
   VERIFY( strip_quotes(resv) );
   VERIFY( resv == in );
+
+  // width is 5, size is 15
+  in = "\u2160\u2161\u2162\u2163\u2164";
+  in += in; // width is 10, size is 30
+  in += in; // width is 20, size is 60
+  in += in; // width is 40, size is 120
+  in += in; // width is 80, size is 240
+  in += in; // width is 160, size is 480
+
+  lc = std::forward_list<char>(std::from_range, in);
+
+  resv = res = std::format("{:s}", lc);
+  VERIFY( resv == in );
+
+  resv = res = std::format("{:*>10s}", lc);
+  VERIFY( resv == in );
+
+  resv = res = std::format("{:*>200s}", lc);
+  VERIFY( strip_prefix(resv, 40, '*') );
+  VERIFY( resv == in );
+
+  resv = res = std::format("{:?s}", lc);
+  VERIFY( strip_quotes(resv) );
+  VERIFY( resv == in );
+
+  resv = res = std::format("{:*>10?s}", lc);
+  VERIFY( strip_quotes(resv) );
+  VERIFY( resv == in );
+
+  resv = res = std::format("{:*>200?s}", lc);
+  VERIFY( strip_prefix(resv, 38, '*') );
+  VERIFY( strip_quotes(resv) );
+  VERIFY( resv == in );
 }
 
 int main()
@@ -287,4 +320,5 @@ int main()
   test_outputs<char>();
   test_outputs<wchar_t>();
   test_nested();
+  test_padding();
 }
-- 
2.49.0

Reply via email to