https://gcc.gnu.org/g:0223119f1a6351543c6e96a9735e05cbd4583889

commit r15-5958-g0223119f1a6351543c6e96a9735e05cbd4583889
Author: Jakub Jelinek <ja...@redhat.com>
Date:   Fri Dec 6 09:09:12 2024 +0100

    libcpp, c++: Optimize initializers using #embed in C++
    
    This patch adds similar optimizations to the C++ FE as have been
    implemented earlier in the C FE.
    The libcpp hunk enables use of CPP_EMBED token even for C++, not just
    C; the preprocessor guarantees there is always a CPP_NUMBER CPP_COMMA
    before CPP_EMBED and CPP_COMMA CPP_NUMBER after it which simplifies
    parsing (unless #embed is more than 2GB, in that case it could be
    CPP_NUMBER CPP_COMMA CPP_EMBED CPP_COMMA CPP_EMBED CPP_COMMA CPP_EMBED
    CPP_COMMA CPP_NUMBER etc. with each CPP_EMBED covering at most INT_MAX
    bytes).
    Similarly to the C patch, this patch parses it into RAW_DATA_CST tree
    in the braced initializers (and from there peels into INTEGER_CSTs unless
    it is an initializer of an std::byte array or integral array with CHAR_BIT
    element precision), parses CPP_EMBED in cp_parser_expression into just
    the last INTEGER_CST in it because I think users don't need millions of
    -Wunused-value warnings because they did useless
      int a = (
      #embed "megabyte.dat"
      );
    and so most of the inner INTEGER_CSTs would be there just for the warning,
    and in the rest of contexts (like template argument list, function argument
    list, attribute argument list, ...) parse it into a sequence of INTEGER_CSTs
    (I wrote range/iterator classes to simplify that).
    
    My dumb
    cat embed-11.c
    constexpr unsigned char a[] = {
      #embed "cc1plus"
    };
    const unsigned char *b = a;
    testcase where cc1plus is 492329008 bytes long when configured
    --enable-checking=yes,rtl,extra against recent binutils with .base64 gas
    support results in:
    time ./xg++ -B ./ -S -O2 embed-11.c
    
    real    0m4.350s
    user    0m2.427s
    sys     0m0.830s
    time ./xg++ -B ./ -c -O2 embed-11.c
    
    real    0m6.932s
    user    0m6.034s
    sys     0m0.888s
    (compared to running out of memory or very long compilation).
    On a shorter inclusion,
    cat embed-12.c
    constexpr unsigned char a[] = {
      #embed "xg++"
    };
    const unsigned char *b = a;
    where xg++ is 15225904 bytes long, this takes using GCC with the #embed
    patchset except for this patch:
    time ~/src/gcc/obj36/gcc/xg++ -B ~/src/gcc/obj36/gcc/ -S -O2 embed-12.c
    
    real    0m33.190s
    user    0m32.327s
    sys     0m0.790s
    and with this patch:
    time ./xg++ -B ./ -S -O2 embed-12.c
    
    real    0m0.118s
    user    0m0.090s
    sys     0m0.028s
    
    The patch doesn't change anything on what the first patch in the series
    introduces even for C++, namely that #embed is expanded (actually or as if)
    into a sequence of literals like
    
127,69,76,70,2,1,1,3,0,0,0,0,0,0,0,0,2,0,62,0,1,0,0,0,80,211,64,0,0,0,0,0,64,0,0,0,0,0,0,0,8,253
    and so each element has int type.
    That is how I believe it is in C23, and the different versions of the
    C++ P1967 paper specified there some casts, P1967R12 in particular
    "Otherwise, the integral constant expression is the value of std::fgetc’s return is cast
    to unsigned char."
    but please see
    https://github.com/llvm/llvm-project/pull/97274#issuecomment-2230929277
    comment and whether we really want the preprocessor to preprocess it for
    C++ as (or as-if)
    static_cast<unsigned char>(127),static_cast<unsigned char>(69),static_cast<unsigned char>(76),static_cast<unsigned char>(70),static_cast<unsigned char>(2),...
    i.e. 9 tokens per byte rather than 2, or
    (unsigned char)127,(unsigned char)69,...
    or
    ((unsigned char)127),((unsigned char)69),...
    etc.
    Without a literal suffix for unsigned char constant literals it is horrible,
    plus the incompatibility between C and C++.  Sure, we could use the magic
    form more often for C++ to save the size and do the 9 or how many tokens
    form only for the boundary constants and use #embed "." __gnu__::__base64__("...")
    for what is in between if there are at least 2 tokens inside of it.
    E.g. (unsigned char)127 vs. static_cast<unsigned char>(127) behaves
    differently if there is constexpr long long p[] = { ... };
    ...
      #embed __FILE__
    [p]
    
    2024-12-06  Jakub Jelinek  <ja...@redhat.com>
    
    libcpp/
            * files.cc (finish_embed): Use CPP_EMBED even for C++.
    gcc/
            * tree.h (RAW_DATA_UCHAR_ELT, RAW_DATA_SCHAR_ELT): Define.
    gcc/cp/ChangeLog:
            * cp-tree.h (class raw_data_iterator): New type.
            (class raw_data_range): New type.
            * parser.cc (cp_parser_postfix_open_square_expression): Handle
            parsing of CPP_EMBED.
            (cp_parser_parenthesized_expression_list): Likewise.  Use
            cp_lexer_next_token_is.
            (cp_parser_expression): Handle parsing of CPP_EMBED.
            (cp_parser_template_argument_list): Likewise.
            (cp_parser_initializer_list): Likewise.
            (cp_parser_oacc_clause_tile): Likewise.
            (cp_parser_omp_tile_sizes): Likewise.
            * pt.cc (tsubst_expr): Handle RAW_DATA_CST.
            * constexpr.cc (reduced_constant_expression_p): Likewise.
            (raw_data_cst_elt): New function.
            (find_array_ctor_elt): Handle RAW_DATA_CST.
            (cxx_eval_array_reference): Likewise.
            * typeck2.cc (digest_init_r): Emit -Wnarrowing and/or -Wconversion
            diagnostics.
            (process_init_constructor_array): Handle RAW_DATA_CST.
            * decl.cc (maybe_deduce_size_from_array_init): Likewise.
            (is_direct_enum_init): Fail for RAW_DATA_CST.
            (cp_maybe_split_raw_data): New function.
            (consume_init): New function.
            (reshape_init_array_1): Add VECTOR_P argument.  Handle RAW_DATA_CST.
            (reshape_init_array): Adjust reshape_init_array_1 caller.
            (reshape_init_vector): Likewise.
            (reshape_init_class): Handle RAW_DATA_CST.
            (reshape_init_r): Likewise.
    gcc/testsuite/
            * c-c++-common/cpp/embed-22.c: New test.
            * c-c++-common/cpp/embed-23.c: New test.
            * g++.dg/cpp/embed-4.C: New test.
            * g++.dg/cpp/embed-5.C: New test.
            * g++.dg/cpp/embed-6.C: New test.
            * g++.dg/cpp/embed-7.C: New test.
            * g++.dg/cpp/embed-8.C: New test.
            * g++.dg/cpp/embed-9.C: New test.
            * g++.dg/cpp/embed-10.C: New test.
            * g++.dg/cpp/embed-11.C: New test.
            * g++.dg/cpp/embed-12.C: New test.
            * g++.dg/cpp/embed-13.C: New test.
            * g++.dg/cpp/embed-14.C: New test.

Diff:
---
 gcc/cp/constexpr.cc                       |  91 +++++++++++++++-
 gcc/cp/cp-tree.h                          |  47 +++++++++
 gcc/cp/decl.cc                            | 170 +++++++++++++++++++++++++-----
 gcc/cp/parser.cc                          |  95 +++++++++++++++--
 gcc/cp/pt.cc                              |   8 ++
 gcc/cp/typeck2.cc                         |  41 ++++++-
 gcc/testsuite/c-c++-common/cpp/embed-22.c |  28 +++++
 gcc/testsuite/c-c++-common/cpp/embed-23.c |  36 +++++++
 gcc/testsuite/g++.dg/cpp/embed-10.C       |  40 +++++++
 gcc/testsuite/g++.dg/cpp/embed-11.C       |  41 +++++++
 gcc/testsuite/g++.dg/cpp/embed-12.C       |  34 ++++++
 gcc/testsuite/g++.dg/cpp/embed-13.C       |  28 +++++
 gcc/testsuite/g++.dg/cpp/embed-14.C       |  13 +++
 gcc/testsuite/g++.dg/cpp/embed-4.C        |  66 ++++++++++++
 gcc/testsuite/g++.dg/cpp/embed-5.C        |  72 +++++++++++++
 gcc/testsuite/g++.dg/cpp/embed-6.C        |  72 +++++++++++++
 gcc/testsuite/g++.dg/cpp/embed-7.C        |   7 ++
 gcc/testsuite/g++.dg/cpp/embed-8.C        |   7 ++
 gcc/testsuite/g++.dg/cpp/embed-9.C        |  57 ++++++++++
 gcc/tree.h                                |   4 +
 libcpp/files.cc                           |   3 +-
 21 files changed, 920 insertions(+), 40 deletions(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index c1d540133d5e..08765c9caa61 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -3469,7 +3469,13 @@ reduced_constant_expression_p (tree t)
                    return false;
                  if (TREE_CODE (e.index) == RANGE_EXPR)
                    cursor = TREE_OPERAND (e.index, 1);
-                 cursor = int_const_binop (PLUS_EXPR, cursor, size_one_node);
+                 if (TREE_CODE (e.value) == RAW_DATA_CST)
+                   cursor
+                     = int_const_binop (PLUS_EXPR, cursor,
+                                        size_int (RAW_DATA_LENGTH (e.value)));
+                 else
+                   cursor = int_const_binop (PLUS_EXPR, cursor,
+                                             size_one_node);
                }
              if (find_array_ctor_elt (t, max) == -1)
                return false;
@@ -4081,6 +4087,20 @@ array_index_cmp (tree key, tree index)
     }
 }
 
+/* Extract a single INTEGER_CST from RAW_DATA_CST RAW_DATA at
+   relative index OFF.  */
+
+static tree
+raw_data_cst_elt (tree raw_data, unsigned int off)
+{
+  return build_int_cst (TREE_TYPE (raw_data),
+                       TYPE_UNSIGNED (TREE_TYPE (raw_data))
+                       ? (HOST_WIDE_INT)
+                         RAW_DATA_UCHAR_ELT (raw_data, off)
+                       : (HOST_WIDE_INT)
+                         RAW_DATA_SCHAR_ELT (raw_data, off));
+}
+
 /* Returns the index of the constructor_elt of ARY which matches DINDEX, or -1
    if none.  If INSERT is true, insert a matching element rather than fail.  */
 
@@ -4105,10 +4125,11 @@ find_array_ctor_elt (tree ary, tree dindex, bool insert)
       if (cindex == NULL_TREE)
        {
          /* Verify that if the last index is missing, all indexes
-            are missing.  */
+            are missing and there is no RAW_DATA_CST.  */
          if (flag_checking)
            for (unsigned int j = 0; j < len - 1; ++j)
-             gcc_assert ((*elts)[j].index == NULL_TREE);
+             gcc_assert ((*elts)[j].index == NULL_TREE
+                         && TREE_CODE ((*elts)[j].value) != RAW_DATA_CST);
          if (i < end)
            return i;
          else
@@ -4131,6 +4152,11 @@ find_array_ctor_elt (tree ary, tree dindex, bool insert)
        {
          if (i < end)
            return i;
+         tree value = (*elts)[end - 1].value;
+         if (TREE_CODE (value) == RAW_DATA_CST
+             && wi::to_offset (dindex) < (wi::to_offset (cindex)
+                                          + RAW_DATA_LENGTH (value)))
+           begin = end - 1;
          else
            begin = end;
        }
@@ -4144,12 +4170,59 @@ find_array_ctor_elt (tree ary, tree dindex, bool insert)
       tree idx = elt.index;
 
       int cmp = array_index_cmp (dindex, idx);
+      if (cmp > 0
+         && TREE_CODE (elt.value) == RAW_DATA_CST
+         && wi::to_offset (dindex) < (wi::to_offset (idx)
+                                      + RAW_DATA_LENGTH (elt.value)))
+       cmp = 0;
       if (cmp < 0)
        end = middle;
       else if (cmp > 0)
        begin = middle + 1;
       else
        {
+         if (insert && TREE_CODE (elt.value) == RAW_DATA_CST)
+           {
+             /* We need to split the RAW_DATA_CST elt.  */
+             constructor_elt e;
+             gcc_checking_assert (TREE_CODE (idx) != RANGE_EXPR);
+             unsigned int off = (wi::to_offset (dindex)
+                                 - wi::to_offset (idx)).to_uhwi ();
+             tree value = elt.value;
+             unsigned int len = RAW_DATA_LENGTH (value);
+             if (off > 1 && len >= off + 3)
+               value = copy_node (elt.value);
+             if (off)
+               {
+                 if (off > 1)
+                   RAW_DATA_LENGTH (elt.value) = off;
+                 else
+                   elt.value = raw_data_cst_elt (elt.value, 0);
+                 e.index = size_binop (PLUS_EXPR, elt.index,
+                                       build_int_cst (TREE_TYPE (elt.index),
+                                                      off));
+                 e.value = NULL_TREE;
+                 ++middle;
+                 vec_safe_insert (CONSTRUCTOR_ELTS (ary), middle, e);
+               }
+             (*elts)[middle].value = raw_data_cst_elt (value, off);
+             if (len >= off + 2)
+               {
+                 e.index = (*elts)[middle].index;
+                 e.index = size_binop (PLUS_EXPR, e.index,
+                                       build_one_cst (TREE_TYPE (e.index)));
+                 if (len >= off + 3)
+                   {
+                     RAW_DATA_LENGTH (value) -= off + 1;
+                     RAW_DATA_POINTER (value) += off + 1;
+                     e.value = value;
+                   }
+                 else
+                   e.value = raw_data_cst_elt (value, off + 1);
+                 vec_safe_insert (CONSTRUCTOR_ELTS (ary), middle + 1, e);
+               }
+             return middle;
+           }
          if (insert && TREE_CODE (idx) == RANGE_EXPR)
            {
              /* We need to split the range.  */
@@ -4505,7 +4578,17 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree t,
     {
       tree r;
       if (TREE_CODE (ary) == CONSTRUCTOR)
-       r = (*CONSTRUCTOR_ELTS (ary))[i].value;
+       {
+         r = (*CONSTRUCTOR_ELTS (ary))[i].value;
+         if (TREE_CODE (r) == RAW_DATA_CST)
+           {
+             tree ridx = (*CONSTRUCTOR_ELTS (ary))[i].index;
+             gcc_checking_assert (ridx);
+             unsigned int off
+               = (wi::to_offset (index) - wi::to_offset (ridx)).to_uhwi ();
+             r = raw_data_cst_elt (r, off);
+           }
+       }
       else if (TREE_CODE (ary) == VECTOR_CST)
        r = VECTOR_CST_ELT (ary, i);
       else
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 67a5eba683ce..5f0529530b94 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -991,6 +991,53 @@ public:
   lkp_iterator end() { return lkp_iterator (NULL_TREE); }
 };
 
+/* Iterator for a RAW_DATA_CST.  */
+
+class raw_data_iterator {
+  tree t;
+  unsigned int n;
+
+ public:
+  explicit raw_data_iterator (tree t, unsigned int n)
+    : t (t), n (n)
+  {
+  }
+
+  operator bool () const
+  {
+    return n < (unsigned) RAW_DATA_LENGTH (t);
+  }
+
+  raw_data_iterator &operator++ ()
+  {
+    ++n;
+    return *this;
+  }
+
+  tree operator* () const
+  {
+    return build_int_cst (TREE_TYPE (t), RAW_DATA_UCHAR_ELT (t, n));
+  }
+
+  bool operator== (const raw_data_iterator &o) const
+  {
+    return t == o.t && n == o.n;
+  }
+};
+
+/* Treat a tree as a range of raw_data_iterator, e.g.
+   for (tree f : raw_data_range (d)) { ... }  */
+
+class raw_data_range
+{
+  tree t;
+public:
+  raw_data_range (tree t) : t (t) { }
+  raw_data_iterator begin () { return raw_data_iterator (t, 0); }
+  raw_data_iterator end ()
+  { return raw_data_iterator (t, RAW_DATA_LENGTH (t)); }
+};
+
 /* hash traits for declarations.  Hashes potential overload sets via
    DECL_NAME.  */
 
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 4b6a5191a8ae..cceed11d6378 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -6486,18 +6486,22 @@ maybe_deduce_size_from_array_init (tree decl, tree init)
        {
          vec<constructor_elt, va_gc> *v = CONSTRUCTOR_ELTS (initializer);
          constructor_elt *ce;
-         HOST_WIDE_INT i;
+         HOST_WIDE_INT i, j = 0;
          FOR_EACH_VEC_SAFE_ELT (v, i, ce)
            {
              if (instantiation_dependent_expression_p (ce->index))
                return;
-             if (!check_array_designated_initializer (ce, i))
+             if (!check_array_designated_initializer (ce, j))
                failure = 1;
              /* If an un-designated initializer is type-dependent, we can't
                 check brace elision yet.  */
              if (ce->index == NULL_TREE
                  && type_dependent_expression_p (ce->value))
                return;
+             if (TREE_CODE (ce->value) == RAW_DATA_CST)
+               j += RAW_DATA_LENGTH (ce->value);
+             else
+               ++j;
            }
        }
 
@@ -6853,6 +6857,7 @@ is_direct_enum_init (tree type, tree init)
       && TREE_CODE (init) == CONSTRUCTOR
       && CONSTRUCTOR_IS_DIRECT_INIT (init)
       && CONSTRUCTOR_NELTS (init) == 1
+      && TREE_CODE (CONSTRUCTOR_ELT (init, 0)->value) != RAW_DATA_CST
       /* DR 2374: The single element needs to be implicitly
         convertible to the underlying type of the enum.  */
       && !type_dependent_expression_p (CONSTRUCTOR_ELT (init, 0)->value)
@@ -6864,6 +6869,36 @@ is_direct_enum_init (tree type, tree init)
   return false;
 }
 
+/* Helper function for reshape_init*.  Split first element of
+   RAW_DATA_CST and save the rest to d->cur->value.  */
+
+static tree
+cp_maybe_split_raw_data (reshape_iter *d)
+{
+  if (TREE_CODE (d->cur->value) != RAW_DATA_CST)
+    return NULL_TREE;
+  tree ret = *raw_data_iterator (d->cur->value, 0);
+  ++RAW_DATA_POINTER (d->cur->value);
+  --RAW_DATA_LENGTH (d->cur->value);
+  if (RAW_DATA_LENGTH (d->cur->value) == 1)
+    d->cur->value = *raw_data_iterator (d->cur->value, 0);
+  return ret;
+}
+
+/* Wrapper around that which for RAW_DATA_CST in INIT
+   (as well as in D->cur->value) peels off the first element
+   of the raw data and returns it, otherwise increments
+   D->cur and returns INIT.  */
+
+static tree
+consume_init (tree init, reshape_iter *d)
+{
+  if (tree raw_init = cp_maybe_split_raw_data (d))
+    return raw_init;
+  d->cur++;
+  return init;
+}
+
 /* Subroutine of reshape_init_array and reshape_init_vector, which does
    the actual work. ELT_TYPE is the element type of the array. MAX_INDEX is an
    INTEGER_CST representing the size of the array minus one (the maximum index),
@@ -6872,7 +6907,8 @@ is_direct_enum_init (tree type, tree init)
 
 static tree
 reshape_init_array_1 (tree elt_type, tree max_index, reshape_iter *d,
-                     tree first_initializer_p, tsubst_flags_t complain)
+                     tree first_initializer_p, bool vector_p,
+                     tsubst_flags_t complain)
 {
   tree new_init;
   bool sized_array_p = (max_index && TREE_CONSTANT (max_index));
@@ -6910,6 +6946,7 @@ reshape_init_array_1 (tree elt_type, tree max_index, reshape_iter *d,
       max_index_cst = constant_lower_bound (midx);
     }
 
+  constructor_elt *first_cur = d->cur;
   /* Loop until there are no more initializers.  */
   for (index = 0;
        d->cur != d->end && (!sized_array_p || index <= max_index_cst);
@@ -6917,16 +6954,78 @@ reshape_init_array_1 (tree elt_type, tree max_index, reshape_iter *d,
     {
       tree elt_init;
       constructor_elt *old_cur = d->cur;
+      const char *old_raw_data_ptr = NULL;
+
+      if (TREE_CODE (d->cur->value) == RAW_DATA_CST)
+       old_raw_data_ptr = RAW_DATA_POINTER (d->cur->value);
 
       if (d->cur->index)
        CONSTRUCTOR_IS_DESIGNATED_INIT (new_init) = true;
       check_array_designated_initializer (d->cur, index);
-      elt_init = reshape_init_r (elt_type, d,
-                                /*first_initializer_p=*/NULL_TREE,
-                                complain);
+      if (TREE_CODE (d->cur->value) == RAW_DATA_CST
+         && (TREE_CODE (elt_type) == INTEGER_TYPE
+             || is_byte_access_type (elt_type))
+         && TYPE_PRECISION (elt_type) == CHAR_BIT
+         && (!sized_array_p || index < max_index_cst)
+         && !vector_p)
+       {
+         elt_init = d->cur->value;
+         if (!sized_array_p
+             || ((unsigned) RAW_DATA_LENGTH (d->cur->value)
+                 <= max_index_cst - index + 1))
+           d->cur++;
+         else
+           {
+             unsigned int len = max_index_cst - index + 1;
+             /* Exactly one byte is left over: the one at offset LEN.  */
+             if ((unsigned) RAW_DATA_LENGTH (d->cur->value) == len + 1)
+               d->cur->value
+                 = build_int_cst (integer_type_node,
+                                  RAW_DATA_UCHAR_ELT (d->cur->value, len));
+             else
+               {
+                 d->cur->value = copy_node (elt_init);
+                 RAW_DATA_LENGTH (d->cur->value) -= len;
+                 RAW_DATA_POINTER (d->cur->value) += len;
+               }
+             RAW_DATA_LENGTH (elt_init) = len;
+           }
+         TREE_TYPE (elt_init) = elt_type;
+       }
+      else
+       elt_init = reshape_init_r (elt_type, d,
+                                  /*first_initializer_p=*/NULL_TREE,
+                                  complain);
       if (elt_init == error_mark_node)
        return error_mark_node;
       tree idx = size_int (index);
+      if (reuse && old_raw_data_ptr && d->cur == old_cur)
+       {
+         /* We need to stop reusing as some RAW_DATA_CST in the original
+            ctor had to be split.  */
+         new_init = build_constructor (init_list_type_node, NULL);
+         if (index)
+           {
+             vec_safe_grow (CONSTRUCTOR_ELTS (new_init), index);
+             memcpy (CONSTRUCTOR_ELT (new_init, 0), first_cur,
+                     (d->cur - first_cur)
+                     * sizeof (*CONSTRUCTOR_ELT (new_init, 0)));
+             if (CONSTRUCTOR_IS_DESIGNATED_INIT (first_initializer_p))
+               {
+                 unsigned int j;
+                 tree nidx, nval;
+                 FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (new_init),
+                                           j, nidx, nval)
+                   if (nidx)
+                     {
+                       CONSTRUCTOR_IS_DESIGNATED_INIT (new_init) = 1;
+                       (void) nval;
+                       break;
+                     }
+               }
+           }
+         reuse = false;
+       }
       if (reuse)
        {
          old_cur->index = idx;
@@ -6939,8 +7038,15 @@ reshape_init_array_1 (tree elt_type, tree max_index, reshape_iter *d,
        TREE_CONSTANT (new_init) = false;
 
       /* This can happen with an invalid initializer (c++/54501).  */
-      if (d->cur == old_cur && !sized_array_p)
+      if (d->cur == old_cur
+         && !sized_array_p
+         && (old_raw_data_ptr == NULL
+             || (TREE_CODE (d->cur->value) == RAW_DATA_CST
+                 && RAW_DATA_POINTER (d->cur->value) == old_raw_data_ptr)))
        break;
+
+      if (TREE_CODE (elt_init) == RAW_DATA_CST)
+       index += RAW_DATA_LENGTH (elt_init) - 1;
     }
 
   return new_init;
@@ -6961,7 +7067,7 @@ reshape_init_array (tree type, reshape_iter *d, tree first_initializer_p,
     max_index = array_type_nelts_minus_one (type);
 
   return reshape_init_array_1 (TREE_TYPE (type), max_index, d,
-                              first_initializer_p, complain);
+                              first_initializer_p, false, complain);
 }
 
 /* Subroutine of reshape_init_r, processes the initializers for vectors.
@@ -6993,7 +7099,7 @@ reshape_init_vector (tree type, reshape_iter *d, tsubst_flags_t complain)
     max_index = size_int (TYPE_VECTOR_SUBPARTS (type) - 1);
 
   return reshape_init_array_1 (TREE_TYPE (type), max_index, d,
-                              NULL_TREE, complain);
+                              NULL_TREE, true, complain);
 }
 
 /* Subroutine of reshape_init*: We're initializing an element with TYPE from
@@ -7066,8 +7172,12 @@ reshape_init_class (tree type, reshape_iter *d, bool first_initializer_p,
     {
       tree field_init;
       constructor_elt *old_cur = d->cur;
+      const char *old_raw_data_ptr = NULL;
       bool direct_desig = false;
 
+      if (TREE_CODE (d->cur->value) == RAW_DATA_CST)
+       old_raw_data_ptr = RAW_DATA_POINTER (d->cur->value);
+
       /* Handle C++20 designated initializers.  */
       if (d->cur->index)
        {
@@ -7181,6 +7291,7 @@ reshape_init_class (tree type, reshape_iter *d, bool first_initializer_p,
             is initialized by the designated-initializer-list { D }, where D
             is the designated- initializer-clause naming a member of the
             anonymous union member."  */
+         gcc_checking_assert (TREE_CODE (d->cur->value) != RAW_DATA_CST);
          field_init = reshape_single_init (TREE_TYPE (field),
                                            d->cur->value, complain);
          d->cur++;
@@ -7193,7 +7304,11 @@ reshape_init_class (tree type, reshape_iter *d, bool first_initializer_p,
       if (field_init == error_mark_node)
        return error_mark_node;
 
-      if (d->cur == old_cur && d->cur->index)
+      if (d->cur == old_cur
+         && d->cur->index
+         && (old_raw_data_ptr == NULL
+             || (TREE_CODE (d->cur->value) == RAW_DATA_CST
+                 && RAW_DATA_POINTER (d->cur->value) == old_raw_data_ptr)))
        {
          /* This can happen with an invalid initializer for a flexible
             array member (c++/54441).  */
@@ -7228,8 +7343,11 @@ reshape_init_class (tree type, reshape_iter *d, bool first_initializer_p,
      correspond to all remaining elements of the initializer list (if any).  */
   if (last_was_pack_expansion)
     {
+      tree init = d->cur->value;
+      if (tree raw_init = cp_maybe_split_raw_data (d))
+       init = raw_init;
       CONSTRUCTOR_APPEND_ELT (CONSTRUCTOR_ELTS (new_init),
-                             last_was_pack_expansion, d->cur->value);
+                             last_was_pack_expansion, init);
       while (d->cur != d->end)
        d->cur++;
     }
@@ -7281,7 +7399,7 @@ reshape_init_r (tree type, reshape_iter *d, tree first_initializer_p,
     {
       /* A complex type can be initialized from one or two initializers,
         but braces are not elided.  */
-      d->cur++;
+      init = consume_init (init, d);
       if (BRACE_ENCLOSED_INITIALIZER_P (stripped_init))
        {
          if (CONSTRUCTOR_NELTS (stripped_init) > 2)
@@ -7296,10 +7414,13 @@ reshape_init_r (tree type, reshape_iter *d, tree first_initializer_p,
        {
          vec<constructor_elt, va_gc> *v = 0;
          CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init);
-         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, d->cur->value);
+         tree raw_init = cp_maybe_split_raw_data (d);
+         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                 raw_init ? raw_init : d->cur->value);
          if (has_designator_problem (d, complain))
            return error_mark_node;
-         d->cur++;
+         if (!raw_init)
+           d->cur++;
          init = build_constructor (init_list_type_node, v);
        }
       return init;
@@ -7347,9 +7468,7 @@ reshape_init_r (tree type, reshape_iter *d, tree first_initializer_p,
          else
            maybe_warn_cpp0x (CPP0X_INITIALIZER_LISTS);
        }
-
-      d->cur++;
-      return init;
+      return consume_init (init, d);
     }
 
   /* "If T is a class type and the initializer list has a single element of
@@ -7360,6 +7479,7 @@ reshape_init_r (tree type, reshape_iter *d, tree first_initializer_p,
       /* But not if it's a designated init.  */
       && !d->cur->index
       && d->end - d->cur == 1
+      && TREE_CODE (init) != RAW_DATA_CST
       && reference_related_p (type, TREE_TYPE (init)))
     {
       d->cur++;
@@ -7381,12 +7501,14 @@ reshape_init_r (tree type, reshape_iter *d, tree first_initializer_p,
         valid aggregate initialization.  */
       && !first_initializer_p
       && (same_type_ignoring_top_level_qualifiers_p (type, TREE_TYPE (init))
-         || can_convert_arg (type, TREE_TYPE (init), init, LOOKUP_NORMAL,
-                             complain)))
-    {
-      d->cur++;
-      return init;
-    }
+         || can_convert_arg (type, TREE_TYPE (init),
+                             TREE_CODE (init) == RAW_DATA_CST
+                             ? build_int_cst (integer_type_node,
+                                              *(const unsigned char *)
+                                              RAW_DATA_POINTER (init))
+                             : init,
+                             LOOKUP_NORMAL, complain)))
+    return consume_init (init, d);
 
   /* [dcl.init.string]
 
@@ -7486,7 +7608,7 @@ reshape_init_r (tree type, reshape_iter *d, tree first_initializer_p,
   else if (VECTOR_TYPE_P (type))
     new_init = reshape_init_vector (type, d, complain);
   else
-    gcc_unreachable();
+    gcc_unreachable ();
 
   if (braces_elided_p
       && TREE_CODE (new_init) == CONSTRUCTOR)
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 4ee739ca5c9c..2ee4fc2ad58c 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -8520,6 +8520,19 @@ cp_parser_postfix_open_square_expression (cp_parser *parser,
        {
          while (true)
            {
+             /* Handle #embed in the expression-list.  */
+             if (cp_lexer_next_token_is (parser->lexer, CPP_EMBED))
+               {
+                 tree raw_data = cp_lexer_peek_token (parser->lexer)->u.value;
+                 cp_lexer_consume_token (parser->lexer);
+                 vec_safe_reserve (expression_list,
+                                   RAW_DATA_LENGTH (raw_data));
+                 for (tree argument : raw_data_range (raw_data))
+                   expression_list->quick_push (argument);
+                 cp_parser_require (parser, CPP_COMMA, RT_COMMA);
+                 continue;
+               }
+
              cp_expr expr
                = cp_parser_parenthesized_expression_list_elt (parser,
                                                               /*cast_p=*/
@@ -8987,12 +9000,27 @@ cp_parser_parenthesized_expression_list (cp_parser* parser,
        /* At the beginning of attribute lists, check to see if the
           next token is an identifier.  */
        if (is_attribute_list == id_attr
-           && cp_lexer_peek_token (parser->lexer)->type == CPP_NAME)
+           && cp_lexer_next_token_is (parser->lexer, CPP_NAME))
          expr = cp_lexer_consume_token (parser->lexer)->u.value;
        else if (is_attribute_list == assume_attr)
          expr = cp_parser_conditional_expression (parser);
        else if (is_attribute_list == uneval_string_attr)
          expr = cp_parser_unevaluated_string_literal (parser);
+       else if (cp_lexer_next_token_is (parser->lexer, CPP_EMBED))
+         {
+           /* Handle #embed in the argument list.  */
+           tree raw_data = cp_lexer_peek_token (parser->lexer)->u.value;
+           location_t loc = cp_lexer_peek_token (parser->lexer)->location;
+           cp_lexer_consume_token (parser->lexer);
+           vec_safe_reserve (expression_list, RAW_DATA_LENGTH (raw_data));
+           for (tree arg : raw_data_range (raw_data))
+             if (wrap_locations_p)
+               expression_list->quick_push (maybe_wrap_with_location (arg,
+                                                                      loc));
+             else
+               expression_list->quick_push (arg);
+           goto get_comma;
+         }
        else
          expr
            = cp_parser_parenthesized_expression_list_elt (parser, cast_p,
@@ -11103,8 +11131,24 @@ cp_parser_expression (cp_parser* parser, cp_id_kind * pidk,
       cp_expr assignment_expression;
 
       /* Parse the next assignment-expression.  */
-      assignment_expression
-       = cp_parser_assignment_expression (parser, pidk, cast_p, decltype_p);
+      if (cp_lexer_next_token_is (parser->lexer, CPP_EMBED))
+       {
+         /* Users aren't interested in millions of -Wunused-value
+            warnings when using #embed inside of a comma expression,
+            and one CPP_NUMBER plus CPP_COMMA before it and one
+            CPP_COMMA plus CPP_NUMBER after it is guaranteed by
+            the preprocessor.  Thus, parse the whole CPP_EMBED just
+            as a single INTEGER_CST, the last byte in it.  */
+         tree raw_data = cp_lexer_peek_token (parser->lexer)->u.value;
+         location_t loc = cp_lexer_peek_token (parser->lexer)->location;
+         cp_lexer_consume_token (parser->lexer);
+         assignment_expression
+           = *raw_data_iterator (raw_data, RAW_DATA_LENGTH (raw_data) - 1);
+         assignment_expression.set_location (loc);
+       }
+      else
+       assignment_expression
+         = cp_parser_assignment_expression (parser, pidk, cast_p, decltype_p);
 
       /* We don't create a temporary for a call that is the immediate operand
         of decltype or on the RHS of a comma.  But when we see a comma, we
@@ -19794,6 +19838,17 @@ cp_parser_template_argument_list (cp_parser* parser)
        /* Consume the comma.  */
        cp_lexer_consume_token (parser->lexer);
 
+      /* Handle #embed in the argument list.  */
+      if (cp_lexer_next_token_is (parser->lexer, CPP_EMBED))
+       {
+         tree raw_data = cp_lexer_peek_token (parser->lexer)->u.value;
+         cp_lexer_consume_token (parser->lexer);
+         args.reserve (RAW_DATA_LENGTH (raw_data), false);
+         for (tree argument : raw_data_range (raw_data))
+           args.quick_push (argument);
+         continue;
+       }
+
       /* Parse the template-argument.  */
       tree argument = cp_parser_template_argument (parser);
 
@@ -26867,10 +26922,17 @@ cp_parser_initializer_list (cp_parser* parser, bool* non_constant_p,
        first_designator = designator;
 
       /* Parse the initializer.  */
-      initializer = cp_parser_initializer_clause (parser,
-                                                 (non_constant_p != nullptr
-                                                  ? &clause_non_constant_p
-                                                  : nullptr));
+      if (cp_lexer_next_token_is (parser->lexer, CPP_EMBED))
+       {
+         initializer = cp_lexer_peek_token (parser->lexer)->u.value;
+         clause_non_constant_p = false;
+         cp_lexer_consume_token (parser->lexer);
+       }
+      else
+       initializer = cp_parser_initializer_clause (parser,
+                                                   (non_constant_p != nullptr
+                                                    ? &clause_non_constant_p
+                                                    : nullptr));
       /* If any clause is non-constant, so is the entire initializer.  */
       if (non_constant_p && clause_non_constant_p)
        *non_constant_p = true;
@@ -39340,6 +39402,15 @@ cp_parser_oacc_clause_tile (cp_parser *parser, location_t clause_loc, tree list)
          cp_lexer_consume_token (parser->lexer);
          expr = integer_zero_node;
        }
+      else if (cp_lexer_next_token_is (parser->lexer, CPP_EMBED))
+       {
+         /* Handle #embed in the size-expr-list.  */
+         tree raw_data = cp_lexer_peek_token (parser->lexer)->u.value;
+         cp_lexer_consume_token (parser->lexer);
+         for (tree argument : raw_data_range (raw_data))
+           tile = tree_cons (NULL_TREE, argument, tile);
+         continue;
+       }
       else
        expr = cp_parser_constant_expression (parser);
 
@@ -48492,6 +48563,16 @@ cp_parser_omp_tile_sizes (cp_parser *parser, location_t loc)
       if (sizes && !cp_parser_require (parser, CPP_COMMA, RT_COMMA))
        return error_mark_node;
 
+      if (cp_lexer_next_token_is (parser->lexer, CPP_EMBED))
+       {
+         /* Handle #embed in the size-expr-list.  */
+         tree raw_data = cp_lexer_peek_token (parser->lexer)->u.value;
+         cp_lexer_consume_token (parser->lexer);
+         for (tree argument : raw_data_range (raw_data))
+           sizes = tree_cons (NULL_TREE, argument, sizes);
+         continue;
+       }
+
       tree expr = cp_parser_constant_expression (parser);
       if (expr == error_mark_node)
        {
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index b094d141f3b0..8a91c9ce9f65 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -21958,6 +21958,14 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl)
        RETURN (r);
       }
 
+    case RAW_DATA_CST:
+      {
+       tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
+       r = copy_node (t);
+       TREE_TYPE (r) = type;
+       RETURN (r);
+      }
+
     case PTRMEM_CST:
       /* These can sometimes show up in a partial instantiation, but never
         involve template parms.  */
diff --git a/gcc/cp/typeck2.cc b/gcc/cp/typeck2.cc
index 2694c1dfd51c..fce687e83b3e 100644
--- a/gcc/cp/typeck2.cc
+++ b/gcc/cp/typeck2.cc
@@ -1311,6 +1311,36 @@ digest_init_r (tree type, tree init, int nested, int flags,
         a parenthesized list.  */
       if (nested && !(flags & LOOKUP_AGGREGATE_PAREN_INIT))
        flags |= LOOKUP_NO_NARROWING;
+      if (TREE_CODE (init) == RAW_DATA_CST && !TYPE_UNSIGNED (type))
+       {
+         tree ret = init;
+         if ((flags & LOOKUP_NO_NARROWING) || warn_conversion)
+           for (unsigned int i = 0;
+                i < (unsigned) RAW_DATA_LENGTH (init); ++i)
+             if (RAW_DATA_SCHAR_ELT (init, i) < 0)
+               {
+                 if ((flags & LOOKUP_NO_NARROWING))
+                   {
+                     tree elt
+                       = build_int_cst (integer_type_node,
+                                        RAW_DATA_UCHAR_ELT (init, i));
+                     if (!check_narrowing (type, elt, complain, false))
+                       {
+                         if (!(complain & tf_warning_or_error))
+                           ret = error_mark_node;
+                         continue;
+                       }
+                   }
+                 if (warn_conversion)
+                   warning (OPT_Wconversion,
+                            "conversion from %qT to %qT changes value from "
+                            "%qd to %qd",
+                            integer_type_node, type,
+                            RAW_DATA_UCHAR_ELT (init, i),
+                            RAW_DATA_SCHAR_ELT (init, i));
+               }
+         return ret;
+       }
       init = convert_for_initialization (0, type, init, flags,
                                         ICR_INIT, NULL_TREE, 0,
                                         complain);
@@ -1559,7 +1589,7 @@ static int
 process_init_constructor_array (tree type, tree init, int nested, int flags,
                                tsubst_flags_t complain)
 {
-  unsigned HOST_WIDE_INT i, len = 0;
+  unsigned HOST_WIDE_INT i, j, len = 0;
   int picflags = 0;
   bool unbounded = false;
   constructor_elt *ce;
@@ -1602,11 +1632,12 @@ process_init_constructor_array (tree type, tree init, int nested, int flags,
        return PICFLAG_ERRONEOUS;
     }
 
+  j = 0;
   FOR_EACH_VEC_SAFE_ELT (v, i, ce)
     {
       if (!ce->index)
-       ce->index = size_int (i);
-      else if (!check_array_designated_initializer (ce, i))
+       ce->index = size_int (j);
+      else if (!check_array_designated_initializer (ce, j))
        ce->index = error_mark_node;
       gcc_assert (ce->value);
       ce->value
@@ -1628,6 +1659,10 @@ process_init_constructor_array (tree type, tree init, int nested, int flags,
          CONSTRUCTOR_PLACEHOLDER_BOUNDARY (init) = 1;
          CONSTRUCTOR_PLACEHOLDER_BOUNDARY (ce->value) = 0;
        }
+      if (TREE_CODE (ce->value) == RAW_DATA_CST)
+       j += RAW_DATA_LENGTH (ce->value);
+      else
+       ++j;
     }
 
   /* No more initializers. If the array is unbounded, we are done. Otherwise,
diff --git a/gcc/testsuite/c-c++-common/cpp/embed-22.c b/gcc/testsuite/c-c++-common/cpp/embed-22.c
new file mode 100644
index 000000000000..1b35cf92012d
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/embed-22.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -Wno-psabi" } */
+/* { dg-additional-options "-std=c23" { target c } } */
+
+typedef unsigned char V __attribute__((vector_size (128)));
+
+V a;
+
+void
+foo (void)
+{
+  V b = {
+    #embed __FILE__ limit (128) gnu::offset (3)
+  };
+  a = b;
+}
+
+const unsigned char c[] = {
+  #embed __FILE__ limit (128) gnu::offset (3)
+};
+
+int
+main ()
+{
+  foo ();
+  if (__builtin_memcmp (&c[0], &a, sizeof (a)))
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/c-c++-common/cpp/embed-23.c b/gcc/testsuite/c-c++-common/cpp/embed-23.c
new file mode 100644
index 000000000000..ea00c6ca4bf5
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/embed-23.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-std=gnu23" { target c } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+struct S { _Complex double a; V b; int c; };
+struct T { int a; struct S b; int c; struct S d; int e; unsigned char f[22]; _Complex long double g; };
+
+const unsigned char a[] = {
+  #embed __FILE__ limit (124)
+};
+const struct T b[2] = {
+  #embed __FILE__ limit (124)
+};
+
+int
+main ()
+{
+  for (int i = 0; i < 2; ++i)
+    if (b[i].a != a[i * 62]
+       || __real__ b[i].b.a != a[i * 62 + 1]
+       || __imag__ b[i].b.a
+       || __builtin_memcmp (&b[i].b.b, &a[i * 62 + 2], 16)
+       || b[i].b.c != a[i * 62 + 18]
+       || b[i].c != a[i * 62 + 19]
+       || __real__ b[i].d.a != a[i * 62 + 20]
+       || __imag__ b[i].d.a
+       || __builtin_memcmp (&b[i].d.b, &a[i * 62 + 21], 16)
+       || b[i].d.c != a[i * 62 + 37]
+       || b[i].e != a[i * 62 + 38]
+       || __builtin_memcmp (&b[i].f[0], &a[i * 62 + 39], 22)
+       || __real__ b[i].g != a[i * 62 + 61]
+       || __imag__ b[i].g)
+      __builtin_abort ();
+}
diff --git a/gcc/testsuite/g++.dg/cpp/embed-10.C b/gcc/testsuite/g++.dg/cpp/embed-10.C
new file mode 100644
index 000000000000..7ffcc2d2608c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-10.C
@@ -0,0 +1,40 @@
+// { dg-do run { target c++23 } }
+// { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir" }
+
+const unsigned char m[] = {
+  #embed <magna-carta.txt> limit (136)
+};
+
+struct S
+{
+  S () : a {} {};
+  template <typename ...T>
+  int &operator[] (T... args)
+  {
+    int b[] = { args... };
+    for (int i = 0; i < sizeof (b) / sizeof (b[0]); ++i)
+      if (b[i] != m[i])
+       return a[137];
+    return a[sizeof (b) / sizeof (b[0])];
+  }
+  int a[138];
+};
+
+S s;
+
+int
+main ()
+{
+  if (&s[
+      #embed <magna-carta.txt> limit (1)
+       ] != &s.a[1])
+    __builtin_abort ();
+  if (&s[
+      #embed <magna-carta.txt> limit (6)
+       ] != &s.a[6])
+    __builtin_abort ();
+  if (&s[
+      #embed <magna-carta.txt> limit (135)
+       ] != &s.a[135])
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/g++.dg/cpp/embed-11.C b/gcc/testsuite/g++.dg/cpp/embed-11.C
new file mode 100644
index 000000000000..5b190fdaf3fd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-11.C
@@ -0,0 +1,41 @@
+// { dg-do run }
+// { dg-options "-Wunused-value" }
+
+#include <stdarg.h>
+
+const unsigned char a[] = {
+  #embed __FILE__ limit (128)
+};
+
+int
+foo (int x, ...)
+{
+  if (x != 42)
+    return 2;
+  va_list ap;
+  va_start (ap, x);
+  for (int i = 0; i < 128; ++i)
+    if (va_arg (ap, int) != a[i])
+      {
+       va_end (ap);
+       return 1;
+      }
+  va_end (ap);
+  return 0;
+}
+
+int b, c;
+
+int
+main ()
+{
+  if (foo (42,
+#embed __FILE__ limit (128)
+      ))
+    __builtin_abort ();
+  b = (
+#embed __FILE__ limit (128) prefix (c = 2 * ) suffix ( + 6)    // { dg-warning "right operand of comma operator has no effect" }
+  );
+  if (b != a[127] + 6 || c != 2 * a[0])
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/g++.dg/cpp/embed-12.C b/gcc/testsuite/g++.dg/cpp/embed-12.C
new file mode 100644
index 000000000000..86963f603102
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-12.C
@@ -0,0 +1,34 @@
+// { dg-do compile }
+// { dg-options "-Wnonnull" }
+
+#define A(n) int *p##n
+#define B(n) A(n##0), A(n##1), A(n##2), A(n##3), A(n##4), A(n##5), A(n##6), A(n##7)
+#define C(n) B(n##0), B(n##1), B(n##2), B(n##3), B(n##4), B(n##5), B(n##6), B(n##7)
+#define D C(0), C(1), C(2), C(3)
+
+void foo (D) __attribute__((nonnull (  // { dg-message "in a call to function '\[^\n\r]*' declared 'nonnull'" }
+#embed __FILE__ limit (128)
+)));
+#if __cplusplus >= 201103L
+[[gnu::nonnull (
+#embed __FILE__ limit (128)
+)]] void bar (D);      // { dg-message "in a call to function '\[^\n\r]*' declared 'nonnull'" "" { target c++11 } }
+#else
+void bar (D) __attribute__((nonnull (  // { dg-message "in a call to function '\[^\n\r]*' declared 'nonnull'" "" { target c++98_only } }
+#embed __FILE__ limit (128)
+)));
+#endif
+
+#undef A
+#if __cplusplus >= 201103L
+#define A(n) nullptr
+#else
+#define A(n) 0
+#endif
+
+void
+baz ()
+{
+  foo (D);     // { dg-warning "argument \[0-9]\+ null where non-null expected" }
+  bar (D);     // { dg-warning "argument \[0-9]\+ null where non-null expected" }
+}
diff --git a/gcc/testsuite/g++.dg/cpp/embed-13.C b/gcc/testsuite/g++.dg/cpp/embed-13.C
new file mode 100644
index 000000000000..0eea5ae9dd8c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-13.C
@@ -0,0 +1,28 @@
+// { dg-do run { target c++17_down } }
+// { dg-options "" }
+
+const unsigned char r[64] = {
+#embed __FILE__ limit (64)
+};
+struct S { int a; long b; unsigned char c[63]; int d; };
+S s = {
+#embed __FILE__ limit (64) prefix (.a = 1, .b = ) suffix (, .d = 2)
+};
+const unsigned char t[66] = {
+#embed __FILE__ limit (64) prefix ([0] = 1, [1] =) suffix (, [65] = 2)
+};
+int u[] = { [0] =
+#embed __FILE__ limit (64)
+};
+
+int
+main ()
+{
+  if (s.a != 1 || s.b != r[0] || __builtin_memcmp (s.c, r + 1, 63) || s.d != 2)
+    __builtin_abort ();
+  if (t[0] != 1 || __builtin_memcmp (t + 1, r, 64) || t[65] != 2)
+    __builtin_abort ();
+  for (int i = 0; i < 64; ++i)
+    if (u[i] != r[i])
+      __builtin_abort ();
+}
diff --git a/gcc/testsuite/g++.dg/cpp/embed-14.C b/gcc/testsuite/g++.dg/cpp/embed-14.C
new file mode 100644
index 000000000000..9f4be10409ed
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-14.C
@@ -0,0 +1,13 @@
+// { dg-do compile }
+// { dg-options "" }
+
+struct S { int a; long b; unsigned char c[63]; int d; };
+S s = {
+#embed __FILE__ limit (64) prefix (.a = 1, .b = ) suffix (, .d = 2)    // { dg-error "either all initializer clauses should be designated or none of them should be" "" { target c++20 } }
+};
+const unsigned char t[66] = {
+#embed __FILE__ limit (64) prefix ([0] = 1, [1] =) suffix (, [65] = 2) // { dg-error "either all initializer clauses should be designated or none of them should be" "" { target c++20 } }
+};
+int u[] = { [0] =
+#embed __FILE__ limit (64)                                             // { dg-error "either all initializer clauses should be designated or none of them should be" "" { target c++20 } }
+};
diff --git a/gcc/testsuite/g++.dg/cpp/embed-4.C b/gcc/testsuite/g++.dg/cpp/embed-4.C
new file mode 100644
index 000000000000..c36a7adb0037
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-4.C
@@ -0,0 +1,66 @@
+// { dg-do run { target c++11 } }
+// { dg-options "" }
+
+constexpr unsigned char a[] = {
+#embed __FILE__
+};
+
+constexpr unsigned char
+foo (int x)
+{
+  return a[x];
+}
+constexpr unsigned char b = a[32];
+constexpr unsigned char c = foo (42);
+
+#if __cplusplus >= 201402L
+constexpr bool
+bar ()
+{
+  unsigned char d[] = {
+  #embed __FILE__
+  };
+  d[42] = ' ';
+  d[32] = 'X';
+  d[0] = d[1] + 16;
+  d[sizeof (d) - 1] = d[42] - ' ';
+  for (int i = 0; i < sizeof (d); ++i)
+    switch (i)
+      {
+      case 0:
+       if (d[i] != a[1] + 16)
+         return false;
+       break;
+      case 32:
+       if (d[i] != 'X')
+         return false;
+       break;
+      case 42:
+       if (d[i] != ' ')
+         return false;
+       break;
+      case sizeof (d) - 1:
+       if (d[i] != 0)
+         return false;
+       break;
+      default:
+       if (d[i] != a[i])
+         return false;
+       break;
+      }
+  return true;
+}
+
+static_assert (bar (), "");
+#endif
+
+int
+main ()
+{
+  unsigned char e[] = {
+  #embed __FILE__
+  };
+
+  if (b != e[32] || c != e[42])
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/g++.dg/cpp/embed-5.C b/gcc/testsuite/g++.dg/cpp/embed-5.C
new file mode 100644
index 000000000000..9a498445b89d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-5.C
@@ -0,0 +1,72 @@
+// { dg-do run { target c++14 } }
+// { dg-options "" }
+
+template <typename T>
+constexpr T a[] = {
+#embed __FILE__
+};
+
+template <typename T>
+constexpr T
+foo (int x)
+{
+  return a<T>[x];
+}
+constexpr unsigned char b = a<unsigned char>[32];
+constexpr unsigned char c = foo<unsigned char> (42);
+constexpr int b2 = a<int>[32];
+constexpr int c2 = foo<int> (42);
+
+template <typename T>
+constexpr bool
+bar ()
+{
+  T d[] = {
+  #embed __FILE__
+  };
+  d[42] = ' ';
+  d[32] = 'X';
+  d[0] = d[1] + 16;
+  d[sizeof (d) / sizeof (T) - 1] = d[42] - ' ';
+  for (int i = 0; i < sizeof (d) / sizeof (T); ++i)
+    switch (i)
+      {
+      case 0:
+       if (d[i] != a<T>[1] + 16)
+         return false;
+       break;
+      case 32:
+       if (d[i] != 'X')
+         return false;
+       break;
+      case 42:
+       if (d[i] != ' ')
+         return false;
+       break;
+      case sizeof (d) / sizeof (T) - 1:
+       if (d[i] != 0)
+         return false;
+       break;
+      default:
+       if (d[i] != a<T>[i])
+         return false;
+       break;
+      }
+  return true;
+}
+
+static_assert (bar<unsigned char> (), "");
+static_assert (bar<int> (), "");
+
+int
+main ()
+{
+  unsigned char e[] = {
+  #embed __FILE__
+  };
+
+  if (b != e[32] || c != e[42])
+    __builtin_abort ();
+  if (b2 != b || c2 != c)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/g++.dg/cpp/embed-6.C b/gcc/testsuite/g++.dg/cpp/embed-6.C
new file mode 100644
index 000000000000..3896295213a5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-6.C
@@ -0,0 +1,72 @@
+// { dg-do run { target c++14 } }
+// { dg-options "" }
+
+template <typename T>
+constexpr unsigned char a[] = {
+#embed __FILE__
+};
+
+template <typename T>
+constexpr unsigned char
+foo (int x)
+{
+  return a<T>[x];
+}
+constexpr unsigned char b = a<unsigned char>[32];
+constexpr unsigned char c = foo<unsigned char> (42);
+constexpr unsigned char b2 = a<int>[32];
+constexpr unsigned char c2 = foo<int> (42);
+
+template <typename T>
+constexpr bool
+bar ()
+{
+  unsigned char d[] = {
+  #embed __FILE__
+  };
+  d[42] = ' ';
+  d[32] = 'X';
+  d[0] = d[1] + 16;
+  d[sizeof (d) - 1] = d[42] - ' ';
+  for (int i = 0; i < sizeof (d); ++i)
+    switch (i)
+      {
+      case 0:
+       if (d[i] != a<T>[1] + 16)
+         return false;
+       break;
+      case 32:
+       if (d[i] != 'X')
+         return false;
+       break;
+      case 42:
+       if (d[i] != ' ')
+         return false;
+       break;
+      case sizeof (d) - 1:
+       if (d[i] != 0)
+         return false;
+       break;
+      default:
+       if (d[i] != a<T>[i])
+         return false;
+       break;
+      }
+  return true;
+}
+
+static_assert (bar<unsigned char> (), "");
+static_assert (bar<int> (), "");
+
+int
+main ()
+{
+  unsigned char e[] = {
+  #embed __FILE__
+  };
+
+  if (b != e[32] || c != e[42])
+    __builtin_abort ();
+  if (b2 != b || c2 != c)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/g++.dg/cpp/embed-7.C b/gcc/testsuite/g++.dg/cpp/embed-7.C
new file mode 100644
index 000000000000..ac88b8277475
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-7.C
@@ -0,0 +1,7 @@
+// This is a comment with some UTF-8 non-ASCII characters: áéíóú.
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+const signed char a[] = {
+#embed __FILE__
+};     // { dg-error "narrowing conversion of '\[12]\[0-9]\[0-9]' from 'int' to 'const signed char'" }
diff --git a/gcc/testsuite/g++.dg/cpp/embed-8.C b/gcc/testsuite/g++.dg/cpp/embed-8.C
new file mode 100644
index 000000000000..294f03259af5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-8.C
@@ -0,0 +1,7 @@
+// This is a comment with some UTF-8 non-ASCII characters: áéíóú.
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wno-narrowing -Wconversion" }
+
+const signed char a[] = {
+#embed __FILE__
+};     // { dg-warning "conversion from 'int' to 'const signed char' changes value from '\[12]\[0-9]\[0-9]' to '-\[0-9]\[0-9]*'" }
diff --git a/gcc/testsuite/g++.dg/cpp/embed-9.C b/gcc/testsuite/g++.dg/cpp/embed-9.C
new file mode 100644
index 000000000000..fb559291ba33
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/embed-9.C
@@ -0,0 +1,57 @@
+// { dg-do run { target c++11 } }
+// { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir" }
+
+const unsigned char m[] = {
+  #embed <magna-carta.txt> limit (131)
+};
+
+template <int ...N>
+int
+foo ()
+{
+  unsigned char a[] = { N... };
+  for (int i = 0; i < sizeof (a); ++i)
+    if (a[i] != m[i])
+      return -1;
+  return sizeof (a);
+}
+
+template <typename ...T>
+int
+bar (T... args)
+{
+  int a[] = { args... };
+  for (int i = 0; i < sizeof (a) / sizeof (a[0]); ++i)
+    if (a[i] != m[i])
+      return -1;
+  return sizeof (a) / sizeof (a[0]);
+}
+
+int
+main ()
+{
+  if (foo <
+    #embed <magna-carta.txt> limit (1)
+      > () != 1)
+    __builtin_abort ();
+  if (foo <
+    #embed <magna-carta.txt> limit (6)
+      > () != 6)
+    __builtin_abort ();
+  if (foo <
+    #embed <magna-carta.txt> limit (131)
+      > () != 131)
+    __builtin_abort ();
+  if (bar (
+    #embed <magna-carta.txt> limit (1)
+      ) != 1)
+    __builtin_abort ();
+  if (bar (
+    #embed <magna-carta.txt> limit (6)
+      ) != 6)
+    __builtin_abort ();
+  if (bar (
+    #embed <magna-carta.txt> limit (131)
+      ) != 131)
+    __builtin_abort ();
+}
diff --git a/gcc/tree.h b/gcc/tree.h
index 4437ab923557..75054839d9b6 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1172,6 +1172,10 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
   (RAW_DATA_CST_CHECK (NODE)->raw_data_cst.str)
 #define RAW_DATA_OWNER(NODE) \
   (RAW_DATA_CST_CHECK (NODE)->raw_data_cst.owner)
+#define RAW_DATA_UCHAR_ELT(NODE, I) \
+  (((const unsigned char *) RAW_DATA_POINTER (NODE))[I])
+#define RAW_DATA_SCHAR_ELT(NODE, I) \
+  (((const signed char *) RAW_DATA_POINTER (NODE))[I])
 
 /* In a COMPLEX_CST node.  */
 #define TREE_REALPART(NODE) (COMPLEX_CST_CHECK (NODE)->complex.real)
diff --git a/libcpp/files.cc b/libcpp/files.cc
index a60fe1fb6eac..6a16503b209b 100644
--- a/libcpp/files.cc
+++ b/libcpp/files.cc
@@ -1243,8 +1243,7 @@ finish_embed (cpp_reader *pfile, _cpp_file *file,
     limit = params->limit;
 
   size_t embed_tokens = 0;
-  if (!CPP_OPTION (pfile, cplusplus)
-      && CPP_OPTION (pfile, lang) != CLK_ASM
+  if (CPP_OPTION (pfile, lang) != CLK_ASM
       && limit >= 64)
     embed_tokens = ((limit - 2) / INT_MAX) + (((limit - 2) % INT_MAX) != 0);

Reply via email to