https://gcc.gnu.org/g:44058b847145166715f15e49fa8854f30e852f24

commit r15-3600-g44058b847145166715f15e49fa8854f30e852f24
Author: Jakub Jelinek <ja...@redhat.com>
Date:   Thu Sep 12 11:34:06 2024 +0200

    libcpp: Add support for gnu::offset #embed/__has_embed parameter
    
    The following patch adds, on top of the just posted #embed patch,
    a first extension, gnu::offset, which allows seeking in the data
    file (for seekable files; otherwise the skipped bytes are read and
    thrown away).
    I think this is useful e.g. when some binary data starts with
    some well-known header which shouldn't be included in the data, etc.
    
    2024-09-12  Jakub Jelinek  <ja...@redhat.com>
    
    libcpp/
            * internal.h (struct cpp_embed_params): Add offset member.
            * directives.cc (EMBED_PARAMS): Add gnu::offset entry.
            (enum embed_param_kind): Add NUM_EMBED_STD_PARAMS.
            (_cpp_parse_embed_params): Use NUM_EMBED_STD_PARAMS rather than
            NUM_EMBED_PARAMS when parsing standard parameters.  Parse
            gnu::offset parameter.
            * files.cc (struct _cpp_file): Add offset member.
            (_cpp_stack_embed): Handle params->offset.
    gcc/
            * doc/cpp.texi (Binary Resource Inclusion): Document gnu::offset
            #embed parameter.
    gcc/testsuite/
            * c-c++-common/cpp/embed-15.c: New test.
            * c-c++-common/cpp/embed-16.c: New test.
            * gcc.dg/cpp/embed-5.c: New test.

Diff:
---
 gcc/doc/cpp.texi                          |  8 ++-
 gcc/testsuite/c-c++-common/cpp/embed-15.c | 88 ++++++++++++++++++++++++++++
 gcc/testsuite/c-c++-common/cpp/embed-16.c | 31 ++++++++++
 gcc/testsuite/gcc.dg/cpp/embed-5.c        |  4 ++
 libcpp/directives.cc                      | 40 ++++++++++---
 libcpp/files.cc                           | 95 ++++++++++++++++++++++++++-----
 libcpp/internal.h                         |  2 +-
 7 files changed, 244 insertions(+), 24 deletions(-)

diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi
index 032b095602d5..612d97e16df8 100644
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -3966,8 +3966,8 @@ treated the same), followed by parameter argument in parentheses, like
 with currently supported standard parameters @code{limit}, @code{prefix},
 @code{suffix} and @code{if_empty}, or implementation defined parameters
 specified by a unique vendor prefix followed by @code{::} followed by
-name of the parameter.  GCC will use the @code{gnu} prefix but currently
-doesn't support any extensions.
+name of the parameter.  GCC uses the @code{gnu} prefix for vendor
+parameters and currently supports the @code{gnu::offset} parameter.
 
 The @code{limit} parameter argument is a constant expression which
 specifies the maximum number of bytes included by the directive,
@@ -3977,6 +3977,10 @@ that sequence is not empty and @code{if_empty} argument is balanced token
 sequence which is used as expansion for @code{#embed} directive if the
 resource is empty.
 
+The @code{gnu::offset} parameter argument is a constant expression
+which specifies how many bytes to skip from the start of the resource.
+@code{limit} is then counted from that position.
+
 The @code{#embed} directive is not supported in the Traditional Mode
 (@pxref{Traditional Mode}).
 
diff --git a/gcc/testsuite/c-c++-common/cpp/embed-15.c b/gcc/testsuite/c-c++-common/cpp/embed-15.c
new file mode 100644
index 000000000000..c12aeb31db53
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/embed-15.c
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+/* { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir" } */
+/* { dg-additional-options "-std=gnu99" { target c } } */
+
+#if __has_embed (__FILE__ gnu::offset (4 + FOOBAR) limit (3)) != 
__STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#embed <magna-carta.txt> limit(1) gnu::offset (0) prefix(int a = ) suffix (;) 
+#embed <magna-carta.txt> limit(1) __gnu__::offset (1 * 1) prefix(int b = ) 
suffix (;) 
+#embed <magna-carta.txt> limit(1) gnu::__offset__ (1 + 1) prefix(int c = ) 
suffix (;) 
+#embed <magna-carta.txt> __limit__(1) __gnu__::__offset__ (1 + (1 \
+  + 1)) __prefix__(int d = ) __suffix__ (;)
+const unsigned char e[] = {
+  #embed <magna-carta.txt> limit(5) gnu::offset (999)
+};
+const unsigned char f[] = {
+  #embed <magna-carta.txt> limit(7) gnu::offset (998)
+};
+const unsigned char g[] = {
+  #embed <magna-carta.txt> limit(8) gnu::offset (998)
+};
+const unsigned char h[] = {
+  #embed <magna-carta.txt> limit(8) gnu::offset (997)
+};
+const unsigned char i[] = {
+  #embed <magna-carta.txt> limit(9) gnu::offset (997)
+};
+const unsigned char j[] = {
+  #embed <magna-carta.txt> limit(30) gnu::offset (990)
+};
+const unsigned char k[] = {
+  #embed <magna-carta.txt> limit(26) gnu::offset (992)
+};
+const unsigned char l[] = {
+  #embed <magna-carta.txt>
+};
+const unsigned char m[] = {
+  #embed <magna-carta.txt> __limit__ (1000) __gnu__::__offset__ (32)
+};
+#if __has_embed (<magna-carta.txt> limit(5) gnu::offset (999)) != 
__STDC_EMBED_FOUND__ \
+    || __has_embed (<magna-carta.txt> limit(5) gnu::offset (999)) != 
__STDC_EMBED_FOUND__ \
+    || __has_embed (<magna-carta.txt> limit(7) gnu::offset (998)) != 
__STDC_EMBED_FOUND__ \
+    || __has_embed (<magna-carta.txt> limit(8) gnu::offset (998)) != 
__STDC_EMBED_FOUND__ \
+    || __has_embed (<magna-carta.txt> limit(8) gnu::offset (997)) != 
__STDC_EMBED_FOUND__ \
+    || __has_embed (<magna-carta.txt> limit(9) gnu::offset (997)) != 
__STDC_EMBED_FOUND__ \
+    || __has_embed (<magna-carta.txt> limit(30) gnu::offset (990)) != 
__STDC_EMBED_FOUND__ \
+    || __has_embed (<magna-carta.txt> limit(26) gnu::offset (992)) != 
__STDC_EMBED_FOUND__ \
+    || __has_embed (<magna-carta.txt>) != __STDC_EMBED_FOUND__ \
+    || __has_embed (<magna-carta.txt> limit(26) gnu::offset (992)) != 
__STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#ifdef __cplusplus
+#define C "C"
+#else
+#define C
+#endif
+extern C void abort (void);
+extern C int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+int
+main ()
+{
+  if (a != 'H' || b != 'e' || c != 'n' || d != 'r')
+    abort ();
+  if (sizeof (e) != 5
+      || sizeof (f) != 7
+      || sizeof (g) != 8
+      || sizeof (h) != 8
+      || sizeof (i) != 9
+      || sizeof (j) != 30
+      || sizeof (k) != 26
+      || sizeof (l) < 1032
+      || sizeof (m) != 1000)
+    abort ();
+  if (memcmp (e, l + 999, 5)
+      || memcmp (f, l + 998, 7)
+      || memcmp (g, l + 998, 8)
+      || memcmp (h, l + 997, 8)
+      || memcmp (i, l + 997, 9)
+      || memcmp (j, l + 990, 30)
+      || memcmp (k, l + 992, 26)
+      || memcmp (m, l + 32, 1000))
+    abort ();
+  if (l[0] != 'H' || l[1] != 'e' || l[2] != 'n' || l[3] != 'r')
+    abort ();
+}
diff --git a/gcc/testsuite/c-c++-common/cpp/embed-16.c b/gcc/testsuite/c-c++-common/cpp/embed-16.c
new file mode 100644
index 000000000000..a3d1a6d10287
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/embed-16.c
@@ -0,0 +1,31 @@
+/* { dg-do preprocess } */
+/* { dg-options "" } */
+
+#embed __FILE__ gnu::offset(1) gnu::offset(1) /* { dg-error "duplicate embed 
parameter 'gnu::offset'" } */
+#embed __FILE__ gnu::offset prefix() suffix() /* { dg-error "expected '\\\('" 
} */
+#embed __FILE__ gnu::offset (1 / 0) /* { dg-error "division by zero in #embed" 
} */
+#embed __FILE__ __gnu__::__offset__ (+ + +) /* { dg-error "operator '\\\+' has 
no right operand" } */
+#define FOO 1
+#embed __FILE__ gnu::offset(0 + defined(FOO)) /* { dg-error "'defined' in 
#embed parameter" } */
+#embed __FILE__ gnu::offset (-1) /* { dg-error "negative embed parameter 
operand" } */
+#embed __FILE__ gnu::offset (-42) /* { dg-error "negative embed parameter 
operand" } */
+#embed __FILE__ gnu::offset (-9223372036854775807 - 1) /* { dg-error "negative 
embed parameter operand" } */
+#embed __FILE__ gnu::offset (18446744073709551615ULL) /* { dg-error "too large 
'gnu::offset' argument" } */
+#if 1 + __has_embed (__FILE__ gnu::offset(1) __gnu__::__offset__(1)) /* { 
dg-error "duplicate embed parameter 'gnu::offset'" } */
+#endif
+#if 1 + __has_embed (__FILE__ __gnu__::__offset__ prefix() suffix()) /* { 
dg-error "expected '\\\('" } */
+#endif
+#if 1 + __has_embed (__FILE__ gnu::offset(1/0)) /* { dg-error "division by 
zero in #embed" } */
+#endif
+#if 1 + __has_embed (__FILE__ gnu::offset(+ + +)) /* { dg-error "operator 
'\\\+' has no right operand" } */
+#endif
+#if 1 + __has_embed (__FILE__ gnu::offset(0 + defined(FOO))) /* { dg-error 
"'defined' in #embed parameter" } */
+#endif
+#if 1 + __has_embed (__FILE__ gnu::offset (-1)) /* { dg-error "negative embed 
parameter operand" } */
+#endif
+#if 1 + __has_embed (__FILE__ gnu::offset (-42)) /* { dg-error "negative embed 
parameter operand" } */
+#endif
+#if 1 + __has_embed (__FILE__ gnu::offset (-9223372036854775807 - 1)) /* { 
dg-error "negative embed parameter operand" } */
+#endif
+#if 1 + __has_embed (__FILE__ gnu::offset (18446744073709551615ULL)) /* { 
dg-error "too large 'gnu::offset' argument" } */
+#endif
diff --git a/gcc/testsuite/gcc.dg/cpp/embed-5.c b/gcc/testsuite/gcc.dg/cpp/embed-5.c
new file mode 100644
index 000000000000..64209144841a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/embed-5.c
@@ -0,0 +1,4 @@
+/* { dg-do run } */
+/* { dg-options "-std=c23 -pedantic-errors 
--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir" } */
+
+#include "../../c-c++-common/cpp/embed-15.c"
diff --git a/libcpp/directives.cc b/libcpp/directives.cc
index 4aac519ff40c..f8d98980f2e7 100644
--- a/libcpp/directives.cc
+++ b/libcpp/directives.cc
@@ -1014,13 +1014,15 @@ skip_balanced_token_seq (cpp_reader *pfile, cpp_ttype 
end,
   EMBED_PARAM (LIMIT, "limit")         \
   EMBED_PARAM (PREFIX, "prefix")       \
   EMBED_PARAM (SUFFIX, "suffix")       \
-  EMBED_PARAM (IF_EMPTY, "if_empty")
+  EMBED_PARAM (IF_EMPTY, "if_empty")   \
+  EMBED_PARAM (GNU_OFFSET, "offset")
 
 enum embed_param_kind {
 #define EMBED_PARAM(c, s) EMBED_PARAM_##c,
   EMBED_PARAMS
 #undef EMBED_PARAM
-  NUM_EMBED_PARAMS
+  NUM_EMBED_PARAMS,
+  NUM_EMBED_STD_PARAMS = EMBED_PARAM_IF_EMPTY + 1
 };
 
 static struct { int len; const char *name; } embed_params[NUM_EMBED_PARAMS] = {
@@ -1120,7 +1122,18 @@ _cpp_parse_embed_params (cpp_reader *pfile, struct 
cpp_embed_params *params)
       size_t param_kind = -1;
       if (param_prefix == NULL)
        {
-         for (size_t i = 0; i < NUM_EMBED_PARAMS; ++i)
+         for (size_t i = 0; i < NUM_EMBED_STD_PARAMS; ++i)
+           if (param_name_len == embed_params[i].len
+               && memcmp (param_name, embed_params[i].name,
+                          param_name_len) == 0)
+             {
+               param_kind = i;
+               break;
+             }
+       }
+      else if (param_prefix_len == 3 && memcmp (param_prefix, "gnu", 3) == 0)
+       {
+         for (size_t i = NUM_EMBED_STD_PARAMS; i < NUM_EMBED_PARAMS; ++i)
            if (param_name_len == embed_params[i].len
                && memcmp (param_name, embed_params[i].name,
                           param_name_len) == 0)
@@ -1157,12 +1170,23 @@ _cpp_parse_embed_params (cpp_reader *pfile, struct 
cpp_embed_params *params)
       if (param_kind != (size_t) -1 && token->type != CPP_OPEN_PAREN)
        cpp_error_with_line (pfile, CPP_DL_ERROR, loc, 0,
                             "expected '('");
-      else if (param_kind == EMBED_PARAM_LIMIT)
+      else if (param_kind == EMBED_PARAM_LIMIT
+              || param_kind == EMBED_PARAM_GNU_OFFSET)
        {
-         if (params->has_embed && pfile->op_stack == NULL)
-           _cpp_expand_op_stack (pfile);
-         params->limit = _cpp_parse_expr (pfile, "#embed", token);
-         token = _cpp_get_token_no_padding (pfile);
+         if (params->has_embed && pfile->op_stack == NULL)
+           _cpp_expand_op_stack (pfile);
+         cpp_num_part res = _cpp_parse_expr (pfile, "#embed", token);
+         if (param_kind == EMBED_PARAM_LIMIT)
+           params->limit = res;
+         else
+           {
+             if (res > INTTYPE_MAXIMUM (off_t))
+               cpp_error_with_line (pfile, CPP_DL_ERROR, loc, 0,
+                                    "too large 'gnu::offset' argument");
+             else
+               params->offset = res;
+           }
+         token = _cpp_get_token_no_padding (pfile);
        }
       else if (token->type == CPP_OPEN_PAREN)
        {
diff --git a/libcpp/files.cc b/libcpp/files.cc
index 52b1850f2263..9d64bff657b4 100644
--- a/libcpp/files.cc
+++ b/libcpp/files.cc
@@ -90,6 +90,9 @@ struct _cpp_file
   /* Size for #embed, perhaps smaller than st.st_size.  */
   size_t limit;
 
+  /* Offset for #embed.  */
+  off_t offset;
+
   /* File descriptor.  Invalid if -1, otherwise open.  */
   int fd;
 
@@ -1242,8 +1245,11 @@ _cpp_stack_embed (cpp_reader *pfile, const char *fname, 
bool angle,
   _cpp_file *orig_file = file;
   if (file->buffer_valid
       && (!S_ISREG (file->st.st_mode)
-         || (file->limit < file->st.st_size + (size_t) 0
-             && file->limit < params->limit)))
+         || file->offset + (cpp_num_part) 0 > params->offset
+         || (file->limit < file->st.st_size - file->offset + (size_t) 0
+             && (params->offset - file->offset > (cpp_num_part) file->limit
+                 || file->limit - (params->offset
+                                   - file->offset) < params->limit))))
     {
       bool found = false;
       if (S_ISREG (file->st.st_mode))
@@ -1256,8 +1262,13 @@ _cpp_stack_embed (cpp_reader *pfile, const char *fname, 
bool angle,
                 && strcmp (file->path, file->next_file->path) == 0)
            {
              file = file->next_file;
-             if (file->limit >= file->st.st_size + (size_t) 0
-                 || file->limit >= params->limit)
+             if (file->offset + (cpp_num_part) 0 <= params->offset
+                 && (file->limit >= (file->st.st_size - file->offset
+                                     + (size_t) 0)
+                     || (params->offset
+                         - file->offset <= (cpp_num_part) file->limit
+                         && file->limit - (params->offset
+                                           - file->offset) >= params->limit)))
                {
                  found = true;
                  break;
@@ -1313,8 +1324,10 @@ _cpp_stack_embed (cpp_reader *pfile, const char *fname, 
bool angle,
       if (regular)
        {
          cpp_num_part limit;
-         if (file->st.st_size + (cpp_num_part) 0 < params->limit)
-           limit = file->st.st_size;
+         if (file->st.st_size + (cpp_num_part) 0 < params->offset)
+           limit = 0;
+         else if (file->st.st_size - params->offset < params->limit)
+           limit = file->st.st_size - params->offset;
          else
            limit = params->limit;
          if (params->has_embed)
@@ -1325,6 +1338,14 @@ _cpp_stack_embed (cpp_reader *pfile, const char *fname, 
bool angle,
                            "%s is too large", file->path);
              goto fail;
            }
+         if (lseek (file->fd, params->offset, SEEK_CUR)
+             != (off_t) params->offset)
+           {
+             cpp_errno_filename (pfile, CPP_DL_ERROR, file->path,
+                                 params->loc);
+             goto fail;
+           }
+         file->offset = params->offset;
          file->limit = limit;
          size = limit;
        }
@@ -1337,6 +1358,38 @@ _cpp_stack_embed (cpp_reader *pfile, const char *fname, 
bool angle,
       buf = XNEWVEC (uchar, size ? size : 1);
       total = 0;
 
+      if (!regular && params->offset)
+       {
+         uchar *buf2 = buf;
+         ssize_t size2 = size;
+         cpp_num_part total2 = params->offset;
+
+         if (params->offset > 8 * 1024 && size < 8 * 1024)
+           {
+             size2 = 32 * 1024;
+             buf2 = XNEWVEC (uchar, size2);
+           }
+         do
+           {
+             if ((cpp_num_part) size2 > total2)
+               size2 = total2;
+             count = read (file->fd, buf2, size2);
+             if (count < 0)
+               {
+                 cpp_errno_filename (pfile, CPP_DL_ERROR, file->path,
+                                     params->loc);
+                 if (buf2 != buf)
+                   free (buf2);
+                 free (buf);
+                 goto fail;
+               }
+             total2 -= count;
+           }
+         while (total2);
+         if (buf2 != buf)
+           free (buf2);
+       }
+
       while ((count = read (file->fd, buf + total, size - total)) > 0)
        {
          total += count;
@@ -1377,7 +1430,10 @@ _cpp_stack_embed (cpp_reader *pfile, const char *fname, 
bool angle,
          file->limit = total;
        }
       else if (!regular)
-       file->limit = total;
+       {
+         file->offset = params->offset;
+         file->limit = total;
+       }
 
       file->buffer_start = buf;
       file->buffer = buf;
@@ -1386,9 +1442,22 @@ _cpp_stack_embed (cpp_reader *pfile, const char *fname, 
bool angle,
       file->fd = -1;
     }
   else if (params->has_embed)
-    return file->limit && params->limit ? 1 : 2;
+    {
+      if (params->offset - file->offset > file->limit)
+       return 2;
+      size_t limit = file->limit - (params->offset - file->offset);
+      return limit && params->limit ? 1 : 2;
+    }
 
+  const uchar *buffer = file->buffer;
   size_t limit = file->limit;
+  if (params->offset - file->offset > limit)
+    limit = 0;
+  else
+    {
+      buffer += params->offset - file->offset;
+      limit -= params->offset - file->offset;
+    }
   if (params->limit < limit)
     limit = params->limit;
 
@@ -1412,20 +1481,20 @@ _cpp_stack_embed (cpp_reader *pfile, const char *fname, 
bool angle,
   size_t len = 0;
   for (size_t i = 0; i < limit; ++i)
     {
-      if (file->buffer[i] < 10)
+      if (buffer[i] < 10)
        len += 2;
-      else if (file->buffer[i] < 100)
+      else if (buffer[i] < 100)
        len += 3;
 #if UCHAR_MAX == 255
       else
        len += 4;
 #else
-      else if (file->buffer[i] < 1000)
+      else if (buffer[i] < 1000)
        len += 4;
       else
        {
          char buf[64];
-         len += sprintf (buf, "%d", file->buffer[i]) + 1;
+         len += sprintf (buf, "%d", buffer[i]) + 1;
        }
 #endif
       if (len > INTTYPE_MAXIMUM (ssize_t))
@@ -1479,7 +1548,7 @@ _cpp_stack_embed (cpp_reader *pfile, const char *fname, 
bool angle,
       if (i == 0)
        tok->flags |= PREV_WHITE;
       tok->val.str.text = s;
-      tok->val.str.len = sprintf ((char *) s, "%d", file->buffer[i]);
+      tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]);
       s += tok->val.str.len + 1;
       if (tok == &pfile->directive_result)
        tok = toks;
diff --git a/libcpp/internal.h b/libcpp/internal.h
index ff49c282aa36..ad0625af9554 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -637,7 +637,7 @@ struct cpp_embed_params
 {
   location_t loc;
   bool has_embed;
-  cpp_num_part limit;
+  cpp_num_part limit, offset;
   cpp_embed_params_tokens prefix, suffix, if_empty;
 };

Reply via email to