Hi Paul,

> > /* The user can define UNSIGNED_INDSIZE_T, to get a different set of 
> > compiler
> >     warnings.  */
> 
> I'm leery of this complexity, as it would change the semantics of what the 
> type 
> means. The type is not just about compiler warnings; it's about preferring 
> signed integer arithmetic whenever possible to avoid unexpected results 
> during 
> comparisons and to catch integer overflow.

With the _current_ tools (sanitizers) and the _current_ C standard, the choice
of a signed type is a right one. But, beyond the readability (for humans), 
there's
also the benefits we can get from future compiler enhancements. Clang has just
introduced a special case of range types in clang 11; we could very well have
range types in C compilers in 2 years, and possibly in the standard in 10 years.

For this reason I see the choice of a signed type as an _implementation_ detail.
Conceptually, for a type whose values are 0..2^31-1, it does not matter whether
it is considered signed or not. Except for binary operations, like the 
comparisons
that you mention, for which the compiler warnings will help us, both now and
10 years from now.

The purpose of the UNSIGNED_INDSIZE_T is that we can occasionally verify that
no code makes assumptions about 'idx_t' being signed, since such assumptions
would not be future-proof.

> > typedef ptrdiff_t indsize_t;
> 
> The type name "indsize_t" is a bit long and this would detract from 
> readability 
> in programs that use a lot of indexes. I suggest "idx_t" instead.  dfa.c is 
> already using this type name and it works well there.

OK, I've changed s/indsize/idx/ everywhere.

> dfa.c also has IDX_MAX.

Good point. Added.

> I suppose IDX_WIDTH would also be useful.

Added as well.

> I'm not so 
> sure about PRIdIDX etc. as those macros make programs hard to read and I 
> don't 
> see much point to hiding the fact that the type is equivalent to ptrdiff_t. 
> (dfa.c just uses %td etc. for printf formats.)

Yes, %tu or %zu should work equally well (except for possibly some pointless
warnings on 64-bit native Windows).

I'm pushing the attached patch.

I don't know how far 'dfa.c' can make use of idx.h, without annoying Arnold
with too many dependencies when he pulls in 'dfa.c' into gawk.

Bruno

From 84f4817709b9998b3eadfb8d4409890bd46eb511 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Thu, 3 Dec 2020 22:56:22 +0100
Subject: [PATCH] idx: New module.

* lib/idx.h: New file.
* modules/idx: New file.
* lib/canonicalize-lgpl.c: Include idx.h. Use idx_t instead of
ptrdiff_t.
* lib/canonicalize.c: Likewise.
* modules/canonicalize-lgpl (Depends-on): Add idx.
* modules/canonicalize (Depends-on): Likewise.
---
 ChangeLog                 |  11 +++++
 lib/canonicalize-lgpl.c   |   4 +-
 lib/canonicalize.c        |  23 +++++-----
 lib/idx.h                 | 104 ++++++++++++++++++++++++++++++++++++++++++++++
 modules/canonicalize      |   1 +
 modules/canonicalize-lgpl |   1 +
 modules/idx               |  22 ++++++++++
 7 files changed, 154 insertions(+), 12 deletions(-)
 create mode 100644 lib/idx.h
 create mode 100644 modules/idx

diff --git a/ChangeLog b/ChangeLog
index 6ff86eb..4b6e880 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,16 @@
 2020-12-03  Bruno Haible  <br...@clisp.org>
 
+	idx: New module.
+	* lib/idx.h: New file.
+	* modules/idx: New file.
+	* lib/canonicalize-lgpl.c: Include idx.h. Use idx_t instead of
+	ptrdiff_t.
+	* lib/canonicalize.c: Likewise.
+	* modules/canonicalize-lgpl (Depends-on): Add idx.
+	* modules/canonicalize (Depends-on): Likewise.
+
+2020-12-03  Bruno Haible  <br...@clisp.org>
+
 	fprintf-posix-tests: Avoid a test failure on macOS 10.13.
 	Reported by Martin Storsjö <mar...@martin.st> in
 	<https://lists.gnu.org/archive/html/bug-gnulib/2020-12/msg00003.html>.
diff --git a/lib/canonicalize-lgpl.c b/lib/canonicalize-lgpl.c
index 981300a..d090dcd 100644
--- a/lib/canonicalize-lgpl.c
+++ b/lib/canonicalize-lgpl.c
@@ -41,6 +41,7 @@
 
 #ifdef _LIBC
 # include <shlib-compat.h>
+typedef ptrdiff_t idx_t;
 #else
 # define SHLIB_COMPAT(lib, introduced, obsoleted) 0
 # define versioned_symbol(lib, local, symbol, version) extern int dummy
@@ -48,6 +49,7 @@
 # define weak_alias(local, symbol)
 # define __canonicalize_file_name canonicalize_file_name
 # define __realpath realpath
+# include "idx.h"
 # include "pathmax.h"
 # include "malloca.h"
 # include "filename.h"
@@ -227,7 +229,7 @@ __realpath (const char *name, char *resolved)
 
           if (rpath_limit - dest <= end - start)
             {
-              ptrdiff_t dest_offset = dest - rpath;
+              idx_t dest_offset = dest - rpath;
               char *new_rpath;
 
               if (resolved)
diff --git a/lib/canonicalize.c b/lib/canonicalize.c
index d12fc6b..e44793d 100644
--- a/lib/canonicalize.c
+++ b/lib/canonicalize.c
@@ -27,6 +27,7 @@
 
 #include "areadlink.h"
 #include "file-set.h"
+#include "idx.h"
 #include "hash-triple.h"
 #include "pathmax.h"
 #include "xalloc.h"
@@ -76,7 +77,7 @@ seen_triple (Hash_table **ht, char const *filename, struct stat const *st)
 {
   if (*ht == NULL)
     {
-      int initial_capacity = 7;
+      idx_t initial_capacity = 7;
       *ht = hash_initialize (initial_capacity,
                             NULL,
                             triple_hash,
@@ -107,12 +108,12 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode)
   char const *start;
   char const *end;
   char const *rname_limit;
-  ptrdiff_t extra_len = 0;
+  idx_t extra_len = 0;
   Hash_table *ht = NULL;
   int saved_errno;
   bool logical = (can_mode & CAN_NOLINKS) != 0;
   int num_links = 0;
-  ptrdiff_t prefix_len;
+  idx_t prefix_len;
 
   canonicalize_mode_t can_exist = can_mode & CAN_MODE_MASK;
   if (multiple_bits_set (can_exist))
@@ -142,8 +143,8 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode)
       rname = xgetcwd ();
       if (!rname)
         return NULL;
-      ptrdiff_t rnamelen = strlen (rname);
-      ptrdiff_t rnamesize = rnamelen;  /* Lower bound on size; good enough.  */
+      idx_t rnamelen = strlen (rname);
+      idx_t rnamesize = rnamelen;  /* Lower bound on size; good enough.  */
       if (rnamesize < PATH_MAX)
         {
           rnamesize = PATH_MAX;
@@ -175,7 +176,7 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode)
               /* For UNC file names '\\server\path\to\file', extend the prefix
                  to include the server: '\\server\'.  */
               {
-                ptrdiff_t i;
+                idx_t i;
                 for (i = 2; name[i] != '\0' && !ISSLASH (name[i]); )
                   i++;
                 if (name[i] != '\0' /* implies ISSLASH (name[i]) */
@@ -229,8 +230,8 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode)
 
           if (rname_limit - dest <= end - start)
             {
-              ptrdiff_t dest_offset = dest - rname;
-              ptrdiff_t new_size = rname_limit - rname;
+              idx_t dest_offset = dest - rname;
+              idx_t new_size = rname_limit - rname;
 
               if (end - start + 1 > PATH_MAX)
                 new_size += end - start + 1;
@@ -286,8 +287,8 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode)
                     }
                 }
 
-              ptrdiff_t n = strlen (buf);
-              ptrdiff_t len = strlen (end);
+              idx_t n = strlen (buf);
+              idx_t len = strlen (end);
 
               if (!extra_len)
                 {
@@ -307,7 +308,7 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode)
 
               if (IS_ABSOLUTE_FILE_NAME (buf))
                 {
-                  ptrdiff_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf);
+                  idx_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf);
 
                   if (pfxlen)
                     memcpy (rname, buf, pfxlen);
diff --git a/lib/idx.h b/lib/idx.h
new file mode 100644
index 0000000..6574b1b
--- /dev/null
+++ b/lib/idx.h
@@ -0,0 +1,104 @@
+/* A type for indices and sizes.
+
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <https://www.gnu.org/licenses/>.  */
+
+#ifndef _IDX_H
+#define _IDX_H
+
+/* Get ptrdiff_t.  */
+#include <stddef.h>
+
+/* Get PTRDIFF_WIDTH.  */
+#include <stdint.h>
+
+/* The type 'idx_t' holds an (array) index or an (object) size.
+   Its implementation is a signed or unsigned integer type, capable of holding
+   the values
+     0..2^63-1 (on 64-bit platforms) or
+     0..2^31-1 (on 32-bit platforms).
+
+   Why not use 'size_t' directly?
+
+     * Security: Signed types can be checked for overflow via
+       '-fsanitize=undefined', but unsigned types cannot.
+
+   Why not use 'ptrdiff_t' directly?
+
+     * Maintainability: When reading and modifying code, it helps to know that
+       a certain variable cannot have negative values.  For example, when you
+       have a loop
+
+         int n = ...;
+         for (int i = 0; i < n; i++) ...
+
+       or
+
+         ptrdiff_t n = ...;
+         for (ptrdiff_t i = 0; i < n; i++) ...
+
+       you have to ask yourself "what if n < 0?".  Whereas in
+
+         idx_t n = ...;
+         for (idx_t i = 0; i < n; i++) ...
+
+       you know that this case cannot happen.
+
+       Similarly, when a programmer writes
+
+         idx_t = ptr2 - ptr1;
+
+       there is an implied assertion that ptr1 and ptr2 point into the same
+       object and that ptr1 <= ptr2.
+
+     * Being future-proof: In the future, range types (integers which are
+       constrained to a certain range of values) may be added to C compilers
+       or to the C standard.  Several programming languages (Ada, Haskell,
+       Common Lisp, Pascal) already have range types.  Such range types may
+       help producing good code and good warnings.  The type 'idx_t'
+       could then be typedef'ed to a range type.  */
+
+/* The user can define UNSIGNED_IDX_T, to get a different set of compiler
+   warnings.  */
+#if defined UNSIGNED_IDX_T
+# if __clang_version__ >= 11 && 0
+/* It would be tempting to use the clang "extended integer types", which are a
+   special case of range types.
+   <https://clang.llvm.org/docs/LanguageExtensions.html#extended-integer-types>
+   However, these types don't support binary operators with plain integer
+   types (e.g. expressions such as x > 1).  */
+typedef unsigned _ExtInt (PTRDIFF_WIDTH - 1) idx_t;
+# else
+/* Use an unsigned type as wide as 'ptrdiff_t'.
+   ISO C does not mandate that 'size_t' and 'ptrdiff_t' have the same size, but
+   it is so an all platforms we have seen since 1990.  */
+typedef size_t idx_t;
+# endif
+#else
+/* Use the signed type 'ptrdiff_t' by default.  */
+typedef ptrdiff_t idx_t;
+#endif
+
+/* IDX_MAX is the maximum value of an idx_t.  */
+#if defined UNSIGNED_IDX_T
+# define IDX_MAX SIZE_MAX
+#else
+# define IDX_MAX PTRDIFF_MAX
+#endif
+
+/* IDX_WIDTH is the number of bits in an idx_t.  */
+#define IDX_WIDTH (PTRDIFF_WIDTH - 1)
+
+#endif /* _IDX_H */
diff --git a/modules/canonicalize b/modules/canonicalize
index d5eb493..65c5011 100644
--- a/modules/canonicalize
+++ b/modules/canonicalize
@@ -14,6 +14,7 @@ extensions
 file-set
 filename
 hash-triple-simple
+idx
 memmove
 nocrash
 pathmax
diff --git a/modules/canonicalize-lgpl b/modules/canonicalize-lgpl
index 701b492..f67ca78 100644
--- a/modules/canonicalize-lgpl
+++ b/modules/canonicalize-lgpl
@@ -14,6 +14,7 @@ alloca-opt        [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONI
 double-slash-root [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1]
 errno             [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1]
 filename          [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1]
+idx           [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1]
 malloca           [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1]
 memmove           [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1]
 pathmax           [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1]
diff --git a/modules/idx b/modules/idx
new file mode 100644
index 0000000..07faac2
--- /dev/null
+++ b/modules/idx
@@ -0,0 +1,22 @@
+Description:
+A type for indices and sizes.
+
+Files:
+lib/idx.h
+
+Depends-on:
+stdint
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += idx.h
+
+Include:
+"idx.h"
+
+License:
+LGPLv2+
+
+Maintainer:
+all
-- 
2.7.4

Reply via email to