Hi Paul, Here's my take on the typedef for indices and sizes.
As I've said in https://lists.gnu.org/archive/html/bug-gnulib/2017-06/msg00024.html and later, the point is to make the code clearer. For example, in canonicalize-lgpl.c when we write ptrdiff_t dest_offset = dest - rpath; it means "rpath and dest are pointers into possibly different memory objects", whereas when we write indsize_t dest_offset = dest - rpath; it means "rpath and dest are pointers into the same memory object, and rpath <= dest". Here's the proposed patch. 2020-12-03 Bruno Haible <br...@clisp.org> indsize: New module. * lib/indsize.h: New file. * modules/indsize: New file. * lib/canonicalize-lgpl.c: Include indsize.h. Use indsize_t instead of ptrdiff_t. * lib/canonicalize.c: Likewise. * modules/canonicalize-lgpl (Depends-on): Add indsize. * modules/canonicalize (Depends-on): Likewise. ================================ lib/indsize.h ================================ /* A type for indices and sizes. Copyright (C) 2020 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see <https://www.gnu.org/licenses/>. */ #ifndef _INDSIZE_H #define _INDSIZE_H /* Get ptrdiff_t. */ #include <stddef.h> /* Get PTRDIFF_WIDTH. */ #include <stdint.h> /* The type 'indsize_t' holds an (array) index or an (object) size. Its implementation is a signed or unsigned integer type, capable of holding the values 0..2^63-1 (on 64-bit platforms) or 0..2^31-1 (on 32-bit platforms). Why not use 'size_t' directly? * Security: Signed types can be checked for overflow via '-fsanitize=undefined', but unsigned types cannot. Why not use 'ptrdiff_t' directly? * Maintainability: When reading and modifying code, it helps to know that a certain variable cannot have negative values. For example, when you have a loop int n = ...; for (int i = 0; i < n; i++) ... or ptrdiff_t n = ...; for (ptrdiff_t i = 0; i < n; i++) ... you have to ask yourself "what if n < 0?". Whereas in indsize_t n = ...; for (indsize_t i = 0; i < n; i++) ... you know that this case cannot happen. * Being future-proof: In the future, range types (integers which are constrained to a certain range of values) may be added to C compilers or to the C standard. Several programming languages (Ada, Haskell, Common Lisp, Pascal) already have range types. Such range types may help producing good code and good warnings. The type 'indsize_t' could then be typedef'ed to a range type. */ /* The user can define UNSIGNED_INDSIZE_T, to get a different set of compiler warnings. */ #if defined UNSIGNED_INDSIZE_T # if __clang_version__ >= 11 && 0 /* It would be tempting to use the clang "extended integer types", which are a special case of range types. <https://clang.llvm.org/docs/LanguageExtensions.html#extended-integer-types> However, these types don't support binary operators with plain integer types (e.g. expressions such as x > 1). */ typedef unsigned _ExtInt (PTRDIFF_WIDTH - 1) indsize_t; # else /* Use an unsigned type as wide as 'ptrdiff_t'. ISO C does not mandate that 'size_t' and 'ptrdiff_t' have the same size, but it is so an all platforms we have seen since 1990. */ typedef size_t indsize_t; # endif #else /* Use the signed type 'ptrdiff_t' by default. */ typedef ptrdiff_t indsize_t; #endif #endif /* _INDSIZE_H */ =============================== modules/indsize =============================== Description: A type for indices and sizes. Files: lib/indsize.h Depends-on: stdint configure.ac: Makefile.am: lib_SOURCES += indsize.h Include: "indsize.h" License: LGPLv2+ Maintainer: all =============================================================================== diff --git a/lib/canonicalize-lgpl.c b/lib/canonicalize-lgpl.c index 981300a..5089061 100644 --- a/lib/canonicalize-lgpl.c +++ b/lib/canonicalize-lgpl.c @@ -41,6 +41,7 @@ #ifdef _LIBC # include <shlib-compat.h> +typedef ptrdiff_t indsize_t; #else # define SHLIB_COMPAT(lib, introduced, obsoleted) 0 # define versioned_symbol(lib, local, symbol, version) extern int dummy @@ -48,6 +49,7 @@ # define weak_alias(local, symbol) # define __canonicalize_file_name canonicalize_file_name # define __realpath realpath +# include "indsize.h" # include "pathmax.h" # include "malloca.h" # include "filename.h" @@ -227,7 +229,7 @@ __realpath (const char *name, char *resolved) if (rpath_limit - dest <= end - start) { - ptrdiff_t dest_offset = dest - rpath; + indsize_t dest_offset = dest - rpath; char *new_rpath; if (resolved) diff --git a/lib/canonicalize.c b/lib/canonicalize.c index d12fc6b..b2b06c6 100644 --- a/lib/canonicalize.c +++ b/lib/canonicalize.c @@ -27,6 +27,7 @@ #include "areadlink.h" #include "file-set.h" +#include "indsize.h" #include "hash-triple.h" #include "pathmax.h" #include "xalloc.h" @@ -76,7 +77,7 @@ seen_triple (Hash_table **ht, char const *filename, struct stat const *st) { if (*ht == NULL) { - int initial_capacity = 7; + indsize_t initial_capacity = 7; *ht = hash_initialize (initial_capacity, NULL, triple_hash, @@ -107,12 +108,12 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) char const *start; char const *end; char const *rname_limit; - ptrdiff_t extra_len = 0; + indsize_t extra_len = 0; Hash_table *ht = NULL; int saved_errno; bool logical = (can_mode & CAN_NOLINKS) != 0; int num_links = 0; - ptrdiff_t prefix_len; + indsize_t prefix_len; canonicalize_mode_t can_exist = can_mode & CAN_MODE_MASK; if (multiple_bits_set (can_exist)) @@ -142,8 +143,8 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) rname = xgetcwd (); if (!rname) return NULL; - ptrdiff_t rnamelen = strlen (rname); - ptrdiff_t rnamesize = rnamelen; /* Lower bound on size; good enough. */ + indsize_t rnamelen = strlen (rname); + indsize_t rnamesize = rnamelen; /* Lower bound on size; good enough. */ if (rnamesize < PATH_MAX) { rnamesize = PATH_MAX; @@ -175,7 +176,7 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) /* For UNC file names '\\server\path\to\file', extend the prefix to include the server: '\\server\'. */ { - ptrdiff_t i; + indsize_t i; for (i = 2; name[i] != '\0' && !ISSLASH (name[i]); ) i++; if (name[i] != '\0' /* implies ISSLASH (name[i]) */ @@ -229,8 +230,8 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) if (rname_limit - dest <= end - start) { - ptrdiff_t dest_offset = dest - rname; - ptrdiff_t new_size = rname_limit - rname; + indsize_t dest_offset = dest - rname; + indsize_t new_size = rname_limit - rname; if (end - start + 1 > PATH_MAX) new_size += end - start + 1; @@ -286,8 +287,8 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) } } - ptrdiff_t n = strlen (buf); - ptrdiff_t len = strlen (end); + indsize_t n = strlen (buf); + indsize_t len = strlen (end); if (!extra_len) { @@ -307,7 +308,7 @@ canonicalize_filename_mode (const char *name, canonicalize_mode_t can_mode) if (IS_ABSOLUTE_FILE_NAME (buf)) { - ptrdiff_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf); + indsize_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf); if (pfxlen) memcpy (rname, buf, pfxlen); diff --git a/modules/canonicalize-lgpl b/modules/canonicalize-lgpl index 701b492..67e449a 100644 --- a/modules/canonicalize-lgpl +++ b/modules/canonicalize-lgpl @@ -14,6 +14,7 @@ alloca-opt [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONI double-slash-root [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] errno [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] filename [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] +indsize [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] malloca [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] memmove [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] pathmax [test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1] diff --git a/modules/canonicalize b/modules/canonicalize index d5eb493..e89998f 100644 --- a/modules/canonicalize +++ b/modules/canonicalize @@ -14,6 +14,7 @@ extensions file-set filename hash-triple-simple +indsize memmove nocrash pathmax