https://gcc.gnu.org/g:e2c93a07d739034e5375e69da38091efc87d56e9

commit r15-8939-ge2c93a07d739034e5375e69da38091efc87d56e9
Author: Jonathan Wakely <jwak...@redhat.com>
Date:   Tue Mar 18 21:17:03 2025 +0000

    libgcobol: Fix uses of tolower and toupper with std::transform
    
    As explained in the libstdc++ manual[1] and elsewhere[2], using tolower
    and toupper in std::transform directly is wrong. If char is signed then
    non-ASCII characters with negative values lead to undefined behaviour.
    Also, tolower and toupper are overloaded names in C++ so expecting them
    to resolve to a unique function pointer is unreliable. Finally, the
    <cctype> header was included, not <ctype.h>, so they should have been
    qualified as std::tolower and std::toupper.
    
    [1] 
https://gcc.gnu.org/onlinedocs/libstdc++/manual/strings.html#strings.string.simple
    [2] https://en.cppreference.com/w/cpp/string/byte/tolower
    
    libgcobol/ChangeLog:
    
            * intrinsic.cc (is_zulu_format): Qualify toupper and cast
            argument to unsigned char.
            (fill_cobol_tm): Likewise.
            (iscasematch): Likewise for to lower.
            (numval): Qualify calls to tolower.
            (__gg__lower_case): Use lambda expression for
            tolower call.
            (__gg__upper_case): Likewise for toupper call.
            * libgcobol.cc (mangler_core): Cast tolower argument to unsigned
            char.
            * valconv.cc (__gg__string_to_numeric_edited): Cast to upper
            arguments to unsigned char.

Diff:
---
 libgcobol/intrinsic.cc | 28 +++++++++++++++-------------
 libgcobol/libgcobol.cc |  2 +-
 libgcobol/valconv.cc   |  4 ++--
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc
index e3d255a29d61..345d3ac73527 100644
--- a/libgcobol/intrinsic.cc
+++ b/libgcobol/intrinsic.cc
@@ -99,7 +99,7 @@ is_zulu_format(PCHAR left, PCHAR &right)
   bool retval = false;
   if( right > left )
     {
-    retval = toupper(*(right-1)) == internal_Z;
+    retval = std::toupper((unsigned char)*(right-1)) == internal_Z;
     }
   return retval;
   }
@@ -1984,7 +1984,8 @@ __gg__lower_case( cblc_field_t *dest,
   memset(dest->data, internal_space, dest_length);
   memcpy(dest->data, input->data+input_offset, std::min(dest_length, 
source_length));
   internal_to_ascii((char *)dest->data, dest_length);
-  std::transform(dest->data, dest->data + dest_length, dest->data, tolower);
+  std::transform(dest->data, dest->data + dest_length, dest->data,
+                [](unsigned char c) { return std::tolower(c); });
   ascii_to_internal_str((char *)dest->data, dest_length);
   }
 
@@ -2431,7 +2432,7 @@ numval( cblc_field_t *dest,
           state = SPACE4;
           break;
           }
-        if( tolower(ch) == 'd' )
+        if( std::tolower(ch) == 'd' )
           {
           if( leading_sign )
             {
@@ -2439,7 +2440,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || tolower(ch) != 'b' )
+          if( p > pend || std::tolower(ch) != 'b' )
             {
             goto done;
             }
@@ -2447,7 +2448,7 @@ numval( cblc_field_t *dest,
           state = SPACE4;
           break;
           }
-        if( tolower(ch) == 'c' )
+        if( std::tolower(ch) == 'c' )
           {
           if( leading_sign )
             {
@@ -2455,7 +2456,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || tolower(ch) != 'r' )
+          if( p > pend || std::tolower(ch) != 'r' )
             {
             goto done;
             }
@@ -2494,7 +2495,7 @@ numval( cblc_field_t *dest,
           state = SPACE4;
           break;
           }
-        if( tolower(ch) == 'd' )
+        if( std::tolower(ch) == 'd' )
           {
           if( leading_sign )
             {
@@ -2502,7 +2503,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || tolower(ch) != 'b' )
+          if( p > pend || std::tolower(ch) != 'b' )
             {
             goto done;
             }
@@ -2510,7 +2511,7 @@ numval( cblc_field_t *dest,
           state = SPACE4;
           break;
           }
-        if( tolower(ch) == 'c' )
+        if( std::tolower(ch) == 'c' )
           {
           if( leading_sign )
             {
@@ -2518,7 +2519,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || tolower(ch) != 'r' )
+          if( p > pend || std::tolower(ch) != 'r' )
             {
             goto done;
             }
@@ -3736,7 +3737,8 @@ __gg__upper_case( cblc_field_t *dest,
   memset(dest->data, internal_space, dest_length);
   memcpy(dest->data, input->data+input_offset, std::min(dest_length, 
source_length));
   internal_to_ascii((char *)dest->data, dest_length);
-  std::transform(dest->data, dest->data + dest_length, dest->data, toupper);
+  std::transform(dest->data, dest->data + dest_length, dest->data,
+                [](unsigned char c) { return std::toupper(c); });
   ascii_to_internal_str((char *)dest->data, dest_length);
   }
 
@@ -4551,7 +4553,7 @@ fill_cobol_tm(cobol_tm &ctm,
     if( ch == internal_Z || ch == internal_z )
       {
       // This has to be the end of the road
-      if( toupper(source[0]) != 'Z' )
+      if( std::toupper((unsigned char)source[0]) != 'Z' )
         {
         retval += 0;
         break;
@@ -5054,7 +5056,7 @@ iscasematch(char *a1, char *a2, char *b1, char *b2)
   bool retval = true;
   while( a1 < a2 && b1 < b2 )
     {
-    if( tolower(*a1++) != tolower(*b1++) )
+    if( std::tolower((unsigned char)*a1++) != std::tolower((unsigned 
char)*b1++) )
       {
       retval = false;
       }
diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc
index 0890835822c3..b990508b1129 100644
--- a/libgcobol/libgcobol.cc
+++ b/libgcobol/libgcobol.cc
@@ -8826,7 +8826,7 @@ mangler_core(const char *s, const char *eos)
       }
     else
       {
-      *d++ = tolower(ch);
+      *d++ = tolower((unsigned char)ch);
       }
     }
   *d++ = NULLCH;
diff --git a/libgcobol/valconv.cc b/libgcobol/valconv.cc
index 0b80d72cc3a6..33d9a0dd1ffd 100644
--- a/libgcobol/valconv.cc
+++ b/libgcobol/valconv.cc
@@ -252,8 +252,8 @@ __gg__string_to_numeric_edited( char * const dest,
   if( dlength >= 2 )
     {
     // It's a positive number, so we might have to get rid of a CR or DB:
-    char ch1 = toupper(dest[dlength-2]);
-    char ch2 = toupper(dest[dlength-1]);
+    char ch1 = toupper((unsigned char)dest[dlength-2]);
+    char ch2 = toupper((unsigned char)dest[dlength-1]);
     if(     (ch1 == ascii_D && ch2 == ascii_B)
             ||  (ch1 == ascii_C && ch2 == ascii_R) )
       {

Reply via email to