>From ce33db9c7c440be3ffff988d4ad90f2ec41fea9c Mon Sep 17 00:00:00 2001
From: Bob Dubner mailto:rdub...@symas.com
Date: Wed, 2 Apr 2025 12:18:08 -0400
Subject: [PATCH] cobol: Plug memory leak caused by intermediate_e
stack-frame
 variables. [PR119521]

COBOL variables with attribute intermediate_e are being allocated on
the stack frame, but their data was assigned using malloc(), without
a corresponding call to free().  For numerics, the problem is solved
with a fixed allocation of sixteen bytes for the cblc_field_t::data
member (sixteen is big enough for all data types) and with a fixed
allocation of 8,192 bytes for the alphanumeric type.

In use, the intermediate numeric data types are "shrunk" to the minimum
applicable size.  The intermediate alphanumerics, generally used as
destination targets for functions, are trimmed as well.

gcc/cobol

        PR cobol/119521
        * genapi.cc: (parser_division): Change comment.
        (parser_symbol_add): Change intermediate_t handling.
        * parse.y: Multiple changes to new_alphanumeric() calls.
        * parse_ante.h: Establish named constant for date function
        calls.  Change declaration of new_alphanumeric() function.
        * symbols.cc: (new_temporary_impl): Use named constant
        for default size of temporary alphanumerics.
        * symbols.h: Establish MAXIMUM_ALPHA_LENGTH constant.

libgcobol

        PR cobol/119521
        * intrinsic.cc: (__gg__reverse): Trim final result for
intermediate_e.
        * libgcobol.cc: (__gg__adjust_dest_size): Abort on attempt to
increase
        the size of a result.  (__gg__module_name): Formatting.

__gg__reverse(): Resize only intermediates
---
 gcc/cobol/genapi.cc    | 85 ++++++++++++++++++++----------------------
 gcc/cobol/parse.y      | 73 +++++++++++++++++-------------------
 gcc/cobol/parse_ante.h |  3 +-
 gcc/cobol/symbols.cc   |  3 +-
 gcc/cobol/symbols.h    |  6 +++
 libgcobol/intrinsic.cc |  4 ++
 libgcobol/libgcobol.cc |  8 ++--
 7 files changed, 94 insertions(+), 88 deletions(-)

diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc
index 92ab460e2c0b..4d958cfc0d4b 100644
--- a/gcc/cobol/genapi.cc
+++ b/gcc/cobol/genapi.cc
@@ -6647,7 +6647,10 @@ parser_division(cbl_division_t division,
 
           if( args[i].refer.field->attr & any_length_e )
             {
-            //gg_printf("side channel 0x%lx\n",
gg_array_value(var_decl_call_parameter_lengths, rt_i), NULL_TREE);
+            // gg_printf("side channel: Length of \"%s\" is %ld\n", 
+                      // member(args[i].refer.field->var_decl_node,
"name"),
+                      // gg_array_value(var_decl_call_parameter_lengths,
rt_i), 
+                      // NULL_TREE);
 
             // Get the length from the global lengths[] side channel.
Don't
             // forget to use the length mask on the table value.
@@ -16753,55 +16756,47 @@ parser_symbol_add(struct cbl_field_t *new_var )
 
         if( bytes_to_allocate )
           {
-          if(    new_var->attr & (intermediate_e)
-              && new_var->type != FldLiteralN
-              && new_var->type != FldLiteralA )
+          // We need a unique name for the allocated data for this COBOL
variable:
+          char achDataName[256];
+          if( new_var->attr & external_e )
             {
-            // We'll malloc() data in initialize_variable
-            data_area = null_pointer_node;
+            sprintf(achDataName, "%s", new_var->name);
+            }
+          else if( new_var->name[0] == '_' )
+            {
+            // Avoid doubling up on leading underscore
+            sprintf(achDataName,
+                    "%s_data_%lu",
+                    new_var->name,
+                    sv_data_name_counter++);
             }
           else
             {
-            // We need a unique name for the allocated data for this
COBOL variable:
-            char achDataName[256];
-            if( new_var->attr & external_e )
-              {
-              sprintf(achDataName, "%s", new_var->name);
-              }
-            else if( new_var->name[0] == '_' )
-              {
-              // Avoid doubling up on leading underscore
-              sprintf(achDataName,
-                      "%s_data_%lu",
-                      new_var->name,
-                      sv_data_name_counter++);
-              }
-            else
-              {
-              sprintf(achDataName,
-                      "_%s_data_%lu",
-                      new_var->name,
-                      sv_data_name_counter++);
-              }
+            sprintf(achDataName,
+                    "_%s_data_%lu",
+                    new_var->name,
+                    sv_data_name_counter++);
+            }
 
-            if( new_var->attr & external_e )
-              {
-              tree array_type = build_array_type_nelts(UCHAR,
bytes_to_allocate);
-              new_var->data_decl_node = gg_define_variable(
-                                  array_type,
-                                  achDataName,
-                                  vs_external);
-              data_area = gg_get_address_of(new_var->data_decl_node);
-              }
-            else
-              {
-              tree array_type = build_array_type_nelts(UCHAR,
bytes_to_allocate);
-              new_var->data_decl_node = gg_define_variable(
-                                  array_type,
-                                  achDataName,
-                                  vs_static);
-              data_area = gg_get_address_of(new_var->data_decl_node);
-              }
+          if( new_var->attr & external_e )
+            {
+            tree array_type = build_array_type_nelts(UCHAR,
bytes_to_allocate);
+            new_var->data_decl_node = gg_define_variable(
+                                array_type,
+                                achDataName,
+                                vs_external);
+            data_area = gg_get_address_of(new_var->data_decl_node);
+            }
+          else
+            {
+            gg_variable_scope_t vs_scope = (new_var->attr &
intermediate_e)
+                                            ? vs_stack : vs_static ;
+            tree array_type = build_array_type_nelts(UCHAR,
bytes_to_allocate);
+            new_var->data_decl_node = gg_define_variable(
+                                array_type,
+                                achDataName,
+                                vs_scope);
+            data_area = gg_get_address_of(new_var->data_decl_node);
             }
           }
         }
diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y
index 538e56fa64d1..3f282013a4ab 100644
--- a/gcc/cobol/parse.y
+++ b/gcc/cobol/parse.y
@@ -9983,7 +9983,7 @@ intrinsic:      function_udf
                   }
                   $$ = is_numeric(args[0].field)?
                          new_tempnumeric_float() :
-                         new_alphanumeric(args[0].field->data.capacity);
+                         new_alphanumeric();
 
                   parser_intrinsic_callv( $$, intrinsic_cname($1),
                                          args.size(), args.data() );
@@ -10013,7 +10013,7 @@ intrinsic:      function_udf
                 }
         |       BIT_OF  '(' expr[r1] ')' {
                   location_set(@1);
-                  $$ = new_alphanumeric(8 * $r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   if( ! intrinsic_call_1($$, BIT_OF, $r1, @r1)) YYERROR;
                 }
         |       CHAR  '(' expr[r1] ')' {
@@ -10031,27 +10031,24 @@ intrinsic:      function_udf
 
         |       DISPLAY_OF  '(' varg[r1]  ')' {
                   location_set(@1);
-                  uint32_t len = $r1->field->data.capacity;
-                  $$ = new_alphanumeric(4 * len);
+                  $$ = new_alphanumeric();
                   if( ! intrinsic_call_2($$, DISPLAY_OF, $r1, NULL) )
YYERROR;
                 }
         |       DISPLAY_OF  '(' varg[r1] varg[r2]  ')' {
                   location_set(@1);
-                  uint32_t len = $r1->field->data.capacity
-                    + $r2->field->data.capacity;
-                  $$ = new_alphanumeric(4 * len);
+                  $$ = new_alphanumeric();
                   if( ! intrinsic_call_2($$, DISPLAY_OF, $r1, $r2) )
YYERROR;
                 }
 
         |       EXCEPTION_FILE filename {
                   location_set(@1);
-                  $$ = new_alphanumeric(256);
+                  $$ = new_alphanumeric();
                   parser_exception_file( $$, $filename );
                 }
 
         |       FIND_STRING '(' varg[r1] last start_after anycase ')' {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   /* auto r1 = new_reference(new_literal(strlen($r1),
$r1, quoted_e)); */
                  cbl_unimplemented("FIND_STRING");
                   /* if( ! intrinsic_call_4($$, FIND_STRING, r1, $r2) )
YYERROR; */
@@ -10163,7 +10160,7 @@ intrinsic:      function_udf
 
         |       HEX_OF  '(' varg[r1] ')' {
                   location_set(@1);
-                  $$ = new_alphanumeric(2 * $r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   if( ! intrinsic_call_1($$, HEX_OF, $r1, @r1)) YYERROR;
                 }
        |       LENGTH '(' tableish[val] ')' {
@@ -10241,7 +10238,7 @@ intrinsic:      function_udf
 
         |       SUBSTITUTE '(' varg[r1] subst_inputs[inputs] ')' {
                   location_set(@1);
-                  $$ = new_alphanumeric(64);
+                  $$ = new_alphanumeric();
                   std::vector <cbl_substitute_t> args($inputs->size());
                   std::transform( $inputs->begin(), $inputs->end(),
args.begin(),
                                   []( const substitution_t& arg ) {
@@ -10284,14 +10281,14 @@ intrinsic:      function_udf
                      YYERROR;
                      break;
                   }
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   cbl_refer_t * how = new_reference($trim_trailing);
                   if( ! intrinsic_call_2($$, TRIM, $r1, how) ) YYERROR;
                 }
 
         |       USUBSTR '(' alpha_val[r1] expr[r2] expr[r3]  ')' {
                   location_set(@1);
-                  $$ = new_alphanumeric(32);  // how long?
+                  $$ = new_alphanumeric();
                   if( ! intrinsic_call_3($$, FORMATTED_DATETIME,
                                              $r1, $r2, $r3) ) YYERROR;
                 }
@@ -10316,7 +10313,7 @@ intrinsic:      function_udf
                   auto type = intrinsic_return_type($1);
                   switch(type) {
                   case FldAlphanumeric:
-                    $$ = new_alphanumeric($r1->field->data.capacity);
+                    $$ = new_alphanumeric();
                     break;
                   default:
                     if( $1 == NUMVAL || $1 == NUMVAL_F )
@@ -10352,7 +10349,7 @@ intrinsic:      function_udf
                   static auto one = new cbl_refer_t( new_literal("1") );
                   static auto four = new cbl_refer_t( new_literal("4") );
                   cbl_span_t year(one, four);
-                  auto r3 = new_reference(new_alphanumeric(21));
+                  auto r3 =
new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
 
                   parser_intrinsic_call_0( r3->field,
"__gg__current_date" );
@@ -10368,7 +10365,7 @@ intrinsic:      function_udf
                   static auto one = new cbl_refer_t( new_literal("1") );
                   static auto four = new cbl_refer_t( new_literal("4") );
                   cbl_span_t year(one, four);
-                  auto r3 = new_reference(new_alphanumeric(21));
+                  auto r3 =
new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
 
                   parser_intrinsic_call_0( r3->field,
"__gg__current_date" );
@@ -10394,7 +10391,7 @@ intrinsic:      function_udf
                   static auto one = new cbl_refer_t( new_literal("1") );
                   static auto four = new cbl_refer_t( new_literal("4") );
                   cbl_span_t year(one, four);
-                  auto r3 = new_reference(new_alphanumeric(21));
+                  auto r3 =
new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
 
                   parser_intrinsic_call_0( r3->field,
"__gg__current_date" );
@@ -10410,7 +10407,7 @@ intrinsic:      function_udf
                   static auto one = new cbl_refer_t( new_literal("1") );
                   static auto four = new cbl_refer_t( new_literal("4") );
                   cbl_span_t year(one, four);
-                  auto r3 = new_reference(new_alphanumeric(21));
+                  auto r3 =
new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
 
                   parser_intrinsic_call_0( r3->field,
"__gg__current_date" );
@@ -10436,7 +10433,7 @@ intrinsic:      function_udf
                   static auto one = new cbl_refer_t( new_literal("1") );
                   static auto four = new cbl_refer_t( new_literal("4") );
                   cbl_span_t year(one, four);
-                  auto r3 = new_reference(new_alphanumeric(21));
+                  auto r3 =
new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
 
                   parser_intrinsic_call_0( r3->field,
"__gg__current_date" );
@@ -10452,7 +10449,7 @@ intrinsic:      function_udf
                   static auto one = new cbl_refer_t( new_literal("1") );
                   static auto four = new cbl_refer_t( new_literal("4") );
                   cbl_span_t year(one, four);
-                  auto r3 = new_reference(new_alphanumeric(21));
+                  auto r3 =
new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
 
                   parser_intrinsic_call_0( r3->field,
"__gg__current_date" );
@@ -10492,7 +10489,7 @@ intrinsic:      function_udf
         |       intrinsic_X2 '(' varg[r1] varg[r2] ')'
                 {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   if( ! intrinsic_call_2($$, $1, $r1, $r2) ) YYERROR;
                 }
         |       intrinsic_locale
@@ -10540,54 +10537,54 @@ intrinsic_locale:
                 LOCALE_COMPARE '(' varg[r1] varg[r2]  ')'
                 {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   cbl_refer_t dummy = {};
                   if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2,
&dummy) ) YYERROR;
                 }
         |       LOCALE_COMPARE '(' varg[r1] varg[r2] varg[r3] ')'
                 {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2,
$r3) ) YYERROR;
                 }
 
         |       LOCALE_DATE '(' varg[r1]  ')'
                 {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   cbl_refer_t dummy = {};
                   if( ! intrinsic_call_2($$, LOCALE_DATE, $r1, &dummy) )
YYERROR;
                 }
         |             LOCALE_DATE '(' varg[r1] varg[r2]  ')'
                 {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   if( ! intrinsic_call_2($$, LOCALE_DATE, $r1, $r2) )
YYERROR;
                 }
         |       LOCALE_TIME '(' varg[r1]  ')'
                 {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   cbl_refer_t dummy = {};
                   if( ! intrinsic_call_2($$, LOCALE_TIME, $r1, &dummy) )
YYERROR;
                 }
         |       LOCALE_TIME '(' varg[r1] varg[r2]  ')'
                 {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   if( ! intrinsic_call_2($$, LOCALE_TIME, $r1, $r2) )
YYERROR;
                 }
         |       LOCALE_TIME_FROM_SECONDS '(' varg[r1]  ')'
                 {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   cbl_refer_t dummy = {};
                   if( ! intrinsic_call_2($$, LOCALE_TIME_FROM_SECONDS,
$r1, &dummy) ) YYERROR;
                 }
         |       LOCALE_TIME_FROM_SECONDS '(' varg[r1] varg[r2]  ')'
                 {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity);
+                  $$ = new_alphanumeric();
                   if( ! intrinsic_call_2($$, LOCALE_TIME_FROM_SECONDS,
$r1, $r2) ) YYERROR;
                 }
                 ;
@@ -10603,7 +10600,7 @@ trim_trailing:  %empty          { $$ =
new_literal("0"); }  // Remove both
 
 intrinsic0:     CURRENT_DATE {
                   location_set(@1);
-                  $$ = new_alphanumeric(21);
+                  $$ = new_alphanumeric(MAXLENGTH_CALENDAR_DATE);
                   parser_intrinsic_call_0( $$, "__gg__current_date" );
                 }
         |       E {
@@ -10614,33 +10611,33 @@ intrinsic0:     CURRENT_DATE {
 
         |       EXCEPTION_FILE_N {
                   location_set(@1);
-                  $$ = new_alphanumeric(256);
+                  $$ = new_alphanumeric();
                   intrinsic_call_0( $$, EXCEPTION_FILE_N );
                 }
 
         |       EXCEPTION_FILE {
                   location_set(@1);
-                  $$ = new_alphanumeric(256);
+                  $$ = new_alphanumeric();
                   parser_exception_file( $$ );
                 }
         |       EXCEPTION_LOCATION_N {
                   location_set(@1);
-                  $$ = new_alphanumeric(256);
+                  $$ = new_alphanumeric();
                   intrinsic_call_0( $$, EXCEPTION_LOCATION_N );
                 }
         |       EXCEPTION_LOCATION {
                   location_set(@1);
-                  $$ = new_alphanumeric(256);
+                  $$ = new_alphanumeric();
                   intrinsic_call_0( $$, EXCEPTION_LOCATION );
                 }
         |       EXCEPTION_STATEMENT {
                   location_set(@1);
-                  $$ = new_alphanumeric(63);
+                  $$ = new_alphanumeric();
                   intrinsic_call_0( $$, EXCEPTION_STATEMENT );
                 }
         |       EXCEPTION_STATUS {
                   location_set(@1);
-                  $$ = new_alphanumeric(31);
+                  $$ = new_alphanumeric();
                   intrinsic_call_0( $$, EXCEPTION_STATUS );
                 }
 
@@ -10656,12 +10653,12 @@ intrinsic0:     CURRENT_DATE {
                 }
         |       UUID4 {
                   location_set(@1);
-                  $$ = new_alphanumeric(32); // don't know correct size
+                  $$ = new_alphanumeric();
                  parser_intrinsic_call_0( $$, "__gg__uuid4" );
                 }
         |       WHEN_COMPILED {
                   location_set(@1);
-                  $$ = new_alphanumeric(21); // Returns
YYYYMMDDhhmmssss-0500
+                  $$ = new_alphanumeric(MAXLENGTH_CALENDAR_DATE); //
Returns YYYYMMDDhhmmssss-0500
                  parser_intrinsic_call_0( $$, "__gg__when_compiled" );
                 }
                 ;
diff --git a/gcc/cobol/parse_ante.h b/gcc/cobol/parse_ante.h
index 8ae51c540ade..aa366283ff5e 100644
--- a/gcc/cobol/parse_ante.h
+++ b/gcc/cobol/parse_ante.h
@@ -41,6 +41,7 @@
 
 #define MAXLENGTH_FORMATTED_DATE     10
 #define MAXLENGTH_FORMATTED_TIME     19
+#define MAXLENGTH_CALENDAR_DATE      21
 #define MAXLENGTH_FORMATTED_DATETIME 30
 
 #pragma GCC diagnostic push
@@ -220,7 +221,7 @@ namcpy(const YYLTYPE& loc, cbl_name_t tgt, const char
*src ) {
 }
 
 cbl_field_t *
-new_alphanumeric( size_t capacity );
+new_alphanumeric( size_t capacity = MAXIMUM_ALPHA_LENGTH );
 
 static inline cbl_refer_t *
 new_reference( enum cbl_field_type_t type, const char *initial ) {
diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc
index a4fc82c4ffa7..2373bfe6cc5a 100644
--- a/gcc/cobol/symbols.cc
+++ b/gcc/cobol/symbols.cc
@@ -3237,7 +3237,8 @@ new_temporary_impl( enum cbl_field_type_t type )
                                 0, FldAlphanumeric, FldInvalid,
                                 intermediate_e, 0, 0, 0, nonarray, 0, "",
                                 0, cbl_field_t::linkage_t(),
-                                {}, NULL };
+                                {MAXIMUM_ALPHA_LENGTH,
MAXIMUM_ALPHA_LENGTH, 
+                                                            0, 0, NULL},
NULL };
   static const struct cbl_field_t empty_float = {
                                 0, FldFloat, FldInvalid,
                                 intermediate_e,
diff --git a/gcc/cobol/symbols.h b/gcc/cobol/symbols.h
index d5acf167a47b..c231763d5cbe 100644
--- a/gcc/cobol/symbols.h
+++ b/gcc/cobol/symbols.h
@@ -224,6 +224,12 @@ enum symbol_type_t {
   SymDataSection,
 };
 
+// The ISO specification says alphanumeric literals have a maximum length
of
+// 8,191 characters.  It seems to be silent on the length of alphanumeric
data
+// items.  Our implementation requires a maximum length, so we chose to
make it
+// the same.
+#define MAXIMUM_ALPHA_LENGTH 8192
+
 struct cbl_field_data_t {
   uint32_t memsize;             // nonzero if larger subsequent
redefining field
   uint32_t capacity,            // allocated space
diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc
index 345d3ac73527..16bf84be620b 100644
--- a/libgcobol/intrinsic.cc
+++ b/libgcobol/intrinsic.cc
@@ -3494,6 +3494,10 @@ __gg__reverse(cblc_field_t *dest,
     {
     dest->data[i] = (input->data+input_offset)[source_length-1-i];
     }
+  if( (dest->attr & intermediate_e) )
+    {
+    dest->capacity = std::min(dest_length, source_length);
+    }
   }
 
 extern "C"
diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc
index b990508b1129..224c5f26e963 100644
--- a/libgcobol/libgcobol.cc
+++ b/libgcobol/libgcobol.cc
@@ -11312,8 +11312,10 @@ __gg__adjust_dest_size(cblc_field_t *dest, size_t
ncount)
     {
     if( dest->allocated < ncount )
       {
-      dest->allocated = ncount;
-      dest->data = (unsigned char *)realloc(dest->data, ncount);
+      fprintf(stderr, "libgcobol.cc:__gg__adjust_dest_size(): Adjusting
size upward is not possible.\n");
+      abort();
+//      dest->allocated = ncount;
+//      dest->data = (unsigned char *)realloc(dest->data, ncount);
       }
     dest->capacity = ncount;
     }
@@ -12643,7 +12645,7 @@ __gg__module_name(cblc_field_t *dest,
module_type_t type)
       break;
     }
 
-__gg__adjust_dest_size(dest, strlen(result));
+  __gg__adjust_dest_size(dest, strlen(result));
   memcpy(dest->data, result, strlen(result)+1);
   }
 
-- 
2.34.1

Reply via email to