>From ce33db9c7c440be3ffff988d4ad90f2ec41fea9c Mon Sep 17 00:00:00 2001 From: Bob Dubner mailto:rdub...@symas.com Date: Wed, 2 Apr 2025 12:18:08 -0400 Subject: [PATCH] cobol: Plug memory leak caused by intermediate_e stack-frame variables. [PR119521]
COBOL variables with attribute intermediate_e are being allocated on the stack frame, but their data was assigned using malloc(), without a corresponding call to free(). For numerics, the problem is solved with a fixed allocation of sixteen bytes for the cblc_field_t::data member (sixteen is big enough for all data types) and with a fixed allocation of 8,192 bytes for the alphanumeric type. In use, the intermediate numeric data types are "shrunk" to the minimum applicable size. The intermediate alphanumerics, generally used as destination targets for functions, are trimmed as well. gcc/cobol PR cobol/119521 * genapi.cc: (parser_division): Change comment. (parser_symbol_add): Change intermediate_t handling. * parse.y: Multiple changes to new_alphanumeric() calls. * parse_ante.h: Establish named constant for date function calls. Change declaration of new_alphanumeric() function. * symbols.cc: (new_temporary_impl): Use named constant for default size of temporary alphanumerics. * symbols.h: Establish MAXIMUM_ALPHA_LENGTH constant. libgcobol PR cobol/119521 * intrinsic.cc: (__gg__reverse): Trim final result for intermediate_e. * libgcobol.cc: (__gg__adjust_dest_size): Abort on attempt to increase the size of a result. (__gg__module_name): Formatting. __gg__reverse(): Resize only intermediates --- gcc/cobol/genapi.cc | 85 ++++++++++++++++++++---------------------- gcc/cobol/parse.y | 73 +++++++++++++++++------------------- gcc/cobol/parse_ante.h | 3 +- gcc/cobol/symbols.cc | 3 +- gcc/cobol/symbols.h | 6 +++ libgcobol/intrinsic.cc | 4 ++ libgcobol/libgcobol.cc | 8 ++-- 7 files changed, 94 insertions(+), 88 deletions(-) diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc index 92ab460e2c0b..4d958cfc0d4b 100644 --- a/gcc/cobol/genapi.cc +++ b/gcc/cobol/genapi.cc @@ -6647,7 +6647,10 @@ parser_division(cbl_division_t division, if( args[i].refer.field->attr & any_length_e ) { - //gg_printf("side channel 0x%lx\n", gg_array_value(var_decl_call_parameter_lengths, rt_i), NULL_TREE); + // gg_printf("side channel: Length of \"%s\" is %ld\n", + // member(args[i].refer.field->var_decl_node, "name"), + // gg_array_value(var_decl_call_parameter_lengths, rt_i), + // NULL_TREE); // Get the length from the global lengths[] side channel. Don't // forget to use the length mask on the table value. @@ -16753,55 +16756,47 @@ parser_symbol_add(struct cbl_field_t *new_var ) if( bytes_to_allocate ) { - if( new_var->attr & (intermediate_e) - && new_var->type != FldLiteralN - && new_var->type != FldLiteralA ) + // We need a unique name for the allocated data for this COBOL variable: + char achDataName[256]; + if( new_var->attr & external_e ) { - // We'll malloc() data in initialize_variable - data_area = null_pointer_node; + sprintf(achDataName, "%s", new_var->name); + } + else if( new_var->name[0] == '_' ) + { + // Avoid doubling up on leading underscore + sprintf(achDataName, + "%s_data_%lu", + new_var->name, + sv_data_name_counter++); } else { - // We need a unique name for the allocated data for this COBOL variable: - char achDataName[256]; - if( new_var->attr & external_e ) - { - sprintf(achDataName, "%s", new_var->name); - } - else if( new_var->name[0] == '_' ) - { - // Avoid doubling up on leading underscore - sprintf(achDataName, - "%s_data_%lu", - new_var->name, - sv_data_name_counter++); - } - else - { - sprintf(achDataName, - "_%s_data_%lu", - new_var->name, - sv_data_name_counter++); - } + sprintf(achDataName, + "_%s_data_%lu", + new_var->name, + sv_data_name_counter++); + } - if( new_var->attr & external_e ) - { - tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); - new_var->data_decl_node = gg_define_variable( - array_type, - achDataName, - vs_external); - data_area = gg_get_address_of(new_var->data_decl_node); - } - else - { - tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); - new_var->data_decl_node = gg_define_variable( - array_type, - achDataName, - vs_static); - data_area = gg_get_address_of(new_var->data_decl_node); - } + if( new_var->attr & external_e ) + { + tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); + new_var->data_decl_node = gg_define_variable( + array_type, + achDataName, + vs_external); + data_area = gg_get_address_of(new_var->data_decl_node); + } + else + { + gg_variable_scope_t vs_scope = (new_var->attr & intermediate_e) + ? vs_stack : vs_static ; + tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate); + new_var->data_decl_node = gg_define_variable( + array_type, + achDataName, + vs_scope); + data_area = gg_get_address_of(new_var->data_decl_node); } } } diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y index 538e56fa64d1..3f282013a4ab 100644 --- a/gcc/cobol/parse.y +++ b/gcc/cobol/parse.y @@ -9983,7 +9983,7 @@ intrinsic: function_udf } $$ = is_numeric(args[0].field)? new_tempnumeric_float() : - new_alphanumeric(args[0].field->data.capacity); + new_alphanumeric(); parser_intrinsic_callv( $$, intrinsic_cname($1), args.size(), args.data() ); @@ -10013,7 +10013,7 @@ intrinsic: function_udf } | BIT_OF '(' expr[r1] ')' { location_set(@1); - $$ = new_alphanumeric(8 * $r1->field->data.capacity); + $$ = new_alphanumeric(); if( ! intrinsic_call_1($$, BIT_OF, $r1, @r1)) YYERROR; } | CHAR '(' expr[r1] ')' { @@ -10031,27 +10031,24 @@ intrinsic: function_udf | DISPLAY_OF '(' varg[r1] ')' { location_set(@1); - uint32_t len = $r1->field->data.capacity; - $$ = new_alphanumeric(4 * len); + $$ = new_alphanumeric(); if( ! intrinsic_call_2($$, DISPLAY_OF, $r1, NULL) ) YYERROR; } | DISPLAY_OF '(' varg[r1] varg[r2] ')' { location_set(@1); - uint32_t len = $r1->field->data.capacity - + $r2->field->data.capacity; - $$ = new_alphanumeric(4 * len); + $$ = new_alphanumeric(); if( ! intrinsic_call_2($$, DISPLAY_OF, $r1, $r2) ) YYERROR; } | EXCEPTION_FILE filename { location_set(@1); - $$ = new_alphanumeric(256); + $$ = new_alphanumeric(); parser_exception_file( $$, $filename ); } | FIND_STRING '(' varg[r1] last start_after anycase ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); /* auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); */ cbl_unimplemented("FIND_STRING"); /* if( ! intrinsic_call_4($$, FIND_STRING, r1, $r2) ) YYERROR; */ @@ -10163,7 +10160,7 @@ intrinsic: function_udf | HEX_OF '(' varg[r1] ')' { location_set(@1); - $$ = new_alphanumeric(2 * $r1->field->data.capacity); + $$ = new_alphanumeric(); if( ! intrinsic_call_1($$, HEX_OF, $r1, @r1)) YYERROR; } | LENGTH '(' tableish[val] ')' { @@ -10241,7 +10238,7 @@ intrinsic: function_udf | SUBSTITUTE '(' varg[r1] subst_inputs[inputs] ')' { location_set(@1); - $$ = new_alphanumeric(64); + $$ = new_alphanumeric(); std::vector <cbl_substitute_t> args($inputs->size()); std::transform( $inputs->begin(), $inputs->end(), args.begin(), []( const substitution_t& arg ) { @@ -10284,14 +10281,14 @@ intrinsic: function_udf YYERROR; break; } - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); cbl_refer_t * how = new_reference($trim_trailing); if( ! intrinsic_call_2($$, TRIM, $r1, how) ) YYERROR; } | USUBSTR '(' alpha_val[r1] expr[r2] expr[r3] ')' { location_set(@1); - $$ = new_alphanumeric(32); // how long? + $$ = new_alphanumeric(); if( ! intrinsic_call_3($$, FORMATTED_DATETIME, $r1, $r2, $r3) ) YYERROR; } @@ -10316,7 +10313,7 @@ intrinsic: function_udf auto type = intrinsic_return_type($1); switch(type) { case FldAlphanumeric: - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); break; default: if( $1 == NUMVAL || $1 == NUMVAL_F ) @@ -10352,7 +10349,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_literal("1") ); static auto four = new cbl_refer_t( new_literal("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(21)); + auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -10368,7 +10365,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_literal("1") ); static auto four = new cbl_refer_t( new_literal("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(21)); + auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -10394,7 +10391,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_literal("1") ); static auto four = new cbl_refer_t( new_literal("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(21)); + auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -10410,7 +10407,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_literal("1") ); static auto four = new cbl_refer_t( new_literal("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(21)); + auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -10436,7 +10433,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_literal("1") ); static auto four = new cbl_refer_t( new_literal("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(21)); + auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -10452,7 +10449,7 @@ intrinsic: function_udf static auto one = new cbl_refer_t( new_literal("1") ); static auto four = new cbl_refer_t( new_literal("4") ); cbl_span_t year(one, four); - auto r3 = new_reference(new_alphanumeric(21)); + auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE)); r3->refmod = year; parser_intrinsic_call_0( r3->field, "__gg__current_date" ); @@ -10492,7 +10489,7 @@ intrinsic: function_udf | intrinsic_X2 '(' varg[r1] varg[r2] ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); if( ! intrinsic_call_2($$, $1, $r1, $r2) ) YYERROR; } | intrinsic_locale @@ -10540,54 +10537,54 @@ intrinsic_locale: LOCALE_COMPARE '(' varg[r1] varg[r2] ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); cbl_refer_t dummy = {}; if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &dummy) ) YYERROR; } | LOCALE_COMPARE '(' varg[r1] varg[r2] varg[r3] ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, $r3) ) YYERROR; } | LOCALE_DATE '(' varg[r1] ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); cbl_refer_t dummy = {}; if( ! intrinsic_call_2($$, LOCALE_DATE, $r1, &dummy) ) YYERROR; } | LOCALE_DATE '(' varg[r1] varg[r2] ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); if( ! intrinsic_call_2($$, LOCALE_DATE, $r1, $r2) ) YYERROR; } | LOCALE_TIME '(' varg[r1] ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); cbl_refer_t dummy = {}; if( ! intrinsic_call_2($$, LOCALE_TIME, $r1, &dummy) ) YYERROR; } | LOCALE_TIME '(' varg[r1] varg[r2] ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); if( ! intrinsic_call_2($$, LOCALE_TIME, $r1, $r2) ) YYERROR; } | LOCALE_TIME_FROM_SECONDS '(' varg[r1] ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); cbl_refer_t dummy = {}; if( ! intrinsic_call_2($$, LOCALE_TIME_FROM_SECONDS, $r1, &dummy) ) YYERROR; } | LOCALE_TIME_FROM_SECONDS '(' varg[r1] varg[r2] ')' { location_set(@1); - $$ = new_alphanumeric($r1->field->data.capacity); + $$ = new_alphanumeric(); if( ! intrinsic_call_2($$, LOCALE_TIME_FROM_SECONDS, $r1, $r2) ) YYERROR; } ; @@ -10603,7 +10600,7 @@ trim_trailing: %empty { $$ = new_literal("0"); } // Remove both intrinsic0: CURRENT_DATE { location_set(@1); - $$ = new_alphanumeric(21); + $$ = new_alphanumeric(MAXLENGTH_CALENDAR_DATE); parser_intrinsic_call_0( $$, "__gg__current_date" ); } | E { @@ -10614,33 +10611,33 @@ intrinsic0: CURRENT_DATE { | EXCEPTION_FILE_N { location_set(@1); - $$ = new_alphanumeric(256); + $$ = new_alphanumeric(); intrinsic_call_0( $$, EXCEPTION_FILE_N ); } | EXCEPTION_FILE { location_set(@1); - $$ = new_alphanumeric(256); + $$ = new_alphanumeric(); parser_exception_file( $$ ); } | EXCEPTION_LOCATION_N { location_set(@1); - $$ = new_alphanumeric(256); + $$ = new_alphanumeric(); intrinsic_call_0( $$, EXCEPTION_LOCATION_N ); } | EXCEPTION_LOCATION { location_set(@1); - $$ = new_alphanumeric(256); + $$ = new_alphanumeric(); intrinsic_call_0( $$, EXCEPTION_LOCATION ); } | EXCEPTION_STATEMENT { location_set(@1); - $$ = new_alphanumeric(63); + $$ = new_alphanumeric(); intrinsic_call_0( $$, EXCEPTION_STATEMENT ); } | EXCEPTION_STATUS { location_set(@1); - $$ = new_alphanumeric(31); + $$ = new_alphanumeric(); intrinsic_call_0( $$, EXCEPTION_STATUS ); } @@ -10656,12 +10653,12 @@ intrinsic0: CURRENT_DATE { } | UUID4 { location_set(@1); - $$ = new_alphanumeric(32); // don't know correct size + $$ = new_alphanumeric(); parser_intrinsic_call_0( $$, "__gg__uuid4" ); } | WHEN_COMPILED { location_set(@1); - $$ = new_alphanumeric(21); // Returns YYYYMMDDhhmmssss-0500 + $$ = new_alphanumeric(MAXLENGTH_CALENDAR_DATE); // Returns YYYYMMDDhhmmssss-0500 parser_intrinsic_call_0( $$, "__gg__when_compiled" ); } ; diff --git a/gcc/cobol/parse_ante.h b/gcc/cobol/parse_ante.h index 8ae51c540ade..aa366283ff5e 100644 --- a/gcc/cobol/parse_ante.h +++ b/gcc/cobol/parse_ante.h @@ -41,6 +41,7 @@ #define MAXLENGTH_FORMATTED_DATE 10 #define MAXLENGTH_FORMATTED_TIME 19 +#define MAXLENGTH_CALENDAR_DATE 21 #define MAXLENGTH_FORMATTED_DATETIME 30 #pragma GCC diagnostic push @@ -220,7 +221,7 @@ namcpy(const YYLTYPE& loc, cbl_name_t tgt, const char *src ) { } cbl_field_t * -new_alphanumeric( size_t capacity ); +new_alphanumeric( size_t capacity = MAXIMUM_ALPHA_LENGTH ); static inline cbl_refer_t * new_reference( enum cbl_field_type_t type, const char *initial ) { diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc index a4fc82c4ffa7..2373bfe6cc5a 100644 --- a/gcc/cobol/symbols.cc +++ b/gcc/cobol/symbols.cc @@ -3237,7 +3237,8 @@ new_temporary_impl( enum cbl_field_type_t type ) 0, FldAlphanumeric, FldInvalid, intermediate_e, 0, 0, 0, nonarray, 0, "", 0, cbl_field_t::linkage_t(), - {}, NULL }; + {MAXIMUM_ALPHA_LENGTH, MAXIMUM_ALPHA_LENGTH, + 0, 0, NULL}, NULL }; static const struct cbl_field_t empty_float = { 0, FldFloat, FldInvalid, intermediate_e, diff --git a/gcc/cobol/symbols.h b/gcc/cobol/symbols.h index d5acf167a47b..c231763d5cbe 100644 --- a/gcc/cobol/symbols.h +++ b/gcc/cobol/symbols.h @@ -224,6 +224,12 @@ enum symbol_type_t { SymDataSection, }; +// The ISO specification says alphanumeric literals have a maximum length of +// 8,191 characters. It seems to be silent on the length of alphanumeric data +// items. Our implementation requires a maximum length, so we chose to make it +// the same. +#define MAXIMUM_ALPHA_LENGTH 8192 + struct cbl_field_data_t { uint32_t memsize; // nonzero if larger subsequent redefining field uint32_t capacity, // allocated space diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc index 345d3ac73527..16bf84be620b 100644 --- a/libgcobol/intrinsic.cc +++ b/libgcobol/intrinsic.cc @@ -3494,6 +3494,10 @@ __gg__reverse(cblc_field_t *dest, { dest->data[i] = (input->data+input_offset)[source_length-1-i]; } + if( (dest->attr & intermediate_e) ) + { + dest->capacity = std::min(dest_length, source_length); + } } extern "C" diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc index b990508b1129..224c5f26e963 100644 --- a/libgcobol/libgcobol.cc +++ b/libgcobol/libgcobol.cc @@ -11312,8 +11312,10 @@ __gg__adjust_dest_size(cblc_field_t *dest, size_t ncount) { if( dest->allocated < ncount ) { - dest->allocated = ncount; - dest->data = (unsigned char *)realloc(dest->data, ncount); + fprintf(stderr, "libgcobol.cc:__gg__adjust_dest_size(): Adjusting size upward is not possible.\n"); + abort(); +// dest->allocated = ncount; +// dest->data = (unsigned char *)realloc(dest->data, ncount); } dest->capacity = ncount; } @@ -12643,7 +12645,7 @@ __gg__module_name(cblc_field_t *dest, module_type_t type) break; } -__gg__adjust_dest_size(dest, strlen(result)); + __gg__adjust_dest_size(dest, strlen(result)); memcpy(dest->data, result, strlen(result)+1); } -- 2.34.1