On Sat, 2025-02-15 at 16:01 -0500, James K. Lowden wrote: > From 5d53920602e234e4d99ae2d502e662ee3699978e 4 Oct 2024 12:01:22 - > 0400 > From: "James K. Lowden" <jklow...@symas.com> > Date: Sat 15 Feb 2025 12:50:52 PM EST > Subject: [PATCH] 3 new 'cobol' FE files >
[...] > +/* > + * Replace any separators in the copybook's REPLACING candidate with > + * "stretchable" space. Escape any regex metacharacters in > candidate. > + * > + * "For matching purposes, each occurrence of a separator comma, a > + * separator semicolon, or a sequence of one or more separator > spaces > + * is considered to be a single space." > + * > + * If the indicator column is column 7 and is a 'D', we treat that > as > + * a SPACE for the purposes of matching a COPY REPLACING or REPLACE > + * directive. > + */ > +const char * > +esc( size_t len, const char input[] ) { > + static char spaces[] = "([,;]?[[:space:]])+"; > + static char spaceD[] = "(\n {6}D" "|" "[,;]?[[:space:]])+"; > + static char buffer[64 * 1024]; > + char *p = buffer; > + const char *eoinput = input + len; > + > + const char *spacex = is_reference_format()? spaceD : spaces; > + > + for( const char *s=input; *s && s < eoinput; s++ ) { > + *p = '\0'; > + gcc_assert( size_t(p - buffer) < sizeof(buffer) - 4 ); > + switch(*s) { > + case '^': case '$': > + case '(': case ')': > + case '*': case '+': case '?': > + case '[': case ']': > + case '{': case '}': > + case '|': > + case '.': > + *p++ = '\\'; > + *p++ = *s; > + break; > + case '\\': > + *p++ = '['; > + *p++ = *s; > + *p++ = ']'; > + break; > + > + case ';': case ',': > + if( ! (s+1 < eoinput && s[1] == 0x20) ) { > + *p++ = *s; > + break; > + } > + __attribute__((fallthrough)); > + case 0x20: case '\n': > + gcc_assert(p + sizeof(spacex) < buffer + sizeof(buffer)); > + p = stpcpy( p, spacex ); > + while( s+1 < eoinput && is_separator_space(s+1)) { > + s++; > + } > + break; > + default: > + *p++ = *s; > + break; > + } > + } > + *p = '\0'; > + > +#if 0 > + dbgmsg("%s:%d: regex '%s'", __func__, __LINE__, buffer); > +#endif > + return xstrdup(buffer); > +} Has a fixed size 64k buffer; doesn't seem to have proper overflow handling. Could use a pretty_printer to accumulate chars. Returns a allocated buffer as "const char *", which should be just a "char *". [...] > > +int > +copybook_elem_t::open_file( const char directory[], bool literally ) > { > + int erc; > + char *pattern, *copier = xstrdup(cobol_filename()); > + if( ! directory ) { > + directory = dirname(copier); > + if( 0 == strcmp(".", directory) ) directory = NULL; > + } > + > + char *path = NULL; > + > + if( directory || library.name ) { > + if( directory && library.name ) { > + path = xasprintf( "%s/%s/%s", directory, library.name, > source.name ); > + } else { > + const char *dir = directory? directory : library.name; > + path = xasprintf( "%s/%s", dir, source.name ); > + } > + } else { > + path = xasprintf( "%s", source.name ); > + } > + > + gcc_assert(path); > + > + if( literally ) { > + dbgmsg("copybook_elem_t::open_file: trying %s", path); > + > + if( (this->fd = open(path, O_RDONLY)) == -1 ) { > + dbgmsg("could not open %s: %m", path); > + return fd; > + } > + this->source.name = path; > + if( ! cobol_filename(this->source.name, inode_of(fd)) ) { > + error_msg(source.loc, "recursive copybook: '%s' includes > itself", path); > + (void)! close(fd); > + fd = -1; > + } > + return fd; > + } > + gcc_assert( ! literally ); > + > + if( extensions ) { > + pattern = xasprintf("%s{,.cpy,.CPY,.cbl,.CBL,.cob,.COB,%s}", > + path, this->extensions); > + } else { > + pattern = xasprintf("%s{,.cpy,.CPY,.cbl,.CBL,.cob,.COB}", path); > + } > + > + free(copier); There’s a manual free of "copier" here, but there’s are various error- handling early returns paths that will leak. Maybe just use a std::string? Similarly with “path”; I think this is always leaked. Maybe std::string here too. Have you tried running the compiler under valgrind? Configure with —enable-valgrind-annotations and pass -wrap per=valgrind to the driver. [...] > + > +static bool > +is_word_char( char ch ) { > + switch(ch) { > + case '0' ... '9': > + case 'a' ... 'z': > + case 'A' ... 'Z': > + case '$': > + case '-': > + case '_': > + return true; > + } > + return false; > +} Range based cases are a GCC extension IIRC, so this isn’t compatible with other C++ compilers [...] > > + default: > + gcc_assert(false); This shows up in various places; use gcc_unreachable(); for this. Hope this is constructive Dave