3.2.3 regex vs. 3.1.x regex

2006-10-31 Thread John Gatewood Ham

I have attached an example program that can deal with
the different regex parsing in 3.0.x vs. 3.1.x and
regex sub-expressions.  I have tested with
3.1.17 and 3.2.3 successfully.  Since creating a
script that actually worked with both bash versions
took me a while, I suspect other people might want
to see an example.  This example will split out
3 subexpressions from the input, but does not
include any error checking for bad input.  If the
code is incorrect please let me know.

Thanks!

JGH

#! /bin/bash
#
# Example: split three sub-expressions out of a string with the
# [[ ... =~ ... ]] operator, choosing a pattern spelling that suits the
# regex quoting rules of the running bash (pre-3.2 vs. 3.2 and later).
# No validation of the input string is performed.
#
# We need bash 3.x or better
#
if [ -z "${BASH_VERSION:-}" ]
then
   printf "This script requires GNU bash (ftp://ftp.gnu.org/gnu/bash)\n"
   printf "You need version 3.0 or newer for this script.\n"
   exit 1
fi

if [ "${BASH_VERSINFO[0]}" -lt 3 ]
then
   printf "This script requires bash >= 3.0; you only have %s\n" "${BASH_VERSION}"
   exit 1
fi

# NEW_REGEX selects which entry of REGEX1 is used:
#   0 - bash 3.0/3.1: the pattern is written inside double quotes
#   1 - bash 3.2 and later (including 4.x/5.x): the pattern is unquoted
#       and the blanks are backslash-escaped
if ((BASH_VERSINFO[0] > 3 || (BASH_VERSINFO[0] == 3 && BASH_VERSINFO[1] >= 2)))
then
   # Only the 3.2 series needs the patch-level check; later major
   # releases restart their patch numbering and must not be rejected.
   if ((BASH_VERSINFO[0] == 3 && BASH_VERSINFO[1] == 2 && BASH_VERSINFO[2] < 3))
   then
      printf "The regex implementation in bash 3.2.0 through 3.2.2 has issues.\n"
      printf "Please apply the patches (ftp://ftp.gnu.org/gnu/bash/bash-3.2-patches/)\n"
      printf "and then try running this script again.\n"
      exit 1
   fi
   NEW_REGEX=1
else
   NEW_REGEX=0
fi

REGEX1[0]='"([ATCSBH]) ([^ ]+) (.*)"'
REGEX1[1]='([ATCSBH])\ ([^\ ]+)\ (.*)'

# testmeout STRING
#   Match STRING against the version-appropriate pattern and print each
#   captured sub-expression (BASH_REMATCH[1..n]) on its own line.
testmeout() {
  local i

  # eval is used so that the pattern text selected at run time is parsed
  # as part of the [[ ]] command.  The subject is referenced as \${1} so
  # it is expanded inside the eval'd command rather than pasted into the
  # command text, which keeps quotes or $(...) in the input from being
  # interpreted as shell syntax.
  eval "[[ \"\${1}\" =~ ${REGEX1[${NEW_REGEX}]} ]]"
  for ((i=1;i<${#BASH_REMATCH[*]};++i))
  do
    printf "Subexpression %d is \"%s\"\n" ${i} "${BASH_REMATCH[${i}]}"
  done
}

printf "Testing with %s\n" "${BASH_VERSION}"
testmeout "A Today mypackage"
exit 0
___
Bug-bash mailing list
Bug-bash@gnu.org
http://lists.gnu.org/mailman/listinfo/bug-bash


Bash-3.2 Official Patch 3

2006-10-31 Thread Chet Ramey
 BASH PATCH REPORT
 =================

Bash-Release: 3.2
Patch-ID: bash32-003

Bug-Reported-by:    John Gatewood Ham <[EMAIL PROTECTED]>
Bug-Reference-ID:   <[EMAIL PROTECTED]>
Bug-Reference-URL:  http://lists.gnu.org/archive/html/bug-bash/2006-10/msg00045.html

Bug-Description:

When using the conditional command's `=~' operator to match regular
expressions, the parser did not skip over shell metacharacters in the
regular expression, leading to syntax errors.

Patch:

*** ../bash-3.2-patched/parse.y Tue Oct 17 11:45:20 2006
--- parse.y Sat Oct 14 14:56:16 2006
***************
*** 1029,1034 ****
--- 1029,1035 ----
  #define PST_CMDTOKEN  0x1000  /* command token OK - unused */
  #define PST_COMPASSIGN 0x2000  /* parsing x=(...) compound assignment */
  #define PST_ASSIGNOK  0x4000  /* assignment statement ok in this context */
+ #define PST_REGEXP    0x8000  /* parsing an ERE/BRE as a single word */
  
  /* Initial size to allocate for tokens, and the
 amount to grow them by. */
***************
*** 2591,2596 ****
--- 2592,2600 ----
return (character);
  }
  
+   if (parser_state & PST_REGEXP)
+ goto tokword;
+ 
/* Shell meta-characters. */
if MBTEST(shellmeta (character) && ((parser_state & PST_DBLPAREN) == 0))
  {
***************
*** 2698,2703 ****
--- 2702,2708 ----
if MBTEST(character == '-' && (last_read_token == LESS_AND || last_read_token == GREATER_AND))
  return (character);
  
+ tokword:
/* Okay, if we got this far, we have to read a word.  Read one,
   and then check it against the known ones. */
result = read_token_word (character);
***************
*** 3202,3209 ****
if (tok == WORD && test_binop (yylval.word->word))
op = yylval.word;
  #if defined (COND_REGEXP)
!   else if (tok == WORD && STREQ (yylval.word->word,"=~"))
!   op = yylval.word;
  #endif
else if (tok == '<' || tok == '>')
op = make_word_from_token (tok);  /* ( */
--- 3207,3217 ----
if (tok == WORD && test_binop (yylval.word->word))
op = yylval.word;
  #if defined (COND_REGEXP)
!   else if (tok == WORD && STREQ (yylval.word->word, "=~"))
!   {
! op = yylval.word;
! parser_state |= PST_REGEXP;
!   }
  #endif
else if (tok == '<' || tok == '>')
op = make_word_from_token (tok);  /* ( */
***************
*** 3234,3239 ****
--- 3242,3248 ----
  
/* rhs */
tok = read_token (READ);
+   parser_state &= ~PST_REGEXP;
if (tok == WORD)
{
  tright = make_cond_node (COND_TERM, yylval.word, (COND_COM *)NULL, (COND_COM *)NULL);
***************
*** 3419,3427 ****
  goto next_character;
}
  
  #ifdef EXTENDED_GLOB
/* Parse a ksh-style extended pattern matching specification. */
!   if (extended_glob && PATTERN_CHAR (character))
{
  peek_char = shell_getc (1);
  if MBTEST(peek_char == '(')   /* ) */
--- 3428,3461 ----
  goto next_character;
}
  
+ #ifdef COND_REGEXP
+   /* When parsing a regexp as a single word inside a conditional command,
+we need to special-case characters special to both the shell and
+regular expressions.  Right now, that is only '(' and '|'. */ /*)*/
+   if MBTEST((parser_state & PST_REGEXP) && (character == '(' || character == '|'))/*)*/
+ {
+   if (character == '|')
+ goto got_character;
+ 
+ push_delimiter (dstack, character);
+ ttok = parse_matched_pair (cd, '(', ')', &ttoklen, 0);
+ pop_delimiter (dstack);
+ if (ttok == &matched_pair_error)
+   return -1;  /* Bail immediately. */
+ RESIZE_MALLOCED_BUFFER (token, token_index, ttoklen + 2,
+ token_buffer_size, TOKEN_DEFAULT_GROW_SIZE);
+ token[token_index++] = character;
+ strcpy (token + token_index, ttok);
+ token_index += ttoklen;
+ FREE (ttok);
+ dollar_present = all_digit_token = 0;
+ goto next_character;
+ }
+ #endif /* COND_REGEXP */
+ 
  #ifdef EXTENDED_GLOB
/* Parse a ksh-style extended pattern matching specification. */
!   if MBTEST(extended_glob && PATTERN_CHAR (character))
{
  peek_char = shell_getc (1);
  if MBTEST(peek_char == '(')   /* ) */

*** ../bash-3.2/patchlevel.h Thu Apr 13 08:31:04 2006
--- patchlevel.h Mon Oct 16 14:22:54 2006
***************
*** 26,30 ****
 looks for to find the patch level (for the sccs version string). */
  
! #define PATCHLEVEL 2
  
  #endif /* _PATCHLEVEL_H_ */
--- 26,30 ----
 looks for to find the patch level (for the sccs version string). */
  
! #define PATCHLEVEL 3
  
  #endif /* _PATCHLEVEL_H_ */


-- 
``The lyf so short, the craft so long to lerne.'' - Chaucer
  

Bash-3.2 Official Patch 2

2006-10-31 Thread Chet Ramey
 BASH PATCH REPORT
 =================

Bash-Release: 3.2
Patch-ID: bash32-002

Bug-Reported-by:    Jim Gifford <[EMAIL PROTECTED]>
Bug-Reference-ID:   <[EMAIL PROTECTED]>
Bug-Reference-URL:  http://lists.gnu.org/archive/html/bug-bash/2006-10/msg00082.html

Bug-Description:

An incorrect encoding specification in the Content-Type header causes msgfmt
to fail, which causes `make install' to fail.

Patch:

*** ../bash-3.2/po/ru.po Tue Jan 10 17:51:03 2006
--- po/ru.po Mon Oct 16 15:13:23 2006
***************
*** 13,17 ****
  "Language-Team: Russian <[EMAIL PROTECTED]>\n"
  "MIME-Version: 1.0\n"
! "Content-Type: text/plain; charset=UTF-8\n"
  "Content-Transfer-Encoding: 8bit\n"
  "Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\n"
--- 13,17 ----
  "Language-Team: Russian <[EMAIL PROTECTED]>\n"
  "MIME-Version: 1.0\n"
! "Content-Type: text/plain; charset=KOI8-R\n"
  "Content-Transfer-Encoding: 8bit\n"
  "Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\n"

*** ../bash-3.2/patchlevel.h Thu Apr 13 08:31:04 2006
--- patchlevel.h Mon Oct 16 14:22:54 2006
***************
*** 26,30 ****
 looks for to find the patch level (for the sccs version string). */
  
! #define PATCHLEVEL 1
  
  #endif /* _PATCHLEVEL_H_ */
--- 26,30 ----
 looks for to find the patch level (for the sccs version string). */
  
! #define PATCHLEVEL 2
  
  #endif /* _PATCHLEVEL_H_ */

-- 
``The lyf so short, the craft so long to lerne.'' - Chaucer
Live Strong.
Chet Ramey, ITS, CWRU    [EMAIL PROTECTED]    http://tiswww.tis.case.edu/~chet/


___
Bug-bash mailing list
Bug-bash@gnu.org
http://lists.gnu.org/mailman/listinfo/bug-bash