I have been experimenting with code to allow
sequences to be included in lists and to
extend sequences to arbitrary character strings.
This seems to work well now.  Please feel free
to incorporate this into future versions of
the Bash shell.

--- Brian
#include <cctype>
#include "BraceExpander.h"
#include "errors.h"
#include "symbols.h"
#include "types.h"

/*------------------------------------------------------------------------------
  Expand brace expressions within each token in a list.

  Each brace expression consists of zero or more components, separated by a 
  LIST_SEPARATOR character.  Each component can be a range of values or a 
  string that may include another brace expression.

  Version 1.0  2014-12-16  Brian B. McGuinness
------------------------------------------------------------------------------*/

/*------------------------------------------------------------------------------
  append_string_to_tokens - Append a string to the current set of tokens, or add it
                            as a separate token if there are no current tokens yet.

  list    = The list of tokens containing the current set at the end
  startAt = The index where the current set of tokens begins in the list
  suffix  = The string to append

  We return the updated list of tokens.
------------------------------------------------------------------------------*/
void BraceExpander::append_string_to_tokens (std::vector<String> &list, int startAt, String suffix) {
  int length = list.size();

  if (length > startAt) {
    for (int n = startAt; n < length; n++) {
      list.at (n).append (suffix);
    }
  }
  else list.push_back (suffix);
}

/*------------------------------------------------------------------------------
  compare_strings - Compare two strings, ignoring leading zeros

  First, we scan past leading zeros, if any, in each string.  Then we compare 
  the remainder of the strings.  If one string is longer, it is deemed greater.
  Otherwise, we do a lexical comparison.

  string1 = The first string to be compared
  string2 = The second string to be compared

  We return an integer value that is < 0 if string1 < string2, 
  0 if string1 = string2, or > 0 if string1 > string2.
------------------------------------------------------------------------------*/
int BraceExpander::compare_strings (const String &string1, const String &string2) {
  size_t length1 = string1.length();
  size_t length2 = string2.length();

  // Scan past leading zeros, as they have no value
  size_t index1 = 0, index2 = 0;
  while (string1[index1] == '0' && index1 < length1) index1++;
  while (string2[index2] == '0' && index2 < length2) index2++;

  // A longer string is deemed greater
  length1 -= index1;
  length2 -= index2;
  if (length1 < length2) return -1;
  if (length1 > length2) return  1;

  // They're the same length, so perform a lexical comparison
  return string1.compare (index1, String::npos, string2, index2, String::npos);
}

/*------------------------------------------------------------------------------
  expandBraces - Expand brace expressions within each token in a list.

  tokens = The list of tokens

  We return the list of tokens with the brace expressions expanded.
------------------------------------------------------------------------------*/
struct ExpansionData {
  int                 part_starts_at;  // Index in the expansion list where the current component starts
  std::vector<String> expansions;      // Tokens generated by expansions

  ExpansionData (int starts_at) {
    part_starts_at = starts_at;
  }
};

std::vector<String> BraceExpander::expandBraces (std::vector<String> &tokens) {
  const Char UNQUOTED = ' ';

  std::vector<String> result;

  for (String token : tokens) {
    // Ignore isolated braces; these open blocks
    if (token.length() == 1 && token[0] == BRACE_BEGIN) {
      result.push_back (token);
      continue;
    }

    String                     buffer;
    Char                       previous_char  = ' ';
    Char                       previous_state = UNQUOTED;
    int                        range_break    = 0;
    std::vector<ExpansionData> stack;
    int                        stack_top      = 0;
    Char                       state          = UNQUOTED;
    int                        token_length   = token.length();

    stack.push_back (ExpansionData (0));

    for (int i = 0; i < token_length; i++) {
      char c = token.at (i);

      switch (state) {
        case UNQUOTED: // Looking for the next brace expression
                 switch (c) {
                   case BRACE_BEGIN:
                            if (range_break != 0) throw SyntaxError (token.insert (0, "Braces are not allowed in a range: ").c_str());
                            stack_top++;
                            stack.push_back (ExpansionData (stack[stack_top - 1].expansions.size()));
                            if (buffer.length() > 0) {
                              append_string_to_tokens (stack[stack_top - 1].expansions, stack[stack_top - 1].part_starts_at, buffer);
                            }
                            buffer.clear();
                            previous_char = ' ';
                            break;

                   case BRACE_END:
                            if (stack.empty()) throw SyntaxError (token.insert (0, "Unpaired closing brace: ").c_str());
                            // fall through

                   case LIST_SEPARATOR:  // End of a component: expand it
                            if (stack.empty()) {
                              buffer.push_back (c);
                              break; 
                            }

                            if (range_break != 0) {
                              // This component is a range
                              String start = buffer.substr (0, range_break);
                              String end   = buffer.substr (range_break + 2);
                              for (String value = start; compare_strings (value, end) <= 0; value = increment_string (value, end)) {
                                stack[stack_top].expansions.push_back (value);
                              }
                              range_break = 0;
                            }
                            else {
                              // This component is a simple string
                              append_string_to_tokens (stack[stack_top].expansions, stack[stack_top].part_starts_at, buffer);
                            }
                            buffer.clear ();
                            stack[stack_top].part_starts_at = stack[stack_top].expansions.size();

                            if (c == BRACE_END) {
                              // Apply this brace level's expansions to the previous brace level
                              int start  = stack[stack_top - 1].part_starts_at;
                              int length = stack[stack_top - 1].expansions.size();
                              if (length > start) {
                                std::vector<String> newList;
                                for (int n = 0; n < start; n++) newList.push_back (stack[stack_top - 1].expansions.at (n));
                                for (int n = start; n < length; n++) {
                                  for (String end: stack[stack_top].expansions) {
                                    newList.push_back (stack[stack_top - 1].expansions.at (n) + end);
                                  }
                                }
                                stack[stack_top - 1].expansions = newList;
                              }
                              else {
                                for (String expansion : stack[stack_top].expansions) {
                                  stack[stack_top - 1].expansions.push_back (expansion);
                                }
                              }
                              stack.pop_back();
                              stack_top--;
                            }
                            break;

                   case RANGE:      // Unquoted, unescaped range character
                            buffer.push_back (c);
                            if (previous_char == c && range_break == 0) range_break = buffer.length() - 2;
                            break;

                   case ESCAPE:
                            previous_state = UNQUOTED;
                            state          = ESCAPE;
                            buffer.push_back (c);
                            break;

                   case STRONG_QUOTE: 
                   case WEAK_QUOTE:   
                            state = c;
                            buffer.push_back (c);
                            break;

                   default: buffer.push_back (c);
                            break;
                 }
                 break;

        case ESCAPE:  // The last character was an escape character
                 buffer.push_back (c);
                 state = previous_state;
                 break;

        case STRONG_QUOTE:   // No brace expansion is done inside quotes
        case WEAK_QUOTE:
                 buffer.push_back (c);
                 if (c == ESCAPE) {
                   previous_state = state;
                   state = ESCAPE;
                 }
                 else if (c == state) state = UNQUOTED;
                 break;
      }

      previous_char = c;
    }

    if (stack_top > 0) throw SyntaxError (token.insert (0, "Unpaired opening brace: ").c_str());

    // Handle leftover text at the end of a token
    if (buffer.length() > 0) {
      if (stack[0].expansions.size() > 0) {         // It's a suffix for a brace expansion
        String suffix = buffer;
        for (String expansion : stack[0].expansions) {
          result.push_back (expansion + suffix);
        }
      }
      else result.push_back (buffer); // It's a token with no brace expansions
    }

    // There's no leftover text to append, so just save the expansions for this token
    else for (String expansion : stack[0].expansions) result.push_back (expansion);
  }
  return result;
}

/*------------------------------------------------------------------------------
  increment_string - Increment a string by 1, with carry.

  Starting with the rightmost character, we increment the character by 1 (replace it with its 
  successor in the collating sequence).  When we reach '9' (for a digit character) or 'z' or 'Z'
  (for an alphabetic character) we wrap around (to '0', 'a', or 'A') and implement carry by 
  incrementing the next character to the left in the same way.  The incrementing process skips 
  over characters that are neither digits not letters, leaving them unaltered.

  When a carry results in our prefixing a new character, we check the pattern to determine 
  whether to prefix a digit, a letter, or something else.  The upper bound of a sequence of 
  strings is often used as the pattern.  That way, we build a character sequence that matches the 
  final result.  Alternatively, "" can be used if no pattern is desired.  In that case we prefix
  a character of the same type as the leftmost character in the current value.

  string  = The string to be incremented
  pattern = The pattern to use for prefixing new characters (e.g. "0cd-4b")

  We return the incremented string.
------------------------------------------------------------------------------*/
Char BraceExpander::SUCCESSOR[] = {
    0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15, 
   16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31, 
   32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47, 
   49,  50,  51,  52,  53,  54,  55,  56,  57,  48,  58,  59,  60,  61,  62,  63, 
   64,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80, 
   81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  65,  91,  92,  93,  94,  95, 
   96,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
  113, 114, 115, 116, 117, 118, 119, 120, 121, 122,  97, 123, 124, 125, 126, 127
};

String &BraceExpander::increment_string (String &value, const String &pattern) {
  size_t value_length = value.length();

  for (int index = value_length - 1; index >= 0; index--) {
    Char current = value[index];
    Char next    = (current < 128) ? SUCCESSOR[current] : current;
    value[index] = next;

    if (next > current) break;

    // Perform a carry

    if (index == 0) {
      // We need to prefix a new digit

      size_t pattern_length = pattern.length();
      if (pattern_length > value_length) {
        current = pattern[pattern_length - value_length - 1];
      }

      if      (isdigit (current)) current = '1';
      else if (islower (current)) current = 'a';
      else if (isupper (current)) current = 'A';

      value.insert (0, 1, current);
      break;
    }
  }

  return value;
}

/*------------------------------------------------------------------------------
  Expand brace expressions within each token in a list.

  Each brace expression consists of zero or more components, separated by a 
  LIST_SEPARATOR character.  Each component can be a range of values or a 
  string that may include another brace expression.

  Version 1.0  2014-12-16  Brian B. McGuinness
------------------------------------------------------------------------------*/
#ifndef BraceExpander_h
#define BraceExpander_h

#include <string>
#include <vector>
#include "types.h"

class BraceExpander {
  public:
    int compare_strings (const String &string1, const String &string2);
    std::vector<String> expandBraces (std::vector<String> &tokens);
    String &increment_string (String &value, const String &pattern);

  private:
    static Char SUCCESSOR[];

    void append_string_to_tokens (std::vector<String> &list, int startAt, String suffix);
};

#endif // ! BraceExpander_h

#include <stdexcept>

class SyntaxError : public std::runtime_error {
  public:
    SyntaxError (const char *message) : runtime_error (message) {}
};

/*------------------------------------------------------------------------------
  Define special symbols

  Version 1.0  2014-12-16  Brian B. McGuinness
------------------------------------------------------------------------------*/
#ifndef symbols_h
#define symbols_h

#include "types.h"

const Char   BRACE_BEGIN        = '{';   // Begins a brace expression
const Char   BRACE_END          = '}';   // Ends a brace expression
const Char   LIST_SEPARATOR     = ',';   // Separates components of a brace expansion
const Char   RANGE              = '.';   // Indicates a range of values when it appears twice in succession
const Char   ESCAPE             = '\\';  // Quotes the following char
const Char   STRONG_QUOTE       = '\'';  // Quotes everything except the escape char
const Char   WEAK_QUOTE         = '"';   // Quotes everything except escape and variable chars

#endif // ! symbols_h

/*------------------------------------------------------------------------------
  Rename basic types so we can change them easily

  Version 1.0  2014-12-16  Brian B. McGuinness
------------------------------------------------------------------------------*/
#ifndef types_h
#define types_h

typedef char        Char;
typedef std::string String;

#endif // ! types_h

Attachment: build
Description: Binary data

#include <iostream>
#include "BraceExpander.h"

/*------------------------------------------------------------------------------
  Main program for testing
------------------------------------------------------------------------------*/
int main (int argc, char **argv) {
  if (argc < 2) std::cout <<  "Syntax: " << *argv << " token..." << std::endl;
  else {
    BraceExpander expander;
    std::vector<String> tokens;
    for (int i = 0; i < argc; i++) tokens.push_back (String (argv[i]));

    std::vector<String> result = expander.expandBraces (tokens);

    for (String token : result) {
      std::cout << " '" << token << "'";
    }
    std::cout << std::endl;
  }
}

Attachment: run
Description: Binary data

Attachment: build2
Description: Binary data

#include <iostream>
#include "BraceExpander.h"

/*------------------------------------------------------------------------------
  Main program for testing
------------------------------------------------------------------------------*/
int main (int argc, char **argv) {
  if (argc != 3) std::cout <<  "Syntax: " << *argv << "s lower_bound upper_bound" << std::endl;
  else {
    BraceExpander expander;
    std::string value (argv[1]);
    std::string limit (argv[2]);

    std::cout << " '" << value << "'";

    while (expander.compare_strings (value, limit) < 0) {
      value = expander.increment_string (value, limit);
      std::cout << " '" << value << "'";
    }
    std::cout << std::endl;
  }
}

Reply via email to