I have been experimenting with code to allow sequences to be included in lists and to extend sequences to arbitrary character strings. This seems to work well now. Please feel free to incorporate this into future versions of the Bash shell.
--- Brian
#include <cctype>
#include "BraceExpander.h"
#include "errors.h"
#include "symbols.h"
#include "types.h"
/*------------------------------------------------------------------------------
Expand brace expressions within each token in a list.
Each brace expression consists of zero or more components, separated by a
LIST_SEPARATOR character. Each component can be a range of values or a
string that may include another brace expression.
Version 1.0 2014-12-16 Brian B. McGuinness
------------------------------------------------------------------------------*/
/*------------------------------------------------------------------------------
append_string_to_tokens - Append a string to the current set of tokens, or add it
as a separate token if there are no current tokens yet.
list = The list of tokens containing the current set at the end
startAt = The index where the current set of tokens begins in the list
suffix = The string to append
We return the updated list of tokens.
------------------------------------------------------------------------------*/
void BraceExpander::append_string_to_tokens (std::vector<String> &list, int startAt, String suffix) {
int length = list.size();
if (length > startAt) {
for (int n = startAt; n < length; n++) {
list.at (n).append (suffix);
}
}
else list.push_back (suffix);
}
/*------------------------------------------------------------------------------
compare_strings - Compare two strings, ignoring leading zeros
First, we scan past leading zeros, if any, in each string. Then we compare
the remainder of the strings. If one string is longer, it is deemed greater.
Otherwise, we do a lexical comparison.
string1 = The first string to be compared
string2 = The second string to be compared
We return an integer value that is < 0 if string1 < string2,
0 if string1 = string2, or > 0 if string1 > string2.
------------------------------------------------------------------------------*/
int BraceExpander::compare_strings (const String &string1, const String &string2) {
size_t length1 = string1.length();
size_t length2 = string2.length();
// Scan past leading zeros, as they have no value
size_t index1 = 0, index2 = 0;
while (string1[index1] == '0' && index1 < length1) index1++;
while (string2[index2] == '0' && index2 < length2) index2++;
// A longer string is deemed greater
length1 -= index1;
length2 -= index2;
if (length1 < length2) return -1;
if (length1 > length2) return 1;
// They're the same length, so perform a lexical comparison
return string1.compare (index1, String::npos, string2, index2, String::npos);
}
/*------------------------------------------------------------------------------
expandBraces - Expand brace expressions within each token in a list.
tokens = The list of tokens
We return the list of tokens with the brace expressions expanded.
------------------------------------------------------------------------------*/
struct ExpansionData {
int part_starts_at; // Index in the expansion list where the current component starts
std::vector<String> expansions; // Tokens generated by expansions
ExpansionData (int starts_at) {
part_starts_at = starts_at;
}
};
std::vector<String> BraceExpander::expandBraces (std::vector<String> &tokens) {
const Char UNQUOTED = ' ';
std::vector<String> result;
for (String token : tokens) {
// Ignore isolated braces; these open blocks
if (token.length() == 1 && token[0] == BRACE_BEGIN) {
result.push_back (token);
continue;
}
String buffer;
Char previous_char = ' ';
Char previous_state = UNQUOTED;
int range_break = 0;
std::vector<ExpansionData> stack;
int stack_top = 0;
Char state = UNQUOTED;
int token_length = token.length();
stack.push_back (ExpansionData (0));
for (int i = 0; i < token_length; i++) {
char c = token.at (i);
switch (state) {
case UNQUOTED: // Looking for the next brace expression
switch (c) {
case BRACE_BEGIN:
if (range_break != 0) throw SyntaxError (token.insert (0, "Braces are not allowed in a range: ").c_str());
stack_top++;
stack.push_back (ExpansionData (stack[stack_top - 1].expansions.size()));
if (buffer.length() > 0) {
append_string_to_tokens (stack[stack_top - 1].expansions, stack[stack_top - 1].part_starts_at, buffer);
}
buffer.clear();
previous_char = ' ';
break;
case BRACE_END:
if (stack.empty()) throw SyntaxError (token.insert (0, "Unpaired closing brace: ").c_str());
// fall through
case LIST_SEPARATOR: // End of a component: expand it
if (stack.empty()) {
buffer.push_back (c);
break;
}
if (range_break != 0) {
// This component is a range
String start = buffer.substr (0, range_break);
String end = buffer.substr (range_break + 2);
for (String value = start; compare_strings (value, end) <= 0; value = increment_string (value, end)) {
stack[stack_top].expansions.push_back (value);
}
range_break = 0;
}
else {
// This component is a simple string
append_string_to_tokens (stack[stack_top].expansions, stack[stack_top].part_starts_at, buffer);
}
buffer.clear ();
stack[stack_top].part_starts_at = stack[stack_top].expansions.size();
if (c == BRACE_END) {
// Apply this brace level's expansions to the previous brace level
int start = stack[stack_top - 1].part_starts_at;
int length = stack[stack_top - 1].expansions.size();
if (length > start) {
std::vector<String> newList;
for (int n = 0; n < start; n++) newList.push_back (stack[stack_top - 1].expansions.at (n));
for (int n = start; n < length; n++) {
for (String end: stack[stack_top].expansions) {
newList.push_back (stack[stack_top - 1].expansions.at (n) + end);
}
}
stack[stack_top - 1].expansions = newList;
}
else {
for (String expansion : stack[stack_top].expansions) {
stack[stack_top - 1].expansions.push_back (expansion);
}
}
stack.pop_back();
stack_top--;
}
break;
case RANGE: // Unquoted, unescaped range character
buffer.push_back (c);
if (previous_char == c && range_break == 0) range_break = buffer.length() - 2;
break;
case ESCAPE:
previous_state = UNQUOTED;
state = ESCAPE;
buffer.push_back (c);
break;
case STRONG_QUOTE:
case WEAK_QUOTE:
state = c;
buffer.push_back (c);
break;
default: buffer.push_back (c);
break;
}
break;
case ESCAPE: // The last character was an escape character
buffer.push_back (c);
state = previous_state;
break;
case STRONG_QUOTE: // No brace expansion is done inside quotes
case WEAK_QUOTE:
buffer.push_back (c);
if (c == ESCAPE) {
previous_state = state;
state = ESCAPE;
}
else if (c == state) state = UNQUOTED;
break;
}
previous_char = c;
}
if (stack_top > 0) throw SyntaxError (token.insert (0, "Unpaired opening brace: ").c_str());
// Handle leftover text at the end of a token
if (buffer.length() > 0) {
if (stack[0].expansions.size() > 0) { // It's a suffix for a brace expansion
String suffix = buffer;
for (String expansion : stack[0].expansions) {
result.push_back (expansion + suffix);
}
}
else result.push_back (buffer); // It's a token with no brace expansions
}
// There's no leftover text to append, so just save the expansions for this token
else for (String expansion : stack[0].expansions) result.push_back (expansion);
}
return result;
}
/*------------------------------------------------------------------------------
increment_string - Increment a string by 1, with carry.
Starting with the rightmost character, we increment the character by 1 (replace it with its
successor in the collating sequence). When we reach '9' (for a digit character) or 'z' or 'Z'
(for an alphabetic character) we wrap around (to '0', 'a', or 'A') and implement carry by
incrementing the next character to the left in the same way. The incrementing process skips
over characters that are neither digits not letters, leaving them unaltered.
When a carry results in our prefixing a new character, we check the pattern to determine
whether to prefix a digit, a letter, or something else. The upper bound of a sequence of
strings is often used as the pattern. That way, we build a character sequence that matches the
final result. Alternatively, "" can be used if no pattern is desired. In that case we prefix
a character of the same type as the leftmost character in the current value.
string = The string to be incremented
pattern = The pattern to use for prefixing new characters (e.g. "0cd-4b")
We return the incremented string.
------------------------------------------------------------------------------*/
Char BraceExpander::SUCCESSOR[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 58, 59, 60, 61, 62, 63,
64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 65, 91, 92, 93, 94, 95,
96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 97, 123, 124, 125, 126, 127
};
String &BraceExpander::increment_string (String &value, const String &pattern) {
size_t value_length = value.length();
for (int index = value_length - 1; index >= 0; index--) {
Char current = value[index];
Char next = (current < 128) ? SUCCESSOR[current] : current;
value[index] = next;
if (next > current) break;
// Perform a carry
if (index == 0) {
// We need to prefix a new digit
size_t pattern_length = pattern.length();
if (pattern_length > value_length) {
current = pattern[pattern_length - value_length - 1];
}
if (isdigit (current)) current = '1';
else if (islower (current)) current = 'a';
else if (isupper (current)) current = 'A';
value.insert (0, 1, current);
break;
}
}
return value;
}
/*------------------------------------------------------------------------------
Expand brace expressions within each token in a list.
Each brace expression consists of zero or more components, separated by a
LIST_SEPARATOR character. Each component can be a range of values or a
string that may include another brace expression.
Version 1.0 2014-12-16 Brian B. McGuinness
------------------------------------------------------------------------------*/
#ifndef BraceExpander_h
#define BraceExpander_h
#include <string>
#include <vector>
#include "types.h"
class BraceExpander {
public:
int compare_strings (const String &string1, const String &string2);
std::vector<String> expandBraces (std::vector<String> &tokens);
String &increment_string (String &value, const String &pattern);
private:
static Char SUCCESSOR[];
void append_string_to_tokens (std::vector<String> &list, int startAt, String suffix);
};
#endif // ! BraceExpander_h
#include <stdexcept>
class SyntaxError : public std::runtime_error {
public:
SyntaxError (const char *message) : runtime_error (message) {}
};
/*------------------------------------------------------------------------------
Define special symbols
Version 1.0 2014-12-16 Brian B. McGuinness
------------------------------------------------------------------------------*/
#ifndef symbols_h
#define symbols_h
#include "types.h"
const Char BRACE_BEGIN = '{'; // Begins a brace expression
const Char BRACE_END = '}'; // Ends a brace expression
const Char LIST_SEPARATOR = ','; // Separates components of a brace expansion
const Char RANGE = '.'; // Indicates a range of values when it appears twice in succession
const Char ESCAPE = '\\'; // Quotes the following char
const Char STRONG_QUOTE = '\''; // Quotes everything except the escape char
const Char WEAK_QUOTE = '"'; // Quotes everything except escape and variable chars
#endif // ! symbols_h
/*------------------------------------------------------------------------------ Rename basic types so we can change them easily Version 1.0 2014-12-16 Brian B. McGuinness ------------------------------------------------------------------------------*/ #ifndef types_h #define types_h typedef char Char; typedef std::string String; #endif // ! types_h
build
Description: Binary data
#include <iostream>
#include "BraceExpander.h"
/*------------------------------------------------------------------------------
Main program for testing
------------------------------------------------------------------------------*/
int main (int argc, char **argv) {
if (argc < 2) std::cout << "Syntax: " << *argv << " token..." << std::endl;
else {
BraceExpander expander;
std::vector<String> tokens;
for (int i = 0; i < argc; i++) tokens.push_back (String (argv[i]));
std::vector<String> result = expander.expandBraces (tokens);
for (String token : result) {
std::cout << " '" << token << "'";
}
std::cout << std::endl;
}
}
run
Description: Binary data
build2
Description: Binary data
#include <iostream>
#include "BraceExpander.h"
/*------------------------------------------------------------------------------
Main program for testing
------------------------------------------------------------------------------*/
int main (int argc, char **argv) {
if (argc != 3) std::cout << "Syntax: " << *argv << "s lower_bound upper_bound" << std::endl;
else {
BraceExpander expander;
std::string value (argv[1]);
std::string limit (argv[2]);
std::cout << " '" << value << "'";
while (expander.compare_strings (value, limit) < 0) {
value = expander.increment_string (value, limit);
std::cout << " '" << value << "'";
}
std::cout << std::endl;
}
}
