Package: flex
Version: 2.5.35-2
Severity: normal

On ia64, flex appears to generate completely different (and broken) output depending on whether the input file is supplied via shell redirection or via a pipe.
For example, compare:

  merulo% flex -t < input.txt | head
  #line 3 "<stdout>"
  #define YY_INT_ALIGNED short int
  /* A lexical scanner generated by flex */
  #define FLEX_SCANNER
  #define YY_FLEX_MAJOR_VERSION 2
  #define YY_FLEX_MINOR_VERSION 5

with:

  merulo% cat input.txt | flex -t | head
  { bufferAppend(buffer, yytext); yy_push_state(C24); }
  "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\object" { bufferAppend(buffer, yytext); yy_push_state(C25); }
  "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\docvar" { bufferAppend(buffer, yytext);

This latter output is clearly broken: instead of the start of a generated scanner, it is a fragment of the rules section of the input file echoed back. This appears to be the cause of #500171, which is in turn blocking the transition of a version of apertium that fixes an RC bug, #496395. (In case it isn't obvious, "cat" is being used as a minimal testcase - see #500171 for the full log.)

The offending input.txt is attached.

Regards,

-- 
  ,''`.
 : :'  :     Chris Lamb
 `. `'`      [EMAIL PROTECTED]
   `-
%{ #include <cstdlib> #include <iostream> #include <map> #include <vector> #include <regex.h> #include <string> #include <lttoolbox/lt_locale.h> #include <lttoolbox/ltstr.h> using namespace std; wstring buffer; string symbuf = ""; bool isDot, hasWrite_dot, hasWrite_white; FILE *formatfile; string last; int current; long int offset; vector<long int> offsets; vector<wstring> tags; vector<int> orders; regex_t escape_chars; regex_t names_regexp; void bufferAppend(wstring &buf, string const &str) { symbuf.append(str); for(size_t i = 0, limit = symbuf.size(); i < limit;) { wchar_t symbol; int gap = mbtowc(&symbol, symbuf.c_str() + i, MB_CUR_MAX); if(gap == -1) { if(i + MB_CUR_MAX < limit) { buf += L'?'; gap = 1; } else { symbuf = symbuf.substr(i); return; } } else { buf += symbol; } i += gap; } symbuf = ""; return; } void init_escape() { if(regcomp(&escape_chars, "\\\\|[][<>@^$/]", REG_EXTENDED)) { cerr << "ERROR: Illegal regular expression for escape characters" << endl; exit(EXIT_FAILURE); } } void init_tagNames() { if(regcomp(&names_regexp, "", REG_EXTENDED)) { cerr << "ERROR: Illegal regular expression for tag-names" << endl; exit(EXIT_FAILURE); } } string backslash(string const &str) { string new_str = ""; for(unsigned int i = 0; i < str.size(); i++) { if(str[i] == '\\') { new_str += str[i]; } new_str += str[i]; } return new_str; } wstring escape(string const &str) { regmatch_t pmatch; char const *mystring = str.c_str(); int base = 0; wstring result = L""; while(!regexec(&escape_chars, mystring + base, 1, &pmatch, 0)) { bufferAppend(result, str.substr(base, pmatch.rm_so)); result += L'\\'; wchar_t micaracter; int pos = mbtowc(&micaracter, str.c_str() + base + pmatch.rm_so, MB_CUR_MAX); if(pos == -1) { wcerr << L"Uno" << endl; wcerr << L"Encoding error." 
<< endl; exit(EXIT_FAILURE); } result += micaracter; base += pmatch.rm_eo; } bufferAppend(result, str.substr(base)); return result; } wstring escape(wstring const &str) { string dest = ""; for(size_t i = 0, limit = str.size(); i < limit; i++) { char symbol[MB_CUR_MAX+1]; int pos = wctomb(symbol, str[i]); if(pos == -1) { symbol[0]='?'; pos = 1; } symbol[pos] = 0; dest.append(symbol); } return escape(dest); } string get_tagName(string tag){ regmatch_t pmatch; char const *mystring = tag.c_str(); string result = ""; if(!regexec(&names_regexp, mystring, 1, &pmatch, 0)) { result=tag.substr(pmatch.rm_so, pmatch.rm_eo - pmatch.rm_so); return result; } return ""; } map<string, wstring, Ltstr> S1_substitution; void S1_init() { S1_substitution["\\'8a"] = L"Å "; S1_substitution["\\'8c"] = L"Å"; S1_substitution["\\'8d"] = L"Ť"; S1_substitution["\\'8e"] = L"Ž"; S1_substitution["\\'8f"] = L"Ź"; S1_substitution["\\'9a"] = L"Å¡"; S1_substitution["\\'9c"] = L"Å"; S1_substitution["\\'9d"] = L"Å¥"; S1_substitution["\\'9e"] = L"ž"; S1_substitution["\\'9f"] = L"ź"; S1_substitution["\\'a3"] = L"Å"; S1_substitution["\\'a5"] = L"Ä"; S1_substitution["\\'aa"] = L"Å"; S1_substitution["\\'af"] = L"Å»"; S1_substitution["\\'b3"] = L"Å"; S1_substitution["\\'b5"] = L"µ"; S1_substitution["\\'b9"] = L"Ä "; S1_substitution["\\'ba"] = L"Å"; S1_substitution["\\'bc"] = L"Ľ"; S1_substitution["\\'be"] = L"ľ"; S1_substitution["\\'bf"] = L"ż"; S1_substitution["\\'c0"] = L"Å"; S1_substitution["\\'c1"] = L"Ã"; S1_substitution["\\'c2"] = L"Ã"; S1_substitution["\\'c3"] = L"Ä"; S1_substitution["\\'c4"] = L"Ã"; S1_substitution["\\'c5"] = L"Ĺ"; S1_substitution["\\'c6"] = L"Ä"; S1_substitution["\\'c7"] = L"Ã"; S1_substitution["\\'c8"] = L"Ä"; S1_substitution["\\'c9"] = L"Ã"; S1_substitution["\\'ca"] = L"Ä"; S1_substitution["\\'cb"] = L"Ã"; S1_substitution["\\'cc"] = L"Ä"; S1_substitution["\\'cd"] = L"Ã"; S1_substitution["\\'ce"] = L"Ã"; S1_substitution["\\'cf"] = L"Ä"; S1_substitution["\\'d0"] = L"Ã"; 
S1_substitution["\\'d1"] = L"Å"; S1_substitution["\\'d2"] = L"Å"; S1_substitution["\\'d3"] = L"Ã"; S1_substitution["\\'d4"] = L"Ã"; S1_substitution["\\'d5"] = L"Å"; S1_substitution["\\'d6"] = L"Ã"; S1_substitution["\\'d8"] = L"Å"; S1_substitution["\\'d9"] = L"Å®"; S1_substitution["\\'da"] = L"Ã"; S1_substitution["\\'db"] = L"Ű"; S1_substitution["\\'dc"] = L"Ã"; S1_substitution["\\'dd"] = L"Ã"; S1_substitution["\\'de"] = L"Å¢"; S1_substitution["\\'df"] = L"Ã"; S1_substitution["\\'e0"] = L"Å"; S1_substitution["\\'e1"] = L"á"; S1_substitution["\\'e2"] = L"â"; S1_substitution["\\'e3"] = L"Ä"; S1_substitution["\\'e4"] = L"ä"; S1_substitution["\\'e5"] = L"ĺ"; S1_substitution["\\'e6"] = L"Ä"; S1_substitution["\\'e7"] = L"ç"; S1_substitution["\\'e8"] = L"Ä"; S1_substitution["\\'e9"] = L"é"; S1_substitution["\\'ea"] = L"Ä"; S1_substitution["\\'eb"] = L"ë"; S1_substitution["\\'ec"] = L"Ä"; S1_substitution["\\'ed"] = L"Ã"; S1_substitution["\\'ee"] = L"î"; S1_substitution["\\'ef"] = L"Ä"; S1_substitution["\\'f0"] = L"Ä"; S1_substitution["\\'f1"] = L"Å"; S1_substitution["\\'f2"] = L"Å"; S1_substitution["\\'f3"] = L"ó"; S1_substitution["\\'f4"] = L"ô"; S1_substitution["\\'f5"] = L"Å"; S1_substitution["\\'f6"] = L"ö"; S1_substitution["\\'f8"] = L"Å"; S1_substitution["\\'f9"] = L"ů"; S1_substitution["\\'fa"] = L"ú"; S1_substitution["\\'fb"] = L"ű"; S1_substitution["\\'fc"] = L"ü"; S1_substitution["\\'fd"] = L"ý"; S1_substitution["\\'fe"] = L"Å£"; S1_substitution["\\'ff"] = L"Ë"; S1_substitution["\\'8a\r\n"] = L"Å "; S1_substitution["\\'8c\r\n"] = L"Å"; S1_substitution["\\'8d\r\n"] = L"Ť"; S1_substitution["\\'8e\r\n"] = L"Ž"; S1_substitution["\\'8f\r\n"] = L"Ź"; S1_substitution["\\'9a\r\n"] = L"Å¡"; S1_substitution["\\'9c\r\n"] = L"Å"; S1_substitution["\\'9d\r\n"] = L"Å¥"; S1_substitution["\\'9e\r\n"] = L"ž"; S1_substitution["\\'9f\r\n"] = L"ź"; S1_substitution["\\'a3\r\n"] = L"Å"; S1_substitution["\\'a5\r\n"] = L"Ä"; S1_substitution["\\'aa\r\n"] = L"Å"; 
S1_substitution["\\'af\r\n"] = L"Å»"; S1_substitution["\\'b3\r\n"] = L"Å"; S1_substitution["\\'b5\r\n"] = L"µ"; S1_substitution["\\'b9\r\n"] = L"Ä "; S1_substitution["\\'ba\r\n"] = L"Å"; S1_substitution["\\'bc\r\n"] = L"Ľ"; S1_substitution["\\'be\r\n"] = L"ľ"; S1_substitution["\\'bf\r\n"] = L"ż"; S1_substitution["\\'c0\r\n"] = L"Å"; S1_substitution["\\'c1\r\n"] = L"Ã"; S1_substitution["\\'c2\r\n"] = L"Ã"; S1_substitution["\\'c3\r\n"] = L"Ä"; S1_substitution["\\'c4\r\n"] = L"Ã"; S1_substitution["\\'c5\r\n"] = L"Ĺ"; S1_substitution["\\'c6\r\n"] = L"Ä"; S1_substitution["\\'c7\r\n"] = L"Ã"; S1_substitution["\\'c8\r\n"] = L"Ä"; S1_substitution["\\'c9\r\n"] = L"Ã"; S1_substitution["\\'ca\r\n"] = L"Ä"; S1_substitution["\\'cb\r\n"] = L"Ã"; S1_substitution["\\'cc\r\n"] = L"Ä"; S1_substitution["\\'cd\r\n"] = L"Ã"; S1_substitution["\\'ce\r\n"] = L"Ã"; S1_substitution["\\'cf\r\n"] = L"Ä"; S1_substitution["\\'d0\r\n"] = L"Ã"; S1_substitution["\\'d1\r\n"] = L"Å"; S1_substitution["\\'d2\r\n"] = L"Å"; S1_substitution["\\'d3\r\n"] = L"Ã"; S1_substitution["\\'d4\r\n"] = L"Ã"; S1_substitution["\\'d5\r\n"] = L"Å"; S1_substitution["\\'d6\r\n"] = L"Ã"; S1_substitution["\\'d8\r\n"] = L"Å"; S1_substitution["\\'d9\r\n"] = L"Å®"; S1_substitution["\\'da\r\n"] = L"Ã"; S1_substitution["\\'db\r\n"] = L"Ű"; S1_substitution["\\'dc\r\n"] = L"Ã"; S1_substitution["\\'dd\r\n"] = L"Ã"; S1_substitution["\\'de\r\n"] = L"Å¢"; S1_substitution["\\'df\r\n"] = L"Ã"; S1_substitution["\\'e0\r\n"] = L"Å"; S1_substitution["\\'e1\r\n"] = L"á"; S1_substitution["\\'e2\r\n"] = L"â"; S1_substitution["\\'e3\r\n"] = L"Ä"; S1_substitution["\\'e4\r\n"] = L"ä"; S1_substitution["\\'e5\r\n"] = L"ĺ"; S1_substitution["\\'e6\r\n"] = L"Ä"; S1_substitution["\\'e7\r\n"] = L"ç"; S1_substitution["\\'e8\r\n"] = L"Ä"; S1_substitution["\\'e9\r\n"] = L"é"; S1_substitution["\\'ea\r\n"] = L"Ä"; S1_substitution["\\'eb\r\n"] = L"ë"; S1_substitution["\\'ec\r\n"] = L"Ä"; S1_substitution["\\'ed\r\n"] = L"Ã"; S1_substitution["\\'ee\r\n"] = 
L"î"; S1_substitution["\\'ef\r\n"] = L"Ä"; S1_substitution["\\'f0\r\n"] = L"Ä"; S1_substitution["\\'f1\r\n"] = L"Å"; S1_substitution["\\'f2\r\n"] = L"Å"; S1_substitution["\\'f3\r\n"] = L"ó"; S1_substitution["\\'f4\r\n"] = L"ô"; S1_substitution["\\'f5\r\n"] = L"Å"; S1_substitution["\\'f6\r\n"] = L"ö"; S1_substitution["\\'f8\r\n"] = L"Å"; S1_substitution["\\'f9\r\n"] = L"ů"; S1_substitution["\\'fa\r\n"] = L"ú"; S1_substitution["\\'fb\r\n"] = L"ű"; S1_substitution["\\'fc\r\n"] = L"ü"; S1_substitution["\\'fd\r\n"] = L"ý"; S1_substitution["\\'fe\r\n"] = L"Å£"; S1_substitution["\\'ff\r\n"] = L"Ë"; S1_substitution["\\'8a\n"] = L"Å "; S1_substitution["\\'8c\n"] = L"Å"; S1_substitution["\\'8d\n"] = L"Ť"; S1_substitution["\\'8e\n"] = L"Ž"; S1_substitution["\\'8f\n"] = L"Ź"; S1_substitution["\\'9a\n"] = L"Å¡"; S1_substitution["\\'9c\n"] = L"Å"; S1_substitution["\\'9d\n"] = L"Å¥"; S1_substitution["\\'9e\n"] = L"ž"; S1_substitution["\\'9f\n"] = L"ź"; S1_substitution["\\'a3\n"] = L"Å"; S1_substitution["\\'a5\n"] = L"Ä"; S1_substitution["\\'aa\n"] = L"Å"; S1_substitution["\\'af\n"] = L"Å»"; S1_substitution["\\'b3\n"] = L"Å"; S1_substitution["\\'b5\n"] = L"µ"; S1_substitution["\\'b9\n"] = L"Ä "; S1_substitution["\\'ba\n"] = L"Å"; S1_substitution["\\'bc\n"] = L"Ľ"; S1_substitution["\\'be\n"] = L"ľ"; S1_substitution["\\'bf\n"] = L"ż"; S1_substitution["\\'c0\n"] = L"Å"; S1_substitution["\\'c1\n"] = L"Ã"; S1_substitution["\\'c2\n"] = L"Ã"; S1_substitution["\\'c3\n"] = L"Ä"; S1_substitution["\\'c4\n"] = L"Ã"; S1_substitution["\\'c5\n"] = L"Ĺ"; S1_substitution["\\'c6\n"] = L"Ä"; S1_substitution["\\'c7\n"] = L"Ã"; S1_substitution["\\'c8\n"] = L"Ä"; S1_substitution["\\'c9\n"] = L"Ã"; S1_substitution["\\'ca\n"] = L"Ä"; S1_substitution["\\'cb\n"] = L"Ã"; S1_substitution["\\'cc\n"] = L"Ä"; S1_substitution["\\'cd\n"] = L"Ã"; S1_substitution["\\'ce\n"] = L"Ã"; S1_substitution["\\'cf\n"] = L"Ä"; S1_substitution["\\'d0\n"] = L"Ã"; S1_substitution["\\'d1\n"] = L"Å"; S1_substitution["\\'d2\n"] = 
L"Å"; S1_substitution["\\'d3\n"] = L"Ã"; S1_substitution["\\'d4\n"] = L"Ã"; S1_substitution["\\'d5\n"] = L"Å"; S1_substitution["\\'d6\n"] = L"Ã"; S1_substitution["\\'d8\n"] = L"Å"; S1_substitution["\\'d9\n"] = L"Å®"; S1_substitution["\\'da\n"] = L"Ã"; S1_substitution["\\'db\n"] = L"Ű"; S1_substitution["\\'dc\n"] = L"Ã"; S1_substitution["\\'dd\n"] = L"Ã"; S1_substitution["\\'de\n"] = L"Å¢"; S1_substitution["\\'df\n"] = L"Ã"; S1_substitution["\\'e0\n"] = L"Å"; S1_substitution["\\'e1\n"] = L"á"; S1_substitution["\\'e2\n"] = L"â"; S1_substitution["\\'e3\n"] = L"Ä"; S1_substitution["\\'e4\n"] = L"ä"; S1_substitution["\\'e5\n"] = L"ĺ"; S1_substitution["\\'e6\n"] = L"Ä"; S1_substitution["\\'e7\n"] = L"ç"; S1_substitution["\\'e8\n"] = L"Ä"; S1_substitution["\\'e9\n"] = L"é"; S1_substitution["\\'ea\n"] = L"Ä"; S1_substitution["\\'eb\n"] = L"ë"; S1_substitution["\\'ec\n"] = L"Ä"; S1_substitution["\\'ed\n"] = L"Ã"; S1_substitution["\\'ee\n"] = L"î"; S1_substitution["\\'ef\n"] = L"Ä"; S1_substitution["\\'f0\n"] = L"Ä"; S1_substitution["\\'f1\n"] = L"Å"; S1_substitution["\\'f2\n"] = L"Å"; S1_substitution["\\'f3\n"] = L"ó"; S1_substitution["\\'f4\n"] = L"ô"; S1_substitution["\\'f5\n"] = L"Å"; S1_substitution["\\'f6\n"] = L"ö"; S1_substitution["\\'f8\n"] = L"Å"; S1_substitution["\\'f9\n"] = L"ů"; S1_substitution["\\'fa\n"] = L"ú"; S1_substitution["\\'fb\n"] = L"ű"; S1_substitution["\\'fc\n"] = L"ü"; S1_substitution["\\'fd\n"] = L"ý"; S1_substitution["\\'fe\n"] = L"Å£"; S1_substitution["\\'ff\n"] = L"Ë"; S1_substitution["\\'8a\r"] = L"Å "; S1_substitution["\\'8c\r"] = L"Å"; S1_substitution["\\'8d\r"] = L"Ť"; S1_substitution["\\'8e\r"] = L"Ž"; S1_substitution["\\'8f\r"] = L"Ź"; S1_substitution["\\'9a\r"] = L"Å¡"; S1_substitution["\\'9c\r"] = L"Å"; S1_substitution["\\'9d\r"] = L"Å¥"; S1_substitution["\\'9e\r"] = L"ž"; S1_substitution["\\'9f\r"] = L"ź"; S1_substitution["\\'a3\r"] = L"Å"; S1_substitution["\\'a5\r"] = L"Ä"; S1_substitution["\\'aa\r"] = L"Å"; S1_substitution["\\'af\r"] 
= L"Å»"; S1_substitution["\\'b3\r"] = L"Å"; S1_substitution["\\'b5\r"] = L"µ"; S1_substitution["\\'b9\r"] = L"Ä "; S1_substitution["\\'ba\r"] = L"Å"; S1_substitution["\\'bc\r"] = L"Ľ"; S1_substitution["\\'be\r"] = L"ľ"; S1_substitution["\\'bf\r"] = L"ż"; S1_substitution["\\'c0\r"] = L"Å"; S1_substitution["\\'c1\r"] = L"Ã"; S1_substitution["\\'c2\r"] = L"Ã"; S1_substitution["\\'c3\r"] = L"Ä"; S1_substitution["\\'c4\r"] = L"Ã"; S1_substitution["\\'c5\r"] = L"Ĺ"; S1_substitution["\\'c6\r"] = L"Ä"; S1_substitution["\\'c7\r"] = L"Ã"; S1_substitution["\\'c8\r"] = L"Ä"; S1_substitution["\\'c9\r"] = L"Ã"; S1_substitution["\\'ca\r"] = L"Ä"; S1_substitution["\\'cb\r"] = L"Ã"; S1_substitution["\\'cc\r"] = L"Ä"; S1_substitution["\\'cd\r"] = L"Ã"; S1_substitution["\\'ce\r"] = L"Ã"; S1_substitution["\\'cf\r"] = L"Ä"; S1_substitution["\\'d0\r"] = L"Ã"; S1_substitution["\\'d1\r"] = L"Å"; S1_substitution["\\'d2\r"] = L"Å"; S1_substitution["\\'d3\r"] = L"Ã"; S1_substitution["\\'d4\r"] = L"Ã"; S1_substitution["\\'d5\r"] = L"Å"; S1_substitution["\\'d6\r"] = L"Ã"; S1_substitution["\\'d8\r"] = L"Å"; S1_substitution["\\'d9\r"] = L"Å®"; S1_substitution["\\'da\r"] = L"Ã"; S1_substitution["\\'db\r"] = L"Ű"; S1_substitution["\\'dc\r"] = L"Ã"; S1_substitution["\\'dd\r"] = L"Ã"; S1_substitution["\\'de\r"] = L"Å¢"; S1_substitution["\\'df\r"] = L"Ã"; S1_substitution["\\'e0\r"] = L"Å"; S1_substitution["\\'e1\r"] = L"á"; S1_substitution["\\'e2\r"] = L"â"; S1_substitution["\\'e3\r"] = L"Ä"; S1_substitution["\\'e4\r"] = L"ä"; S1_substitution["\\'e5\r"] = L"ĺ"; S1_substitution["\\'e6\r"] = L"Ä"; S1_substitution["\\'e7\r"] = L"ç"; S1_substitution["\\'e8\r"] = L"Ä"; S1_substitution["\\'e9\r"] = L"é"; S1_substitution["\\'ea\r"] = L"Ä"; S1_substitution["\\'eb\r"] = L"ë"; S1_substitution["\\'ec\r"] = L"Ä"; S1_substitution["\\'ed\r"] = L"Ã"; S1_substitution["\\'ee\r"] = L"î"; S1_substitution["\\'ef\r"] = L"Ä"; S1_substitution["\\'f0\r"] = L"Ä"; S1_substitution["\\'f1\r"] = L"Å"; S1_substitution["\\'f2\r"] 
= L"Å"; S1_substitution["\\'f3\r"] = L"ó"; S1_substitution["\\'f4\r"] = L"ô"; S1_substitution["\\'f5\r"] = L"Å"; S1_substitution["\\'f6\r"] = L"ö"; S1_substitution["\\'f8\r"] = L"Å"; S1_substitution["\\'f9\r"] = L"ů"; S1_substitution["\\'fa\r"] = L"ú"; S1_substitution["\\'fb\r"] = L"ű"; S1_substitution["\\'fc\r"] = L"ü"; S1_substitution["\\'fd\r"] = L"ý"; S1_substitution["\\'fe\r"] = L"Å£"; S1_substitution["\\'ff\r"] = L"Ë"; } void printBuffer() { if(isDot) { fputws_unlocked(L".[]", yyout); isDot = false; } if(buffer.size() > 8192) { string filename = tmpnam(NULL); FILE *largeblock = fopen(filename.c_str(), "w"); fputws_unlocked(buffer.c_str(), largeblock); fclose(largeblock); fputwc_unlocked(L'[', yyout); fputwc_unlocked(L'@', yyout); wchar_t cad[filename.size()]; size_t pos = mbstowcs(cad, filename.c_str(), filename.size()); if(pos == (size_t) -1) { wcerr << L"Tres" << endl; wcerr << L"Encoding error." << endl; exit(EXIT_FAILURE); } cad[pos] = 0; fputws_unlocked(cad, yyout); fputwc_unlocked(L']', yyout); } else if(buffer.size() > 1) { fputwc_unlocked(L'[', yyout); wstring const tmp = escape(buffer); if(tmp[0] == L'@') { fputwc_unlocked(L'\\', yyout); } fputws_unlocked(tmp.c_str(), yyout); fputwc_unlocked(L']', yyout); } else if(buffer.size() == 1 && buffer[0] != L' ') { fputwc_unlocked(L'[', yyout); wstring const tmp = escape(buffer); if(tmp[0] == L'@') { fputwc_unlocked(L'\\', yyout); } fputws_unlocked(tmp.c_str(), yyout); fputwc_unlocked(L']', yyout); } else { fputws_unlocked(buffer.c_str(), yyout); } buffer = L""; } %} %x C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 C28 C29 C30 C31 C32 C33 %option nounput %option noyywrap %option caseless %option stack %% <C1>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C2>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. 
{ last = "buffer"; bufferAppend(buffer, yytext); } } <C3>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C4>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C5>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C6>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C7>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C8>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C9>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C10>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C11>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C12>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C13>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C14>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C15>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C16>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. 
{ last = "buffer"; bufferAppend(buffer, yytext); } } <C17>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C18>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C19>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C20>{ ";" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C21>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C22>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C23>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C24>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C25>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C26>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C27>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C28>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C29>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C30>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. 
{ last = "buffer"; bufferAppend(buffer, yytext); } } <C31>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C32>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } <C33>{ "}" { last = "buffer"; bufferAppend(buffer, yytext); yy_pop_state(); } \n|. { last = "buffer"; bufferAppend(buffer, yytext); } } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\snext" { bufferAppend(buffer, yytext); yy_push_state(C1); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\keycode" { bufferAppend(buffer, yytext); yy_push_state(C2); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fcharset" { bufferAppend(buffer, yytext); yy_push_state(C3); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fnil" { bufferAppend(buffer, yytext); yy_push_state(C4); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\froman" { bufferAppend(buffer, yytext); yy_push_state(C5); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fswiss" { bufferAppend(buffer, yytext); yy_push_state(C6); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fmodern" { bufferAppend(buffer, yytext); yy_push_state(C7); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fscript" { bufferAppend(buffer, yytext); yy_push_state(C8); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fdecor" { bufferAppend(buffer, yytext); yy_push_state(C9); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\ftech" { bufferAppend(buffer, yytext); yy_push_state(C10); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fbidi" { bufferAppend(buffer, yytext); yy_push_state(C11); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\falt" { bufferAppend(buffer, yytext); yy_push_state(C12); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fontfile" { bufferAppend(buffer, yytext); yy_push_state(C13); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fn" { bufferAppend(buffer, yytext); yy_push_state(C14); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\sbasedon" { bufferAppend(buffer, yytext); 
yy_push_state(C15); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\additive" { bufferAppend(buffer, yytext); yy_push_state(C16); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\file" { bufferAppend(buffer, yytext); yy_push_state(C17); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\listname" { bufferAppend(buffer, yytext); yy_push_state(C18); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\leveltext" { bufferAppend(buffer, yytext); yy_push_state(C19); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\levelnumbers" { bufferAppend(buffer, yytext); yy_push_state(C20); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\pict" { bufferAppend(buffer, yytext); yy_push_state(C21); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\sn" { bufferAppend(buffer, yytext); yy_push_state(C22); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\sv" { bufferAppend(buffer, yytext); yy_push_state(C23); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"{\\\*\\blipuid "[^ \n\r]+"}" { bufferAppend(buffer, yytext); yy_push_state(C24); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\object" { bufferAppend(buffer, yytext); yy_push_state(C25); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\docvar" { bufferAppend(buffer, yytext); yy_push_state(C26); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\bkmkstart" { bufferAppend(buffer, yytext); yy_push_state(C27); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\bkmkend" { bufferAppend(buffer, yytext); yy_push_state(C28); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\rxe" { bufferAppend(buffer, yytext); yy_push_state(C29); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fldinst" { bufferAppend(buffer, yytext); yy_push_state(C30); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fldrslt" { bufferAppend(buffer, yytext); yy_push_state(C31); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\pntxt" { bufferAppend(buffer, yytext); yy_push_state(C32); } "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\colortbl" { bufferAppend(buffer, yytext); yy_push_state(C33); } [a-f0-9]{20,} { isDot = true; bufferAppend(buffer, yytext); } 
"\\par"|"\\row"|"\\cell" { isDot = true; bufferAppend(buffer, yytext); } "{"[ \n\r]*\\[^'][^ \n\r\\]*[ \n\r]* { bufferAppend(buffer, yytext); } \\[^'][^ \n\r\\]*[ \n\r]* { bufferAppend(buffer, yytext); } "}" { bufferAppend(buffer, yytext); } "{" { bufferAppend(buffer, yytext); } "\\'"[0-9a-fA-F][0-9a-fA-F](\r|\n|"\r\n")? { if(S1_substitution.find(yytext) != S1_substitution.end()) { printBuffer(); fputws_unlocked(S1_substitution[yytext].c_str(), yyout); offset+=S1_substitution[yytext].size(); hasWrite_dot = hasWrite_white = true; } else { last="buffer"; bufferAppend(buffer, yytext); } } [ \n\t\r$*] { if (last == "open_tag") bufferAppend(tags.back(), yytext); else bufferAppend(buffer, yytext); } \\|[][<>@^$/] { printBuffer(); fputwc_unlocked(L'\\', yyout); offset++; wchar_t symbol; int pos = mbtowc(&symbol, yytext, MB_CUR_MAX); if(pos == -1) { wcerr << L"Cuatro" << endl; wcerr << L"Encoding error." << endl; exit(EXIT_FAILURE); } fputwc_unlocked(symbol, yyout); offset++; hasWrite_dot = hasWrite_white = true; } . 
{ printBuffer(); symbuf += yytext; wchar_t symbol; int pos = mbtowc(&symbol, symbuf.c_str(), MB_CUR_MAX); if(pos == -1) { if(symbuf.size() > MB_CUR_MAX) { // unknown character symbuf = ""; fputwc_unlocked(L'?', yyout); offset++; hasWrite_dot = hasWrite_white = true; } } else { symbuf = ""; fputwc_unlocked(symbol, yyout); offset++; hasWrite_dot = hasWrite_white = true; } } <<EOF>> { isDot = true; printBuffer(); return 0; } %% void usage(string const &progname) { cerr << "USAGE: " << progname << " [input_file [output_file]" << ']' << endl; cerr << "rtf format processor " << endl; exit(EXIT_SUCCESS); } int main(int argc, char *argv[]) { LtLocale::tryToSetLocale(); if(argc > 3) { usage(argv[0]); } switch(argc) { case 3: yyout = fopen(argv[2], "w"); if(!yyout) { usage(argv[0]); } case 2: yyin = fopen(argv[1], "r"); if(!yyin) { usage(argv[0]); } break; default: break; } // prevent warning message yy_push_state(1); yy_top_state(); yy_pop_state(); S1_init(); last = ""; buffer = L""; isDot = hasWrite_dot = hasWrite_white = false; current=0; offset = 0; init_escape(); init_tagNames(); yylex(); fclose(yyin); fclose(yyout); }
signature.asc
Description: PGP signature