Package: flex
Version: 2.5.35-2
Severity: normal

On ia64, flex appears to generate completely different (and broken) output
depending on whether the input file is supplied via shell redirection or
through a pipe.

For example, compare:

    merulo% flex -t < input.txt | head

    #line 3 "<stdout>"

    #define  YY_INT_ALIGNED short int

    /* A lexical scanner generated by flex */

    #define FLEX_SCANNER
    #define YY_FLEX_MAJOR_VERSION 2
    #define YY_FLEX_MINOR_VERSION 5

with:

    merulo% cat input.txt | flex -t | head        
    {
      bufferAppend(buffer, yytext);
      yy_push_state(C24);
    }
    "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\object"       {
      bufferAppend(buffer, yytext);
      yy_push_state(C25);
    }
    "{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\docvar"       {
      bufferAppend(buffer, yytext);


This latter output is clearly broken.

This appears to be the cause of #500171, which is in turn blocking the
transition of a version of apertium fixing an RC bug, #496395. (In case it
isn't obvious, "cat" is being used as minimal testcase - see #500171 for the
full log.)

The offending input.txt is attached.


Regards,

-- 
      ,''`.
     : :'  :     Chris Lamb
     `. `'`      [EMAIL PROTECTED]
       `-

%{

#include <cstdlib>
#include <iostream>
#include <map>
#include <vector>
#include <regex.h>
#include <string>
#include <lttoolbox/lt_locale.h>
#include <lttoolbox/ltstr.h>

using namespace std;

// Wide text collected by bufferAppend() from the scanner actions; written
// out and cleared by printBuffer().
wstring buffer;
// Raw bytes carried over between calls while a multibyte character is still
// incomplete (used by bufferAppend() and by the catch-all `.` rule).
string symbuf = "";
// isDot: set by some rules and at EOF; makes the next printBuffer() emit ".[]".
// hasWrite_dot / hasWrite_white: set to true after text is written to yyout;
// they are not read anywhere in the code visible in this file.
bool isDot, hasWrite_dot, hasWrite_white;
// Not referenced by the code visible in this file.
FILE *formatfile;
// Scanner bookkeeping: actions set it to "buffer"; the whitespace rule
// compares it against "open_tag"; main() resets it to "".
string last;
// Only reset to 0 in main() in the code visible here.
int current;
// Incremented once per wide character the rules write directly to yyout.
long int offset;


// offsets / orders are not used by the visible code; tags.back() is appended
// to by the whitespace rule when last == "open_tag".
vector<long int> offsets;
vector<wstring> tags;
vector<int> orders;

// Compiled by init_escape(); used by escape() to find characters to escape.
regex_t escape_chars;
// Compiled by init_tagNames(); used by get_tagName().
regex_t names_regexp;

/**
 * Decode multibyte text into wide characters and append them to buf.
 *
 * str is first appended to the global carry-over buffer `symbuf`, then
 * `symbuf` is decoded with mbtowc() in the current locale:
 *  - a decodable character is appended to buf;
 *  - an undecodable byte with more than MB_CUR_MAX bytes after it is replaced
 *    by L'?' and skipped;
 *  - an undecodable tail (too short to judge) is kept in `symbuf` so that a
 *    later call can complete it, and the function returns early.
 *
 * @param buf  wide string that receives the decoded characters
 * @param str  multibyte bytes to decode (after whatever is pending in symbuf)
 */
void bufferAppend(wstring &buf, string const &str)
{
  symbuf.append(str);

  for(size_t i = 0, limit = symbuf.size(); i < limit;)
  {
    wchar_t symbol;
    int gap = mbtowc(&symbol, symbuf.c_str() + i, MB_CUR_MAX);
    if(gap == -1)
    {
      if(i + MB_CUR_MAX < limit)
      {
        // Enough bytes follow to be sure this is garbage, not a split character.
        buf += L'?';
        gap = 1;
      }
      else
      {
        // Possibly an incomplete sequence: keep the tail for the next call.
        symbuf = symbuf.substr(i);
        return;
      }
    }
    else
    {
      buf += symbol;
      if(gap == 0)
      {
        // mbtowc() reports a NUL byte as length 0; without this the index
        // would never advance and the loop would not terminate.
        gap = 1;
      }
    }

    i += gap;
  }

  symbuf = "";
  return;
}


/**
 * Compile the global `escape_chars` regex (a backslash, or one of the
 * characters ] [ < > @ ^ $ /). Prints an error and exits with EXIT_FAILURE
 * if regcomp() rejects the pattern.
 */
void init_escape()
{
  int const status = regcomp(&escape_chars, "\\\\|[][<>@^$/]", REG_EXTENDED);
  if(status == 0)
  {
    return;
  }

  cerr << "ERROR: Illegal regular expression for escape characters" << endl;
  exit(EXIT_FAILURE);
}

/**
 * Compile the global `names_regexp` regex (an empty pattern in this file).
 * Prints an error and exits with EXIT_FAILURE if regcomp() rejects it.
 */
void init_tagNames()
{
  int const status = regcomp(&names_regexp, "", REG_EXTENDED);
  if(status == 0)
  {
    return;
  }

  cerr << "ERROR: Illegal regular expression for tag-names" << endl;
  exit(EXIT_FAILURE);
}

/**
 * Return a copy of str in which every backslash is doubled; all other
 * characters are copied unchanged.
 */
string backslash(string const &str)
{
  string doubled;

  for(string::const_iterator it = str.begin(); it != str.end(); ++it)
  {
    if(*it == '\\')
    {
      doubled.append(2, '\\');
    }
    else
    {
      doubled.push_back(*it);
    }
  }

  return doubled;
}


/**
 * Convert a multibyte string to a wide string, putting a backslash in front
 * of every character matched by the global `escape_chars` regex.
 *
 * Text between matches is decoded through bufferAppend(). Exits with
 * EXIT_FAILURE if a matched character cannot be decoded by mbtowc().
 *
 * @param str  multibyte input text
 * @return     the escaped wide string
 */
wstring escape(string const &str)
{
  char const * const text = str.c_str();
  wstring escaped = L"";
  int consumed = 0;   // offset in str where the next regex search starts
  regmatch_t hit;     // offsets are relative to text + consumed

  for(;;)
  {
    if(regexec(&escape_chars, text + consumed, 1, &hit, 0) != 0)
    {
      break;
    }

    // Copy the unescaped stretch before the match, then the backslash.
    bufferAppend(escaped, str.substr(consumed, hit.rm_so));
    escaped += L'\\';

    wchar_t special;
    if(mbtowc(&special, text + consumed + hit.rm_so, MB_CUR_MAX) == -1)
    {
      wcerr << L"Uno" << endl;
      wcerr << L"Encoding error." << endl;
      exit(EXIT_FAILURE);
    }

    escaped += special;
    consumed += hit.rm_eo;
  }

  // Whatever follows the last match.
  bufferAppend(escaped, str.substr(consumed));
  return escaped;
}

/**
 * Wide-string overload: encode str to multibyte text in the current locale
 * (characters wctomb() cannot encode become '?') and pass the result to
 * escape(string const &).
 *
 * @param str  wide input text
 * @return     the escaped wide string
 */
wstring escape(wstring const &str)
{
  string dest = "";

  // One scratch buffer for the whole loop. MB_CUR_MAX is a run-time value,
  // so a plain array of that size would be a non-standard variable-length
  // array; the extra element holds the terminating NUL.
  vector<char> symbol(MB_CUR_MAX + 1);

  for(size_t i = 0, limit = str.size(); i < limit; i++)
  {
    int pos = wctomb(&symbol[0], str[i]);
    if(pos == -1)
    {
      symbol[0] = '?';
      pos = 1;
    }
    symbol[pos] = 0;
    dest.append(&symbol[0]);
  }
  return escape(dest);
}

/**
 * Return the part of tag matched by the global `names_regexp` regex, or an
 * empty string when the regex does not match.
 */
string get_tagName(string tag){
  regmatch_t span;

  if(regexec(&names_regexp, tag.c_str(), 1, &span, 0) != 0)
  {
    return "";
  }

  return tag.substr(span.rm_so, span.rm_eo - span.rm_so);
}


map<string, wstring, Ltstr> S1_substitution;

void S1_init()
{
  S1_substitution["\\'8a"] = L"Å ";
  S1_substitution["\\'8c"] = L"Ś";
  S1_substitution["\\'8d"] = L"Ť";
  S1_substitution["\\'8e"] = L"Ž";
  S1_substitution["\\'8f"] = L"Ź";
  S1_substitution["\\'9a"] = L"Å¡";
  S1_substitution["\\'9c"] = L"ś";
  S1_substitution["\\'9d"] = L"Å¥";
  S1_substitution["\\'9e"] = L"ž";
  S1_substitution["\\'9f"] = L"ź";
  S1_substitution["\\'a3"] = L"Ł";
  S1_substitution["\\'a5"] = L"Ą";
  S1_substitution["\\'aa"] = L"Ş";
  S1_substitution["\\'af"] = L"Å»";
  S1_substitution["\\'b3"] = L"ł";
  S1_substitution["\\'b5"] = L"µ";
  S1_substitution["\\'b9"] = L"ą";
  S1_substitution["\\'ba"] = L"ş";
  S1_substitution["\\'bc"] = L"Ľ";
  S1_substitution["\\'be"] = L"ľ";
  S1_substitution["\\'bf"] = L"ż";
  S1_substitution["\\'c0"] = L"Ŕ";
  S1_substitution["\\'c1"] = L"Á";
  S1_substitution["\\'c2"] = L"Â";
  S1_substitution["\\'c3"] = L"Ă";
  S1_substitution["\\'c4"] = L"Ä";
  S1_substitution["\\'c5"] = L"Ĺ";
  S1_substitution["\\'c6"] = L"Ć";
  S1_substitution["\\'c7"] = L"Ç";
  S1_substitution["\\'c8"] = L"Č";
  S1_substitution["\\'c9"] = L"É";
  S1_substitution["\\'ca"] = L"Ę";
  S1_substitution["\\'cb"] = L"Ë";
  S1_substitution["\\'cc"] = L"Ě";
  S1_substitution["\\'cd"] = L"Í";
  S1_substitution["\\'ce"] = L"Î";
  S1_substitution["\\'cf"] = L"Ď";
  S1_substitution["\\'d0"] = L"Ð";
  S1_substitution["\\'d1"] = L"Ń";
  S1_substitution["\\'d2"] = L"Ň";
  S1_substitution["\\'d3"] = L"Ó";
  S1_substitution["\\'d4"] = L"Ô";
  S1_substitution["\\'d5"] = L"Ő";
  S1_substitution["\\'d6"] = L"Ö";
  S1_substitution["\\'d8"] = L"Ř";
  S1_substitution["\\'d9"] = L"Å®";
  S1_substitution["\\'da"] = L"Ú";
  S1_substitution["\\'db"] = L"Ű";
  S1_substitution["\\'dc"] = L"Ü";
  S1_substitution["\\'dd"] = L"Ý";
  S1_substitution["\\'de"] = L"Å¢";
  S1_substitution["\\'df"] = L"ß";
  S1_substitution["\\'e0"] = L"ŕ";
  S1_substitution["\\'e1"] = L"á";
  S1_substitution["\\'e2"] = L"â";
  S1_substitution["\\'e3"] = L"ă";
  S1_substitution["\\'e4"] = L"ä";
  S1_substitution["\\'e5"] = L"ĺ";
  S1_substitution["\\'e6"] = L"ć";
  S1_substitution["\\'e7"] = L"ç";
  S1_substitution["\\'e8"] = L"č";
  S1_substitution["\\'e9"] = L"é";
  S1_substitution["\\'ea"] = L"ę";
  S1_substitution["\\'eb"] = L"ë";
  S1_substitution["\\'ec"] = L"ě";
  S1_substitution["\\'ed"] = L"í";
  S1_substitution["\\'ee"] = L"î";
  S1_substitution["\\'ef"] = L"ď";
  S1_substitution["\\'f0"] = L"đ";
  S1_substitution["\\'f1"] = L"ń";
  S1_substitution["\\'f2"] = L"ň";
  S1_substitution["\\'f3"] = L"ó";
  S1_substitution["\\'f4"] = L"ô";
  S1_substitution["\\'f5"] = L"ő";
  S1_substitution["\\'f6"] = L"ö";
  S1_substitution["\\'f8"] = L"ř";
  S1_substitution["\\'f9"] = L"ů";
  S1_substitution["\\'fa"] = L"ú";
  S1_substitution["\\'fb"] = L"ű";
  S1_substitution["\\'fc"] = L"ü";
  S1_substitution["\\'fd"] = L"ý";
  S1_substitution["\\'fe"] = L"Å£";
  S1_substitution["\\'ff"] = L"˙";
  S1_substitution["\\'8a\r\n"] = L"Å ";
  S1_substitution["\\'8c\r\n"] = L"Ś";
  S1_substitution["\\'8d\r\n"] = L"Ť";
  S1_substitution["\\'8e\r\n"] = L"Ž";
  S1_substitution["\\'8f\r\n"] = L"Ź";
  S1_substitution["\\'9a\r\n"] = L"Å¡";
  S1_substitution["\\'9c\r\n"] = L"ś";
  S1_substitution["\\'9d\r\n"] = L"Å¥";
  S1_substitution["\\'9e\r\n"] = L"ž";
  S1_substitution["\\'9f\r\n"] = L"ź";
  S1_substitution["\\'a3\r\n"] = L"Ł";
  S1_substitution["\\'a5\r\n"] = L"Ą";
  S1_substitution["\\'aa\r\n"] = L"Ş";
  S1_substitution["\\'af\r\n"] = L"Å»";
  S1_substitution["\\'b3\r\n"] = L"ł";
  S1_substitution["\\'b5\r\n"] = L"µ";
  S1_substitution["\\'b9\r\n"] = L"ą";
  S1_substitution["\\'ba\r\n"] = L"ş";
  S1_substitution["\\'bc\r\n"] = L"Ľ";
  S1_substitution["\\'be\r\n"] = L"ľ";
  S1_substitution["\\'bf\r\n"] = L"ż";
  S1_substitution["\\'c0\r\n"] = L"Ŕ";
  S1_substitution["\\'c1\r\n"] = L"Á";
  S1_substitution["\\'c2\r\n"] = L"Â";
  S1_substitution["\\'c3\r\n"] = L"Ă";
  S1_substitution["\\'c4\r\n"] = L"Ä";
  S1_substitution["\\'c5\r\n"] = L"Ĺ";
  S1_substitution["\\'c6\r\n"] = L"Ć";
  S1_substitution["\\'c7\r\n"] = L"Ç";
  S1_substitution["\\'c8\r\n"] = L"Č";
  S1_substitution["\\'c9\r\n"] = L"É";
  S1_substitution["\\'ca\r\n"] = L"Ę";
  S1_substitution["\\'cb\r\n"] = L"Ë";
  S1_substitution["\\'cc\r\n"] = L"Ě";
  S1_substitution["\\'cd\r\n"] = L"Í";
  S1_substitution["\\'ce\r\n"] = L"Î";
  S1_substitution["\\'cf\r\n"] = L"Ď";
  S1_substitution["\\'d0\r\n"] = L"Ð";
  S1_substitution["\\'d1\r\n"] = L"Ń";
  S1_substitution["\\'d2\r\n"] = L"Ň";
  S1_substitution["\\'d3\r\n"] = L"Ó";
  S1_substitution["\\'d4\r\n"] = L"Ô";
  S1_substitution["\\'d5\r\n"] = L"Ő";
  S1_substitution["\\'d6\r\n"] = L"Ö";
  S1_substitution["\\'d8\r\n"] = L"Ř";
  S1_substitution["\\'d9\r\n"] = L"Å®";
  S1_substitution["\\'da\r\n"] = L"Ú";
  S1_substitution["\\'db\r\n"] = L"Ű";
  S1_substitution["\\'dc\r\n"] = L"Ü";
  S1_substitution["\\'dd\r\n"] = L"Ý";
  S1_substitution["\\'de\r\n"] = L"Å¢";
  S1_substitution["\\'df\r\n"] = L"ß";
  S1_substitution["\\'e0\r\n"] = L"ŕ";
  S1_substitution["\\'e1\r\n"] = L"á";
  S1_substitution["\\'e2\r\n"] = L"â";
  S1_substitution["\\'e3\r\n"] = L"ă";
  S1_substitution["\\'e4\r\n"] = L"ä";
  S1_substitution["\\'e5\r\n"] = L"ĺ";
  S1_substitution["\\'e6\r\n"] = L"ć";
  S1_substitution["\\'e7\r\n"] = L"ç";
  S1_substitution["\\'e8\r\n"] = L"č";
  S1_substitution["\\'e9\r\n"] = L"é";
  S1_substitution["\\'ea\r\n"] = L"ę";
  S1_substitution["\\'eb\r\n"] = L"ë";
  S1_substitution["\\'ec\r\n"] = L"ě";
  S1_substitution["\\'ed\r\n"] = L"í";
  S1_substitution["\\'ee\r\n"] = L"î";
  S1_substitution["\\'ef\r\n"] = L"ď";
  S1_substitution["\\'f0\r\n"] = L"đ";
  S1_substitution["\\'f1\r\n"] = L"ń";
  S1_substitution["\\'f2\r\n"] = L"ň";
  S1_substitution["\\'f3\r\n"] = L"ó";
  S1_substitution["\\'f4\r\n"] = L"ô";
  S1_substitution["\\'f5\r\n"] = L"ő";
  S1_substitution["\\'f6\r\n"] = L"ö";
  S1_substitution["\\'f8\r\n"] = L"ř";
  S1_substitution["\\'f9\r\n"] = L"ů";
  S1_substitution["\\'fa\r\n"] = L"ú";
  S1_substitution["\\'fb\r\n"] = L"ű";
  S1_substitution["\\'fc\r\n"] = L"ü";
  S1_substitution["\\'fd\r\n"] = L"ý";
  S1_substitution["\\'fe\r\n"] = L"Å£";
  S1_substitution["\\'ff\r\n"] = L"˙";
  S1_substitution["\\'8a\n"] = L"Å ";
  S1_substitution["\\'8c\n"] = L"Ś";
  S1_substitution["\\'8d\n"] = L"Ť";
  S1_substitution["\\'8e\n"] = L"Ž";
  S1_substitution["\\'8f\n"] = L"Ź";
  S1_substitution["\\'9a\n"] = L"Å¡";
  S1_substitution["\\'9c\n"] = L"ś";
  S1_substitution["\\'9d\n"] = L"Å¥";
  S1_substitution["\\'9e\n"] = L"ž";
  S1_substitution["\\'9f\n"] = L"ź";
  S1_substitution["\\'a3\n"] = L"Ł";
  S1_substitution["\\'a5\n"] = L"Ą";
  S1_substitution["\\'aa\n"] = L"Ş";
  S1_substitution["\\'af\n"] = L"Å»";
  S1_substitution["\\'b3\n"] = L"ł";
  S1_substitution["\\'b5\n"] = L"µ";
  S1_substitution["\\'b9\n"] = L"ą";
  S1_substitution["\\'ba\n"] = L"ş";
  S1_substitution["\\'bc\n"] = L"Ľ";
  S1_substitution["\\'be\n"] = L"ľ";
  S1_substitution["\\'bf\n"] = L"ż";
  S1_substitution["\\'c0\n"] = L"Ŕ";
  S1_substitution["\\'c1\n"] = L"Á";
  S1_substitution["\\'c2\n"] = L"Â";
  S1_substitution["\\'c3\n"] = L"Ă";
  S1_substitution["\\'c4\n"] = L"Ä";
  S1_substitution["\\'c5\n"] = L"Ĺ";
  S1_substitution["\\'c6\n"] = L"Ć";
  S1_substitution["\\'c7\n"] = L"Ç";
  S1_substitution["\\'c8\n"] = L"Č";
  S1_substitution["\\'c9\n"] = L"É";
  S1_substitution["\\'ca\n"] = L"Ę";
  S1_substitution["\\'cb\n"] = L"Ë";
  S1_substitution["\\'cc\n"] = L"Ě";
  S1_substitution["\\'cd\n"] = L"Í";
  S1_substitution["\\'ce\n"] = L"Î";
  S1_substitution["\\'cf\n"] = L"Ď";
  S1_substitution["\\'d0\n"] = L"Ð";
  S1_substitution["\\'d1\n"] = L"Ń";
  S1_substitution["\\'d2\n"] = L"Ň";
  S1_substitution["\\'d3\n"] = L"Ó";
  S1_substitution["\\'d4\n"] = L"Ô";
  S1_substitution["\\'d5\n"] = L"Ő";
  S1_substitution["\\'d6\n"] = L"Ö";
  S1_substitution["\\'d8\n"] = L"Ř";
  S1_substitution["\\'d9\n"] = L"Å®";
  S1_substitution["\\'da\n"] = L"Ú";
  S1_substitution["\\'db\n"] = L"Ű";
  S1_substitution["\\'dc\n"] = L"Ü";
  S1_substitution["\\'dd\n"] = L"Ý";
  S1_substitution["\\'de\n"] = L"Å¢";
  S1_substitution["\\'df\n"] = L"ß";
  S1_substitution["\\'e0\n"] = L"ŕ";
  S1_substitution["\\'e1\n"] = L"á";
  S1_substitution["\\'e2\n"] = L"â";
  S1_substitution["\\'e3\n"] = L"ă";
  S1_substitution["\\'e4\n"] = L"ä";
  S1_substitution["\\'e5\n"] = L"ĺ";
  S1_substitution["\\'e6\n"] = L"ć";
  S1_substitution["\\'e7\n"] = L"ç";
  S1_substitution["\\'e8\n"] = L"č";
  S1_substitution["\\'e9\n"] = L"é";
  S1_substitution["\\'ea\n"] = L"ę";
  S1_substitution["\\'eb\n"] = L"ë";
  S1_substitution["\\'ec\n"] = L"ě";
  S1_substitution["\\'ed\n"] = L"í";
  S1_substitution["\\'ee\n"] = L"î";
  S1_substitution["\\'ef\n"] = L"ď";
  S1_substitution["\\'f0\n"] = L"đ";
  S1_substitution["\\'f1\n"] = L"ń";
  S1_substitution["\\'f2\n"] = L"ň";
  S1_substitution["\\'f3\n"] = L"ó";
  S1_substitution["\\'f4\n"] = L"ô";
  S1_substitution["\\'f5\n"] = L"ő";
  S1_substitution["\\'f6\n"] = L"ö";
  S1_substitution["\\'f8\n"] = L"ř";
  S1_substitution["\\'f9\n"] = L"ů";
  S1_substitution["\\'fa\n"] = L"ú";
  S1_substitution["\\'fb\n"] = L"ű";
  S1_substitution["\\'fc\n"] = L"ü";
  S1_substitution["\\'fd\n"] = L"ý";
  S1_substitution["\\'fe\n"] = L"Å£";
  S1_substitution["\\'ff\n"] = L"˙";
  S1_substitution["\\'8a\r"] = L"Å ";
  S1_substitution["\\'8c\r"] = L"Ś";
  S1_substitution["\\'8d\r"] = L"Ť";
  S1_substitution["\\'8e\r"] = L"Ž";
  S1_substitution["\\'8f\r"] = L"Ź";
  S1_substitution["\\'9a\r"] = L"Å¡";
  S1_substitution["\\'9c\r"] = L"ś";
  S1_substitution["\\'9d\r"] = L"Å¥";
  S1_substitution["\\'9e\r"] = L"ž";
  S1_substitution["\\'9f\r"] = L"ź";
  S1_substitution["\\'a3\r"] = L"Ł";
  S1_substitution["\\'a5\r"] = L"Ą";
  S1_substitution["\\'aa\r"] = L"Ş";
  S1_substitution["\\'af\r"] = L"Å»";
  S1_substitution["\\'b3\r"] = L"ł";
  S1_substitution["\\'b5\r"] = L"µ";
  S1_substitution["\\'b9\r"] = L"ą";
  S1_substitution["\\'ba\r"] = L"ş";
  S1_substitution["\\'bc\r"] = L"Ľ";
  S1_substitution["\\'be\r"] = L"ľ";
  S1_substitution["\\'bf\r"] = L"ż";
  S1_substitution["\\'c0\r"] = L"Ŕ";
  S1_substitution["\\'c1\r"] = L"Á";
  S1_substitution["\\'c2\r"] = L"Â";
  S1_substitution["\\'c3\r"] = L"Ă";
  S1_substitution["\\'c4\r"] = L"Ä";
  S1_substitution["\\'c5\r"] = L"Ĺ";
  S1_substitution["\\'c6\r"] = L"Ć";
  S1_substitution["\\'c7\r"] = L"Ç";
  S1_substitution["\\'c8\r"] = L"Č";
  S1_substitution["\\'c9\r"] = L"É";
  S1_substitution["\\'ca\r"] = L"Ę";
  S1_substitution["\\'cb\r"] = L"Ë";
  S1_substitution["\\'cc\r"] = L"Ě";
  S1_substitution["\\'cd\r"] = L"Í";
  S1_substitution["\\'ce\r"] = L"Î";
  S1_substitution["\\'cf\r"] = L"Ď";
  S1_substitution["\\'d0\r"] = L"Ð";
  S1_substitution["\\'d1\r"] = L"Ń";
  S1_substitution["\\'d2\r"] = L"Ň";
  S1_substitution["\\'d3\r"] = L"Ó";
  S1_substitution["\\'d4\r"] = L"Ô";
  S1_substitution["\\'d5\r"] = L"Ő";
  S1_substitution["\\'d6\r"] = L"Ö";
  S1_substitution["\\'d8\r"] = L"Ř";
  S1_substitution["\\'d9\r"] = L"Å®";
  S1_substitution["\\'da\r"] = L"Ú";
  S1_substitution["\\'db\r"] = L"Ű";
  S1_substitution["\\'dc\r"] = L"Ü";
  S1_substitution["\\'dd\r"] = L"Ý";
  S1_substitution["\\'de\r"] = L"Å¢";
  S1_substitution["\\'df\r"] = L"ß";
  S1_substitution["\\'e0\r"] = L"ŕ";
  S1_substitution["\\'e1\r"] = L"á";
  S1_substitution["\\'e2\r"] = L"â";
  S1_substitution["\\'e3\r"] = L"ă";
  S1_substitution["\\'e4\r"] = L"ä";
  S1_substitution["\\'e5\r"] = L"ĺ";
  S1_substitution["\\'e6\r"] = L"ć";
  S1_substitution["\\'e7\r"] = L"ç";
  S1_substitution["\\'e8\r"] = L"č";
  S1_substitution["\\'e9\r"] = L"é";
  S1_substitution["\\'ea\r"] = L"ę";
  S1_substitution["\\'eb\r"] = L"ë";
  S1_substitution["\\'ec\r"] = L"ě";
  S1_substitution["\\'ed\r"] = L"í";
  S1_substitution["\\'ee\r"] = L"î";
  S1_substitution["\\'ef\r"] = L"ď";
  S1_substitution["\\'f0\r"] = L"đ";
  S1_substitution["\\'f1\r"] = L"ń";
  S1_substitution["\\'f2\r"] = L"ň";
  S1_substitution["\\'f3\r"] = L"ó";
  S1_substitution["\\'f4\r"] = L"ô";
  S1_substitution["\\'f5\r"] = L"ő";
  S1_substitution["\\'f6\r"] = L"ö";
  S1_substitution["\\'f8\r"] = L"ř";
  S1_substitution["\\'f9\r"] = L"ů";
  S1_substitution["\\'fa\r"] = L"ú";
  S1_substitution["\\'fb\r"] = L"ű";
  S1_substitution["\\'fc\r"] = L"ü";
  S1_substitution["\\'fd\r"] = L"ý";
  S1_substitution["\\'fe\r"] = L"Å£";
  S1_substitution["\\'ff\r"] = L"˙";
}


void printBuffer()
{
  if(isDot)
  {
    fputws_unlocked(L".[]", yyout);
    isDot = false;
  }
  if(buffer.size() > 8192)
  {
    string filename = tmpnam(NULL);
    FILE *largeblock = fopen(filename.c_str(), "w");
    fputws_unlocked(buffer.c_str(), largeblock);
    fclose(largeblock);
    fputwc_unlocked(L'[', yyout);
    fputwc_unlocked(L'@', yyout);
    wchar_t cad[filename.size()];
    size_t pos = mbstowcs(cad, filename.c_str(), filename.size());
    if(pos == (size_t) -1)
    {
      wcerr << L"Tres" << endl;

      wcerr << L"Encoding error." << endl;
      exit(EXIT_FAILURE);
    }
    cad[pos] = 0;
    fputws_unlocked(cad, yyout);
    fputwc_unlocked(L']', yyout);
  }
  else if(buffer.size() > 1)
  {
    fputwc_unlocked(L'[', yyout);
    wstring const tmp = escape(buffer);
    if(tmp[0] == L'@')
    {
      fputwc_unlocked(L'\\', yyout);
    }
    fputws_unlocked(tmp.c_str(), yyout);
    fputwc_unlocked(L']', yyout);
  }
  else if(buffer.size() == 1 && buffer[0] != L' ')
  {
    fputwc_unlocked(L'[', yyout);
    wstring const tmp = escape(buffer);
    if(tmp[0] == L'@')
    {
      fputwc_unlocked(L'\\', yyout);
    }
    fputws_unlocked(tmp.c_str(), yyout);

    fputwc_unlocked(L']', yyout);
  }     
  else
  {
    fputws_unlocked(buffer.c_str(), yyout);
  }

  buffer = L"";
}
  
%}

%x C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C12 C13 C14 C15 C16 C17 C18 C19 C20 C21
%x C22 C23 C24 C25 C26 C27 C28 C29 C30 C31 C32 C33
  /* The exclusive start conditions are declared on two %x lines: a list that
     wraps onto a line without %x is read as a name definition, leaving
     C22-C33 undeclared. */
%option nounput
%option noyywrap
%option caseless
%option stack

%%


<C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C18,C19,C20>{

        ";"     {
  // These states buffer everything up to and including the next ';',
  // then return to the state that was active before the push.
  last = "buffer";
  bufferAppend(buffer, yytext);
  yy_pop_state();
}

        \n|.    {
  // Any other character (newline included) stays in the buffer.
  last = "buffer";
  bufferAppend(buffer, yytext);
}

}

<C17,C21,C22,C23,C24,C25,C26,C27,C28,C29,C30,C31,C32,C33>{

        "}"     {
  // These states buffer everything up to and including the next '}',
  // then return to the state that was active before the push.
  last = "buffer";
  bufferAppend(buffer, yytext);
  yy_pop_state();
}

        \n|.    {
  // Any other character (newline included) stays in the buffer.
  last = "buffer";
  bufferAppend(buffer, yytext);
}

}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\snext"        {
  // Pattern shared by this family of rules: an opening '{', optional
  // whitespace, any number of backslash-introduced words (not \'xx escapes),
  // then one specific keyword (matched case-insensitively because of
  // %option caseless). The whole match is buffered and the scanner pushes the
  // exclusive state that keeps buffering until that state's terminator.
  bufferAppend(buffer, yytext);
  yy_push_state(C1);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\keycode"      {
  bufferAppend(buffer, yytext);
  yy_push_state(C2);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fcharset"     {
  bufferAppend(buffer, yytext);
  yy_push_state(C3);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fnil" {
  bufferAppend(buffer, yytext);
  yy_push_state(C4);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\froman"       {
  bufferAppend(buffer, yytext);
  yy_push_state(C5);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fswiss"       {
  bufferAppend(buffer, yytext);
  yy_push_state(C6);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fmodern"      {
  bufferAppend(buffer, yytext);
  yy_push_state(C7);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fscript"      {
  bufferAppend(buffer, yytext);
  yy_push_state(C8);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fdecor"       {
  bufferAppend(buffer, yytext);
  yy_push_state(C9);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\ftech"        {
  bufferAppend(buffer, yytext);
  yy_push_state(C10);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fbidi"        {
  bufferAppend(buffer, yytext);
  yy_push_state(C11);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\falt" {
  bufferAppend(buffer, yytext);
  yy_push_state(C12);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fontfile"     {
  bufferAppend(buffer, yytext);
  yy_push_state(C13);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fn"   {
  bufferAppend(buffer, yytext);
  yy_push_state(C14);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\sbasedon"     {
  bufferAppend(buffer, yytext);
  yy_push_state(C15);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\additive"     {
  bufferAppend(buffer, yytext);
  yy_push_state(C16);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\file" {
  bufferAppend(buffer, yytext);
  yy_push_state(C17);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\listname"     {
  bufferAppend(buffer, yytext);
  yy_push_state(C18);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\leveltext"    {
  bufferAppend(buffer, yytext);
  yy_push_state(C19);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\levelnumbers" {
  bufferAppend(buffer, yytext);
  yy_push_state(C20);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\pict" {
  bufferAppend(buffer, yytext);
  yy_push_state(C21);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\sn"   {
  bufferAppend(buffer, yytext);
  yy_push_state(C22);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\sv"   {
  bufferAppend(buffer, yytext);
  yy_push_state(C23);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"{\\\*\\blipuid  "[^ \n\r]+"}"   {
  // Unlike its siblings, this pattern ends with a complete nested
  // {\*\blipuid ...} group rather than a bare keyword.
  bufferAppend(buffer, yytext);
  yy_push_state(C24);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\object"       {
  bufferAppend(buffer, yytext);
  yy_push_state(C25);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\docvar"       {
  bufferAppend(buffer, yytext);
  yy_push_state(C26);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\bkmkstart"    {
  bufferAppend(buffer, yytext);
  yy_push_state(C27);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\bkmkend"      {
  bufferAppend(buffer, yytext);
  yy_push_state(C28);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\rxe"  {
  bufferAppend(buffer, yytext);
  yy_push_state(C29);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fldinst"      {
  bufferAppend(buffer, yytext);
  yy_push_state(C30);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fldrslt"      {
  bufferAppend(buffer, yytext);
  yy_push_state(C31);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\pntxt"        {
  bufferAppend(buffer, yytext);
  yy_push_state(C32);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\colortbl"     {
  bufferAppend(buffer, yytext);
  yy_push_state(C33);
}
[a-f0-9]{20,}   {
  // A run of 20 or more hex digits is buffered rather than written as text;
  // isDot makes the next printBuffer() emit ".[]" before the buffer.
  isDot = true;
  bufferAppend(buffer, yytext);
}
"\\par"|"\\row"|"\\cell"        {
  // These three control words are buffered and also set isDot.
  isDot = true;
  bufferAppend(buffer, yytext);
}
"{"[ \n\r]*\\[^'][^ \n\r\\]*[ \n\r]*    {
  // '{' followed by one backslash word (not a \'xx escape) and trailing
  // whitespace: buffered as-is.
  bufferAppend(buffer, yytext);
}
\\[^'][^ \n\r\\]*[ \n\r]*       {
  // A backslash word on its own (not a \'xx escape), with trailing whitespace.
  bufferAppend(buffer, yytext);
}
"}"     {
  // Lone closing brace: buffered.
  bufferAppend(buffer, yytext);
}
"{"     {
  // Lone opening brace: buffered.
  bufferAppend(buffer, yytext);
}

"\\'"[0-9a-fA-F][0-9a-fA-F](\r|\n|"\r\n")?      {
  // \'xx hex escape, optionally followed by a line ending. A code listed in
  // S1_substitution is written out as its replacement text (after flushing
  // the buffer); any other code is buffered unchanged.
  // The table is searched once and the iterator reused.
  map<string, wstring, Ltstr>::const_iterator const hit = S1_substitution.find(yytext);
  if(hit != S1_substitution.end())
  {
    printBuffer();
    fputws_unlocked(hit->second.c_str(), yyout);
    offset += hit->second.size();
    hasWrite_dot = hasWrite_white = true;
  }
  else
  {
    last="buffer";
    bufferAppend(buffer, yytext);
  }
}
[ \n\t\r$*]     {
  // A single whitespace character, '$' or '*' is buffered. When last is
  // "open_tag" it goes to the most recent entry of `tags` instead.
  // NOTE(review): nothing in the visible code sets last to "open_tag" or adds
  // to `tags`; tags.back() on an empty vector would be invalid - confirm.
  if (last == "open_tag") 
    bufferAppend(tags.back(), yytext);
  else
    bufferAppend(buffer, yytext);
    
}

\\|[][<>@^$/]   {
  // A backslash or one of ] [ < > @ ^ $ / is written to yyout with a
  // backslash in front, after flushing the buffer; offset counts both
  // characters.
  // NOTE(review): '$' is also in the character class of the rule above, which
  // comes first and so should win for a one-character match - confirm that
  // '$' is really meant to be buffered rather than escaped.
  printBuffer();
  fputwc_unlocked(L'\\', yyout);
  offset++;
  wchar_t symbol;
  int pos = mbtowc(&symbol, yytext, MB_CUR_MAX);
  if(pos == -1)
  {
      wcerr << L"Cuatro" << endl;

    wcerr << L"Encoding error." << endl;
    exit(EXIT_FAILURE);
  }

  fputwc_unlocked(symbol, yyout);
  offset++;
  hasWrite_dot = hasWrite_white = true;

}

.       {
  // Catch-all for any remaining character: flush the buffer, add the matched
  // text to symbuf and try to decode one wide character from it.
  printBuffer();
  symbuf += yytext;
  wchar_t symbol;
  int pos = mbtowc(&symbol, symbuf.c_str(), MB_CUR_MAX);
  if(pos == -1)
  {
    // Not decodable yet: keep collecting in symbuf until it is longer than
    // MB_CUR_MAX, then give up and emit '?'.
    if(symbuf.size() > MB_CUR_MAX)
    {
      // unknown character
      symbuf = "";
      fputwc_unlocked(L'?', yyout);
      offset++;
      hasWrite_dot = hasWrite_white = true;
    }
  }
  else
  {
    // Decoded: write the character and reset the carry-over bytes.
    symbuf = "";
    fputwc_unlocked(symbol, yyout);
    offset++;
    hasWrite_dot = hasWrite_white = true;
  }
}

<<EOF>> {
  // End of input: force the ".[]" marker, flush what is buffered and stop.
  isDot = true;
  printBuffer();
  return 0;
}
%%



/**
 * Print the command-line synopsis to stderr and terminate the program.
 *
 * Every caller in this file reaches it on an error (too many arguments, or
 * a file that cannot be opened), so the process exits with EXIT_FAILURE
 * to let shells and pipelines detect the problem.
 *
 * @param progname  program name shown in the synopsis (argv[0])
 */
void usage(string const &progname)
{

  cerr << "USAGE: " << progname << " [input_file [output_file]" << ']' << endl;
  
  cerr << "rtf format processor " << endl;
  exit(EXIT_FAILURE);  
}

/**
 * Entry point: `prog [input_file [output_file]]`.
 *
 * With no arguments the scanner's default streams are used. One argument
 * names the input file, two name input and output. More than two arguments,
 * or a file that cannot be opened, ends in usage().
 */
int main(int argc, char *argv[])
{
  LtLocale::tryToSetLocale();

  if(argc > 3)
  {
    usage(argv[0]);
  }

  // The output file (if given) is opened before the input file.
  if(argc == 3)
  {
    yyout = fopen(argv[2], "w");
    if(yyout == NULL)
    {
      usage(argv[0]);
    }
  }

  if(argc == 2 || argc == 3)
  {
    yyin = fopen(argv[1], "r");
    if(yyin == NULL)
    {
      usage(argv[0]);
    }
  }

  // Call each start-condition stack helper once; per the original comment
  // this is only here to prevent a warning message.
  yy_push_state(1);
  yy_top_state();
  yy_pop_state();

  S1_init();

  // Reset scanner bookkeeping, compile the regexes, then scan the input.
  last = "";
  buffer = L"";
  isDot = hasWrite_dot = hasWrite_white = false;
  current = 0;
  offset = 0;
  init_escape();
  init_tagNames();
  yylex();

  fclose(yyin);
  fclose(yyout);
}

Attachment: signature.asc
Description: PGP signature

Reply via email to