//////////////////////////////////////////////////////////////////////////////////
//
// catwpd
//
// This program extracts the text of a WordPerfect document
//
// Developed by Manuel Rodrigues Vieira da Silva, 2000
//		prt13@netwaybbs.com.br
//		PRT - 13ª Região
// Command:
//
//	catwpd { - | file [file ...]}
//
//	-	: process stdin
// 	file	: file name
//
// Licence:
//
//	This program is free
//
// Limitations:
//
//	Only tested for WP 7.0, 8.0 and 9.0 
// 	catwpd does not preserve table formatting
//
//--------------------------------------------------------------------------------
//
// Este programa tem o objetivo de extrair o texto de um documento do WordPerfect
//
// Autor: Manuel Rodrigues Vieira da Silva, 2000 
//	  prt13@netwaybbs.com.br
//	  PRT - 13ª Região
//
// Sintaxe:
//	catwpd { - | arquivo [arquivo ...]}
//
//	-	: Processa a entrada padrão
//	arquivo	: Nome do arquivo
//
// Licença:
//
//	Este programa é de uso livre...
//
// Limitações:
//
//	Testado apenas para o WP 7.0, 8.0 e 9.0
//	catwpd não mantém a formatação de tabelas
//
//////////////////////////////////////////////////////////////////////////////////

#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

//
// Output translation tables for WordPerfect extended characters.
//
// extrai_texto() indexes this table as WP_char_sets[char_set - 1][code],
// where both values are read from the document, and writes the resulting
// byte to stdout. Most entries map a code to itself; a handful are remapped
// to an accented letter, a symbol, or a plain ASCII substitute.
//
// The remapped non-ASCII entries are written as numeric codes so the table
// does not depend on the encoding of this source file.
// NOTE(review): the codes below assume the output is ISO-8859-1 (Latin-1),
// e.g. 0xC1 = A-acute, 0xA7 = section sign -- confirm against the encoding
// the original character literals were written in.
//
unsigned char WP_char_sets[4][256]={ 
// char_set 1
{0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0D,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xC1,0x1b,0xC2,0x1d,0x2D,0x20,
0xC0,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0xCA,0x2b,0x2c,0x2d,0x2e,0x2f,
'I',0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xD3,0xF3,0xD4,0xF4,0x3e,0x3f,
0x40,0x41,0xDA,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x60,0x27,0x22,0x22,0x95,0x2D,0x2D,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0x20,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0x22,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,'i',0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0x22,0xbc,0xbd,0xbe,0xbf,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef},
// char_set 2
{0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0D,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x2D,0x20,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x60,0x27,0x22,0x22,0x95,0x2D,0x2D,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0x20,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0x22,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,'i',0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0x22,0xbc,0xbd,0xbe,0xbf,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef},
// char_set 3
{0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0D,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x2D,0x20,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x60,0x27,0x22,0x22,0x95,0x2D,0x2D,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0x20,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0x22,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,'i',0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0x22,0xbc,0xbd,0xbe,0xbf,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef},
// char_set 4
{0x00,0x01,0x02,0x03,0x04,0x05,0xA7,0x07,0x08,0x09,0x0a,0x0D,0x0c,0x0d,0x0e,0xAA,
0xBA,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x2D,0x20,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x60,0x27,0x22,0x22,0x95,0x2D,0x2D,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0x20,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0x22,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,'i',0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0x22,0xbc,0xbd,0xbe,0xbf,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef}};



//
// Check whether "f" is positioned at the start of a WordPerfect document.
//
// The function consumes the first 9 bytes of the stream:
//   byte 0      : skipped without being validated
//   bytes 1..3  : must be the ASCII letters "WPC"
//   bytes 4..7  : offset of the document area, stored least significant
//                 byte first
//   byte 8      : must be 1
//
// Parameters:
//   f : stream to inspect; may be NULL.
//
// Returns the offset read from bytes 4..7 when every check passes, and 0
// otherwise (NULL stream, truncated header, wrong signature, byte 8 != 1,
// or an offset that does not fit in a long). A diagnostic is written to
// stderr whenever a check fails.
//
long int eh_documento_WP( FILE *f)
{
   unsigned char signature[3];
   unsigned long offset = 0;
   int c, i;

   if ( !f )
   {
      fprintf(stderr,"Dispositivo inválido!!!\n");
      return 0;
   }

   // Skip byte 0, then require the three signature bytes to be fully read
   // before comparing them.
   if ( getc( f) == EOF
        || fread( signature, 1, sizeof signature, f) != sizeof signature
        || memcmp( signature, "WPC", sizeof signature) != 0 )
   {
      fprintf(stderr,"O documento não é um documento WordPerfect\n");
      return 0;
   }

   // Assemble the 32-bit little-endian offset with integer shifts; a short
   // read must not be folded into the value.
   for ( i = 0; i < 4; i++)
   {
      c = getc( f);
      if ( c == EOF )
      {
         fprintf(stderr,"O documento não é um documento WordPerfect\n");
         return 0;
      }
      offset |= (unsigned long)c << (8 * i);
   }

   c = getc( f);
   if ( c != 1 || offset > (unsigned long)LONG_MAX )
   {
      fprintf(stderr,"O documento não é um documento WordPerfect\n");
      return 0;
   }

   return (long int)offset;
}


//
// Print the program banner and the command syntax to stdout, then
// terminate the process with exit status 2. This function never returns.
//
void help(void)
{
   static const char usage[] =
      "catwpd - Extrai o texto de um documento WordPerfect e o envia para stdout\n"
      "Copyright Manuel Rodrigues Vieira da Silva (manuel@openline.com.br), 2000\n\n"
      "Sintaxe: \n\n\tcatwpd { - | arquivo [arquivo ...]}\n";

   fputs( usage, stdout);
   exit(2);
}


//
// Extract the text of a WordPerfect document and write it to stdout.
//
// The stream is first validated with eh_documento_WP(); if that returns 0
// nothing is written. Otherwise the remainder of the header is skipped and
// the document bytes are interpreted one at a time:
//
//   0xF1..0xFF          fixed-length function: skipped up to the next byte
//                       equal to the opening one
//   0xD0..0xE2          variable-length function: a sub-function byte and a
//                       16-bit little-endian length follow, and the rest of
//                       the function is skipped; 0xD0 (end of line) and
//                       0xE0 (tab) also produce output
//   33..126             written unchanged
//   1..32               translated through the DEIC table
//   0xF0                extended character: <code> <char_set> <byte>,
//                       translated through WP_char_sets
//   0x80, 0x81          space
//   0x84                hyphen
//   0xB4..0xCF, 0x88, 0x89   newline
//
// Any other byte is ignored. Reading stops at end of file or on a read
// error.
//
// Parameters:
//   f : stream positioned at the first byte of the file.
//
void extrai_texto(FILE *f)
{
   // Default Extended International Characters: output byte for input
   // bytes 1..32; 0 marks a code without a mapping. Values are numeric so
   // they do not depend on the encoding of this source file.
   // NOTE(review): codes assume ISO-8859-1 output (0xE1 = a-acute, ...)
   // -- confirm against the encoding of the original character literals.
   static const unsigned char DEIC[33] = {
      /*  0 */ 0,    0,    0,    0,    0,    0,    0,    0xE1,
      /*  8 */ 0xE0, 0xE2, 0xE3, 0xC3, 0xE7, 0xC7, 0,    0xE9,
      /* 16 */ 0xC9, 0,    0xEA, 0xED, 0,    0,    0,    0,
      /* 24 */ 0xF5, 0xD5, 0,    0,    0xFC, 0xDC, 0xFA, 0,
      /* 32 */ 0
   };

   long int doc_start = eh_documento_WP(f);
   long int pos;
   int c;

   if ( !doc_start )
      return;

   // eh_documento_WP() has already consumed 9 header bytes; discard the
   // rest of the header so the next byte read is the one at doc_start.
   for ( pos = 9; pos < doc_start; pos++)
      if ( getc(f) == EOF )
         return;

   // Testing the value returned by getc() (rather than feof()) also ends
   // the loop on a read error instead of spinning forever.
   for ( c = getc(f); c != EOF; c = getc(f))
   {
      if ( c > 0xF0 )
      {
         // Fixed-length function: ignore everything up to the closing byte.
         int closing;
         do
            closing = getc(f);
         while ( closing != EOF && closing != c );
      }
      else if ( c >= 0xD0 && c < 0xE3 )
      {
         // Variable-length function: sub-function byte, then 16-bit length.
         int sub = getc(f);
         int lo, hi;
         long length, k;

         if ( c == 0xD0 )
         {
            // End-of-line functions.
            if ( (sub >= 1 && sub <= 6) || (sub >= 11 && sub <= 16) )
               putc( '\n', stdout);
            else if ( sub == 10 )
               fputs( "\t| ", stdout);
         }
         else if ( c == 0xE0 )
         {
            // Tab functions.
            putc( '\t', stdout);
         }

         lo = getc(f);
         hi = getc(f);
         if ( lo != EOF && hi != EOF )
         {
            // Four bytes of the function have been read so far; the
            // length counts them too, so skip only the remainder.
            length = (long)lo | ((long)hi << 8);
            for ( k = 4; k < length; k++)
               if ( getc(f) == EOF )
                  break;
         }
      }
      else if ( c >= 33 && c <= 126 )
      {
         // Plain ASCII text.
         putc( c, stdout);
      }
      else if ( c >= 1 && c <= 32 )
      {
         // International characters; codes without a mapping are dropped
         // rather than written as NUL bytes.
         if ( DEIC[c] )
            putc( DEIC[c], stdout);
      }
      else if ( c == 0xF0 )
      {
         // Character from one of the WordPerfect character sets.
         int code = getc(f);
         int char_set = getc(f);
         getc(f);                      // trailing byte of the sequence

         // Both values come from the file: only index the table when they
         // are inside its bounds.
         if ( code != EOF && char_set >= 1 && char_set <= 4 )
            putc( WP_char_sets[char_set - 1][code], stdout);
      }
      else if ( c == 0x80 || c == 0x81 )
         putc( ' ', stdout);
      else if ( c == 0x84 )
         putc( '-', stdout);
      else if ( (c >= 0xB4 && c <= 0xCF) || c == 0x88 || c == 0x89 )
         putc( '\n', stdout);
   }
}


//
// Program entry point.
//
// Every command-line argument is processed in order:
//   "-"            extract text from stdin (allowed only once)
//   "-something"   rejected as an invalid file name; usage is shown
//   anything else  opened as a file and its text extracted
//
// From the second argument on, a "-----" separator line is written to
// stdout before the argument is processed.
//
// Returns 0 when all arguments were processed. The process exits with
// status 2 when no argument is given, when "-" is repeated, when an
// argument is an invalid name, or when a file cannot be opened.
//
int main(int argc,char **argv)
{
   int stdin_processed = 0;   // set once stdin has been consumed
   int i;

   if (argc < 2)
      help();

   for (i = 1; i < argc; i++)
   {
      const char *arg = argv[i];

      if (i > 1)
         puts("\n-----");

      if (strcmp(arg, "-") == 0)
      {
         if (stdin_processed)
         {
            fprintf(stderr,"Não posso processar a entrada padrão duas vezes...\n");
            exit (2);
         }
         extrai_texto(stdin);
         stdin_processed = 1;
      }
      else if (arg[0] == '-')
      {
         fprintf(stderr,"Nome de arquivo inválido %s\n",arg);
         help();
      }
      else
      {
         // The document is binary data, so open it in binary mode.
         FILE *f = fopen(arg, "rb");
         if (!f)
         {
            fprintf(stderr, "Não foi possível abrir o arquivo %s!!!\n", arg);
            exit(2);
         }
         extrai_texto(f);
         // Release the stream so long argument lists do not exhaust
         // the available file descriptors.
         fclose(f);
      }
   }
   return 0;
}

