//////////////////////////////////////////////////////////////////////////////////
//
// catwpd
//
// This program extracts the text of a WordPerfect document.
//
// Developed by Manuel Rodrigues Vieira da Silva, 2000
// prt13@netwaybbs.com.br
// PRT - 13ª Região
//
// Command:
//
//    catwpd { - | file [file ...]}
//
//    -    : process stdin
//    file : name of a file
//
// Licence:
//
//    This program is free
//
// Limitations:
//
//    Only tested with WP 7.0, 8.0 and 9.0
//    catwpd does not keep table formats
//
//////////////////////////////////////////////////////////////////////////////////

#include <limits.h>
#include <math.h>   // kept for code elsewhere in this file that still calls pow()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

//
// Character sets of WordPerfect.
//
// WP_char_sets[set - 1][code] is the single output byte emitted for character
// `code` of WordPerfect character set `set` (sets 1 to 4 only).
//
// Accented letters are written as explicit byte values instead of character
// literals so the table does not depend on the encoding of this source file.
// NOTE(review): the byte values assume the original source was saved as
// ISO-8859-1 / CP1252 -- confirm against the original file.
//
// The rows shared by several sets are spelled once through the local macros
// below; only the rows with set-specific overrides are written out in full.
//
#define WP_ROW_00 \
    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0D,0x0c,0x0d,0x0e,0x0f
#define WP_ROW_10 \
    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x2D,0x20
#define WP_ROWS_20_4F \
    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, \
    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, \
    0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f
#define WP_ROWS_50_FF \
    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, \
    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, \
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, \
    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, \
    0x90,0x60,0x27,0x22,0x22,0x95,0x2D,0x2D,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, \
    0x20,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0x22,0xac,0xad,0xae,0xaf, \
    0xb0,0xb1,0xb2,0xb3,'i',0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0x22,0xbc,0xbd,0xbe,0xbf, \
    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, \
    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, \
    0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, \
    0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef

const unsigned char WP_char_sets[4][256] = {
    /* set 1 */
    { WP_ROW_00,
      /* 0x1a -> A-acute (0xC1), 0x1c -> A-circumflex (0xC2) */
      0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xC1,0x1b,0xC2,0x1d,0x2D,0x20,
      /* 0x20 -> A-grave (0xC0), 0x2a -> E-circumflex (0xCA) */
      0xC0,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0xCA,0x2b,0x2c,0x2d,0x2e,0x2f,
      /* 0x30 -> 'I'; 0x3a..0x3d -> O-acute, o-acute, O-circumflex, o-circumflex */
      'I',0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xD3,0xF3,0xD4,0xF4,0x3e,0x3f,
      /* 0x42 -> U-acute (0xDA) */
      0x40,0x41,0xDA,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
      WP_ROWS_50_FF },
    /* set 2 */
    { WP_ROW_00, WP_ROW_10, WP_ROWS_20_4F, WP_ROWS_50_FF },
    /* set 3 */
    { WP_ROW_00, WP_ROW_10, WP_ROWS_20_4F, WP_ROWS_50_FF },
    /* set 4 */
    { /* 0x06 -> section sign (0xA7), 0x0f -> feminine ordinal (0xAA) */
      0x00,0x01,0x02,0x03,0x04,0x05,0xA7,0x07,0x08,0x09,0x0a,0x0D,0x0c,0x0d,0x0e,0xAA,
      /* 0x10 -> masculine ordinal (0xBA) */
      0xBA,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x2D,0x20,
      WP_ROWS_20_4F,
      WP_ROWS_50_FF }
};

#undef WP_ROW_00
#undef WP_ROW_10
#undef WP_ROWS_20_4F
#undef WP_ROWS_50_FF

//
// Verify that "f" really is a WordPerfect document.
//
// Reads the first 9 bytes of the stream:
//   byte 0      signature byte 0xFF
//   bytes 1..3  the letters "WPC"
//   bytes 4..7  position of the start of the document, little-endian
//   byte 8      must be 1 for the file to be accepted
//
// Returns the position of the start of the document inside the file, or 0
// when the stream is NULL, too short, or not a WordPerfect document (a message
// is written to stderr in those cases). On success exactly 9 bytes have been
// consumed from "f"; callers that skip forward to the document rely on that.
//
long int eh_documento_WP(FILE *f)
{
    enum { TAMANHO_PREFIXO = 9, PRODUTO_WORDPERFECT = 1 };

    unsigned char prefixo[TAMANHO_PREFIXO];
    unsigned long inicio;

    if (!f) {
        fprintf(stderr, "Dispositivo inválido!!!\n");
        return 0;
    }

    // A short read means the file cannot hold a complete prefix; checking the
    // count also guarantees no uninitialised byte is ever compared below.
    if (fread(prefixo, 1, sizeof prefixo, f) != sizeof prefixo
        || prefixo[0] != 0xFF
        || memcmp(prefixo + 1, "WPC", 3) != 0
        || prefixo[8] != PRODUTO_WORDPERFECT) {
        fprintf(stderr, "O documento não é um documento WordPerfect\n");
        return 0;
    }

    // Assemble the little-endian 32-bit offset with integer shifts.
    inicio = (unsigned long)prefixo[4]
           | ((unsigned long)prefixo[5] << 8)
           | ((unsigned long)prefixo[6] << 16)
           | ((unsigned long)prefixo[7] << 24);

    // Where long is 32 bits wide the top bit would not fit the return type.
    if (inicio > (unsigned long)LONG_MAX) {
        fprintf(stderr, "O documento não é um documento WordPerfect\n");
        return 0;
    }

    return (long int)inicio;
}

//
// Show the command syntax on stdout and terminate the program with status 2.
//
void help(void)
{
    printf(
        "catwpd - Extrai o texto de um documento WordPerfect e o envia para stdout\n"
        "Copyright Manuel Rodrigues Vieira da Silva (manuel@openline.com.br), 2000\n\n"
        "Sintaxe: \n\n\tcatwpd { - | arquivo [arquivo ...]}\n"
    );
    exit(2);
}

//
// Extract the text of document to stdout
//
Extrai o texto do arquivo // void extrai_texto(FILE *f) { /* Default Extended International Characters */ /* Caracteres internacionais extendidos */ unsigned char DEIC[] = { 0 , 0, 0, 0, 0, 0, 0, 'á', 'à', 'â', 'ã', 'Ã', 'ç', 'Ç', 0, 'é', 'É', 0, 'ê', 'í', 0, 0, 0, 0, 'õ', 'Õ', 0, 0, 'ü', 'Ü', 'ú', 0, 0 }; int c; long int iniciodocumento = eh_documento_WP(f); if ( iniciodocumento ) { long int i; for( i = 1; i < iniciodocumento - 8 && !feof(f); i++) getc(f); c = getc(f); while ( !feof(f)) { // // Ignore Fixed-Length Multi-Byte Functions of WordPerfect // Ignora funções de tamanho fixo do WordPerfect // if ( c > 0xF0 && c <= 0xFF) { int nc = getc(f); while ( c != nc && !feof(f)) nc = getc(f); } else // // Ignore Variable-Length Multi-Byte Functions WordPerfect // Ignora as funções de tamanho variável do WordPerfect // if ( c >= 0xD0 && c < 0xE3) { int deslocamento = 0; // // Process End-Of-Line Functions // Processa as funçõe de final de linha // if (c == 0xD0 ) { c = getc(f); switch(c) { case 1: case 2: case 3: case 4: case 5: case 6: { putc( '\n', stdout); break; } case 10: { putc('\t', stdout); putc('|', stdout); putc(' ', stdout); break; } case 11: case 12: case 13: case 14: case 15: case 16: { putc( '\n', stdout); break; } } } else // // Process Tab Functions // Processa as funções de tabulação // if ( c == 0xE0 ) { putc( '\t', stdout); getc(f); } else // // // getc(f); // Calculate length of function // Calcula tamanho da função for ( int i = 0; i < 4; i+=2) { int midlebaixo, midlealto; c = getc(f); midlebaixo = (c & 0x0F); midlealto = (c & 0xF0)/16; deslocamento += midlebaixo * int(pow(16, i)) + midlealto * int(pow( 16, (i+1))); } // Ignore to end of function // Ignora até o final da função for ( int i = 0; i < deslocamento - 4; i ++) getc(f); } else // // Print ASCII text characters // Imprime os caracteres ASCII // if ( c >= 33 && c <= 126 ) putc( c, stdout); else // // Print international characters // Imprime os caracteres internacionais // if (c >= 1 && c 
<= 32) putc( DEIC[c], stdout); else // // Print characters of char sets of WordPerfect // Imprime os caracteres pertencentes aos conjuntos de caracteres do // WordPerfect // if ( c == 0xF0) { int char_set; c = getc(f); char_set = getc(f); getc(f); putc ( WP_char_sets[char_set - 1][c], stdout); } else // // Print space // Imprime espaço // if ( c == 0x80 || c == 0x81) putc( ' ', stdout); else // // Print hyphen // Imprime hifen // if ( c == 132 ) putc( '-', stdout); else // // Print line feed // Imprime final de linha // if ( (c >= 180 && c <= 207) || c == 136 || c == 137 ) putc('\n', stdout); c = getc(f); } } } // // Função principal do catwpd // int main(int argc,char **argv) { int stdin_processed=0; // Indica se stdin já foi processado if (argc < 2) help(); for( int i = 1; i < argc; i++) { if ( i > 1) puts("\n-----"); if (!strcmp(argv[i],"-")) if (!stdin_processed) { // // Extract text of stdin // Extrai texto da entrada padrão // extrai_texto(stdin); stdin_processed=1; } else { fprintf(stderr,"Não posso processar a entrada padrão duas vezes...\n"); exit (2); } else if (argv[i][0]=='-') { fprintf(stderr,"Nome de arquivo inválido %s\n",argv[i]); help(); } else { FILE *f=fopen(argv[i],"r"); if ( f ) // // Extract text of file // Extrai texto do arquivo // extrai_texto(f); else { fprintf(stderr, "Não foi possível abrir o arquivo %s!!!", argv[i]); exit(2); } } } return 0; }