Main Page   Compound List   File List   Compound Members   File Members  

unicode.c

00001 /*
00002 GOCR Copyright (C) 2000  Joerg Schulenburg Joerg.Schulenburg@physik.uni-magdeburg.de 
00003 GOCR API Copyright (C) 2001 Bruno Barberi Gnecco <brunobg@sourceforge.net>
00004 
00005 This program is free software; you can redistribute it and/or
00006 modify it under the terms of the GNU General Public License
00007 as published by the Free Software Foundation; either version 2
00008 of the License, or (at your option) any later version.
00009 
00010 This program is distributed in the hope that it will be useful,
00011 but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013 GNU General Public License for more details.
00014 
00015 You should have received a copy of the GNU General Public License
00016 along with this program; if not, write to the Free Software
00017 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00018 
00019 */
00020 
00021 #include "_gocr.h" 
00022 #include "hash.h"
00023 #include "unicode.h"
00024 #include <stdarg.h>
00025 
00026 static HashTable blockAttrib, charAttrib;
00027 
00028 struct charattribute {
00029         /* name is the hash key, so don't waste memory holding it twice */
00030         unsigned char           *format;        /* like printf */
00031         gocrCharAttributeType   type;
00032         int                     index;          /* hash/attribute index */
00033         union {
00034           int                   settable;
00035           unsigned char         *value;
00036         } data;
00037 };
00038 
00039 int _gocr_initUnicode ( void ) {
00040   struct initial {
00041     char *name;
00042     gocrCharAttributeType type;
00043     char *format;
00044   };
00045 
00046   const struct initial chardata[] = {   { "BOLD", SETTABLE, NULL },
00047                                         { "ITALIC", SETTABLE, NULL },
00048                                         { "FONT", UNTIL_OVERRIDEN, "%s %d" } };
00049   int i;
00050 
00051   if ( hash_init(&blockAttrib, 0xFF, NULL) == -1 ) {
00052     _gocr_debug(1, fprintf(_data.error, "_gocr_initUnicode: hash_init(block)");)
00053     return -1;
00054   }
00055   if ( hash_init(&charAttrib, 0xFF, NULL) == -1 ) {
00056     _gocr_debug(1, fprintf(_data.error, "_gocr_initUnicode: hash_init(char)");)
00057     return -1;
00058   }
00059 
00060 //  for ( i = 0; i < sizeof(blockdata)/sizeof(char *); i++ )
00061 
00062   for ( i = 0; i < sizeof(chardata)/sizeof(char *); i++ )
00063     gocr_charAttributeRegister(chardata[i].name, chardata[i].type,
00064         chardata[i].format);
00065 
00066   return 0;
00067 }
00068 
00069 void _free_ca ( void *data ) {
00070   struct charattribute *ca = (struct charattribute *)data;
00071   if ( ca == NULL )
00072     return;
00073   if ( ca->format )
00074     free(ca->format);
00075   free(ca);
00076 }
00077 
00078 void _gocr_endUnicode ( void ) {
00079   hash_free(&blockAttrib, NULL);
00080   hash_free(&charAttrib, _free_ca);
00081 }
00082 
00083 int gocr_charAttributeRegister ( char *name, gocrCharAttributeType t, 
00084     char *format ) {
00085   struct charattribute *ca;
00086 
00087   _gocr_debug(3, fprintf(_data.error, "gocr_charAttributeCreate(%s, %d, %s)\n", 
00088       name, t, format);)
00089 
00090   if ( !name ) {
00091     _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeCreate: NULL name\n");)
00092     return -1;
00093   }
00094 
00095   /* fill structure */
00096   ca = (struct charattribute *)malloc(sizeof(struct charattribute));
00097   if ( ca == NULL ) {
00098     _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeCreate: NULL malloc\n");)
00099     return -1;
00100   }
00101 
00102   switch ( t ) {
00103     case SETTABLE:
00104       ca->data.settable = 0;
00105       break;
00106     case UNTIL_OVERRIDEN:
00107       ca->data.value = NULL;
00108       break;
00109     default:
00110       _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: unexistant type\n");)
00111       free(ca);
00112       return -1;
00113   }
00114   ca->type = t;
00115 
00116   /* future: check format to see if it's a valid one */
00117   ca->format = ( format == NULL ? NULL : (unsigned char *)strdup(format) );
00118 
00119   ca->index = hash_insert(&charAttrib, name, (void *)ca);
00120   if ( ca->index < 0 ) {
00121     _gocr_debug(1, fprintf(_data.error, "Hash error %d\n", ca->index);)
00122     if ( ca->format )
00123       free(ca->format);
00124     free(ca);
00125   }
00126 
00127   return 0;
00128 }
00129 
00130 int gocr_boxAttributeSet ( gocrBox *box, int action, char *name, ... ) {
00131   wchar_t *t;
00132   int length;
00133   struct charattribute *ca;
00134 
00135   _gocr_debug(3, fprintf(_data.error, "gocr_charAttributeInsert(%p, %d, %s,...)", 
00136       box, action, name);)
00137   if ( name == NULL ) {
00138     _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: NULL name\n");)
00139     return -1;
00140   }
00141 
00142   ca = (struct charattribute *)hash_data(&charAttrib, name);
00143   if ( ca == NULL ) {
00144     _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: attribute not found\n");)
00145     return -1;
00146   }
00147 
00148   if ( action == 1 ) { /* insert */
00149     unsigned char *buffer = NULL, *p;
00150 
00151     /* check if it already exists */
00152     for ( t = box->attributes; *t != '\0'; t++ ) {
00153       if ( *t = gocr_setCharAttribute(ca->index) ) {
00154         _gocr_debug(2, fprintf(_data.error, "gocr_charAttributeInsert: attribute exists\n");)
00155         return -1;
00156       }
00157     }
00158 
00159     /* create format string */
00160     if ( ca->format != NULL ) {
00161       int size = 100;
00162       va_list va;
00163 
00164       va_start(va, name);
00165 
00166       /* fill the buffer */
00167       buffer = (unsigned char *)malloc(size);
00168       if ( buffer == NULL ) {
00169         _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: NULL malloc\n");)
00170         return -1;
00171       }
00172 
00173       /* sprintf, making sure it fits */
00174       while (1) {
00175         int nchars = vsnprintf (buffer, size, ca->format, va);
00176 
00177         if (nchars > -1)
00178           break;
00179 
00180         size *= 2;
00181         buffer = (unsigned char *)realloc(buffer, size);
00182         if ( buffer == NULL ) {
00183           _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: NULL realloc %d\n", size);)
00184           return -1;
00185         }
00186       }
00187       va_end(va);
00188 
00189     }
00190     /* point t to the end of the the string, realloc to fit */
00191     length = (box->attributes == NULL ? 0 : wcslen(box->attributes));
00192     box->attributes = (wchar_t *)realloc(box->attributes, (length +
00193                   (buffer == NULL ? 0 : strlen(buffer)) + 2)*sizeof(wchar_t));
00194     if ( box->attributes == NULL ) {
00195       _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: NULL wrealloc\n");)
00196       return -1;
00197     }
00198     t = box->attributes + wcslen(box->attributes);
00199 
00200     /* and fill with the data */
00201     *t++ = gocr_setCharAttribute(ca->index);
00202     for ( p = buffer; *p != '\0'; p++ )
00203       *t++ = gocr_setCharAttributeData(*p);
00204     *t = '\0';
00205 
00206     switch ( ca->type ) {
00207       case SETTABLE: /* it's a settable attribute, so set/unset it */
00208         ca->data.settable = !ca->data.settable;
00209         break;
00210       case UNTIL_OVERRIDEN:
00211         if ( ca->data.value )
00212           free(ca->data.value);
00213         ca->data.value = (unsigned char*)strdup(buffer);
00214         break;
00215       default:
00216         _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: unexistant type\n");)
00217     }
00218 
00219     if ( buffer )
00220       free(buffer);
00221   }
00222   else if ( action == 0 ) { /* delete */
00223     wchar_t c = gocr_setCharAttribute(ca->index), *s;
00224 
00225     if ( !box->attributes )
00226       return 0;
00227 
00228     /* find the attribute */
00229     for ( t = box->attributes; *t != '\0'; t++ )
00230       if ( *t == c )
00231         break;
00232     if ( *t == '\0' ) /* not found */
00233       return 0;
00234 
00235     /* find the end of the attribute */
00236     for ( s = t+1; gocr_ischarAttributeData(*s); s++ ) 
00237       ;
00238 
00239     /* move the rest of the string, and realloc it */
00240     memmove(t, s, (wcslen(s)+1)*sizeof(wchar_t));
00241     box->attributes = (wchar_t *)realloc(box->attributes,
00242         (wcslen(box->attributes)+1)*sizeof(wchar_t));
00243     
00244     switch ( ca->type ) {
00245       case SETTABLE: /* it's a settable attribute, so set/unset it */
00246         ca->data.settable = !ca->data.settable;
00247         break;
00248       case UNTIL_OVERRIDEN:
00249         /*TODO: must search the previous value, etc */
00250         _gocr_debug(0, fprintf(_data.error, "gocr_charAttributeInsert: UNTIL_OVERRIDEN not done yet;\n"
00251                             "Unpredictable behaviour may occur.\n");)
00252         break;
00253       default:
00254         _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: unexistant type\n");)
00255     }
00256   }
00257 
00258   return 0;
00259 }
00260 
00261 /* Arguments: the character (main), and the modifier (accent, etc). See the
00262       function if you want to know the modifiers. 
00263    Description: This function intends to be a small helper, to avoid having
00264       to write switches in functions. It's therefore mainly to accents, and
00265       specially for the most usual ones. It supports the basic greek 
00266       characters too, which is actually not very helpful.
00267    Returns: the unicode character corresponding to the composed character. */
00268 wchar_t gocr_compose ( wchar_t main, wchar_t modifier ) {
00269 
00270 /* supported by now: part of ISO8859-1, basic greek characters */
00271   _gocr_debug(3, fprintf(_data.error, "compose(%l, %l)\n", main, modifier);)
00272   switch (modifier) {
00273     case UNICODE_NULL:
00274     case SPACE:
00275         return      (wchar_t)main;
00276 
00277     case APOSTROPHE: /* do NOT USE this. It's here for compatibility only. 
00278                             Use ACUTE_ACCENT instead. */
00279       _gocr_debug(2, fprintf( _data.error, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");)
00280     case ACUTE_ACCENT: /* acute/cedilla */
00281       switch (main) {
00282         case 'a':           return LATIN_SMALL_LETTER_A_WITH_ACUTE;
00283         case 'A':           return LATIN_CAPITAL_LETTER_A_WITH_ACUTE;
00284         case 'e':           return LATIN_SMALL_LETTER_E_WITH_ACUTE;
00285         case 'E':           return LATIN_CAPITAL_LETTER_E_WITH_ACUTE;
00286         case 'i':           return LATIN_SMALL_LETTER_I_WITH_ACUTE;
00287         case 'I':           return LATIN_CAPITAL_LETTER_I_WITH_ACUTE;
00288         case 'o':           return LATIN_SMALL_LETTER_O_WITH_ACUTE;
00289         case 'O':           return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
00290         case 'u':           return LATIN_SMALL_LETTER_U_WITH_ACUTE;
00291         case 'U':           return LATIN_CAPITAL_LETTER_U_WITH_ACUTE;
00292         case 'y':           return LATIN_SMALL_LETTER_Y_WITH_ACUTE;
00293         case 'Y':           return LATIN_CAPITAL_LETTER_Y_WITH_ACUTE;
00294         default:    return(wchar_t)0;
00295       }
00296       break;
00297 
00298     case CEDILLA:
00299       switch (main) {
00300         case 'c':           return LATIN_SMALL_LETTER_C_WITH_CEDILLA;
00301         case 'C':           return LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
00302       }
00303       break;
00304 
00305     case TILDE:
00306       switch (main) {
00307         case 'a':           return LATIN_SMALL_LETTER_A_WITH_TILDE;
00308         case 'A':           return LATIN_CAPITAL_LETTER_A_WITH_TILDE;
00309         case 'n':           return LATIN_SMALL_LETTER_N_WITH_TILDE;
00310         case 'N':           return LATIN_CAPITAL_LETTER_N_WITH_TILDE;
00311         case 'o':           return LATIN_SMALL_LETTER_O_WITH_TILDE;
00312         case 'O':           return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
00313         default:    return(wchar_t)0;
00314       }
00315       break;
00316     case GRAVE_ACCENT:
00317       switch (main) {
00318         case 'a':           return LATIN_SMALL_LETTER_A_WITH_GRAVE;
00319         case 'A':           return LATIN_CAPITAL_LETTER_A_WITH_GRAVE;
00320         case 'e':           return LATIN_SMALL_LETTER_E_WITH_GRAVE;
00321         case 'E':           return LATIN_CAPITAL_LETTER_E_WITH_GRAVE;
00322         case 'i':           return LATIN_SMALL_LETTER_I_WITH_GRAVE;
00323         case 'I':           return LATIN_CAPITAL_LETTER_I_WITH_GRAVE;
00324         case 'o':           return LATIN_SMALL_LETTER_O_WITH_GRAVE;
00325         case 'O':           return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
00326         case 'u':           return LATIN_SMALL_LETTER_U_WITH_GRAVE;
00327         case 'U':           return LATIN_CAPITAL_LETTER_U_WITH_GRAVE;
00328         default:    return(wchar_t)0;
00329       }
00330       break;
00331     case QUOTATION_MARK: /* do NOT USE this. It's here for compatibility only. 
00332                             Use DIAERESIS instead. */
00333       _gocr_debug(2, fprintf( _data.error, "COMPOSE: QUOTATION_MARK instead of DIAERESIS");)
00334     case DIAERESIS:
00335       switch (main) {
00336         case 'a':           return LATIN_SMALL_LETTER_A_WITH_DIAERESIS;
00337         case 'A':           return LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS;
00338         case 'e':           return LATIN_SMALL_LETTER_E_WITH_DIAERESIS;
00339         case 'E':           return LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS;
00340         case 'i':           return LATIN_SMALL_LETTER_I_WITH_DIAERESIS;
00341         case 'I':           return LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS;
00342         case 'o':           return LATIN_SMALL_LETTER_O_WITH_DIAERESIS;
00343         case 'O':           return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
00344         case 'u':           return LATIN_SMALL_LETTER_U_WITH_DIAERESIS;
00345         case 'U':           return LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS;
00346         case 'y':           return LATIN_SMALL_LETTER_Y_WITH_DIAERESIS;
00347         case 'Y':           return LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
00348         default:    return(wchar_t)0;
00349       }
00350       break;
00351     case CIRCUMFLEX_ACCENT:
00352       switch (main) {
00353         case 'a':           return LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX;
00354         case 'A':           return LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX;
00355         case 'e':           return LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX;
00356         case 'E':           return LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX;
00357         case 'i':           return LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX;
00358         case 'I':           return LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX;
00359         case 'o':           return LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX;
00360         case 'O':           return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
00361         case 'u':           return LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX;
00362         case 'U':           return LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX;
00363         default:    return(wchar_t)0;
00364       }
00365       break;
00366     case RING_ABOVE:
00367       switch (main) {
00368         case 'a':           return LATIN_SMALL_LETTER_A_WITH_RING_ABOVE;
00369         case 'A':           return LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE;
00370         default:    return(wchar_t)0;
00371       }
00372       break;
00373     case 'e': /* e ligatures: ae, oe. */
00374     case 'E':
00375       switch (main) {
00376         case 'a':           return LATIN_SMALL_LETTER_AE;
00377         case 'A':           return LATIN_CAPITAL_LETTER_AE;
00378         case 'o':           return LATIN_SMALL_LIGATURE_OE;
00379         case 'O':           return LATIN_CAPITAL_LIGATURE_OE;
00380         default:    return(wchar_t)0;
00381       }
00382       break;
00383     case 'g': /* greek */
00384       switch (main) {
00385         /* missing 0x37A-0x390 */
00386         /* weird cases: Q -> theta (it resembles a little, doesn't it?)
00387                         V -> psi   (what can I do?) */
00388         case 'A':   return GREEK_CAPITAL_LETTER_ALPHA;
00389         case 'B':   return GREEK_CAPITAL_LETTER_BETA;
00390         case 'G':   return GREEK_CAPITAL_LETTER_GAMMA;
00391         case 'D':   return GREEK_CAPITAL_LETTER_DELTA;
00392         case 'E':   return GREEK_CAPITAL_LETTER_EPSILON;
00393         case 'Z':   return GREEK_CAPITAL_LETTER_ZETA;
00394         case 'H':   return GREEK_CAPITAL_LETTER_ETA;
00395         case 'Q':   return GREEK_CAPITAL_LETTER_THETA;
00396         case 'I':   return GREEK_CAPITAL_LETTER_IOTA;
00397         case 'K':   return GREEK_CAPITAL_LETTER_KAPPA;
00398         case 'L':   return GREEK_CAPITAL_LETTER_LAMDA;
00399         case 'M':   return GREEK_CAPITAL_LETTER_MU;
00400         case 'N':   return GREEK_CAPITAL_LETTER_NU;
00401         case 'X':   return GREEK_CAPITAL_LETTER_XI;
00402         case 'O':   return GREEK_CAPITAL_LETTER_OMICRON;
00403         case 'P':   return GREEK_CAPITAL_LETTER_PI;
00404         case 'R':   return GREEK_CAPITAL_LETTER_RHO;
00405         case 'S':   return GREEK_CAPITAL_LETTER_SIGMA;
00406         case 'T':   return GREEK_CAPITAL_LETTER_TAU;
00407         case 'Y':   return GREEK_CAPITAL_LETTER_UPSILON;
00408         case 'F':   return GREEK_CAPITAL_LETTER_PHI;
00409         case 'C':   return GREEK_CAPITAL_LETTER_CHI;
00410         case 'V':   return GREEK_CAPITAL_LETTER_PSI;
00411         case 'W':   return GREEK_CAPITAL_LETTER_OMEGA;
00412 /*
00413         case '':   return GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA;
00414         case '':   return GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA;
00415         case '':   return GREEK_SMALL_LETTER_ALPHA_WITH_TONOS;
00416         case '':   return GREEK_SMALL_LETTER_EPSILON_WITH_TONOS;
00417         case '':   return GREEK_SMALL_LETTER_ETA_WITH_TONOS;
00418         case '':   return GREEK_SMALL_LETTER_IOTA_WITH_TONOS;
00419         case '':   return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS;
00420 */
00421         case 'a':   return GREEK_SMALL_LETTER_ALPHA;
00422         case 'b':   return GREEK_SMALL_LETTER_BETA;
00423         case 'g':   return GREEK_SMALL_LETTER_GAMMA;
00424         case 'd':   return GREEK_SMALL_LETTER_DELTA;
00425         case 'e':   return GREEK_SMALL_LETTER_EPSILON;
00426         case 'z':   return GREEK_SMALL_LETTER_ZETA;
00427         case 'h':   return GREEK_SMALL_LETTER_ETA;
00428         case 'q':   return GREEK_SMALL_LETTER_THETA;
00429         case 'i':   return GREEK_SMALL_LETTER_IOTA;
00430         case 'k':   return GREEK_SMALL_LETTER_KAPPA;
00431         case 'l':   return GREEK_SMALL_LETTER_LAMDA;
00432         case 'm':   return GREEK_SMALL_LETTER_MU;
00433         case 'n':   return GREEK_SMALL_LETTER_NU;
00434         case 'x':   return GREEK_SMALL_LETTER_XI;
00435         case 'o':   return GREEK_SMALL_LETTER_OMICRON;
00436         case 'p':   return GREEK_SMALL_LETTER_PI;
00437         case 'r':   return GREEK_SMALL_LETTER_RHO;
00438         case '&':   return GREEK_SMALL_LETTER_FINAL_SIGMA;
00439         case 's':   return GREEK_SMALL_LETTER_SIGMA;
00440         case 't':   return GREEK_SMALL_LETTER_TAU;
00441         case 'y':   return GREEK_SMALL_LETTER_UPSILON;
00442         case 'f':   return GREEK_SMALL_LETTER_PHI;
00443         case 'c':   return GREEK_SMALL_LETTER_CHI;
00444         case 'v':   return GREEK_SMALL_LETTER_PSI;
00445         case 'w':   return GREEK_SMALL_LETTER_OMEGA;
00446 /*
00447         case '':   return GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA;
00448         case '':   return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA;
00449         case '':   return GREEK_SMALL_LETTER_OMICRON_WITH_TONOS;
00450         case '':   return GREEK_SMALL_LETTER_UPSILON_WITH_TONOS;
00451         case '':   return GREEK_SMALL_LETTER_OMEGA_WITH_TONOS;
00452         case '':   return GREEK_BETA_SYMBOL;
00453         case '':   return GREEK_THETA_SYMBOL;
00454         case '':   return GREEK_UPSILON_WITH_HOOK_SYMBOL;
00455         case '':   return GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL;
00456         case '':   return GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL;
00457         case '':   return GREEK_PHI_SYMBOL;
00458         case '':   return GREEK_PI_SYMBOL;
00459 */
00460         default:   return (wchar_t)0;
00461       }
00462       break;   
00463     default:
00464       return (wchar_t)0;
00465   }
00466 }
00467 
00468 #ifdef DEPRECATED
00469 /* this function won't be provided anymore, but is kept here because it may
00470 be useful when writing outputFormatter modules */
00471 #define UNDEFINED                       "X"
00472 
00473 /* Arguments: character in Unicode format, type of format to convert to.
00474    Returns: a string containing the Unicode character converted to the chosen
00475     format. This string is statically allocated and should not be freed. */
00476 const unsigned char *decode(wchar_t c, FORMAT type) {
00477   static unsigned char d;
00478   switch (type) {
00479     case ISO8859_1:
00480       if ( c <= 0xFF ) { /* UNICODE == IS08859-1 */
00481         d = (unsigned char)c;
00482         return &d;
00483       }
00484       switch (c) { /* not found in list, but perhaps we can describe it */
00485         /* todo: add greek. GREEK_SMALL_LETTER_ALPHA = alpha */
00486         
00487         /* general puctuation */
00488         case HYPHEN:
00489           return (const unsigned char *)"-";
00490         case FIGURE_DASH:
00491         case EN_DASH:
00492           return (const unsigned char *)"--";
00493         case EM_DASH:
00494           return (const unsigned char *)"---";
00495         case LEFT_SINGLE_QUOTATION_MARK:
00496           return (const unsigned char *)"`";
00497         case RIGHT_SINGLE_QUOTATION_MARK:
00498           return (const unsigned char *)"'";
00499         case SINGLE_LOW_9_QUOTATION_MARK:
00500           return (const unsigned char *)",";
00501         case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
00502           return (const unsigned char *)UNDEFINED;
00503         case LEFT_DOUBLE_QUOTATION_MARK:
00504           return (const unsigned char *)"``";
00505         case RIGHT_DOUBLE_QUOTATION_MARK:
00506           return (const unsigned char *)"''";
00507         case DOUBLE_LOW_9_QUOTATION_MARK:
00508           return (const unsigned char *)",,";
00509         case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
00510           return (const unsigned char *)UNDEFINED;
00511         case DAGGER:
00512           return (const unsigned char *)"+";
00513         case DOUBLE_DAGGER:
00514           return (const unsigned char *)"*";
00515         case BULLET:
00516           return (const unsigned char *)"*";
00517         case TRIANGULAR_BULLET:
00518           return (const unsigned char *)"*";
00519         case HYPHENATION_POINT:
00520           return (const unsigned char *)"-";
00521         case HORIZONTAL_ELLIPSIS:
00522           return (const unsigned char *)"...";
00523         case PER_MILLE_SIGN:
00524           return (const unsigned char *)"%%"; /* awk! */
00525         case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
00526           return (const unsigned char *)"<";
00527         case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
00528           return (const unsigned char *)">";
00529         
00530         /* ligatures */
00531         case LATIN_SMALL_LIGATURE_FF:
00532           return (const unsigned char *)"ff";
00533         case LATIN_SMALL_LIGATURE_FI:
00534           return (const unsigned char *)"fi";
00535         case LATIN_SMALL_LIGATURE_FL:
00536           return (const unsigned char *)"fl";
00537         case LATIN_SMALL_LIGATURE_FFI:
00538           return (const unsigned char *)"ffi";
00539         case LATIN_SMALL_LIGATURE_FFL:
00540           return (const unsigned char *)"ffl";
00541         case LATIN_SMALL_LIGATURE_LONG_S_T:
00542         case LATIN_SMALL_LIGATURE_ST:
00543           return (const unsigned char *)"st";
00544         
00545         /* extra */
00546         case UNKNOWN:
00547           return (const unsigned char *)"X";
00548         case PICTURE:
00549           return (const unsigned char *)"PICTURE";
00550         
00551         
00552         default:
00553           return (const unsigned char *)UNDEFINED;
00554       }
00555       break;
00556     case TeX:
00557       if ( c >= SPACE && c <= TILDE ) { /* ASCII */
00558         switch (c) {
00559           case '$':
00560               return (const unsigned char *)"\\$";
00561           case '&':
00562               return (const unsigned char *)"\\&";
00563           case '%':
00564               return (const unsigned char *)"\\%";
00565           case '#':
00566               return (const unsigned char *)"\\#";
00567           case '_':
00568               return (const unsigned char *)"\\_";
00569           case '{':
00570               return (const unsigned char *)"\\{";
00571           case '}':
00572               return (const unsigned char *)"\\}";
00573           case '\\':
00574               return (const unsigned char *)"$\\backslash$";
00575           case '~':
00576               return (const unsigned char *)"\\~{}";
00577           case '^':
00578               return (const unsigned char *)"\\^{}";
00579           default:
00580               d = (unsigned char)c;
00581               return (const unsigned char *)&d;
00582         }
00583       }
00584       switch (c) {
00585         /* ISO8859_1 */
00586         case NO_BREAK_SPACE:
00587           return (const unsigned char *)"~";
00588         case INVERTED_EXCLAMATION_MARK:
00589           return (const unsigned char *)"!'";
00590         case CENT_SIGN:
00591           return (const unsigned char *)UNDEFINED;
00592         case POUND_SIGN:
00593           return (const unsigned char *)"\\pounds";
00594         case CURRENCY_SIGN:
00595           return (const unsigned char *)UNDEFINED;
00596         case YEN_SIGN:
00597           return (const unsigned char *)UNDEFINED;
00598         case BROKEN_BAR:
00599           return (const unsigned char *)UNDEFINED;
00600         case SECTION_SIGN:
00601           return (const unsigned char *)"\\S";
00602         case DIAERESIS:
00603           return (const unsigned char *)"\"";
00604         case COPYRIGHT_SIGN:
00605           return (const unsigned char *)"\\copyright";
00606         case FEMININE_ORDINAL_INDICATOR:
00607           return (const unsigned char *)"$^{\\underbar{a}}$";
00608         case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
00609           return (const unsigned char *)"\\flqq{}";
00610         case NOT_SIGN:
00611           return (const unsigned char *)"$\\lnot$";
00612         case SOFT_HYPHEN:
00613           return (const unsigned char *)"\\-";
00614         case REGISTERED_SIGN:
00615           return (const unsigned char *)UNDEFINED;
00616         case MACRON:
00617           return (const unsigned char *)UNDEFINED;
00618         case DEGREE_SIGN:
00619           return (const unsigned char *)"$^{o}$";
00620         case PLUS_MINUS_SIGN:
00621           return (const unsigned char *)"$\\pm$";
00622         case SUPERSCRIPT_TWO:
00623           return (const unsigned char *)"$^{2}$";
00624         case SUPERSCRIPT_THREE:
00625           return (const unsigned char *)"$^{3}$";
00626         case ACUTE_ACCENT:
00627           return (const unsigned char *)"\\( \\prime \\)";
00628         case MICRO_SIGN:
00629           return (const unsigned char *)"$\\mu$";
00630         case PILCROW_SIGN:
00631           return (const unsigned char *)"\\P";
00632         case MIDDLE_DOT:
00633           return (const unsigned char *)"$\\cdot$";
00634         case CEDILLA:
00635           return (const unsigned char *)"\\,";
00636         case SUPERSCRIPT_ONE:
00637           return (const unsigned char *)"$^{1}$";
00638         case MASCULINE_ORDINAL_INDICATOR:
00639           return (const unsigned char *)"$^{\\underbar{o}}$";
00640         case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
00641           return (const unsigned char *)"\\frqq{}";
00642         case VULGAR_FRACTION_ONE_QUARTER:        /* these fractions are not good*/
00643           return (const unsigned char *)"\\( 1\\over 4 \\)";
00644         case VULGAR_FRACTION_ONE_HALF:
00645           return (const unsigned char *)"\\( 1\\over 2 \\)";
00646         case VULGAR_FRACTION_THREE_QUARTERS:
00647           return (const unsigned char *)"\\( 3\\over 4 \\)";
00648         case INVERTED_QUESTION_MARK:
00649           return (const unsigned char *)"?'";
00650         case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
00651           return (const unsigned char *)"\\`A";
00652         case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
00653           return (const unsigned char *)"\\'A";
00654         case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
00655           return (const unsigned char *)"\\^A";
00656         case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
00657           return (const unsigned char *)"\\~A";
00658         case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
00659           return (const unsigned char *)"\\\"A";
00660         case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
00661           return (const unsigned char *)"\\AA";
00662         case LATIN_CAPITAL_LETTER_AE:
00663           return (const unsigned char *)"\\AE";
00664         case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
00665           return (const unsigned char *)"\\C";
00666         case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
00667           return (const unsigned char *)"\\`E";
00668         case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
00669           return (const unsigned char *)"\\'E";
00670         case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
00671           return (const unsigned char *)"\\^E";
00672         case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
00673           return (const unsigned char *)"\\\"E";
00674         case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
00675           return (const unsigned char *)"\\`I";
00676         case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
00677           return (const unsigned char *)"\\'I";
00678         case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
00679           return (const unsigned char *)"\\^I";
00680         case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
00681           return (const unsigned char *)"\\\"I";
00682         case LATIN_CAPITAL_LETTER_ETH:
00683           return (const unsigned char *)UNDEFINED;
00684         case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
00685           return (const unsigned char *)"\\~N";
00686         case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
00687           return (const unsigned char *)"\\`O";
00688         case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
00689           return (const unsigned char *)"\\'O";
00690         case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
00691           return (const unsigned char *)"\\^O";
00692         case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
00693           return (const unsigned char *)"\\~O";
00694         case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
00695           return (const unsigned char *)"\\\"O";
00696         case MULTIPLICATION_SIGN:
00697           return (const unsigned char *)"$\\times$";
00698         case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
00699           return (const unsigned char *)"\\O";
00700         case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
00701           return (const unsigned char *)"\\`U";
00702         case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
00703           return (const unsigned char *)"\\'U";
00704         case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
00705           return (const unsigned char *)"\\^U";
00706         case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
00707           return (const unsigned char *)"\\\"U";
00708         case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
00709           return (const unsigned char *)"\\'Y";
00710         case LATIN_CAPITAL_LETTER_THORN:
00711           return (const unsigned char *)UNDEFINED;
00712         case LATIN_SMALL_LETTER_SHARP_S:
00713           return (const unsigned char *)"\\ss";
00714         case LATIN_SMALL_LETTER_A_WITH_GRAVE:
00715           return (const unsigned char *)"\\`a";
00716         case LATIN_SMALL_LETTER_A_WITH_ACUTE:
00717           return (const unsigned char *)"\\'a";
00718         case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
00719           return (const unsigned char *)"\\^a";
00720         case LATIN_SMALL_LETTER_A_WITH_TILDE:
00721           return (const unsigned char *)"\\~a";
00722         case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
00723           return (const unsigned char *)"\\\"a";
00724         case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
00725           return (const unsigned char *)"\\aa";
00726         case LATIN_SMALL_LETTER_AE:
00727           return (const unsigned char *)"\\ae";
00728         case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
00729           return (const unsigned char *)"\\c";
00730         case LATIN_SMALL_LETTER_E_WITH_GRAVE:
00731           return (const unsigned char *)"\\`e";
00732         case LATIN_SMALL_LETTER_E_WITH_ACUTE:
00733           return (const unsigned char *)"\\'e";
00734         case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
00735           return (const unsigned char *)"\\^e";
00736         case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
00737           return (const unsigned char *)"\\\"e";
00738         case LATIN_SMALL_LETTER_I_WITH_GRAVE:
00739           return (const unsigned char *)"\\`i";
00740         case LATIN_SMALL_LETTER_I_WITH_ACUTE:
00741           return (const unsigned char *)"\\'i";
00742         case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
00743           return (const unsigned char *)"\\^i";
00744         case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
00745           return (const unsigned char *)"\\\"i";
00746         case LATIN_SMALL_LETTER_ETH:
00747           return (const unsigned char *)UNDEFINED;
00748         case LATIN_SMALL_LETTER_N_WITH_TILDE:
00749           return (const unsigned char *)"\\~n";
00750         case LATIN_SMALL_LETTER_O_WITH_GRAVE:
00751           return (const unsigned char *)"\\`o";
00752         case LATIN_SMALL_LETTER_O_WITH_ACUTE:
00753           return (const unsigned char *)"\\'o";
00754         case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
00755           return (const unsigned char *)"\\^o";
00756         case LATIN_SMALL_LETTER_O_WITH_TILDE:
00757           return (const unsigned char *)"\\~o";
00758         case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
00759           return (const unsigned char *)"\\\"o";
00760         case DIVISION_SIGN:
00761           return (const unsigned char *)"$\\div$";
00762         case LATIN_SMALL_LETTER_O_WITH_STROKE:
00763           return (const unsigned char *)"\\o";
00764         case LATIN_SMALL_LETTER_U_WITH_GRAVE:
00765           return (const unsigned char *)"\\`u";
00766         case LATIN_SMALL_LETTER_U_WITH_ACUTE:
00767           return (const unsigned char *)"\\'u";
00768         case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
00769           return (const unsigned char *)"\\^u";
00770         case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
00771           return (const unsigned char *)"\\\"u";
00772         case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
00773           return (const unsigned char *)"\\'y";
00774         case LATIN_SMALL_LETTER_THORN:
00775           return (const unsigned char *)UNDEFINED;
00776         case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
00777           return (const unsigned char *)"\\\"y";
00778 
00779         /* greek */
00780           /* some Greek letters (punctuation, accents, accented capitals) are missing */
00781         case GREEK_CAPITAL_LETTER_ALPHA:
00782           return (const unsigned char *)"A";
00783         case GREEK_CAPITAL_LETTER_BETA:
00784           return (const unsigned char *)"B";
00785         case GREEK_CAPITAL_LETTER_GAMMA:
00786           return (const unsigned char *)"\\( \\Gamma \\)";
00787         case GREEK_CAPITAL_LETTER_DELTA:
00788           return (const unsigned char *)"\\( \\Delta \\)";
00789         case GREEK_CAPITAL_LETTER_EPSILON:
00790           return (const unsigned char *)"E";
00791         case GREEK_CAPITAL_LETTER_ZETA:
00792           return (const unsigned char *)"Z";
00793         case GREEK_CAPITAL_LETTER_ETA:
00794           return (const unsigned char *)"H";
00795         case GREEK_CAPITAL_LETTER_THETA:
00796           return (const unsigned char *)"\\( \\Theta \\)";
00797         case GREEK_CAPITAL_LETTER_IOTA:
00798           return (const unsigned char *)"I";
00799         case GREEK_CAPITAL_LETTER_KAPPA:
00800           return (const unsigned char *)"K";
00801         case GREEK_CAPITAL_LETTER_LAMDA:
00802           return (const unsigned char *)"\\( \\Lambda \\)";
00803         case GREEK_CAPITAL_LETTER_MU:
00804           return (const unsigned char *)"M";
00805         case GREEK_CAPITAL_LETTER_NU:
00806           return (const unsigned char *)"N";
00807         case GREEK_CAPITAL_LETTER_XI:
00808           return (const unsigned char *)"\\( \\Xi \\)";
00809         case GREEK_CAPITAL_LETTER_OMICRON:
00810           return (const unsigned char *)"O";
00811         case GREEK_CAPITAL_LETTER_PI:
00812           return (const unsigned char *)"\\( \\Pi \\)";
00813         case GREEK_CAPITAL_LETTER_RHO:
00814           return (const unsigned char *)"P";
00815         case GREEK_CAPITAL_LETTER_SIGMA:
00816           return (const unsigned char *)"\\( \\Sigma \\)";
00817         case GREEK_CAPITAL_LETTER_TAU:
00818           return (const unsigned char *)"T";
00819         case GREEK_CAPITAL_LETTER_UPSILON:
00820           return (const unsigned char *)"\\( \\Upsilon \\)";
00821         case GREEK_CAPITAL_LETTER_PHI:
00822           return (const unsigned char *)"\\( \\Phi \\)";
00823         case GREEK_CAPITAL_LETTER_CHI:
00824           return (const unsigned char *)UNDEFINED;
00825         case GREEK_CAPITAL_LETTER_PSI:
00826           return (const unsigned char *)"\\( \\Psi \\)";
00827         case GREEK_CAPITAL_LETTER_OMEGA:
00828           return (const unsigned char *)"\\( \\Omega \\)";
00829         case GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA:
00830           return (const unsigned char *)UNDEFINED;
00831         case GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA:
00832           return (const unsigned char *)UNDEFINED;
00833         case GREEK_SMALL_LETTER_ALPHA_WITH_TONOS:
00834           return (const unsigned char *)UNDEFINED;
00835         case GREEK_SMALL_LETTER_EPSILON_WITH_TONOS:
00836           return (const unsigned char *)UNDEFINED;
00837         case GREEK_SMALL_LETTER_ETA_WITH_TONOS:
00838           return (const unsigned char *)UNDEFINED;
00839         case GREEK_SMALL_LETTER_IOTA_WITH_TONOS:
00840           return (const unsigned char *)UNDEFINED;
00841         case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS:
00842           return (const unsigned char *)UNDEFINED;
00843         case GREEK_SMALL_LETTER_ALPHA:
00844           return (const unsigned char *)"\\( \\alpha \\)";
00845         case GREEK_SMALL_LETTER_BETA:
00846           return (const unsigned char *)"\\( \\beta \\)";
00847         case GREEK_SMALL_LETTER_GAMMA:
00848           return (const unsigned char *)"\\( \\gamma \\)";
00849         case GREEK_SMALL_LETTER_DELTA:
00850           return (const unsigned char *)"\\( \\delta \\)";
00851         case GREEK_SMALL_LETTER_EPSILON:
00852           return (const unsigned char *)"\\( \\epsilon \\)";
00853         case GREEK_SMALL_LETTER_ZETA:
00854           return (const unsigned char *)"\\( \\zeta \\)";
00855         case GREEK_SMALL_LETTER_ETA:
00856           return (const unsigned char *)"\\( \\eta \\)";
00857         case GREEK_SMALL_LETTER_THETA:
00858           return (const unsigned char *)"\\( \\theta \\)";
00859         case GREEK_SMALL_LETTER_IOTA:
00860           return (const unsigned char *)"\\( \\iota \\)";
00861         case GREEK_SMALL_LETTER_KAPPA:
00862           return (const unsigned char *)"\\( \\kappa \\)";
00863         case GREEK_SMALL_LETTER_LAMDA:
00864           return (const unsigned char *)"\\( \\lambda \\)";
00865         case GREEK_SMALL_LETTER_MU:
00866           return (const unsigned char *)"\\( \\mu \\)";
00867         case GREEK_SMALL_LETTER_NU:
00868           return (const unsigned char *)"\\( \\nu \\)";
00869         case GREEK_SMALL_LETTER_XI:
00870           return (const unsigned char *)"\\( \\xi \\)";
00871         case GREEK_SMALL_LETTER_OMICRON:
00872           return (const unsigned char *)"\\( \\omicron \\)";
00873         case GREEK_SMALL_LETTER_PI:
00874           return (const unsigned char *)"\\( \\pi \\)";
00875         case GREEK_SMALL_LETTER_RHO:
00876           return (const unsigned char *)"\\( \\rho \\)";
00877         case GREEK_SMALL_LETTER_FINAL_SIGMA:
00878           return (const unsigned char *)"\\( \\varsigma \\)";
00879         case GREEK_SMALL_LETTER_SIGMA:
00880           return (const unsigned char *)"\\( \\sigma \\)";
00881         case GREEK_SMALL_LETTER_TAU:
00882           return (const unsigned char *)"\\( \\tau \\)";
00883         case GREEK_SMALL_LETTER_UPSILON:
00884           return (const unsigned char *)"\\( \\upsilon \\)";
00885         case GREEK_SMALL_LETTER_PHI:
00886           return (const unsigned char *)"\\( \\varphi \\)";
00887         case GREEK_SMALL_LETTER_CHI:
00888           return (const unsigned char *)"\\( \\chi \\)";
00889         case GREEK_SMALL_LETTER_PSI:
00890           return (const unsigned char *)"\\( \\psi \\)";
00891         case GREEK_SMALL_LETTER_OMEGA:
00892           return (const unsigned char *)"\\( \\omega \\)";
00893         case GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA:
00894           return (const unsigned char *)UNDEFINED;
00895         case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA:
00896           return (const unsigned char *)UNDEFINED;
00897         case GREEK_SMALL_LETTER_OMICRON_WITH_TONOS:
00898           return (const unsigned char *)UNDEFINED;
00899         case GREEK_SMALL_LETTER_UPSILON_WITH_TONOS:
00900           return (const unsigned char *)UNDEFINED;
00901         case GREEK_SMALL_LETTER_OMEGA_WITH_TONOS:
00902           return (const unsigned char *)UNDEFINED;
00903         case GREEK_BETA_SYMBOL:
00904           return (const unsigned char *)UNDEFINED;
00905         case GREEK_THETA_SYMBOL:
00906           return (const unsigned char *)"\\( \\vartheta \\)";
00907         case GREEK_UPSILON_WITH_HOOK_SYMBOL:
00908           return (const unsigned char *)UNDEFINED;
00909         case GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL:
00910           return (const unsigned char *)UNDEFINED;
00911         case GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL:
00912           return (const unsigned char *)UNDEFINED;
00913         case GREEK_PHI_SYMBOL:
00914           return (const unsigned char *)"\\( \\phi \\)";
00915         case GREEK_PI_SYMBOL:
00916           return (const unsigned char *)"\\( \\varpi \\)";
00917           /* some other Greek letters are also missing */
00918 
00919         /* punctuation (partial) */
00920         case HYPHEN:
00921           return (const unsigned char *)"-";
00922         case NON_BREAKING_HYPHEN:
00923           return (const unsigned char *)UNDEFINED;
00924         case FIGURE_DASH:
00925         case EN_DASH:
00926           return (const unsigned char *)"--";
00927         case EM_DASH:
00928           return (const unsigned char *)"---";
00929         case HORIZONTAL_BAR:
00930           return (const unsigned char *)UNDEFINED;
00931         case LEFT_SINGLE_QUOTATION_MARK:
00932           return (const unsigned char *)"`";
00933         case RIGHT_SINGLE_QUOTATION_MARK:
00934           return (const unsigned char *)"'";
00935         case SINGLE_LOW_9_QUOTATION_MARK:
00936           return (const unsigned char *)"\\glq{}";
00937         case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
00938           return (const unsigned char *)UNDEFINED;
00939         case LEFT_DOUBLE_QUOTATION_MARK:
00940           return (const unsigned char *)"``";
00941         case RIGHT_DOUBLE_QUOTATION_MARK:
00942           return (const unsigned char *)"''";
00943         case DOUBLE_LOW_9_QUOTATION_MARK:
00944           return (const unsigned char *)"\\glqq{}";
00945         case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
00946           return (const unsigned char *)UNDEFINED;
00947         case DAGGER:
00948           return (const unsigned char *)"\\dag";
00949         case DOUBLE_DAGGER:
00950           return (const unsigned char *)"\\ddag";
00951         case BULLET:
00952           return (const unsigned char *)"$\\bullet$";
00953         case TRIANGULAR_BULLET:
00954           return (const unsigned char *)"$\\blacktriangleright";
00955         case HYPHENATION_POINT:
00956           return (const unsigned char *)"\\-";
00957         case HORIZONTAL_ELLIPSIS:
00958           return (const unsigned char *)"\\ldots";
00959         case PER_MILLE_SIGN:
00960           return (const unsigned char *)UNDEFINED;
00961         case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
00962           return (const unsigned char *)"\\flq{}";
00963         case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
00964           return (const unsigned char *)"\\frq{}";
00965         /* ligatures */
00966         case LATIN_SMALL_LIGATURE_FF:
00967           return (const unsigned char *)"ff";
00968         case LATIN_SMALL_LIGATURE_FI:
00969           return (const unsigned char *)"fi";
00970         case LATIN_SMALL_LIGATURE_FL:
00971           return (const unsigned char *)"fl";
00972         case LATIN_SMALL_LIGATURE_FFI:
00973           return (const unsigned char *)"ffi";
00974         case LATIN_SMALL_LIGATURE_FFL:
00975           return (const unsigned char *)"ffl";
00976         case LATIN_SMALL_LIGATURE_LONG_S_T:
00977         case LATIN_SMALL_LIGATURE_ST:
00978           return (const unsigned char *)"st";
00979         /* reserved */
00980         case UNKNOWN:
00981           return (const unsigned char *)"X";
00982         case PICTURE:
00983           return (const unsigned char *)"PICTURE";
00984         default:
00985           return (const unsigned char *)UNDEFINED;
00986         }
00987     case HTML:
00988       if ( c >= SPACE && c <= TILDE ) { /* ASCII */
00989         d = (unsigned char)c;
00990         return &d;
00991       }
00992       switch (c) {
00993         case FORM_FEED:
00994         case CARRIAGE_RETURN:
00995           return (const unsigned char *)"<br>";
00996         case NO_BREAK_SPACE:
00997           return (const unsigned char *)"<nobr>";
00998         case INVERTED_EXCLAMATION_MARK:
00999           return (const unsigned char *)"&iexcl;";
01000         case CENT_SIGN:
01001           return (const unsigned char *)"&cent;";
01002         case POUND_SIGN:
01003           return (const unsigned char *)"&pound;";
01004         case CURRENCY_SIGN:
01005           return (const unsigned char *)"&curren;";
01006         case YEN_SIGN:
01007           return (const unsigned char *)"&yen;";
01008         case BROKEN_BAR:
01009           return (const unsigned char *)"&brvbar;";
01010         case SECTION_SIGN:
01011           return (const unsigned char *)"&sect;";
01012         case DIAERESIS:
01013           return (const unsigned char *)"&uml;";
01014         case COPYRIGHT_SIGN:
01015           return (const unsigned char *)"&copy;";
01016         case FEMININE_ORDINAL_INDICATOR:
01017           return (const unsigned char *)"&ordfem;";
01018         case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
01019           return (const unsigned char *)"&laquo;";
01020         case NOT_SIGN:
01021           return (const unsigned char *)"&not;";
01022         case SOFT_HYPHEN:
01023           return (const unsigned char *)"&shy;";
01024         case REGISTERED_SIGN:
01025           return (const unsigned char *)"&reg;";
01026         case MACRON:
01027           return (const unsigned char *)"&macr;";
01028         case DEGREE_SIGN:
01029           return (const unsigned char *)"&deg;";
01030         case PLUS_MINUS_SIGN:
01031           return (const unsigned char *)"&plusmn;";
01032         case SUPERSCRIPT_TWO:
01033           return (const unsigned char *)"&sup2;";
01034         case SUPERSCRIPT_THREE:
01035           return (const unsigned char *)"&sup3;";
01036         case ACUTE_ACCENT:
01037           return (const unsigned char *)"&acute;";
01038         case MICRO_SIGN:
01039           return (const unsigned char *)"&micro;";
01040         case PILCROW_SIGN:
01041           return (const unsigned char *)"&para;";
01042         case MIDDLE_DOT:
01043           return (const unsigned char *)"&middot;";
01044         case CEDILLA:
01045           return (const unsigned char *)"&cedil;";
01046         case SUPERSCRIPT_ONE:
01047           return (const unsigned char *)"&sup1;";
01048         case MASCULINE_ORDINAL_INDICATOR:
01049           return (const unsigned char *)"&ordm;";
01050         case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
01051           return (const unsigned char *)"&raquo;";
01052         case VULGAR_FRACTION_ONE_QUARTER:
01053           return (const unsigned char *)"&frac14;";
01054         case VULGAR_FRACTION_ONE_HALF:
01055           return (const unsigned char *)"&frac12;";
01056         case VULGAR_FRACTION_THREE_QUARTERS:
01057           return (const unsigned char *)"&frac34;";
01058         case INVERTED_QUESTION_MARK:
01059           return (const unsigned char *)"&iquest;";       
01060         case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
01061           return (const unsigned char *)"&Agrave;";
01062         case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
01063           return (const unsigned char *)"&Aacute;";
01064         case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
01065           return (const unsigned char *)"&Acirc;";
01066         case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
01067           return (const unsigned char *)"&Atilde;";
01068         case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
01069           return (const unsigned char *)"&Auml;";
01070         case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
01071           return (const unsigned char *)"&Aring;";
01072         case LATIN_CAPITAL_LETTER_AE:
01073           return (const unsigned char *)"&AElig;";
01074         case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
01075           return (const unsigned char *)"&Ccedil;";
01076         case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
01077           return (const unsigned char *)"&Egrave;";
01078         case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
01079           return (const unsigned char *)"&Eacute;";
01080         case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
01081           return (const unsigned char *)"&Ecirc;";
01082         case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
01083           return (const unsigned char *)"&Euml;";
01084         case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
01085           return (const unsigned char *)"&Igrave;";
01086         case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
01087           return (const unsigned char *)"&Iacute;";
01088         case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
01089           return (const unsigned char *)"&Icirc;";
01090         case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
01091           return (const unsigned char *)"&Iuml;";
01092         case LATIN_CAPITAL_LETTER_ETH:
01093           return (const unsigned char *)"&ETH;";
01094         case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
01095           return (const unsigned char *)"&Ntilde;";
01096         case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
01097           return (const unsigned char *)"&Ograve;";
01098         case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
01099           return (const unsigned char *)"&Oacute;";
01100         case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
01101           return (const unsigned char *)"&Ocirc;";
01102         case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
01103           return (const unsigned char *)"&Otilde;";
01104         case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
01105           return (const unsigned char *)"&Ouml;";
01106         case MULTIPLICATION_SIGN:
01107           return (const unsigned char *)"&times";
01108         case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
01109           return (const unsigned char *)"&Oslash;";
01110         case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
01111           return (const unsigned char *)"&Ugrave;";
01112         case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
01113           return (const unsigned char *)"&Uacute;";
01114         case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
01115           return (const unsigned char *)"&Ucirc;";
01116         case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
01117           return (const unsigned char *)"&Uuml;";
01118         case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
01119           return (const unsigned char *)"&Yacute;";
01120         case LATIN_CAPITAL_LETTER_THORN:
01121           return (const unsigned char *)"&THORN;";
01122         case LATIN_SMALL_LETTER_SHARP_S:
01123           return (const unsigned char *)"&szlig;";
01124         case LATIN_SMALL_LETTER_A_WITH_GRAVE:
01125           return (const unsigned char *)"&agrave;";
01126         case LATIN_SMALL_LETTER_A_WITH_ACUTE:
01127           return (const unsigned char *)"&acute;";
01128         case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
01129           return (const unsigned char *)"&acirc;";
01130         case LATIN_SMALL_LETTER_A_WITH_TILDE:
01131           return (const unsigned char *)"&atilde;";
01132         case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
01133           return (const unsigned char *)"&auml;";
01134         case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
01135           return (const unsigned char *)"&aring;";
01136         case LATIN_SMALL_LETTER_AE:
01137           return (const unsigned char *)"&aelig;";
01138         case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
01139           return (const unsigned char *)"&ccedil;";
01140         case LATIN_SMALL_LETTER_E_WITH_GRAVE:
01141           return (const unsigned char *)"&egrave;";
01142         case LATIN_SMALL_LETTER_E_WITH_ACUTE:
01143           return (const unsigned char *)"&eacute;";
01144         case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
01145           return (const unsigned char *)"&ecirc;";
01146         case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
01147           return (const unsigned char *)"&euml;";
01148         case LATIN_SMALL_LETTER_I_WITH_GRAVE:
01149           return (const unsigned char *)"&igrave;";
01150         case LATIN_SMALL_LETTER_I_WITH_ACUTE:
01151           return (const unsigned char *)"&iacute;";
01152         case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
01153           return (const unsigned char *)"&icirc;";
01154         case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
01155           return (const unsigned char *)"&iuml;";
01156         case LATIN_SMALL_LETTER_ETH:
01157           return (const unsigned char *)"&eth;";
01158         case LATIN_SMALL_LETTER_N_WITH_TILDE:
01159           return (const unsigned char *)"&ntilde;";
01160         case LATIN_SMALL_LETTER_O_WITH_GRAVE:
01161           return (const unsigned char *)"&ograve;";
01162         case LATIN_SMALL_LETTER_O_WITH_ACUTE:
01163           return (const unsigned char *)"&oacute;";
01164         case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
01165           return (const unsigned char *)"&ocirc;";
01166         case LATIN_SMALL_LETTER_O_WITH_TILDE:
01167           return (const unsigned char *)"&otilde;";
01168         case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
01169           return (const unsigned char *)"&ouml;";
01170         case DIVISION_SIGN:
01171           return (const unsigned char *)"&divide;";
01172         case LATIN_SMALL_LETTER_O_WITH_STROKE:
01173           return (const unsigned char *)"&oslash;";
01174         case LATIN_SMALL_LETTER_U_WITH_GRAVE:
01175           return (const unsigned char *)"&ugrave;";
01176         case LATIN_SMALL_LETTER_U_WITH_ACUTE:
01177           return (const unsigned char *)"&uacute;";
01178         case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
01179           return (const unsigned char *)"&ucirc;";
01180         case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
01181           return (const unsigned char *)"&uuml;";
01182         case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
01183           return (const unsigned char *)"&yacute;";
01184         case LATIN_SMALL_LETTER_THORN:
01185           return (const unsigned char *)"&thorn;";
01186         case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
01187           return (const unsigned char *)"&yuml;";
01188         default:
01189           return (const unsigned char *)UNDEFINED;
01190       }
01191       break;
01192     case SGML:
01193       switch (c) {
01194         default:
01195           return (const unsigned char *)UNDEFINED;
01196       }
01197       break;
01198     default:
01199         return (const unsigned char *)NULL;
01200   }
01201 }
01202 #endif /* deprecated */

Generated at Thu Mar 1 10:05:33 2001 for GOCR API by doxygen 1.2.2, written by Dimitri van Heesch, © 1997-2000