Domain Plug-ins Documentation
translitmap.h
Go to the documentation of this file.
00001 #ifndef _MB_UTF_TRANSLIT_MAP_
00002 #define _MB_UTF_TRANSLIT_MAP_
00003 
// Transliteration rule for a two-byte UTF-8 sequence.
//
// from1, from2 - the two bytes of the source sequence, in order.
// to           - NUL-terminated replacement text. In TMAP2 it always points
//                at a string literal, so it must not be written through.
//                An empty string means the character is dropped; a NULL
//                pointer is used by TMAP2 for its terminating entry.
typedef struct
{
  unsigned char from1;
  unsigned char from2;
  char *to;
} trans_two;
00011 
// Transliteration rule for a three-byte UTF-8 sequence.
//
// from1..from3 - the three bytes of the source sequence, in order.
// to           - NUL-terminated replacement text.
//
// NOTE(review): the type name looks like a misspelling of "three"; it is
// kept unchanged because it is the public name of this type.
typedef struct
{
  unsigned char from1;
  unsigned char from2;
  unsigned char from3;

  char *to;
} trans_tree;
00021 
// Transliteration rule for a four-byte UTF-8 sequence.
//
// from1..from4 - the four bytes of the source sequence, in order.
// to           - NUL-terminated replacement text.
typedef struct
{
  unsigned char from1;
  unsigned char from2;
  unsigned char from3;
  unsigned char from4;

  char *to;
} trans_four;
00032 
00033 trans_two TMAP2[] = {
00034   // Russian symbols
00035   {0xd0, 0xb0, "a"},
00036   {0xd0, 0xb1, "b"},
00037   {0xd0, 0xb2, "v"},
00038   {0xd0, 0xb3, "g"},
00039   {0xd0, 0xb4, "d"},
00040   {0xd0, 0xb5, "e"},
00041   {0xd1, 0x91, "e"},
00042   {0xd0, 0xb6, "zh"},
00043   {0xd0, 0xb7, "z"},
00044   {0xd0, 0xb8, "i"},
00045   {0xd0, 0xb9, "j"},
00046   {0xd0, 0xba, "k"},
00047   {0xd0, 0xbb, "l"},
00048   {0xd0, 0xbc, "m"},
00049   {0xd0, 0xbd, "n"},
00050   {0xd0, 0xbe, "o"},
00051   {0xd0, 0xbf, "p"},
00052   {0xd1, 0x80, "r"},
00053   {0xd1, 0x81, "s"},
00054   {0xd1, 0x82, "t"},
00055   {0xd1, 0x83, "u"},
00056   {0xd1, 0x84, "f"},
00057   {0xd1, 0x85, "kh"},
00058   {0xd1, 0x86, "ts"},
00059   {0xd1, 0x87, "ch"},
00060   {0xd1, 0x88, "sh"},
00061   {0xd1, 0x89, "shch"},
00062   {0xd1, 0x8a, ""},
00063   {0xd1, 0x8b, "y"},
00064   {0xd1, 0x8c, ""},
00065   {0xd1, 0x8d, "eh"},
00066   {0xd1, 0x8e, "yu"},
00067   {0xd1, 0x8f, "ya"},
00068   {0xd0, 0x90, "A"},
00069   {0xd0, 0x91, "B"},
00070   {0xd0, 0x92, "V"},
00071   {0xd0, 0x93, "G"},
00072   {0xd0, 0x94, "D"},
00073   {0xd0, 0x95, "E"},
00074   {0xd0, 0x81, "E"},
00075   {0xd0, 0x96, "ZH"},
00076   {0xd0, 0x97, "Z"},
00077   {0xd0, 0x98, "I"},
00078   {0xd0, 0x99, "J"},
00079   {0xd0, 0x9a, "K"},
00080   {0xd0, 0x9b, "L"},
00081   {0xd0, 0x9c, "M"},
00082   {0xd0, 0x9d, "N"},
00083   {0xd0, 0x9e, "O"},
00084   {0xd0, 0x9f, "P"},
00085   {0xd0, 0xa0, "R"},
00086   {0xd0, 0xa1, "S"},
00087   {0xd0, 0xa2, "T"},
00088   {0xd0, 0xa3, "U"},
00089   {0xd0, 0xa4, "F"},
00090   {0xd0, 0xa5, "KH"},
00091   {0xd0, 0xa6, "TS"},
00092   {0xd0, 0xa7, "CH"},
00093   {0xd0, 0xa8, "SH"},
00094   {0xd0, 0xa9, "SHCH"},
00095   {0xd0, 0xaa, ""},
00096   {0xd0, 0xab, "Y"},
00097   {0xd0, 0xac, ""},
00098   {0xd0, 0xad, "EH"},
00099   {0xd0, 0xae, "YU"},
00100   {0xd0, 0xaf, "YA"},
00101   
00102   {0xc2, 0xaa, "a"},
00103   {0xc2, 0xb7, "."},
00104   {0xc2, 0xba, "o"},
00105   
00106   // ISO-8859-1 letters (transliteration is taken from 'transtab' by Markus Kuhn)
00107   // They cover special Spanish and German letters and letters of many other european languages
00108   // NOTE: In HSPC 3.2.2 German symbols with umlauts were transliterated in other way:
00109   // {0xc3, 0x84, "Ae"},{0xc3, 0x96, "Oe"},{0xc3, 0x9c, "Ue"},{0xc3, 0xa4, "ae"},{0xc3, 0xb6, "oe"},{0xc3, 0xbc, "ue"},
00110   // Here they are transliterated in common way, i.e. without e
00111   {0xc3, 0x80, "A"},
00112   {0xc3, 0x81, "A"},
00113   {0xc3, 0x82, "A"},
00114   {0xc3, 0x83, "A"},
00115   {0xc3, 0x84, "A"},
00116   {0xc3, 0x85, "A"},
00117   {0xc3, 0x86, "AE"},
00118   {0xc3, 0x87, "C"},
00119   {0xc3, 0x88, "E"},
00120   {0xc3, 0x89, "E"},
00121   {0xc3, 0x8a, "E"},
00122   {0xc3, 0x8b, "E"},
00123   {0xc3, 0x8c, "I"},
00124   {0xc3, 0x8d, "I"},
00125   {0xc3, 0x8e, "I"},
00126   {0xc3, 0x8f, "I"},
00127   {0xc3, 0x90, "D"},
00128   {0xc3, 0x91, "N"},
00129   {0xc3, 0x92, "O"},
00130   {0xc3, 0x93, "O"},
00131   {0xc3, 0x94, "O"},
00132   {0xc3, 0x95, "O"},
00133   {0xc3, 0x96, "O"},
00134   {0xc3, 0x98, "O"},
00135   {0xc3, 0x99, "U"},
00136   {0xc3, 0x9a, "U"},
00137   {0xc3, 0x9b, "U"},
00138   {0xc3, 0x9c, "U"},
00139   {0xc3, 0x9d, "Y"},
00140   {0xc3, 0x9e, "Th"},
00141   {0xc3, 0x9f, "ss"},
00142   {0xc3, 0xa0, "a"},
00143   {0xc3, 0xa1, "a"},
00144   {0xc3, 0xa2, "a"},
00145   {0xc3, 0xa3, "a"},
00146   {0xc3, 0xa4, "a"},
00147   {0xc3, 0xa5, "a"},
00148   {0xc3, 0xa6, "ae"},
00149   {0xc3, 0xa7, "c"},
00150   {0xc3, 0xa8, "e"},
00151   {0xc3, 0xa9, "e"},
00152   {0xc3, 0xaa, "e"},
00153   {0xc3, 0xab, "e"},
00154   {0xc3, 0xac, "i"},
00155   {0xc3, 0xad, "i"},
00156   {0xc3, 0xae, "i"},
00157   {0xc3, 0xaf, "i"},
00158   {0xc3, 0xb0, "d"},
00159   {0xc3, 0xb1, "n"},
00160   {0xc3, 0xb2, "o"},
00161   {0xc3, 0xb3, "o"},
00162   {0xc3, 0xb4, "o"},
00163   {0xc3, 0xb5, "o"},
00164   {0xc3, 0xb6, "o"},
00165   {0xc3, 0xb8, "o"},
00166   {0xc3, 0xb9, "u"},
00167   {0xc3, 0xba, "u"},
00168   {0xc3, 0xbb, "u"},
00169   {0xc3, 0xbc, "u"},
00170   {0xc3, 0xbd, "y"},
00171   {0xc3, 0xbe, "th"},
00172   {0xc3, 0xbf, "y"},
00173   // Turkish symbols
00174   {0xc3, 0x87, "C"},
00175   {0xc3, 0x96, "O"},
00176   {0xc3, 0x9c, "U"},
00177   {0xc3, 0xa7, "c"},
00178   {0xc3, 0xb6, "o"},
00179   {0xc3, 0xbc, "u"},
00180   {0xc4, 0x9e, "G"},
00181   {0xc4, 0x9f, "g"},
00182   {0xc4, 0xb1, "i"},
00183   {0xc4, 0xb0, "I"},
00184   {0xc5, 0x9e, "S"},
00185   {0xc5, 0x9f, "s"},
00186   // Greek symbols
00187   {0xce, 0xb1, "a"},
00188   {0xce, 0xac, "a"},
00189   {0xce, 0x91, "A"},
00190   {0xce, 0x86, "A"},
00191   {0xce, 0xb2, "b"},
00192   {0xce, 0x92, "B"},
00193   {0xce, 0xb3, "g"},
00194   {0xce, 0x93, "G"},
00195   {0xce, 0xb4, "d"},
00196   {0xce, 0x94, "D"},
00197   {0xce, 0xb5, "e"},
00198   {0xce, 0xad, "e"},
00199   {0xce, 0x95, "E"},
00200   {0xce, 0x88, "E"},
00201   {0xce, 0xb6, "z"},
00202   {0xce, 0x96, "Z"},
00203   {0xce, 0xb7, "i"},
00204   {0xce, 0xae, "i"},
00205   {0xce, 0x97, "I"},
00206   {0xce, 0x89, "I"},
00207   {0xce, 0xb8, "th"},
00208   {0xce, 0x98, "Th"},
00209   {0xce, 0xb9, "i"},
00210   {0xce, 0xaf, "i"},
00211   {0xcf, 0x8a, "i"},
00212   {0xce, 0x90, "i"},
00213   {0xce, 0x99, "I"},
00214   {0xce, 0x8a, "I"},
00215   {0xce, 0xaa, "I"},
00216   {0xce, 0xba, "k"},
00217   {0xce, 0x9a, "K"},
00218   {0xce, 0xbb, "l"},
00219   {0xce, 0x9b, "L"},
00220   {0xce, 0xbc, "m"},
00221   {0xce, 0x9c, "M"},
00222   {0xce, 0xbd, "n"},
00223   {0xce, 0x9d, "N"},
00224   {0xce, 0xbe, "ks"},
00225   {0xce, 0x9e, "Ks"},
00226   {0xce, 0xbf, "o"},
00227   {0xcf, 0x8c, "o"},
00228   {0xce, 0x9f, "O"},
00229   {0xce, 0x8c, "O"},
00230   {0xcf, 0x80, "p"},
00231   {0xce, 0xa0, "P"},
00232   {0xcf, 0x81, "r"},
00233   {0xce, 0xa1, "R"},
00234   {0xcf, 0x83, "s"},
00235   {0xcf, 0x82, "s"},
00236   {0xce, 0xa3, "S"},
00237   {0xcf, 0x84, "t"},
00238   {0xce, 0xa4, "T"},
00239   {0xcf, 0x85, "y"},
00240   {0xcf, 0x8d, "y"},
00241   {0xcf, 0x8b, "y"},
00242   {0xce, 0xb0, "y"},
00243   {0xce, 0xa5, "Y"},
00244   {0xce, 0x8e, "Y"},
00245   {0xce, 0xab, "Y"},
00246   {0xcf, 0x86, "f"},
00247   {0xce, 0xa6, "F"},
00248   {0xcf, 0x87, "h"},
00249   {0xce, 0xa7, "H"},
00250   {0xcf, 0x88, "ps"},
00251   {0xce, 0xa8, "Ps"},
00252   {0xcf, 0x89, "o"},
00253   {0xcf, 0x8e, "o"},
00254   {0xce, 0xa9, "O"},
00255   {0xce, 0x8f, "O"},
00256   {0, 0, NULL}
00257 };
00258 
00259 
00260 
00261 #endif
00262 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines