/* Domain | Plug-ins | Documentation */
#ifndef _MB_UTF_TRANSLIT_MAP_
#define _MB_UTF_TRANSLIT_MAP_

/*
 * Transliteration tables: multi-byte UTF-8 sequences -> ASCII replacement
 * strings.
 *
 * NOTE(review): the guard macro name starts with an underscore followed by an
 * upper-case letter, which is a reserved identifier in C. It is kept unchanged
 * in case other files test for it.
 */

#include <stddef.h> /* NULL, used by the table sentinel */

/*
 * Two byte UTF transliteration rule.
 *
 * from1, from2 - the two bytes of the sequence, in order.
 * to           - replacement text. Every entry in this header points it at a
 *                string literal, so it must be treated as read-only.
 */
typedef struct
{
    unsigned char from1;
    unsigned char from2;
    char *to;
} trans_two;

/*
 * Three byte UTF transliteration rule (from1..from3 are the sequence bytes,
 * `to` is the replacement text). No table of this type appears in this header.
 * NOTE(review): the name is presumably a typo for "trans_three"; it is kept
 * as-is because it is part of the public interface.
 */
typedef struct
{
    unsigned char from1;
    unsigned char from2;
    unsigned char from3;

    char *to;
} trans_tree;

/*
 * Four byte UTF transliteration rule (from1..from4 are the sequence bytes,
 * `to` is the replacement text). No table of this type appears in this header.
 */
typedef struct
{
    unsigned char from1;
    unsigned char from2;
    unsigned char from3;
    unsigned char from4;

    char *to;
} trans_four;

/*
 * Two-byte transliteration table, terminated by the sentinel {0, 0, NULL}.
 *
 * An empty replacement ("") means the character produces no output.
 *
 * NOTE(review): this is a definition, not a declaration, so including this
 * header from more than one translation unit of the same program produces
 * duplicate definitions of TMAP2. Linkage is left unchanged here.
 */
trans_two TMAP2[] = {
    // Russian symbols (lower case)
    {0xd0, 0xb0, "a"},
    {0xd0, 0xb1, "b"},
    {0xd0, 0xb2, "v"},
    {0xd0, 0xb3, "g"},
    {0xd0, 0xb4, "d"},
    {0xd0, 0xb5, "e"},
    {0xd1, 0x91, "e"},
    {0xd0, 0xb6, "zh"},
    {0xd0, 0xb7, "z"},
    {0xd0, 0xb8, "i"},
    {0xd0, 0xb9, "j"},
    {0xd0, 0xba, "k"},
    {0xd0, 0xbb, "l"},
    {0xd0, 0xbc, "m"},
    {0xd0, 0xbd, "n"},
    {0xd0, 0xbe, "o"},
    {0xd0, 0xbf, "p"},
    {0xd1, 0x80, "r"},
    {0xd1, 0x81, "s"},
    {0xd1, 0x82, "t"},
    {0xd1, 0x83, "u"},
    {0xd1, 0x84, "f"},
    {0xd1, 0x85, "kh"},
    {0xd1, 0x86, "ts"},
    {0xd1, 0x87, "ch"},
    {0xd1, 0x88, "sh"},
    {0xd1, 0x89, "shch"},
    {0xd1, 0x8a, ""},
    {0xd1, 0x8b, "y"},
    {0xd1, 0x8c, ""},
    {0xd1, 0x8d, "eh"},
    {0xd1, 0x8e, "yu"},
    {0xd1, 0x8f, "ya"},
    // Russian symbols (upper case)
    {0xd0, 0x90, "A"},
    {0xd0, 0x91, "B"},
    {0xd0, 0x92, "V"},
    {0xd0, 0x93, "G"},
    {0xd0, 0x94, "D"},
    {0xd0, 0x95, "E"},
    {0xd0, 0x81, "E"},
    {0xd0, 0x96, "ZH"},
    {0xd0, 0x97, "Z"},
    {0xd0, 0x98, "I"},
    {0xd0, 0x99, "J"},
    {0xd0, 0x9a, "K"},
    {0xd0, 0x9b, "L"},
    {0xd0, 0x9c, "M"},
    {0xd0, 0x9d, "N"},
    {0xd0, 0x9e, "O"},
    {0xd0, 0x9f, "P"},
    {0xd0, 0xa0, "R"},
    {0xd0, 0xa1, "S"},
    {0xd0, 0xa2, "T"},
    {0xd0, 0xa3, "U"},
    {0xd0, 0xa4, "F"},
    {0xd0, 0xa5, "KH"},
    {0xd0, 0xa6, "TS"},
    {0xd0, 0xa7, "CH"},
    {0xd0, 0xa8, "SH"},
    {0xd0, 0xa9, "SHCH"},
    {0xd0, 0xaa, ""},
    {0xd0, 0xab, "Y"},
    {0xd0, 0xac, ""},
    {0xd0, 0xad, "EH"},
    {0xd0, 0xae, "YU"},
    {0xd0, 0xaf, "YA"},

    {0xc2, 0xaa, "a"},
    {0xc2, 0xb7, "."},
    {0xc2, 0xba, "o"},

    // ISO-8859-1 letters (transliteration is taken from 'transtab' by Markus Kuhn)
    // They cover special Spanish and German letters and letters of many other European languages
    // NOTE: In HSPC 3.2.2 German symbols with umlauts were transliterated in another way:
    // {0xc3, 0x84, "Ae"},{0xc3, 0x96, "Oe"},{0xc3, 0x9c, "Ue"},{0xc3, 0xa4, "ae"},{0xc3, 0xb6, "oe"},{0xc3, 0xbc, "ue"},
    // Here they are transliterated in the common way, i.e. without e
    // (0xc3 0x97 and 0xc3 0xb7 have no entry.)
    {0xc3, 0x80, "A"},
    {0xc3, 0x81, "A"},
    {0xc3, 0x82, "A"},
    {0xc3, 0x83, "A"},
    {0xc3, 0x84, "A"},
    {0xc3, 0x85, "A"},
    {0xc3, 0x86, "AE"},
    {0xc3, 0x87, "C"},
    {0xc3, 0x88, "E"},
    {0xc3, 0x89, "E"},
    {0xc3, 0x8a, "E"},
    {0xc3, 0x8b, "E"},
    {0xc3, 0x8c, "I"},
    {0xc3, 0x8d, "I"},
    {0xc3, 0x8e, "I"},
    {0xc3, 0x8f, "I"},
    {0xc3, 0x90, "D"},
    {0xc3, 0x91, "N"},
    {0xc3, 0x92, "O"},
    {0xc3, 0x93, "O"},
    {0xc3, 0x94, "O"},
    {0xc3, 0x95, "O"},
    {0xc3, 0x96, "O"},
    {0xc3, 0x98, "O"},
    {0xc3, 0x99, "U"},
    {0xc3, 0x9a, "U"},
    {0xc3, 0x9b, "U"},
    {0xc3, 0x9c, "U"},
    {0xc3, 0x9d, "Y"},
    {0xc3, 0x9e, "Th"},
    {0xc3, 0x9f, "ss"},
    {0xc3, 0xa0, "a"},
    {0xc3, 0xa1, "a"},
    {0xc3, 0xa2, "a"},
    {0xc3, 0xa3, "a"},
    {0xc3, 0xa4, "a"},
    {0xc3, 0xa5, "a"},
    {0xc3, 0xa6, "ae"},
    {0xc3, 0xa7, "c"},
    {0xc3, 0xa8, "e"},
    {0xc3, 0xa9, "e"},
    {0xc3, 0xaa, "e"},
    {0xc3, 0xab, "e"},
    {0xc3, 0xac, "i"},
    {0xc3, 0xad, "i"},
    {0xc3, 0xae, "i"},
    {0xc3, 0xaf, "i"},
    {0xc3, 0xb0, "d"},
    {0xc3, 0xb1, "n"},
    {0xc3, 0xb2, "o"},
    {0xc3, 0xb3, "o"},
    {0xc3, 0xb4, "o"},
    {0xc3, 0xb5, "o"},
    {0xc3, 0xb6, "o"},
    {0xc3, 0xb8, "o"},
    {0xc3, 0xb9, "u"},
    {0xc3, 0xba, "u"},
    {0xc3, 0xbb, "u"},
    {0xc3, 0xbc, "u"},
    {0xc3, 0xbd, "y"},
    {0xc3, 0xbe, "th"},
    {0xc3, 0xbf, "y"},
    // Turkish symbols
    // The six 0xc3 entries below repeat entries of the ISO-8859-1 section
    // with the same replacement; they are redundant but kept so the table
    // contents stay unchanged.
    {0xc3, 0x87, "C"},
    {0xc3, 0x96, "O"},
    {0xc3, 0x9c, "U"},
    {0xc3, 0xa7, "c"},
    {0xc3, 0xb6, "o"},
    {0xc3, 0xbc, "u"},
    {0xc4, 0x9e, "G"},
    {0xc4, 0x9f, "g"},
    {0xc4, 0xb1, "i"},
    {0xc4, 0xb0, "I"},
    {0xc5, 0x9e, "S"},
    {0xc5, 0x9f, "s"},
    // Greek symbols
    {0xce, 0xb1, "a"},
    {0xce, 0xac, "a"},
    {0xce, 0x91, "A"},
    {0xce, 0x86, "A"},
    {0xce, 0xb2, "b"},
    {0xce, 0x92, "B"},
    {0xce, 0xb3, "g"},
    {0xce, 0x93, "G"},
    {0xce, 0xb4, "d"},
    {0xce, 0x94, "D"},
    {0xce, 0xb5, "e"},
    {0xce, 0xad, "e"},
    {0xce, 0x95, "E"},
    {0xce, 0x88, "E"},
    {0xce, 0xb6, "z"},
    {0xce, 0x96, "Z"},
    {0xce, 0xb7, "i"},
    {0xce, 0xae, "i"},
    {0xce, 0x97, "I"},
    {0xce, 0x89, "I"},
    {0xce, 0xb8, "th"},
    {0xce, 0x98, "Th"},
    {0xce, 0xb9, "i"},
    {0xce, 0xaf, "i"},
    {0xcf, 0x8a, "i"},
    {0xce, 0x90, "i"},
    {0xce, 0x99, "I"},
    {0xce, 0x8a, "I"},
    {0xce, 0xaa, "I"},
    {0xce, 0xba, "k"},
    {0xce, 0x9a, "K"},
    {0xce, 0xbb, "l"},
    {0xce, 0x9b, "L"},
    {0xce, 0xbc, "m"},
    {0xce, 0x9c, "M"},
    {0xce, 0xbd, "n"},
    {0xce, 0x9d, "N"},
    {0xce, 0xbe, "ks"},
    {0xce, 0x9e, "Ks"},
    {0xce, 0xbf, "o"},
    {0xcf, 0x8c, "o"},
    {0xce, 0x9f, "O"},
    {0xce, 0x8c, "O"},
    {0xcf, 0x80, "p"},
    {0xce, 0xa0, "P"},
    {0xcf, 0x81, "r"},
    {0xce, 0xa1, "R"},
    {0xcf, 0x83, "s"},
    {0xcf, 0x82, "s"},
    {0xce, 0xa3, "S"},
    {0xcf, 0x84, "t"},
    {0xce, 0xa4, "T"},
    {0xcf, 0x85, "y"},
    {0xcf, 0x8d, "y"},
    {0xcf, 0x8b, "y"},
    {0xce, 0xb0, "y"},
    {0xce, 0xa5, "Y"},
    {0xce, 0x8e, "Y"},
    {0xce, 0xab, "Y"},
    {0xcf, 0x86, "f"},
    {0xce, 0xa6, "F"},
    {0xcf, 0x87, "h"},
    {0xce, 0xa7, "H"},
    {0xcf, 0x88, "ps"},
    {0xce, 0xa8, "Ps"},
    {0xcf, 0x89, "o"},
    {0xcf, 0x8e, "o"},
    {0xce, 0xa9, "O"},
    {0xce, 0x8f, "O"},
    {0, 0, NULL} // sentinel: end of table
};

#endif