00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifdef __GNUC__
00010 #pragma implementation
00011 #endif
00012
00013 #include <aconf.h>
00014 #include <stdio.h>
00015 #include <string.h>
00016 #include "gmem.h"
00017 #include "gfile.h"
00018 #include "GString.h"
00019 #include "GList.h"
00020 #include "Error.h"
00021 #include "GlobalParams.h"
00022 #include "UnicodeMap.h"
00023
00024
00025
00026 #define maxExtCode 16
00027
00028 struct UnicodeMapExt {
00029 Unicode u;
00030 char code[maxExtCode];
00031 Guint nBytes;
00032 };
00033
00034
00035
00036 UnicodeMap *UnicodeMap::parse(GString *encodingNameA) {
00037 FILE *f;
00038 UnicodeMap *map;
00039 UnicodeMapRange *range;
00040 UnicodeMapExt *eMap;
00041 int size, eMapsSize;
00042 char buf[256];
00043 int line, nBytes, i, x;
00044 char *tok1, *tok2, *tok3;
00045
00046 if (!(f = globalParams->getUnicodeMapFile(encodingNameA))) {
00047 error(-1, "Couldn't find unicodeMap file for the '%s' encoding",
00048 encodingNameA->getCString());
00049 return NULL;
00050 }
00051
00052 map = new UnicodeMap(encodingNameA->copy());
00053
00054 size = 8;
00055 map->ranges = (UnicodeMapRange *)gmalloc(size * sizeof(UnicodeMapRange));
00056 eMapsSize = 0;
00057
00058 line = 1;
00059 while (getLine(buf, sizeof(buf), f)) {
00060 if ((tok1 = strtok(buf, " \t\r\n")) &&
00061 (tok2 = strtok(NULL, " \t\r\n"))) {
00062 if (!(tok3 = strtok(NULL, " \t\r\n"))) {
00063 tok3 = tok2;
00064 tok2 = tok1;
00065 }
00066 nBytes = strlen(tok3) / 2;
00067 if (nBytes <= 4) {
00068 if (map->len == size) {
00069 size *= 2;
00070 map->ranges = (UnicodeMapRange *)
00071 grealloc(map->ranges, size * sizeof(UnicodeMapRange));
00072 }
00073 range = &map->ranges[map->len];
00074 sscanf(tok1, "%x", &range->start);
00075 sscanf(tok2, "%x", &range->end);
00076 sscanf(tok3, "%x", &range->code);
00077 range->nBytes = nBytes;
00078 ++map->len;
00079 } else if (tok2 == tok1) {
00080 if (map->eMapsLen == eMapsSize) {
00081 eMapsSize += 16;
00082 map->eMaps = (UnicodeMapExt *)
00083 grealloc(map->eMaps, eMapsSize * sizeof(UnicodeMapExt));
00084 }
00085 eMap = &map->eMaps[map->eMapsLen];
00086 sscanf(tok1, "%x", &eMap->u);
00087 for (i = 0; i < nBytes; ++i) {
00088 sscanf(tok3 + i*2, "%2x", &x);
00089 eMap->code[i] = (char)x;
00090 }
00091 eMap->nBytes = nBytes;
00092 ++map->eMapsLen;
00093 } else {
00094 error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding",
00095 line, encodingNameA->getCString());
00096 }
00097 } else {
00098 error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding",
00099 line, encodingNameA->getCString());
00100 }
00101 ++line;
00102 }
00103
00104 return map;
00105 }
00106
00107 UnicodeMap::UnicodeMap(GString *encodingNameA) {
00108 encodingName = encodingNameA;
00109 kind = unicodeMapUser;
00110 ranges = NULL;
00111 len = 0;
00112 eMaps = NULL;
00113 eMapsLen = 0;
00114 refCnt = 1;
00115 }
00116
00117 UnicodeMap::UnicodeMap(char *encodingNameA,
00118 UnicodeMapRange *rangesA, int lenA) {
00119 encodingName = new GString(encodingNameA);
00120 kind = unicodeMapResident;
00121 ranges = rangesA;
00122 len = lenA;
00123 eMaps = NULL;
00124 eMapsLen = 0;
00125 refCnt = 1;
00126 }
00127
00128 UnicodeMap::UnicodeMap(char *encodingNameA, UnicodeMapFunc funcA) {
00129 encodingName = new GString(encodingNameA);
00130 kind = unicodeMapFunc;
00131 func = funcA;
00132 eMaps = NULL;
00133 eMapsLen = 0;
00134 refCnt = 1;
00135 }
00136
00137 UnicodeMap::~UnicodeMap() {
00138 delete encodingName;
00139 if (kind == unicodeMapUser && ranges) {
00140 gfree(ranges);
00141 }
00142 if (eMaps) {
00143 gfree(eMaps);
00144 }
00145 }
00146
00147 void UnicodeMap::incRefCnt() {
00148 ++refCnt;
00149 }
00150
00151 void UnicodeMap::decRefCnt() {
00152 if (--refCnt == 0) {
00153 delete this;
00154 }
00155 }
00156
00157 GBool UnicodeMap::match(GString *encodingNameA) {
00158 return !encodingName->cmp(encodingNameA);
00159 }
00160
00161 int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) {
00162 int a, b, m, n, i, j;
00163 Guint code;
00164
00165 if (kind == unicodeMapFunc) {
00166 return (*func)(u, buf, bufSize);
00167 }
00168
00169 a = 0;
00170 b = len;
00171 if (u < ranges[a].start) {
00172 return 0;
00173 }
00174
00175 while (b - a > 1) {
00176 m = (a + b) / 2;
00177 if (u >= ranges[m].start) {
00178 a = m;
00179 } else if (u < ranges[m].start) {
00180 b = m;
00181 }
00182 }
00183 if (u <= ranges[a].end) {
00184 n = ranges[a].nBytes;
00185 if (n > bufSize) {
00186 return 0;
00187 }
00188 code = ranges[a].code + (u - ranges[a].start);
00189 for (i = n - 1; i >= 0; --i) {
00190 buf[i] = (char)(code & 0xff);
00191 code >>= 8;
00192 }
00193 return n;
00194 }
00195
00196 for (i = 0; i < eMapsLen; ++i) {
00197 if (eMaps[i].u == u) {
00198 n = eMaps[i].nBytes;
00199 for (j = 0; j < n; ++j) {
00200 buf[j] = eMaps[i].code[j];
00201 }
00202 return n;
00203 }
00204 }
00205
00206 return 0;
00207 }
00208
00209
00210
00211 UnicodeMapCache::UnicodeMapCache() {
00212 int i;
00213
00214 for (i = 0; i < unicodeMapCacheSize; ++i) {
00215 cache[i] = NULL;
00216 }
00217 }
00218
00219 UnicodeMapCache::~UnicodeMapCache() {
00220 int i;
00221
00222 for (i = 0; i < unicodeMapCacheSize; ++i) {
00223 if (cache[i]) {
00224 cache[i]->decRefCnt();
00225 }
00226 }
00227 }
00228
00229 UnicodeMap *UnicodeMapCache::getUnicodeMap(GString *encodingName) {
00230 UnicodeMap *map;
00231 int i, j;
00232
00233 if (cache[0] && cache[0]->match(encodingName)) {
00234 cache[0]->incRefCnt();
00235 return cache[0];
00236 }
00237 for (i = 1; i < unicodeMapCacheSize; ++i) {
00238 if (cache[i] && cache[i]->match(encodingName)) {
00239 map = cache[i];
00240 for (j = i; j >= 1; --j) {
00241 cache[j] = cache[j - 1];
00242 }
00243 cache[0] = map;
00244 map->incRefCnt();
00245 return map;
00246 }
00247 }
00248 if ((map = UnicodeMap::parse(encodingName))) {
00249 if (cache[unicodeMapCacheSize - 1]) {
00250 cache[unicodeMapCacheSize - 1]->decRefCnt();
00251 }
00252 for (j = unicodeMapCacheSize - 1; j >= 1; --j) {
00253 cache[j] = cache[j - 1];
00254 }
00255 cache[0] = map;
00256 map->incRefCnt();
00257 return map;
00258 }
00259 return NULL;
00260 }