00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifdef __GNUC__
00010 #pragma implementation
00011 #endif
00012
00013 #include <aconf.h>
00014 #include <stdio.h>
00015 #include <string.h>
00016 #include "gmem.h"
00017 #include "gfile.h"
00018 #include "GString.h"
00019 #include "Error.h"
00020 #include "GlobalParams.h"
00021 #include "PSTokenizer.h"
00022 #include "CharCodeToUnicode.h"
00023
00024
00025
00026 #define maxUnicodeString 8
00027
00028 struct CharCodeToUnicodeString {
00029 CharCode c;
00030 Unicode u[maxUnicodeString];
00031 int len;
00032 };
00033
00034
00035
// Character source (for PSTokenizer) that reads from a NUL-terminated
// in-memory string.  <data> points to a 'char *' cursor; the cursor is
// advanced past each character handed out.
//
// Returns the next character as a value in the range 0..255, or EOF
// once the terminating NUL is reached.  At the end of the string the
// cursor is left untouched, so further calls keep returning EOF.
static int getCharFromString(void *data) {
  char **cursor = (char **)data;
  char *p = *cursor;

  if (!*p) {
    return EOF;
  }

  // Go through unsigned char, as fgetc() does: where plain char is
  // signed, a byte >= 0x80 would otherwise be returned as a negative
  // value, and 0xff would be indistinguishable from EOF, silently
  // cutting the input short.
  int c = (unsigned char)*p++;
  *cursor = p;
  return c;
}
00049
// Character source (for PSTokenizer) that reads from a stdio stream.
// <data> is the FILE * to read from; the result is exactly what
// fgetc() yields for that stream.
static int getCharFromFile(void *data) {
  FILE *stream = (FILE *)data;

  return fgetc(stream);
}
00053
00054
00055
00056 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
00057 FILE *f;
00058 Unicode *mapA;
00059 CharCode size, mapLenA;
00060 char buf[64];
00061 Unicode u;
00062 CharCodeToUnicode *ctu;
00063
00064 if (!(f = globalParams->getCIDToUnicodeFile(collectionA))) {
00065 error(-1, "Couldn't find cidToUnicode file for the '%s' collection",
00066 collectionA->getCString());
00067 return NULL;
00068 }
00069
00070 size = 32768;
00071 mapA = (Unicode *)gmalloc(size * sizeof(Unicode));
00072 mapLenA = 0;
00073
00074 while (getLine(buf, sizeof(buf), f)) {
00075 if (mapLenA == size) {
00076 size *= 2;
00077 mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode));
00078 }
00079 if (sscanf(buf, "%x", &u) == 1) {
00080 mapA[mapLenA] = u;
00081 } else {
00082 error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection",
00083 (int)(mapLenA + 1), collectionA->getCString());
00084 mapA[mapLenA] = 0;
00085 }
00086 ++mapLenA;
00087 }
00088
00089 ctu = new CharCodeToUnicode(collectionA->copy(), mapA, mapLenA, gTrue,
00090 NULL, 0);
00091 gfree(mapA);
00092 return ctu;
00093 }
00094
00095 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
00096 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0);
00097 }
00098
00099 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
00100 CharCodeToUnicode *ctu;
00101 char *p;
00102
00103 ctu = new CharCodeToUnicode(NULL);
00104 p = buf->getCString();
00105 ctu->parseCMap1(&getCharFromString, &p, nBits);
00106 return ctu;
00107 }
00108
00109 void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
00110 int nBits) {
00111 PSTokenizer *pst;
00112 char tok1[256], tok2[256], tok3[256];
00113 int nDigits, n1, n2, n3;
00114 CharCode oldLen, i;
00115 CharCode code1, code2;
00116 Unicode u;
00117 char uHex[5];
00118 int j;
00119 GString *name;
00120 FILE *f;
00121
00122 nDigits = nBits / 4;
00123 pst = new PSTokenizer(getCharFunc, data);
00124 pst->getToken(tok1, sizeof(tok1), &n1);
00125 while (pst->getToken(tok2, sizeof(tok2), &n2)) {
00126 if (!strcmp(tok2, "usecmap")) {
00127 if (tok1[0] == '/') {
00128 name = new GString(tok1 + 1);
00129 if ((f = globalParams->findToUnicodeFile(name))) {
00130 parseCMap1(&getCharFromFile, f, nBits);
00131 fclose(f);
00132 } else {
00133 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
00134 name->getCString());
00135 }
00136 delete name;
00137 }
00138 pst->getToken(tok1, sizeof(tok1), &n1);
00139 } else if (!strcmp(tok2, "beginbfchar")) {
00140 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
00141 if (!strcmp(tok1, "endbfchar")) {
00142 break;
00143 }
00144 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
00145 !strcmp(tok2, "endbfchar")) {
00146 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
00147 break;
00148 }
00149 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
00150 tok2[0] == '<' && tok2[n2 - 1] == '>')) {
00151 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
00152 continue;
00153 }
00154 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
00155 if (sscanf(tok1 + 1, "%x", &code1) != 1) {
00156 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
00157 continue;
00158 }
00159 if (code1 >= mapLen) {
00160 oldLen = mapLen;
00161 mapLen = (code1 + 256) & ~255;
00162 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
00163 for (i = oldLen; i < mapLen; ++i) {
00164 map[i] = 0;
00165 }
00166 }
00167 if (n2 == 6) {
00168 if (sscanf(tok2 + 1, "%x", &u) != 1) {
00169 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
00170 continue;
00171 }
00172 map[code1] = u;
00173 } else {
00174 map[code1] = 0;
00175 if (sMapLen == sMapSize) {
00176 sMapSize += 8;
00177 sMap = (CharCodeToUnicodeString *)
00178 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
00179 }
00180 sMap[sMapLen].c = code1;
00181 sMap[sMapLen].len = (n2 - 2) / 4;
00182 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
00183 strncpy(uHex, tok2 + 1 + j*4, 4);
00184 uHex[4] = '\0';
00185 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
00186 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
00187 }
00188 }
00189 ++sMapLen;
00190 }
00191 }
00192 pst->getToken(tok1, sizeof(tok1), &n1);
00193 } else if (!strcmp(tok2, "beginbfrange")) {
00194 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
00195 if (!strcmp(tok1, "endbfrange")) {
00196 break;
00197 }
00198 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
00199 !strcmp(tok2, "endbfrange") ||
00200 !pst->getToken(tok3, sizeof(tok3), &n3) ||
00201 !strcmp(tok3, "endbfrange")) {
00202 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
00203 break;
00204 }
00205 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
00206 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' &&
00207 tok3[0] == '<' && tok3[n3 - 1] == '>')) {
00208 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
00209 continue;
00210 }
00211 tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0';
00212 if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
00213 sscanf(tok2 + 1, "%x", &code2) != 1) {
00214 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
00215 continue;
00216 }
00217 if (code2 >= mapLen) {
00218 oldLen = mapLen;
00219 mapLen = (code2 + 256) & ~255;
00220 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
00221 for (i = oldLen; i < mapLen; ++i) {
00222 map[i] = 0;
00223 }
00224 }
00225 if (n3 == 6) {
00226 if (sscanf(tok3 + 1, "%x", &u) != 1) {
00227 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
00228 continue;
00229 }
00230 for (; code1 <= code2; ++code1) {
00231 map[code1] = u++;
00232 }
00233 } else {
00234 if (sMapLen + (int)(code2 - code1 + 1) > sMapSize) {
00235 sMapSize = (sMapSize + (code2 - code1 + 1) + 7) & ~7;
00236 sMap = (CharCodeToUnicodeString *)
00237 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
00238 }
00239 for (i = 0; code1 <= code2; ++code1, ++i) {
00240 map[code1] = 0;
00241 sMap[sMapLen].c = code1;
00242 sMap[sMapLen].len = (n3 - 2) / 4;
00243 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
00244 strncpy(uHex, tok3 + 1 + j*4, 4);
00245 uHex[4] = '\0';
00246 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
00247 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
00248 }
00249 }
00250 sMap[sMapLen].u[sMap[sMapLen].len - 1] += i;
00251 ++sMapLen;
00252 }
00253 }
00254 }
00255 pst->getToken(tok1, sizeof(tok1), &n1);
00256 } else {
00257 strcpy(tok1, tok2);
00258 }
00259 }
00260 delete pst;
00261 }
00262
00263 CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) {
00264 CharCode i;
00265
00266 collection = collectionA;
00267 mapLen = 256;
00268 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
00269 for (i = 0; i < mapLen; ++i) {
00270 map[i] = 0;
00271 }
00272 sMap = NULL;
00273 sMapLen = sMapSize = 0;
00274 refCnt = 1;
00275 }
00276
00277 CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA,
00278 CharCode mapLenA, GBool copyMap,
00279 CharCodeToUnicodeString *sMapA,
00280 int sMapLenA) {
00281 collection = collectionA;
00282 mapLen = mapLenA;
00283 if (copyMap) {
00284 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
00285 memcpy(map, mapA, mapLen * sizeof(Unicode));
00286 } else {
00287 map = mapA;
00288 }
00289 sMap = sMapA;
00290 sMapLen = sMapSize = sMapLenA;
00291 refCnt = 1;
00292 }
00293
00294 CharCodeToUnicode::~CharCodeToUnicode() {
00295 if (collection) {
00296 delete collection;
00297 }
00298 gfree(map);
00299 if (sMap) {
00300 gfree(sMap);
00301 }
00302 }
00303
00304 void CharCodeToUnicode::incRefCnt() {
00305 ++refCnt;
00306 }
00307
00308 void CharCodeToUnicode::decRefCnt() {
00309 if (--refCnt == 0) {
00310 delete this;
00311 }
00312 }
00313
00314 GBool CharCodeToUnicode::match(GString *collectionA) {
00315 return collection && !collection->cmp(collectionA);
00316 }
00317
00318 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
00319 int i, j;
00320
00321 if (c >= mapLen) {
00322 return 0;
00323 }
00324 if (map[c]) {
00325 u[0] = map[c];
00326 return 1;
00327 }
00328 for (i = 0; i < sMapLen; ++i) {
00329 if (sMap[i].c == c) {
00330 for (j = 0; j < sMap[i].len && j < size; ++j) {
00331 u[j] = sMap[i].u[j];
00332 }
00333 return j;
00334 }
00335 }
00336 return 0;
00337 }
00338
00339
00340
00341 CIDToUnicodeCache::CIDToUnicodeCache() {
00342 int i;
00343
00344 for (i = 0; i < cidToUnicodeCacheSize; ++i) {
00345 cache[i] = NULL;
00346 }
00347 }
00348
00349 CIDToUnicodeCache::~CIDToUnicodeCache() {
00350 int i;
00351
00352 for (i = 0; i < cidToUnicodeCacheSize; ++i) {
00353 if (cache[i]) {
00354 cache[i]->decRefCnt();
00355 }
00356 }
00357 }
00358
00359 CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) {
00360 CharCodeToUnicode *ctu;
00361 int i, j;
00362
00363 if (cache[0] && cache[0]->match(collection)) {
00364 cache[0]->incRefCnt();
00365 return cache[0];
00366 }
00367 for (i = 1; i < cidToUnicodeCacheSize; ++i) {
00368 if (cache[i] && cache[i]->match(collection)) {
00369 ctu = cache[i];
00370 for (j = i; j >= 1; --j) {
00371 cache[j] = cache[j - 1];
00372 }
00373 cache[0] = ctu;
00374 ctu->incRefCnt();
00375 return ctu;
00376 }
00377 }
00378 if ((ctu = CharCodeToUnicode::parseCIDToUnicode(collection))) {
00379 if (cache[cidToUnicodeCacheSize - 1]) {
00380 cache[cidToUnicodeCacheSize - 1]->decRefCnt();
00381 }
00382 for (j = cidToUnicodeCacheSize - 1; j >= 1; --j) {
00383 cache[j] = cache[j - 1];
00384 }
00385 cache[0] = ctu;
00386 ctu->incRefCnt();
00387 return ctu;
00388 }
00389 return NULL;
00390 }