Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

UnicodeMap.cc

Go to the documentation of this file.
00001 //========================================================================
00002 //
00003 // UnicodeMap.cc
00004 //
00005 // Copyright 2001-2002 Glyph & Cog, LLC
00006 //
00007 //========================================================================
00008 
00009 #ifdef __GNUC__
00010 #pragma implementation
00011 #endif
00012 
00013 #include <aconf.h>
00014 #include <stdio.h>
00015 #include <string.h>
00016 #include "gmem.h"
00017 #include "gfile.h"
00018 #include "GString.h"
00019 #include "GList.h"
00020 #include "Error.h"
00021 #include "GlobalParams.h"
00022 #include "UnicodeMap.h"
00023 
00024 //------------------------------------------------------------------------
00025 
00026 #define maxExtCode 16
00027 
00028 struct UnicodeMapExt {
00029   Unicode u;                    // Unicode char
00030   char code[maxExtCode];
00031   Guint nBytes;
00032 };
00033 
00034 //------------------------------------------------------------------------
00035 
00036 UnicodeMap *UnicodeMap::parse(GString *encodingNameA) {
00037   FILE *f;
00038   UnicodeMap *map;
00039   UnicodeMapRange *range;
00040   UnicodeMapExt *eMap;
00041   int size, eMapsSize;
00042   char buf[256];
00043   int line, nBytes, i, x;
00044   char *tok1, *tok2, *tok3;
00045 
00046   if (!(f = globalParams->getUnicodeMapFile(encodingNameA))) {
00047     error(-1, "Couldn't find unicodeMap file for the '%s' encoding",
00048           encodingNameA->getCString());
00049     return NULL;
00050   }
00051 
00052   map = new UnicodeMap(encodingNameA->copy());
00053 
00054   size = 8;
00055   map->ranges = (UnicodeMapRange *)gmalloc(size * sizeof(UnicodeMapRange));
00056   eMapsSize = 0;
00057 
00058   line = 1;
00059   while (getLine(buf, sizeof(buf), f)) {
00060     if ((tok1 = strtok(buf, " \t\r\n")) &&
00061         (tok2 = strtok(NULL, " \t\r\n"))) {
00062       if (!(tok3 = strtok(NULL, " \t\r\n"))) {
00063         tok3 = tok2;
00064         tok2 = tok1;
00065       }
00066       nBytes = strlen(tok3) / 2;
00067       if (nBytes <= 4) {
00068         if (map->len == size) {
00069           size *= 2;
00070           map->ranges = (UnicodeMapRange *)
00071             grealloc(map->ranges, size * sizeof(UnicodeMapRange));
00072         }
00073         range = &map->ranges[map->len];
00074         sscanf(tok1, "%x", &range->start);
00075         sscanf(tok2, "%x", &range->end);
00076         sscanf(tok3, "%x", &range->code);
00077         range->nBytes = nBytes;
00078         ++map->len;
00079       } else if (tok2 == tok1) {
00080         if (map->eMapsLen == eMapsSize) {
00081           eMapsSize += 16;
00082           map->eMaps = (UnicodeMapExt *)
00083             grealloc(map->eMaps, eMapsSize * sizeof(UnicodeMapExt));
00084         }
00085         eMap = &map->eMaps[map->eMapsLen];
00086         sscanf(tok1, "%x", &eMap->u);
00087         for (i = 0; i < nBytes; ++i) {
00088           sscanf(tok3 + i*2, "%2x", &x);
00089           eMap->code[i] = (char)x;
00090         }
00091         eMap->nBytes = nBytes;
00092         ++map->eMapsLen;
00093       } else {
00094         error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding",
00095               line, encodingNameA->getCString());
00096       }
00097     } else {
00098       error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding",
00099             line, encodingNameA->getCString());
00100     }
00101     ++line;
00102   }
00103 
00104   return map;
00105 }
00106 
00107 UnicodeMap::UnicodeMap(GString *encodingNameA) {
00108   encodingName = encodingNameA;
00109   kind = unicodeMapUser;
00110   ranges = NULL;
00111   len = 0;
00112   eMaps = NULL;
00113   eMapsLen = 0;
00114   refCnt = 1;
00115 }
00116 
00117 UnicodeMap::UnicodeMap(char *encodingNameA,
00118                        UnicodeMapRange *rangesA, int lenA) {
00119   encodingName = new GString(encodingNameA);
00120   kind = unicodeMapResident;
00121   ranges = rangesA;
00122   len = lenA;
00123   eMaps = NULL;
00124   eMapsLen = 0;
00125   refCnt = 1;
00126 }
00127 
00128 UnicodeMap::UnicodeMap(char *encodingNameA, UnicodeMapFunc funcA) {
00129   encodingName = new GString(encodingNameA);
00130   kind = unicodeMapFunc;
00131   func = funcA;
00132   eMaps = NULL;
00133   eMapsLen = 0;
00134   refCnt = 1;
00135 }
00136 
00137 UnicodeMap::~UnicodeMap() {
00138   delete encodingName;
00139   if (kind == unicodeMapUser && ranges) {
00140     gfree(ranges);
00141   }
00142   if (eMaps) {
00143     gfree(eMaps);
00144   }
00145 }
00146 
00147 void UnicodeMap::incRefCnt() {
00148   ++refCnt;
00149 }
00150 
00151 void UnicodeMap::decRefCnt() {
00152   if (--refCnt == 0) {
00153     delete this;
00154   }
00155 }
00156 
00157 GBool UnicodeMap::match(GString *encodingNameA) {
00158   return !encodingName->cmp(encodingNameA);
00159 }
00160 
00161 int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) {
00162   int a, b, m, n, i, j;
00163   Guint code;
00164 
00165   if (kind == unicodeMapFunc) {
00166     return (*func)(u, buf, bufSize);
00167   }
00168 
00169   a = 0;
00170   b = len;
00171   if (u < ranges[a].start) {
00172     return 0;
00173   }
00174   // invariant: ranges[a].start <= u < ranges[b].start
00175   while (b - a > 1) {
00176     m = (a + b) / 2;
00177     if (u >= ranges[m].start) {
00178       a = m;
00179     } else if (u < ranges[m].start) {
00180       b = m;
00181     }
00182   }
00183   if (u <= ranges[a].end) {
00184     n = ranges[a].nBytes;
00185     if (n > bufSize) {
00186       return 0;
00187     }
00188     code = ranges[a].code + (u - ranges[a].start);
00189     for (i = n - 1; i >= 0; --i) {
00190       buf[i] = (char)(code & 0xff);
00191       code >>= 8;
00192     }
00193     return n;
00194   }
00195 
00196   for (i = 0; i < eMapsLen; ++i) {
00197     if (eMaps[i].u == u) {
00198       n = eMaps[i].nBytes;
00199       for (j = 0; j < n; ++j) {
00200         buf[j] = eMaps[i].code[j];
00201       }
00202       return n;
00203     }
00204   }
00205 
00206   return 0;
00207 }
00208 
00209 //------------------------------------------------------------------------
00210 
00211 UnicodeMapCache::UnicodeMapCache() {
00212   int i;
00213 
00214   for (i = 0; i < unicodeMapCacheSize; ++i) {
00215     cache[i] = NULL;
00216   }
00217 }
00218 
00219 UnicodeMapCache::~UnicodeMapCache() {
00220   int i;
00221 
00222   for (i = 0; i < unicodeMapCacheSize; ++i) {
00223     if (cache[i]) {
00224       cache[i]->decRefCnt();
00225     }
00226   }
00227 }
00228 
00229 UnicodeMap *UnicodeMapCache::getUnicodeMap(GString *encodingName) {
00230   UnicodeMap *map;
00231   int i, j;
00232 
00233   if (cache[0] && cache[0]->match(encodingName)) {
00234     cache[0]->incRefCnt();
00235     return cache[0];
00236   }
00237   for (i = 1; i < unicodeMapCacheSize; ++i) {
00238     if (cache[i] && cache[i]->match(encodingName)) {
00239       map = cache[i];
00240       for (j = i; j >= 1; --j) {
00241         cache[j] = cache[j - 1];
00242       }
00243       cache[0] = map;
00244       map->incRefCnt();
00245       return map;
00246     }
00247   }
00248   if ((map = UnicodeMap::parse(encodingName))) {
00249     if (cache[unicodeMapCacheSize - 1]) {
00250       cache[unicodeMapCacheSize - 1]->decRefCnt();
00251     }
00252     for (j = unicodeMapCacheSize - 1; j >= 1; --j) {
00253       cache[j] = cache[j - 1];
00254     }
00255     cache[0] = map;
00256     map->incRefCnt();
00257     return map;
00258   }
00259   return NULL;
00260 }

Generated on Sat Nov 5 16:18:17 2005 for OPIE by  doxygen 1.4.2