Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

Lexer.cc

Go to the documentation of this file.
00001 //========================================================================
00002 //
00003 // Lexer.cc
00004 //
00005 // Copyright 1996-2002 Glyph & Cog, LLC
00006 //
00007 //========================================================================
00008 
00009 #ifdef __GNUC__
00010 #pragma implementation
00011 #endif
00012 
00013 #include <aconf.h>
00014 #include <stdlib.h>
00015 #include <stddef.h>
00016 #include <string.h>
00017 #include <ctype.h>
00018 #include "Lexer.h"
00019 #include "Error.h"
00020 
00021 //------------------------------------------------------------------------
00022 
// Character classification table, indexed by byte value.
//   1 -- white space: NUL, HT, LF, FF, CR, space
//   2 -- ends a name or command without being white space:
//        % ( ) / < > [ ] { }
//   0 -- any other character
// Every non-zero entry ends a name or command.
static char specialChars[256] = {
  // 0x00 - 0x0f: NUL, HT, LF, FF, CR
  1, 0, 0, 0,  0, 0, 0, 0,  0, 1, 1, 0,  1, 1, 0, 0,
  // 0x10 - 0x1f
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  // 0x20 - 0x2f: space, '%', '(', ')', '/'
  1, 0, 0, 0,  0, 2, 0, 0,  2, 2, 0, 0,  0, 0, 0, 2,
  // 0x30 - 0x3f: '<', '>'
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  2, 0, 2, 0,
  // 0x40 - 0x4f
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  // 0x50 - 0x5f: '[', ']'
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 2,  0, 2, 0, 0,
  // 0x60 - 0x6f
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  // 0x70 - 0x7f: '{', '}'
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 2,  0, 2, 0, 0,
  // 0x80 - 0x8f
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  // 0x90 - 0x9f
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  // 0xa0 - 0xaf
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  // 0xb0 - 0xbf
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  // 0xc0 - 0xcf
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  // 0xd0 - 0xdf
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  // 0xe0 - 0xef
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  // 0xf0 - 0xff
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0
};
00043 
00044 //------------------------------------------------------------------------
00045 // Lexer
00046 //------------------------------------------------------------------------
00047 
00048 Lexer::Lexer(XRef *xref, Stream *str) {
00049   Object obj;
00050 
00051   curStr.initStream(str);
00052   streams = new Array(xref);
00053   streams->add(curStr.copy(&obj));
00054   strPtr = 0;
00055   freeArray = gTrue;
00056   curStr.streamReset();
00057 }
00058 
00059 Lexer::Lexer(XRef *xref, Object *obj) {
00060   Object obj2;
00061 
00062   if (obj->isStream()) {
00063     streams = new Array(xref);
00064     freeArray = gTrue;
00065     streams->add(obj->copy(&obj2));
00066   } else {
00067     streams = obj->getArray();
00068     freeArray = gFalse;
00069   }
00070   strPtr = 0;
00071   if (streams->getLength() > 0) {
00072     streams->get(strPtr, &curStr);
00073     curStr.streamReset();
00074   }
00075 }
00076 
00077 Lexer::~Lexer() {
00078   if (!curStr.isNone()) {
00079     curStr.streamClose();
00080     curStr.free();
00081   }
00082   if (freeArray) {
00083     delete streams;
00084   }
00085 }
00086 
00087 int Lexer::getChar() {
00088   int c;
00089 
00090   c = EOF;
00091   while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
00092     curStr.streamClose();
00093     curStr.free();
00094     ++strPtr;
00095     if (strPtr < streams->getLength()) {
00096       streams->get(strPtr, &curStr);
00097       curStr.streamReset();
00098     }
00099   }
00100   return c;
00101 }
00102 
00103 int Lexer::lookChar() {
00104   if (curStr.isNone()) {
00105     return EOF;
00106   }
00107   return curStr.streamLookChar();
00108 }
00109 
00110 Object *Lexer::getObj(Object *obj) {
00111   char *p;
00112   int c, c2;
00113   GBool comment, neg, done;
00114   int numParen;
00115   int xi;
00116   fouble xf, scale;
00117   GString *s;
00118   int n, m;
00119 
00120   // skip whitespace and comments
00121   comment = gFalse;
00122   while (1) {
00123     if ((c = getChar()) == EOF) {
00124       return obj->initEOF();
00125     }
00126     if (comment) {
00127       if (c == '\r' || c == '\n')
00128         comment = gFalse;
00129     } else if (c == '%') {
00130       comment = gTrue;
00131     } else if (specialChars[c] != 1) {
00132       break;
00133     }
00134   }
00135 
00136   // start reading token
00137   switch (c) {
00138 
00139   // number
00140   case '0': case '1': case '2': case '3': case '4':
00141   case '5': case '6': case '7': case '8': case '9':
00142   case '-': case '.':
00143     neg = gFalse;
00144     xi = 0;
00145     if (c == '-') {
00146       neg = gTrue;
00147     } else if (c == '.') {
00148       goto doReal;
00149     } else {
00150       xi = c - '0';
00151     }
00152     while (1) {
00153       c = lookChar();
00154       if (isdigit(c)) {
00155         getChar();
00156         xi = xi * 10 + (c - '0');
00157       } else if (c == '.') {
00158         getChar();
00159         goto doReal;
00160       } else {
00161         break;
00162       }
00163     }
00164     if (neg)
00165       xi = -xi;
00166     obj->initInt(xi);
00167     break;
00168   doReal:
00169     xf = xi;
00170     scale = 0.1;
00171     while (1) {
00172       c = lookChar();
00173       if (!isdigit(c)) {
00174         break;
00175       }
00176       getChar();
00177       xf = xf + scale * (c - '0');
00178       scale *= 0.1;
00179     }
00180     if (neg)
00181       xf = -xf;
00182     obj->initReal(xf);
00183     break;
00184 
00185   // string
00186   case '(':
00187     p = tokBuf;
00188     n = 0;
00189     numParen = 1;
00190     done = gFalse;
00191     s = NULL;
00192     do {
00193       c2 = EOF;
00194       switch (c = getChar()) {
00195 
00196       case EOF:
00197 #if 0
00198       // This breaks some PDF files, e.g., ones from Photoshop.
00199       case '\r':
00200       case '\n':
00201 #endif
00202         error(getPos(), "Unterminated string");
00203         done = gTrue;
00204         break;
00205 
00206       case '(':
00207         ++numParen;
00208         c2 = c;
00209         break;
00210 
00211       case ')':
00212         if (--numParen == 0) {
00213           done = gTrue;
00214         } else {
00215           c2 = c;
00216         }
00217         break;
00218 
00219       case '\\':
00220         switch (c = getChar()) {
00221         case 'n':
00222           c2 = '\n';
00223           break;
00224         case 'r':
00225           c2 = '\r';
00226           break;
00227         case 't':
00228           c2 = '\t';
00229           break;
00230         case 'b':
00231           c2 = '\b';
00232           break;
00233         case 'f':
00234           c2 = '\f';
00235           break;
00236         case '\\':
00237         case '(':
00238         case ')':
00239           c2 = c;
00240           break;
00241         case '0': case '1': case '2': case '3':
00242         case '4': case '5': case '6': case '7':
00243           c2 = c - '0';
00244           c = lookChar();
00245           if (c >= '0' && c <= '7') {
00246             getChar();
00247             c2 = (c2 << 3) + (c - '0');
00248             c = lookChar();
00249             if (c >= '0' && c <= '7') {
00250               getChar();
00251               c2 = (c2 << 3) + (c - '0');
00252             }
00253           }
00254           break;
00255         case '\r':
00256           c = lookChar();
00257           if (c == '\n') {
00258             getChar();
00259           }
00260           break;
00261         case '\n':
00262           break;
00263         case EOF:
00264           error(getPos(), "Unterminated string");
00265           done = gTrue;
00266           break;
00267         default:
00268           c2 = c;
00269           break;
00270         }
00271         break;
00272 
00273       default:
00274         c2 = c;
00275         break;
00276       }
00277 
00278       if (c2 != EOF) {
00279         if (n == tokBufSize) {
00280           if (!s)
00281             s = new GString(tokBuf, tokBufSize);
00282           else
00283             s->append(tokBuf, tokBufSize);
00284           p = tokBuf;
00285           n = 0;
00286         }
00287         *p++ = (char)c2;
00288         ++n;
00289       }
00290     } while (!done);
00291     if (!s)
00292       s = new GString(tokBuf, n);
00293     else
00294       s->append(tokBuf, n);
00295     obj->initString(s);
00296     break;
00297 
00298   // name
00299   case '/':
00300     p = tokBuf;
00301     n = 0;
00302     while ((c = lookChar()) != EOF && !specialChars[c]) {
00303       getChar();
00304       if (c == '#') {
00305         c2 = lookChar();
00306         if (c2 >= '0' && c2 <= '9') {
00307           c = c2 - '0';
00308         } else if (c2 >= 'A' && c2 <= 'F') {
00309           c = c2 - 'A' + 10;
00310         } else if (c2 >= 'a' && c2 <= 'f') {
00311           c = c2 - 'a' + 10;
00312         } else {
00313           goto notEscChar;
00314         }
00315         getChar();
00316         c <<= 4;
00317         c2 = getChar();
00318         if (c2 >= '0' && c2 <= '9') {
00319           c += c2 - '0';
00320         } else if (c2 >= 'A' && c2 <= 'F') {
00321           c += c2 - 'A' + 10;
00322         } else if (c2 >= 'a' && c2 <= 'f') {
00323           c += c2 - 'a' + 10;
00324         } else {
00325           error(getPos(), "Illegal digit in hex char in name");
00326         }
00327       }
00328      notEscChar:
00329       if (++n == tokBufSize) {
00330         error(getPos(), "Name token too long");
00331         break;
00332       }
00333       *p++ = c;
00334     }
00335     *p = '\0';
00336     obj->initName(tokBuf);
00337     break;
00338 
00339   // array punctuation
00340   case '[':
00341   case ']':
00342     tokBuf[0] = c;
00343     tokBuf[1] = '\0';
00344     obj->initCmd(tokBuf);
00345     break;
00346 
00347   // hex string or dict punctuation
00348   case '<':
00349     c = lookChar();
00350 
00351     // dict punctuation
00352     if (c == '<') {
00353       getChar();
00354       tokBuf[0] = tokBuf[1] = '<';
00355       tokBuf[2] = '\0';
00356       obj->initCmd(tokBuf);
00357 
00358     // hex string
00359     } else {
00360       p = tokBuf;
00361       m = n = 0;
00362       c2 = 0;
00363       s = NULL;
00364       while (1) {
00365         c = getChar();
00366         if (c == '>') {
00367           break;
00368         } else if (c == EOF) {
00369           error(getPos(), "Unterminated hex string");
00370           break;
00371         } else if (specialChars[c] != 1) {
00372           c2 = c2 << 4;
00373           if (c >= '0' && c <= '9')
00374             c2 += c - '0';
00375           else if (c >= 'A' && c <= 'F')
00376             c2 += c - 'A' + 10;
00377           else if (c >= 'a' && c <= 'f')
00378             c2 += c - 'a' + 10;
00379           else
00380             error(getPos(), "Illegal character <%02x> in hex string", c);
00381           if (++m == 2) {
00382             if (n == tokBufSize) {
00383               if (!s)
00384                 s = new GString(tokBuf, tokBufSize);
00385               else
00386                 s->append(tokBuf, tokBufSize);
00387               p = tokBuf;
00388               n = 0;
00389             }
00390             *p++ = (char)c2;
00391             ++n;
00392             c2 = 0;
00393             m = 0;
00394           }
00395         }
00396       }
00397       if (!s)
00398         s = new GString(tokBuf, n);
00399       else
00400         s->append(tokBuf, n);
00401       if (m == 1)
00402         s->append((char)(c2 << 4));
00403       obj->initString(s);
00404     }
00405     break;
00406 
00407   // dict punctuation
00408   case '>':
00409     c = lookChar();
00410     if (c == '>') {
00411       getChar();
00412       tokBuf[0] = tokBuf[1] = '>';
00413       tokBuf[2] = '\0';
00414       obj->initCmd(tokBuf);
00415     } else {
00416       error(getPos(), "Illegal character '>'");
00417       obj->initError();
00418     }
00419     break;
00420 
00421   // error
00422   case ')':
00423   case '{':
00424   case '}':
00425     error(getPos(), "Illegal character '%c'", c);
00426     obj->initError();
00427     break;
00428 
00429   // command
00430   default:
00431     p = tokBuf;
00432     *p++ = c;
00433     n = 1;
00434     while ((c = lookChar()) != EOF && !specialChars[c]) {
00435       getChar();
00436       if (++n == tokBufSize) {
00437         error(getPos(), "Command token too long");
00438         break;
00439       }
00440       *p++ = c;
00441     }
00442     *p = '\0';
00443     if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
00444       obj->initBool(gTrue);
00445     } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
00446       obj->initBool(gFalse);
00447     } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
00448       obj->initNull();
00449     } else {
00450       obj->initCmd(tokBuf);
00451     }
00452     break;
00453   }
00454 
00455   return obj;
00456 }
00457 
00458 void Lexer::skipToNextLine() {
00459   int c;
00460 
00461   while (1) {
00462     c = getChar();
00463     if (c == EOF || c == '\n') {
00464       return;
00465     }
00466     if (c == '\r') {
00467       if ((c = lookChar()) == '\n') {
00468         getChar();
00469       }
00470       return;
00471     }
00472   }
00473 }

Generated on Sat Nov 5 16:18:15 2005 for OPIE by  doxygen 1.4.2