Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

XRef.cc

Go to the documentation of this file.
00001 //========================================================================
00002 //
00003 // XRef.cc
00004 //
00005 // Copyright 1996-2002 Glyph & Cog, LLC
00006 //
00007 //========================================================================
00008 
00009 #ifdef __GNUC__
00010 #pragma implementation
00011 #endif
00012 
#include <aconf.h>
#include <limits.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include "gmem.h"
#include "Object.h"
#include "Stream.h"
#include "Lexer.h"
#include "Parser.h"
#include "Dict.h"
#ifndef NO_DECRYPTION
#include "Decrypt.h"
#endif
#include "Error.h"
#include "ErrorCodes.h"
#include "XRef.h"
00030 
00031 //------------------------------------------------------------------------
00032 
00033 #define xrefSearchSize 1024     // read this many bytes at end of file
00034                                 //   to look for 'startxref'
00035 
00036 #ifndef NO_DECRYPTION
00037 //------------------------------------------------------------------------
00038 // Permission bits
00039 //------------------------------------------------------------------------
00040 
00041 #define permPrint    (1<<2)
00042 #define permChange   (1<<3)
00043 #define permCopy     (1<<4)
00044 #define permNotes    (1<<5)
00045 #define defPermFlags 0xfffc
00046 #endif
00047 
00048 //------------------------------------------------------------------------
00049 // XRef
00050 //------------------------------------------------------------------------
00051 
00052 XRef::XRef(BaseStream *strA, GString *ownerPassword, GString *userPassword) {
00053   Guint pos;
00054   int i;
00055 
00056   ok = gTrue;
00057   errCode = errNone;
00058   size = 0;
00059   entries = NULL;
00060   streamEnds = NULL;
00061   streamEndsLen = 0;
00062 
00063   // read the trailer
00064   str = strA;
00065   start = str->getStart();
00066   pos = readTrailer();
00067 
00068   // if there was a problem with the trailer,
00069   // try to reconstruct the xref table
00070   if (pos == 0) {
00071     if (!(ok = constructXRef())) {
00072       errCode = errDamaged;
00073       return;
00074     }
00075 
00076   // trailer is ok - read the xref table
00077   } else {
00078     entries = (XRefEntry *)gmalloc(size * sizeof(XRefEntry));
00079     for (i = 0; i < size; ++i) {
00080       entries[i].offset = 0xffffffff;
00081       entries[i].used = gFalse;
00082     }
00083     while (readXRef(&pos)) ;
00084 
00085     // if there was a problem with the xref table,
00086     // try to reconstruct it
00087     if (!ok) {
00088       gfree(entries);
00089       size = 0;
00090       entries = NULL;
00091       if (!(ok = constructXRef())) {
00092         errCode = errDamaged;
00093         return;
00094       }
00095     }
00096   }
00097 
00098   // now set the trailer dictionary's xref pointer so we can fetch
00099   // indirect objects from it
00100   trailerDict.getDict()->setXRef(this);
00101 
00102   // check for encryption
00103 #ifndef NO_DECRYPTION
00104   encrypted = gFalse;
00105 #endif
00106   if (checkEncrypted(ownerPassword, userPassword)) {
00107     ok = gFalse;
00108     errCode = errEncrypted;
00109     return;
00110   }
00111 }
00112 
00113 XRef::~XRef() {
00114   gfree(entries);
00115   trailerDict.free();
00116   if (streamEnds) {
00117     gfree(streamEnds);
00118   }
00119 }
00120 
00121 // Read startxref position, xref table size, and root.  Returns
00122 // first xref position.
00123 Guint XRef::readTrailer() {
00124   Parser *parser;
00125   Object obj;
00126   char buf[xrefSearchSize+1];
00127   int n;
00128   Guint pos, pos1;
00129   char *p;
00130   int c;
00131   int i;
00132 
00133   // read last xrefSearchSize bytes
00134   str->setPos(xrefSearchSize, -1);
00135   for (n = 0; n < xrefSearchSize; ++n) {
00136     if ((c = str->getChar()) == EOF)
00137       break;
00138     buf[n] = c;
00139   }
00140   buf[n] = '\0';
00141 
00142   // find startxref
00143   for (i = n - 9; i >= 0; --i) {
00144     if (!strncmp(&buf[i], "startxref", 9))
00145       break;
00146   }
00147   if (i < 0)
00148     return 0;
00149   for (p = &buf[i+9]; isspace(*p); ++p) ;
00150   pos = lastXRefPos = strToUnsigned(p);
00151 
00152   // find trailer dict by looking after first xref table
00153   // (NB: we can't just use the trailer dict at the end of the file --
00154   // this won't work for linearized files.)
00155   str->setPos(start + pos);
00156   for (i = 0; i < 4; ++i)
00157     buf[i] = str->getChar();
00158   if (strncmp(buf, "xref", 4))
00159     return 0;
00160   pos1 = pos + 4;
00161   while (1) {
00162     str->setPos(start + pos1);
00163     for (i = 0; i < 35; ++i) {
00164       if ((c = str->getChar()) == EOF)
00165         return 0;
00166       buf[i] = c;
00167     }
00168     if (!strncmp(buf, "trailer", 7))
00169       break;
00170     p = buf;
00171     while (isspace(*p)) ++p;
00172     while ('0' <= *p && *p <= '9') ++p;
00173     while (isspace(*p)) ++p;
00174     n = atoi(p);
00175     while ('0' <= *p && *p <= '9') ++p;
00176     while (isspace(*p)) ++p;
00177     if (p == buf)
00178       return 0;
00179     pos1 += (p - buf) + n * 20;
00180   }
00181   pos1 += 7;
00182 
00183   // read trailer dict
00184   obj.initNull();
00185   parser = new Parser(NULL,
00186              new Lexer(NULL,
00187                str->makeSubStream(start + pos1, gFalse, 0, &obj)));
00188   parser->getObj(&trailerDict);
00189   if (trailerDict.isDict()) {
00190     trailerDict.dictLookupNF("Size", &obj);
00191     if (obj.isInt())
00192       size = obj.getInt();
00193     else
00194       pos = 0;
00195     obj.free();
00196     trailerDict.dictLookupNF("Root", &obj);
00197     if (obj.isRef()) {
00198       rootNum = obj.getRefNum();
00199       rootGen = obj.getRefGen();
00200     } else {
00201       pos = 0;
00202     }
00203     obj.free();
00204   } else {
00205     pos = 0;
00206   }
00207   delete parser;
00208 
00209   // return first xref position
00210   return pos;
00211 }
00212 
00213 // Read an xref table and the prev pointer from the trailer.
00214 GBool XRef::readXRef(Guint *pos) {
00215   Parser *parser;
00216   Object obj, obj2;
00217   char s[20];
00218   GBool more;
00219   int first, newSize, n, i, j;
00220   int c;
00221 
00222   // seek to xref in stream
00223   str->setPos(start + *pos);
00224 
00225   // make sure it's an xref table
00226   while ((c = str->getChar()) != EOF && isspace(c)) ;
00227   s[0] = (char)c;
00228   s[1] = (char)str->getChar();
00229   s[2] = (char)str->getChar();
00230   s[3] = (char)str->getChar();
00231   if (!(s[0] == 'x' && s[1] == 'r' && s[2] == 'e' && s[3] == 'f')) {
00232     goto err2;
00233   }
00234 
00235   // read xref
00236   while (1) {
00237     while ((c = str->lookChar()) != EOF && isspace(c)) {
00238       str->getChar();
00239     }
00240     if (c == 't') {
00241       break;
00242     }
00243     for (i = 0; (c = str->getChar()) != EOF && isdigit(c) && i < 20; ++i) {
00244       s[i] = (char)c;
00245     }
00246     if (i == 0) {
00247       goto err2;
00248     }
00249     s[i] = '\0';
00250     first = atoi(s);
00251     while ((c = str->lookChar()) != EOF && isspace(c)) {
00252       str->getChar();
00253     }
00254     for (i = 0; (c = str->getChar()) != EOF && isdigit(c) && i < 20; ++i) {
00255       s[i] = (char)c;
00256     }
00257     if (i == 0) {
00258       goto err2;
00259     }
00260     s[i] = '\0';
00261     n = atoi(s);
00262     while ((c = str->lookChar()) != EOF && isspace(c)) {
00263       str->getChar();
00264     }
00265     // check for buggy PDF files with an incorrect (too small) xref
00266     // table size
00267     if (first + n > size) {
00268       newSize = size + 256;
00269       entries = (XRefEntry *)grealloc(entries, newSize * sizeof(XRefEntry));
00270       for (i = size; i < newSize; ++i) {
00271         entries[i].offset = 0xffffffff;
00272         entries[i].used = gFalse;
00273       }
00274       size = newSize;
00275     }
00276     for (i = first; i < first + n; ++i) {
00277       for (j = 0; j < 20; ++j) {
00278         if ((c = str->getChar()) == EOF) {
00279           goto err2;
00280         }
00281         s[j] = (char)c;
00282       }
00283       if (entries[i].offset == 0xffffffff) {
00284         s[10] = '\0';
00285         entries[i].offset = strToUnsigned(s);
00286         s[16] = '\0';
00287         entries[i].gen = atoi(&s[11]);
00288         if (s[17] == 'n') {
00289           entries[i].used = gTrue;
00290         } else if (s[17] == 'f') {
00291           entries[i].used = gFalse;
00292         } else {
00293           goto err2;
00294         }
00295         // PDF files of patents from the IBM Intellectual Property
00296         // Network have a bug: the xref table claims to start at 1
00297         // instead of 0.
00298         if (i == 1 && first == 1 &&
00299             entries[1].offset == 0 && entries[1].gen == 65535 &&
00300             !entries[1].used) {
00301           i = first = 0;
00302           entries[0] = entries[1];
00303           entries[1].offset = 0xffffffff;
00304         }
00305       }
00306     }
00307   }
00308 
00309   // read prev pointer from trailer dictionary
00310   obj.initNull();
00311   parser = new Parser(NULL,
00312              new Lexer(NULL,
00313                str->makeSubStream(str->getPos(), gFalse, 0, &obj)));
00314   parser->getObj(&obj);
00315   if (!obj.isCmd("trailer")) {
00316     goto err1;
00317   }
00318   obj.free();
00319   parser->getObj(&obj);
00320   if (!obj.isDict()) {
00321     goto err1;
00322   }
00323   obj.getDict()->lookupNF("Prev", &obj2);
00324   if (obj2.isInt()) {
00325     *pos = (Guint)obj2.getInt();
00326     more = gTrue;
00327   } else {
00328     more = gFalse;
00329   }
00330   obj.free();
00331   obj2.free();
00332 
00333   delete parser;
00334   return more;
00335 
00336  err1:
00337   obj.free();
00338  err2:
00339   ok = gFalse;
00340   return gFalse;
00341 }
00342 
00343 // Attempt to construct an xref table for a damaged file.
00344 GBool XRef::constructXRef() {
00345   Parser *parser;
00346   Object obj;
00347   char buf[256];
00348   Guint pos;
00349   int num, gen;
00350   int newSize;
00351   int streamEndsSize;
00352   char *p;
00353   int i;
00354   GBool gotRoot;
00355 
00356   error(0, "PDF file is damaged - attempting to reconstruct xref table...");
00357   gotRoot = gFalse;
00358   streamEndsLen = streamEndsSize = 0;
00359 
00360   str->reset();
00361   while (1) {
00362     pos = str->getPos();
00363     if (!str->getLine(buf, 256)) {
00364       break;
00365     }
00366     p = buf;
00367 
00368     // got trailer dictionary
00369     if (!strncmp(p, "trailer", 7)) {
00370       obj.initNull();
00371       parser = new Parser(NULL,
00372                  new Lexer(NULL,
00373                    str->makeSubStream(start + pos + 7, gFalse, 0, &obj)));
00374       if (!trailerDict.isNone())
00375         trailerDict.free();
00376       parser->getObj(&trailerDict);
00377       if (trailerDict.isDict()) {
00378         trailerDict.dictLookupNF("Root", &obj);
00379         if (obj.isRef()) {
00380           rootNum = obj.getRefNum();
00381           rootGen = obj.getRefGen();
00382           gotRoot = gTrue;
00383         }
00384         obj.free();
00385       } else {
00386         pos = 0;
00387       }
00388       delete parser;
00389 
00390     // look for object
00391     } else if (isdigit(*p)) {
00392       num = atoi(p);
00393       do {
00394         ++p;
00395       } while (*p && isdigit(*p));
00396       if (isspace(*p)) {
00397         do {
00398           ++p;
00399         } while (*p && isspace(*p));
00400         if (isdigit(*p)) {
00401           gen = atoi(p);
00402           do {
00403             ++p;
00404           } while (*p && isdigit(*p));
00405           if (isspace(*p)) {
00406             do {
00407               ++p;
00408             } while (*p && isspace(*p));
00409             if (!strncmp(p, "obj", 3)) {
00410               if (num >= size) {
00411                 newSize = (num + 1 + 255) & ~255;
00412                 entries = (XRefEntry *)
00413                             grealloc(entries, newSize * sizeof(XRefEntry));
00414                 for (i = size; i < newSize; ++i) {
00415                   entries[i].offset = 0xffffffff;
00416                   entries[i].used = gFalse;
00417                 }
00418                 size = newSize;
00419               }
00420               if (!entries[num].used || gen >= entries[num].gen) {
00421                 entries[num].offset = pos - start;
00422                 entries[num].gen = gen;
00423                 entries[num].used = gTrue;
00424               }
00425             }
00426           }
00427         }
00428       }
00429 
00430     } else if (!strncmp(p, "endstream", 9)) {
00431       if (streamEndsLen == streamEndsSize) {
00432         streamEndsSize += 64;
00433         streamEnds = (Guint *)grealloc(streamEnds,
00434                                        streamEndsSize * sizeof(int));
00435       }
00436       streamEnds[streamEndsLen++] = pos;
00437     }
00438   }
00439 
00440   if (gotRoot)
00441     return gTrue;
00442 
00443   error(-1, "Couldn't find trailer dictionary");
00444   return gFalse;
00445 }
00446 
00447 #ifndef NO_DECRYPTION
00448 GBool XRef::checkEncrypted(GString *ownerPassword, GString *userPassword) {
00449   Object encrypt, filterObj, versionObj, revisionObj, lengthObj;
00450   Object ownerKey, userKey, permissions, fileID, fileID1;
00451   GBool encrypted1;
00452   GBool ret;
00453 
00454   ret = gFalse;
00455 
00456   permFlags = defPermFlags;
00457   trailerDict.dictLookup("Encrypt", &encrypt);
00458   if ((encrypted1 = encrypt.isDict())) {
00459     ret = gTrue;
00460     encrypt.dictLookup("Filter", &filterObj);
00461     if (filterObj.isName("Standard")) {
00462       encrypt.dictLookup("V", &versionObj);
00463       encrypt.dictLookup("R", &revisionObj);
00464       encrypt.dictLookup("Length", &lengthObj);
00465       encrypt.dictLookup("O", &ownerKey);
00466       encrypt.dictLookup("U", &userKey);
00467       encrypt.dictLookup("P", &permissions);
00468       trailerDict.dictLookup("ID", &fileID);
00469       if (versionObj.isInt() &&
00470           revisionObj.isInt() &&
00471           ownerKey.isString() && ownerKey.getString()->getLength() == 32 &&
00472           userKey.isString() && userKey.getString()->getLength() == 32 &&
00473           permissions.isInt() &&
00474           fileID.isArray()) {
00475         encVersion = versionObj.getInt();
00476         encRevision = revisionObj.getInt();
00477         if (lengthObj.isInt()) {
00478           keyLength = lengthObj.getInt() / 8;
00479         } else {
00480           keyLength = 5;
00481         }
00482         permFlags = permissions.getInt();
00483         if (encVersion >= 1 && encVersion <= 2 &&
00484             encRevision >= 2 && encRevision <= 3) {
00485           fileID.arrayGet(0, &fileID1);
00486           if (fileID1.isString()) {
00487             if (Decrypt::makeFileKey(encVersion, encRevision, keyLength,
00488                                      ownerKey.getString(), userKey.getString(),
00489                                      permFlags, fileID1.getString(),
00490                                      ownerPassword, userPassword, fileKey,
00491                                      &ownerPasswordOk)) {
00492               if (ownerPassword && !ownerPasswordOk) {
00493                 error(-1, "Incorrect owner password");
00494               }
00495               ret = gFalse;
00496             } else {
00497               error(-1, "Incorrect password");
00498             }
00499           } else {
00500             error(-1, "Weird encryption info");
00501           }
00502           fileID1.free();
00503         } else {
00504           error(-1, "Unsupported version/revision (%d/%d) of Standard security handler",
00505                 encVersion, encRevision);
00506         }
00507       } else {
00508         error(-1, "Weird encryption info");
00509       }
00510       fileID.free();
00511       permissions.free();
00512       userKey.free();
00513       ownerKey.free();
00514       lengthObj.free();
00515       revisionObj.free();
00516       versionObj.free();
00517     } else {
00518       error(-1, "Unknown security handler '%s'",
00519             filterObj.isName() ? filterObj.getName() : "???");
00520     }
00521     filterObj.free();
00522   }
00523   encrypt.free();
00524 
00525   // this flag has to be set *after* we read the O/U/P strings
00526   encrypted = encrypted1;
00527 
00528   return ret;
00529 }
00530 #else
00531 GBool XRef::checkEncrypted(GString *ownerPassword, GString *userPassword) {
00532   Object obj;
00533   GBool encrypted;
00534 
00535   trailerDict.dictLookup("Encrypt", &obj);
00536   if ((encrypted = !obj.isNull())) {
00537     error(-1, "PDF file is encrypted and this version of the Xpdf tools");
00538     error(-1, "was built without decryption support.");
00539   }
00540   obj.free();
00541   return encrypted;
00542 }
00543 #endif
00544 
00545 GBool XRef::okToPrint(GBool ignoreOwnerPW) {
00546 #ifndef NO_DECRYPTION
00547   if ((ignoreOwnerPW || !ownerPasswordOk) && !(permFlags & permPrint)) {
00548     return gFalse;
00549   }
00550 #endif
00551   return gTrue;
00552 }
00553 
00554 GBool XRef::okToChange(GBool ignoreOwnerPW) {
00555 #ifndef NO_DECRYPTION
00556   if ((ignoreOwnerPW || !ownerPasswordOk) && !(permFlags & permChange)) {
00557     return gFalse;
00558   }
00559 #endif
00560   return gTrue;
00561 }
00562 
00563 GBool XRef::okToCopy(GBool ignoreOwnerPW) {
00564 #ifndef NO_DECRYPTION
00565   if ((ignoreOwnerPW || !ownerPasswordOk) && !(permFlags & permCopy)) {
00566     return gFalse;
00567   }
00568 #endif
00569   return gTrue;
00570 }
00571 
00572 GBool XRef::okToAddNotes(GBool ignoreOwnerPW) {
00573 #ifndef NO_DECRYPTION
00574   if ((ignoreOwnerPW || !ownerPasswordOk) && !(permFlags & permNotes)) {
00575     return gFalse;
00576   }
00577 #endif
00578   return gTrue;
00579 }
00580 
00581 Object *XRef::fetch(int num, int gen, Object *obj) {
00582   XRefEntry *e;
00583   Parser *parser;
00584   Object obj1, obj2, obj3;
00585 
00586   // check for bogus ref - this can happen in corrupted PDF files
00587   if (num < 0 || num >= size) {
00588     obj->initNull();
00589     return obj;
00590   }
00591 
00592   e = &entries[num];
00593   if (e->gen == gen && e->offset != 0xffffffff) {
00594     obj1.initNull();
00595     parser = new Parser(this,
00596                new Lexer(this,
00597                  str->makeSubStream(start + e->offset, gFalse, 0, &obj1)));
00598     parser->getObj(&obj1);
00599     parser->getObj(&obj2);
00600     parser->getObj(&obj3);
00601     if (obj1.isInt() && obj1.getInt() == num &&
00602         obj2.isInt() && obj2.getInt() == gen &&
00603         obj3.isCmd("obj")) {
00604 #ifndef NO_DECRYPTION
00605       parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL, keyLength,
00606                      num, gen);
00607 #else
00608       parser->getObj(obj);
00609 #endif
00610     } else {
00611       obj->initNull();
00612     }
00613     obj1.free();
00614     obj2.free();
00615     obj3.free();
00616     delete parser;
00617   } else {
00618     obj->initNull();
00619   }
00620   return obj;
00621 }
00622 
00623 Object *XRef::getDocInfo(Object *obj) {
00624   return trailerDict.dictLookup("Info", obj);
00625 }
00626 
00627 // Added for the pdftex project.
00628 Object *XRef::getDocInfoNF(Object *obj) {
00629   return trailerDict.dictLookupNF("Info", obj);
00630 }
00631 
00632 GBool XRef::getStreamEnd(Guint streamStart, Guint *streamEnd) {
00633   int a, b, m;
00634 
00635   if (streamEndsLen == 0 ||
00636       streamStart > streamEnds[streamEndsLen - 1]) {
00637     return gFalse;
00638   }
00639 
00640   a = -1;
00641   b = streamEndsLen - 1;
00642   // invariant: streamEnds[a] < streamStart <= streamEnds[b]
00643   while (b - a > 1) {
00644     m = (a + b) / 2;
00645     if (streamStart <= streamEnds[m]) {
00646       b = m;
00647     } else {
00648       a = m;
00649     }
00650   }
00651   *streamEnd = streamEnds[b];
00652   return gTrue;
00653 }
00654 
00655 Guint XRef::strToUnsigned(char *s) {
00656   Guint x;
00657   char *p;
00658   int i;
00659 
00660   x = 0;
00661   for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
00662     x = 10 * x + (*p - '0');
00663   }
00664   return x;
00665 }

Generated on Sat Nov 5 16:18:17 2005 for OPIE by  doxygen 1.4.2