00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifdef __GNUC__
00010 #pragma implementation
00011 #endif
00012
00013 #include <aconf.h>
00014 #include <stdio.h>
00015 #include <stdlib.h>
00016 #include <stddef.h>
00017 #include <math.h>
00018 #include <ctype.h>
00019 #include "GString.h"
00020 #include "gmem.h"
00021 #include "config.h"
00022 #include "Error.h"
00023 #include "GlobalParams.h"
00024 #include "UnicodeMap.h"
00025 #include "GfxState.h"
00026 #include "TextOutputDev.h"
00027
00028 #ifdef MACOS
00029
00030 #include "ICSupport.h"
00031 #endif
00032
00033
00034
00035
00036
00037 TextString::TextString(GfxState *state, fouble fontSize) {
00038 GfxFont *font;
00039 fouble x, y;
00040
00041 state->transform(state->getCurX(), state->getCurY(), &x, &y);
00042 if ((font = state->getFont())) {
00043 yMin = y - font->getAscent() * fontSize;
00044 yMax = y - font->getDescent() * fontSize;
00045 } else {
00046
00047
00048 yMin = y - 0.95 * fontSize;
00049 yMax = y + 0.35 * fontSize;
00050 }
00051 if (yMin == yMax) {
00052
00053
00054 yMin = y;
00055 yMax = y + 1;
00056 }
00057 col = 0;
00058 text = NULL;
00059 xRight = NULL;
00060 len = size = 0;
00061 yxNext = NULL;
00062 xyNext = NULL;
00063 }
00064
00065 TextString::~TextString() {
00066 gfree(text);
00067 gfree(xRight);
00068 }
00069
00070 void TextString::addChar(GfxState *state, fouble x, fouble y,
00071 fouble dx, fouble dy, Unicode u) {
00072 if (len == size) {
00073 size += 16;
00074 text = (Unicode *)grealloc(text, size * sizeof(Unicode));
00075 xRight = (fouble *)grealloc(xRight, size * sizeof(fouble));
00076 }
00077 text[len] = u;
00078 if (len == 0) {
00079 xMin = x;
00080 }
00081 xMax = xRight[len] = x + dx;
00082 ++len;
00083 }
00084
00085
00086
00087
00088
00089 TextPage::TextPage(GBool rawOrderA) {
00090 rawOrder = rawOrderA;
00091 curStr = NULL;
00092 fontSize = 0;
00093 yxStrings = NULL;
00094 xyStrings = NULL;
00095 yxCur1 = yxCur2 = NULL;
00096 nest = 0;
00097 }
00098
00099 TextPage::~TextPage() {
00100 clear();
00101 }
00102
00103 void TextPage::updateFont(GfxState *state) {
00104 GfxFont *font;
00105 fouble *fm;
00106 char *name;
00107 int code;
00108 fouble w;
00109
00110
00111 fontSize = state->getTransformedFontSize();
00112 if ((font = state->getFont()) && font->getType() == fontType3) {
00113
00114
00115
00116
00117
00118
00119 for (code = 0; code < 256; ++code) {
00120 if ((name = ((Gfx8BitFont *)font)->getCharName(code)) &&
00121 name[0] == 'm' && name[1] == '\0') {
00122 break;
00123 }
00124 }
00125 if (code < 256) {
00126 w = ((Gfx8BitFont *)font)->getWidth(code);
00127 if (w != 0) {
00128
00129 fontSize *= w / 0.6;
00130 }
00131 }
00132 fm = font->getFontMatrix();
00133 if (fm[0] != 0) {
00134 fontSize *= fabs(fm[3] / fm[0]);
00135 }
00136 }
00137 }
00138
00139 void TextPage::beginString(GfxState *state) {
00140
00141
00142 if (curStr) {
00143 ++nest;
00144 return;
00145 }
00146
00147 curStr = new TextString(state, fontSize);
00148 }
00149
00150 void TextPage::addChar(GfxState *state, fouble x, fouble y,
00151 fouble dx, fouble dy, Unicode *u, int uLen) {
00152 fouble x1, y1, w1, h1, dx2, dy2;
00153 int n, i;
00154
00155 state->transform(x, y, &x1, &y1);
00156 n = curStr->len;
00157 if (n > 0 &&
00158 x1 - curStr->xRight[n-1] > 0.1 * (curStr->yMax - curStr->yMin)) {
00159 endString();
00160 beginString(state);
00161 }
00162 state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(),
00163 0, &dx2, &dy2);
00164 dx -= dx2;
00165 dy -= dy2;
00166 state->transformDelta(dx, dy, &w1, &h1);
00167 if (uLen != 0) {
00168 w1 /= uLen;
00169 h1 /= uLen;
00170 }
00171 for (i = 0; i < uLen; ++i) {
00172 curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
00173 }
00174 }
00175
00176 void TextPage::endString() {
00177 TextString *p1, *p2;
00178 fouble h, y1, y2;
00179
00180
00181
00182 if (nest > 0) {
00183 --nest;
00184 return;
00185 }
00186
00187
00188
00189 if (curStr->len == 0) {
00190 delete curStr;
00191 curStr = NULL;
00192 return;
00193 }
00194
00195
00196 h = curStr->yMax - curStr->yMin;
00197 y1 = curStr->yMin + 0.5 * h;
00198 y2 = curStr->yMin + 0.8 * h;
00199 if (rawOrder) {
00200 p1 = yxCur1;
00201 p2 = NULL;
00202 } else if ((!yxCur1 ||
00203 (y1 >= yxCur1->yMin &&
00204 (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&
00205 (!yxCur2 ||
00206 (y1 < yxCur2->yMin ||
00207 (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
00208 p1 = yxCur1;
00209 p2 = yxCur2;
00210 } else {
00211 for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
00212 if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin)) {
00213 break;
00214 }
00215 }
00216 yxCur2 = p2;
00217 }
00218 yxCur1 = curStr;
00219 if (p1) {
00220 p1->yxNext = curStr;
00221 } else {
00222 yxStrings = curStr;
00223 }
00224 curStr->yxNext = p2;
00225 curStr = NULL;
00226 }
00227
00228 void TextPage::coalesce() {
00229 TextString *str1, *str2;
00230 fouble space, d;
00231 GBool addSpace;
00232 int n, i;
00233
00234 #if 0 //~ for debugging
00235 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
00236 printf("x=%3d..%3d y=%3d..%3d size=%2d '",
00237 (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
00238 (int)(str1->yMax - str1->yMin));
00239 for (i = 0; i < str1->len; ++i) {
00240 fputc(str1->text[i] & 0xff, stdout);
00241 }
00242 printf("'\n");
00243 }
00244 printf("\n------------------------------------------------------------\n\n");
00245 #endif
00246 str1 = yxStrings;
00247 while (str1 && (str2 = str1->yxNext)) {
00248 space = str1->yMax - str1->yMin;
00249 d = str2->xMin - str1->xMax;
00250 if (((rawOrder &&
00251 ((str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) ||
00252 (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax))) ||
00253 (!rawOrder && str2->yMin < str1->yMax)) &&
00254 d > -0.5 * space && d < space) {
00255 n = str1->len + str2->len;
00256 if ((addSpace = d > 0.1 * space)) {
00257 ++n;
00258 }
00259 str1->size = (n + 15) & ~15;
00260 str1->text = (Unicode *)grealloc(str1->text,
00261 str1->size * sizeof(Unicode));
00262 str1->xRight = (fouble *)grealloc(str1->xRight,
00263 str1->size * sizeof(fouble));
00264 if (addSpace) {
00265 str1->text[str1->len] = 0x20;
00266 str1->xRight[str1->len] = str2->xMin;
00267 ++str1->len;
00268 }
00269 for (i = 0; i < str2->len; ++i) {
00270 str1->text[str1->len] = str2->text[i];
00271 str1->xRight[str1->len] = str2->xRight[i];
00272 ++str1->len;
00273 }
00274 if (str2->xMax > str1->xMax) {
00275 str1->xMax = str2->xMax;
00276 }
00277 if (str2->yMax > str1->yMax) {
00278 str1->yMax = str2->yMax;
00279 }
00280 str1->yxNext = str2->yxNext;
00281 delete str2;
00282 } else {
00283 str1 = str2;
00284 }
00285 }
00286 }
00287
00288 GBool TextPage::findText(Unicode *s, int len,
00289 GBool top, GBool bottom,
00290 fouble *xMin, fouble *yMin,
00291 fouble *xMax, fouble *yMax) {
00292 TextString *str;
00293 Unicode *p;
00294 Unicode u1, u2;
00295 int m, i, j;
00296 fouble x;
00297
00298
00299 for (str = yxStrings; str; str = str->yxNext) {
00300
00301
00302 if (!top && (str->yMax < *yMin ||
00303 (str->yMin < *yMin && str->xMax <= *xMin))) {
00304 continue;
00305 }
00306
00307
00308 if (!bottom && (str->yMin > *yMax ||
00309 (str->yMax > *yMax && str->xMin >= *xMax))) {
00310 return gFalse;
00311 }
00312
00313
00314 m = str->len;
00315 for (i = 0, p = str->text; i <= m - len; ++i, ++p) {
00316
00317
00318 if (!top && str->yMin < *yMin) {
00319 x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2;
00320 if (x < *xMin) {
00321 continue;
00322 }
00323 }
00324
00325
00326 if (!bottom && str->yMax > *yMax) {
00327 x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2;
00328 if (x > *xMax) {
00329 return gFalse;
00330 }
00331 }
00332
00333
00334 for (j = 0; j < len; ++j) {
00335 #if 1 //~ this lowercases Latin A-Z only -- this will eventually be
00336
00337 if (p[j] >= 0x41 && p[j] <= 0x5a) {
00338 u1 = p[j] + 0x20;
00339 } else {
00340 u1 = p[j];
00341 }
00342 if (s[j] >= 0x41 && s[j] <= 0x5a) {
00343 u2 = s[j] + 0x20;
00344 } else {
00345 u2 = s[j];
00346 }
00347 #endif
00348 if (u1 != u2) {
00349 break;
00350 }
00351 }
00352
00353
00354 if (j == len) {
00355 *xMin = (i == 0) ? str->xMin : str->xRight[i-1];
00356 *xMax = str->xRight[i + len - 1];
00357 *yMin = str->yMin;
00358 *yMax = str->yMax;
00359 return gTrue;
00360 }
00361 }
00362 }
00363 return gFalse;
00364 }
00365
00366 GString *TextPage::getText(fouble xMin, fouble yMin,
00367 fouble xMax, fouble yMax) {
00368 GString *s;
00369 UnicodeMap *uMap;
00370 char space[8], eol[16], buf[8];
00371 int spaceLen, eolLen, n;
00372 TextString *str1;
00373 fouble x0, x1, x2, y;
00374 fouble xPrev, yPrev;
00375 int i1, i2, i;
00376 GBool multiLine;
00377
00378 s = new GString();
00379 if (!(uMap = globalParams->getTextEncoding())) {
00380 return s;
00381 }
00382 spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
00383 eolLen = 0;
00384 switch (globalParams->getTextEOL()) {
00385 case eolUnix:
00386 eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
00387 break;
00388 case eolDOS:
00389 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
00390 eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
00391 break;
00392 case eolMac:
00393 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
00394 break;
00395 }
00396 xPrev = yPrev = 0;
00397 multiLine = gFalse;
00398 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
00399 y = 0.5 * (str1->yMin + str1->yMax);
00400 if (y > yMax) {
00401 break;
00402 }
00403 if (y > yMin && str1->xMin < xMax && str1->xMax > xMin) {
00404 x0 = x1 = x2 = str1->xMin;
00405 for (i1 = 0; i1 < str1->len; ++i1) {
00406 x0 = (i1==0) ? str1->xMin : str1->xRight[i1-1];
00407 x1 = str1->xRight[i1];
00408 if (0.5 * (x0 + x1) >= xMin) {
00409 break;
00410 }
00411 }
00412 for (i2 = str1->len - 1; i2 > i1; --i2) {
00413 x1 = (i2==0) ? str1->xMin : str1->xRight[i2-1];
00414 x2 = str1->xRight[i2];
00415 if (0.5 * (x1 + x2) <= xMax) {
00416 break;
00417 }
00418 }
00419 if (s->getLength() > 0) {
00420 if (x0 < xPrev || str1->yMin > yPrev) {
00421 s->append(eol, eolLen);
00422 multiLine = gTrue;
00423 } else {
00424 for (i = 0; i < 4; ++i) {
00425 s->append(space, spaceLen);
00426 }
00427 }
00428 }
00429 for (i = i1; i <= i2; ++i) {
00430 n = uMap->mapUnicode(str1->text[i], buf, sizeof(buf));
00431 s->append(buf, n);
00432 }
00433 xPrev = x2;
00434 yPrev = str1->yMax;
00435 }
00436 }
00437 if (multiLine) {
00438 s->append(eol, eolLen);
00439 }
00440 uMap->decRefCnt();
00441 return s;
00442 }
00443
00444 void TextPage::dump(void *outputStream, TextOutputFunc outputFunc) {
00445 UnicodeMap *uMap;
00446 char space[8], eol[16], eop[8], buf[8];
00447 int spaceLen, eolLen, eopLen, n;
00448 TextString *str1, *str2, *str3;
00449 fouble yMin, yMax;
00450 int col1, col2, d, i;
00451
00452
00453 if (!(uMap = globalParams->getTextEncoding())) {
00454 return;
00455 }
00456 spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
00457 eolLen = 0;
00458 switch (globalParams->getTextEOL()) {
00459 case eolUnix:
00460 eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
00461 break;
00462 case eolDOS:
00463 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
00464 eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
00465 break;
00466 case eolMac:
00467 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
00468 break;
00469 }
00470 eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop));
00471
00472
00473 xyStrings = NULL;
00474 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
00475 for (str2 = NULL, str3 = xyStrings;
00476 str3;
00477 str2 = str3, str3 = str3->xyNext) {
00478 if (str1->xMin < str3->xMin ||
00479 (str1->xMin == str3->xMin && str1->yMin < str3->yMin)) {
00480 break;
00481 }
00482 }
00483 if (str2) {
00484 str2->xyNext = str1;
00485 } else {
00486 xyStrings = str1;
00487 }
00488 str1->xyNext = str3;
00489 }
00490
00491
00492 for (str1 = xyStrings; str1; str1 = str1->xyNext) {
00493 col1 = 0;
00494 for (str2 = xyStrings; str2 != str1; str2 = str2->xyNext) {
00495 if (str1->xMin >= str2->xMax) {
00496 col2 = str2->col + str2->len + 4;
00497 if (col2 > col1) {
00498 col1 = col2;
00499 }
00500 } else if (str1->xMin > str2->xMin) {
00501 col2 = str2->col +
00502 (int)(((str1->xMin - str2->xMin) / (str2->xMax - str2->xMin)) *
00503 str2->len);
00504 if (col2 > col1) {
00505 col1 = col2;
00506 }
00507 }
00508 }
00509 str1->col = col1;
00510 }
00511
00512 #if 0 //~ for debugging
00513 fprintf((FILE *)outputStream, "~~~~~~~~~~\n");
00514 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
00515 fprintf((FILE *)outputStream, "(%4d,%4d) - (%4d,%4d) [%3d] '",
00516 (int)str1->xMin, (int)str1->yMin,
00517 (int)str1->xMax, (int)str1->yMax, str1->col);
00518 for (i = 0; i < str1->len; ++i) {
00519 fputc(str1->text[i] & 0xff, stdout);
00520 }
00521 printf("'\n");
00522 }
00523 fprintf((FILE *)outputStream, "~~~~~~~~~~\n");
00524 #endif
00525
00526
00527 col1 = 0;
00528 yMax = yxStrings ? yxStrings->yMax : fouble(0);
00529 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
00530
00531
00532 if (rawOrder && col1 == 0) {
00533 col1 = str1->col;
00534 } else {
00535 for (; col1 < str1->col; ++col1) {
00536 (*outputFunc)(outputStream, space, spaceLen);
00537 }
00538 }
00539
00540
00541 for (i = 0; i < str1->len; ++i) {
00542 if ((n = uMap->mapUnicode(str1->text[i], buf, sizeof(buf))) > 0) {
00543 (*outputFunc)(outputStream, buf, n);
00544 }
00545 }
00546
00547
00548 col1 += str1->len;
00549
00550
00551 if (str1->yMax > yMax) {
00552 yMax = str1->yMax;
00553 }
00554
00555
00556 if (!(str1->yxNext &&
00557 !(rawOrder && str1->yxNext->yMax < str1->yMin) &&
00558 str1->yxNext->yMin < 0.2*str1->yMin + 0.8*str1->yMax &&
00559 str1->yxNext->xMin >= str1->xMax)) {
00560
00561
00562 (*outputFunc)(outputStream, eol, eolLen);
00563
00564
00565 if (str1->yxNext) {
00566
00567
00568 yMin = str1->yxNext->yMin;
00569 for (str2 = str1->yxNext; str2; str2 = str2->yxNext) {
00570 if (str2->yMin < yMin) {
00571 yMin = str2->yMin;
00572 }
00573 if (!(str2->yxNext && str2->yxNext->yMin < str2->yMax &&
00574 str2->yxNext->xMin >= str2->xMax))
00575 break;
00576 }
00577
00578
00579 d = (int)((yMin - yMax) / (str1->yMax - str1->yMin) + 0.5);
00580
00581
00582 if (rawOrder && d > 2) {
00583 d = 2;
00584 } else if (!rawOrder && d > 5) {
00585 d = 5;
00586 }
00587 for (; d > 0; --d) {
00588 (*outputFunc)(outputStream, eol, eolLen);
00589 }
00590 }
00591
00592
00593 col1 = 0;
00594 yMax = str1->yxNext ? str1->yxNext->yMax : fouble(0);
00595 }
00596 }
00597
00598
00599 (*outputFunc)(outputStream, eol, eolLen);
00600 (*outputFunc)(outputStream, eop, eopLen);
00601 (*outputFunc)(outputStream, eol, eolLen);
00602
00603 uMap->decRefCnt();
00604 }
00605
00606 void TextPage::clear() {
00607 TextString *p1, *p2;
00608
00609 if (curStr) {
00610 delete curStr;
00611 curStr = NULL;
00612 }
00613 for (p1 = yxStrings; p1; p1 = p2) {
00614 p2 = p1->yxNext;
00615 delete p1;
00616 }
00617 yxStrings = NULL;
00618 xyStrings = NULL;
00619 yxCur1 = yxCur2 = NULL;
00620 }
00621
00622
00623
00624
00625
// TextOutputFunc that writes <len> bytes of <text> to <stream>, which
// must be a FILE *.  The result of fwrite is not checked.
static void outputToFile(void *stream, char *text, int len) {
  FILE *out = (FILE *)stream;

  fwrite(text, 1, len, out);
}
00629
00630 TextOutputDev::TextOutputDev(char *fileName, GBool rawOrderA, GBool append) {
00631 text = NULL;
00632 rawOrder = rawOrderA;
00633 ok = gTrue;
00634
00635
00636 needClose = gFalse;
00637 if (fileName) {
00638 if (!strcmp(fileName, "-")) {
00639 outputStream = stdout;
00640 } else if ((outputStream = fopen(fileName, append ? "ab" : "wb"))) {
00641 needClose = gTrue;
00642 } else {
00643 error(-1, "Couldn't open text file '%s'", fileName);
00644 ok = gFalse;
00645 return;
00646 }
00647 outputFunc = &outputToFile;
00648 } else {
00649 outputStream = NULL;
00650 }
00651
00652
00653 text = new TextPage(rawOrder);
00654 }
00655
00656 TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
00657 GBool rawOrderA) {
00658 outputFunc = func;
00659 outputStream = stream;
00660 needClose = gFalse;
00661 rawOrder = rawOrderA;
00662 text = new TextPage(rawOrder);
00663 ok = gTrue;
00664 }
00665
00666 TextOutputDev::~TextOutputDev() {
00667 if (needClose) {
00668 #ifdef MACOS
00669 ICS_MapRefNumAndAssign((short)((FILE *)outputStream)->handle);
00670 #endif
00671 fclose((FILE *)outputStream);
00672 }
00673 if (text) {
00674 delete text;
00675 }
00676 }
00677
00678 void TextOutputDev::startPage(int pageNum, GfxState *state) {
00679 text->clear();
00680 }
00681
00682 void TextOutputDev::endPage() {
00683 text->coalesce();
00684 if (outputStream) {
00685 text->dump(outputStream, outputFunc);
00686 }
00687 }
00688
00689 void TextOutputDev::updateFont(GfxState *state) {
00690 text->updateFont(state);
00691 }
00692
00693 void TextOutputDev::beginString(GfxState *state, GString *s) {
00694 text->beginString(state);
00695 }
00696
00697 void TextOutputDev::endString(GfxState *state) {
00698 text->endString();
00699 }
00700
00701 void TextOutputDev::drawChar(GfxState *state, fouble x, fouble y,
00702 fouble dx, fouble dy,
00703 fouble originX, fouble originY,
00704 CharCode c, Unicode *u, int uLen) {
00705 text->addChar(state, x, y, dx, dy, u, uLen);
00706 }
00707
00708 GBool TextOutputDev::findText(Unicode *s, int len,
00709 GBool top, GBool bottom,
00710 fouble *xMin, fouble *yMin,
00711 fouble *xMax, fouble *yMax) {
00712 return text->findText(s, len, top, bottom, xMin, yMin, xMax, yMax);
00713 }