00001 #include <stdio.h>
00002 #include <string.h>
00003 #include <qimage.h>
00004 #include "decompress.h"
00005 #include "Reb.h"
00006 #include "my_list.h"
00007 #include "Bkmks.h"
00008 #include "Model.h"
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056 CReb::CReb()
00057 :
00058 fin(NULL), m_indexpages(NULL), m_pagedetails(NULL),tagoffset(0),
00059 tags(NULL), paras(NULL), noparas(0), joins(NULL), nojoins(0)
00060 {
00061 }
00062
00063 CReb::~CReb()
00064 {
00065 if (fin != NULL) fclose(fin);
00066 if (m_indexpages != NULL) delete [] m_indexpages;
00067 if (m_pagedetails != NULL) delete [] m_pagedetails;
00068 if (tags != NULL) delete [] tags;
00069 if (paras != NULL) delete [] paras;
00070 if (joins != NULL) delete [] joins;
00071 }
00072
00073 unsigned int CReb::locate()
00074 {
00075 return m_pagedetails[currentpage.pageno()].pagestart+currentpage.offset();
00076 }
00077
00078 void CReb::locate(unsigned int n)
00079 {
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094 UInt32 jl = 0,jh = nopages-1,jm = (jl+jh)/2;
00095 while (jh > jl+1)
00096 {
00097 if (m_pagedetails[jm].pagestart > n)
00098 {
00099 jh = jm;
00100 }
00101 else
00102 {
00103 jl = jm;
00104 }
00105 jm = (jl+jh)/2;
00106 }
00107
00108 unsuspend();
00109 Page_detail rs = m_pagedetails[jl];
00110 UInt32 val = n - rs.pagestart;
00111 if (jl != currentpage.pageno()) readindex(jl);
00112 currentpage.setoffset(page2pos(jl), jl, ((rs.flags & 8) != 0), rs.len, val);
00113 if (noparas > 0)
00114 {
00115
00116
00117 UInt32 jl = 0,jh = noparas-1,jm = (jl+jh)/2;
00118 while (jh > jl+1)
00119 {
00120 if (paras[jm].pos > val)
00121 {
00122 jh = jm;
00123 }
00124 else
00125 {
00126 jl = jm;
00127 }
00128 jm = (jl+jh)/2;
00129 }
00130
00131 qDebug("TAGS:%s", (const char*)tags[paras[jl].tag]);
00132 tagstring = tags[paras[jl].tag]+"<br>";
00133 tagoffset = 0;
00134 }
00135 unsigned long current = locate();
00136 if (m_currentstart > current || current > m_currentend)
00137 {
00138 start2endSection();
00139 }
00140 if (current != n) qDebug("ERROR:Ended up at %u", current);
00141 }
00142
00143 bool CReb::getFile(const QString& href, const QString& nm)
00144 {
00145 qDebug("File:%s, Name:%s", (const char*)href, (const char*)nm);
00146 QMap<QString, UInt32>::Iterator iter = m_index.find(href);
00147 if (iter != m_index.end())
00148 {
00149 qDebug("REB:BEFORE:%u", locate());
00150 startpage(iter.data());
00151 qDebug("REB:AFTER:%u", locate());
00152 return true;
00153 }
00154 else
00155 {
00156 return false;
00157 }
00158 }
00159
00160 QImage* CReb::getPicture(const QString& ref)
00161 {
00162 QMap<QString, UInt32>::Iterator iter = m_index.find(ref);
00163 if (iter != m_index.end())
00164 {
00165 unsuspend();
00166 Page_detail rs = m_pagedetails[iter.data()];
00167 char* imgbuffer = new char[rs.len];
00168 fseek(fin, page2pos(iter.data()), SEEK_SET);
00169 fread(imgbuffer, rs.len, 1, fin);
00170 QByteArray arr;
00171 arr.assign((const char*)imgbuffer, rs.len);
00172 QImage* qimage = new QImage(arr);
00173 return qimage;
00174 }
00175 else
00176 {
00177 return NULL;
00178 }
00179 }
00180
00181 CList<Bkmk>* CReb::getbkmklist() { return NULL; }
00182
00183 void CReb::home()
00184 {
00185 startpage(m_homepage);
00186 }
00187
00188 int CReb::OpenFile(const char *src)
00189 {
00190 m_binary = false;
00191 if (fin != NULL) fclose(fin);
00192 fin = fopen(src, "r");
00193 if (fin == NULL)
00194 {
00195 return -1;
00196 }
00197 UInt32 type;
00198 fseek(fin, 6, SEEK_SET);
00199 fread(&type, 1, sizeof(type), fin);
00200 qDebug("CREB:Okay %x", type);
00201
00202 if (type == 0x4f56554e || type == 0x574d4954 || type == 0x576d6954)
00203 {
00204 struct stat _stat;
00205 stat(src, &_stat);
00206 file_length = _stat.st_size;
00207 fread(&m_blocksize, 1, sizeof(m_blocksize), fin);
00208 if (type == 0x574d4954 || type == 0x576d6954)
00209 {
00210 if (type == 0x576d6954) m_binary = true;
00211 qDebug("Blocksize(1) %x", m_blocksize);
00212 unsigned char ct = (m_blocksize >> 24) & 0xff;
00213 qDebug("Compress type:%x", ct);
00214 switch (ct)
00215 {
00216 case 0:
00217 m_decompress = UnZip;
00218 break;
00219 case 3:
00220 m_decompress = getdecompressor("PluckerDecompress3");
00221 break;
00222 case 4:
00223 m_decompress = getdecompressor("PluckerDecompress4");
00224 break;
00225 }
00226 if (m_decompress == NULL) return -1;
00227 m_blocksize = 1024*(m_blocksize & 0xffffff);
00228 }
00229 else
00230 {
00231 m_blocksize = 4096;
00232 m_decompress = UnZip;
00233 }
00234 qDebug("Blocksize %u", m_blocksize);
00235 currentpage.init(fin, m_blocksize, m_decompress);
00236 qDebug("Its a REB!!!!");
00237 fseek(fin, 0x18, SEEK_SET);
00238 fread(&toc, 1, sizeof(toc), fin);
00239 qDebug("Expect this to be 128 or 20:%x", toc);
00240 fread(&type, 1, sizeof(type), fin);
00241 qDebug("File length:%u", type);
00242 fseek(fin, toc, SEEK_SET);
00243 fread(&nopages, 1, sizeof(nopages), fin);
00244 m_indexpages = new UInt32[nopages];
00245 m_pagedetails = new Page_detail[nopages];
00246 qDebug("There are %u pages", nopages);
00247 UInt32 loc = 0;
00248 UInt32 homeguess = nopages-1;
00249 QString homeurl;
00250 for (int i = 0; i < nopages; ++i)
00251 {
00252 char name[32];
00253 UInt32 len, pos, flags;
00254 fread(name, 1, 32, fin);
00255 fread(&len, 1, 4, fin);
00256 fread(&pos, 1, 4, fin);
00257 fread(&flags, 1, 4, fin);
00258
00259 m_index[name] = i;
00260 m_pagedetails[i] = Page_detail(loc, len, flags);
00261
00262 if (QString(name).find(".htm", 0, false) >= 0)
00263 {
00264 if (homeguess > i) homeguess = i;
00265 if ((flags & 8) != 0)
00266 {
00267 UInt32 lastpos = ftell(fin);
00268 loc += pagelength(i);
00269 fseek(fin, lastpos, SEEK_SET);
00270 }
00271 else
00272 {
00273 loc += len;
00274 }
00275 }
00276 if ((flags & 2) != 0)
00277 {
00278 UInt32 lastpos = ftell(fin);
00279 RBPage* idx = new RBPage();
00280 idx->init(fin, m_blocksize, m_decompress);
00281 idx->startpage(page2pos(i), i, ((flags & 8) != 0), len);
00282 int c = 0;
00283 while (c != EOF)
00284 {
00285 QString s("");
00286 while (1)
00287 {
00288 c = idx->getch(this);
00289 if (c == 10 || c == EOF) break;
00290 s += c;
00291 }
00292 if (s.left(5) == "BODY=")
00293 {
00294 homeurl = s.right(s.length()-5);
00295 qDebug("Home:%s", (const char*)homeurl);
00296 }
00297 else
00298 {
00299 qDebug("Info:%s", (const char*)s);
00300 }
00301 }
00302 delete idx;
00303 fseek(fin, lastpos, SEEK_SET);
00304 }
00305 }
00306 text_length = loc;
00307 qDebug("Looking for homepage");
00308 if (homeurl.isEmpty())
00309 {
00310 m_homepage = homeguess;
00311 }
00312 else
00313 {
00314 QMap<QString, UInt32>::Iterator iter = m_index.find(homeurl);
00315 if (iter != m_index.end())
00316 {
00317 m_homepage = iter.data();
00318 }
00319 else
00320 {
00321 m_homepage = homeguess;
00322 }
00323 }
00324 m_homepos = m_pagedetails[m_homepage].pagestart;
00325 qDebug("Finding indices");
00326 for (QMap<QString, UInt32>::Iterator iter = m_index.begin(); iter != m_index.end(); ++iter)
00327 {
00328 QString href = iter.key();
00329 if (href.find(".htm", 0, false) >= 0)
00330 {
00331 QString hind = href.left(href.find(".htm", 0, false))+".hidx";
00332
00333 QMap<QString, UInt32>::Iterator iter2 = m_index.find(hind);
00334 if (iter2 != m_index.end())
00335 {
00336 m_indexpages[iter.data()] = iter2.data();
00337 }
00338 }
00339 }
00340 qDebug("Going home");
00341 home();
00342 return 0;
00343 }
00344 else
00345 {
00346 char * tmp = (char*)(&type);
00347 for (int i = 0; i < 4; ++i) qDebug("%d:%c", i, tmp[i]);
00348 return -1;
00349 }
00350 }
00351
00352 UInt32 CReb::page2pos(UInt32 page)
00353 {
00354 fseek(fin, toc+40+44*page, SEEK_SET);
00355 UInt32 pos;
00356 fread(&pos, 1, 4, fin);
00357 return pos;
00358 }
00359
00360 UInt32 CReb::pagelength(UInt32 pagenum)
00361 {
00362 fseek(fin, toc+40+44*pagenum, SEEK_SET);
00363 UInt32 pos;
00364 fread(&pos, 1, 4, fin);
00365 fseek(fin, pos+4, SEEK_SET);
00366 UInt32 len;
00367 fread(&len, 1, sizeof(len), fin);
00368 return len;
00369 }
00370
00371 void CReb::readindex(UInt32 cp)
00372 {
00373 if (joins != NULL)
00374 {
00375 delete [] joins;
00376 joins = NULL;
00377 }
00378 if (tags != NULL)
00379 {
00380 delete [] tags;
00381 tags = NULL;
00382 }
00383 if (paras != NULL)
00384 {
00385 delete [] paras;
00386 paras = NULL;
00387 }
00388 noparas = 0;
00389 nojoins = 0;
00390 names.clear();
00391
00392 UInt32 rspage = m_indexpages[cp];
00393 if (rspage != 0)
00394 {
00395 Page_detail rs = m_pagedetails[rspage];
00396 int count = 0;
00397 RBPage* idx = new RBPage();
00398 idx->init(fin, m_blocksize, m_decompress);
00399 idx->startpage(page2pos(rspage), rspage, ((rs.flags & 8) != 0), rs.len);
00400 int c = 0;
00401 int phase = 0;
00402 int i;
00403 if (m_binary)
00404 {
00405 count = idx->getuint(this);
00406 qDebug("tag count:%d", count);
00407 tags = new QString[count];
00408 for (int i = 0; i < count; ++i)
00409 {
00410 QString s;
00411 while (1)
00412 {
00413 c = idx->getch(this);
00414 if (c == 0 || c == EOF) break;
00415 s += c;
00416 }
00417 unsigned short val = idx->getuint(this);
00418 if (val != 0xffff)
00419 {
00420 tags[i] = tags[val]+s;
00421 }
00422 else
00423 {
00424 tags[i] = s;
00425 }
00426
00427 }
00428 noparas = idx->getint(this);
00429 qDebug("Para count %d", noparas);
00430 paras = new ParaRef[noparas];
00431 for (int i = 0; i < noparas; ++i)
00432 {
00433 paras[i] = ParaRef(idx->getint(this), idx->getuint(this));
00434 }
00435 count = idx->getint(this);
00436 qDebug("Name count %d", count);
00437 for (int i = 0; i < count; ++i)
00438 {
00439 QString s;
00440 while (1)
00441 {
00442 c = idx->getch(this);
00443 if (c == 0 || c == EOF) break;
00444 s += c;
00445 }
00446 int val = idx->getint(this);
00447 names[s.mid(1,s.length()-2)] = val;
00448 qDebug("names[%s] = %d", (const char*)s, val);
00449 }
00450 count = idx->getint(this);
00451 qDebug("Join count %d", count);
00452 if (count > 0)
00453 {
00454 nojoins = count+2;
00455 joins = new UInt32[count+2];
00456 joins[0] = 0;
00457 joins[count+1] = currentpage.length();
00458 for (int i = 1; i < count+1; ++i)
00459 {
00460 joins[i] = idx->getint(this);
00461 }
00462 }
00463 }
00464 else
00465 {
00466 while (c != EOF)
00467 {
00468 QString s("");
00469 while (1)
00470 {
00471 c = idx->getch(this);
00472 if (c == 10 || c == EOF) break;
00473 s += c;
00474 }
00475
00476 if (count > 0)
00477 {
00478 --count;
00479 int sp = s.findRev(' ');
00480 QString l = s.left(sp);
00481 int val = s.right(s.length()-sp).toInt();
00482 switch (phase)
00483 {
00484 case 4:
00485
00486 joins[i++] = val;
00487 break;
00488 case 3:
00489
00490 names[l.mid(1,l.length()-2)] = val;
00491 break;
00492 case 1:
00493
00494 if (val >= 0)
00495 {
00496 tags[i++] = tags[val]+l;
00497 }
00498 else
00499 {
00500 tags[i++] = l;
00501 }
00502
00503 break;
00504 case 2:
00505 paras[i++] = ParaRef(QString(l).toInt(), val);
00506
00507 break;
00508 default:
00509 qDebug("%s:%d", (const char*)l, val);
00510 break;
00511 }
00512 }
00513 else
00514 {
00515 QString key = "[tags ";
00516 if (s.left(key.length()) == key)
00517 {
00518 phase = 1;
00519 i = 0;
00520 count = s.mid(key.length(),s.length()-key.length()-1).toInt();
00521 qDebug("%s:%s:%d", (const char*)key, (const char*)s, count);
00522 tags = new QString[count];
00523 }
00524 key = "[paragraphs ";
00525 if (s.left(key.length()) == key)
00526 {
00527 phase = 2;
00528 i = 0;
00529 count = s.mid(key.length(),s.length()-key.length()-1).toInt();
00530 qDebug("%s:%s:%d", (const char*)key, (const char*)s, count);
00531 paras = new ParaRef[count];
00532 noparas = count;
00533 }
00534 key = "[names ";
00535 if (s.left(key.length()) == key)
00536 {
00537 phase = 3;
00538 count = s.mid(key.length(),s.length()-key.length()-1).toInt();
00539 qDebug("%s:%s:%d", (const char*)key, (const char*)s, count);
00540 }
00541 key = "[joins ";
00542 if (s.left(key.length()) == key)
00543 {
00544 phase = 4;
00545 count = s.mid(key.length(),s.length()-key.length()-1).toInt();
00546 qDebug("%s:%s:%d", (const char*)key, (const char*)s, count);
00547 nojoins = count+2;
00548 i = 1;
00549 joins = new UInt32[count+2];
00550 joins[0] = 0;
00551 joins[count+1] = currentpage.length();
00552 qDebug("%s:%s:%d", (const char*)key, (const char*)s, count);
00553 }
00554 qDebug("ZC:%s", (const char*)s);
00555 }
00556 }
00557 }
00558
00559 delete idx;
00560 }
00561 }
00562
00563 bool CReb::findanchor(const QString& _info)
00564 {
00565 QMap<QString, int>::Iterator iter = names.find(_info);
00566 if (iter != names.end())
00567 {
00568 locate(iter.data()+m_pagedetails[currentpage.pageno()].pagestart);
00569 return true;
00570 }
00571 return false;
00572 }
00573
#ifdef USEQPE
// Forward to the base-class suspend logic, passing this reader's stream.
void CReb::suspend()
{
    CExpander::suspend(fin);
}

// Forward to the base-class unsuspend logic, passing this reader's stream.
void CReb::unsuspend()
{
    CExpander::unsuspend(fin);
}
#endif
00584
00585 #ifndef __STATIC
00586 extern "C"
00587 {
00588 CExpander* newcodec() { return new CReb; }
00589 }
00590 #endif
00591
00592 void CReb::startpage(UInt32 pgno)
00593 {
00594 Page_detail rs = m_pagedetails[pgno];
00595 unsuspend();
00596 readindex(pgno);
00597 currentpage.startpage(page2pos(pgno), pgno, ((rs.flags & 8) != 0), rs.len);
00598 }
00599
00600 void CReb::startpage(UInt32 _cp, bool _isCompressed, UInt32 _len)
00601 {
00602 unsuspend();
00603 readindex(_cp);
00604 currentpage.startpage(page2pos(_cp), _cp, _isCompressed, _len);
00605 }
00606
00607 void RBPage::initpage(UInt32 pos, size_t _cp, bool _isCompressed, UInt32 _len)
00608 {
00609 filepos = pos;
00610 m_pageno = _cp;
00611 m_Compressed = _isCompressed;
00612 m_pagelen = _len;
00613 currentchunk = 0;
00614 pageoffset = 0;
00615
00616 if (chunklist != NULL) delete [] chunklist;
00617
00618 fseek(fin, filepos, SEEK_SET);
00619 if (m_Compressed)
00620 {
00621 fread(&nochunks, 1, sizeof(nochunks), fin);
00622 fread(&m_pagelen, 1, sizeof(m_pagelen), fin);
00623 chunklist = new UInt32[nochunks];
00624 fread(chunklist, nochunks, 4, fin);
00625 }
00626 else
00627 {
00628 chunklist = NULL;
00629 nochunks = (_len+m_blocksize-1)/m_blocksize;
00630 }
00631 m_startoff = 0;
00632 m_endoff = m_pagelen;
00633 chunkpos = ftell(fin);
00634 qDebug("Compressed:%u Expanded:%u", _len, m_pagelen);
00635 }
00636
00637 void RBPage::startpage(UInt32 pos, UInt32 _cp, bool _isCompressed, UInt32 _len)
00638 {
00639 initpage(pos, _cp, _isCompressed, _len);
00640 readchunk();
00641 }
00642
00643 int CReb::getch()
00644 {
00645 if (tagoffset < tagstring.length())
00646 return tagstring[tagoffset++].unicode();
00647 else
00648 return currentpage.getch(this);
00649 }
00650
00651 int RBPage::getch(CReb* parent)
00652 {
00653 if (chunkoffset >= chunklen)
00654 {
00655 if (++currentchunk >= nochunks)
00656 {
00657 --currentchunk;
00658 return EOF;
00659 }
00660 pageoffset += chunklen;
00661 parent->unsuspend();
00662 readchunk();
00663 }
00664 if (offset() == m_endoff) return EOF;
00665 return chunk[chunkoffset++];
00666 }
00667
00668 unsigned short int RBPage::getuint(CReb* parent)
00669 {
00670 unsigned short int ret = 0;
00671 char *buffer = (char*)(&ret);
00672 for (int i = 0; i < 2; ++i)
00673 {
00674 int ch = getch(parent);
00675 if (ch == EOF) return 0;
00676 buffer[i] = ch;
00677 }
00678 return ret;
00679 }
00680
00681 int RBPage::getint(CReb* parent)
00682 {
00683 int ret = 0;
00684 char *buffer = (char*)(&ret);
00685 for (int i = 0; i < 4; ++i)
00686 {
00687 int ch = getch(parent);
00688 if (ch == EOF) return 0;
00689 buffer[i] = ch;
00690 }
00691 return ret;
00692 }
00693
00694 void RBPage::readchunk()
00695 {
00696 if (m_Compressed)
00697 {
00698 chunkoffset = 0;
00699 fseek(fin, chunkpos, SEEK_SET);
00700 UInt8* inbuf = new UInt8[chunklist[currentchunk]];
00701 fread(inbuf, 1, chunklist[currentchunk], fin);
00702 chunklen = (*m_decompress)(inbuf, chunklist[currentchunk], chunk, m_blocksize);
00703 delete [] inbuf;
00704 chunkpos = ftell(fin);
00705 }
00706 else
00707 {
00708 chunkoffset = 0;
00709 chunklen = m_blocksize;
00710 if (m_blocksize*(currentchunk+1) > m_pagelen)
00711 {
00712 chunklen = m_pagelen - currentchunk*m_blocksize;
00713 }
00714 fseek(fin, chunkpos, SEEK_SET);
00715 chunklen = fread(chunk, 1, chunklen, fin);
00716 chunkpos = ftell(fin);
00717 }
00718 }
00719
00720 void RBPage::setoffset(UInt32 pos, size_t _cp, bool _isCompressed, UInt32 _len, UInt32 _offset)
00721 {
00722 if (m_pageno != _cp)
00723 {
00724 initpage(pos, _cp, _isCompressed, _len);
00725 }
00726 else
00727 {
00728 if (m_Compressed)
00729 {
00730 chunkpos = filepos + sizeof(nochunks) + sizeof(m_pagelen) + 4*nochunks;
00731 }
00732 else
00733 {
00734 chunkpos = filepos;
00735 }
00736 }
00737
00738 currentchunk = _offset/m_blocksize;
00739 pageoffset = m_blocksize*currentchunk;
00740 if (m_Compressed)
00741 {
00742 for (int i = 0; i < currentchunk; ++i)
00743 {
00744 chunkpos += chunklist[i];
00745 }
00746 }
00747 else
00748 {
00749 chunkpos += pageoffset;
00750 }
00751 readchunk();
00752 chunkoffset = _offset - pageoffset;
00753 }
00754
00755 void CReb::start2endSection()
00756 {
00757 if (m_pagedetails != NULL)
00758 {
00759 if (nojoins > 0)
00760 {
00761
00762
00763 UInt32 jl = 0,jh = nojoins-1,jm = (jl+jh)/2;
00764 while (jh > jl+1)
00765 {
00766 if (joins[jm] > currentpage.offset())
00767 {
00768 jh = jm;
00769 }
00770 else
00771 {
00772 jl = jm;
00773 }
00774 jm = (jl+jh)/2;
00775 }
00776
00777 currentpage.m_startoff = joins[jl];
00778 currentpage.m_endoff = joins[jl+1]-1;
00779
00780 }
00781 m_currentstart = m_pagedetails[currentpage.pageno()].pagestart+currentpage.m_startoff;
00782 m_currentend = m_pagedetails[currentpage.pageno()].pagestart+currentpage.m_endoff;
00783 }
00784 else
00785 {
00786 m_currentstart = m_currentend = 0;
00787 }
00788 qDebug("s2e:[%u, %u, %u]", m_currentstart, locate(), m_currentend);
00789 }