00001 #include <qmap.h>
00002 #include <qfileinfo.h>
00003 #include <qtextstream.h>
00004 #include <qdir.h>
00005 #ifdef USEQPE
00006 #include <qpe/global.h>
00007 #endif
00008 #include "CDrawBuffer.h"
00009 #include "striphtml.h"
00010 #include "hrule.h"
00011
00012 #include <qregexp.h>
00013 #include <qimage.h>
00014 #include <qpixmap.h>
00015
00016
00017
// Convert a single hexadecimal digit character to its numeric value.
//
// c       character code of the digit: '0'-'9', 'a'-'f' or 'A'-'F'
// returns the value 0..15, or 0 when c is not a hexadecimal digit
//
// Upper-case digits are accepted because not every caller lower-cases the
// attribute text before handing it to parse_color().
static unsigned char h2i(unsigned char c)
{
    if ('0' <= c && c <= '9')
    {
        return c - '0';
    }
    if ('a' <= c && c <= 'f')
    {
        return c - 'a' + 10;
    }
    if ('A' <= c && c <= 'F')
    {
        return c - 'A' + 10;
    }
    return 0;
}
00031
00032 static void parse_color(const QString& attr, unsigned char& r, unsigned char& g, unsigned char& b)
00033 {
00034 r = g = b = 0;
00035 if (attr.length() >= 7 && attr[0] == '#')
00036 {
00037 r = h2i(attr[1].unicode());
00038 r = 16*r + h2i(attr[2].unicode());
00039 g = h2i(attr[3].unicode());
00040 g = 16*g + h2i(attr[4].unicode());
00041 b = h2i(attr[5].unicode());
00042 b = 16*b + h2i(attr[6].unicode());
00043 }
00044 else if (attr == "red")
00045 {
00046 r = 255;
00047 }
00048 else if (attr == "green")
00049 {
00050 g = 255;
00051 }
00052 else if (attr == "blue")
00053 {
00054 b = 255;
00055 }
00056 else if (attr == "white")
00057 {
00058 r = g = b = 255;
00059 }
00060 else if (attr == "black")
00061 {
00062 r = g = b = 0;
00063 }
00064 else
00065 {
00066 qDebug("Don't understand colour \"%s\"", (const char*)attr);
00067 }
00068 }
00069
// Definition of the static member m_nav (one object shared by all striphtml
// instances); hyperlink() calls m_nav.back() to resolve "history.back()" links.
00070 CNavigation_base<htmlmark> striphtml::m_nav;
00071
00072 void striphtml::skipblock(const QString& _ent)
00073 {
00074 tchar ch = '>';
00075 CStyle dummy;
00076 QString ent;
00077 unsigned long pos;
00078 do
00079 {
00080 while (ch != '<' && ch != UEOF)
00081 {
00082 mygetch(ch, dummy, pos);
00083 }
00084
00085 ch = skip_ws();
00086
00087 ent = getname(ch, " >").lower();
00088 qDebug("Skipblock:%s", (const char*)ent);
00089 } while (ent != _ent && ch != UEOF);
00090 }
00091
00092 void striphtml::reset()
00093 {
00094 m_inblock = false;
00095 text_q = "";
00096 q = "";
00097 tablenesteddepth = 0;
00098 forcecentre = false;
00099 ignorespace = false;
00100 indent = 0;
00101 while (!stylestack.isEmpty()) stylestack.pop();
00102 currentstyle.unset();
00103 }
00104
00105 void striphtml::locate(unsigned int n)
00106 {
00107 qDebug("striphtml:locating:%u", n);
00108 reset();
00109 parent->locate(n);
00110 }
00111
00112 int striphtml::getpara(CBuffer& buff, unsigned long& startpos)
00113 {
00114 tchar ch;
00115 CStyle sty;
00116 unsigned long pos;
00117 int i = 0;
00118 parent->getch(ch, sty, startpos);
00119 pos = startpos;
00120 while (1)
00121 {
00122 if (ch == 10 && !isPre)
00123 {
00124 ch = ' ';
00125 }
00126 if (ch == UEOF)
00127 {
00128
00129 buff[i] = 0;
00130 if (i == 0)
00131 {
00132 i = -1;
00133 }
00134 return i;
00135 }
00136 else if (ch == '<')
00137 {
00138 tchar ch2 = skip_ws();
00139 QString ent = getname(ch2, " >");
00140 ent = ent.lower();
00141
00142 if (ent == "a")
00143 {
00144 buff[i++] = '<';
00145 buff[i++] = 'a';
00146 buff[i++] = ch2;
00147
00148 }
00149 else if (ent == "/a")
00150 {
00151 buff[i++] = '<';
00152 buff[i++] = '/';
00153 buff[i++] = 'a';
00154 buff[i++] = ch2;
00155
00156 }
00157 else if (ent == "div")
00158 {
00159
00160 if (i == 0)
00161 {
00162 buff[i++] = '<';
00163 buff[i++] = 'd';
00164 buff[i++] = 'i';
00165 buff[i++] = 'v';
00166 buff[i++] = ' ';
00167 buff[i++] = ch2;
00168 while (ch2 != '>' && ch2 != UEOF && i < 2048)
00169 {
00170 parent->getch(ch2, sty, pos);
00171 buff[i++] = ch2;
00172 }
00173 }
00174 else
00175 {
00176 locate(pos);
00177 }
00178 buff[i++] = 0;
00179
00180 return i;
00181 }
00182 else if (ent == "p" || (ent[0] == 'h' && ent.length() == 2 && QString("123456789").find(ent[1]) != -1))
00183 {
00184 buff[i++] = 0;
00185 while (ch2 != '>' && ch2 != UEOF)
00186 {
00187 parent->getch(ch2, sty, pos);
00188 }
00189 return i;
00190 }
00191 else
00192 {
00193 while (ch2 != '>' && ch2 != UEOF)
00194 {
00195 parent->getch(ch2, sty, pos);
00196 }
00197 }
00198 }
00199 else
00200 {
00201 buff[i++] = ch;
00202 }
00203 parent->getch(ch, sty, pos);
00204 }
00205 }
00206
00207 QString striphtml::dehtml(const QString& _info)
00208 {
00209 QString info;
00210 for (int i = 0; i < _info.length(); i++)
00211 {
00212 tchar ch = _info[i];
00213 if (ch == '%')
00214 {
00215 ch = 0;
00216 for (int j = 0; j < 2; j++)
00217 {
00218 ch <<= 4;
00219 tchar ch1 = _info[++i];
00220 if ('0' <= ch1 && ch1 <= '9')
00221 {
00222 ch += ch1 - '0';
00223 }
00224 else if ('a' <= ch1 && ch1 <= 'f')
00225 {
00226 ch += ch1 - 'a' + 10;
00227 }
00228 else if ('A' <= ch1 && ch1 <= 'F')
00229 {
00230 ch += ch1 - 'A' + 10;
00231 }
00232 }
00233 }
00234 info += ch;
00235 }
00236 return info;
00237 }
00238
00239 bool striphtml::findanchor(const QString& _info)
00240 {
00241
00242
00243 if (parent->findanchor(_info))
00244 {
00245 reset();
00246 return true;
00247 }
00248 qDebug("Using html find");
00249 parent->locate(parent->startSection());
00250 #if defined(USEQPE) || defined(_WINDOWS)
00251 QString info;
00252 for (int i = 0; i < _info.length(); i++)
00253 {
00254 tchar ch = _info[i];
00255 if (QString(".^$[]*+?").find(ch) != -1)
00256 {
00257 info += '\\';
00258 }
00259 info += ch;
00260 }
00261 #else
00262 QString info = QRegExp::escape(_info);
00263 #endif
00264 qDebug("Adjusted searchstring:%s", (const char*)info);
00265 QString sname("<[Aa][^>]*[ \t]+[Nn][Aa][Mm][Ee][ \t]*=[ \t]*\"?");
00266 sname += info + "\"?[ \t>]";
00267 QString sid("<[A-Za-z][^>]*[ \t]+[Ii][Dd][ \t]*=[ \t]*\"?");
00268 sid += info+"\"?[ \t>]";
00269 #ifdef USEQPE
00270 QRegExp name(sname);
00271 QRegExp id(sid);
00272 #else
00273 QRegExp name(sname+"|"+sid);
00274 #endif
00275 bool ret = true;
00276 locate(0);
00277 unsigned long pos = 0;
00278 unsigned long startpos = 0;
00279 int offset;
00280 CBuffer test;
00281 qDebug("striphtml::findanchor");
00282
00283 if (getpara(test, pos) >= 0)
00284 {
00285 while (1)
00286 {
00287
00288 if ((offset = name.match(toQString(test.data()))) != -1) break;
00289 #ifdef USEQPE
00290 if ((offset = id.match(toQString(test.data()))) != -1) break;
00291 #endif
00292 if (getpara(test, pos) < 0)
00293 {
00294 locate(startpos);
00295 qDebug("Not found");
00296 return false;
00297 }
00298 }
00299 locate(pos);
00300 qDebug("Found");
00301 ret = true;
00302 }
00303 else
00304 {
00305 locate(startpos);
00306 qDebug("Not found");
00307 ret = false;
00308 }
00309 return ret;
00310 }
00311
00312 striphtml::striphtml(const QString& _s) : entmap(NULL), isPre(false), currentid(0), lastch(0), currentfile(_s), indent(0), forcecentre(false), m_inblock(false), m_bchm(false), ignorespace(false), tablenesteddepth(0)
00313 {
00314 href2filepos = new QMap<QString, unsigned long>;
00315 id2href = new QMap<unsigned long, QString>;
00316 }
00317
00318 striphtml::~striphtml()
00319 {
00320 if (entmap != NULL) delete entmap;
00321 delete href2filepos;
00322 delete id2href;
00323 }
00324
00325 void striphtml::initentmap()
00326 {
00327 entmap = new QMap<QString, tchar>;
00328 #ifdef USEQPE
00329 #ifdef OPIE
00330 QString fname(getenv("OPIEDIR"));
00331 #else
00332 QString fname(getenv("QTDIR"));
00333 #endif
00334 fname += "/plugins/reader/data";
00335 #else
00336 QString fname(getenv("READERDIR"));
00337 fname += "/data";
00338 #endif
00339 QFileInfo fi;
00340 fi.setFile(fname, "HTMLentities");
00341 if (fi.exists())
00342 {
00343 fname = fi.absFilePath();
00344
00345 QFile fl(fname);
00346 if (fl.open(IO_ReadOnly))
00347 {
00348 QTextStream t(&fl);
00349 QString key, value;
00350 while (!t.eof())
00351 {
00352 QString data = t.readLine();
00353 int colon = data.find(':');
00354 if (colon > 0)
00355 {
00356 QString key = data.left(colon);
00357 QString value = data.right(data.length()-colon-1);
00358 bool ok;
00359 int ret = value.toInt(&ok);
00360 if (ok)
00361 {
00362 (*entmap)[key] = ret;
00363 }
00364 }
00365 }
00366 fl.close();
00367 }
00368 }
00369 }
00370
00371 unsigned short striphtml::skip_ws()
00372 {
00373 tchar ch;
00374 CStyle sty;
00375 unsigned long dummy;
00376 do
00377 {
00378 mygetch(ch, sty, dummy);
00379 }
00380 while (ch < 33 && ch != UEOF);
00381 return ch;
00382 }
00383
00384 unsigned short striphtml::skip_ws_end()
00385 {
00386 unsigned long dummy;
00387 return skip_ws_end(dummy);
00388 }
00389
00390 unsigned short striphtml::skip_ws_end(unsigned long& pos)
00391 {
00392 tchar ch;
00393 CStyle sty;
00394 do
00395 {
00396 mygetch(ch, sty, pos);
00397 }
00398 while (ch != '>' && ch != UEOF);
00399 return ch;
00400 }
00401
00402 QString striphtml::getname(tchar& ch, const QString& nd)
00403 {
00404 QString nm = "";
00405
00406 CStyle sty;
00407 unsigned long dummy;
00408 while (1)
00409 {
00410
00411 if (ch != UEOF && nd.find(ch, 0, false) == -1 && nm.length() < 2048)
00412 {
00413 nm += ch;
00414 }
00415 else
00416 {
00417 break;
00418 }
00419 mygetch(ch, sty, dummy);
00420 }
00421 return nm;
00422 }
00423
00424 QString striphtml::getattr(tchar& ch)
00425 {
00426 QString ref;
00427 CStyle sty;
00428 unsigned long pos;
00429 if (ch == ' ') ch = skip_ws();
00430 if (ch == '=')
00431 {
00432 ch = skip_ws();
00433 if (ch == '"')
00434 {
00435 mygetch(ch, sty, pos);
00436 ref = getname(ch, "\"");
00437 ch = skip_ws();
00438 }
00439 else if (ch == '\'')
00440 {
00441 mygetch(ch, sty, pos);
00442 ref = getname(ch, "\'");
00443 ch = skip_ws();
00444 }
00445 else
00446 {
00447 ref = getname(ch, " >");
00448 if (ch == ' ') ch = skip_ws();
00449 }
00450 }
00451 return ref;
00452 }
00453
00454 linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString& nm)
00455 {
00456 #if defined(USEQPE) || defined(_WINDOWS)
00457 QMap<unsigned long, QString>::Iterator hrefit = id2href->find(n);
00458 #else
00459 QMap<unsigned long, QString>::iterator hrefit = id2href->find(n);
00460 #endif
00461 if (hrefit == id2href->end())
00462 {
00463 return eNone;
00464 }
00465 QString href = *hrefit;
00466 #if defined(USEQPE) || defined(_WINDOWS)
00467 QMap<QString, unsigned long>::Iterator fpit = href2filepos->find(href);
00468 #else
00469 QMap<QString, unsigned long>::iterator fpit = href2filepos->find(href);
00470 #endif
00471 if (fpit == href2filepos->end())
00472 {
00473 if (href == "history.back()")
00474 {
00475 QString fc = currentfile;
00476 unsigned long loc;
00477 htmlmark m(fc, loc);
00478 linkType ret = (m_nav.back(m)) ? eFile : eNone;
00479 if (fc == m.filename())
00480 {
00481 if ((ret & eFile) != 0)
00482 {
00483 locate(m.posn());
00484 return eLink;
00485 }
00486 }
00487 return eNone;
00488 }
00489 qDebug("Searching for %s", (const char*)href);
00490
00491
00492 QString file, name;
00493
00494 int colon = href.find('#');
00495 if (colon >= 0)
00496 {
00497 file = dehtml(href.left(colon));
00498 name = dehtml(href.right(href.length()-colon-1));
00499 }
00500 else
00501 {
00502 file = dehtml(href);
00503 }
00504
00505 qDebug("File:%s", (const char*)file);
00506 qDebug("Name:%s", (const char*)name);
00507
00508
00509 if (file.isEmpty())
00510 {
00511 if (parent->findanchor(name))
00512 {
00513 reset();
00514 return eLink;
00515 }
00516 fpit = href2filepos->find(name);
00517 if (fpit != href2filepos->end())
00518 {
00519 locate(*fpit);
00520 return eLink;
00521 }
00522 else
00523 {
00524
00525 qDebug("Do a search for:%s", (const char*)name);
00526 findanchor(name);
00527 return eLink;
00528 }
00529 }
00530 else
00531
00532 {
00533 if (m_bchm)
00534 {
00535 w = file;
00536 nm = name;
00537 return eFile;
00538 }
00539 else
00540 {
00541 QFileInfo f(currentfile);
00542 QFileInfo f1(f.dir(true), file);
00543 if (f1.exists())
00544 {
00545 w = f1.absFilePath();
00546 nm = name;
00547 }
00548 else
00549 {
00550 w = file;
00551 }
00552 return (f1.exists() ? eFile : eNone);
00553 }
00554 }
00555 return eNone;
00556 }
00557 locate(*fpit);
00558
00559 return eLink;
00560 }
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580 void striphtml::mygetch(tchar& ch, CStyle& sty, unsigned long& pos)
00581 {
00582 if (!text_q.isEmpty() && !m_inblock)
00583 {
00584 ch = text_q[0].unicode();
00585 text_q = text_q.right(text_q.length()-1);
00586 }
00587 else
00588 {
00589 parent->getch(ch, sty, pos);
00590 if (ch == '<')
00591 {
00592 m_inblock = true;
00593 }
00594 if (ch == '>')
00595 {
00596 m_inblock = false;
00597 }
00598 }
00599 if (ch == 10 && !isPre)
00600 {
00601 #ifdef REMOVE_LF_BEFORE_ENDTAG
00602 parent->getch(ch, sty, pos);
00603 if (ch == '<')
00604 {
00605 parent->getch(ch, sty, pos);
00606 if (ch == '/')
00607 {
00608 ch = '<';
00609 text_q += '/';
00610 }
00611 else
00612 {
00613 text_q += '<';
00614 text_q += ch;
00615 ch = ' ';
00616 }
00617 }
00618 else
00619 {
00620 text_q += ch;
00621 ch = ' ';
00622 }
00623 #else
00624 ch = ' ';
00625 #endif
00626 }
00627 }
00628
00629 void striphtml::parse_paragraph(CStyle& currentstyle, tchar& ch, unsigned long pos)
00630 {
00631
00632
00633
00634
00635
00636
00637
00638
00639 if (stylestack.isEmpty())
00640 {
00641 currentstyle.unset();
00642 }
00643 else
00644 {
00645 currentstyle = stylestack.first();
00646 }
00647 if (forcecentre)
00648 {
00649 currentstyle.setCentreJustify();
00650 }
00651 if (ch == ' ') ch = skip_ws();
00652 while (ch != '>' && ch != UEOF)
00653 {
00654 QString ent = getname(ch, " =>").lower();
00655 QString attr = getattr(ch).lower();
00656
00657 if (ent == "align")
00658 {
00659 if (attr == "center")
00660 {
00661 currentstyle.setCentreJustify();
00662 }
00663 if (attr == "right")
00664 {
00665 currentstyle.setRightJustify();
00666 }
00667 if (attr == "justify")
00668 {
00669 currentstyle.setFullJustify();
00670 }
00671 }
00672 if (ent == "id")
00673 {
00674 (*href2filepos)[attr] = pos;
00675 }
00676 if (ent == "bgcolor")
00677 {
00678 qDebug("Got paper colour:%s", (const char*)attr);
00679 unsigned char r,g,b;
00680 parse_color(attr, r, g, b);
00681 currentstyle.setPaper(r, g, b);
00682 }
00683 if (ent == "color")
00684 {
00685 qDebug("Got foreground colour:%s", (const char*)attr);
00686 unsigned char r,g,b;
00687 parse_color(attr, r, g, b);
00688 currentstyle.setColour(r, g, b);
00689 }
00690 if (ch == ' ') ch = skip_ws();
00691 }
00692 ch = 10;
00693 }
00694
00695 void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
00696 {
00697 currentstyle.clearPicture();
00698 if (!q.isEmpty())
00699 {
00700 ch = q[0].unicode();
00701 if (ch == '-')
00702 {
00703 tchar w = q[1].unicode();
00704 tchar h = q[2].unicode();
00705 unsigned char r = q[3].unicode();
00706 unsigned char g = q[4].unicode();
00707 unsigned char b = q[5].unicode();
00708 ch = '#';
00709
00710 currentstyle.setPicture(false, hRule(w,h,r,g,b));
00711 q = q.right(q.length()-6);
00712 }
00713 else
00714 {
00715 q = q.right(q.length()-1);
00716 }
00717 sty = currentstyle;
00718 lastch = ch;
00719 return;
00720 }
00721 do
00722 {
00723 unsigned long npos;
00724 CStyle dummy;
00725 mygetch(ch, dummy, pos);
00726 while (ch == '<' && ch != UEOF)
00727 {
00728 ch = skip_ws();
00729 QString ent = getname(ch, " >").lower();
00730
00731
00732
00733 if (ent == "a")
00734 {
00735 if (ch == ' ') ch = skip_ws();
00736 bool fileposfound = false;
00737 bool ishref = false;
00738 unsigned int filepos = 0;
00739 QString ref, name;
00740 while (ch != '>' && ch != UEOF)
00741 {
00742 QString ent = getname(ch, " =>").lower();
00743 QString attr = getattr(ch);
00744
00745 if (ent == "name")
00746 {
00747 name = attr;
00748 }
00749 if (ent == "onclick")
00750 {
00751 int st = attr.find('\'');
00752 int nd = attr.findRev('\'');
00753 ref = attr.mid(st+1, nd-st-1);
00754 ishref = true;
00755 qDebug("Onclick:%s", (const char*)ref);
00756 }
00757 if (ent == "href")
00758 {
00759 ishref = true;
00760 ref = attr;
00761 }
00762 if (ent == "filepos")
00763 {
00764 filepos = attr.toUInt(&fileposfound);
00765 if (ref.isEmpty())
00766 {
00767 ishref = true;
00768 ref = attr;
00769 }
00770 }
00771 if (ent == "title")
00772 {
00773 text_q = attr + "</a><p>";
00774 }
00775
00776 }
00777 if (ishref)
00778 {
00779 currentstyle.setColour(0,0,255);
00780 currentstyle.setLink(true);
00781 currentstyle.setData(currentid);
00782 if (!text_q.isEmpty())
00783 {
00784 currentstyle.setBold();
00785 currentstyle.setCentreJustify();
00786 }
00787 (*id2href)[currentid] = ref;
00788 currentid++;
00789
00790
00791 if (fileposfound)
00792 {
00793 (*href2filepos)[ref] = filepos;
00794 }
00795 }
00796 if (!name.isEmpty())
00797 {
00798 (*href2filepos)[name] = pos;
00799 }
00800 }
00801 else if (ent == "p")
00802 {
00803 parse_paragraph(currentstyle, ch, pos);
00804 currentstyle.setExtraSpace(3);
00805 continue;
00806 }
00807 else if (ent == "div")
00808 {
00809 parse_paragraph(currentstyle, ch, pos);
00810 stylestack.push_front(currentstyle);
00811 currentstyle.setExtraSpace(16);
00812
00813 continue;
00814 }
00815 else if (ent == "sup")
00816 {
00817 currentstyle.setVOffset(-1);
00818 }
00819 else if (ent == "sup")
00820 {
00821 currentstyle.setVOffset(1);
00822 }
00823 else if (ent == "/sup" || ent == "/sub")
00824 {
00825 currentstyle.setVOffset(0);
00826 }
00827 else if (ent == "span")
00828 {
00829 if (ch == ' ') ch = skip_ws();
00830 while (ch != '>' && ch != UEOF)
00831 {
00832 QString ent = getname(ch, " =>").lower();
00833 QString attr = getattr(ch).lower();
00834 if (ent == "bgcolor")
00835 {
00836 qDebug("Got background colour:%s", (const char*)attr);
00837 unsigned char r,g,b;
00838 parse_color(attr, r, g, b);
00839 currentstyle.setBackground(r, g, b);
00840 }
00841 if (ent == "color")
00842 {
00843 qDebug("Got foreground colour:%s", (const char*)attr);
00844 unsigned char r,g,b;
00845 parse_color(attr, r, g, b);
00846 currentstyle.setColour(r, g, b);
00847 }
00848 }
00849 stylestack.push_front(currentstyle);
00850 }
00851 else if (ent == "/span")
00852 {
00853 if (ch != '>') ch = skip_ws_end();
00854 currentstyle.setBackground(255, 255, 255);
00855 currentstyle.setColour(0, 0, 0);
00856 if (!stylestack.isEmpty())
00857 {
00858 stylestack.pop();
00859 }
00860 }
00861 else if (ent == "pre")
00862 {
00863 isPre = true;
00864 currentstyle.setNoJustify();
00865 currentstyle.setMono();
00866 }
00867 else if (ent == "tt")
00868 {
00869 currentstyle.setMono();
00870 }
00871 else if (ent == "b" || ent == "strong")
00872 {
00873 currentstyle.setBold();
00874 }
00875 else if (ent == "u")
00876 {
00877 currentstyle.setUnderline();
00878 }
00879 else if (ent == "/u")
00880 {
00881 currentstyle.unsetUnderline();
00882 }
00883 else if (ent == "blockquote")
00884 {
00885 if (ch != '>') ch = skip_ws_end();
00886 ch = 10;
00887 currentstyle.setExtraSpace(0);
00888 currentstyle.setLeftMargin(30);
00889 currentstyle.setRightMargin(30);
00890 continue;
00891 }
00892 else if (ent == "br" || ent == "br/")
00893 {
00894 if (ch != '>') ch = skip_ws_end();
00895 ch = 10;
00896 currentstyle.setExtraSpace(0);
00897 lastch = 0;
00898 continue;
00899 }
00900 else if (ent == "mbp:pagebreak")
00901 {
00902
00903
00904
00905
00906
00907
00908
00909 ch = 6;
00910
00911 continue;
00912 }
00913 else if (ent == "center")
00914 {
00915
00916 qDebug("setting centre");
00917 currentstyle.setCentreJustify();
00918 ch = 10;
00919 continue;
00920 }
00921 else if (ent == "/center")
00922 {
00923 qDebug("unsetting centre");
00924 forcecentre = false;
00925 }
00926 else if (ent == "li")
00927 {
00928 if (ch != '>') ch = skip_ws_end();
00929 lastch = 0;
00930 ch = 10;
00931 if (m_listtype[indent % m_cmaxdepth] == 1)
00932 {
00933 q.setNum(m_ctr[indent % m_cmaxdepth]++);
00934 }
00935 else
00936 {
00937 q += QChar(8226);
00938 }
00939 q += ' ';
00940 currentstyle.setLeftMargin(6*indent);
00941 qDebug("Setting indent:%d", indent);
00942 continue;
00943 }
00944 else if (ent == "ul")
00945 {
00946 indent++;
00947 m_listtype[indent % m_cmaxdepth] = 0;
00948 }
00949 else if (ent == "/ul")
00950 {
00951 indent--;
00952 }
00953 else if (ent == "ol")
00954 {
00955 indent++;
00956 m_listtype[indent % m_cmaxdepth] = 1;
00957 m_ctr[indent % m_cmaxdepth] = 1;
00958 }
00959 else if (ent == "/ol")
00960 {
00961 indent--;
00962 }
00963 else if (ent == "i")
00964 {
00965 currentstyle.setItalic();
00966 }
00967 else if (ent == "em")
00968 {
00969 currentstyle.setItalic();
00970 }
00971 else if (ent == "small")
00972 {
00973 currentstyle.setFontSize(-2);
00974 }
00975 else if (ent == "/small")
00976 {
00977 currentstyle.setFontSize(0);
00978 }
00979 else if (ent == "big")
00980 {
00981 currentstyle.setFontSize(2);
00982 }
00983 else if (ent == "/big")
00984 {
00985 currentstyle.setFontSize(0);
00986 }
00987 else if (ent[0] == '/' && ent[1] == 'h' && ent.length() == 3 && QString("123456789").find(ent[2]) != -1)
00988 {
00989 parse_paragraph(currentstyle, ch, pos);
00990 currentstyle.setExtraSpace(3);
00991 continue;
00992 }
00993 else if (ent[0] == 'h' && ent.length() == 2 && QString("123456789").find(ent[1]) != -1)
00994 {
00995 indent = 0;
00996 if (ent[1] == '1')
00997 {
00998 parse_paragraph(currentstyle, ch, pos);
00999 currentstyle.setFontSize(3);
01000 currentstyle.setExtraSpace(8);
01001 currentstyle.setBold();
01002
01003 }
01004 else if (ent[1] == '2')
01005 {
01006 parse_paragraph(currentstyle, ch, pos);
01007 currentstyle.setFontSize(2);
01008 currentstyle.setExtraSpace(6);
01009 currentstyle.setBold();
01010
01011 }
01012 else if (ent[1] == '3')
01013 {
01014 parse_paragraph(currentstyle, ch, pos);
01015 currentstyle.setFontSize(1);
01016 currentstyle.setExtraSpace(4);
01017 currentstyle.setBold();
01018
01019 }
01020 else
01021 {
01022 parse_paragraph(currentstyle, ch, pos);
01023 currentstyle.setExtraSpace(4);
01024 currentstyle.setBold();
01025
01026 }
01027 ch = 10;
01028 continue;
01029 }
01030
01031
01032 else if (ent == "/a")
01033 {
01034 currentstyle.setColour(0,0,0);
01035 currentstyle.setLink(false);
01036 }
01037 else if (ent == "/pre")
01038 {
01039 currentstyle.unsetMono();
01040 isPre = false;
01041 }
01042 else if (ent == "/tt")
01043 {
01044 currentstyle.unsetMono();
01045 }
01046 else if (ent == "/b" || ent == "/strong")
01047 {
01048 currentstyle.unsetBold();
01049 }
01050 else if (ent == "/i")
01051 {
01052 currentstyle.unsetItalic();
01053 }
01054 else if (ent == "/em")
01055 {
01056 currentstyle.unsetItalic();
01057 }
01058 else if (ent == "/div")
01059 {
01060 currentstyle.unset();
01061 if (ch != '>') ch = skip_ws_end();
01062 ch = 10;
01063 if (!stylestack.isEmpty())
01064 {
01065 stylestack.pop();
01066 }
01067 continue;
01068 }
01069 else if (ent == "tr")
01070 {
01071 if (ch != '>') ch = skip_ws_end();
01072 ch = 10;
01073 q += '-';
01074 q += QChar(parent->getwidth());
01075 q += 2;
01076 q += '\0';
01077 q += '\0';
01078 q += '\0';
01079 continue;
01080 }
01081 else if (ent == "td")
01082 {
01083 if (ch != '>') ch = skip_ws_end();
01084 ignorespace = false;
01085 }
01086 else if (ent == "/td")
01087 {
01088 ignorespace = true;
01089
01090
01091 if (ch != '>') ch = skip_ws_end();
01092
01093
01094 ch = 10;
01095 q += '-';
01096 q += QChar(parent->getwidth());
01097 q += 1;
01098 q += '\0';
01099 q += '\0';
01100 q += '\0';
01101 continue;
01102 }
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116 else if (ent[0] == '/' && ent.length() == 3 && ent[1] == 'h' && QString("123456789").find(ent[2]) != -1)
01117 {
01118 currentstyle.unset();
01119 if (ch != '>') ch = skip_ws_end();
01120
01121
01122 }
01123 else if (ent == "table" || ent == "/table")
01124 {
01125 currentstyle.unset();
01126 ignorespace = (ent == "table");
01127 if (ent == "table")
01128 {
01129 if (tablenesteddepth++ == 0) currentstyle.setTable(pos);
01130 }
01131 else
01132 {
01133 if (--tablenesteddepth <= 0)
01134 {
01135 tablenesteddepth = 0;
01136 currentstyle.setTable(0xffffffff);
01137 }
01138 }
01139 if (ch == ' ') ch = skip_ws();
01140 while (ch != '>' && ch != UEOF)
01141 {
01142 QString ent = getname(ch, " =>").lower();
01143 QString attr = getattr(ch);
01144 qDebug("<table>Entity:%s Attr:%s", (const char*)ent, (const char*)attr);
01145 }
01146 if (ch != '>') ch = skip_ws_end();
01147
01148 currentstyle.setLeftMargin(6*tablenesteddepth);
01149
01150
01151 lastch = 0;
01152 ch = 10;
01153 q += '-';
01154 q += QChar(parent->getwidth());
01155 q += 3;
01156 q += '\0';
01157 q += '\0';
01158 q += '\0';
01159 continue;
01160 }
01161 else if (ent == "hr")
01162 {
01163
01164 if (ch == ' ') ch = skip_ws();
01165 unsigned char red = 0, green = 0, blue = 0;
01166 while (ch != '>' && ch != UEOF)
01167 {
01168 QString ent = getname(ch, " =>").lower();
01169 QString attr = getattr(ch);
01170 if (ent == "color")
01171 {
01172 parse_color(attr, red, green, blue);
01173 }
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183 qDebug("<hr>Entity:%s Attr:%s", (const char*)ent, (const char*)attr);
01184 }
01185 if (ch != '>') ch = skip_ws_end();
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196 currentstyle.unset();
01197
01198
01199
01200
01201
01202
01203
01204
01205 lastch = 0;
01206 ch = 10;
01207 q += '-';
01208 q += QChar(parent->getwidth());
01209 q += 3;
01210 q += red;
01211 q += green;
01212 q += blue;
01213
01214 continue;
01215 }
01216
01217
01218
01219 else if (ent == "img")
01220 {
01221 if (ch == ' ') ch = skip_ws();
01222 while (ch != '>' && ch != UEOF)
01223 {
01224 QString ent = getname(ch, " =>").lower();
01225 QString attr = getattr(ch);
01226 qDebug("<img>Entity:%s Attr:%s", (const char*)ent, (const char*)attr);
01227 if (ent == "src")
01228 {
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241 QImage* img = parent->getPicture(attr);
01242 if (img != NULL)
01243 {
01244 currentstyle.setPicture(true, img);
01245 }
01246 else
01247 {
01248 QFileInfo f(currentfile);
01249 QFileInfo f1(f.dir(true), attr);
01250 QPixmap pm;
01251 if (pm.load(f1.absFilePath()))
01252 {
01253 QImage* img = new QImage(pm.convertToImage());
01254 currentstyle.setPicture(true, img);
01255 }
01256 }
01257 }
01258 if (ent == "recindex")
01259 {
01260 bool ok;
01261 unsigned int picindex = attr.toUInt(&ok);
01262 qDebug("Looking for image at %u", picindex);
01263 QImage* img = parent->getPicture(picindex);
01264 if (img != NULL)
01265 {
01266 currentstyle.setPicture(true, img);
01267 }
01268 else
01269 {
01270 qDebug("No image found");
01271 }
01272 }
01273 }
01274 if (ch != '>') ch = skip_ws_end();
01275 ch = '#';
01276 break;
01277 }
01278 else if (ent.left(2) == "dc")
01279 {
01280 QString nd("/");
01281 skipblock(nd+ent);
01282 }
01283 else if (ent == "metadata")
01284 {
01285
01286 }
01287 else if (ent == "title")
01288 {
01289 skipblock("/title");
01290 }
01291 else if (ent == "head")
01292 {
01293 skipblock("/head");
01294 }
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307 else
01308 {
01309 if (ent[0] != '/')
01310 qDebug("Not handling:%s", (const char*)ent);
01311 }
01312
01313 if (ch != '>') ch = skip_ws_end();
01314 if (ent[0] == '/')
01315 mygetch(ch, dummy, pos);
01316 else
01317 mygetch(ch, dummy, npos);
01318 }
01319 if (ch == '&')
01320 {
01321 mygetch(ch, dummy, npos);
01322 if (ch == '#')
01323 {
01324 int id = 0;
01325 mygetch(ch, dummy, npos);
01326 while (ch != ';' && ch != UEOF)
01327 {
01328 id = 10*id+ch-'0';
01329 mygetch(ch, dummy, npos);
01330 }
01331 ch = id;
01332 }
01333 else
01334 {
01335 QString en;
01336 en += ch;
01337 mygetch(ch, dummy, npos);
01338 while (ch != ';' && ch != UEOF)
01339 {
01340 en += ch;
01341 mygetch(ch, dummy, npos);
01342 }
01343 if (entmap == NULL) initentmap();
01344 #if defined(USEQPE) || defined(_WINDOWS)
01345 QMap<QString, tchar>::Iterator it = entmap->find(en);
01346 #else
01347 QMap<QString, tchar>::iterator it = entmap->find(en);
01348 #endif
01349 if (it != entmap->end())
01350 {
01351 ch = *it;
01352 }
01353 else
01354 {
01355 ch = '.';
01356 }
01357 }
01358 }
01359
01360 if (lastch == 10 && ch == 10 && sty.getExtraSpace() > currentstyle.getExtraSpace())
01361 {
01362 currentstyle.setExtraSpace(sty.getExtraSpace());
01363 }
01364 sty = currentstyle;
01365 }
01366 while (!isPre && (((lastch == ' ' || lastch == 10 || ignorespace) && ch == ' ') || ((ch == 10) && (lastch == 10))));
01367
01368 lastch = ch;
01369 return;
01370 }
01371
01372 QString striphtml::getTableAsHtml(unsigned long loc)
01373 {
01374 qDebug("striphtml::getTableAsHtml");
01375 QString ret;
01376 tchar ch(0);
01377 CStyle sty;
01378 unsigned long pos;
01379 locate(loc);
01380 int endpos(0);
01381 QString endmarker("</table>");
01382 QString startmarker("<table");
01383 int startpos(0);
01384 int depth(0);
01385 while (ch != UEOF)
01386 {
01387 parent->getch(ch, sty, pos);
01388 QChar qc(ch);
01389 ret += qc;
01390 if (qc.lower() == endmarker[endpos])
01391 {
01392 if ((++endpos >= endmarker.length()) && (--depth <= 0)) break;
01393 }
01394 else
01395 {
01396 endpos = 0;
01397 }
01398 if (qc.lower() == startmarker[startpos])
01399 {
01400 if (++startpos >= startmarker.length()) ++depth;
01401 }
01402 else
01403 {
01404 startpos = 0;
01405 }
01406 }
01407 return ret;
01408 }
01409
01410
01411 extern "C"
01412 {
01413 CFilter* newfilter(const QString& s) { return new striphtml(s); }
01414 }