Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

striphtml.cpp

Go to the documentation of this file.
00001 #include <qmap.h>
00002 #include <qfileinfo.h>
00003 #include <qtextstream.h>
00004 #include <qdir.h>
00005 #ifdef USEQPE
00006 #include <qpe/global.h>
00007 #endif
00008 #include "CDrawBuffer.h"
00009 #include "striphtml.h"
00010 #include "hrule.h"
00011 
00012 #include <qregexp.h>
00013 #include <qimage.h>
00014 #include <qpixmap.h>
00015 //#include <qprogressdialog.h>
00016 //#include <qapplication.h>
00017 
// Convert a single hexadecimal digit character to its numeric value.
//
// c       the character to convert.
// returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F'; 0 for any other character.
//
// Upper-case digits are accepted so that colour values such as "#FF8000"
// decode correctly even when the caller has not lower-cased the attribute.
static unsigned char h2i(unsigned char c)
{
  if ('0' <= c && c <= '9')
    {
      return static_cast<unsigned char>(c - '0');
    }
  if ('a' <= c && c <= 'f')
    {
      return static_cast<unsigned char>(c - 'a' + 10);
    }
  if ('A' <= c && c <= 'F')
    {
      return static_cast<unsigned char>(c - 'A' + 10);
    }
  // Not a hex digit: keep the historical "treat as zero" behaviour.
  return 0;
}
00031 
00032 static void parse_color(const QString& attr, unsigned char& r, unsigned char& g, unsigned char& b)
00033 {
00034   r = g = b = 0;
00035   if (attr.length() >= 7 && attr[0] == '#')
00036     {
00037       r = h2i(attr[1].unicode());
00038       r = 16*r + h2i(attr[2].unicode());
00039       g = h2i(attr[3].unicode());
00040       g = 16*g + h2i(attr[4].unicode());
00041       b = h2i(attr[5].unicode());
00042       b = 16*b + h2i(attr[6].unicode());
00043     }
00044   else if (attr == "red")
00045     {
00046       r = 255;
00047     }
00048   else if (attr == "green")
00049     {
00050       g = 255;
00051     }
00052   else if (attr == "blue")
00053     {
00054       b = 255;
00055     }
00056   else if (attr == "white")
00057     {
00058       r = g = b = 255;
00059     }
00060   else if (attr == "black")
00061     {
00062       r = g = b = 0;
00063     }
00064   else
00065     {
00066       qDebug("Don't understand colour \"%s\"", (const char*)attr);
00067     }
00068 }
00069 
00070 CNavigation_base<htmlmark> striphtml::m_nav;
00071 
00072 void striphtml::skipblock(const QString& _ent)
00073 {
00074   tchar ch = '>';
00075   CStyle dummy;
00076   QString ent;
00077   unsigned long pos;
00078   do 
00079     {
00080       while (ch != '<' && ch != UEOF)
00081         {
00082           mygetch(ch, dummy, pos);
00083         }
00084       
00085       ch = skip_ws();
00086       
00087       ent = getname(ch, " >").lower();
00088       qDebug("Skipblock:%s", (const char*)ent);
00089     } while (ent != _ent && ch != UEOF);
00090 }
00091 
00092 void striphtml::reset()
00093 {
00094   m_inblock = false;
00095   text_q = "";
00096   q = "";
00097   tablenesteddepth = 0;
00098   forcecentre = false;
00099   ignorespace = false;
00100   indent = 0;
00101   while (!stylestack.isEmpty()) stylestack.pop();
00102   currentstyle.unset();
00103 }
00104 
00105 void striphtml::locate(unsigned int n)
00106 {
00107   qDebug("striphtml:locating:%u", n);
00108   reset();
00109   parent->locate(n);
00110 }
00111 
00112 int striphtml::getpara(CBuffer& buff, unsigned long& startpos)
00113 {
00114   tchar ch;
00115   CStyle sty;
00116   unsigned long pos;
00117   int i = 0;
00118   parent->getch(ch, sty, startpos);
00119   pos = startpos;
00120   while (1)
00121     {
00122       if (ch == 10 && !isPre)
00123         {
00124           ch = ' ';
00125         }
00126       if (ch == UEOF)
00127         {
00128           //      qDebug("EOF:%d:%u", i, pos);
00129           buff[i] = 0;
00130           if (i == 0)
00131             {
00132               i = -1;
00133             }
00134           return i;
00135         }
00136       else if (ch == '<')
00137         {
00138           tchar ch2 = skip_ws();
00139           QString ent = getname(ch2, " >");
00140           ent = ent.lower();
00141           //      qDebug("ent:%s", (const char*)ent);
00142           if (ent == "a")
00143             {
00144               buff[i++] = '<';
00145               buff[i++] = 'a';
00146               buff[i++] = ch2;
00147               //              buff[i] = 0; qDebug("ANCHOR:%s", (const char*)toQString(buff.data()));
00148             }
00149           else if (ent == "/a")
00150             {
00151               buff[i++] = '<';
00152               buff[i++] = '/';
00153               buff[i++] = 'a';
00154               buff[i++] = ch2;
00155               //              buff[i] = 0; qDebug("/ANCHOR:%s", (const char*)toQString(buff.data()));
00156             }
00157           else if (ent == "div")
00158             {
00159               //              buff[i] = 0; qDebug("DIV:%s", (const char*)toQString(buff.data()));
00160               if (i == 0)
00161                 {
00162                   buff[i++] = '<';
00163                   buff[i++] = 'd';
00164                   buff[i++] = 'i';
00165                   buff[i++] = 'v';
00166                   buff[i++] = ' ';
00167                   buff[i++] = ch2;
00168                   while (ch2 != '>' && ch2 != UEOF && i < 2048)
00169                     {
00170                       parent->getch(ch2, sty, pos);
00171                       buff[i++] = ch2;
00172                     }
00173                 }
00174               else
00175                 {
00176                   locate(pos);
00177                 }
00178               buff[i++] = 0;
00179               //              qDebug("DIV:%s", (const char*)toQString(buff.data()));
00180               return i;
00181             }
00182           else if (ent == "p" || (ent[0] == 'h' && ent.length() == 2 && QString("123456789").find(ent[1]) != -1))
00183             {
00184               buff[i++] = 0;
00185               while (ch2 != '>' && ch2 != UEOF)
00186                 {
00187                   parent->getch(ch2, sty, pos);
00188                 }
00189               return i;
00190             }
00191           else
00192             {
00193               while (ch2 != '>' && ch2 != UEOF)
00194                 {
00195                   parent->getch(ch2, sty, pos);
00196                 }
00197             }
00198         }
00199       else
00200         {
00201           buff[i++] = ch;
00202         }
00203       parent->getch(ch, sty, pos);
00204     }
00205 }
00206 
00207 QString striphtml::dehtml(const QString& _info)
00208 {
00209   QString info;
00210   for (int i = 0; i < _info.length(); i++)
00211   {
00212     tchar ch = _info[i];
00213     if (ch == '%')
00214       {
00215         ch = 0;
00216         for (int j = 0; j < 2; j++)
00217           {
00218             ch <<= 4;
00219             tchar ch1 = _info[++i];
00220             if ('0' <= ch1 && ch1 <= '9')
00221               {
00222                 ch += ch1 - '0';
00223               }
00224             else if ('a' <= ch1 && ch1 <= 'f')
00225               {
00226                 ch += ch1 - 'a' + 10;
00227               }
00228             else if ('A' <= ch1 && ch1 <= 'F')
00229               {
00230                 ch += ch1 - 'A' + 10;
00231               }
00232           }
00233       }
00234     info += ch;
00235   }
00236   return info;
00237 }
00238 
00239 bool striphtml::findanchor(const QString& _info)
00240 {
00241   //  QProgressDialog dlg("Finding link...", QString::null, 0, NULL, "progress", true);
00242   //  QProgressBar dlg(0);
00243   if (parent->findanchor(_info))
00244     {
00245       reset();
00246       return true;
00247     }
00248   qDebug("Using html find");
00249   parent->locate(parent->startSection());
00250 #if defined(USEQPE) || defined(_WINDOWS) 
00251   QString info;
00252   for (int i = 0; i < _info.length(); i++)
00253   {
00254     tchar ch = _info[i];
00255     if (QString(".^$[]*+?").find(ch) != -1)
00256       {
00257         info += '\\';
00258       }
00259     info += ch;
00260   }
00261 #else
00262   QString info = QRegExp::escape(_info);
00263 #endif
00264   qDebug("Adjusted searchstring:%s", (const char*)info);
00265   QString sname("<[Aa][^>]*[ \t]+[Nn][Aa][Mm][Ee][ \t]*=[ \t]*\"?");
00266   sname += info + "\"?[ \t>]";
00267   QString sid("<[A-Za-z][^>]*[ \t]+[Ii][Dd][ \t]*=[ \t]*\"?");
00268   sid += info+"\"?[ \t>]";
00269 #ifdef USEQPE
00270   QRegExp name(sname);
00271   QRegExp id(sid);
00272 #else
00273   QRegExp name(sname+"|"+sid);
00274 #endif
00275   bool ret = true;
00276   locate(0);
00277   unsigned long pos = 0;
00278   unsigned long startpos = 0;
00279   int offset;
00280   CBuffer test;
00281   qDebug("striphtml::findanchor");
00282   //  dlg.show();
00283   if (getpara(test, pos) >= 0)
00284     {
00285       while (1)
00286         {
00287           //      qApp->processEvents();
00288           if ((offset = name.match(toQString(test.data()))) != -1) break;
00289 #ifdef USEQPE
00290           if ((offset = id.match(toQString(test.data()))) != -1) break;
00291 #endif
00292           if (getpara(test, pos) < 0)
00293             {
00294               locate(startpos);
00295               qDebug("Not found");
00296               return false;
00297             }
00298         }
00299       locate(pos);
00300       qDebug("Found");
00301       ret = true;
00302     }
00303   else
00304     {
00305       locate(startpos);
00306       qDebug("Not found");
00307       ret = false;
00308     }
00309   return ret;
00310 }
00311 
00312 striphtml::striphtml(const QString& _s) : entmap(NULL), isPre(false), currentid(0), lastch(0), currentfile(_s), indent(0), forcecentre(false), m_inblock(false), m_bchm(false), ignorespace(false), tablenesteddepth(0)
00313 {
00314   href2filepos = new QMap<QString, unsigned long>;
00315   id2href = new QMap<unsigned long, QString>;
00316 }
00317 
00318 striphtml::~striphtml()
00319 {
00320   if (entmap != NULL) delete entmap;
00321   delete href2filepos;
00322   delete id2href;
00323 }
00324 
00325 void striphtml::initentmap()
00326 {
00327   entmap = new QMap<QString, tchar>;
00328 #ifdef USEQPE
00329 #ifdef OPIE
00330   QString fname(getenv("OPIEDIR"));
00331 #else
00332   QString fname(getenv("QTDIR"));
00333 #endif
00334   fname += "/plugins/reader/data";
00335 #else
00336   QString fname(getenv("READERDIR"));
00337   fname += "/data";
00338 #endif  
00339   QFileInfo fi;
00340   fi.setFile(fname, "HTMLentities");
00341   if (fi.exists())
00342     {
00343       fname = fi.absFilePath();
00344 
00345       QFile fl(fname);
00346       if (fl.open(IO_ReadOnly))
00347         {
00348           QTextStream t(&fl);
00349           QString key, value;
00350           while (!t.eof())
00351             {
00352               QString data = t.readLine();
00353               int colon = data.find(':');
00354               if (colon > 0)
00355                 {
00356                   QString key = data.left(colon);
00357                   QString value = data.right(data.length()-colon-1);
00358                   bool ok;
00359                   int ret = value.toInt(&ok);
00360                   if (ok)
00361                     {
00362                       (*entmap)[key] = ret;
00363                     }
00364                 }
00365             }
00366           fl.close();
00367         }
00368     }
00369 }
00370 
00371 unsigned short striphtml::skip_ws()
00372 {
00373     tchar ch;
00374     CStyle sty;
00375     unsigned long dummy;
00376     do
00377     {
00378         mygetch(ch, sty, dummy);
00379     }
00380     while (ch < 33 && ch != UEOF);
00381     return ch;
00382 }
00383 
00384 unsigned short striphtml::skip_ws_end()
00385 {
00386     unsigned long dummy;
00387     return skip_ws_end(dummy);
00388 }
00389 
00390 unsigned short striphtml::skip_ws_end(unsigned long& pos)
00391 {
00392     tchar ch;
00393     CStyle sty;
00394     do
00395       {
00396         mygetch(ch, sty, pos);
00397       }
00398     while (ch != '>' && ch != UEOF);
00399     return ch;
00400 }
00401 
00402 QString striphtml::getname(tchar& ch, const QString& nd)
00403 {
00404   QString nm = "";
00405   //  nm += ch;
00406   CStyle sty;
00407   unsigned long dummy;
00408   while (1)
00409     {
00410       //      if ( QChar(ch).isLetterOrNumber() )
00411       if (ch != UEOF && nd.find(ch, 0, false) == -1 && nm.length() < 2048)
00412         {
00413           nm += ch;
00414         }
00415       else
00416         {
00417           break;
00418         }
00419       mygetch(ch, sty, dummy);
00420     }
00421   return nm;
00422 }
00423 
00424 QString striphtml::getattr(tchar& ch)
00425 {
00426   QString ref;
00427   CStyle sty;
00428   unsigned long pos;
00429   if (ch == ' ') ch = skip_ws();
00430   if (ch == '=')
00431     {
00432       ch = skip_ws();
00433       if (ch == '"')
00434         {
00435           mygetch(ch, sty, pos);
00436           ref = getname(ch, "\"");
00437           ch = skip_ws();
00438         }
00439       else if (ch == '\'')
00440         {
00441           mygetch(ch, sty, pos);
00442           ref = getname(ch, "\'");
00443           ch = skip_ws();
00444         }
00445       else
00446         {
00447           ref = getname(ch, " >");
00448           if (ch == ' ') ch = skip_ws();
00449         }
00450     }
00451   return ref;
00452 }
00453 
00454 linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString& nm)
00455 {
00456 #if defined(USEQPE) || defined(_WINDOWS)
00457   QMap<unsigned long, QString>::Iterator hrefit = id2href->find(n);
00458 #else
00459   QMap<unsigned long, QString>::iterator hrefit = id2href->find(n);
00460 #endif
00461   if (hrefit == id2href->end())
00462     {
00463       return eNone;
00464     }
00465   QString href = *hrefit;
00466 #if defined(USEQPE) || defined(_WINDOWS)
00467   QMap<QString, unsigned long>::Iterator fpit = href2filepos->find(href);
00468 #else
00469   QMap<QString, unsigned long>::iterator fpit = href2filepos->find(href);
00470 #endif
00471   if (fpit == href2filepos->end())
00472     {
00473       if (href == "history.back()")
00474         {
00475           QString fc = currentfile;
00476           unsigned long loc;
00477           htmlmark m(fc, loc);
00478           linkType ret = (m_nav.back(m)) ? eFile : eNone;
00479           if (fc == m.filename())
00480             {
00481               if ((ret & eFile) != 0)
00482                 {
00483                   locate(m.posn());
00484                   return eLink;
00485                 }
00486             }
00487           return eNone;
00488         }
00489       qDebug("Searching for %s", (const char*)href);
00490 
00491 
00492       QString file, name;
00493 
00494       int colon = href.find('#');
00495       if (colon >= 0)
00496         {
00497           file = dehtml(href.left(colon));
00498           name = dehtml(href.right(href.length()-colon-1));
00499         }
00500       else
00501         {
00502           file = dehtml(href);
00503         }
00504       
00505       qDebug("File:%s", (const char*)file);
00506       qDebug("Name:%s", (const char*)name);
00507       
00508 
00509       if (file.isEmpty())
00510         {
00511           if (parent->findanchor(name))
00512             {
00513               reset();
00514               return eLink;
00515             }
00516           fpit = href2filepos->find(name);
00517           if (fpit != href2filepos->end())
00518             {
00519               locate(*fpit);
00520               return eLink;
00521             }
00522           else
00523             {
00524               //              nm = QString("<a[^>]*name[ \t]*=[ \t]*\"") + name + "\"";
00525               qDebug("Do a search for:%s", (const char*)name);
00526               findanchor(name);
00527               return eLink;
00528             }
00529         }
00530       else
00531         //      if (href.find('#') == -1)
00532         {
00533           if (m_bchm)
00534             {
00535               w = file;
00536               nm = name;
00537               return eFile;
00538             }
00539           else
00540             {
00541               QFileInfo f(currentfile);
00542               QFileInfo f1(f.dir(true), file);
00543               if (f1.exists())
00544                 {
00545                   w = f1.absFilePath();
00546                   nm = name;
00547                 }
00548               else
00549                 {
00550                   w = file;
00551                 }
00552               return (f1.exists() ? eFile : eNone);
00553             }
00554         }
00555       return eNone;
00556     }
00557   locate(*fpit);
00558   //  parent->locate((*href2filepos)[(*id2href)[n]]);
00559   return eLink;
00560 }
00561 /*
00562 unsigned short striphtml::parse_m()
00563 {
00564     tchar ch;
00565     CStyle sty;
00566     unsigned long dummy;
00567     mygetch(ch, sty, dummy);
00568     if (ch == 'm' || ch == 'M')
00569     {
00570         ch = skip_ws_end();
00571         if (ch == '>')
00572         {
00573             return 0;
00574         }
00575     }
00576     return ch;
00577 }
00578 */
00579 
00580 void striphtml::mygetch(tchar& ch, CStyle& sty, unsigned long& pos)
00581 {
00582   if (!text_q.isEmpty() && !m_inblock)
00583     {
00584       ch = text_q[0].unicode();
00585       text_q = text_q.right(text_q.length()-1);
00586     }
00587   else
00588     {
00589       parent->getch(ch, sty, pos);
00590       if (ch == '<')
00591         {
00592           m_inblock = true;
00593         }
00594       if (ch == '>')
00595         {
00596           m_inblock = false;
00597         }
00598     }
00599   if (ch == 10 && !isPre)
00600     {
00601 #ifdef REMOVE_LF_BEFORE_ENDTAG
00602       parent->getch(ch, sty, pos);
00603       if (ch == '<')
00604         {
00605           parent->getch(ch, sty, pos);
00606           if (ch == '/')
00607             {
00608               ch = '<';
00609               text_q += '/';
00610             }
00611           else
00612             {
00613               text_q += '<';
00614               text_q += ch;
00615               ch = ' ';
00616             }
00617         }
00618       else
00619         {
00620           text_q += ch;
00621           ch = ' ';
00622         }
00623 #else
00624       ch = ' ';
00625 #endif
00626     }
00627 }
00628 
00629 void striphtml::parse_paragraph(CStyle& currentstyle, tchar& ch, unsigned long pos)
00630 {
00631 /*
00632  int count = 0;
00633    for (CList<CStyle>::iterator iter = stylestack.begin(); iter != stylestack.end(); ++iter)
00634      {
00635         count++;
00636      }
00637    qDebug("Currently have %u styles", count);
00638 */
00639    if (stylestack.isEmpty())
00640      {
00641         currentstyle.unset();
00642      }
00643    else
00644      {
00645         currentstyle = stylestack.first();
00646      }
00647   if (forcecentre)
00648     {
00649       currentstyle.setCentreJustify();
00650     }
00651   if (ch == ' ') ch = skip_ws();
00652   while (ch != '>' && ch != UEOF)
00653     {
00654       QString ent = getname(ch, " =>").lower();
00655       QString attr = getattr(ch).lower();
00656       //qDebug("(Paragraph)Entity:%s Attr:%s", (const char*)ent, (const char*)attr);
00657       if (ent == "align")
00658         {
00659           if (attr == "center")
00660             {
00661               currentstyle.setCentreJustify();
00662             }
00663           if (attr == "right")
00664             {
00665               currentstyle.setRightJustify();
00666             }
00667           if (attr == "justify")
00668             {
00669               currentstyle.setFullJustify();
00670             }
00671         }
00672       if (ent == "id")
00673         {
00674           (*href2filepos)[attr] = pos;
00675         }
00676       if (ent == "bgcolor")
00677         {
00678           qDebug("Got paper colour:%s", (const char*)attr);
00679           unsigned char r,g,b;
00680           parse_color(attr, r, g, b);
00681           currentstyle.setPaper(r, g, b);
00682         }
00683       if (ent == "color")
00684         {
00685           qDebug("Got foreground colour:%s", (const char*)attr);
00686           unsigned char r,g,b;
00687           parse_color(attr, r, g, b);
00688           currentstyle.setColour(r, g, b);
00689         }
00690       if (ch == ' ') ch = skip_ws();
00691     }
00692   ch = 10;
00693 }
00694 
00695 void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
00696 {
00697   currentstyle.clearPicture();
00698   if (!q.isEmpty())
00699     {
00700       ch = q[0].unicode();
00701       if (ch == '-')
00702         {
00703           tchar w = q[1].unicode();
00704           tchar h = q[2].unicode();
00705           unsigned char r = q[3].unicode();
00706           unsigned char g = q[4].unicode();
00707           unsigned char b = q[5].unicode();
00708           ch = '#';
00709           //qDebug("html:hrule<%u, %u>", w, h);
00710           currentstyle.setPicture(false, hRule(w,h,r,g,b));
00711           q = q.right(q.length()-6);
00712         }
00713       else
00714         {
00715           q = q.right(q.length()-1);
00716         }
00717       sty = currentstyle;
00718       lastch = ch;
00719       return;
00720     }
00721   do
00722     {
00723       unsigned long npos;
00724       CStyle dummy;
00725       mygetch(ch, dummy, pos);
00726       while (ch == '<' && ch != UEOF)
00727         {
00728           ch = skip_ws();
00729           QString ent = getname(ch, " >").lower();
00730 
00731           //      qDebug("Entity:%s", (const char*)ent);
00732         
00733           if (ent == "a"/* || ent == "reference"*/)
00734             {
00735               if (ch == ' ') ch = skip_ws();
00736               bool fileposfound = false;
00737               bool ishref = false;
00738               unsigned int filepos = 0;
00739               QString ref, name;
00740               while (ch != '>' && ch != UEOF)
00741                 {
00742                   QString ent = getname(ch, " =>").lower();
00743                   QString attr = getattr(ch);
00744                   //qDebug("<A>Entity:%s Attr:%s", (const char*)ent, (const char*)attr);
00745                   if (ent == "name")
00746                     {
00747                       name = attr;
00748                     }
00749                    if (ent == "onclick")
00750                      {
00751                         int st = attr.find('\'');
00752                         int nd = attr.findRev('\'');
00753                         ref = attr.mid(st+1, nd-st-1);
00754                         ishref = true;
00755                         qDebug("Onclick:%s", (const char*)ref);
00756                      }
00757                   if (ent == "href")
00758                     {
00759                       ishref = true;
00760                       ref = attr;
00761                     }
00762                   if (ent == "filepos")
00763                     {
00764                       filepos = attr.toUInt(&fileposfound);
00765                       if (ref.isEmpty())
00766                         {
00767                           ishref = true;
00768                           ref = attr;
00769                         }
00770                     }
00771                   if (ent == "title")
00772                     {
00773                       text_q = attr + "</a><p>";
00774                     }
00775                   //qDebug("<a %s=%s>", (const char*)ent, (const char*)ref);
00776                 }
00777               if (ishref)
00778                 {
00779                   currentstyle.setColour(0,0,255);
00780                   currentstyle.setLink(true);
00781                   currentstyle.setData(currentid);
00782                   if (!text_q.isEmpty())
00783                     {
00784                       currentstyle.setBold();
00785                       currentstyle.setCentreJustify();
00786                     }
00787                   (*id2href)[currentid] = ref;
00788                   currentid++;
00789                 
00790                 
00791                   if (fileposfound)
00792                     {
00793                       (*href2filepos)[ref] = filepos;
00794                     }
00795                 }
00796               if (!name.isEmpty())
00797                 {
00798                   (*href2filepos)[name] = pos;
00799                 }
00800             }
00801           else if (ent == "p")
00802             {
00803               parse_paragraph(currentstyle, ch, pos);
00804               currentstyle.setExtraSpace(3);
00805               continue;
00806             }
00807           else if (ent == "div")
00808             {
00809               parse_paragraph(currentstyle, ch, pos);
00810               stylestack.push_front(currentstyle);
00811               currentstyle.setExtraSpace(16);
00812                   //indent = 0;
00813               continue;
00814             }
00815           else if (ent == "sup")
00816             {
00817               currentstyle.setVOffset(-1);
00818             }
00819           else if (ent == "sup")
00820             {
00821               currentstyle.setVOffset(1);
00822             }
00823           else if (ent == "/sup" || ent == "/sub")
00824             {
00825               currentstyle.setVOffset(0);
00826             }
00827           else if (ent == "span")
00828             {
00829               if (ch == ' ') ch = skip_ws();
00830               while (ch != '>' && ch != UEOF)
00831                 {
00832                   QString ent = getname(ch, " =>").lower();
00833                   QString attr = getattr(ch).lower();
00834                   if (ent == "bgcolor")
00835                     {
00836                       qDebug("Got background colour:%s", (const char*)attr);
00837                       unsigned char r,g,b;
00838                       parse_color(attr, r, g, b);
00839                       currentstyle.setBackground(r, g, b);
00840                     }
00841                   if (ent == "color")
00842                     {
00843                       qDebug("Got foreground colour:%s", (const char*)attr);
00844                       unsigned char r,g,b;
00845                       parse_color(attr, r, g, b);
00846                       currentstyle.setColour(r, g, b);
00847                     }
00848                 }
00849                stylestack.push_front(currentstyle);
00850             }
00851           else if (ent == "/span")
00852             {
00853               if (ch != '>') ch = skip_ws_end();
00854               currentstyle.setBackground(255, 255, 255);
00855               currentstyle.setColour(0, 0, 0);
00856                if (!stylestack.isEmpty())
00857                  {
00858                     stylestack.pop();
00859                  }
00860             }
00861           else if (ent == "pre")
00862             {
00863               isPre = true;
00864               currentstyle.setNoJustify();
00865               currentstyle.setMono();
00866             }
00867           else if (ent == "tt")
00868             {
00869               currentstyle.setMono();
00870             }
00871           else if (ent == "b" || ent == "strong")
00872             {
00873               currentstyle.setBold();
00874             }
00875           else if (ent == "u")
00876             {
00877               currentstyle.setUnderline();
00878             }
00879           else if (ent == "/u")
00880             {
00881               currentstyle.unsetUnderline();
00882             }
00883           else if (ent == "blockquote")
00884             {
00885               if (ch != '>') ch = skip_ws_end();
00886               ch = 10;
00887               currentstyle.setExtraSpace(0);
00888               currentstyle.setLeftMargin(30);
00889               currentstyle.setRightMargin(30);
00890               continue;
00891             }
00892           else if (ent == "br" || ent == "br/")
00893             {
00894               if (ch != '>') ch = skip_ws_end();
00895               ch = 10;
00896               currentstyle.setExtraSpace(0);
00897               lastch = 0;
00898               continue;
00899             }
00900           else if (ent == "mbp:pagebreak")
00901             {
00902               /*
00903               if (ch != '>') ch = skip_ws_end(pos);
00904               q += 10;
00905               q += QChar(UEOF);
00906               ch = 10;
00907               continue;
00908               */
00909               ch = 6;
00910               //              currentstyle.setTop();
00911               continue;
00912             }
00913           else if (ent == "center")
00914             {
00915               //forcecentre = true;
00916               qDebug("setting centre");
00917               currentstyle.setCentreJustify();
00918                ch = 10;
00919                continue;
00920             }
00921           else if (ent == "/center")
00922             {
00923               qDebug("unsetting centre");
00924               forcecentre = false;
00925             }
00926           else if (ent == "li")
00927             {
00928               if (ch != '>') ch = skip_ws_end();
00929               lastch = 0;
00930               ch = 10;
00931               if (m_listtype[indent % m_cmaxdepth] == 1)
00932                 {
00933                   q.setNum(m_ctr[indent % m_cmaxdepth]++);
00934                 }
00935               else
00936                 {
00937                   q += QChar(8226);
00938                 }
00939               q += ' ';
00940               currentstyle.setLeftMargin(6*indent);
00941               qDebug("Setting indent:%d", indent);
00942               continue;
00943             }
00944           else if (ent == "ul")
00945             {
00946               indent++;
00947               m_listtype[indent % m_cmaxdepth] = 0;
00948             }
00949           else if (ent == "/ul")
00950             {
00951               indent--;
00952             }
00953           else if (ent == "ol")
00954             {
00955               indent++;
00956               m_listtype[indent % m_cmaxdepth] = 1;
00957               m_ctr[indent % m_cmaxdepth] = 1;
00958             }
00959           else if (ent == "/ol")
00960             {
00961               indent--;
00962             }
00963           else if (ent == "i")
00964             {
00965               currentstyle.setItalic();
00966             }
00967           else if (ent == "em")
00968             {
00969               currentstyle.setItalic();
00970             }
00971           else if (ent == "small")
00972             {
00973               currentstyle.setFontSize(-2);
00974             }
00975           else if (ent == "/small")
00976             {
00977               currentstyle.setFontSize(0);
00978             }
00979           else if (ent == "big")
00980             {
00981               currentstyle.setFontSize(2);
00982             }
00983           else if (ent == "/big")
00984             {
00985               currentstyle.setFontSize(0);
00986             }
00987           else if (ent[0] == '/' && ent[1] == 'h' && ent.length() == 3 && QString("123456789").find(ent[2]) != -1)
00988             {
00989               parse_paragraph(currentstyle, ch, pos);
00990               currentstyle.setExtraSpace(3);
00991               continue;
00992             }
00993           else if (ent[0] == 'h' && ent.length() == 2 && QString("123456789").find(ent[1]) != -1)
00994             {
00995               indent = 0;
00996               if (ent[1] == '1')
00997                 {
00998                   parse_paragraph(currentstyle, ch, pos);
00999                   currentstyle.setFontSize(3);
01000                   currentstyle.setExtraSpace(8);
01001                   currentstyle.setBold();
01002                   //                currentstyle.setExtraSpace(10);
01003                 }
01004               else if (ent[1] == '2')
01005                 {
01006                   parse_paragraph(currentstyle, ch, pos);
01007                   currentstyle.setFontSize(2);
01008                   currentstyle.setExtraSpace(6);
01009                   currentstyle.setBold();
01010                   //                currentstyle.setExtraSpace(10);
01011                 }
01012               else if (ent[1] == '3')
01013                 {
01014                   parse_paragraph(currentstyle, ch, pos);
01015                   currentstyle.setFontSize(1);
01016                   currentstyle.setExtraSpace(4);
01017                   currentstyle.setBold();
01018                   //                currentstyle.setExtraSpace(10);
01019                 }
01020               else
01021                 {
01022                   parse_paragraph(currentstyle, ch, pos);
01023                   currentstyle.setExtraSpace(4);
01024                   currentstyle.setBold();
01025                   //                currentstyle.setExtraSpace(10);
01026                 }
01027               ch = 10;
01028               continue;
01029             }
01030         
01031         
01032           else if (ent == "/a")
01033             {
01034               currentstyle.setColour(0,0,0);
01035               currentstyle.setLink(false);
01036             }
01037           else if (ent == "/pre")
01038             {
01039               currentstyle.unsetMono();
01040               isPre = false;
01041             }
01042           else if (ent == "/tt")
01043             {
01044               currentstyle.unsetMono();
01045             }
01046           else if (ent == "/b" || ent == "/strong")
01047             {
01048               currentstyle.unsetBold();
01049             }
01050           else if (ent == "/i")
01051             {
01052               currentstyle.unsetItalic();
01053             }
01054           else if (ent == "/em")
01055             {
01056               currentstyle.unsetItalic();
01057             }
01058           else if (ent == "/div")
01059             {
01060               currentstyle.unset();
01061               if (ch != '>') ch = skip_ws_end();
01062               ch = 10;
01063                if (!stylestack.isEmpty())
01064                  {
01065                     stylestack.pop();
01066                  }
01067               continue;
01068             }
01069           else if (ent == "tr")
01070             {
01071               if (ch != '>') ch = skip_ws_end();
01072               ch = 10;
01073               q += '-';
01074               q += QChar(parent->getwidth());
01075               q += 2;
01076               q += '\0';
01077               q += '\0';
01078               q += '\0';
01079               continue;
01080             }
01081           else if (ent == "td")
01082             {
01083               if (ch != '>') ch = skip_ws_end();
01084               ignorespace = false;
01085             }
01086           else if (ent == "/td")
01087             {
01088               ignorespace = true;
01089               //              parse_paragraph(currentstyle, ch, pos);
01090               //stylestack.push_front(currentstyle);
01091               if (ch != '>') ch = skip_ws_end();
01092               //              ch = '|';
01093               //continue;
01094               ch = 10;
01095               q += '-';
01096               q += QChar(parent->getwidth());
01097               q += 1;
01098               q += '\0';
01099               q += '\0';
01100               q += '\0';
01101               continue;
01102             }
01103           /*
01104           else if (ent == "/td")
01105             {
01106               currentstyle.unset();
01107               if (ch != '>') ch = skip_ws_end();
01108                if (!stylestack.isEmpty())
01109                  {
01110                     stylestack.pop();
01111                  }
01112                //              ch = 10;
01113                continue;
01114             }
01115           */
01116           else if (ent[0] == '/' && ent.length() == 3 && ent[1] == 'h' && QString("123456789").find(ent[2]) != -1)
01117             {
01118               currentstyle.unset();
01119               if (ch != '>') ch = skip_ws_end();
01120               //ch = 10;
01121               //continue;
01122             }
01123           else if (ent == "table" || ent == "/table")
01124             {
01125               currentstyle.unset();
01126               ignorespace = (ent == "table");
01127               if (ent == "table")
01128                 {
01129                   if (tablenesteddepth++ == 0) currentstyle.setTable(pos);
01130                 }
01131               else
01132                 {
01133                   if (--tablenesteddepth <= 0)
01134                     {
01135                       tablenesteddepth = 0;
01136                       currentstyle.setTable(0xffffffff);
01137                     }
01138                 }
01139               if (ch == ' ') ch = skip_ws();
01140               while (ch != '>' && ch != UEOF)
01141                 {
01142                   QString ent = getname(ch, " =>").lower();
01143                   QString attr = getattr(ch);
01144                   qDebug("<table>Entity:%s Attr:%s", (const char*)ent, (const char*)attr);
01145                 }
01146               if (ch != '>') ch = skip_ws_end();
01147 
01148               currentstyle.setLeftMargin(6*tablenesteddepth);
01149 
01150 
01151               lastch = 0; // Anything but 10
01152               ch = 10;
01153               q += '-';
01154               q += QChar(parent->getwidth());
01155               q += 3;
01156               q += '\0';
01157               q += '\0';
01158               q += '\0';
01159               continue;
01160             }
01161           else if (ent == "hr")
01162             {
01163               //bool isPageBreak = false;
01164               if (ch == ' ') ch = skip_ws();
01165               unsigned char red = 0, green = 0, blue = 0;
01166               while (ch != '>' && ch != UEOF)
01167                 {
01168                   QString ent = getname(ch, " =>").lower();
01169                   QString attr = getattr(ch);
01170                   if (ent == "color")
01171                     {
01172                       parse_color(attr, red, green, blue);
01173                     }
01174                   /*
01175                   if (ent == "size")
01176                     {
01177                       if (attr == "0")
01178                         {
01179                           isPageBreak = true;
01180                         }
01181                     }
01182                   */
01183                   qDebug("<hr>Entity:%s Attr:%s", (const char*)ent, (const char*)attr);
01184                 }
01185               if (ch != '>') ch = skip_ws_end();
01186               /*
01187               if (isPageBreak)
01188                 {
01189                   ch = UEOF;
01190                 }
01191               else
01192                 {
01193               */
01194                   //          if (stylestack.isEmpty())
01195                   //            {
01196                   currentstyle.unset();
01197                   //            }
01198                   /*
01199                     else
01200                     {
01201                     qDebug("Using stack style");
01202                     currentstyle = stylestack.first();
01203                     }
01204                   */
01205                   lastch = 0; //Anything but 10 or ' '
01206                   ch = 10;
01207                   q += '-';
01208                   q += QChar(parent->getwidth());
01209                   q += 3;
01210                   q += red;
01211                   q += green;
01212                   q += blue;
01213 
01214               continue;
01215             }
01216 
01217 
01218 
01219           else if (ent == "img")
01220             {
01221               if (ch == ' ') ch = skip_ws();
01222               while (ch != '>' && ch != UEOF)
01223                 {
01224                   QString ent = getname(ch, " =>").lower();
01225                   QString attr = getattr(ch);
01226                   qDebug("<img>Entity:%s Attr:%s", (const char*)ent, (const char*)attr);
01227                   if (ent == "src")
01228                     {
01229                       /*
01230                       if (m_bchm)
01231                         {
01232                           QImage* img = parent->getPicture(attr);
01233                           if (img != NULL)
01234                             {
01235                               currentstyle.setPicture(true, img);
01236                             }
01237                         }
01238                       */
01239 
01240 
01241                       QImage* img = parent->getPicture(attr);
01242                       if (img != NULL)
01243                         {
01244                           currentstyle.setPicture(true, img);
01245                         }
01246                       else
01247                         {
01248                           QFileInfo f(currentfile);
01249                           QFileInfo f1(f.dir(true), attr);
01250                           QPixmap pm;
01251                           if (pm.load(f1.absFilePath()))
01252                             {
01253                               QImage* img = new QImage(pm.convertToImage());
01254                               currentstyle.setPicture(true, img);
01255                             }
01256                         }
01257                     }
01258                   if (ent == "recindex")
01259                     {
01260                       bool ok;
01261                       unsigned int picindex = attr.toUInt(&ok);
01262                       qDebug("Looking for image at %u", picindex);
01263                       QImage* img = parent->getPicture(picindex);
01264                       if (img != NULL)
01265                         {
01266                           currentstyle.setPicture(true, img);
01267                         }
01268                       else
01269                         {
01270                           qDebug("No image found");
01271                         }
01272                     }
01273                 }
01274               if (ch != '>') ch = skip_ws_end();
01275               ch = '#';
01276               break;
01277             }
01278           else if (ent.left(2) == "dc")
01279             {
01280               QString nd("/");
01281               skipblock(nd+ent);
01282             }
01283           else if (ent == "metadata")
01284             {
01285               //              skipblock("/metadata");
01286             }
01287           else if (ent == "title")
01288             {
01289               skipblock("/title");
01290             }
01291           else if (ent == "head")
01292             {
01293               skipblock("/head");
01294             }
01295           /*
01296           else if (ent == "metadata")
01297             {
01298               currentstyle.setFontSize(-2);
01299             }
01300           else if (ent == "/metadata")
01301             {
01302               currentstyle.unset();
01303               ch = 10;
01304               continue;
01305             }
01306           */
01307           else
01308             {
01309               if (ent[0] != '/')
01310                 qDebug("Not handling:%s", (const char*)ent);
01311             }
01312 
01313           if (ch != '>') ch = skip_ws_end();
01314           if (ent[0] == '/')
01315             mygetch(ch, dummy, pos);
01316           else
01317             mygetch(ch, dummy, npos);
01318         }
01319       if (ch == '&')
01320         {
01321           mygetch(ch, dummy, npos);
01322           if (ch == '#')
01323             {
01324               int id = 0;
01325               mygetch(ch, dummy, npos);
01326               while (ch != ';' && ch != UEOF)
01327                 {
01328                   id = 10*id+ch-'0';
01329                   mygetch(ch, dummy, npos);
01330                 }
01331               ch = id;
01332             }
01333           else
01334             {
01335               QString en;
01336               en += ch;
01337               mygetch(ch, dummy, npos);
01338               while (ch != ';' && ch != UEOF)
01339                 {
01340                   en += ch;
01341                   mygetch(ch, dummy, npos);
01342                 }
01343               if (entmap == NULL) initentmap();
01344 #if defined(USEQPE) || defined(_WINDOWS)
01345               QMap<QString, tchar>::Iterator it = entmap->find(en);
01346 #else
01347               QMap<QString, tchar>::iterator it = entmap->find(en);
01348 #endif
01349               if (it != entmap->end())
01350                 {
01351                   ch = *it;
01352                 }
01353               else
01354                 {
01355                   ch = '.';
01356                 }
01357             }
01358         }
01359       //    sty = (dummy == ucFontBase) ? currentstyle : dummy;
01360       if (lastch == 10 && ch == 10 && sty.getExtraSpace() > currentstyle.getExtraSpace())
01361         {
01362           currentstyle.setExtraSpace(sty.getExtraSpace());
01363         }
01364       sty = currentstyle;
01365     }
01366   while (!isPre && (((lastch == ' '  || lastch == 10 || ignorespace) && ch == ' ') || ((ch == 10) && (lastch == 10))));
01367   //  lastch = ch;
01368   lastch = ch;
01369   return;
01370 }
01371 
01372 QString striphtml::getTableAsHtml(unsigned long loc)
01373 {
01374   qDebug("striphtml::getTableAsHtml");
01375   QString ret;
01376   tchar ch(0);
01377   CStyle sty;
01378   unsigned long pos;
01379   locate(loc);
01380   int endpos(0);
01381   QString endmarker("</table>");
01382   QString startmarker("<table");
01383   int startpos(0);
01384   int depth(0);
01385   while (ch != UEOF)
01386     {
01387       parent->getch(ch, sty, pos);
01388       QChar qc(ch);
01389       ret += qc;
01390       if (qc.lower() == endmarker[endpos])
01391         {
01392           if ((++endpos >= endmarker.length()) && (--depth <= 0)) break;
01393         }
01394       else
01395         {
01396           endpos = 0;
01397         }
01398       if (qc.lower() == startmarker[startpos])
01399         {
01400           if (++startpos >= startmarker.length()) ++depth;
01401         }
01402       else
01403         {
01404           startpos = 0;
01405         }
01406     }
01407   return ret;
01408 }
01409 
01410 
01411 extern "C"
01412 {
01413   CFilter* newfilter(const QString& s) { return new striphtml(s); }
01414 }

Generated on Sat Nov 5 16:16:59 2005 for OPIE by  doxygen 1.4.2