00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #if QT_VERSION >= 0x030000
00038 #error QRegExp3 is now in QT 3 use QRegExp instead
00039 #endif
00040
00041 #if QT_VERSION < 0x030000
00042 #include "./qregexp3.h"
00043 #else
00044 #include "qregexp.h"
00045 #endif
00046
00047
00048 #include <opie2/odebug.h>
00049
00050
00051 #include <qarray.h>
00052 #include <qbitarray.h>
00053 #include <qcache.h>
00054 #include <qintdict.h>
00055 #include <qmap.h>
00056 #include <qstring.h>
00057 #include <qtl.h>
00058 #include <qvector.h>
00059
00060
00061 #include <limits.h>
00062
00063
00064
00065
00066
00662 static const int NumBadChars = 128;
00663 #define BadChar( ch ) ( (ch).cell() % NumBadChars )
00664
00665 static const int NoOccurrence = INT_MAX;
00666 static const int EmptyCapture = INT_MAX;
00667 static const int InftyLen = INT_MAX;
00668 static const int InftyRep = 1000;
00669 static const int EOS = -1;
00670
00671 #ifndef QT_NO_REGEXP_OPTIM
00672 static int engCount = 0;
00673 static QArray<int> *noOccurrences = 0;
00674 static QArray<int> *firstOccurrenceAtZero = 0;
00675 #endif
00676
00677
00678
00679
00680 static void mergeInto( QArray<int> *a, const QArray<int>& b )
00681 {
00682 int asize = a->size();
00683 int bsize = b.size();
00684 if ( asize == 0 ) {
00685 *a = b.copy();
00686 #ifndef QT_NO_REGEXP_OPTIM
00687 } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) {
00688 a->resize( asize + 1 );
00689 (*a)[asize] = b[0];
00690 #endif
00691 } else if ( bsize >= 1 ) {
00692 int csize = asize + bsize;
00693 QArray<int> c( csize );
00694 int i = 0, j = 0, k = 0;
00695 while ( i < asize ) {
00696 if ( j < bsize ) {
00697 if ( (*a)[i] == b[j] ) {
00698 i++;
00699 csize--;
00700 } else if ( (*a)[i] < b[j] ) {
00701 c[k++] = (*a)[i++];
00702 } else {
00703 c[k++] = b[j++];
00704 }
00705 } else {
00706 memcpy( c.data() + k, (*a).data() + i,
00707 (asize - i) * sizeof(int) );
00708 break;
00709 }
00710 }
00711 c.resize( csize );
00712 if ( j < bsize )
00713 memcpy( c.data() + k, b.data() + j, (bsize - j) * sizeof(int) );
00714 *a = c;
00715 }
00716 }
00717
00718
00719
00720
00721
00722 static void mergeInto( QMap<int, int> *a, const QMap<int, int>& b )
00723 {
00724 QMap<int, int>::ConstIterator it;
00725 for ( it = b.begin(); it != b.end(); ++it )
00726 a->insert( it.key(), *it );
00727 }
00728
00729
00730
00731
00732
00733 static int at( const QMap<int, int>& m, int k )
00734 {
00735 QMap<int, int>::ConstIterator it = m.find( k );
00736 if ( it == m.end() )
00737 return 0;
00738 else
00739 return *it;
00740 }
00741
00742 #ifndef QT_NO_REGEXP_WILDCARD
00743
00744
00745
00746
00747 static QString wc2rx( const QString& wc )
00748 {
00749 int wclen = wc.length();
00750 QString rx = QString::fromLatin1( "" );
00751 int i = 0;
00752 while ( i < wclen ) {
00753 QChar c = wc[i++];
00754 switch ( c.unicode() ) {
00755 case '*':
00756 rx += QString::fromLatin1( ".*" );
00757 break;
00758 case '?':
00759 rx += QChar( '.' );
00760 break;
00761 case '$':
00762 case '(':
00763 case ')':
00764 case '+':
00765 case '.':
00766 case '\\':
00767 case '^':
00768 case '{':
00769 case '|':
00770 case '}':
00771 rx += QChar( '\\' );
00772 rx += c;
00773 break;
00774 case '[':
00775 rx += c;
00776 if ( wc[i] == QChar('^') )
00777 rx += wc[i++];
00778 if ( i < wclen ) {
00779 if ( rx[i] == ']' )
00780 rx += wc[i++];
00781 while ( i < wclen && wc[i] != QChar(']') ) {
00782 if ( wc[i] == '\\' )
00783 rx += QChar( '\\' );
00784 rx += wc[i++];
00785 }
00786 }
00787 break;
00788 default:
00789 rx += c;
00790 }
00791 }
00792 return rx;
00793 }
00794 #endif
00795
00796
00797
00798
00799
00800 class QRegExpEngine : public QShared
00801 {
00802 public:
00803 #ifndef QT_NO_REGEXP_CCLASS
00804
00805
00806
00807
00808 class CharClass
00809 {
00810 public:
00811 CharClass();
00812 CharClass( const CharClass& cc ) { operator=( cc ); }
00813
00814 CharClass& operator=( const CharClass& cc );
00815
00816 void clear();
00817 bool negative() const { return n; }
00818 void setNegative( bool negative );
00819 void addCategories( int cats );
00820 void addRange( ushort from, ushort to );
00821 void addSingleton( ushort ch ) { addRange( ch, ch ); }
00822
00823 bool in( QChar ch ) const;
00824 #ifndef QT_NO_REGEXP_OPTIM
00825 const QArray<int>& firstOccurrence() const { return occ1; }
00826 #endif
00827
00828 #if defined(QT_DEBUG)
00829 void dump() const;
00830 #endif
00831
00832 private:
00833
00834
00835
00836
00837 struct Range
00838 {
00839 ushort from;
00840 ushort to;
00841 };
00842
00843 int c;
00844 QArray<Range> r;
00845 bool n;
00846 #ifndef QT_NO_REGEXP_OPTIM
00847 QArray<int> occ1;
00848 #endif
00849 };
00850 #else
00851 struct CharClass
00852 {
00853 int x;
00854
00855 #ifndef QT_NO_REGEXP_OPTIM
00856 const QArray<int>& firstOccurrence() const {
00857 return *firstOccurrenceAtZero;
00858 }
00859 #endif
00860 };
00861 #endif
00862
00863 QRegExpEngine( bool caseSensitive ) { setup( caseSensitive ); }
00864 QRegExpEngine( const QString& rx, bool caseSensitive );
00865 #ifndef QT_NO_REGEXP_OPTIM
00866 ~QRegExpEngine();
00867 #endif
00868
00869 bool isValid() const { return valid; }
00870 bool caseSensitive() const { return cs; }
00871 int numCaptures() const { return realncap; }
00872 QArray<int> match( const QString& str, int pos, bool minimal,
00873 bool oneTest );
00874 int matchedLength() const { return mmMatchedLen; }
00875
00876 int createState( QChar ch );
00877 int createState( const CharClass& cc );
00878 #ifndef QT_NO_REGEXP_BACKREF
00879 int createState( int bref );
00880 #endif
00881
00882 void addCatTransitions( const QArray<int>& from, const QArray<int>& to );
00883 #ifndef QT_NO_REGEXP_CAPTURE
00884 void addPlusTransitions( const QArray<int>& from, const QArray<int>& to,
00885 int atom );
00886 #endif
00887
00888 #ifndef QT_NO_REGEXP_ANCHOR_ALT
00889 int anchorAlternation( int a, int b );
00890 int anchorConcatenation( int a, int b );
00891 #else
00892 int anchorAlternation( int a, int b ) { return a & b; }
00893 int anchorConcatenation( int a, int b ) { return a | b; }
00894 #endif
00895 void addAnchors( int from, int to, int a );
00896
00897 #ifndef QT_NO_REGEXP_OPTIM
00898 void setupGoodStringHeuristic( int earlyStart, int lateStart,
00899 const QString& str );
00900 void setupBadCharHeuristic( int minLen, const QArray<int>& firstOcc );
00901 void heuristicallyChooseHeuristic();
00902 #endif
00903
00904 #if defined(QT_DEBUG)
00905 void dump() const;
00906 #endif
00907
00908 private:
00909 enum { CharClassBit = 0x10000, BackRefBit = 0x20000 };
00910
00911
00912
00913
00914
00915
00916 struct State
00917 {
00918 #ifndef QT_NO_REGEXP_CAPTURE
00919 int atom;
00920 #endif
00921 int match;
00922 QArray<int> outs;
00923 QMap<int, int> *reenter;
00924 QMap<int, int> *anchors;
00925
00926 #ifndef QT_NO_REGEXP_CAPTURE
00927 State( int a, int m )
00928 : atom( a ), match( m ), reenter( 0 ), anchors( 0 ) { }
00929 #else
00930 State( int m )
00931 : match( m ), reenter( 0 ), anchors( 0 ) { }
00932 #endif
00933 ~State() { delete reenter; delete anchors; }
00934 };
00935
00936 #ifndef QT_NO_REGEXP_LOOKAHEAD
00937
00938
00939
00940
00941 struct Lookahead
00942 {
00943 QRegExpEngine *eng;
00944 bool neg;
00945
00946 Lookahead( QRegExpEngine *eng0, bool neg0 )
00947 : eng( eng0 ), neg( neg0 ) { }
00948 ~Lookahead() { delete eng; }
00949 };
00950 #endif
00951
00952 #ifndef QT_NO_REGEXP_CAPTURE
00953
00954
00955
00956
00957 struct Atom
00958 {
00959 int parent;
00960 int capture;
00961 };
00962 #endif
00963
00964 #ifndef QT_NO_REGEXP_ANCHOR_ALT
00965
00966
00967
00968
00969 struct AnchorAlternation
00970 {
00971 int a;
00972 int b;
00973 };
00974 #endif
00975
00976 enum { InitialState = 0, FinalState = 1 };
00977 void setup( bool caseSensitive );
00978 int setupState( int match );
00979
00980
00981
00982
00983 enum { MaxLookaheads = 13, MaxBackRefs = 14 };
00984 enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002,
00985 Anchor_Word = 0x00000004, Anchor_NonWord = 0x00000008,
00986 Anchor_FirstLookahead = 0x00000010,
00987 Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads,
00988 Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1,
00989 Anchor_Alternation = Anchor_BackRef1Empty << MaxBackRefs,
00990
00991 Anchor_LookaheadMask = ( Anchor_FirstLookahead - 1 ) ^
00992 ( (Anchor_FirstLookahead << MaxLookaheads) - 1 ) };
00993 #ifndef QT_NO_REGEXP_CAPTURE
00994 int startAtom( bool capture );
00995 void finishAtom( int atom ) { cf = f[atom].parent; }
00996 #endif
00997
00998 #ifndef QT_NO_REGEXP_LOOKAHEAD
00999 int addLookahead( QRegExpEngine *eng, bool negative );
01000 #endif
01001
01002 #ifndef QT_NO_REGEXP_CAPTURE
01003 bool isBetterCapture( const int *begin1, const int *end1, const int *begin2,
01004 const int *end2 );
01005 #endif
01006 bool testAnchor( int i, int a, const int *capBegin );
01007
01008 #ifndef QT_NO_REGEXP_OPTIM
01009 bool goodStringMatch();
01010 bool badCharMatch();
01011 #else
01012 bool bruteMatch();
01013 #endif
01014 bool matchHere();
01015
01016 QVector<State> s;
01017 int ns;
01018 #ifndef QT_NO_REGEXP_CAPTURE
01019 QArray<Atom> f;
01020 int nf;
01021 int cf;
01022 #endif
01023 int realncap;
01024 int ncap;
01025 #ifndef QT_NO_REGEXP_CCLASS
01026 QVector<CharClass> cl;
01027 #endif
01028 #ifndef QT_NO_REGEXP_LOOKAHEAD
01029 QVector<Lookahead> ahead;
01030 #endif
01031 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01032 QArray<AnchorAlternation> aa;
01033 #endif
01034 #ifndef QT_NO_REGEXP_OPTIM
01035 bool caretAnchored;
01036 #endif
01037 bool valid;
01038 bool cs;
01039 #ifndef QT_NO_REGEXP_BACKREF
01040 int nbrefs;
01041 #endif
01042
01043 #ifndef QT_NO_REGEXP_OPTIM
01044 bool useGoodStringHeuristic;
01045
01046 int goodEarlyStart;
01047 int goodLateStart;
01048 QString goodStr;
01049
01050 int minl;
01051 QArray<int> occ1;
01052 #endif
01053
01054
01055
01056
01057
01058
01059
01060
01061 class Box
01062 {
01063 public:
01064 Box( QRegExpEngine *engine );
01065 Box( const Box& b ) { operator=( b ); }
01066
01067 Box& operator=( const Box& b );
01068
01069 void clear() { operator=(Box(eng)); }
01070 void set( QChar ch );
01071 void set( const CharClass& cc );
01072 #ifndef QT_NO_REGEXP_BACKREF
01073 void set( int bref );
01074 #endif
01075
01076 void cat( const Box& b );
01077 void orx( const Box& b );
01078 void plus( int atom );
01079 void opt();
01080 void catAnchor( int a );
01081 #ifndef QT_NO_REGEXP_OPTIM
01082 void setupHeuristics();
01083 #endif
01084
01085 #if defined(QT_DEBUG)
01086 void dump() const;
01087 #endif
01088
01089 private:
01090 void addAnchorsToEngine( const Box& to ) const;
01091
01092 QRegExpEngine *eng;
01093 QArray<int> ls;
01094 QArray<int> rs;
01095 QMap<int, int> lanchors;
01096 QMap<int, int> ranchors;
01097 int skipanchors;
01098
01099 #ifndef QT_NO_REGEXP_OPTIM
01100 int earlyStart;
01101 int lateStart;
01102 QString str;
01103 QString leftStr;
01104 QString rightStr;
01105 int maxl;
01106 #endif
01107
01108 int minl;
01109 #ifndef QT_NO_REGEXP_OPTIM
01110 QArray<int> occ1;
01111 #endif
01112 };
01113 friend class Box;
01114
01115
01116
01117
01118 enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen,
01119 Tok_PosLookahead, Tok_NegLookahead, Tok_RightParen, Tok_CharClass,
01120 Tok_Caret, Tok_Quantifier, Tok_Bar, Tok_Word, Tok_NonWord,
01121 Tok_Char = 0x10000, Tok_BackRef = 0x20000 };
01122 int getChar();
01123 int getEscape();
01124 #ifndef QT_NO_REGEXP_INTERVAL
01125 int getRep( int def );
01126 #endif
01127 #ifndef QT_NO_REGEXP_LOOKAHEAD
01128 void skipChars( int n );
01129 #endif
01130 void startTokenizer( const QChar *rx, int len );
01131 int getToken();
01132
01133 const QChar *yyIn;
01134 int yyPos0;
01135 int yyPos;
01136 int yyLen;
01137 int yyCh;
01138 CharClass *yyCharClass;
01139 int yyMinRep;
01140 int yyMaxRep;
01141 bool yyError;
01142
01143
01144
01145
01146 int parse( const QChar *rx, int len );
01147 void parseAtom( Box *box );
01148 void parseFactor( Box *box );
01149 void parseTerm( Box *box );
01150 void parseExpression( Box *box );
01151
01152 int yyTok;
01153 bool yyMayCapture;
01154
01155
01156
01157
01158 const QString *mmStr;
01159 const QChar *mmIn;
01160 int mmPos;
01161 int mmLen;
01162 bool mmMinimal;
01163 QArray<int> mmCaptured;
01164 QArray<int> mmCapturedNoMatch;
01165 QArray<int> mmBigArray;
01166 int *mmInNextStack;
01167 int *mmCurStack;
01168 int *mmNextStack;
01169 int *mmCurCapBegin;
01170 int *mmNextCapBegin;
01171 int *mmCurCapEnd;
01172 int *mmNextCapEnd;
01173 int *mmTempCapBegin;
01174 int *mmTempCapEnd;
01175 int *mmCapBegin;
01176 int *mmCapEnd;
01177 int *mmSlideTab;
01178 int mmSlideTabSize;
01179 #ifndef QT_NO_REGEXP_BACKREF
01180 QIntDict<int> mmSleeping;
01181 #endif
01182 int mmMatchedLen;
01183 };
01184
01185 QRegExpEngine::QRegExpEngine( const QString& rx, bool caseSensitive )
01186 #ifndef QT_NO_REGEXP_BACKREF
01187 : mmSleeping( 101 )
01188 #endif
01189 {
01190 setup( caseSensitive );
01191 valid = ( parse(rx.unicode(), rx.length()) == (int) rx.length() );
01192 }
01193
01194 #ifndef QT_NO_REGEXP_OPTIM
01195 QRegExpEngine::~QRegExpEngine()
01196 {
01197 if ( --engCount == 0 ) {
01198 delete noOccurrences;
01199 noOccurrences = 0;
01200 delete firstOccurrenceAtZero;
01201 firstOccurrenceAtZero = 0;
01202 }
01203 }
01204 #endif
01205
01206
01207
01208
01209
01210 QArray<int> QRegExpEngine::match( const QString& str, int pos, bool minimal,
01211 bool oneTest )
01212 {
01213 mmStr = &str;
01214 mmIn = str.unicode();
01215 if ( mmIn == 0 )
01216 mmIn = &QChar::null;
01217 mmPos = pos;
01218 mmLen = str.length();
01219 mmMinimal = minimal;
01220 mmMatchedLen = 0;
01221
01222 bool matched = FALSE;
01223 if ( valid && mmPos >= 0 && mmPos <= mmLen ) {
01224 #ifndef QT_NO_REGEXP_OPTIM
01225 if ( mmPos <= mmLen - minl ) {
01226 if ( caretAnchored || oneTest )
01227 matched = matchHere();
01228 else if ( useGoodStringHeuristic )
01229 matched = goodStringMatch();
01230 else
01231 matched = badCharMatch();
01232 }
01233 #else
01234 matched = oneTest ? matchHere() : bruteMatch();
01235 #endif
01236 }
01237
01238 if ( matched ) {
01239 mmCaptured.detach();
01240 mmCaptured[0] = mmPos;
01241 mmCaptured[1] = mmMatchedLen;
01242 for ( int j = 0; j < realncap; j++ ) {
01243 int len = mmCapEnd[j] - mmCapBegin[j];
01244 mmCaptured[2 + 2 * j] = len > 0 ? mmPos + mmCapBegin[j] : 0;
01245 mmCaptured[2 + 2 * j + 1] = len;
01246 }
01247 return mmCaptured;
01248 } else {
01249 return mmCapturedNoMatch;
01250 }
01251 }
01252
01253
01254
01255
01256
01257
01258 int QRegExpEngine::createState( QChar ch )
01259 {
01260 return setupState( ch.unicode() );
01261 }
01262
01263 int QRegExpEngine::createState( const CharClass& cc )
01264 {
01265 #ifndef QT_NO_REGEXP_CCLASS
01266 int n = cl.size();
01267 cl.resize( n + 1 );
01268 cl.insert( n, new CharClass(cc) );
01269 return setupState( CharClassBit | n );
01270 #else
01271 Q_UNUSED( cc );
01272 return setupState( CharClassBit );
01273 #endif
01274 }
01275
01276 #ifndef QT_NO_REGEXP_BACKREF
01277 int QRegExpEngine::createState( int bref )
01278 {
01279 if ( bref > nbrefs ) {
01280 nbrefs = bref;
01281 if ( nbrefs > MaxBackRefs ) {
01282 yyError = TRUE;
01283 return 0;
01284 }
01285 }
01286 return setupState( BackRefBit | bref );
01287 }
01288 #endif
01289
01290
01291
01292
01293
01294
01295
01296
01297 void QRegExpEngine::addCatTransitions( const QArray<int>& from,
01298 const QArray<int>& to )
01299 {
01300 for ( int i = 0; i < (int) from.size(); i++ ) {
01301 State *st = s[from[i]];
01302 mergeInto( &st->outs, to );
01303 }
01304 }
01305
01306 #ifndef QT_NO_REGEXP_CAPTURE
01307 void QRegExpEngine::addPlusTransitions( const QArray<int>& from,
01308 const QArray<int>& to, int atom )
01309 {
01310 for ( int i = 0; i < (int) from.size(); i++ ) {
01311 State *st = s[from[i]];
01312 QArray<int> oldOuts = st->outs.copy();
01313 mergeInto( &st->outs, to );
01314 if ( f[atom].capture >= 0 ) {
01315 if ( st->reenter == 0 )
01316 st->reenter = new QMap<int, int>;
01317 for ( int j = 0; j < (int) to.size(); j++ ) {
01318 if ( !st->reenter->contains(to[j]) &&
01319 oldOuts.bsearch(to[j]) < 0 )
01320 st->reenter->insert( to[j], atom );
01321 }
01322 }
01323 }
01324 }
01325 #endif
01326
01327 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01328
01329
01330
01331 int QRegExpEngine::anchorAlternation( int a, int b )
01332 {
01333 if ( ((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0 )
01334 return a & b;
01335
01336 int n = aa.size();
01337 aa.resize( n + 1 );
01338 aa[n].a = a;
01339 aa[n].b = b;
01340 return Anchor_Alternation | n;
01341 }
01342
01343
01344
01345
01346 int QRegExpEngine::anchorConcatenation( int a, int b )
01347 {
01348 if ( ((a | b) & Anchor_Alternation) == 0 )
01349 return a | b;
01350 if ( (b & Anchor_Alternation) != 0 )
01351 qSwap( a, b );
01352 int aprime = anchorConcatenation( aa[a ^ Anchor_Alternation].a, b );
01353 int bprime = anchorConcatenation( aa[a ^ Anchor_Alternation].b, b );
01354 return anchorAlternation( aprime, bprime );
01355 }
01356 #endif
01357
01358
01359
01360
01361 void QRegExpEngine::addAnchors( int from, int to, int a )
01362 {
01363 State *st = s[from];
01364 if ( st->anchors == 0 )
01365 st->anchors = new QMap<int, int>;
01366 if ( st->anchors->contains(to) )
01367 a = anchorAlternation( (*st->anchors)[to], a );
01368 st->anchors->insert( to, a );
01369 }
01370
01371 #ifndef QT_NO_REGEXP_OPTIM
01372
01373
01374
01375
01376
01377 void QRegExpEngine::setupGoodStringHeuristic( int earlyStart, int lateStart,
01378 const QString& str )
01379 {
01380 goodEarlyStart = earlyStart;
01381 goodLateStart = lateStart;
01382 goodStr = cs ? str : str.lower();
01383 }
01384
01385 void QRegExpEngine::setupBadCharHeuristic( int minLen,
01386 const QArray<int>& firstOcc )
01387 {
01388 minl = minLen;
01389 occ1 = cs ? firstOcc : *firstOccurrenceAtZero;
01390 }
01391
01392
01393
01394
01395
01396
01397
01398
01399
01400
01401
01402
01403
01404 void QRegExpEngine::heuristicallyChooseHeuristic()
01405 {
01406 int i;
01407
01408 if ( minl == 0 )
01409 return;
01410
01411
01412
01413
01414
01415 int goodStringScore = ( 64 * goodStr.length() / minl ) -
01416 ( goodLateStart - goodEarlyStart );
01417
01418
01419
01420
01421
01422 int badCharScore = 0;
01423 int step = QMAX( 1, NumBadChars / 32 );
01424 for ( i = 1; i < NumBadChars; i += step ) {
01425 if ( occ1[i] == NoOccurrence )
01426 badCharScore += minl;
01427 else
01428 badCharScore += occ1[i];
01429 }
01430 badCharScore /= minl;
01431
01432 useGoodStringHeuristic = ( goodStringScore > badCharScore );
01433 }
01434 #endif
01435
01436 #if defined(QT_DEBUG)
01437 void QRegExpEngine::dump() const
01438 {
01439 int i, j;
01440 odebug << "Case " << (cs ? "" : "in") << "sensitive engine" << oendl;
01441 odebug << " States" << oendl;
01442 for ( i = 0; i < ns; i++ ) {
01443 odebug << " " << i
01444 << (i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "") << oendl;
01445
01446 #ifndef QT_NO_REGEXP_CAPTURE
01447 odebug << " in atom " << s[i]->atom << oendl;
01448 #endif
01449 int m = s[i]->match;
01450 if ( (m & CharClassBit) != 0 ) {
01451 odebug << " match character class " << (m ^ CharClassBit) << oendl;
01452 #ifndef QT_NO_REGEXP_CCLASS
01453 cl[m ^ CharClassBit]->dump();
01454 #else
01455 odebug << " negative character class" << oendl;
01456 #endif
01457 } else if ( (m & BackRefBit) != 0 ) {
01458 odebug << " match back-reference " << (m ^ BackRefBit) << oendl;
01459 } else if ( m >= 0x20 && m <= 0x7e ) {
01460 odebug << " match " << QString().sprintf( "0x%.4x", m) << " (" << m << ")" << oendl;
01461
01462 } else {
01463 odebug << " match " << QString().sprintf( "0x%.4x", m) << oendl;
01464 }
01465 for ( j = 0; j < (int) s[i]->outs.size(); j++ ) {
01466 int next = s[i]->outs[j];
01467 odebug << " -> " << next << oendl;
01468 if ( s[i]->reenter != 0 && s[i]->reenter->contains(next) )
01469 odebug << " [reenter " << (*s[i]->reenter)[next] << "]" << oendl;
01470 if ( s[i]->anchors != 0 && at(*s[i]->anchors, next) != 0 )
01471 odebug << " [anchors " << QString().sprintf( "0x%.8x]", (*s[i]->anchors)[next] ) << oendl;
01472 }
01473 }
01474 #ifndef QT_NO_REGEXP_CAPTURE
01475 if ( nf > 0 ) {
01476 odebug << " Atom Parent Capture" << oendl;
01477 for ( i = 0; i < nf; i++ )
01478 odebug << QString().sprintf(" %6d %6d %6d", i, f[i].parent, f[i].capture ) << oendl;
01479 }
01480 #endif
01481 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01482 for ( i = 0; i < (int) aa.size(); i++ )
01483 odebug << QString().sprintf(" Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b ) << oendl;
01484 #endif
01485 }
01486 #endif
01487
01488 void QRegExpEngine::setup( bool caseSensitive )
01489 {
01490 #ifndef QT_NO_REGEXP_OPTIM
01491 if ( engCount++ == 0 ) {
01492 noOccurrences = new QArray<int>( NumBadChars );
01493 firstOccurrenceAtZero = new QArray<int>( NumBadChars );
01494 noOccurrences->fill( NoOccurrence );
01495 firstOccurrenceAtZero->fill( 0 );
01496 }
01497 #endif
01498 s.setAutoDelete( TRUE );
01499 s.resize( 32 );
01500 ns = 0;
01501 #ifndef QT_NO_REGEXP_CAPTURE
01502 f.resize( 32 );
01503 nf = 0;
01504 cf = -1;
01505 #endif
01506 realncap = 0;
01507 ncap = 0;
01508 #ifndef QT_NO_REGEXP_CCLASS
01509 cl.setAutoDelete( TRUE );
01510 #endif
01511 #ifndef QT_NO_REGEXP_LOOKAHEAD
01512 ahead.setAutoDelete( TRUE );
01513 #endif
01514 #ifndef QT_NO_REGEXP_OPTIM
01515 caretAnchored = TRUE;
01516 #endif
01517 valid = FALSE;
01518 cs = caseSensitive;
01519 #ifndef QT_NO_REGEXP_BACKREF
01520 nbrefs = 0;
01521 #endif
01522 #ifndef QT_NO_REGEXP_OPTIM
01523 useGoodStringHeuristic = FALSE;
01524 minl = 0;
01525 occ1 = *firstOccurrenceAtZero;
01526 #endif
01527 mmCapturedNoMatch.fill( -1, 2 );
01528 }
01529
01530 int QRegExpEngine::setupState( int match )
01531 {
01532 if ( (ns & (ns + 1)) == 0 && ns + 1 >= (int) s.size() )
01533 s.resize( (ns + 1) << 1 );
01534 #ifndef QT_NO_REGEXP_CAPTURE
01535 s.insert( ns, new State(cf, match) );
01536 #else
01537 s.insert( ns, new State(match) );
01538 #endif
01539 return ns++;
01540 }
01541
01542 #ifndef QT_NO_REGEXP_CAPTURE
01543
01544
01545
01546
01547
01548 int QRegExpEngine::startAtom( bool capture )
01549 {
01550 if ( (nf & (nf + 1)) == 0 && nf + 1 >= (int) f.size() )
01551 f.resize( (nf + 1) << 1 );
01552 f[nf].parent = cf;
01553 cf = nf++;
01554 f[cf].capture = capture ? ncap++ : -1;
01555 return cf;
01556 }
01557 #endif
01558
01559 #ifndef QT_NO_REGEXP_LOOKAHEAD
01560
01561
01562
01563 int QRegExpEngine::addLookahead( QRegExpEngine *eng, bool negative )
01564 {
01565 int n = ahead.size();
01566 if ( n == MaxLookaheads ) {
01567 yyError = TRUE;
01568 return 0;
01569 }
01570 ahead.resize( n + 1 );
01571 ahead.insert( n, new Lookahead(eng, negative) );
01572 return Anchor_FirstLookahead << n;
01573 }
01574 #endif
01575
01576 #ifndef QT_NO_REGEXP_CAPTURE
01577
01578
01579
01580 bool QRegExpEngine::isBetterCapture( const int *begin1, const int *end1,
01581 const int *begin2, const int *end2 )
01582 {
01583 for ( int i = 0; i < ncap; i++ ) {
01584 int delta = begin2[i] - begin1[i];
01585 if ( delta == 0 )
01586 delta = end1[i] - end2[i];
01587
01588 if ( delta != 0 )
01589 return delta > 0;
01590 }
01591 return FALSE;
01592 }
01593 #endif
01594
01595
01596
01597
01598
01599 bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
01600 {
01601 int j;
01602
01603 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01604 if ( (a & Anchor_Alternation) != 0 ) {
01605 return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) ||
01606 testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin );
01607 }
01608 #endif
01609
01610 if ( (a & Anchor_Caret) != 0 ) {
01611 if ( mmPos + i != 0 )
01612 return FALSE;
01613 }
01614 if ( (a & Anchor_Dollar) != 0 ) {
01615 if ( mmPos + i != mmLen )
01616 return FALSE;
01617 }
01618 #ifndef QT_NO_REGEXP_ESCAPE
01619 if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) {
01620 bool before = FALSE, after = FALSE;
01621 if ( mmPos + i != 0 )
01622 before = mmIn[mmPos + i - 1].isLetterOrNumber();
01623 if ( mmPos + i != mmLen )
01624 after = mmIn[mmPos + i].isLetterOrNumber();
01625 if ( (a & Anchor_Word) != 0 && (before == after) )
01626 return FALSE;
01627 if ( (a & Anchor_NonWord) != 0 && (before != after) )
01628 return FALSE;
01629 }
01630 #endif
01631 #ifndef QT_NO_REGEXP_LOOKAHEAD
01632 bool catchx = TRUE;
01633
01634 if ( (a & Anchor_LookaheadMask) != 0 ) {
01635 QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i,
01636 mmLen - mmPos - i );
01637 for ( j = 0; j < (int) ahead.size(); j++ ) {
01638 if ( (a & (Anchor_FirstLookahead << j)) != 0 ) {
01639 catchx = ( ahead[j]->eng->match(cstr.string(), 0, TRUE,
01640 TRUE)[0] == 0 );
01641 if ( catchx == ahead[j]->neg )
01642 return FALSE;
01643 }
01644 }
01645 }
01646 #endif
01647 #ifndef QT_NO_REGEXP_CAPTURE
01648 #ifndef QT_NO_REGEXP_BACKREF
01649 for ( j = 0; j < nbrefs; j++ ) {
01650 if ( (a & (Anchor_BackRef1Empty << j)) != 0 ) {
01651 if ( capBegin[j] != EmptyCapture )
01652 return FALSE;
01653 }
01654 }
01655 #endif
01656 #endif
01657 return TRUE;
01658 }
01659
01660 #ifndef QT_NO_REGEXP_OPTIM
01661
01662
01663
01664
01665
01666
01667 bool QRegExpEngine::goodStringMatch()
01668 {
01669 int k = mmPos + goodEarlyStart;
01670
01671 while ( (k = mmStr->find(goodStr, k, cs)) != -1 ) {
01672 int from = k - goodLateStart;
01673 int to = k - goodEarlyStart;
01674 if ( from > mmPos )
01675 mmPos = from;
01676
01677 while ( mmPos <= to ) {
01678 if ( matchHere() )
01679 return TRUE;
01680 mmPos++;
01681 }
01682 k++;
01683 }
01684 return FALSE;
01685 }
01686
01687 bool QRegExpEngine::badCharMatch()
01688 {
01689 int slideHead = 0;
01690 int slideNext = 0;
01691 int i;
01692 int lastPos = mmLen - minl;
01693 memset( mmSlideTab, 0, mmSlideTabSize * sizeof(int) );
01694
01695
01696
01697
01698
01699 for ( i = 0; i < minl; i++ ) {
01700 int sk = occ1[BadChar(mmIn[mmPos + i])];
01701 if ( sk == NoOccurrence )
01702 sk = i + 1;
01703 if ( sk > 0 ) {
01704 int k = i + 1 - sk;
01705 if ( k < 0 ) {
01706 sk = i + 1;
01707 k = 0;
01708 }
01709 if ( sk > mmSlideTab[k] )
01710 mmSlideTab[k] = sk;
01711 }
01712 }
01713
01714 if ( mmPos > lastPos )
01715 return FALSE;
01716
01717 while ( TRUE ) {
01718 if ( ++slideNext >= mmSlideTabSize )
01719 slideNext = 0;
01720 if ( mmSlideTab[slideHead] > 0 ) {
01721 if ( mmSlideTab[slideHead] - 1 > mmSlideTab[slideNext] )
01722 mmSlideTab[slideNext] = mmSlideTab[slideHead] - 1;
01723 mmSlideTab[slideHead] = 0;
01724 } else {
01725 if ( matchHere() )
01726 return TRUE;
01727 }
01728
01729 if ( mmPos == lastPos )
01730 break;
01731
01732
01733
01734
01735
01736 int sk = occ1[BadChar(mmIn[mmPos + minl])];
01737 if ( sk == NoOccurrence ) {
01738 mmSlideTab[slideNext] = minl;
01739 } else if ( sk > 0 ) {
01740 int k = slideNext + minl - sk;
01741 if ( k >= mmSlideTabSize )
01742 k -= mmSlideTabSize;
01743 if ( sk > mmSlideTab[k] )
01744 mmSlideTab[k] = sk;
01745 }
01746 slideHead = slideNext;
01747 mmPos++;
01748 }
01749 return FALSE;
01750 }
01751 #else
01752 bool QRegExpEngine::bruteMatch()
01753 {
01754 while ( mmPos <= mmLen ) {
01755 if ( matchHere() )
01756 return TRUE;
01757 mmPos++;
01758 }
01759 return FALSE;
01760 }
01761 #endif
01762
01763
01764
01765
01766 bool QRegExpEngine::matchHere()
01767 {
01768 int ncur = 1, nnext = 0;
01769 int i = 0, j, k, m;
01770 bool match = FALSE;
01771
01772 mmMatchedLen = -1;
01773 mmCurStack[0] = InitialState;
01774
01775 #ifndef QT_NO_REGEXP_CAPTURE
01776 if ( ncap > 0 ) {
01777 for ( j = 0; j < ncap; j++ ) {
01778 mmCurCapBegin[j] = EmptyCapture;
01779 mmCurCapEnd[j] = EmptyCapture;
01780 }
01781 }
01782 #endif
01783
01784 #ifndef QT_NO_REGEXP_BACKREF
01785 int *zzZ = 0;
01786
01787 while ( (ncur > 0 || mmSleeping.count() > 0) && i <= mmLen - mmPos &&
01788 !match )
01789 #else
01790 while ( ncur > 0 && i <= mmLen - mmPos && !match )
01791 #endif
01792 {
01793 int ch = ( i < mmLen - mmPos ) ? mmIn[mmPos + i].unicode() : 0;
01794 for ( j = 0; j < ncur; j++ ) {
01795 int cur = mmCurStack[j];
01796 State *scur = s[cur];
01797 QArray<int>& outs = scur->outs;
01798 for ( k = 0; k < (int) outs.size(); k++ ) {
01799 int next = outs[k];
01800 State *snext = s[next];
01801 bool in = TRUE;
01802 #ifndef QT_NO_REGEXP_BACKREF
01803 int needSomeSleep = 0;
01804 #endif
01805
01806
01807
01808
01809 if ( scur->anchors != 0 ) {
01810 int a = at( *scur->anchors, next );
01811 if ( a != 0 && !testAnchor(i, a, mmCurCapBegin + j * ncap) )
01812 in = FALSE;
01813 }
01814
01815
01816
01817
01818 if ( in ) {
01819 m = snext->match;
01820 if ( (m & (CharClassBit | BackRefBit)) == 0 ) {
01821 if ( cs )
01822 in = ( m == ch );
01823 else
01824 in = ( QChar(m).lower() == QChar(ch).lower() );
01825 } else if ( next == FinalState ) {
01826 mmMatchedLen = i;
01827 match = mmMinimal;
01828 in = TRUE;
01829 } else if ( (m & CharClassBit) != 0 ) {
01830 #ifndef QT_NO_REGEXP_CCLASS
01831 const CharClass *cc = cl[m ^ CharClassBit];
01832 if ( cs )
01833 in = cc->in( ch );
01834 else if ( cc->negative() )
01835 in = cc->in( QChar(ch).lower() ) &&
01836 cc->in( QChar(ch).upper() );
01837 else
01838 in = cc->in( QChar(ch).lower() ) ||
01839 cc->in( QChar(ch).upper() );
01840 #endif
01841 #ifndef QT_NO_REGEXP_BACKREF
01842 } else {
01843 int bref = m ^ BackRefBit;
01844 int ell = j * ncap + ( bref - 1 );
01845
01846 in = bref <= ncap && mmCurCapBegin[ell] != EmptyCapture;
01847 if ( in ) {
01848 if ( cs )
01849 in = ( mmIn[mmPos + mmCurCapBegin[ell]]
01850 == QChar(ch) );
01851 else
01852 in = ( mmIn[mmPos + mmCurCapBegin[ell]].lower()
01853 == QChar(ch).lower() );
01854 }
01855
01856 if ( in ) {
01857 int delta;
01858 if ( mmCurCapEnd[ell] == EmptyCapture )
01859 delta = i - mmCurCapBegin[ell];
01860 else
01861 delta = mmCurCapEnd[ell] - mmCurCapBegin[ell];
01862
01863 in = ( delta <= mmLen - mmPos );
01864 if ( in && delta > 1 ) {
01865 int n;
01866 if ( cs ) {
01867 for ( n = 1; n < delta; n++ ) {
01868 if ( mmIn[mmPos +
01869 mmCurCapBegin[ell] + n] !=
01870 mmIn[mmPos + i + n] )
01871 break;
01872 }
01873 } else {
01874 for ( n = 1; n < delta; n++ ) {
01875 QChar a = mmIn[mmPos +
01876 mmCurCapBegin[ell] + n];
01877 QChar b = mmIn[mmPos + i + n];
01878 if ( a.lower() != b.lower() )
01879 break;
01880 }
01881 }
01882 in = ( n == delta );
01883 if ( in )
01884 needSomeSleep = delta - 1;
01885 }
01886 }
01887 #endif
01888 }
01889 }
01890
01891
01892
01893
01894 if ( in ) {
01895 #ifndef QT_NO_REGEXP_CAPTURE
01896 int *capBegin, *capEnd;
01897 #endif
01898
01899
01900
01901 if ( (m = mmInNextStack[next]) == -1 ) {
01902 m = nnext++;
01903 mmNextStack[m] = next;
01904 mmInNextStack[next] = m;
01905 #ifndef QT_NO_REGEXP_CAPTURE
01906 capBegin = mmNextCapBegin + m * ncap;
01907 capEnd = mmNextCapEnd + m * ncap;
01908
01909
01910
01911
01912
01913
01914 } else {
01915 capBegin = mmTempCapBegin;
01916 capEnd = mmTempCapEnd;
01917 #endif
01918 }
01919
01920 #ifndef QT_NO_REGEXP_CAPTURE
01921
01922
01923
01924 if ( ncap > 0 ) {
01925 memcpy( capBegin, mmCurCapBegin + j * ncap,
01926 ncap * sizeof(int) );
01927 memcpy( capEnd, mmCurCapEnd + j * ncap,
01928 ncap * sizeof(int) );
01929 int c = scur->atom, n = snext->atom;
01930 int p = -1, q = -1;
01931 int cap;
01932
01933
01934
01935
01936
01937
01938
01939
01940
01941
01942
01943
01944
01945
01946 if ( scur->reenter != 0 &&
01947 (q = at(*scur->reenter, next)) != 0 ) {
01948 QBitArray b;
01949 b.fill( FALSE, nf );
01950 b.setBit( q, TRUE );
01951 for ( int ell = q + 1; ell < nf; ell++ ) {
01952 if ( b.testBit(f[ell].parent) ) {
01953 b.setBit( ell, TRUE );
01954 cap = f[ell].capture;
01955 if ( cap >= 0 ) {
01956 capBegin[cap] = EmptyCapture;
01957 capEnd[cap] = EmptyCapture;
01958 }
01959 }
01960 }
01961 p = f[q].parent;
01962
01963
01964
01965
01966
01967
01968
01969
01970
01971
01972 } else {
01973 p = c;
01974 q = n;
01975 while ( p != q ) {
01976 if ( p > q ) {
01977 cap = f[p].capture;
01978 if ( cap >= 0 ) {
01979 if ( capBegin[cap] == i ) {
01980 capBegin[cap] = EmptyCapture;
01981 capEnd[cap] = EmptyCapture;
01982 } else {
01983 capEnd[cap] = i;
01984 }
01985 }
01986 p = f[p].parent;
01987 } else {
01988 q = f[q].parent;
01989 }
01990 }
01991 }
01992
01993
01994
01995
01996
01997
01998
01999 while ( n > p ) {
02000 cap = f[n].capture;
02001 if ( cap >= 0 ) {
02002 capBegin[cap] = i;
02003 capEnd[cap] = EmptyCapture;
02004 }
02005 n = f[n].parent;
02006 }
02007
02008
02009
02010
02011 if ( capBegin == mmTempCapBegin &&
02012 isBetterCapture(capBegin, capEnd,
02013 mmNextCapBegin + m * ncap,
02014 mmNextCapEnd + m * ncap) ) {
02015 memcpy( mmNextCapBegin + m * ncap, capBegin,
02016 ncap * sizeof(int) );
02017 memcpy( mmNextCapEnd + m * ncap, capEnd,
02018 ncap * sizeof(int) );
02019 }
02020 }
02021 #ifndef QT_NO_REGEXP_BACKREF
02022
02023
02024
02025
02026
02027 if ( needSomeSleep > 0 ) {
02028 zzZ = new int[1 + 2 * ncap];
02029 zzZ[0] = next;
02030 if ( ncap > 0 ) {
02031 memcpy( zzZ + 1, capBegin, ncap * sizeof(int) );
02032 memcpy( zzZ + 1 + ncap, capEnd,
02033 ncap * sizeof(int) );
02034 }
02035 mmInNextStack[mmNextStack[--nnext]] = -1;
02036 mmSleeping.insert( i + needSomeSleep, zzZ );
02037 }
02038 #endif
02039 #endif
02040 }
02041 }
02042 }
02043 #ifndef QT_NO_REGEXP_CAPTURE
02044
02045
02046
02047 if ( ncap > 0 && (m = mmInNextStack[FinalState]) != -1 ) {
02048 memcpy( mmCapBegin, mmNextCapBegin + m * ncap, ncap * sizeof(int) );
02049 memcpy( mmCapEnd, mmNextCapEnd + m * ncap, ncap * sizeof(int) );
02050 }
02051 #ifndef QT_NO_REGEXP_BACKREF
02052
02053
02054
02055 if ( mmSleeping.count() > 0 ) {
02056 while ( (zzZ = mmSleeping.take(i)) != 0 ) {
02057 int next = zzZ[0];
02058 int *capBegin = zzZ + 1;
02059 int *capEnd = zzZ + 1 + ncap;
02060 bool copyOver = TRUE;
02061
02062 if ( (m = mmInNextStack[zzZ[0]]) == -1 ) {
02063 m = nnext++;
02064 mmNextStack[m] = next;
02065 mmInNextStack[next] = m;
02066 } else {
02067 copyOver = isBetterCapture( mmNextCapBegin + m * ncap,
02068 mmNextCapEnd + m * ncap,
02069 capBegin, capEnd );
02070 }
02071 if ( copyOver ) {
02072 memcpy( mmNextCapBegin + m * ncap, capBegin,
02073 ncap * sizeof(int) );
02074 memcpy( mmNextCapEnd + m * ncap, capEnd,
02075 ncap * sizeof(int) );
02076 }
02077 delete[] zzZ;
02078 }
02079 }
02080 #endif
02081 #endif
02082 for ( j = 0; j < nnext; j++ )
02083 mmInNextStack[mmNextStack[j]] = -1;
02084
02085 qSwap( mmCurStack, mmNextStack );
02086 #ifndef QT_NO_REGEXP_CAPTURE
02087 qSwap( mmCurCapBegin, mmNextCapBegin );
02088 qSwap( mmCurCapEnd, mmNextCapEnd );
02089 #endif
02090 ncur = nnext;
02091 nnext = 0;
02092 i++;
02093 }
02094
02095 #ifndef QT_NO_REGEXP_BACKREF
02096
02097
02098
02099 while ( !mmSleeping.isEmpty() ) {
02100 zzZ = mmSleeping.take( *QIntDictIterator<int>(mmSleeping) );
02101 delete[] zzZ;
02102 }
02103 #endif
02104
02105 match = ( mmMatchedLen >= 0 );
02106 if ( !match )
02107 mmMatchedLen = i - 1;
02108 return match;
02109 }
02110
02111 #ifndef QT_NO_REGEXP_CCLASS
02112
02113 QRegExpEngine::CharClass::CharClass()
02114 : c( 0 ), n( FALSE )
02115 #ifndef QT_NO_REGEXP_OPTIM
02116 , occ1( *noOccurrences )
02117 #endif
02118 {
02119 }
02120
02121 QRegExpEngine::CharClass& QRegExpEngine::CharClass::operator=(
02122 const CharClass& cc )
02123 {
02124 c = cc.c;
02125 r = cc.r.copy();
02126 n = cc.n;
02127 #ifndef QT_NO_REGEXP_OPTIM
02128 occ1 = cc.occ1;
02129 #endif
02130 return *this;
02131 }
02132
02133 void QRegExpEngine::CharClass::clear()
02134 {
02135 c = 0;
02136 r.resize( 0 );
02137 n = FALSE;
02138 }
02139
02140 void QRegExpEngine::CharClass::setNegative( bool negative )
02141 {
02142 n = negative;
02143 #ifndef QT_NO_REGEXP_OPTIM
02144 occ1 = *firstOccurrenceAtZero;
02145 #endif
02146 }
02147
02148 void QRegExpEngine::CharClass::addCategories( int cats )
02149 {
02150 c |= cats;
02151 #ifndef QT_NO_REGEXP_OPTIM
02152 occ1 = *firstOccurrenceAtZero;
02153 #endif
02154 }
02155
02156 void QRegExpEngine::CharClass::addRange( ushort from, ushort to )
02157 {
02158 if ( from > to )
02159 qSwap( from, to );
02160 int n = r.size();
02161 r.resize( n + 1 );
02162 r[n].from = from;
02163 r[n].to = to;
02164
02165 #ifndef QT_NO_REGEXP_OPTIM
02166 int i;
02167
02168 if ( to - from < NumBadChars ) {
02169 occ1.detach();
02170 if ( from % NumBadChars <= to % NumBadChars ) {
02171 for ( i = from % NumBadChars; i <= to % NumBadChars; i++ )
02172 occ1[i] = 0;
02173 } else {
02174 for ( i = 0; i <= to % NumBadChars; i++ )
02175 occ1[i] = 0;
02176 for ( i = from % NumBadChars; i < NumBadChars; i++ )
02177 occ1[i] = 0;
02178 }
02179 } else {
02180 occ1 = *firstOccurrenceAtZero;
02181 }
02182 #endif
02183 }
02184
02185 bool QRegExpEngine::CharClass::in( QChar ch ) const
02186 {
02187 #ifndef QT_NO_REGEXP_OPTIM
02188 if ( occ1[BadChar(ch)] == NoOccurrence )
02189 return n;
02190 #endif
02191
02192 if ( c != 0 && (c & (1 << (int) ch.category())) != 0 )
02193 return !n;
02194 for ( int i = 0; i < (int) r.size(); i++ ) {
02195 if ( ch.unicode() >= r[i].from && ch.unicode() <= r[i].to )
02196 return !n;
02197 }
02198 return n;
02199 }
02200
02201 #if defined(QT_DEBUG)
02202 void QRegExpEngine::CharClass::dump() const
02203 {
02204 int i;
02205 odebug << " " << (n ? "nega" : "posi") << "tive character class" << oendl;
02206 #ifndef QT_NO_REGEXP_CCLASS
02207 if ( c != 0 )
02208 odebug << QString().sprintf(" categories 0x%.8x", c ) << oendl;
02209 #endif
02210 for ( i = 0; i < (int) r.size(); i++ )
02211 odebug << QString().sprintf(" 0x%.4x through 0x%.4x", r[i].from, r[i].to ) << oendl;
02212 }
02213 #endif
02214 #endif
02215
02216 QRegExpEngine::Box::Box( QRegExpEngine *engine )
02217 : eng( engine ), skipanchors( 0 )
02218 #ifndef QT_NO_REGEXP_OPTIM
02219 , earlyStart( 0 ), lateStart( 0 ), maxl( 0 ), occ1( *noOccurrences )
02220 #endif
02221 {
02222 minl = 0;
02223 }
02224
02225 QRegExpEngine::Box& QRegExpEngine::Box::operator=( const Box& b )
02226 {
02227 eng = b.eng;
02228 ls = b.ls;
02229 rs = b.rs;
02230 lanchors = b.lanchors;
02231 ranchors = b.ranchors;
02232 skipanchors = b.skipanchors;
02233 #ifndef QT_NO_REGEXP_OPTIM
02234 earlyStart = b.earlyStart;
02235 lateStart = b.lateStart;
02236 str = b.str;
02237 leftStr = b.leftStr;
02238 rightStr = b.rightStr;
02239 maxl = b.maxl;
02240 occ1 = b.occ1;
02241 #endif
02242 minl = b.minl;
02243 return *this;
02244 }
02245
02246 void QRegExpEngine::Box::set( QChar ch )
02247 {
02248 ls.resize( 1 );
02249 ls[0] = eng->createState( ch );
02250 rs = ls;
02251 rs.detach();
02252 #ifndef QT_NO_REGEXP_OPTIM
02253 str = ch;
02254 leftStr = ch;
02255 rightStr = ch;
02256 maxl = 1;
02257 occ1.detach();
02258 occ1[BadChar(ch)] = 0;
02259 #endif
02260 minl = 1;
02261 }
02262
02263 void QRegExpEngine::Box::set( const CharClass& cc )
02264 {
02265 ls.resize( 1 );
02266 ls[0] = eng->createState( cc );
02267 rs = ls;
02268 rs.detach();
02269 #ifndef QT_NO_REGEXP_OPTIM
02270 maxl = 1;
02271 occ1 = cc.firstOccurrence();
02272 #endif
02273 minl = 1;
02274 }
02275
02276 #ifndef QT_NO_REGEXP_BACKREF
02277 void QRegExpEngine::Box::set( int bref )
02278 {
02279 ls.resize( 1 );
02280 ls[0] = eng->createState( bref );
02281 rs = ls;
02282 rs.detach();
02283 if ( bref >= 1 && bref <= MaxBackRefs )
02284 skipanchors = Anchor_BackRef0Empty << bref;
02285 #ifndef QT_NO_REGEXP_OPTIM
02286 maxl = InftyLen;
02287 #endif
02288 minl = 0;
02289 }
02290 #endif
02291
02292 void QRegExpEngine::Box::cat( const Box& b )
02293 {
02294 eng->addCatTransitions( rs, b.ls );
02295 addAnchorsToEngine( b );
02296 if ( minl == 0 ) {
02297 mergeInto( &lanchors, b.lanchors );
02298 if ( skipanchors != 0 ) {
02299 for ( int i = 0; i < (int) b.ls.size(); i++ ) {
02300 int a = eng->anchorConcatenation( at(lanchors, b.ls[i]),
02301 skipanchors );
02302 lanchors.insert( b.ls[i], a );
02303 }
02304 }
02305 mergeInto( &ls, b.ls );
02306 }
02307 if ( b.minl == 0 ) {
02308 mergeInto( &ranchors, b.ranchors );
02309 if ( b.skipanchors != 0 ) {
02310 for ( int i = 0; i < (int) rs.size(); i++ ) {
02311 int a = eng->anchorConcatenation( at(ranchors, rs[i]),
02312 b.skipanchors );
02313 ranchors.insert( rs[i], a );
02314 }
02315 }
02316 mergeInto( &rs, b.rs );
02317 } else {
02318 ranchors = b.ranchors;
02319 rs = b.rs;
02320 }
02321
02322 #ifndef QT_NO_REGEXP_OPTIM
02323 if ( maxl != InftyLen ) {
02324 if ( rightStr.length() + b.leftStr.length() >
02325 QMAX(str.length(), b.str.length()) ) {
02326 earlyStart = minl - rightStr.length();
02327 lateStart = maxl - rightStr.length();
02328 str = rightStr + b.leftStr;
02329 } else if ( b.str.length() > str.length() ) {
02330 earlyStart = minl + b.earlyStart;
02331 lateStart = maxl + b.lateStart;
02332 str = b.str;
02333 }
02334 }
02335
02336 if ( (int) leftStr.length() == maxl )
02337 leftStr += b.leftStr;
02338 if ( (int) b.rightStr.length() == b.maxl )
02339 rightStr += b.rightStr;
02340 else
02341 rightStr = b.rightStr;
02342
02343 if ( maxl == InftyLen || b.maxl == InftyLen )
02344 maxl = InftyLen;
02345 else
02346 maxl += b.maxl;
02347
02348 occ1.detach();
02349 for ( int i = 0; i < NumBadChars; i++ ) {
02350 if ( b.occ1[i] != NoOccurrence && minl + b.occ1[i] < occ1[i] )
02351 occ1[i] = minl + b.occ1[i];
02352 }
02353 #endif
02354
02355 minl += b.minl;
02356 if ( minl == 0 )
02357 skipanchors = eng->anchorConcatenation( skipanchors, b.skipanchors );
02358 else
02359 skipanchors = 0;
02360 }
02361
02362 void QRegExpEngine::Box::orx( const Box& b )
02363 {
02364 mergeInto( &ls, b.ls );
02365 mergeInto( &lanchors, b.lanchors );
02366 mergeInto( &rs, b.rs );
02367 mergeInto( &ranchors, b.ranchors );
02368 skipanchors = eng->anchorAlternation( skipanchors, b.skipanchors );
02369
02370 #ifndef QT_NO_REGEXP_OPTIM
02371 occ1.detach();
02372 for ( int i = 0; i < NumBadChars; i++ ) {
02373 if ( occ1[i] > b.occ1[i] )
02374 occ1[i] = b.occ1[i];
02375 }
02376 earlyStart = 0;
02377 lateStart = 0;
02378 str = QString::null;
02379 leftStr = QString::null;
02380 rightStr = QString::null;
02381 if ( b.maxl > maxl )
02382 maxl = b.maxl;
02383 #endif
02384 if ( b.minl < minl )
02385 minl = b.minl;
02386 }
02387
02388 void QRegExpEngine::Box::plus( int atom )
02389 {
02390 #ifndef QT_NO_REGEXP_CAPTURE
02391 eng->addPlusTransitions( rs, ls, atom );
02392 #else
02393 Q_UNUSED( atom );
02394 eng->addCatTransitions( rs, ls );
02395 #endif
02396 addAnchorsToEngine( *this );
02397 #ifndef QT_NO_REGEXP_OPTIM
02398 maxl = InftyLen;
02399 #endif
02400 }
02401
02402 void QRegExpEngine::Box::opt()
02403 {
02404 #ifndef QT_NO_REGEXP_OPTIM
02405 earlyStart = 0;
02406 lateStart = 0;
02407 str = QString::null;
02408 leftStr = QString::null;
02409 rightStr = QString::null;
02410 #endif
02411 skipanchors = 0;
02412 minl = 0;
02413 }
02414
02415 void QRegExpEngine::Box::catAnchor( int a )
02416 {
02417 if ( a != 0 ) {
02418 for ( int i = 0; i < (int) rs.size(); i++ ) {
02419 a = eng->anchorConcatenation( at(ranchors, rs[i]), a );
02420 ranchors.insert( rs[i], a );
02421 }
02422 if ( minl == 0 )
02423 skipanchors = eng->anchorConcatenation( skipanchors, a );
02424 }
02425 }
02426
02427 #ifndef QT_NO_REGEXP_OPTIM
02428 void QRegExpEngine::Box::setupHeuristics()
02429 {
02430 eng->setupGoodStringHeuristic( earlyStart, lateStart, str );
02431
02432
02433
02434
02435
02436
02437
02438
02439
02440
02441 for ( int i = 0; i < NumBadChars; i++ ) {
02442 if ( occ1[i] != NoOccurrence && occ1[i] >= minl )
02443 occ1[i] = minl;
02444 }
02445 eng->setupBadCharHeuristic( minl, occ1 );
02446
02447 eng->heuristicallyChooseHeuristic();
02448 }
02449 #endif
02450
02451 #if defined(QT_DEBUG)
02452 void QRegExpEngine::Box::dump() const
02453 {
02454 int i;
02455 odebug << "Box of at least " << minl << " character" << (minl == 1 ? "" : "s") << oendl;
02456 odebug << " Left states:" << oendl;
02457 for ( i = 0; i < (int) ls.size(); i++ ) {
02458 if ( at(lanchors, ls[i]) == 0 )
02459 odebug << " " << ls[i] << oendl;
02460 else
02461 odebug << " " << ls[i] << QString().sprintf(" [anchors 0x%.8x]", lanchors[ls[i]]) << oendl;
02462 }
02463 odebug << " Right states:" << oendl;
02464 for ( i = 0; i < (int) rs.size(); i++ ) {
02465 if ( at(ranchors, ls[i]) == 0 )
02466 odebug << " " << rs[i] << oendl;
02467 else
02468 odebug << " " << rs[i] << QString().sprintf(" [anchors 0x%.8x]", ranchors[rs[i]]) << oendl;
02469 }
02470 odebug << QString().sprintf(" Skip anchors: 0x%.8x", skipanchors) << oendl;
02471 }
02472 #endif
02473
02474 void QRegExpEngine::Box::addAnchorsToEngine( const Box& to ) const
02475 {
02476 for ( int i = 0; i < (int) to.ls.size(); i++ ) {
02477 for ( int j = 0; j < (int) rs.size(); j++ ) {
02478 int a = eng->anchorConcatenation( at(ranchors, rs[j]),
02479 at(to.lanchors, to.ls[i]) );
02480 eng->addAnchors( rs[j], to.ls[i], a );
02481 }
02482 }
02483 }
02484
02485 int QRegExpEngine::getChar()
02486 {
02487 return ( yyPos == yyLen ) ? EOS : yyIn[yyPos++].unicode();
02488 }
02489
02490 int QRegExpEngine::getEscape()
02491 {
02492 #ifndef QT_NO_REGEXP_ESCAPE
02493 const char tab[] = "afnrtv";
02494 const char backTab[] = "\a\f\n\r\t\v";
02495 ushort low;
02496 int i;
02497 #endif
02498 ushort val;
02499 int prevCh = yyCh;
02500
02501 if ( prevCh == EOS ) {
02502 yyError = TRUE;
02503 return Tok_Char | '\\';
02504 }
02505 yyCh = getChar();
02506 #ifndef QT_NO_REGEXP_ESCAPE
02507 if ( (prevCh & ~0xff) == 0 ) {
02508 const char *p = strchr( tab, prevCh );
02509 if ( p != 0 )
02510 return Tok_Char | backTab[p - tab];
02511 }
02512 #endif
02513
02514 switch ( prevCh ) {
02515 #ifndef QT_NO_REGEXP_ESCAPE
02516 case '0':
02517 val = 0;
02518 for ( i = 0; i < 3; i++ ) {
02519 if ( yyCh >= '0' && yyCh <= '7' )
02520 val = ( val << 3 ) | ( yyCh - '0' );
02521 else
02522 break;
02523 yyCh = getChar();
02524 }
02525 if ( (val & ~0377) != 0 )
02526 yyError = TRUE;
02527 return Tok_Char | val;
02528 #endif
02529 #ifndef QT_NO_REGEXP_ESCAPE
02530 case 'B':
02531 return Tok_NonWord;
02532 #endif
02533 #ifndef QT_NO_REGEXP_CCLASS
02534 case 'D':
02535
02536 yyCharClass->addCategories( 0x7fffffef );
02537 return Tok_CharClass;
02538 case 'S':
02539
02540 yyCharClass->addCategories( 0x7ffff87f );
02541 yyCharClass->addRange( 0x0000, 0x0008 );
02542 yyCharClass->addRange( 0x000e, 0x001f );
02543 yyCharClass->addRange( 0x007f, 0x009f );
02544 return Tok_CharClass;
02545 case 'W':
02546
02547 yyCharClass->addCategories( 0x7ff07f8f );
02548 return Tok_CharClass;
02549 #endif
02550 #ifndef QT_NO_REGEXP_ESCAPE
02551 case 'b':
02552 return Tok_Word;
02553 #endif
02554 #ifndef QT_NO_REGEXP_CCLASS
02555 case 'd':
02556
02557 yyCharClass->addCategories( 0x00000010 );
02558 return Tok_CharClass;
02559 case 's':
02560
02561 yyCharClass->addCategories( 0x00000380 );
02562 yyCharClass->addRange( 0x0009, 0x000d );
02563 return Tok_CharClass;
02564 case 'w':
02565
02566 yyCharClass->addCategories( 0x000f8070 );
02567 return Tok_CharClass;
02568 #endif
02569 #ifndef QT_NO_REGEXP_ESCAPE
02570 case 'x':
02571 val = 0;
02572 for ( i = 0; i < 4; i++ ) {
02573 low = QChar( yyCh ).lower();
02574 if ( low >= '0' && low <= '9' )
02575 val = ( val << 4 ) | ( low - '0' );
02576 else if ( low >= 'a' && low <= 'f' )
02577 val = ( val << 4 ) | ( low - 'a' + 10 );
02578 else
02579 break;
02580 yyCh = getChar();
02581 }
02582 return Tok_Char | val;
02583 #endif
02584 default:
02585 if ( prevCh >= '1' && prevCh <= '9' ) {
02586 #ifndef QT_NO_REGEXP_BACKREF
02587 val = prevCh - '0';
02588 while ( yyCh >= '0' && yyCh <= '9' ) {
02589 val = ( val *= 10 ) | ( yyCh - '0' );
02590 yyCh = getChar();
02591 }
02592 return Tok_BackRef | val;
02593 #else
02594 yyError = TRUE;
02595 #endif
02596 }
02597 return Tok_Char | prevCh;
02598 }
02599 }
02600
02601 #ifndef QT_NO_REGEXP_INTERVAL
02602 int QRegExpEngine::getRep( int def )
02603 {
02604 if ( yyCh >= '0' && yyCh <= '9' ) {
02605 int rep = 0;
02606 do {
02607 rep = 10 * rep + yyCh - '0';
02608 if ( rep >= InftyRep ) {
02609 yyError = TRUE;
02610 rep = def;
02611 }
02612 yyCh = getChar();
02613 } while ( yyCh >= '0' && yyCh <= '9' );
02614 return rep;
02615 } else {
02616 return def;
02617 }
02618 }
02619 #endif
02620
02621 #ifndef QT_NO_REGEXP_LOOKAHEAD
02622 void QRegExpEngine::skipChars( int n )
02623 {
02624 if ( n > 0 ) {
02625 yyPos += n - 1;
02626 yyCh = getChar();
02627 }
02628 }
02629 #endif
02630
02631 void QRegExpEngine::startTokenizer( const QChar *rx, int len )
02632 {
02633 yyIn = rx;
02634 yyPos0 = 0;
02635 yyPos = 0;
02636 yyLen = len;
02637 yyCh = getChar();
02638 yyCharClass = new CharClass;
02639 yyMinRep = 0;
02640 yyMaxRep = 0;
02641 yyError = FALSE;
02642 }
02643
02644 int QRegExpEngine::getToken()
02645 {
02646 #ifndef QT_NO_REGEXP_CCLASS
02647 ushort pendingCh = 0;
02648 bool charPending;
02649 bool rangePending;
02650 int tok;
02651 #endif
02652 int prevCh = yyCh;
02653
02654 yyPos0 = yyPos - 1;
02655 #ifndef QT_NO_REGEXP_CCLASS
02656 yyCharClass->clear();
02657 #endif
02658 yyMinRep = 0;
02659 yyMaxRep = 0;
02660 yyCh = getChar();
02661 switch ( prevCh ) {
02662 case EOS:
02663 yyPos0 = yyPos;
02664 return Tok_Eos;
02665 case '$':
02666 return Tok_Dollar;
02667 case '(':
02668 if ( yyCh == '?' ) {
02669 prevCh = getChar();
02670 yyCh = getChar();
02671 switch ( prevCh ) {
02672 #ifndef QT_NO_REGEXP_LOOKAHEAD
02673 case '!':
02674 return Tok_NegLookahead;
02675 case '=':
02676 return Tok_PosLookahead;
02677 #endif
02678 case ':':
02679 return Tok_MagicLeftParen;
02680 default:
02681 yyError = TRUE;
02682 return Tok_MagicLeftParen;
02683 }
02684 } else {
02685 return Tok_LeftParen;
02686 }
02687 case ')':
02688 return Tok_RightParen;
02689 case '*':
02690 yyMinRep = 0;
02691 yyMaxRep = InftyRep;
02692 return Tok_Quantifier;
02693 case '+':
02694 yyMinRep = 1;
02695 yyMaxRep = InftyRep;
02696 return Tok_Quantifier;
02697 case '.':
02698 #ifndef QT_NO_REGEXP_CCLASS
02699 yyCharClass->setNegative( TRUE );
02700 #endif
02701 return Tok_CharClass;
02702 case '?':
02703 yyMinRep = 0;
02704 yyMaxRep = 1;
02705 return Tok_Quantifier;
02706 case '[':
02707 #ifndef QT_NO_REGEXP_CCLASS
02708 if ( yyCh == '^' ) {
02709 yyCharClass->setNegative( TRUE );
02710 yyCh = getChar();
02711 }
02712 charPending = FALSE;
02713 rangePending = FALSE;
02714 do {
02715 if ( yyCh == '-' && charPending && !rangePending ) {
02716 rangePending = TRUE;
02717 yyCh = getChar();
02718 } else {
02719 if ( charPending && !rangePending ) {
02720 yyCharClass->addSingleton( pendingCh );
02721 charPending = FALSE;
02722 }
02723 if ( yyCh == '\\' ) {
02724 yyCh = getChar();
02725 tok = getEscape();
02726 if ( tok == Tok_Word )
02727 tok = '\b';
02728 } else {
02729 tok = Tok_Char | yyCh;
02730 yyCh = getChar();
02731 }
02732 if ( tok == Tok_CharClass ) {
02733 if ( rangePending ) {
02734 yyCharClass->addSingleton( '-' );
02735 yyCharClass->addSingleton( pendingCh );
02736 charPending = FALSE;
02737 rangePending = FALSE;
02738 }
02739 } else if ( (tok & Tok_Char) != 0 ) {
02740 if ( rangePending ) {
02741 yyCharClass->addRange( pendingCh, tok ^ Tok_Char );
02742 charPending = FALSE;
02743 rangePending = FALSE;
02744 } else {
02745 pendingCh = tok ^ Tok_Char;
02746 charPending = TRUE;
02747 }
02748 } else {
02749 yyError = TRUE;
02750 }
02751 }
02752 } while ( yyCh != ']' && yyCh != EOS );
02753 if ( rangePending )
02754 yyCharClass->addSingleton( '-' );
02755 if ( charPending )
02756 yyCharClass->addSingleton( pendingCh );
02757 if ( yyCh == EOS )
02758 yyError = TRUE;
02759 else
02760 yyCh = getChar();
02761 return Tok_CharClass;
02762 #else
02763 yyError = TRUE;
02764 return Tok_Char | '[';
02765 #endif
02766 case '\\':
02767 return getEscape();
02768 case ']':
02769 yyError = TRUE;
02770 return Tok_Char | ']';
02771 case '^':
02772 return Tok_Caret;
02773 #ifndef QT_NO_REGEXP_INTERVAL
02774 case '{':
02775 yyMinRep = getRep( 0 );
02776 yyMaxRep = yyMinRep;
02777 if ( yyCh == ',' ) {
02778 yyCh = getChar();
02779 yyMaxRep = getRep( InftyRep );
02780 }
02781 if ( yyMaxRep < yyMinRep )
02782 qSwap( yyMinRep, yyMaxRep );
02783 if ( yyCh != '}' )
02784 yyError = TRUE;
02785 yyCh = getChar();
02786 return Tok_Quantifier;
02787 #else
02788 yyError = TRUE;
02789 return Tok_Char | '{';
02790 #endif
02791 case '|':
02792 return Tok_Bar;
02793 case '}':
02794 yyError = TRUE;
02795 return Tok_Char | '}';
02796 default:
02797 return Tok_Char | prevCh;
02798 }
02799 }
02800
02801 int QRegExpEngine::parse( const QChar *pattern, int len )
02802 {
02803 valid = TRUE;
02804 startTokenizer( pattern, len );
02805 yyTok = getToken();
02806 #ifndef QT_NO_REGEXP_CAPTURE
02807 yyMayCapture = TRUE;
02808 #else
02809 yyMayCapture = FALSE;
02810 #endif
02811
02812 #ifndef QT_NO_REGEXP_CAPTURE
02813 int atom = startAtom( FALSE );
02814 #endif
02815 CharClass anything;
02816 Box box( this );
02817 box.set( anything );
02818 Box rightBox( this );
02819 rightBox.set( anything );
02820
02821 Box middleBox( this );
02822 parseExpression( &middleBox );
02823 #ifndef QT_NO_REGEXP_CAPTURE
02824 finishAtom( atom );
02825 #endif
02826 #ifndef QT_NO_REGEXP_OPTIM
02827 middleBox.setupHeuristics();
02828 #endif
02829 box.cat( middleBox );
02830 box.cat( rightBox );
02831 delete yyCharClass;
02832 yyCharClass = 0;
02833
02834 realncap = ncap;
02835 #ifndef QT_NO_REGEXP_BACKREF
02836 if ( nbrefs > ncap )
02837 ncap = nbrefs;
02838 #endif
02839
02840 mmCaptured.resize( 2 + 2 * realncap );
02841 mmCapturedNoMatch.fill( -1, 2 + 2 * realncap );
02842
02843
02844
02845
02846
02847 #ifndef QT_NO_REGEXP_OPTIM
02848 mmSlideTabSize = QMAX( minl + 1, 16 );
02849 #else
02850 mmSlideTabSize = 0;
02851 #endif
02852 mmBigArray.resize( (3 + 4 * ncap) * ns + 4 * ncap + mmSlideTabSize );
02853
02854 mmInNextStack = mmBigArray.data();
02855 memset( mmInNextStack, -1, ns * sizeof(int) );
02856 mmCurStack = mmInNextStack + ns;
02857 mmNextStack = mmInNextStack + 2 * ns;
02858
02859 mmCurCapBegin = mmInNextStack + 3 * ns;
02860 mmNextCapBegin = mmCurCapBegin + ncap * ns;
02861 mmCurCapEnd = mmCurCapBegin + 2 * ncap * ns;
02862 mmNextCapEnd = mmCurCapBegin + 3 * ncap * ns;
02863
02864 mmTempCapBegin = mmCurCapBegin + 4 * ncap * ns;
02865 mmTempCapEnd = mmTempCapBegin + ncap;
02866 mmCapBegin = mmTempCapBegin + 2 * ncap;
02867 mmCapEnd = mmTempCapBegin + 3 * ncap;
02868
02869 mmSlideTab = mmTempCapBegin + 4 * ncap;
02870
02871 if ( yyError )
02872 return -1;
02873
02874 #ifndef QT_NO_REGEXP_OPTIM
02875 State *sinit = s[InitialState];
02876 caretAnchored = ( sinit->anchors != 0 );
02877 if ( caretAnchored ) {
02878 QMap<int, int>& anchors = *sinit->anchors;
02879 QMap<int, int>::ConstIterator a;
02880 for ( a = anchors.begin(); a != anchors.end(); ++a ) {
02881 #ifndef QT_NO_REGEXP_ANCHOR_ALT
02882 if ( (*a & Anchor_Alternation) != 0 )
02883 break;
02884 #endif
02885 if ( (*a & Anchor_Caret) == 0 ) {
02886 caretAnchored = FALSE;
02887 break;
02888 }
02889 }
02890 }
02891 #endif
02892 return yyPos0;
02893 }
02894
02895 void QRegExpEngine::parseAtom( Box *box )
02896 {
02897 #ifndef QT_NO_REGEXP_LOOKAHEAD
02898 QRegExpEngine *eng = 0;
02899 bool neg;
02900 int len;
02901 #endif
02902
02903 switch ( yyTok ) {
02904 case Tok_Dollar:
02905 box->catAnchor( Anchor_Dollar );
02906 break;
02907 case Tok_Caret:
02908 box->catAnchor( Anchor_Caret );
02909 break;
02910 #ifndef QT_NO_REGEXP_LOOKAHEAD
02911 case Tok_PosLookahead:
02912 case Tok_NegLookahead:
02913 neg = ( yyTok == Tok_NegLookahead );
02914 eng = new QRegExpEngine( cs );
02915 len = eng->parse( yyIn + yyPos - 1, yyLen - yyPos + 1 );
02916 if ( len >= 0 )
02917 skipChars( len );
02918 else
02919 yyError = TRUE;
02920 box->catAnchor( addLookahead(eng, neg) );
02921 yyTok = getToken();
02922 if ( yyTok != Tok_RightParen )
02923 yyError = TRUE;
02924 break;
02925 #endif
02926 #ifndef QT_NO_REGEXP_ESCAPE
02927 case Tok_Word:
02928 box->catAnchor( Anchor_Word );
02929 break;
02930 case Tok_NonWord:
02931 box->catAnchor( Anchor_NonWord );
02932 break;
02933 #endif
02934 case Tok_LeftParen:
02935 case Tok_MagicLeftParen:
02936 yyTok = getToken();
02937 parseExpression( box );
02938 if ( yyTok != Tok_RightParen )
02939 yyError = TRUE;
02940 break;
02941 case Tok_CharClass:
02942 box->set( *yyCharClass );
02943 break;
02944 default:
02945 if ( (yyTok & Tok_Char) != 0 )
02946 box->set( QChar(yyTok ^ Tok_Char) );
02947 #ifndef QT_NO_REGEXP_BACKREF
02948 else if ( (yyTok & Tok_BackRef) != 0 )
02949 box->set( yyTok ^ Tok_BackRef );
02950 #endif
02951 else
02952 yyError = TRUE;
02953 }
02954 yyTok = getToken();
02955 }
02956
02957 void QRegExpEngine::parseFactor( Box *box )
02958 {
02959 #ifndef QT_NO_REGEXP_CAPTURE
02960 int atom = startAtom( yyMayCapture && yyTok == Tok_LeftParen );
02961 #else
02962 static const int atom = 0;
02963 #endif
02964
02965 #ifndef QT_NO_REGEXP_INTERVAL
02966 #define YYREDO() \
02967 yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \
02968 *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok
02969
02970 const QChar *in = yyIn;
02971 int pos0 = yyPos0;
02972 int pos = yyPos;
02973 int len = yyLen;
02974 int ch = yyCh;
02975 CharClass charClass;
02976 if ( yyTok == Tok_CharClass )
02977 charClass = *yyCharClass;
02978 int tok = yyTok;
02979 bool mayCapture = yyMayCapture;
02980 #endif
02981
02982 parseAtom( box );
02983 #ifndef QT_NO_REGEXP_CAPTURE
02984 finishAtom( atom );
02985 #endif
02986
02987 if ( yyTok == Tok_Quantifier ) {
02988 if ( yyMaxRep == InftyRep ) {
02989 box->plus( atom );
02990 #ifndef QT_NO_REGEXP_INTERVAL
02991 } else if ( yyMaxRep == 0 ) {
02992 box->clear();
02993 #endif
02994 }
02995 if ( yyMinRep == 0 )
02996 box->opt();
02997
02998 #ifndef QT_NO_REGEXP_INTERVAL
02999 yyMayCapture = FALSE;
03000 int alpha = ( yyMinRep == 0 ) ? 0 : yyMinRep - 1;
03001 int beta = ( yyMaxRep == InftyRep ) ? 0 : yyMaxRep - ( alpha + 1 );
03002
03003 Box rightBox( this );
03004 int i;
03005
03006 for ( i = 0; i < beta; i++ ) {
03007 YYREDO();
03008 Box leftBox( this );
03009 parseAtom( &leftBox );
03010 leftBox.cat( rightBox );
03011 leftBox.opt();
03012 rightBox = leftBox;
03013 }
03014 for ( i = 0; i < alpha; i++ ) {
03015 YYREDO();
03016 Box leftBox( this );
03017 parseAtom( &leftBox );
03018 leftBox.cat( rightBox );
03019 rightBox = leftBox;
03020 }
03021 rightBox.cat( *box );
03022 *box = rightBox;
03023 #endif
03024 yyTok = getToken();
03025 #ifndef QT_NO_REGEXP_INTERVAL
03026 yyMayCapture = mayCapture;
03027 #endif
03028 }
03029 #undef YYREDO
03030 }
03031
03032 void QRegExpEngine::parseTerm( Box *box )
03033 {
03034 #ifndef QT_NO_REGEXP_OPTIM
03035 if ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar )
03036 parseFactor( box );
03037 #endif
03038 while ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar ) {
03039 Box rightBox( this );
03040 parseFactor( &rightBox );
03041 box->cat( rightBox );
03042 }
03043 }
03044
03045 void QRegExpEngine::parseExpression( Box *box )
03046 {
03047 parseTerm( box );
03048 while ( yyTok == Tok_Bar ) {
03049 Box rightBox( this );
03050 yyTok = getToken();
03051 parseTerm( &rightBox );
03052 box->orx( rightBox );
03053 }
03054 }
03055
03056
03057
03058
03059
03060
03061 struct QRegExpPrivate
03062 {
03063 QString pattern;
03064 QString rxpattern;
03065 #ifndef QT_NO_REGEXP_WILDCARD
03066 bool wc;
03067 #endif
03068 bool min;
03069 #ifndef QT_NO_REGEXP_CAPTURE
03070 QString t;
03071 QStringList capturedCache;
03072 #endif
03073 QArray<int> captured;
03074
03075 QRegExpPrivate() { captured.fill( -1, 2 ); }
03076 };
03077
03078 #ifndef QT_NO_REGEXP_OPTIM
03079 static QCache<QRegExpEngine> *engineCache = 0;
03080 #endif
03081
03082 static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
03083 {
03084 #ifndef QT_NO_REGEXP_OPTIM
03085 if ( engineCache != 0 ) {
03086 QRegExpEngine *eng = engineCache->take( pattern );
03087 if ( eng == 0 || eng->caseSensitive() != caseSensitive ) {
03088 delete eng;
03089 } else {
03090 eng->ref();
03091 return eng;
03092 }
03093 }
03094 #endif
03095 return new QRegExpEngine( pattern, caseSensitive );
03096 }
03097
03098 static void derefEngine( QRegExpEngine *eng, const QString& pattern )
03099 {
03100 if ( eng != 0 && eng->deref() ) {
03101 #ifndef QT_NO_REGEXP_OPTIM
03102 if ( engineCache == 0 ) {
03103 engineCache = new QCache<QRegExpEngine>;
03104 engineCache->setAutoDelete( TRUE );
03105 }
03106 if ( !pattern.isNull() &&
03107 engineCache->insert(pattern, eng, 4 + pattern.length() / 4) )
03108 return;
03109 #else
03110 Q_UNUSED( pattern );
03111 #endif
03112 delete eng;
03113 }
03114 }
03115
03121 QRegExp3::QRegExp3()
03122 {
03123 eng = new QRegExpEngine( TRUE );
03124 priv = new QRegExpPrivate;
03125 priv->pattern = QString::null;
03126 #ifndef QT_NO_REGEXP_WILDCARD
03127 priv->wc = FALSE;
03128 #endif
03129 priv->min = FALSE;
03130 compile( TRUE );
03131 }
03132
03142 QRegExp3::QRegExp3( const QString& pattern, bool caseSensitive, bool wildcard )
03143 {
03144 eng = 0;
03145 priv = new QRegExpPrivate;
03146 priv->pattern = pattern;
03147 #ifndef QT_NO_REGEXP_WILDCARD
03148 priv->wc = wildcard;
03149 #endif
03150 priv->min = FALSE;
03151 compile( caseSensitive );
03152 }
03153
03159 QRegExp3::QRegExp3( const QRegExp3& rx )
03160 {
03161 eng = 0;
03162 priv = new QRegExpPrivate;
03163 operator=( rx );
03164 }
03165
03169 QRegExp3::~QRegExp3()
03170 {
03171 derefEngine( eng, priv->rxpattern );
03172 delete priv;
03173 }
03174
03180 QRegExp3& QRegExp3::operator=( const QRegExp3& rx )
03181 {
03182 rx.eng->ref();
03183 derefEngine( eng, priv->rxpattern );
03184 eng = rx.eng;
03185 priv->pattern = rx.priv->pattern;
03186 priv->rxpattern = rx.priv->rxpattern;
03187 #ifndef QT_NO_REGEXP_WILDCARD
03188 priv->wc = rx.priv->wc;
03189 #endif
03190 priv->min = rx.priv->min;
03191 #ifndef QT_NO_REGEXP_CAPTURE
03192 priv->t = rx.priv->t;
03193 priv->capturedCache = rx.priv->capturedCache;
03194 #endif
03195 priv->captured = rx.priv->captured;
03196 return *this;
03197 }
03198
03207 bool QRegExp3::operator==( const QRegExp3& rx ) const
03208 {
03209 return priv->pattern == rx.priv->pattern &&
03210 eng->caseSensitive() == rx.eng->caseSensitive() &&
03211 #ifndef QT_NO_REGEXP_WILDCARD
03212 priv->wc == rx.priv->wc &&
03213 #endif
03214 priv->min == rx.priv->min;
03215 }
03216
03239 bool QRegExp3::isEmpty() const
03240 {
03241 return priv->pattern.isEmpty();
03242 }
03243
03255 bool QRegExp3::isValid() const
03256 {
03257 return eng->isValid();
03258 }
03259
03266 QString QRegExp3::pattern() const
03267 {
03268 return priv->pattern;
03269 }
03270
03278 void QRegExp3::setPattern( const QString& pattern )
03279 {
03280 if ( priv->pattern != pattern ) {
03281 priv->pattern = pattern;
03282 compile( caseSensitive() );
03283 }
03284 }
03285
03292 bool QRegExp3::caseSensitive() const
03293 {
03294 return eng->caseSensitive();
03295 }
03296
03305 void QRegExp3::setCaseSensitive( bool sensitive )
03306 {
03307 if ( sensitive != eng->caseSensitive() )
03308 compile( sensitive );
03309 }
03310
03311 #ifndef QT_NO_REGEXP_WILDCARD
03312
03318 bool QRegExp3::wildcard() const
03319 {
03320 return priv->wc;
03321 }
03322
03334 void QRegExp3::setWildcard( bool wildcard )
03335 {
03336 if ( wildcard != priv->wc ) {
03337 priv->wc = wildcard;
03338 compile( caseSensitive() );
03339 }
03340 }
03341 #endif
03342
03348 bool QRegExp3::minimal() const
03349 {
03350 return priv->min;
03351 }
03352
03370 void QRegExp3::setMinimal( bool minimal )
03371 {
03372 priv->min = minimal;
03373 }
03374
03391 bool QRegExp3::exactMatch( const QString& str )
03392 {
03393 #ifndef QT_NO_REGEXP_CAPTURE
03394 priv->t = str;
03395 priv->capturedCache.clear();
03396 #endif
03397
03398 priv->captured = eng->match( str, 0, priv->min, TRUE );
03399 if ( priv->captured[1] == (int) str.length() ) {
03400 return TRUE;
03401 } else {
03402 priv->captured.detach();
03403 priv->captured[0] = 0;
03404 priv->captured[1] = eng->matchedLength();
03405 return FALSE;
03406 }
03407 }
03408
03413 bool QRegExp3::exactMatch( const QString& str ) const
03414 {
03415 return eng->match(str, 0, priv->min, TRUE)[0] == 0 &&
03416 eng->matchedLength() == (int) str.length();
03417 }
03418
03442 #ifndef QT_NO_COMPAT
03443 int QRegExp3::match( const QString& str, int index, int *len,
03444 bool indexIsStart )
03445 {
03446 int pos;
03447 if ( indexIsStart ) {
03448 pos = search( str.mid(index) );
03449 if ( pos >= 0 ) {
03450 pos += index;
03451 if ( len != 0 )
03452 *len = matchedLength();
03453 } else {
03454 if ( len != 0 )
03455 *len = 0;
03456 }
03457 } else {
03458 pos = search( str, index );
03459 if ( len != 0 )
03460 *len = matchedLength();
03461 }
03462 return pos;
03463 }
03464 #endif
03465
03491
03492
03493 #ifdef QCHAR_SUPPORT
03494 const QString makeString(const QChar *str)
03495 {
03496
03497 const uint MAXLENGTH=65535;
03498
03499 const QChar *s=str;
03500 uint i=0;
03501 while(i < MAXLENGTH && *s != QChar::null) { i++;s++ ;}
03502 return QString(str,i);
03503
03504 }
03505 int QRegExp3::search(const QChar *str,int start)
03506 {
03507 return search(makeString(str),start);
03508 }
03509 int QRegExp3::search(const QChar *str,int start) const
03510 {
03511 return search(makeString(str),start);
03512 }
03513 int QRegExp3::searchRev(const QChar *str,int start)
03514 {
03515 return searchRev(makeString(str),start);
03516 }
03517 int QRegExp3::searchRev(const QChar *str,int start) const
03518 {
03519 return searchRev(makeString(str),start);
03520 }
03521 bool QRegExp3::exactMatch(const QChar *str)
03522 {
03523 return exactMatch(makeString(str));
03524 }
03525 bool QRegExp3::exactMatch(const QChar *str) const
03526 {
03527 return exactMatch(makeString(str));
03528 }
03529 #endif // QCHAR_SUPPORT
03530
03531 int QRegExp3::search( const QString& str, int start )
03532 {
03533 if ( start < 0 )
03534 start += str.length();
03535 #ifndef QT_NO_REGEXP_CAPTURE
03536 priv->t = str;
03537 priv->capturedCache.clear();
03538 #endif
03539 priv->captured = eng->match( str, start, priv->min, FALSE );
03540 return priv->captured[0];
03541 }
03542
03547 int QRegExp3::search( const QString& str, int start ) const
03548 {
03549 if ( start < 0 )
03550 start += str.length();
03551 return eng->match( str, start, priv->min, FALSE )[0];
03552 }
03553
03565 int QRegExp3::searchRev( const QString& str, int start )
03566 {
03567 if ( start < 0 )
03568 start += str.length();
03569 #ifndef QT_NO_REGEXP_CAPTURE
03570 priv->t = str;
03571 priv->capturedCache.clear();
03572 #endif
03573 if ( start < 0 || start > (int) str.length() ) {
03574 priv->captured.detach();
03575 priv->captured.fill( -1 );
03576 return -1;
03577 }
03578
03579 while ( start >= 0 ) {
03580 priv->captured = eng->match( str, start, priv->min, TRUE );
03581 if ( priv->captured[0] == start )
03582 return start;
03583 start--;
03584 }
03585 return -1;
03586 }
03587
03592 int QRegExp3::searchRev( const QString& str, int start ) const
03593 {
03594 if ( start < 0 )
03595 start += str.length();
03596 if ( start < 0 || start > (int) str.length() )
03597 return -1;
03598
03599 while ( start >= 0 ) {
03600 if ( eng->match(str, start, priv->min, TRUE)[0] == start )
03601 return start;
03602 start--;
03603 }
03604 return -1;
03605 }
03606
03612 int QRegExp3::matchedLength()
03613 {
03614 return priv->captured[1];
03615 }
03616
03617 #ifndef QT_NO_REGEXP_CAPTURE
03618
03662 QStringList QRegExp3::capturedTexts()
03663 {
03664 if ( priv->capturedCache.isEmpty() ) {
03665 for ( int i = 0; i < (int) priv->captured.size(); i += 2 ) {
03666 QString m;
03667 if ( priv->captured[i + 1] == 0 )
03668 m = QString::fromLatin1( "" );
03669 else if ( priv->captured[i] >= 0 )
03670 m = priv->t.mid( priv->captured[i],
03671 priv->captured[i + 1] );
03672 priv->capturedCache.append( m );
03673 }
03674 priv->t = QString::null;
03675 }
03676 return priv->capturedCache;
03677 }
03678
03720 QString QRegExp3::cap( int nth )
03721 {
03722 if ( nth < 0 || nth >= (int) priv->captured.size() / 2 )
03723 return QString::null;
03724 else
03725 return capturedTexts()[nth];
03726 }
03727
03747 int QRegExp3::pos( int nth )
03748 {
03749 if ( nth < 0 || nth >= (int) priv->captured.size() / 2 )
03750 return -1;
03751 else
03752 return priv->captured[2 * nth];
03753 }
03754 #endif
03755
03756 void QRegExp3::compile( bool caseSensitive )
03757 {
03758 derefEngine( eng, priv->rxpattern );
03759 #ifndef QT_NO_REGEXP_WILDCARD
03760 if ( priv->wc )
03761 priv->rxpattern = wc2rx( priv->pattern );
03762 else
03763 #endif
03764 priv->rxpattern = priv->pattern.isNull() ? QString::fromLatin1( "" )
03765 : priv->pattern;
03766 eng = newEngine( priv->rxpattern, caseSensitive );
03767 #ifndef QT_NO_REGEXP_CAPTURE
03768 priv->t = QString::null;
03769 priv->capturedCache.clear();
03770 #endif
03771 priv->captured.detach();
03772 priv->captured.fill( -1, 2 + 2 * eng->numCaptures() );
03773 }