00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "qregexp.h"
00039
00040 #ifndef QT_NO_REGEXP
00041
00042 #include "qmemarray.h"
00043 #include "qbitarray.h"
00044 #include "qcache.h"
00045 #include "qcleanuphandler.h"
00046 #include "qintdict.h"
00047 #include "qmap.h"
00048 #include "qptrvector.h"
00049 #include "qstring.h"
00050 #include "qtl.h"
00051
00052 #ifdef QT_THREAD_SUPPORT
00053 #include "qmutexpool_p.h"
00054 #endif // QT_THREAD_SUPPORT
00055
00056 #undef QT_TRANSLATE_NOOP
00057 #define QT_TRANSLATE_NOOP( context, sourceText ) sourceText
00058
00059 #include <limits.h>
00060
00061
00062 #define RXERR_OK QT_TRANSLATE_NOOP( "QRegExp", "no error occurred" )
00063 #define RXERR_DISABLED QT_TRANSLATE_NOOP( "QRegExp", "disabled feature used" )
00064 #define RXERR_CHARCLASS QT_TRANSLATE_NOOP( "QRegExp", "bad char class syntax" )
00065 #define RXERR_LOOKAHEAD QT_TRANSLATE_NOOP( "QRegExp", "bad lookahead syntax" )
00066 #define RXERR_REPETITION QT_TRANSLATE_NOOP( "QRegExp", "bad repetition syntax" )
00067 #define RXERR_OCTAL QT_TRANSLATE_NOOP( "QRegExp", "invalid octal value" )
00068 #define RXERR_LEFTDELIM QT_TRANSLATE_NOOP( "QRegExp", "missing left delim" )
00069 #define RXERR_END QT_TRANSLATE_NOOP( "QRegExp", "unexpected end" )
00070 #define RXERR_LIMIT QT_TRANSLATE_NOOP( "QRegExp", "met internal limit" )
00071
00072
00073
00074
00075
00716 const int NumBadChars = 64;
00717 #define BadChar( ch ) ( (ch).unicode() % NumBadChars )
00718
00719 const int NoOccurrence = INT_MAX;
00720 const int EmptyCapture = INT_MAX;
00721 const int InftyLen = INT_MAX;
00722 const int InftyRep = 1025;
00723 const int EOS = -1;
00724
00725 static bool isWord( QChar ch )
00726 {
00727 return ch.isLetterOrNumber() || ch == QChar( '_' );
00728 }
00729
00730
00731
00732
00733 static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b )
00734 {
00735 int asize = a->size();
00736 int bsize = b.size();
00737 if ( asize == 0 ) {
00738 *a = b.copy();
00739 #ifndef QT_NO_REGEXP_OPTIM
00740 } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) {
00741 a->resize( asize + 1 );
00742 (*a)[asize] = b[0];
00743 #endif
00744 } else if ( bsize >= 1 ) {
00745 int csize = asize + bsize;
00746 QMemArray<int> c( csize );
00747 int i = 0, j = 0, k = 0;
00748 while ( i < asize ) {
00749 if ( j < bsize ) {
00750 if ( (*a)[i] == b[j] ) {
00751 i++;
00752 csize--;
00753 } else if ( (*a)[i] < b[j] ) {
00754 c[k++] = (*a)[i++];
00755 } else {
00756 c[k++] = b[j++];
00757 }
00758 } else {
00759 memcpy( c.data() + k, (*a).data() + i,
00760 (asize - i) * sizeof(int) );
00761 break;
00762 }
00763 }
00764 c.resize( csize );
00765 if ( j < bsize )
00766 memcpy( c.data() + k, b.data() + j, (bsize - j) * sizeof(int) );
00767 *a = c;
00768 }
00769 }
00770
00771
00772
00773
00774
00775 static void mergeInto( QMap<int, int> *a, const QMap<int, int>& b )
00776 {
00777 QMap<int, int>::ConstIterator it;
00778 for ( it = b.begin(); it != b.end(); ++it )
00779 a->insert( it.key(), *it );
00780 }
00781
00782
00783
00784
00785
00786 static int at( const QMap<int, int>& m, int k )
00787 {
00788 QMap<int, int>::ConstIterator it = m.find( k );
00789 if ( it == m.end() )
00790 return 0;
00791 else
00792 return *it;
00793 }
00794
00795 #ifndef QT_NO_REGEXP_WILDCARD
00796
00797
00798
00799
00800 static QString wc2rx( const QString& wc_str )
00801 {
00802 int wclen = wc_str.length();
00803 QString rx = QString::fromLatin1( "" );
00804 int i = 0;
00805 const QChar *wc = wc_str.unicode();
00806 while ( i < wclen ) {
00807 QChar c = wc[i++];
00808 switch ( c.unicode() ) {
00809 case '*':
00810 rx += QString::fromLatin1( ".*" );
00811 break;
00812 case '?':
00813 rx += QChar( '.' );
00814 break;
00815 case '$':
00816 case '(':
00817 case ')':
00818 case '+':
00819 case '.':
00820 case '\\':
00821 case '^':
00822 case '{':
00823 case '|':
00824 case '}':
00825 rx += QChar( '\\' );
00826 rx += c;
00827 break;
00828 case '[':
00829 rx += c;
00830 if ( wc[i] == QChar('^') )
00831 rx += wc[i++];
00832 if ( i < wclen ) {
00833 if ( rx[i] == ']' )
00834 rx += wc[i++];
00835 while ( i < wclen && wc[i] != QChar(']') ) {
00836 if ( wc[i] == '\\' )
00837 rx += QChar( '\\' );
00838 rx += wc[i++];
00839 }
00840 }
00841 break;
00842 default:
00843 rx += c;
00844 }
00845 }
00846 return rx;
00847 }
00848 #endif
00849
00850
00851
00852
00853
00854 class QRegExpEngine : public QShared
00855 {
00856 public:
00857 #ifndef QT_NO_REGEXP_CCLASS
00858
00859
00860
00861
00862
00863 class CharClass
00864 {
00865 public:
00866 CharClass();
00867 CharClass( const CharClass& cc ) { operator=( cc ); }
00868
00869 CharClass& operator=( const CharClass& cc );
00870
00871 void clear();
00872 bool negative() const { return n; }
00873 void setNegative( bool negative );
00874 void addCategories( int cats );
00875 void addRange( ushort from, ushort to );
00876 void addSingleton( ushort ch ) { addRange( ch, ch ); }
00877
00878 bool in( QChar ch ) const;
00879 #ifndef QT_NO_REGEXP_OPTIM
00880 const QMemArray<int>& firstOccurrence() const { return occ1; }
00881 #endif
00882
00883 #if defined(QT_DEBUG)
00884 void dump() const;
00885 #endif
00886
00887 private:
00888
00889
00890
00891
00892 struct Range
00893 {
00894 ushort from;
00895 ushort to;
00896 };
00897
00898 int c;
00899 QMemArray<Range> r;
00900 bool n;
00901 #ifndef QT_NO_REGEXP_OPTIM
00902 QMemArray<int> occ1;
00903 #endif
00904 };
00905 #else
00906 struct CharClass
00907 {
00908 int dummy;
00909
00910 #ifndef QT_NO_REGEXP_OPTIM
00911 CharClass() { occ1.fill( 0, NumBadChars ); }
00912
00913 const QMemArray<int>& firstOccurrence() const { return occ1; }
00914 QMemArray<int> occ1;
00915 #endif
00916 };
00917 #endif
00918
00919 QRegExpEngine( bool caseSensitive ) { setup( caseSensitive ); }
00920 QRegExpEngine( const QString& rx, bool caseSensitive );
00921 #ifndef QT_NO_REGEXP_OPTIM
00922 ~QRegExpEngine();
00923 #endif
00924
00925 bool isValid() const { return valid; }
00926 const QString& errorString() const { return yyError; }
00927 bool caseSensitive() const { return cs; }
00928 int numCaptures() const { return officialncap; }
00929 QMemArray<int> match( const QString& str, int pos, bool minimal,
00930 bool oneTest, int caretIndex );
00931 int matchedLength() const { return mmMatchedLen; }
00932
00933 int createState( QChar ch );
00934 int createState( const CharClass& cc );
00935 #ifndef QT_NO_REGEXP_BACKREF
00936 int createState( int bref );
00937 #endif
00938
00939 void addCatTransitions( const QMemArray<int>& from,
00940 const QMemArray<int>& to );
00941 #ifndef QT_NO_REGEXP_CAPTURE
00942 void addPlusTransitions( const QMemArray<int>& from,
00943 const QMemArray<int>& to, int atom );
00944 #endif
00945
00946 #ifndef QT_NO_REGEXP_ANCHOR_ALT
00947 int anchorAlternation( int a, int b );
00948 int anchorConcatenation( int a, int b );
00949 #else
00950 int anchorAlternation( int a, int b ) { return a & b; }
00951 int anchorConcatenation( int a, int b ) { return a | b; }
00952 #endif
00953 void addAnchors( int from, int to, int a );
00954
00955 #ifndef QT_NO_REGEXP_OPTIM
00956 void setupGoodStringHeuristic( int earlyStart, int lateStart,
00957 const QString& str );
00958 void setupBadCharHeuristic( int minLen, const QMemArray<int>& firstOcc );
00959 void heuristicallyChooseHeuristic();
00960 #endif
00961
00962 #if defined(QT_DEBUG)
00963 void dump() const;
00964 #endif
00965
00966 private:
00967 enum { CharClassBit = 0x10000, BackRefBit = 0x20000 };
00968
00969
00970
00971
00972
00973
00974
00975 struct State
00976 {
00977 #ifndef QT_NO_REGEXP_CAPTURE
00978 int atom;
00979 #endif
00980 int match;
00981 QMemArray<int> outs;
00982 QMap<int, int> *reenter;
00983 QMap<int, int> *anchors;
00984
00985 #ifndef QT_NO_REGEXP_CAPTURE
00986 State( int a, int m )
00987 : atom( a ), match( m ), reenter( 0 ), anchors( 0 ) { }
00988 #else
00989 State( int m )
00990 : match( m ), reenter( 0 ), anchors( 0 ) { }
00991 #endif
00992 ~State() { delete reenter; delete anchors; }
00993 };
00994
00995 #ifndef QT_NO_REGEXP_LOOKAHEAD
00996
00997
00998
00999
01000 struct Lookahead
01001 {
01002 QRegExpEngine *eng;
01003 bool neg;
01004
01005 Lookahead( QRegExpEngine *eng0, bool neg0 )
01006 : eng( eng0 ), neg( neg0 ) { }
01007 ~Lookahead() { delete eng; }
01008 };
01009 #endif
01010
01011 #ifndef QT_NO_REGEXP_CAPTURE
01012
01013
01014
01015
01016 struct Atom
01017 {
01018 int parent;
01019 int capture;
01020 };
01021 #endif
01022
01023 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01024
01025
01026
01027
01028 struct AnchorAlternation
01029 {
01030 int a;
01031 int b;
01032 };
01033 #endif
01034
01035 enum { InitialState = 0, FinalState = 1 };
01036 void setup( bool caseSensitive );
01037 int setupState( int match );
01038
01039
01040
01041
01042
01043 enum { MaxLookaheads = 13, MaxBackRefs = 14 };
01044 enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002,
01045 Anchor_Word = 0x00000004, Anchor_NonWord = 0x00000008,
01046 Anchor_FirstLookahead = 0x00000010,
01047 Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads,
01048 Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1,
01049 Anchor_Alternation = Anchor_BackRef1Empty << MaxBackRefs,
01050
01051 Anchor_LookaheadMask = ( Anchor_FirstLookahead - 1 ) ^
01052 ( (Anchor_FirstLookahead << MaxLookaheads) - 1 ) };
01053 #ifndef QT_NO_REGEXP_CAPTURE
01054 int startAtom( bool capture );
01055 void finishAtom( int atom ) { cf = f[atom].parent; }
01056 #endif
01057
01058 #ifndef QT_NO_REGEXP_LOOKAHEAD
01059 int addLookahead( QRegExpEngine *eng, bool negative );
01060 #endif
01061
01062 #ifndef QT_NO_REGEXP_CAPTURE
01063 bool isBetterCapture( const int *begin1, const int *end1, const int *begin2,
01064 const int *end2 );
01065 #endif
01066 bool testAnchor( int i, int a, const int *capBegin );
01067
01068 #ifndef QT_NO_REGEXP_OPTIM
01069 bool goodStringMatch();
01070 bool badCharMatch();
01071 #else
01072 bool bruteMatch();
01073 #endif
01074 bool matchHere();
01075
01076 QPtrVector<State> s;
01077 int ns;
01078 #ifndef QT_NO_REGEXP_CAPTURE
01079 QMemArray<Atom> f;
01080 int nf;
01081 int cf;
01082 #endif
01083 int officialncap;
01084 int ncap;
01085 #ifndef QT_NO_REGEXP_CCLASS
01086 QPtrVector<CharClass> cl;
01087 #endif
01088 #ifndef QT_NO_REGEXP_LOOKAHEAD
01089 QPtrVector<Lookahead> ahead;
01090 #endif
01091 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01092 QMemArray<AnchorAlternation> aa;
01093 #endif
01094 #ifndef QT_NO_REGEXP_OPTIM
01095 bool caretAnchored;
01096 #endif
01097 bool valid;
01098 bool cs;
01099 #ifndef QT_NO_REGEXP_BACKREF
01100 int nbrefs;
01101 #endif
01102
01103 #ifndef QT_NO_REGEXP_OPTIM
01104 bool useGoodStringHeuristic;
01105
01106 int goodEarlyStart;
01107 int goodLateStart;
01108 QString goodStr;
01109
01110 int minl;
01111 QMemArray<int> occ1;
01112 #endif
01113
01114
01115
01116
01117
01118
01119
01120
01121 class Box
01122 {
01123 public:
01124 Box( QRegExpEngine *engine );
01125 Box( const Box& b ) { operator=( b ); }
01126
01127 Box& operator=( const Box& b );
01128
01129 void clear() { operator=(Box(eng)); }
01130 void set( QChar ch );
01131 void set( const CharClass& cc );
01132 #ifndef QT_NO_REGEXP_BACKREF
01133 void set( int bref );
01134 #endif
01135
01136 void cat( const Box& b );
01137 void orx( const Box& b );
01138 void plus( int atom );
01139 void opt();
01140 void catAnchor( int a );
01141 #ifndef QT_NO_REGEXP_OPTIM
01142 void setupHeuristics();
01143 #endif
01144
01145 #if defined(QT_DEBUG)
01146 void dump() const;
01147 #endif
01148
01149 private:
01150 void addAnchorsToEngine( const Box& to ) const;
01151
01152 QRegExpEngine *eng;
01153 QMemArray<int> ls;
01154 QMemArray<int> rs;
01155 QMap<int, int> lanchors;
01156 QMap<int, int> ranchors;
01157 int skipanchors;
01158
01159 #ifndef QT_NO_REGEXP_OPTIM
01160 int earlyStart;
01161 int lateStart;
01162 QString str;
01163 QString leftStr;
01164 QString rightStr;
01165 int maxl;
01166 #endif
01167
01168 int minl;
01169 #ifndef QT_NO_REGEXP_OPTIM
01170 QMemArray<int> occ1;
01171 #endif
01172 };
01173 friend class Box;
01174
01175
01176
01177
01178 enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen,
01179 Tok_PosLookahead, Tok_NegLookahead, Tok_RightParen, Tok_CharClass,
01180 Tok_Caret, Tok_Quantifier, Tok_Bar, Tok_Word, Tok_NonWord,
01181 Tok_Char = 0x10000, Tok_BackRef = 0x20000 };
01182 int getChar();
01183 int getEscape();
01184 #ifndef QT_NO_REGEXP_INTERVAL
01185 int getRep( int def );
01186 #endif
01187 #ifndef QT_NO_REGEXP_LOOKAHEAD
01188 void skipChars( int n );
01189 #endif
01190 void error( const char *msg );
01191 void startTokenizer( const QChar *rx, int len );
01192 int getToken();
01193
01194 const QChar *yyIn;
01195 int yyPos0;
01196 int yyPos;
01197 int yyLen;
01198 int yyCh;
01199 CharClass *yyCharClass;
01200 int yyMinRep;
01201 int yyMaxRep;
01202 QString yyError;
01203
01204
01205
01206
01207 int parse( const QChar *rx, int len );
01208 void parseAtom( Box *box );
01209 void parseFactor( Box *box );
01210 void parseTerm( Box *box );
01211 void parseExpression( Box *box );
01212
01213 int yyTok;
01214 bool yyMayCapture;
01215
01216
01217
01218
01219 const QString *mmStr;
01220 const QChar *mmIn;
01221 int mmPos;
01222 int mmCaretPos;
01223 int mmLen;
01224 bool mmMinimal;
01225 QMemArray<int> mmCaptured;
01226 QMemArray<int> mmCapturedNoMatch;
01227 QMemArray<int> mmBigArray;
01228 int *mmInNextStack;
01229 int *mmCurStack;
01230 int *mmNextStack;
01231 int *mmCurCapBegin;
01232 int *mmNextCapBegin;
01233 int *mmCurCapEnd;
01234 int *mmNextCapEnd;
01235 int *mmTempCapBegin;
01236 int *mmTempCapEnd;
01237 int *mmCapBegin;
01238 int *mmCapEnd;
01239 int *mmSlideTab;
01240 int mmSlideTabSize;
01241 #ifndef QT_NO_REGEXP_BACKREF
01242 QIntDict<int> mmSleeping;
01243 #endif
01244 int mmMatchLen;
01245 int mmMatchedLen;
01246 };
01247
01248 QRegExpEngine::QRegExpEngine( const QString& rx, bool caseSensitive )
01249 #ifndef QT_NO_REGEXP_BACKREF
01250 : mmSleeping( 101 )
01251 #endif
01252 {
01253 setup( caseSensitive );
01254 valid = ( parse(rx.unicode(), rx.length()) == (int) rx.length() );
01255 if ( !valid )
01256 error( RXERR_LEFTDELIM );
01257 }
01258
01259 #ifndef QT_NO_REGEXP_OPTIM
01260 QRegExpEngine::~QRegExpEngine()
01261 {
01262 }
01263 #endif
01264
01265
01266
01267
01268
01269 QMemArray<int> QRegExpEngine::match( const QString& str, int pos, bool minimal,
01270 bool oneTest, int caretIndex )
01271 {
01272 mmStr = &str;
01273 mmIn = str.unicode();
01274 if ( mmIn == 0 )
01275 mmIn = &QChar::null;
01276 mmPos = pos;
01277 mmCaretPos = caretIndex;
01278 mmLen = str.length();
01279 mmMinimal = minimal;
01280 mmMatchLen = 0;
01281 mmMatchedLen = 0;
01282
01283 bool matched = FALSE;
01284 if ( valid && mmPos >= 0 && mmPos <= mmLen ) {
01285 #ifndef QT_NO_REGEXP_OPTIM
01286 if ( oneTest ) {
01287 matched = matchHere();
01288 } else {
01289 if ( mmPos <= mmLen - minl ) {
01290 if ( caretAnchored ) {
01291 matched = matchHere();
01292 } else if ( useGoodStringHeuristic ) {
01293 matched = goodStringMatch();
01294 } else {
01295 matched = badCharMatch();
01296 }
01297 }
01298 }
01299 #else
01300 matched = oneTest ? matchHere() : bruteMatch();
01301 #endif
01302 }
01303
01304 if ( matched ) {
01305 mmCaptured.detach();
01306 mmCaptured[0] = mmPos;
01307 mmCaptured[1] = mmMatchLen;
01308 for ( int j = 0; j < officialncap; j++ ) {
01309 int len = mmCapEnd[j] - mmCapBegin[j];
01310 mmCaptured[2 + 2 * j] = len > 0 ? mmPos + mmCapBegin[j] : 0;
01311 mmCaptured[2 + 2 * j + 1] = len;
01312 }
01313 return mmCaptured;
01314 } else {
01315 return mmCapturedNoMatch;
01316 }
01317 }
01318
01319
01320
01321
01322
01323
01324 int QRegExpEngine::createState( QChar ch )
01325 {
01326 return setupState( ch.unicode() );
01327 }
01328
01329 int QRegExpEngine::createState( const CharClass& cc )
01330 {
01331 #ifndef QT_NO_REGEXP_CCLASS
01332 int n = cl.size();
01333 cl.resize( n + 1 );
01334 cl.insert( n, new CharClass(cc) );
01335 return setupState( CharClassBit | n );
01336 #else
01337 Q_UNUSED( cc );
01338 return setupState( CharClassBit );
01339 #endif
01340 }
01341
01342 #ifndef QT_NO_REGEXP_BACKREF
01343 int QRegExpEngine::createState( int bref )
01344 {
01345 if ( bref > nbrefs ) {
01346 nbrefs = bref;
01347 if ( nbrefs > MaxBackRefs ) {
01348 error( RXERR_LIMIT );
01349 return 0;
01350 }
01351 }
01352 return setupState( BackRefBit | bref );
01353 }
01354 #endif
01355
01356
01357
01358
01359
01360
01361
01362
01363
01364 void QRegExpEngine::addCatTransitions( const QMemArray<int>& from,
01365 const QMemArray<int>& to )
01366 {
01367 for ( int i = 0; i < (int) from.size(); i++ ) {
01368 State *st = s[from[i]];
01369 mergeInto( &st->outs, to );
01370 }
01371 }
01372
01373 #ifndef QT_NO_REGEXP_CAPTURE
01374 void QRegExpEngine::addPlusTransitions( const QMemArray<int>& from,
01375 const QMemArray<int>& to, int atom )
01376 {
01377 for ( int i = 0; i < (int) from.size(); i++ ) {
01378 State *st = s[from[i]];
01379 QMemArray<int> oldOuts = st->outs.copy();
01380 mergeInto( &st->outs, to );
01381 if ( f[atom].capture >= 0 ) {
01382 if ( st->reenter == 0 )
01383 st->reenter = new QMap<int, int>;
01384 for ( int j = 0; j < (int) to.size(); j++ ) {
01385 if ( !st->reenter->contains(to[j]) &&
01386 oldOuts.bsearch(to[j]) < 0 )
01387 st->reenter->insert( to[j], atom );
01388 }
01389 }
01390 }
01391 }
01392 #endif
01393
01394 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01395
01396
01397
01398 int QRegExpEngine::anchorAlternation( int a, int b )
01399 {
01400 if ( ((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0 )
01401 return a & b;
01402
01403 int n = aa.size();
01404 #ifndef QT_NO_REGEXP_OPTIM
01405 if ( n > 0 && aa[n - 1].a == a && aa[n - 1].b == b )
01406 return Anchor_Alternation | ( n - 1 );
01407 #endif
01408
01409 aa.resize( n + 1 );
01410 aa[n].a = a;
01411 aa[n].b = b;
01412 return Anchor_Alternation | n;
01413 }
01414
01415
01416
01417
01418 int QRegExpEngine::anchorConcatenation( int a, int b )
01419 {
01420 if ( ((a | b) & Anchor_Alternation) == 0 )
01421 return a | b;
01422 if ( (b & Anchor_Alternation) != 0 )
01423 qSwap( a, b );
01424
01425 int aprime = anchorConcatenation( aa[a ^ Anchor_Alternation].a, b );
01426 int bprime = anchorConcatenation( aa[a ^ Anchor_Alternation].b, b );
01427 return anchorAlternation( aprime, bprime );
01428 }
01429 #endif
01430
01431
01432
01433
01434
01435 void QRegExpEngine::addAnchors( int from, int to, int a )
01436 {
01437 State *st = s[from];
01438 if ( st->anchors == 0 )
01439 st->anchors = new QMap<int, int>;
01440 if ( st->anchors->contains(to) )
01441 a = anchorAlternation( (*st->anchors)[to], a );
01442 st->anchors->insert( to, a );
01443 }
01444
01445 #ifndef QT_NO_REGEXP_OPTIM
01446
01447
01448
01449
01450
01451 void QRegExpEngine::setupGoodStringHeuristic( int earlyStart, int lateStart,
01452 const QString& str )
01453 {
01454 goodEarlyStart = earlyStart;
01455 goodLateStart = lateStart;
01456 goodStr = cs ? str : str.lower();
01457 }
01458
01459 void QRegExpEngine::setupBadCharHeuristic( int minLen,
01460 const QMemArray<int>& firstOcc )
01461 {
01462 minl = minLen;
01463 if ( cs ) {
01464 occ1 = firstOcc;
01465 } else {
01466 occ1.fill( 0, NumBadChars );
01467 }
01468 }
01469
01470
01471
01472
01473
01474
01475
01476
01477
01478
01479
01480
01481
01482
01483 void QRegExpEngine::heuristicallyChooseHeuristic()
01484 {
01485 int i;
01486
01487 if ( minl == 0 )
01488 return;
01489
01490
01491
01492
01493
01494
01495 int goodStringScore = ( 64 * goodStr.length() / minl ) -
01496 ( goodLateStart - goodEarlyStart );
01497
01498
01499
01500
01501
01502 int badCharScore = 0;
01503 int step = QMAX( 1, NumBadChars / 32 );
01504 for ( i = 1; i < NumBadChars; i += step ) {
01505 if ( occ1[i] == NoOccurrence )
01506 badCharScore += minl;
01507 else
01508 badCharScore += occ1[i];
01509 }
01510 badCharScore /= minl;
01511
01512 useGoodStringHeuristic = ( goodStringScore > badCharScore );
01513 }
01514 #endif
01515
01516 #if defined(QT_DEBUG)
01517 void QRegExpEngine::dump() const
01518 {
01519 int i, j;
01520 qDebug( "Case %ssensitive engine", cs ? "" : "in" );
01521 qDebug( " States" );
01522 for ( i = 0; i < ns; i++ ) {
01523 qDebug( " %d%s", i,
01524 i == InitialState ? " (initial)" :
01525 i == FinalState ? " (final)" : "" );
01526 #ifndef QT_NO_REGEXP_CAPTURE
01527 qDebug( " in atom %d", s[i]->atom );
01528 #endif
01529 int m = s[i]->match;
01530 if ( (m & CharClassBit) != 0 ) {
01531 qDebug( " match character class %d", m ^ CharClassBit );
01532 #ifndef QT_NO_REGEXP_CCLASS
01533 cl[m ^ CharClassBit]->dump();
01534 #else
01535 qDebug( " negative character class" );
01536 #endif
01537 } else if ( (m & BackRefBit) != 0 ) {
01538 qDebug( " match back-reference %d", m ^ BackRefBit );
01539 } else if ( m >= 0x20 && m <= 0x7e ) {
01540 qDebug( " match 0x%.4x (%c)", m, m );
01541 } else {
01542 qDebug( " match 0x%.4x", m );
01543 }
01544 for ( j = 0; j < (int) s[i]->outs.size(); j++ ) {
01545 int next = s[i]->outs[j];
01546 qDebug( " -> %d", next );
01547 if ( s[i]->reenter != 0 && s[i]->reenter->contains(next) )
01548 qDebug( " [reenter %d]", (*s[i]->reenter)[next] );
01549 if ( s[i]->anchors != 0 && at(*s[i]->anchors, next) != 0 )
01550 qDebug( " [anchors 0x%.8x]", (*s[i]->anchors)[next] );
01551 }
01552 }
01553 #ifndef QT_NO_REGEXP_CAPTURE
01554 if ( nf > 0 ) {
01555 qDebug( " Atom Parent Capture" );
01556 for ( i = 0; i < nf; i++ )
01557 qDebug( " %6d %6d %6d", i, f[i].parent, f[i].capture );
01558 }
01559 #endif
01560 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01561 for ( i = 0; i < (int) aa.size(); i++ )
01562 qDebug( " Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a,
01563 aa[i].b );
01564 #endif
01565 }
01566 #endif
01567
01568 void QRegExpEngine::setup( bool caseSensitive )
01569 {
01570 s.setAutoDelete( TRUE );
01571 s.resize( 32 );
01572 ns = 0;
01573 #ifndef QT_NO_REGEXP_CAPTURE
01574 f.resize( 32 );
01575 nf = 0;
01576 cf = -1;
01577 #endif
01578 officialncap = 0;
01579 ncap = 0;
01580 #ifndef QT_NO_REGEXP_CCLASS
01581 cl.setAutoDelete( TRUE );
01582 #endif
01583 #ifndef QT_NO_REGEXP_LOOKAHEAD
01584 ahead.setAutoDelete( TRUE );
01585 #endif
01586 #ifndef QT_NO_REGEXP_OPTIM
01587 caretAnchored = TRUE;
01588 #endif
01589 valid = FALSE;
01590 cs = caseSensitive;
01591 #ifndef QT_NO_REGEXP_BACKREF
01592 nbrefs = 0;
01593 #endif
01594 #ifndef QT_NO_REGEXP_OPTIM
01595 useGoodStringHeuristic = FALSE;
01596 minl = 0;
01597 occ1.fill( 0, NumBadChars );
01598 #endif
01599 mmCapturedNoMatch.fill( -1, 2 );
01600 }
01601
01602 int QRegExpEngine::setupState( int match )
01603 {
01604 if ( (ns & (ns + 1)) == 0 && ns + 1 >= (int) s.size() )
01605 s.resize( (ns + 1) << 1 );
01606 #ifndef QT_NO_REGEXP_CAPTURE
01607 s.insert( ns, new State(cf, match) );
01608 #else
01609 s.insert( ns, new State(match) );
01610 #endif
01611 return ns++;
01612 }
01613
01614 #ifndef QT_NO_REGEXP_CAPTURE
01615
01616
01617
01618
01619
01620 int QRegExpEngine::startAtom( bool capture )
01621 {
01622 if ( (nf & (nf + 1)) == 0 && nf + 1 >= (int) f.size() )
01623 f.resize( (nf + 1) << 1 );
01624 f[nf].parent = cf;
01625 cf = nf++;
01626 f[cf].capture = capture ? ncap++ : -1;
01627 return cf;
01628 }
01629 #endif
01630
01631 #ifndef QT_NO_REGEXP_LOOKAHEAD
01632
01633
01634
01635 int QRegExpEngine::addLookahead( QRegExpEngine *eng, bool negative )
01636 {
01637 int n = ahead.size();
01638 if ( n == MaxLookaheads ) {
01639 error( RXERR_LIMIT );
01640 return 0;
01641 }
01642 ahead.resize( n + 1 );
01643 ahead.insert( n, new Lookahead(eng, negative) );
01644 return Anchor_FirstLookahead << n;
01645 }
01646 #endif
01647
01648 #ifndef QT_NO_REGEXP_CAPTURE
01649
01650
01651
01652 bool QRegExpEngine::isBetterCapture( const int *begin1, const int *end1,
01653 const int *begin2, const int *end2 )
01654 {
01655 for ( int i = 0; i < ncap; i++ ) {
01656 int delta = begin2[i] - begin1[i];
01657 if ( delta == 0 )
01658 delta = end1[i] - end2[i];
01659
01660 if ( delta != 0 )
01661 return delta > 0;
01662 }
01663 return FALSE;
01664 }
01665 #endif
01666
01667
01668
01669
01670
01671 bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
01672 {
01673 int j;
01674
01675 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01676 if ( (a & Anchor_Alternation) != 0 ) {
01677 return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) ||
01678 testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin );
01679 }
01680 #endif
01681
01682 if ( (a & Anchor_Caret) != 0 ) {
01683 if ( mmPos + i != mmCaretPos )
01684 return FALSE;
01685 }
01686 if ( (a & Anchor_Dollar) != 0 ) {
01687 if ( mmPos + i != mmLen )
01688 return FALSE;
01689 }
01690 #ifndef QT_NO_REGEXP_ESCAPE
01691 if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) {
01692 bool before = FALSE;
01693 bool after = FALSE;
01694 if ( mmPos + i != 0 )
01695 before = isWord( mmIn[mmPos + i - 1] );
01696 if ( mmPos + i != mmLen )
01697 after = isWord( mmIn[mmPos + i] );
01698 if ( (a & Anchor_Word) != 0 && (before == after) )
01699 return FALSE;
01700 if ( (a & Anchor_NonWord) != 0 && (before != after) )
01701 return FALSE;
01702 }
01703 #endif
01704 #ifndef QT_NO_REGEXP_LOOKAHEAD
01705 bool catchx = TRUE;
01706
01707 if ( (a & Anchor_LookaheadMask) != 0 ) {
01708 QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i,
01709 mmLen - mmPos - i );
01710 for ( j = 0; j < (int) ahead.size(); j++ ) {
01711 if ( (a & (Anchor_FirstLookahead << j)) != 0 ) {
01712 catchx = ahead[j]->eng->match( cstr.string(), 0, TRUE, TRUE,
01713 mmCaretPos - mmPos - i )[0] == 0;
01714 if ( catchx == ahead[j]->neg )
01715 return FALSE;
01716 }
01717 }
01718 }
01719 #endif
01720 #ifndef QT_NO_REGEXP_CAPTURE
01721 #ifndef QT_NO_REGEXP_BACKREF
01722 for ( j = 0; j < nbrefs; j++ ) {
01723 if ( (a & (Anchor_BackRef1Empty << j)) != 0 ) {
01724 if ( capBegin[j] != EmptyCapture )
01725 return FALSE;
01726 }
01727 }
01728 #endif
01729 #endif
01730 return TRUE;
01731 }
01732
01733 #ifndef QT_NO_REGEXP_OPTIM
01734
01735
01736
01737
01738
01739
01740 bool QRegExpEngine::goodStringMatch()
01741 {
01742 int k = mmPos + goodEarlyStart;
01743
01744 while ( (k = mmStr->find(goodStr, k, cs)) != -1 ) {
01745 int from = k - goodLateStart;
01746 int to = k - goodEarlyStart;
01747 if ( from > mmPos )
01748 mmPos = from;
01749
01750 while ( mmPos <= to ) {
01751 if ( matchHere() )
01752 return TRUE;
01753 mmPos++;
01754 }
01755 k++;
01756 }
01757 return FALSE;
01758 }
01759
01760 bool QRegExpEngine::badCharMatch()
01761 {
01762 int slideHead = 0;
01763 int slideNext = 0;
01764 int i;
01765 int lastPos = mmLen - minl;
01766 memset( mmSlideTab, 0, mmSlideTabSize * sizeof(int) );
01767
01768
01769
01770
01771
01772 for ( i = 0; i < minl; i++ ) {
01773 int sk = occ1[BadChar(mmIn[mmPos + i])];
01774 if ( sk == NoOccurrence )
01775 sk = i + 1;
01776 if ( sk > 0 ) {
01777 int k = i + 1 - sk;
01778 if ( k < 0 ) {
01779 sk = i + 1;
01780 k = 0;
01781 }
01782 if ( sk > mmSlideTab[k] )
01783 mmSlideTab[k] = sk;
01784 }
01785 }
01786
01787 if ( mmPos > lastPos )
01788 return FALSE;
01789
01790 for ( ;; ) {
01791 if ( ++slideNext >= mmSlideTabSize )
01792 slideNext = 0;
01793 if ( mmSlideTab[slideHead] > 0 ) {
01794 if ( mmSlideTab[slideHead] - 1 > mmSlideTab[slideNext] )
01795 mmSlideTab[slideNext] = mmSlideTab[slideHead] - 1;
01796 mmSlideTab[slideHead] = 0;
01797 } else {
01798 if ( matchHere() )
01799 return TRUE;
01800 }
01801
01802 if ( mmPos == lastPos )
01803 break;
01804
01805
01806
01807
01808
01809 int sk = occ1[BadChar(mmIn[mmPos + minl])];
01810 if ( sk == NoOccurrence ) {
01811 mmSlideTab[slideNext] = minl;
01812 } else if ( sk > 0 ) {
01813 int k = slideNext + minl - sk;
01814 if ( k >= mmSlideTabSize )
01815 k -= mmSlideTabSize;
01816 if ( sk > mmSlideTab[k] )
01817 mmSlideTab[k] = sk;
01818 }
01819 slideHead = slideNext;
01820 mmPos++;
01821 }
01822 return FALSE;
01823 }
01824 #else
01825 bool QRegExpEngine::bruteMatch()
01826 {
01827 while ( mmPos <= mmLen ) {
01828 if ( matchHere() )
01829 return TRUE;
01830 mmPos++;
01831 }
01832 return FALSE;
01833 }
01834 #endif
01835
01836
01837
01838
01839 bool QRegExpEngine::matchHere()
01840 {
01841 int ncur = 1, nnext = 0;
01842 int i = 0, j, k, m;
01843 bool stop = FALSE;
01844
01845 mmMatchLen = -1;
01846 mmMatchedLen = -1;
01847 mmCurStack[0] = InitialState;
01848
01849 #ifndef QT_NO_REGEXP_CAPTURE
01850 if ( ncap > 0 ) {
01851 for ( j = 0; j < ncap; j++ ) {
01852 mmCurCapBegin[j] = EmptyCapture;
01853 mmCurCapEnd[j] = EmptyCapture;
01854 }
01855 }
01856 #endif
01857
01858 #ifndef QT_NO_REGEXP_BACKREF
01859 int *zzZ = 0;
01860
01861 while ( (ncur > 0 || !mmSleeping.isEmpty()) && i <= mmLen - mmPos &&
01862 !stop )
01863 #else
01864 while ( ncur > 0 && i <= mmLen - mmPos && !stop )
01865 #endif
01866 {
01867 int ch = ( i < mmLen - mmPos ) ? mmIn[mmPos + i].unicode() : 0;
01868 for ( j = 0; j < ncur; j++ ) {
01869 int cur = mmCurStack[j];
01870 State *scur = s[cur];
01871 QMemArray<int>& outs = scur->outs;
01872 for ( k = 0; k < (int) outs.size(); k++ ) {
01873 int next = outs[k];
01874 State *snext = s[next];
01875 bool in = TRUE;
01876 #ifndef QT_NO_REGEXP_BACKREF
01877 int needSomeSleep = 0;
01878 #endif
01879
01880
01881
01882
01883 if ( scur->anchors != 0 ) {
01884 int a = at( *scur->anchors, next );
01885 if ( a != 0 && !testAnchor(i, a, mmCurCapBegin + j * ncap) )
01886 in = FALSE;
01887 }
01888
01889
01890
01891
01892 if ( in ) {
01893 m = snext->match;
01894 if ( (m & (CharClassBit | BackRefBit)) == 0 ) {
01895 if ( cs )
01896 in = ( m == ch );
01897 else
01898 in = ( QChar(m).lower() == QChar(ch).lower() );
01899 } else if ( next == FinalState ) {
01900 mmMatchLen = i;
01901 stop = mmMinimal;
01902 in = TRUE;
01903 } else if ( (m & CharClassBit) != 0 ) {
01904 #ifndef QT_NO_REGEXP_CCLASS
01905 const CharClass *cc = cl[m ^ CharClassBit];
01906 if ( cs )
01907 in = cc->in( ch );
01908 else if ( cc->negative() )
01909 in = cc->in( QChar(ch).lower() ) &&
01910 cc->in( QChar(ch).upper() );
01911 else
01912 in = cc->in( QChar(ch).lower() ) ||
01913 cc->in( QChar(ch).upper() );
01914 #endif
01915 #ifndef QT_NO_REGEXP_BACKREF
01916 } else {
01917 int bref = m ^ BackRefBit;
01918 int ell = j * ncap + ( bref - 1 );
01919
01920 in = bref <= ncap && mmCurCapBegin[ell] != EmptyCapture;
01921 if ( in ) {
01922 if ( cs )
01923 in = ( mmIn[mmPos + mmCurCapBegin[ell]]
01924 == QChar(ch) );
01925 else
01926 in = ( mmIn[mmPos + mmCurCapBegin[ell]].lower()
01927 == QChar(ch).lower() );
01928 }
01929
01930 if ( in ) {
01931 int delta;
01932 if ( mmCurCapEnd[ell] == EmptyCapture )
01933 delta = i - mmCurCapBegin[ell];
01934 else
01935 delta = mmCurCapEnd[ell] - mmCurCapBegin[ell];
01936
01937 in = ( delta <= mmLen - (mmPos + i) );
01938 if ( in && delta > 1 ) {
01939 int n = 1;
01940 if ( cs ) {
01941 while ( n < delta ) {
01942 if ( mmIn[mmPos +
01943 mmCurCapBegin[ell] + n] !=
01944 mmIn[mmPos + i + n] )
01945 break;
01946 n++;
01947 }
01948 } else {
01949 while ( n < delta ) {
01950 QChar a = mmIn[mmPos +
01951 mmCurCapBegin[ell] + n];
01952 QChar b = mmIn[mmPos + i + n];
01953 if ( a.lower() != b.lower() )
01954 break;
01955 n++;
01956 }
01957 }
01958 in = ( n == delta );
01959 if ( in )
01960 needSomeSleep = delta - 1;
01961 }
01962 }
01963 #endif
01964 }
01965 }
01966
01967
01968
01969
01970 if ( in ) {
01971 #ifndef QT_NO_REGEXP_CAPTURE
01972 int *capBegin, *capEnd;
01973 #endif
01974
01975
01976
01977
01978 if ( (m = mmInNextStack[next]) == -1 ) {
01979 m = nnext++;
01980 mmNextStack[m] = next;
01981 mmInNextStack[next] = m;
01982 #ifndef QT_NO_REGEXP_CAPTURE
01983 capBegin = mmNextCapBegin + m * ncap;
01984 capEnd = mmNextCapEnd + m * ncap;
01985
01986
01987
01988
01989
01990
01991
01992 } else {
01993 capBegin = mmTempCapBegin;
01994 capEnd = mmTempCapEnd;
01995 #endif
01996 }
01997
01998 #ifndef QT_NO_REGEXP_CAPTURE
01999
02000
02001
02002 if ( ncap > 0 ) {
02003 memcpy( capBegin, mmCurCapBegin + j * ncap,
02004 ncap * sizeof(int) );
02005 memcpy( capEnd, mmCurCapEnd + j * ncap,
02006 ncap * sizeof(int) );
02007 int c = scur->atom, n = snext->atom;
02008 int p = -1, q = -1;
02009 int cap;
02010
02011
02012
02013
02014
02015
02016
02017
02018
02019
02020
02021
02022
02023
02024 if ( scur->reenter != 0 &&
02025 (q = at(*scur->reenter, next)) != 0 ) {
02026 QBitArray b;
02027 b.fill( FALSE, nf );
02028 b.setBit( q, TRUE );
02029 for ( int ell = q + 1; ell < nf; ell++ ) {
02030 if ( b.testBit(f[ell].parent) ) {
02031 b.setBit( ell, TRUE );
02032 cap = f[ell].capture;
02033 if ( cap >= 0 ) {
02034 capBegin[cap] = EmptyCapture;
02035 capEnd[cap] = EmptyCapture;
02036 }
02037 }
02038 }
02039 p = f[q].parent;
02040
02041
02042
02043
02044
02045
02046
02047
02048
02049
02050
02051
02052
02053 } else {
02054 p = c;
02055 q = n;
02056 while ( p != q ) {
02057 if ( p > q ) {
02058 cap = f[p].capture;
02059 if ( cap >= 0 ) {
02060 if ( capBegin[cap] == i ) {
02061 capBegin[cap] = EmptyCapture;
02062 capEnd[cap] = EmptyCapture;
02063 } else {
02064 capEnd[cap] = i;
02065 }
02066 }
02067 p = f[p].parent;
02068 } else {
02069 q = f[q].parent;
02070 }
02071 }
02072 }
02073
02074
02075
02076
02077
02078
02079
02080 while ( n > p ) {
02081 cap = f[n].capture;
02082 if ( cap >= 0 ) {
02083 capBegin[cap] = i;
02084 capEnd[cap] = EmptyCapture;
02085 }
02086 n = f[n].parent;
02087 }
02088
02089
02090
02091
02092
02093 if ( capBegin == mmTempCapBegin &&
02094 isBetterCapture(capBegin, capEnd,
02095 mmNextCapBegin + m * ncap,
02096 mmNextCapEnd + m * ncap) ) {
02097 memcpy( mmNextCapBegin + m * ncap, capBegin,
02098 ncap * sizeof(int) );
02099 memcpy( mmNextCapEnd + m * ncap, capEnd,
02100 ncap * sizeof(int) );
02101 }
02102 }
02103 #ifndef QT_NO_REGEXP_BACKREF
02104
02105
02106
02107
02108
02109
02110 if ( needSomeSleep > 0 ) {
02111 zzZ = new int[1 + 2 * ncap];
02112 zzZ[0] = next;
02113 if ( ncap > 0 ) {
02114 memcpy( zzZ + 1, capBegin, ncap * sizeof(int) );
02115 memcpy( zzZ + 1 + ncap, capEnd,
02116 ncap * sizeof(int) );
02117 }
02118 mmInNextStack[mmNextStack[--nnext]] = -1;
02119 mmSleeping.insert( i + needSomeSleep, zzZ );
02120 }
02121 #endif
02122 #endif
02123 }
02124 }
02125 }
02126 #ifndef QT_NO_REGEXP_CAPTURE
02127
02128
02129
02130
02131 if ( ncap > 0 && (m = mmInNextStack[FinalState]) != -1 ) {
02132 memcpy( mmCapBegin, mmNextCapBegin + m * ncap, ncap * sizeof(int) );
02133 memcpy( mmCapEnd, mmNextCapEnd + m * ncap, ncap * sizeof(int) );
02134 }
02135 #ifndef QT_NO_REGEXP_BACKREF
02136
02137
02138
02139 if ( !mmSleeping.isEmpty() ) {
02140 while ( (zzZ = mmSleeping.take(i)) != 0 ) {
02141 int next = zzZ[0];
02142 int *capBegin = zzZ + 1;
02143 int *capEnd = zzZ + 1 + ncap;
02144 bool copyOver = TRUE;
02145
02146 if ( (m = mmInNextStack[zzZ[0]]) == -1 ) {
02147 m = nnext++;
02148 mmNextStack[m] = next;
02149 mmInNextStack[next] = m;
02150 } else {
02151 copyOver = isBetterCapture( mmNextCapBegin + m * ncap,
02152 mmNextCapEnd + m * ncap,
02153 capBegin, capEnd );
02154 }
02155 if ( copyOver ) {
02156 memcpy( mmNextCapBegin + m * ncap, capBegin,
02157 ncap * sizeof(int) );
02158 memcpy( mmNextCapEnd + m * ncap, capEnd,
02159 ncap * sizeof(int) );
02160 }
02161 delete[] zzZ;
02162 }
02163 }
02164 #endif
02165 #endif
02166 for ( j = 0; j < nnext; j++ )
02167 mmInNextStack[mmNextStack[j]] = -1;
02168
02169
02170 if ( nnext == 1 && mmNextStack[0] == FinalState
02171 #ifndef QT_NO_REGEXP_BACKREF
02172 && mmSleeping.isEmpty()
02173 #endif
02174 )
02175 stop = TRUE;
02176
02177 qSwap( mmCurStack, mmNextStack );
02178 #ifndef QT_NO_REGEXP_CAPTURE
02179 qSwap( mmCurCapBegin, mmNextCapBegin );
02180 qSwap( mmCurCapEnd, mmNextCapEnd );
02181 #endif
02182 ncur = nnext;
02183 nnext = 0;
02184 i++;
02185 }
02186
02187 #ifndef QT_NO_REGEXP_BACKREF
02188
02189
02190
02191
02192 while ( !mmSleeping.isEmpty() ) {
02193 zzZ = mmSleeping.take( *QIntDictIterator<int>(mmSleeping) );
02194 delete[] zzZ;
02195 }
02196 #endif
02197
02198 mmMatchedLen = i - 1;
02199 return ( mmMatchLen >= 0 );
02200 }
02201
02202 #ifndef QT_NO_REGEXP_CCLASS
02203
02204 QRegExpEngine::CharClass::CharClass()
02205 : c( 0 ), n( FALSE )
02206 {
02207 #ifndef QT_NO_REGEXP_OPTIM
02208 occ1.fill( NoOccurrence, NumBadChars );
02209 #endif
02210 }
02211
02212 QRegExpEngine::CharClass& QRegExpEngine::CharClass::operator=(
02213 const CharClass& cc )
02214 {
02215 c = cc.c;
02216 r = cc.r.copy();
02217 n = cc.n;
02218 #ifndef QT_NO_REGEXP_OPTIM
02219 occ1 = cc.occ1;
02220 #endif
02221 return *this;
02222 }
02223
02224 void QRegExpEngine::CharClass::clear()
02225 {
02226 c = 0;
02227 r.resize( 0 );
02228 n = FALSE;
02229 }
02230
02231 void QRegExpEngine::CharClass::setNegative( bool negative )
02232 {
02233 n = negative;
02234 #ifndef QT_NO_REGEXP_OPTIM
02235 occ1.fill( 0, NumBadChars );
02236 #endif
02237 }
02238
02239 void QRegExpEngine::CharClass::addCategories( int cats )
02240 {
02241 c |= cats;
02242 #ifndef QT_NO_REGEXP_OPTIM
02243 occ1.fill( 0, NumBadChars );
02244 #endif
02245 }
02246
02247 void QRegExpEngine::CharClass::addRange( ushort from, ushort to )
02248 {
02249 if ( from > to )
02250 qSwap( from, to );
02251 int m = r.size();
02252 r.resize( m + 1 );
02253 r[m].from = from;
02254 r[m].to = to;
02255
02256 #ifndef QT_NO_REGEXP_OPTIM
02257 int i;
02258
02259 if ( to - from < NumBadChars ) {
02260 occ1.detach();
02261 if ( from % NumBadChars <= to % NumBadChars ) {
02262 for ( i = from % NumBadChars; i <= to % NumBadChars; i++ )
02263 occ1[i] = 0;
02264 } else {
02265 for ( i = 0; i <= to % NumBadChars; i++ )
02266 occ1[i] = 0;
02267 for ( i = from % NumBadChars; i < NumBadChars; i++ )
02268 occ1[i] = 0;
02269 }
02270 } else {
02271 occ1.fill( 0, NumBadChars );
02272 }
02273 #endif
02274 }
02275
02276 bool QRegExpEngine::CharClass::in( QChar ch ) const
02277 {
02278 #ifndef QT_NO_REGEXP_OPTIM
02279 if ( occ1[BadChar(ch)] == NoOccurrence )
02280 return n;
02281 #endif
02282
02283 if ( c != 0 && (c & (1 << (int) ch.category())) != 0 )
02284 return !n;
02285 for ( int i = 0; i < (int) r.size(); i++ ) {
02286 if ( ch.unicode() >= r[i].from && ch.unicode() <= r[i].to )
02287 return !n;
02288 }
02289 return n;
02290 }
02291
02292 #if defined(QT_DEBUG)
02293 void QRegExpEngine::CharClass::dump() const
02294 {
02295 int i;
02296 qDebug( " %stive character class", n ? "nega" : "posi" );
02297 #ifndef QT_NO_REGEXP_CCLASS
02298 if ( c != 0 )
02299 qDebug( " categories 0x%.8x", c );
02300 #endif
02301 for ( i = 0; i < (int) r.size(); i++ )
02302 qDebug( " 0x%.4x through 0x%.4x", r[i].from, r[i].to );
02303 }
02304 #endif
02305 #endif
02306
02307 QRegExpEngine::Box::Box( QRegExpEngine *engine )
02308 : eng( engine ), skipanchors( 0 )
02309 #ifndef QT_NO_REGEXP_OPTIM
02310 , earlyStart( 0 ), lateStart( 0 ), maxl( 0 )
02311 #endif
02312 {
02313 #ifndef QT_NO_REGEXP_OPTIM
02314 occ1.fill( NoOccurrence, NumBadChars );
02315 #endif
02316 minl = 0;
02317 }
02318
02319 QRegExpEngine::Box& QRegExpEngine::Box::operator=( const Box& b )
02320 {
02321 eng = b.eng;
02322 ls = b.ls;
02323 rs = b.rs;
02324 lanchors = b.lanchors;
02325 ranchors = b.ranchors;
02326 skipanchors = b.skipanchors;
02327 #ifndef QT_NO_REGEXP_OPTIM
02328 earlyStart = b.earlyStart;
02329 lateStart = b.lateStart;
02330 str = b.str;
02331 leftStr = b.leftStr;
02332 rightStr = b.rightStr;
02333 maxl = b.maxl;
02334 occ1 = b.occ1;
02335 #endif
02336 minl = b.minl;
02337 return *this;
02338 }
02339
02340 void QRegExpEngine::Box::set( QChar ch )
02341 {
02342 ls.resize( 1 );
02343 ls[0] = eng->createState( ch );
02344 rs = ls;
02345 rs.detach();
02346 #ifndef QT_NO_REGEXP_OPTIM
02347 str = ch;
02348 leftStr = ch;
02349 rightStr = ch;
02350 maxl = 1;
02351 occ1.detach();
02352 occ1[BadChar(ch)] = 0;
02353 #endif
02354 minl = 1;
02355 }
02356
02357 void QRegExpEngine::Box::set( const CharClass& cc )
02358 {
02359 ls.resize( 1 );
02360 ls[0] = eng->createState( cc );
02361 rs = ls;
02362 rs.detach();
02363 #ifndef QT_NO_REGEXP_OPTIM
02364 maxl = 1;
02365 occ1 = cc.firstOccurrence();
02366 #endif
02367 minl = 1;
02368 }
02369
02370 #ifndef QT_NO_REGEXP_BACKREF
02371 void QRegExpEngine::Box::set( int bref )
02372 {
02373 ls.resize( 1 );
02374 ls[0] = eng->createState( bref );
02375 rs = ls;
02376 rs.detach();
02377 if ( bref >= 1 && bref <= MaxBackRefs )
02378 skipanchors = Anchor_BackRef0Empty << bref;
02379 #ifndef QT_NO_REGEXP_OPTIM
02380 maxl = InftyLen;
02381 #endif
02382 minl = 0;
02383 }
02384 #endif
02385
02386 void QRegExpEngine::Box::cat( const Box& b )
02387 {
02388 eng->addCatTransitions( rs, b.ls );
02389 addAnchorsToEngine( b );
02390 if ( minl == 0 ) {
02391 mergeInto( &lanchors, b.lanchors );
02392 if ( skipanchors != 0 ) {
02393 for ( int i = 0; i < (int) b.ls.size(); i++ ) {
02394 int a = eng->anchorConcatenation( at(lanchors, b.ls[i]),
02395 skipanchors );
02396 lanchors.insert( b.ls[i], a );
02397 }
02398 }
02399 mergeInto( &ls, b.ls );
02400 }
02401 if ( b.minl == 0 ) {
02402 mergeInto( &ranchors, b.ranchors );
02403 if ( b.skipanchors != 0 ) {
02404 for ( int i = 0; i < (int) rs.size(); i++ ) {
02405 int a = eng->anchorConcatenation( at(ranchors, rs[i]),
02406 b.skipanchors );
02407 ranchors.insert( rs[i], a );
02408 }
02409 }
02410 mergeInto( &rs, b.rs );
02411 } else {
02412 ranchors = b.ranchors;
02413 rs = b.rs;
02414 }
02415
02416 #ifndef QT_NO_REGEXP_OPTIM
02417 if ( maxl != InftyLen ) {
02418 if ( rightStr.length() + b.leftStr.length() >
02419 QMAX(str.length(), b.str.length()) ) {
02420 earlyStart = minl - rightStr.length();
02421 lateStart = maxl - rightStr.length();
02422 str = rightStr + b.leftStr;
02423 } else if ( b.str.length() > str.length() ) {
02424 earlyStart = minl + b.earlyStart;
02425 lateStart = maxl + b.lateStart;
02426 str = b.str;
02427 }
02428 }
02429
02430 if ( (int) leftStr.length() == maxl )
02431 leftStr += b.leftStr;
02432 if ( (int) b.rightStr.length() == b.maxl )
02433 rightStr += b.rightStr;
02434 else
02435 rightStr = b.rightStr;
02436
02437 if ( maxl == InftyLen || b.maxl == InftyLen )
02438 maxl = InftyLen;
02439 else
02440 maxl += b.maxl;
02441
02442 occ1.detach();
02443 for ( int i = 0; i < NumBadChars; i++ ) {
02444 if ( b.occ1[i] != NoOccurrence && minl + b.occ1[i] < occ1[i] )
02445 occ1[i] = minl + b.occ1[i];
02446 }
02447 #endif
02448
02449 minl += b.minl;
02450 if ( minl == 0 )
02451 skipanchors = eng->anchorConcatenation( skipanchors, b.skipanchors );
02452 else
02453 skipanchors = 0;
02454 }
02455
02456 void QRegExpEngine::Box::orx( const Box& b )
02457 {
02458 mergeInto( &ls, b.ls );
02459 mergeInto( &lanchors, b.lanchors );
02460 mergeInto( &rs, b.rs );
02461 mergeInto( &ranchors, b.ranchors );
02462
02463 if ( b.minl == 0 ) {
02464 if ( minl == 0 )
02465 skipanchors = eng->anchorAlternation( skipanchors, b.skipanchors );
02466 else
02467 skipanchors = b.skipanchors;
02468 }
02469
02470 #ifndef QT_NO_REGEXP_OPTIM
02471 occ1.detach();
02472 for ( int i = 0; i < NumBadChars; i++ ) {
02473 if ( occ1[i] > b.occ1[i] )
02474 occ1[i] = b.occ1[i];
02475 }
02476 earlyStart = 0;
02477 lateStart = 0;
02478 str = QString();
02479 leftStr = QString();
02480 rightStr = QString();
02481 if ( b.maxl > maxl )
02482 maxl = b.maxl;
02483 #endif
02484 if ( b.minl < minl )
02485 minl = b.minl;
02486 }
02487
02488 void QRegExpEngine::Box::plus( int atom )
02489 {
02490 #ifndef QT_NO_REGEXP_CAPTURE
02491 eng->addPlusTransitions( rs, ls, atom );
02492 #else
02493 Q_UNUSED( atom );
02494 eng->addCatTransitions( rs, ls );
02495 #endif
02496 addAnchorsToEngine( *this );
02497 #ifndef QT_NO_REGEXP_OPTIM
02498 maxl = InftyLen;
02499 #endif
02500 }
02501
02502 void QRegExpEngine::Box::opt()
02503 {
02504 #ifndef QT_NO_REGEXP_OPTIM
02505 earlyStart = 0;
02506 lateStart = 0;
02507 str = QString();
02508 leftStr = QString();
02509 rightStr = QString();
02510 #endif
02511 skipanchors = 0;
02512 minl = 0;
02513 }
02514
02515 void QRegExpEngine::Box::catAnchor( int a )
02516 {
02517 if ( a != 0 ) {
02518 for ( int i = 0; i < (int) rs.size(); i++ ) {
02519 a = eng->anchorConcatenation( at(ranchors, rs[i]), a );
02520 ranchors.insert( rs[i], a );
02521 }
02522 if ( minl == 0 )
02523 skipanchors = eng->anchorConcatenation( skipanchors, a );
02524 }
02525 }
02526
02527 #ifndef QT_NO_REGEXP_OPTIM
02528 void QRegExpEngine::Box::setupHeuristics()
02529 {
02530 eng->setupGoodStringHeuristic( earlyStart, lateStart, str );
02531
02532
02533
02534
02535
02536
02537
02538
02539
02540
02541 for ( int i = 0; i < NumBadChars; i++ ) {
02542 if ( occ1[i] != NoOccurrence && occ1[i] >= minl )
02543 occ1[i] = minl;
02544 }
02545 eng->setupBadCharHeuristic( minl, occ1 );
02546
02547 eng->heuristicallyChooseHeuristic();
02548 }
02549 #endif
02550
02551 #if defined(QT_DEBUG)
02552 void QRegExpEngine::Box::dump() const
02553 {
02554 int i;
02555 qDebug( "Box of at least %d character%s", minl, minl == 1 ? "" : "s" );
02556 qDebug( " Left states:" );
02557 for ( i = 0; i < (int) ls.size(); i++ ) {
02558 if ( at(lanchors, ls[i]) == 0 )
02559 qDebug( " %d", ls[i] );
02560 else
02561 qDebug( " %d [anchors 0x%.8x]", ls[i], lanchors[ls[i]] );
02562 }
02563 qDebug( " Right states:" );
02564 for ( i = 0; i < (int) rs.size(); i++ ) {
02565 if ( at(ranchors, rs[i]) == 0 )
02566 qDebug( " %d", rs[i] );
02567 else
02568 qDebug( " %d [anchors 0x%.8x]", rs[i], ranchors[rs[i]] );
02569 }
02570 qDebug( " Skip anchors: 0x%.8x", skipanchors );
02571 }
02572 #endif
02573
02574 void QRegExpEngine::Box::addAnchorsToEngine( const Box& to ) const
02575 {
02576 for ( int i = 0; i < (int) to.ls.size(); i++ ) {
02577 for ( int j = 0; j < (int) rs.size(); j++ ) {
02578 int a = eng->anchorConcatenation( at(ranchors, rs[j]),
02579 at(to.lanchors, to.ls[i]) );
02580 eng->addAnchors( rs[j], to.ls[i], a );
02581 }
02582 }
02583 }
02584
02585 int QRegExpEngine::getChar()
02586 {
02587 return ( yyPos == yyLen ) ? EOS : yyIn[yyPos++].unicode();
02588 }
02589
02590 int QRegExpEngine::getEscape()
02591 {
02592 #ifndef QT_NO_REGEXP_ESCAPE
02593 const char tab[] = "afnrtv";
02594 const char backTab[] = "\a\f\n\r\t\v";
02595 ushort low;
02596 int i;
02597 #endif
02598 ushort val;
02599 int prevCh = yyCh;
02600
02601 if ( prevCh == EOS ) {
02602 error( RXERR_END );
02603 return Tok_Char | '\\';
02604 }
02605 yyCh = getChar();
02606 #ifndef QT_NO_REGEXP_ESCAPE
02607 if ( (prevCh & ~0xff) == 0 ) {
02608 const char *p = strchr( tab, prevCh );
02609 if ( p != 0 )
02610 return Tok_Char | backTab[p - tab];
02611 }
02612 #endif
02613
02614 switch ( prevCh ) {
02615 #ifndef QT_NO_REGEXP_ESCAPE
02616 case '0':
02617 val = 0;
02618 for ( i = 0; i < 3; i++ ) {
02619 if ( yyCh >= '0' && yyCh <= '7' )
02620 val = ( val << 3 ) | ( yyCh - '0' );
02621 else
02622 break;
02623 yyCh = getChar();
02624 }
02625 if ( (val & ~0377) != 0 )
02626 error( RXERR_OCTAL );
02627 return Tok_Char | val;
02628 #endif
02629 #ifndef QT_NO_REGEXP_ESCAPE
02630 case 'B':
02631 return Tok_NonWord;
02632 #endif
02633 #ifndef QT_NO_REGEXP_CCLASS
02634 case 'D':
02635
02636 yyCharClass->addCategories( 0x7fffffef );
02637 return Tok_CharClass;
02638 case 'S':
02639
02640 yyCharClass->addCategories( 0x7ffff87f );
02641 yyCharClass->addRange( 0x0000, 0x0008 );
02642 yyCharClass->addRange( 0x000e, 0x001f );
02643 yyCharClass->addRange( 0x007f, 0x009f );
02644 return Tok_CharClass;
02645 case 'W':
02646
02647 yyCharClass->addCategories( 0x7fe07f8f );
02648 yyCharClass->addRange( 0x203f, 0x2040 );
02649 yyCharClass->addSingleton( 0x2040 );
02650 yyCharClass->addSingleton( 0x30fb );
02651 yyCharClass->addRange( 0xfe33, 0xfe34 );
02652 yyCharClass->addRange( 0xfe4d, 0xfe4f );
02653 yyCharClass->addSingleton( 0xff3f );
02654 yyCharClass->addSingleton( 0xff65 );
02655 return Tok_CharClass;
02656 #endif
02657 #ifndef QT_NO_REGEXP_ESCAPE
02658 case 'b':
02659 return Tok_Word;
02660 #endif
02661 #ifndef QT_NO_REGEXP_CCLASS
02662 case 'd':
02663
02664 yyCharClass->addCategories( 0x00000010 );
02665 return Tok_CharClass;
02666 case 's':
02667
02668 yyCharClass->addCategories( 0x00000380 );
02669 yyCharClass->addRange( 0x0009, 0x000d );
02670 return Tok_CharClass;
02671 case 'w':
02672
02673 yyCharClass->addCategories( 0x000f8070 );
02674 yyCharClass->addSingleton( 0x005f );
02675 return Tok_CharClass;
02676 #endif
02677 #ifndef QT_NO_REGEXP_ESCAPE
02678 case 'x':
02679 val = 0;
02680 for ( i = 0; i < 4; i++ ) {
02681 low = QChar( yyCh ).lower();
02682 if ( low >= '0' && low <= '9' )
02683 val = ( val << 4 ) | ( low - '0' );
02684 else if ( low >= 'a' && low <= 'f' )
02685 val = ( val << 4 ) | ( low - 'a' + 10 );
02686 else
02687 break;
02688 yyCh = getChar();
02689 }
02690 return Tok_Char | val;
02691 #endif
02692 default:
02693 if ( prevCh >= '1' && prevCh <= '9' ) {
02694 #ifndef QT_NO_REGEXP_BACKREF
02695 val = prevCh - '0';
02696 while ( yyCh >= '0' && yyCh <= '9' ) {
02697 val = ( val *= 10 ) | ( yyCh - '0' );
02698 yyCh = getChar();
02699 }
02700 return Tok_BackRef | val;
02701 #else
02702 error( RXERR_DISABLED );
02703 #endif
02704 }
02705 return Tok_Char | prevCh;
02706 }
02707 }
02708
02709 #ifndef QT_NO_REGEXP_INTERVAL
02710 int QRegExpEngine::getRep( int def )
02711 {
02712 if ( yyCh >= '0' && yyCh <= '9' ) {
02713 int rep = 0;
02714 do {
02715 rep = 10 * rep + yyCh - '0';
02716 if ( rep >= InftyRep ) {
02717 error( RXERR_REPETITION );
02718 rep = def;
02719 }
02720 yyCh = getChar();
02721 } while ( yyCh >= '0' && yyCh <= '9' );
02722 return rep;
02723 } else {
02724 return def;
02725 }
02726 }
02727 #endif
02728
02729 #ifndef QT_NO_REGEXP_LOOKAHEAD
02730 void QRegExpEngine::skipChars( int n )
02731 {
02732 if ( n > 0 ) {
02733 yyPos += n - 1;
02734 yyCh = getChar();
02735 }
02736 }
02737 #endif
02738
02739 void QRegExpEngine::error( const char *msg )
02740 {
02741 if ( yyError.isEmpty() )
02742 yyError = QString::fromLatin1( msg );
02743 }
02744
02745 void QRegExpEngine::startTokenizer( const QChar *rx, int len )
02746 {
02747 yyIn = rx;
02748 yyPos0 = 0;
02749 yyPos = 0;
02750 yyLen = len;
02751 yyCh = getChar();
02752 yyCharClass = new CharClass;
02753 yyMinRep = 0;
02754 yyMaxRep = 0;
02755 yyError = QString();
02756 }
02757
02758 int QRegExpEngine::getToken()
02759 {
02760 #ifndef QT_NO_REGEXP_CCLASS
02761 ushort pendingCh = 0;
02762 bool charPending;
02763 bool rangePending;
02764 int tok;
02765 #endif
02766 int prevCh = yyCh;
02767
02768 yyPos0 = yyPos - 1;
02769 #ifndef QT_NO_REGEXP_CCLASS
02770 yyCharClass->clear();
02771 #endif
02772 yyMinRep = 0;
02773 yyMaxRep = 0;
02774 yyCh = getChar();
02775 switch ( prevCh ) {
02776 case EOS:
02777 yyPos0 = yyPos;
02778 return Tok_Eos;
02779 case '$':
02780 return Tok_Dollar;
02781 case '(':
02782 if ( yyCh == '?' ) {
02783 prevCh = getChar();
02784 yyCh = getChar();
02785 switch ( prevCh ) {
02786 #ifndef QT_NO_REGEXP_LOOKAHEAD
02787 case '!':
02788 return Tok_NegLookahead;
02789 case '=':
02790 return Tok_PosLookahead;
02791 #endif
02792 case ':':
02793 return Tok_MagicLeftParen;
02794 default:
02795 error( RXERR_LOOKAHEAD );
02796 return Tok_MagicLeftParen;
02797 }
02798 } else {
02799 return Tok_LeftParen;
02800 }
02801 case ')':
02802 return Tok_RightParen;
02803 case '*':
02804 yyMinRep = 0;
02805 yyMaxRep = InftyRep;
02806 return Tok_Quantifier;
02807 case '+':
02808 yyMinRep = 1;
02809 yyMaxRep = InftyRep;
02810 return Tok_Quantifier;
02811 case '.':
02812 #ifndef QT_NO_REGEXP_CCLASS
02813 yyCharClass->setNegative( TRUE );
02814 #endif
02815 return Tok_CharClass;
02816 case '?':
02817 yyMinRep = 0;
02818 yyMaxRep = 1;
02819 return Tok_Quantifier;
02820 case '[':
02821 #ifndef QT_NO_REGEXP_CCLASS
02822 if ( yyCh == '^' ) {
02823 yyCharClass->setNegative( TRUE );
02824 yyCh = getChar();
02825 }
02826 charPending = FALSE;
02827 rangePending = FALSE;
02828 do {
02829 if ( yyCh == '-' && charPending && !rangePending ) {
02830 rangePending = TRUE;
02831 yyCh = getChar();
02832 } else {
02833 if ( charPending && !rangePending ) {
02834 yyCharClass->addSingleton( pendingCh );
02835 charPending = FALSE;
02836 }
02837 if ( yyCh == '\\' ) {
02838 yyCh = getChar();
02839 tok = getEscape();
02840 if ( tok == Tok_Word )
02841 tok = '\b';
02842 } else {
02843 tok = Tok_Char | yyCh;
02844 yyCh = getChar();
02845 }
02846 if ( tok == Tok_CharClass ) {
02847 if ( rangePending ) {
02848 yyCharClass->addSingleton( '-' );
02849 yyCharClass->addSingleton( pendingCh );
02850 charPending = FALSE;
02851 rangePending = FALSE;
02852 }
02853 } else if ( (tok & Tok_Char) != 0 ) {
02854 if ( rangePending ) {
02855 yyCharClass->addRange( pendingCh, tok ^ Tok_Char );
02856 charPending = FALSE;
02857 rangePending = FALSE;
02858 } else {
02859 pendingCh = tok ^ Tok_Char;
02860 charPending = TRUE;
02861 }
02862 } else {
02863 error( RXERR_CHARCLASS );
02864 }
02865 }
02866 } while ( yyCh != ']' && yyCh != EOS );
02867 if ( rangePending )
02868 yyCharClass->addSingleton( '-' );
02869 if ( charPending )
02870 yyCharClass->addSingleton( pendingCh );
02871 if ( yyCh == EOS )
02872 error( RXERR_END );
02873 else
02874 yyCh = getChar();
02875 return Tok_CharClass;
02876 #else
02877 error( RXERR_END );
02878 return Tok_Char | '[';
02879 #endif
02880 case '\\':
02881 return getEscape();
02882 case ']':
02883 error( RXERR_LEFTDELIM );
02884 return Tok_Char | ']';
02885 case '^':
02886 return Tok_Caret;
02887 case '{':
02888 #ifndef QT_NO_REGEXP_INTERVAL
02889 yyMinRep = getRep( 0 );
02890 yyMaxRep = yyMinRep;
02891 if ( yyCh == ',' ) {
02892 yyCh = getChar();
02893 yyMaxRep = getRep( InftyRep );
02894 }
02895 if ( yyMaxRep < yyMinRep )
02896 qSwap( yyMinRep, yyMaxRep );
02897 if ( yyCh != '}' )
02898 error( RXERR_REPETITION );
02899 yyCh = getChar();
02900 return Tok_Quantifier;
02901 #else
02902 error( RXERR_DISABLED );
02903 return Tok_Char | '{';
02904 #endif
02905 case '|':
02906 return Tok_Bar;
02907 case '}':
02908 error( RXERR_LEFTDELIM );
02909 return Tok_Char | '}';
02910 default:
02911 return Tok_Char | prevCh;
02912 }
02913 }
02914
02915 int QRegExpEngine::parse( const QChar *pattern, int len )
02916 {
02917 valid = TRUE;
02918 startTokenizer( pattern, len );
02919 yyTok = getToken();
02920 #ifndef QT_NO_REGEXP_CAPTURE
02921 yyMayCapture = TRUE;
02922 #else
02923 yyMayCapture = FALSE;
02924 #endif
02925
02926 #ifndef QT_NO_REGEXP_CAPTURE
02927 int atom = startAtom( FALSE );
02928 #endif
02929 CharClass anything;
02930 Box box( this );
02931 box.set( anything );
02932 Box rightBox( this );
02933 rightBox.set( anything );
02934
02935 Box middleBox( this );
02936 parseExpression( &middleBox );
02937 #ifndef QT_NO_REGEXP_CAPTURE
02938 finishAtom( atom );
02939 #endif
02940 #ifndef QT_NO_REGEXP_OPTIM
02941 middleBox.setupHeuristics();
02942 #endif
02943 box.cat( middleBox );
02944 box.cat( rightBox );
02945 delete yyCharClass;
02946 yyCharClass = 0;
02947
02948 officialncap = ncap;
02949 #ifndef QT_NO_REGEXP_BACKREF
02950 if ( nbrefs > ncap )
02951 ncap = nbrefs;
02952 #endif
02953
02954 mmCaptured.resize( 2 + 2 * officialncap );
02955 mmCapturedNoMatch.fill( -1, 2 + 2 * officialncap );
02956
02957
02958
02959
02960
02961 #ifndef QT_NO_REGEXP_OPTIM
02962 mmSlideTabSize = QMAX( minl + 1, 16 );
02963 #else
02964 mmSlideTabSize = 0;
02965 #endif
02966 mmBigArray.resize( (3 + 4 * ncap) * ns + 4 * ncap + mmSlideTabSize );
02967
02968 mmInNextStack = mmBigArray.data();
02969 memset( mmInNextStack, -1, ns * sizeof(int) );
02970 mmCurStack = mmInNextStack + ns;
02971 mmNextStack = mmInNextStack + 2 * ns;
02972
02973 mmCurCapBegin = mmInNextStack + 3 * ns;
02974 mmNextCapBegin = mmCurCapBegin + ncap * ns;
02975 mmCurCapEnd = mmCurCapBegin + 2 * ncap * ns;
02976 mmNextCapEnd = mmCurCapBegin + 3 * ncap * ns;
02977
02978 mmTempCapBegin = mmCurCapBegin + 4 * ncap * ns;
02979 mmTempCapEnd = mmTempCapBegin + ncap;
02980 mmCapBegin = mmTempCapBegin + 2 * ncap;
02981 mmCapEnd = mmTempCapBegin + 3 * ncap;
02982
02983 mmSlideTab = mmTempCapBegin + 4 * ncap;
02984
02985 if ( !yyError.isEmpty() )
02986 return -1;
02987
02988 #ifndef QT_NO_REGEXP_OPTIM
02989 State *sinit = s[InitialState];
02990 caretAnchored = ( sinit->anchors != 0 );
02991 if ( caretAnchored ) {
02992 QMap<int, int>& anchors = *sinit->anchors;
02993 QMap<int, int>::ConstIterator a;
02994 for ( a = anchors.begin(); a != anchors.end(); ++a ) {
02995 #ifndef QT_NO_REGEXP_ANCHOR_ALT
02996 if ( (*a & Anchor_Alternation) != 0 )
02997 break;
02998 #endif
02999 if ( (*a & Anchor_Caret) == 0 ) {
03000 caretAnchored = FALSE;
03001 break;
03002 }
03003 }
03004 }
03005 #endif
03006 return yyPos0;
03007 }
03008
03009 void QRegExpEngine::parseAtom( Box *box )
03010 {
03011 #ifndef QT_NO_REGEXP_LOOKAHEAD
03012 QRegExpEngine *eng = 0;
03013 bool neg;
03014 int len;
03015 #endif
03016
03017 switch ( yyTok ) {
03018 case Tok_Dollar:
03019 box->catAnchor( Anchor_Dollar );
03020 break;
03021 case Tok_Caret:
03022 box->catAnchor( Anchor_Caret );
03023 break;
03024 #ifndef QT_NO_REGEXP_LOOKAHEAD
03025 case Tok_PosLookahead:
03026 case Tok_NegLookahead:
03027 neg = ( yyTok == Tok_NegLookahead );
03028 eng = new QRegExpEngine( cs );
03029 len = eng->parse( yyIn + yyPos - 1, yyLen - yyPos + 1 );
03030 if ( len >= 0 )
03031 skipChars( len );
03032 else
03033 error( RXERR_LOOKAHEAD );
03034 box->catAnchor( addLookahead(eng, neg) );
03035 yyTok = getToken();
03036 if ( yyTok != Tok_RightParen )
03037 error( RXERR_LOOKAHEAD );
03038 break;
03039 #endif
03040 #ifndef QT_NO_REGEXP_ESCAPE
03041 case Tok_Word:
03042 box->catAnchor( Anchor_Word );
03043 break;
03044 case Tok_NonWord:
03045 box->catAnchor( Anchor_NonWord );
03046 break;
03047 #endif
03048 case Tok_LeftParen:
03049 case Tok_MagicLeftParen:
03050 yyTok = getToken();
03051 parseExpression( box );
03052 if ( yyTok != Tok_RightParen )
03053 error( RXERR_END );
03054 break;
03055 case Tok_CharClass:
03056 box->set( *yyCharClass );
03057 break;
03058 case Tok_Quantifier:
03059 error( RXERR_REPETITION );
03060 break;
03061 default:
03062 if ( (yyTok & Tok_Char) != 0 )
03063 box->set( QChar(yyTok ^ Tok_Char) );
03064 #ifndef QT_NO_REGEXP_BACKREF
03065 else if ( (yyTok & Tok_BackRef) != 0 )
03066 box->set( yyTok ^ Tok_BackRef );
03067 #endif
03068 else
03069 error( RXERR_DISABLED );
03070 }
03071 yyTok = getToken();
03072 }
03073
03074 void QRegExpEngine::parseFactor( Box *box )
03075 {
03076 #ifndef QT_NO_REGEXP_CAPTURE
03077 int atom = startAtom( yyMayCapture && yyTok == Tok_LeftParen );
03078 #else
03079 static const int atom = 0;
03080 #endif
03081
03082 #ifndef QT_NO_REGEXP_INTERVAL
03083 #define YYREDO() \
03084 yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \
03085 *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok
03086
03087 const QChar *in = yyIn;
03088 int pos0 = yyPos0;
03089 int pos = yyPos;
03090 int len = yyLen;
03091 int ch = yyCh;
03092 CharClass charClass;
03093 if ( yyTok == Tok_CharClass )
03094 charClass = *yyCharClass;
03095 int tok = yyTok;
03096 bool mayCapture = yyMayCapture;
03097 #endif
03098
03099 parseAtom( box );
03100 #ifndef QT_NO_REGEXP_CAPTURE
03101 finishAtom( atom );
03102 #endif
03103
03104 if ( yyTok == Tok_Quantifier ) {
03105 if ( yyMaxRep == InftyRep ) {
03106 box->plus( atom );
03107 #ifndef QT_NO_REGEXP_INTERVAL
03108 } else if ( yyMaxRep == 0 ) {
03109 box->clear();
03110 #endif
03111 }
03112 if ( yyMinRep == 0 )
03113 box->opt();
03114
03115 #ifndef QT_NO_REGEXP_INTERVAL
03116 yyMayCapture = FALSE;
03117 int alpha = ( yyMinRep == 0 ) ? 0 : yyMinRep - 1;
03118 int beta = ( yyMaxRep == InftyRep ) ? 0 : yyMaxRep - ( alpha + 1 );
03119
03120 Box rightBox( this );
03121 int i;
03122
03123 for ( i = 0; i < beta; i++ ) {
03124 YYREDO();
03125 Box leftBox( this );
03126 parseAtom( &leftBox );
03127 leftBox.cat( rightBox );
03128 leftBox.opt();
03129 rightBox = leftBox;
03130 }
03131 for ( i = 0; i < alpha; i++ ) {
03132 YYREDO();
03133 Box leftBox( this );
03134 parseAtom( &leftBox );
03135 leftBox.cat( rightBox );
03136 rightBox = leftBox;
03137 }
03138 rightBox.cat( *box );
03139 *box = rightBox;
03140 #endif
03141 yyTok = getToken();
03142 #ifndef QT_NO_REGEXP_INTERVAL
03143 yyMayCapture = mayCapture;
03144 #endif
03145 }
03146 #undef YYREDO
03147 }
03148
03149 void QRegExpEngine::parseTerm( Box *box )
03150 {
03151 #ifndef QT_NO_REGEXP_OPTIM
03152 if ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar )
03153 parseFactor( box );
03154 #endif
03155 while ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar ) {
03156 Box rightBox( this );
03157 parseFactor( &rightBox );
03158 box->cat( rightBox );
03159 }
03160 }
03161
03162 void QRegExpEngine::parseExpression( Box *box )
03163 {
03164 parseTerm( box );
03165 while ( yyTok == Tok_Bar ) {
03166 Box rightBox( this );
03167 yyTok = getToken();
03168 parseTerm( &rightBox );
03169 box->orx( rightBox );
03170 }
03171 }
03172
03173
03174
03175
03176
03177
03178
03179 struct QRegExpPrivate
03180 {
03181 QString pattern;
03182 QString rxpattern;
03183 #ifndef QT_NO_REGEXP_WILDCARD
03184 bool wc;
03185 #endif
03186 bool min;
03187 #ifndef QT_NO_REGEXP_CAPTURE
03188 QString t;
03189 QStringList capturedCache;
03190 #endif
03191 QMemArray<int> captured;
03192
03193 QRegExpPrivate() { captured.fill( -1, 2 ); }
03194 };
03195
03196 #ifndef QT_NO_REGEXP_OPTIM
03197 static QCache<QRegExpEngine> *engineCache = 0;
03198 static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache;
03199 #endif
03200
03201 static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
03202 {
03203 #ifndef QT_NO_REGEXP_OPTIM
03204 if ( engineCache != 0 ) {
03205 #ifdef QT_THREAD_SUPPORT
03206 QMutexLocker locker( qt_global_mutexpool ?
03207 qt_global_mutexpool->get( &engineCache ) : 0 );
03208 #endif
03209 QRegExpEngine *eng = engineCache->take( pattern );
03210 if ( eng == 0 || eng->caseSensitive() != caseSensitive ) {
03211 delete eng;
03212 } else {
03213 eng->ref();
03214 return eng;
03215 }
03216 }
03217 #endif
03218 return new QRegExpEngine( pattern, caseSensitive );
03219 }
03220
03221 static void derefEngine( QRegExpEngine *eng, const QString& pattern )
03222 {
03223 #ifdef QT_THREAD_SUPPORT
03224 QMutexLocker locker( qt_global_mutexpool ?
03225 qt_global_mutexpool->get( &engineCache ) : 0 );
03226 #endif
03227 if ( eng != 0 && eng->deref() ) {
03228 #ifndef QT_NO_REGEXP_OPTIM
03229 if ( engineCache == 0 ) {
03230 engineCache = new QCache<QRegExpEngine>;
03231 engineCache->setAutoDelete( TRUE );
03232 cleanup_cache.set( &engineCache );
03233 }
03234 if ( !pattern.isNull() &&
03235 engineCache->insert(pattern, eng, 4 + pattern.length() / 4) )
03236 return;
03237 #else
03238 Q_UNUSED( pattern );
03239 #endif
03240 delete eng;
03241 }
03242 }
03243
03265 QRegExp::QRegExp()
03266 {
03267 eng = new QRegExpEngine( TRUE );
03268 priv = new QRegExpPrivate;
03269 #ifndef QT_NO_REGEXP_WILDCARD
03270 priv->wc = FALSE;
03271 #endif
03272 priv->min = FALSE;
03273 compile( TRUE );
03274 }
03275
03285 QRegExp::QRegExp( const QString& pattern, bool caseSensitive, bool wildcard )
03286 {
03287 eng = 0;
03288 priv = new QRegExpPrivate;
03289 priv->pattern = pattern;
03290 #ifndef QT_NO_REGEXP_WILDCARD
03291 priv->wc = wildcard;
03292 #endif
03293 priv->min = FALSE;
03294 compile( caseSensitive );
03295 }
03296
03302 QRegExp::QRegExp( const QRegExp& rx )
03303 {
03304 eng = 0;
03305 priv = new QRegExpPrivate;
03306 operator=( rx );
03307 }
03308
03312 QRegExp::~QRegExp()
03313 {
03314 derefEngine( eng, priv->rxpattern );
03315 delete priv;
03316 }
03317
03323 QRegExp& QRegExp::operator=( const QRegExp& rx )
03324 {
03325 rx.eng->ref();
03326 derefEngine( eng, priv->rxpattern );
03327 eng = rx.eng;
03328 priv->pattern = rx.priv->pattern;
03329 priv->rxpattern = rx.priv->rxpattern;
03330 #ifndef QT_NO_REGEXP_WILDCARD
03331 priv->wc = rx.priv->wc;
03332 #endif
03333 priv->min = rx.priv->min;
03334 #ifndef QT_NO_REGEXP_CAPTURE
03335 priv->t = rx.priv->t;
03336 priv->capturedCache = rx.priv->capturedCache;
03337 #endif
03338 priv->captured = rx.priv->captured;
03339 return *this;
03340 }
03341
03350 bool QRegExp::operator==( const QRegExp& rx ) const
03351 {
03352 return priv->pattern == rx.priv->pattern &&
03353 eng->caseSensitive() == rx.eng->caseSensitive() &&
03354 #ifndef QT_NO_REGEXP_WILDCARD
03355 priv->wc == rx.priv->wc &&
03356 #endif
03357 priv->min == rx.priv->min;
03358 }
03359
03384 bool QRegExp::isEmpty() const
03385 {
03386 return priv->pattern.isEmpty();
03387 }
03388
03402 bool QRegExp::isValid() const
03403 {
03404 return eng->isValid();
03405 }
03406
03414 QString QRegExp::pattern() const
03415 {
03416 return priv->pattern;
03417 }
03418
03425 void QRegExp::setPattern( const QString& pattern )
03426 {
03427 if ( priv->pattern != pattern ) {
03428 priv->pattern = pattern;
03429 compile( caseSensitive() );
03430 }
03431 }
03432
03439 bool QRegExp::caseSensitive() const
03440 {
03441 return eng->caseSensitive();
03442 }
03443
03452 void QRegExp::setCaseSensitive( bool sensitive )
03453 {
03454 if ( sensitive != eng->caseSensitive() )
03455 compile( sensitive );
03456 }
03457
03458 #ifndef QT_NO_REGEXP_WILDCARD
03459
03465 bool QRegExp::wildcard() const
03466 {
03467 return priv->wc;
03468 }
03469
03483 void QRegExp::setWildcard( bool wildcard )
03484 {
03485 if ( wildcard != priv->wc ) {
03486 priv->wc = wildcard;
03487 compile( caseSensitive() );
03488 }
03489 }
03490 #endif
03491
03498 bool QRegExp::minimal() const
03499 {
03500 return priv->min;
03501 }
03502
03520 void QRegExp::setMinimal( bool minimal )
03521 {
03522 priv->min = minimal;
03523 }
03524
03545 bool QRegExp::exactMatch( const QString& str ) const
03546 {
03547 #ifndef QT_NO_REGEXP_CAPTURE
03548 priv->t = str;
03549 priv->capturedCache.clear();
03550 #endif
03551
03552 priv->captured = eng->match( str, 0, priv->min, TRUE, 0 );
03553 if ( priv->captured[1] == (int) str.length() ) {
03554 return TRUE;
03555 } else {
03556 priv->captured.detach();
03557 priv->captured[0] = 0;
03558 priv->captured[1] = eng->matchedLength();
03559 return FALSE;
03560 }
03561 }
03562
03563 #ifndef QT_NO_COMPAT
03564
03580 int QRegExp::match( const QString& str, int index, int *len,
03581 bool indexIsStart ) const
03582 {
03583 int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero );
03584 if ( len != 0 )
03585 *len = matchedLength();
03586 return pos;
03587 }
03588 #endif // QT_NO_COMPAT
03589
03590 int QRegExp::search( const QString& str, int offset ) const
03591 {
03592 return search( str, offset, CaretAtZero );
03593 }
03594
03629 int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const
03630 {
03631 if ( offset < 0 )
03632 offset += str.length();
03633 #ifndef QT_NO_REGEXP_CAPTURE
03634 priv->t = str;
03635 priv->capturedCache.clear();
03636 #endif
03637 priv->captured = eng->match( str, offset, priv->min, FALSE,
03638 caretIndex(offset, caretMode) );
03639 return priv->captured[0];
03640 }
03641
03642
03643 int QRegExp::searchRev( const QString& str, int offset ) const
03644 {
03645 return searchRev( str, offset, CaretAtZero );
03646 }
03647
03668 int QRegExp::searchRev( const QString& str, int offset,
03669 CaretMode caretMode ) const
03670 {
03671 if ( offset < 0 )
03672 offset += str.length();
03673 #ifndef QT_NO_REGEXP_CAPTURE
03674 priv->t = str;
03675 priv->capturedCache.clear();
03676 #endif
03677 if ( offset < 0 || offset > (int) str.length() ) {
03678 priv->captured.detach();
03679 priv->captured.fill( -1 );
03680 return -1;
03681 }
03682
03683 while ( offset >= 0 ) {
03684 priv->captured = eng->match( str, offset, priv->min, TRUE,
03685 caretIndex(offset, caretMode) );
03686 if ( priv->captured[0] == offset )
03687 return offset;
03688 offset--;
03689 }
03690 return -1;
03691 }
03692
03699 int QRegExp::matchedLength() const
03700 {
03701 return priv->captured[1];
03702 }
03703
03704 #ifndef QT_NO_REGEXP_CAPTURE
03705
03708 int QRegExp::numCaptures() const
03709 {
03710 return eng->numCaptures();
03711 }
03712
03713
03714
03770 QStringList QRegExp::capturedTexts()
03771 {
03772 if ( priv->capturedCache.isEmpty() ) {
03773 for ( int i = 0; i < (int) priv->captured.size(); i += 2 ) {
03774 QString m;
03775 if ( priv->captured[i + 1] == 0 )
03776 m = QString::fromLatin1( "" );
03777 else if ( priv->captured[i] >= 0 )
03778 m = priv->t.mid( priv->captured[i],
03779 priv->captured[i + 1] );
03780 priv->capturedCache.append( m );
03781 }
03782 priv->t = QString::null;
03783 }
03784 return priv->capturedCache;
03785 }
03786
03829 QString QRegExp::cap( int nth )
03830 {
03831 if ( nth < 0 || nth >= (int) priv->captured.size() / 2 )
03832 return QString::null;
03833 else
03834 return capturedTexts()[nth];
03835 }
03836
03857 int QRegExp::pos( int nth )
03858 {
03859 if ( nth < 0 || nth >= (int) priv->captured.size() / 2 )
03860 return -1;
03861 else
03862 return priv->captured[2 * nth];
03863 }
03864
03871 QString QRegExp::errorString()
03872 {
03873 if ( isValid() ) {
03874 return QString( RXERR_OK );
03875 } else {
03876 return eng->errorString();
03877 }
03878 }
03879 #endif
03880
03899 QString QRegExp::escape( const QString& str )
03900 {
03901 static const char meta[] = "$()*+.?[\\]^{|}";
03902 QString quoted = str;
03903 int i = 0;
03904
03905 while ( i < (int) quoted.length() ) {
03906 if ( strchr(meta, quoted[i].latin1()) != 0 )
03907 quoted.insert( i++, "\\" );
03908 i++;
03909 }
03910 return quoted;
03911 }
03912
03913 void QRegExp::compile( bool caseSensitive )
03914 {
03915 derefEngine( eng, priv->rxpattern );
03916 #ifndef QT_NO_REGEXP_WILDCARD
03917 if ( priv->wc )
03918 priv->rxpattern = wc2rx( priv->pattern );
03919 else
03920 #endif
03921 priv->rxpattern = priv->pattern.isNull() ? QString::fromLatin1( "" )
03922 : priv->pattern;
03923 eng = newEngine( priv->rxpattern, caseSensitive );
03924 #ifndef QT_NO_REGEXP_CAPTURE
03925 priv->t = QString();
03926 priv->capturedCache.clear();
03927 #endif
03928 priv->captured.detach();
03929 priv->captured.fill( -1, 2 + 2 * eng->numCaptures() );
03930 }
03931
03932 int QRegExp::caretIndex( int offset, CaretMode caretMode )
03933 {
03934 if ( caretMode == CaretAtZero ) {
03935 return 0;
03936 } else if ( caretMode == CaretAtOffset ) {
03937 return offset;
03938 } else {
03939 return -1;
03940 }
03941 }
03942
03943 #endif // QT_NO_REGEXP