Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

qregexp.cpp

Go to the documentation of this file.
00001 /****************************************************************************
00002 ** $Id: qregexp.cpp,v 1.2 2003/07/10 02:40:12 llornkcor Exp $
00003 **
00004 ** Implementation of QRegExp class
00005 **
00006 ** Created : 950126
00007 **
00008 ** Copyright (C) 1992-2000 Trolltech AS.  All rights reserved.
00009 **
00010 ** This file is part of the tools module of the Qt GUI Toolkit.
00011 **
00012 ** This file may be distributed under the terms of the Q Public License
00013 ** as defined by Trolltech AS of Norway and appearing in the file
00014 ** LICENSE.QPL included in the packaging of this file.
00015 **
00016 ** This file may be distributed and/or modified under the terms of the
00017 ** GNU General Public License version 2 as published by the Free Software
00018 ** Foundation and appearing in the file LICENSE.GPL included in the
00019 ** packaging of this file.
00020 **
00021 ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
00022 ** licenses may use this file in accordance with the Qt Commercial License
00023 ** Agreement provided with the Software.
00024 **
00025 ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
00026 ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
00027 **
00028 ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
00029 **   information about Qt Commercial License Agreements.
00030 ** See http://www.trolltech.com/qpl/ for QPL licensing information.
00031 ** See http://www.trolltech.com/gpl/ for GPL licensing information.
00032 **
00033 ** Contact info@trolltech.com if any conditions of this licensing are
00034 ** not clear to you.
00035 **
00036 **********************************************************************/
00037 
00038 #include "qregexp.h"
00039 
00040 #ifndef QT_NO_REGEXP
00041 
00042 #include "qmemarray.h"
00043 #include "qbitarray.h"
00044 #include "qcache.h"
00045 #include "qcleanuphandler.h"
00046 #include "qintdict.h"
00047 #include "qmap.h"
00048 #include "qptrvector.h"
00049 #include "qstring.h"
00050 #include "qtl.h"
00051 
00052 #ifdef QT_THREAD_SUPPORT
00053 #include "qmutexpool_p.h"
00054 #endif // QT_THREAD_SUPPORT
00055 
00056 #undef QT_TRANSLATE_NOOP
00057 #define QT_TRANSLATE_NOOP( context, sourceText ) sourceText
00058 
00059 #include <limits.h>
00060 
00061 // error strings for the regexp parser
00062 #define RXERR_OK         QT_TRANSLATE_NOOP( "QRegExp", "no error occurred" )
00063 #define RXERR_DISABLED   QT_TRANSLATE_NOOP( "QRegExp", "disabled feature used" )
00064 #define RXERR_CHARCLASS  QT_TRANSLATE_NOOP( "QRegExp", "bad char class syntax" )
00065 #define RXERR_LOOKAHEAD  QT_TRANSLATE_NOOP( "QRegExp", "bad lookahead syntax" )
00066 #define RXERR_REPETITION QT_TRANSLATE_NOOP( "QRegExp", "bad repetition syntax" )
00067 #define RXERR_OCTAL      QT_TRANSLATE_NOOP( "QRegExp", "invalid octal value" )
00068 #define RXERR_LEFTDELIM  QT_TRANSLATE_NOOP( "QRegExp", "missing left delim" )
00069 #define RXERR_END        QT_TRANSLATE_NOOP( "QRegExp", "unexpected end" )
00070 #define RXERR_LIMIT      QT_TRANSLATE_NOOP( "QRegExp", "met internal limit" )
00071 
00072 /*
00073   WARNING! Be sure to read qregexp.tex before modifying this file.
00074 */
00075 
00716 const int NumBadChars = 64;
00717 #define BadChar( ch ) ( (ch).unicode() % NumBadChars )
00718 
00719 const int NoOccurrence = INT_MAX;
00720 const int EmptyCapture = INT_MAX;
00721 const int InftyLen = INT_MAX;
00722 const int InftyRep = 1025;
00723 const int EOS = -1;
00724 
00725 static bool isWord( QChar ch )
00726 {
00727     return ch.isLetterOrNumber() || ch == QChar( '_' );
00728 }
00729 
00730 /*
00731   Merges two QMemArrays of ints and puts the result into the first one.
00732 */
00733 static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b )
00734 {
00735     int asize = a->size();
00736     int bsize = b.size();
00737     if ( asize == 0 ) {
00738         *a = b.copy();
00739 #ifndef QT_NO_REGEXP_OPTIM
00740     } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) {
00741         a->resize( asize + 1 );
00742         (*a)[asize] = b[0];
00743 #endif
00744     } else if ( bsize >= 1 ) {
00745         int csize = asize + bsize;
00746         QMemArray<int> c( csize );
00747         int i = 0, j = 0, k = 0;
00748         while ( i < asize ) {
00749             if ( j < bsize ) {
00750                 if ( (*a)[i] == b[j] ) {
00751                     i++;
00752                     csize--;
00753                 } else if ( (*a)[i] < b[j] ) {
00754                     c[k++] = (*a)[i++];
00755                 } else {
00756                     c[k++] = b[j++];
00757                 }
00758             } else {
00759                 memcpy( c.data() + k, (*a).data() + i,
00760                         (asize - i) * sizeof(int) );
00761                 break;
00762             }
00763         }
00764         c.resize( csize );
00765         if ( j < bsize )
00766             memcpy( c.data() + k, b.data() + j, (bsize - j) * sizeof(int) );
00767         *a = c;
00768     }
00769 }
00770 
00771 /*
00772   Merges two disjoint QMaps of (int, int) pairs and puts the result into the
00773   first one.
00774 */
00775 static void mergeInto( QMap<int, int> *a, const QMap<int, int>& b )
00776 {
00777     QMap<int, int>::ConstIterator it;
00778     for ( it = b.begin(); it != b.end(); ++it )
00779         a->insert( it.key(), *it );
00780 }
00781 
00782 /*
00783   Returns the value associated to key k in QMap m of (int, int) pairs, or 0 if
00784   no such value is explicitly present.
00785 */
00786 static int at( const QMap<int, int>& m, int k )
00787 {
00788     QMap<int, int>::ConstIterator it = m.find( k );
00789     if ( it == m.end() )
00790         return 0;
00791     else
00792         return *it;
00793 }
00794 
00795 #ifndef QT_NO_REGEXP_WILDCARD
00796 /*
00797   Translates a wildcard pattern to an equivalent regular expression pattern
00798   (e.g., *.cpp to .*\.cpp).
00799 */
00800 static QString wc2rx( const QString& wc_str )
00801 {
00802     int wclen = wc_str.length();
00803     QString rx = QString::fromLatin1( "" );
00804     int i = 0;
00805     const QChar *wc = wc_str.unicode();
00806     while ( i < wclen ) {
00807         QChar c = wc[i++];
00808         switch ( c.unicode() ) {
00809         case '*':
00810             rx += QString::fromLatin1( ".*" );
00811             break;
00812         case '?':
00813             rx += QChar( '.' );
00814             break;
00815         case '$':
00816         case '(':
00817         case ')':
00818         case '+':
00819         case '.':
00820         case '\\':
00821         case '^':
00822         case '{':
00823         case '|':
00824         case '}':
00825             rx += QChar( '\\' );
00826             rx += c;
00827             break;
00828         case '[':
00829             rx += c;
00830             if ( wc[i] == QChar('^') )
00831                 rx += wc[i++];
00832             if ( i < wclen ) {
00833                 if ( rx[i] == ']' )
00834                     rx += wc[i++];
00835                 while ( i < wclen && wc[i] != QChar(']') ) {
00836                     if ( wc[i] == '\\' )
00837                         rx += QChar( '\\' );
00838                     rx += wc[i++];
00839                 }
00840             }
00841             break;
00842         default:
00843             rx += c;
00844         }
00845     }
00846     return rx;
00847 }
00848 #endif
00849 
00850 /*
00851   The class QRegExpEngine encapsulates a modified nondeterministic
00852   finite automaton (NFA).
00853 */
00854 class QRegExpEngine : public QShared
00855 {
00856 public:
00857 #ifndef QT_NO_REGEXP_CCLASS
00858     /*
00859       The class CharClass represents a set of characters, such as can
00860       be found in regular expressions (e.g., [a-z] denotes the set
00861       {a, b, ..., z}).
00862     */
00863     class CharClass
00864     {
00865     public:
00866         CharClass();
00867         CharClass( const CharClass& cc ) { operator=( cc ); }
00868 
00869         CharClass& operator=( const CharClass& cc );
00870 
00871         void clear();
00872         bool negative() const { return n; }
00873         void setNegative( bool negative );
00874         void addCategories( int cats );
00875         void addRange( ushort from, ushort to );
00876         void addSingleton( ushort ch ) { addRange( ch, ch ); }
00877 
00878         bool in( QChar ch ) const;
00879 #ifndef QT_NO_REGEXP_OPTIM
00880         const QMemArray<int>& firstOccurrence() const { return occ1; }
00881 #endif
00882 
00883 #if defined(QT_DEBUG)
00884         void dump() const;
00885 #endif
00886 
00887     private:
00888         /*
00889           The struct Range represents a range of characters (e.g.,
00890           [0-9] denotes range 48 to 57).
00891         */
00892         struct Range
00893         {
00894             ushort from; // 48
00895             ushort to; // 57
00896         };
00897 
00898         int c; // character classes
00899         QMemArray<Range> r; // character ranges
00900         bool n; // negative?
00901 #ifndef QT_NO_REGEXP_OPTIM
00902         QMemArray<int> occ1; // first-occurrence array
00903 #endif
00904     };
00905 #else
00906     struct CharClass
00907     {
00908         int dummy;
00909 
00910 #ifndef QT_NO_REGEXP_OPTIM
00911         CharClass() { occ1.fill( 0, NumBadChars ); }
00912 
00913         const QMemArray<int>& firstOccurrence() const { return occ1; }
00914         QMemArray<int> occ1;
00915 #endif
00916     };
00917 #endif
00918 
00919     QRegExpEngine( bool caseSensitive ) { setup( caseSensitive ); }
00920     QRegExpEngine( const QString& rx, bool caseSensitive );
00921 #ifndef QT_NO_REGEXP_OPTIM
00922     ~QRegExpEngine();
00923 #endif
00924 
00925     bool isValid() const { return valid; }
00926     const QString& errorString() const { return yyError; }
00927     bool caseSensitive() const { return cs; }
00928     int numCaptures() const { return officialncap; }
00929     QMemArray<int> match( const QString& str, int pos, bool minimal,
00930                           bool oneTest, int caretIndex );
00931     int matchedLength() const { return mmMatchedLen; }
00932 
00933     int createState( QChar ch );
00934     int createState( const CharClass& cc );
00935 #ifndef QT_NO_REGEXP_BACKREF
00936     int createState( int bref );
00937 #endif
00938 
00939     void addCatTransitions( const QMemArray<int>& from,
00940                             const QMemArray<int>& to );
00941 #ifndef QT_NO_REGEXP_CAPTURE
00942     void addPlusTransitions( const QMemArray<int>& from,
00943                              const QMemArray<int>& to, int atom );
00944 #endif
00945 
00946 #ifndef QT_NO_REGEXP_ANCHOR_ALT
00947     int anchorAlternation( int a, int b );
00948     int anchorConcatenation( int a, int b );
00949 #else
00950     int anchorAlternation( int a, int b ) { return a & b; }
00951     int anchorConcatenation( int a, int b ) { return a | b; }
00952 #endif
00953     void addAnchors( int from, int to, int a );
00954 
00955 #ifndef QT_NO_REGEXP_OPTIM
00956     void setupGoodStringHeuristic( int earlyStart, int lateStart,
00957                                    const QString& str );
00958     void setupBadCharHeuristic( int minLen, const QMemArray<int>& firstOcc );
00959     void heuristicallyChooseHeuristic();
00960 #endif
00961 
00962 #if defined(QT_DEBUG)
00963     void dump() const;
00964 #endif
00965 
00966 private:
00967     enum { CharClassBit = 0x10000, BackRefBit = 0x20000 };
00968 
00969     /*
00970       The struct State represents one state in a modified NFA. The
00971       input characters matched are stored in the state instead of on
00972       the transitions, something possible for an automaton
00973       constructed from a regular expression.
00974     */
00975     struct State
00976     {
00977 #ifndef QT_NO_REGEXP_CAPTURE
00978         int atom; // which atom does this state belong to?
00979 #endif
00980         int match; // what does it match? (see CharClassBit and BackRefBit)
00981         QMemArray<int> outs; // out-transitions
00982         QMap<int, int> *reenter; // atoms reentered when transiting out
00983         QMap<int, int> *anchors; // anchors met when transiting out
00984 
00985 #ifndef QT_NO_REGEXP_CAPTURE
00986         State( int a, int m )
00987             : atom( a ), match( m ), reenter( 0 ), anchors( 0 ) { }
00988 #else
00989         State( int m )
00990             : match( m ), reenter( 0 ), anchors( 0 ) { }
00991 #endif
00992         ~State() { delete reenter; delete anchors; }
00993     };
00994 
00995 #ifndef QT_NO_REGEXP_LOOKAHEAD
00996     /*
00997       The struct Lookahead represents a lookahead a la Perl (e.g., (?=foo) and
00998       (?!bar)).
00999     */
01000     struct Lookahead
01001     {
01002         QRegExpEngine *eng; // NFA representing the embedded regular expression
01003         bool neg; // negative lookahead?
01004 
01005         Lookahead( QRegExpEngine *eng0, bool neg0 )
01006             : eng( eng0 ), neg( neg0 ) { }
01007         ~Lookahead() { delete eng; }
01008     };
01009 #endif
01010 
01011 #ifndef QT_NO_REGEXP_CAPTURE
01012     /*
01013       The struct Atom represents one node in the hierarchy of regular
01014       expression atoms.
01015     */
01016     struct Atom
01017     {
01018         int parent; // index of parent in array of atoms
01019         int capture; // index of capture, from 1 to ncap
01020     };
01021 #endif
01022 
01023 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01024     /*
01025       The struct AnchorAlternation represents a pair of anchors with
01026       OR semantics.
01027     */
01028     struct AnchorAlternation
01029     {
01030         int a; // this anchor...
01031         int b; // ...or this one
01032     };
01033 #endif
01034 
01035     enum { InitialState = 0, FinalState = 1 };
01036     void setup( bool caseSensitive );
01037     int setupState( int match );
01038 
01039     /*
01040       Let's hope that 13 lookaheads and 14 back-references are
01041       enough.
01042      */
01043     enum { MaxLookaheads = 13, MaxBackRefs = 14 };
01044     enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002,
01045            Anchor_Word = 0x00000004, Anchor_NonWord = 0x00000008,
01046            Anchor_FirstLookahead = 0x00000010,
01047            Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads,
01048            Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1,
01049            Anchor_Alternation = Anchor_BackRef1Empty << MaxBackRefs,
01050 
01051            Anchor_LookaheadMask = ( Anchor_FirstLookahead - 1 ) ^
01052                    ( (Anchor_FirstLookahead << MaxLookaheads) - 1 ) };
01053 #ifndef QT_NO_REGEXP_CAPTURE
01054     int startAtom( bool capture );
01055     void finishAtom( int atom ) { cf = f[atom].parent; }
01056 #endif
01057 
01058 #ifndef QT_NO_REGEXP_LOOKAHEAD
01059     int addLookahead( QRegExpEngine *eng, bool negative );
01060 #endif
01061 
01062 #ifndef QT_NO_REGEXP_CAPTURE
01063     bool isBetterCapture( const int *begin1, const int *end1, const int *begin2,
01064                           const int *end2 );
01065 #endif
01066     bool testAnchor( int i, int a, const int *capBegin );
01067 
01068 #ifndef QT_NO_REGEXP_OPTIM
01069     bool goodStringMatch();
01070     bool badCharMatch();
01071 #else
01072     bool bruteMatch();
01073 #endif
01074     bool matchHere();
01075 
01076     QPtrVector<State> s; // array of states
01077     int ns; // number of states
01078 #ifndef QT_NO_REGEXP_CAPTURE
01079     QMemArray<Atom> f; // atom hierarchy
01080     int nf; // number of atoms
01081     int cf; // current atom
01082 #endif
01083     int officialncap; // number of captures, seen from the outside
01084     int ncap; // number of captures, seen from the inside
01085 #ifndef QT_NO_REGEXP_CCLASS
01086     QPtrVector<CharClass> cl; // array of character classes
01087 #endif
01088 #ifndef QT_NO_REGEXP_LOOKAHEAD
01089     QPtrVector<Lookahead> ahead; // array of lookaheads
01090 #endif
01091 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01092     QMemArray<AnchorAlternation> aa; // array of (a, b) pairs of anchors
01093 #endif
01094 #ifndef QT_NO_REGEXP_OPTIM
01095     bool caretAnchored; // does the regexp start with ^?
01096 #endif
01097     bool valid; // is the regular expression valid?
01098     bool cs; // case sensitive?
01099 #ifndef QT_NO_REGEXP_BACKREF
01100     int nbrefs; // number of back-references
01101 #endif
01102 
01103 #ifndef QT_NO_REGEXP_OPTIM
01104     bool useGoodStringHeuristic; // use goodStringMatch? otherwise badCharMatch
01105 
01106     int goodEarlyStart; // the index where goodStr can first occur in a match
01107     int goodLateStart; // the index where goodStr can last occur in a match
01108     QString goodStr; // the string that any match has to contain
01109 
01110     int minl; // the minimum length of a match
01111     QMemArray<int> occ1; // first-occurrence array
01112 #endif
01113 
01114     /*
01115       The class Box is an abstraction for a regular expression
01116       fragment. It can also be seen as one node in the syntax tree of
01117       a regular expression with synthetized attributes.
01118 
01119       It's interface is ugly for performance reasons.
01120     */
01121     class Box
01122     {
01123     public:
01124         Box( QRegExpEngine *engine );
01125         Box( const Box& b ) { operator=( b ); }
01126 
01127         Box& operator=( const Box& b );
01128 
01129         void clear() { operator=(Box(eng)); }
01130         void set( QChar ch );
01131         void set( const CharClass& cc );
01132 #ifndef QT_NO_REGEXP_BACKREF
01133         void set( int bref );
01134 #endif
01135 
01136         void cat( const Box& b );
01137         void orx( const Box& b );
01138         void plus( int atom );
01139         void opt();
01140         void catAnchor( int a );
01141 #ifndef QT_NO_REGEXP_OPTIM
01142         void setupHeuristics();
01143 #endif
01144 
01145 #if defined(QT_DEBUG)
01146         void dump() const;
01147 #endif
01148 
01149     private:
01150         void addAnchorsToEngine( const Box& to ) const;
01151 
01152         QRegExpEngine *eng; // the automaton under construction
01153         QMemArray<int> ls; // the left states (firstpos)
01154         QMemArray<int> rs; // the right states (lastpos)
01155         QMap<int, int> lanchors; // the left anchors
01156         QMap<int, int> ranchors; // the right anchors
01157         int skipanchors; // the anchors to match if the box is skipped
01158 
01159 #ifndef QT_NO_REGEXP_OPTIM
01160         int earlyStart; // the index where str can first occur
01161         int lateStart; // the index where str can last occur
01162         QString str; // a string that has to occur in any match
01163         QString leftStr; // a string occurring at the left of this box
01164         QString rightStr; // a string occurring at the right of this box
01165         int maxl; // the maximum length of this box (possibly InftyLen)
01166 #endif
01167 
01168         int minl; // the minimum length of this box
01169 #ifndef QT_NO_REGEXP_OPTIM
01170         QMemArray<int> occ1; // first-occurrence array
01171 #endif
01172     };
01173     friend class Box;
01174 
01175     /*
01176       This is the lexical analyzer for regular expressions.
01177     */
01178     enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen,
01179            Tok_PosLookahead, Tok_NegLookahead, Tok_RightParen, Tok_CharClass,
01180            Tok_Caret, Tok_Quantifier, Tok_Bar, Tok_Word, Tok_NonWord,
01181            Tok_Char = 0x10000, Tok_BackRef = 0x20000 };
01182     int getChar();
01183     int getEscape();
01184 #ifndef QT_NO_REGEXP_INTERVAL
01185     int getRep( int def );
01186 #endif
01187 #ifndef QT_NO_REGEXP_LOOKAHEAD
01188     void skipChars( int n );
01189 #endif
01190     void error( const char *msg );
01191     void startTokenizer( const QChar *rx, int len );
01192     int getToken();
01193 
01194     const QChar *yyIn; // a pointer to the input regular expression pattern
01195     int yyPos0; // the position of yyTok in the input pattern
01196     int yyPos; // the position of the next character to read
01197     int yyLen; // the length of yyIn
01198     int yyCh; // the last character read
01199     CharClass *yyCharClass; // attribute for Tok_CharClass tokens
01200     int yyMinRep; // attribute for Tok_Quantifier
01201     int yyMaxRep; // ditto
01202     QString yyError; // syntax error or overflow during parsing?
01203 
01204     /*
01205       This is the syntactic analyzer for regular expressions.
01206     */
01207     int parse( const QChar *rx, int len );
01208     void parseAtom( Box *box );
01209     void parseFactor( Box *box );
01210     void parseTerm( Box *box );
01211     void parseExpression( Box *box );
01212 
01213     int yyTok; // the last token read
01214     bool yyMayCapture; // set this to FALSE to disable capturing
01215 
01216     /*
01217       This is the engine state during matching.
01218     */
01219     const QString *mmStr; // a pointer to the input QString
01220     const QChar *mmIn; // a pointer to the input string data
01221     int mmPos; // the current position in the string
01222     int mmCaretPos;
01223     int mmLen; // the length of the input string
01224     bool mmMinimal; // minimal matching?
01225     QMemArray<int> mmCaptured; // an array of pairs (start, len)
01226     QMemArray<int> mmCapturedNoMatch; // an array of pairs (-1, -1)
01227     QMemArray<int> mmBigArray; // big QMemArray<int> array
01228     int *mmInNextStack; // is state is mmNextStack?
01229     int *mmCurStack; // stack of current states
01230     int *mmNextStack; // stack of next states
01231     int *mmCurCapBegin; // start of current states' captures
01232     int *mmNextCapBegin; // start of next states' captures
01233     int *mmCurCapEnd; // end of current states' captures
01234     int *mmNextCapEnd; // end of next states' captures
01235     int *mmTempCapBegin; // start of temporary captures
01236     int *mmTempCapEnd; // end of temporary captures
01237     int *mmCapBegin; // start of captures for a next state
01238     int *mmCapEnd; // end of captures for a next state
01239     int *mmSlideTab; // bump-along slide table for bad-character heuristic
01240     int mmSlideTabSize; // size of slide table
01241 #ifndef QT_NO_REGEXP_BACKREF
01242     QIntDict<int> mmSleeping; // dictionary of back-reference sleepers
01243 #endif
01244     int mmMatchLen; // length of match
01245     int mmMatchedLen; // length of partial match
01246 };
01247 
01248 QRegExpEngine::QRegExpEngine( const QString& rx, bool caseSensitive )
01249 #ifndef QT_NO_REGEXP_BACKREF
01250     : mmSleeping( 101 )
01251 #endif
01252 {
01253     setup( caseSensitive );
01254     valid = ( parse(rx.unicode(), rx.length()) == (int) rx.length() );
01255     if ( !valid )
01256         error( RXERR_LEFTDELIM );
01257 }
01258 
01259 #ifndef QT_NO_REGEXP_OPTIM
01260 QRegExpEngine::~QRegExpEngine()
01261 {
01262 }
01263 #endif
01264 
01265 /*
01266   Tries to match in str and returns an array of (begin, length) pairs
01267   for captured text. If there is no match, all pairs are (-1, -1).
01268 */
01269 QMemArray<int> QRegExpEngine::match( const QString& str, int pos, bool minimal,
01270                                      bool oneTest, int caretIndex )
01271 {
01272     mmStr = &str;
01273     mmIn = str.unicode();
01274     if ( mmIn == 0 )
01275         mmIn = &QChar::null;
01276     mmPos = pos;
01277     mmCaretPos = caretIndex;
01278     mmLen = str.length();
01279     mmMinimal = minimal;
01280     mmMatchLen = 0;
01281     mmMatchedLen = 0;
01282 
01283     bool matched = FALSE;
01284     if ( valid && mmPos >= 0 && mmPos <= mmLen ) {
01285 #ifndef QT_NO_REGEXP_OPTIM
01286         if ( oneTest ) {
01287             matched = matchHere();
01288         } else {
01289             if ( mmPos <= mmLen - minl ) {
01290                 if ( caretAnchored ) {
01291                     matched = matchHere();
01292                 } else if ( useGoodStringHeuristic ) {
01293                     matched = goodStringMatch();
01294                 } else {
01295                     matched = badCharMatch();
01296                 }
01297             }
01298         }
01299 #else
01300         matched = oneTest ? matchHere() : bruteMatch();
01301 #endif
01302     }
01303 
01304     if ( matched ) {
01305         mmCaptured.detach();
01306         mmCaptured[0] = mmPos;
01307         mmCaptured[1] = mmMatchLen;
01308         for ( int j = 0; j < officialncap; j++ ) {
01309             int len = mmCapEnd[j] - mmCapBegin[j];
01310             mmCaptured[2 + 2 * j] = len > 0 ? mmPos + mmCapBegin[j] : 0;
01311             mmCaptured[2 + 2 * j + 1] = len;
01312         }
01313         return mmCaptured;
01314     } else {
01315         return mmCapturedNoMatch;
01316     }
01317 }
01318 
01319 /*
01320   The three following functions add one state to the automaton and
01321   return the number of the state.
01322 */
01323 
01324 int QRegExpEngine::createState( QChar ch )
01325 {
01326     return setupState( ch.unicode() );
01327 }
01328 
01329 int QRegExpEngine::createState( const CharClass& cc )
01330 {
01331 #ifndef QT_NO_REGEXP_CCLASS
01332     int n = cl.size();
01333     cl.resize( n + 1 );
01334     cl.insert( n, new CharClass(cc) );
01335     return setupState( CharClassBit | n );
01336 #else
01337     Q_UNUSED( cc );
01338     return setupState( CharClassBit );
01339 #endif
01340 }
01341 
01342 #ifndef QT_NO_REGEXP_BACKREF
01343 int QRegExpEngine::createState( int bref )
01344 {
01345     if ( bref > nbrefs ) {
01346         nbrefs = bref;
01347         if ( nbrefs > MaxBackRefs ) {
01348             error( RXERR_LIMIT );
01349             return 0;
01350         }
01351     }
01352     return setupState( BackRefBit | bref );
01353 }
01354 #endif
01355 
01356 /*
01357   The two following functions add a transition between all pairs of
01358   states (i, j) where i is fond in from, and j is found in to.
01359 
01360   Cat-transitions are distinguished from plus-transitions for
01361   capturing.
01362 */
01363 
01364 void QRegExpEngine::addCatTransitions( const QMemArray<int>& from,
01365                                        const QMemArray<int>& to )
01366 {
01367     for ( int i = 0; i < (int) from.size(); i++ ) {
01368         State *st = s[from[i]];
01369         mergeInto( &st->outs, to );
01370     }
01371 }
01372 
01373 #ifndef QT_NO_REGEXP_CAPTURE
01374 void QRegExpEngine::addPlusTransitions( const QMemArray<int>& from,
01375                                         const QMemArray<int>& to, int atom )
01376 {
01377     for ( int i = 0; i < (int) from.size(); i++ ) {
01378         State *st = s[from[i]];
01379         QMemArray<int> oldOuts = st->outs.copy();
01380         mergeInto( &st->outs, to );
01381         if ( f[atom].capture >= 0 ) {
01382             if ( st->reenter == 0 )
01383                 st->reenter = new QMap<int, int>;
01384             for ( int j = 0; j < (int) to.size(); j++ ) {
01385                 if ( !st->reenter->contains(to[j]) &&
01386                      oldOuts.bsearch(to[j]) < 0 )
01387                     st->reenter->insert( to[j], atom );
01388             }
01389         }
01390     }
01391 }
01392 #endif
01393 
01394 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01395 /*
01396   Returns an anchor that means a OR b.
01397 */
01398 int QRegExpEngine::anchorAlternation( int a, int b )
01399 {
01400     if ( ((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0 )
01401         return a & b;
01402 
01403     int n = aa.size();
01404 #ifndef QT_NO_REGEXP_OPTIM
01405     if ( n > 0 && aa[n - 1].a == a && aa[n - 1].b == b )
01406         return Anchor_Alternation | ( n - 1 );
01407 #endif
01408 
01409     aa.resize( n + 1 );
01410     aa[n].a = a;
01411     aa[n].b = b;
01412     return Anchor_Alternation | n;
01413 }
01414 
01415 /*
01416   Returns an anchor that means a AND b.
01417 */
01418 int QRegExpEngine::anchorConcatenation( int a, int b )
01419 {
01420     if ( ((a | b) & Anchor_Alternation) == 0 )
01421         return a | b;
01422     if ( (b & Anchor_Alternation) != 0 )
01423         qSwap( a, b );
01424 
01425     int aprime = anchorConcatenation( aa[a ^ Anchor_Alternation].a, b );
01426     int bprime = anchorConcatenation( aa[a ^ Anchor_Alternation].b, b );
01427     return anchorAlternation( aprime, bprime );
01428 }
01429 #endif
01430 
01431 /*
01432   Adds anchor a on a transition caracterised by its from state and
01433   its to state.
01434 */
01435 void QRegExpEngine::addAnchors( int from, int to, int a )
01436 {
01437     State *st = s[from];
01438     if ( st->anchors == 0 )
01439         st->anchors = new QMap<int, int>;
01440     if ( st->anchors->contains(to) )
01441         a = anchorAlternation( (*st->anchors)[to], a );
01442     st->anchors->insert( to, a );
01443 }
01444 
01445 #ifndef QT_NO_REGEXP_OPTIM
01446 /*
01447   The two following functions provide the engine with the information
01448   needed by its matching heuristics.
01449 */
01450 
01451 void QRegExpEngine::setupGoodStringHeuristic( int earlyStart, int lateStart,
01452                                               const QString& str )
01453 {
01454     goodEarlyStart = earlyStart;
01455     goodLateStart = lateStart;
01456     goodStr = cs ? str : str.lower();
01457 }
01458 
01459 void QRegExpEngine::setupBadCharHeuristic( int minLen,
01460                                            const QMemArray<int>& firstOcc )
01461 {
01462     minl = minLen;
01463     if ( cs ) {
01464         occ1 = firstOcc;
01465     } else {
01466         occ1.fill( 0, NumBadChars );
01467     }
01468 }
01469 
01470 /*
01471   This function chooses between the good-string and the bad-character
01472   heuristics. It computes two scores and chooses the heuristic with
01473   the highest score.
01474 
01475   Here are some common-sense constraints on the scores that should be
01476   respected if the formulas are ever modified: (1) If goodStr is
01477   empty, the good-string heuristic scores 0. (2) If the search is
01478   case insensitive, the good-string heuristic should be used, unless
01479   it scores 0. (Case insensitivity turns all entries of occ1 to 0.)
01480   (3) If (goodLateStart - goodEarlyStart) is big, the good-string
01481   heuristic should score less.
01482 */
01483 void QRegExpEngine::heuristicallyChooseHeuristic()
01484 {
01485     int i;
01486 
01487     if ( minl == 0 )
01488         return;
01489 
01490     /*
01491       Magic formula:  The good string has to constitute a good
01492       proportion of the minimum-length string, and appear at a
01493       more-or-less known index.
01494     */
01495     int goodStringScore = ( 64 * goodStr.length() / minl ) -
01496                           ( goodLateStart - goodEarlyStart );
01497 
01498     /*
01499       Less magic formula:  We pick a couple of characters at random,
01500       and check whether they are good or bad.
01501     */
01502     int badCharScore = 0;
01503     int step = QMAX( 1, NumBadChars / 32 );
01504     for ( i = 1; i < NumBadChars; i += step ) {
01505         if ( occ1[i] == NoOccurrence )
01506             badCharScore += minl;
01507         else
01508             badCharScore += occ1[i];
01509     }
01510     badCharScore /= minl;
01511 
01512     useGoodStringHeuristic = ( goodStringScore > badCharScore );
01513 }
01514 #endif
01515 
01516 #if defined(QT_DEBUG)
01517 void QRegExpEngine::dump() const
01518 {
01519     int i, j;
01520     qDebug( "Case %ssensitive engine", cs ? "" : "in" );
01521     qDebug( "  States" );
01522     for ( i = 0; i < ns; i++ ) {
01523         qDebug( "  %d%s", i,
01524                 i == InitialState ? " (initial)" :
01525                 i == FinalState ? " (final)" : "" );
01526 #ifndef QT_NO_REGEXP_CAPTURE
01527         qDebug( "    in atom %d", s[i]->atom );
01528 #endif
01529         int m = s[i]->match;
01530         if ( (m & CharClassBit) != 0 ) {
01531             qDebug( "    match character class %d", m ^ CharClassBit );
01532 #ifndef QT_NO_REGEXP_CCLASS
01533             cl[m ^ CharClassBit]->dump();
01534 #else
01535             qDebug( "    negative character class" );
01536 #endif
01537         } else if ( (m & BackRefBit) != 0 ) {
01538             qDebug( "    match back-reference %d", m ^ BackRefBit );
01539         } else if ( m >= 0x20 && m <= 0x7e ) {
01540             qDebug( "    match 0x%.4x (%c)", m, m );
01541         } else {
01542             qDebug( "    match 0x%.4x", m );
01543         }
01544         for ( j = 0; j < (int) s[i]->outs.size(); j++ ) {
01545             int next = s[i]->outs[j];
01546             qDebug( "    -> %d", next );
01547             if ( s[i]->reenter != 0 && s[i]->reenter->contains(next) )
01548                 qDebug( "       [reenter %d]", (*s[i]->reenter)[next] );
01549             if ( s[i]->anchors != 0 && at(*s[i]->anchors, next) != 0 )
01550                 qDebug( "       [anchors 0x%.8x]", (*s[i]->anchors)[next] );
01551         }
01552     }
01553 #ifndef QT_NO_REGEXP_CAPTURE
01554     if ( nf > 0 ) {
01555         qDebug( "  Atom    Parent  Capture" );
01556         for ( i = 0; i < nf; i++ )
01557             qDebug( "  %6d  %6d  %6d", i, f[i].parent, f[i].capture );
01558     }
01559 #endif
01560 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01561     for ( i = 0; i < (int) aa.size(); i++ )
01562         qDebug( "  Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a,
01563                 aa[i].b );
01564 #endif
01565 }
01566 #endif
01567 
01568 void QRegExpEngine::setup( bool caseSensitive )
01569 {
01570     s.setAutoDelete( TRUE );
01571     s.resize( 32 );
01572     ns = 0;
01573 #ifndef QT_NO_REGEXP_CAPTURE
01574     f.resize( 32 );
01575     nf = 0;
01576     cf = -1;
01577 #endif
01578     officialncap = 0;
01579     ncap = 0;
01580 #ifndef QT_NO_REGEXP_CCLASS
01581     cl.setAutoDelete( TRUE );
01582 #endif
01583 #ifndef QT_NO_REGEXP_LOOKAHEAD
01584     ahead.setAutoDelete( TRUE );
01585 #endif
01586 #ifndef QT_NO_REGEXP_OPTIM
01587     caretAnchored = TRUE;
01588 #endif
01589     valid = FALSE;
01590     cs = caseSensitive;
01591 #ifndef QT_NO_REGEXP_BACKREF
01592     nbrefs = 0;
01593 #endif
01594 #ifndef QT_NO_REGEXP_OPTIM
01595     useGoodStringHeuristic = FALSE;
01596     minl = 0;
01597     occ1.fill( 0, NumBadChars );
01598 #endif
01599     mmCapturedNoMatch.fill( -1, 2 );
01600 }
01601 
01602 int QRegExpEngine::setupState( int match )
01603 {
01604     if ( (ns & (ns + 1)) == 0 && ns + 1 >= (int) s.size() )
01605         s.resize( (ns + 1) << 1 );
01606 #ifndef QT_NO_REGEXP_CAPTURE
01607     s.insert( ns, new State(cf, match) );
01608 #else
01609     s.insert( ns, new State(match) );
01610 #endif
01611     return ns++;
01612 }
01613 
01614 #ifndef QT_NO_REGEXP_CAPTURE
01615 /*
01616   Functions startAtom() and finishAtom() should be called to delimit
01617   atoms. When a state is created, it is assigned to the current atom.
01618   The information is later used for capturing.
01619 */
01620 int QRegExpEngine::startAtom( bool capture )
01621 {
01622     if ( (nf & (nf + 1)) == 0 && nf + 1 >= (int) f.size() )
01623         f.resize( (nf + 1) << 1 );
01624     f[nf].parent = cf;
01625     cf = nf++;
01626     f[cf].capture = capture ? ncap++ : -1;
01627     return cf;
01628 }
01629 #endif
01630 
01631 #ifndef QT_NO_REGEXP_LOOKAHEAD
01632 /*
01633   Creates a lookahead anchor.
01634 */
01635 int QRegExpEngine::addLookahead( QRegExpEngine *eng, bool negative )
01636 {
01637     int n = ahead.size();
01638     if ( n == MaxLookaheads ) {
01639         error( RXERR_LIMIT );
01640         return 0;
01641     }
01642     ahead.resize( n + 1 );
01643     ahead.insert( n, new Lookahead(eng, negative) );
01644     return Anchor_FirstLookahead << n;
01645 }
01646 #endif
01647 
01648 #ifndef QT_NO_REGEXP_CAPTURE
01649 /*
01650   We want the longest leftmost captures.
01651 */
01652 bool QRegExpEngine::isBetterCapture( const int *begin1, const int *end1,
01653                                      const int *begin2, const int *end2 )
01654 {
01655     for ( int i = 0; i < ncap; i++ ) {
01656         int delta = begin2[i] - begin1[i]; // it has to start early...
01657         if ( delta == 0 )
01658             delta = end1[i] - end2[i]; // ...and end late (like a party)
01659 
01660         if ( delta != 0 )
01661             return delta > 0;
01662     }
01663     return FALSE;
01664 }
01665 #endif
01666 
01667 /*
01668   Returns TRUE if anchor a matches at position mmPos + i in the input
01669   string, otherwise FALSE.
01670 */
01671 bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
01672 {
01673     int j;
01674 
01675 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01676     if ( (a & Anchor_Alternation) != 0 ) {
01677         return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) ||
01678                testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin );
01679     }
01680 #endif
01681 
01682     if ( (a & Anchor_Caret) != 0 ) {
01683         if ( mmPos + i != mmCaretPos )
01684             return FALSE;
01685     }
01686     if ( (a & Anchor_Dollar) != 0 ) {
01687         if ( mmPos + i != mmLen )
01688             return FALSE;
01689     }
01690 #ifndef QT_NO_REGEXP_ESCAPE
01691     if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) {
01692         bool before = FALSE;
01693         bool after = FALSE;
01694         if ( mmPos + i != 0 )
01695             before = isWord( mmIn[mmPos + i - 1] );
01696         if ( mmPos + i != mmLen )
01697             after = isWord( mmIn[mmPos + i] );
01698         if ( (a & Anchor_Word) != 0 && (before == after) )
01699             return FALSE;
01700         if ( (a & Anchor_NonWord) != 0 && (before != after) )
01701             return FALSE;
01702     }
01703 #endif
01704 #ifndef QT_NO_REGEXP_LOOKAHEAD
01705     bool catchx = TRUE;
01706 
01707     if ( (a & Anchor_LookaheadMask) != 0 ) {
01708         QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i,
01709                                            mmLen - mmPos - i );
01710         for ( j = 0; j < (int) ahead.size(); j++ ) {
01711             if ( (a & (Anchor_FirstLookahead << j)) != 0 ) {
01712                 catchx = ahead[j]->eng->match( cstr.string(), 0, TRUE, TRUE,
01713                                                mmCaretPos - mmPos - i )[0] == 0;
01714                 if ( catchx == ahead[j]->neg )
01715                     return FALSE;
01716             }
01717         }
01718     }
01719 #endif
01720 #ifndef QT_NO_REGEXP_CAPTURE
01721 #ifndef QT_NO_REGEXP_BACKREF
01722     for ( j = 0; j < nbrefs; j++ ) {
01723         if ( (a & (Anchor_BackRef1Empty << j)) != 0 ) {
01724             if ( capBegin[j] != EmptyCapture )
01725                 return FALSE;
01726         }
01727     }
01728 #endif
01729 #endif
01730     return TRUE;
01731 }
01732 
01733 #ifndef QT_NO_REGEXP_OPTIM
01734 /*
01735   The three following functions are what Jeffrey Friedl would call
01736   transmissions (or bump-alongs). Using one or the other should make
01737   no difference except in performance.
01738 */
01739 
01740 bool QRegExpEngine::goodStringMatch()
01741 {
01742     int k = mmPos + goodEarlyStart;
01743 
01744     while ( (k = mmStr->find(goodStr, k, cs)) != -1 ) {
01745         int from = k - goodLateStart;
01746         int to = k - goodEarlyStart;
01747         if ( from > mmPos )
01748             mmPos = from;
01749 
01750         while ( mmPos <= to ) {
01751             if ( matchHere() )
01752                 return TRUE;
01753             mmPos++;
01754         }
01755         k++;
01756     }
01757     return FALSE;
01758 }
01759 
01760 bool QRegExpEngine::badCharMatch()
01761 {
01762     int slideHead = 0;
01763     int slideNext = 0;
01764     int i;
01765     int lastPos = mmLen - minl;
01766     memset( mmSlideTab, 0, mmSlideTabSize * sizeof(int) );
01767 
01768     /*
01769       Set up the slide table, used for the bad-character heuristic,
01770       using the table of first occurrence of each character.
01771     */
01772     for ( i = 0; i < minl; i++ ) {
01773         int sk = occ1[BadChar(mmIn[mmPos + i])];
01774         if ( sk == NoOccurrence )
01775             sk = i + 1;
01776         if ( sk > 0 ) {
01777             int k = i + 1 - sk;
01778             if ( k < 0 ) {
01779                 sk = i + 1;
01780                 k = 0;
01781             }
01782             if ( sk > mmSlideTab[k] )
01783                 mmSlideTab[k] = sk;
01784         }
01785     }
01786 
01787     if ( mmPos > lastPos )
01788         return FALSE;
01789 
01790     for ( ;; ) {
01791         if ( ++slideNext >= mmSlideTabSize )
01792             slideNext = 0;
01793         if ( mmSlideTab[slideHead] > 0 ) {
01794             if ( mmSlideTab[slideHead] - 1 > mmSlideTab[slideNext] )
01795                 mmSlideTab[slideNext] = mmSlideTab[slideHead] - 1;
01796             mmSlideTab[slideHead] = 0;
01797         } else {
01798             if ( matchHere() )
01799                 return TRUE;
01800         }
01801 
01802         if ( mmPos == lastPos )
01803             break;
01804 
01805         /*
01806           Update the slide table. This code has much in common with
01807           the initialization code.
01808         */
01809         int sk = occ1[BadChar(mmIn[mmPos + minl])];
01810         if ( sk == NoOccurrence ) {
01811             mmSlideTab[slideNext] = minl;
01812         } else if ( sk > 0 ) {
01813             int k = slideNext + minl - sk;
01814             if ( k >= mmSlideTabSize )
01815                 k -= mmSlideTabSize;
01816             if ( sk > mmSlideTab[k] )
01817                 mmSlideTab[k] = sk;
01818         }
01819         slideHead = slideNext;
01820         mmPos++;
01821     }
01822     return FALSE;
01823 }
01824 #else
01825 bool QRegExpEngine::bruteMatch()
01826 {
01827     while ( mmPos <= mmLen ) {
01828         if ( matchHere() )
01829             return TRUE;
01830         mmPos++;
01831     }
01832     return FALSE;
01833 }
01834 #endif
01835 
01836 /*
01837   Here's the core of the engine. It tries to do a match here and now.
01838 */
01839 bool QRegExpEngine::matchHere()
01840 {
01841     int ncur = 1, nnext = 0;
01842     int i = 0, j, k, m;
01843     bool stop = FALSE;
01844 
01845     mmMatchLen = -1;
01846     mmMatchedLen = -1;
01847     mmCurStack[0] = InitialState;
01848 
01849 #ifndef QT_NO_REGEXP_CAPTURE
01850     if ( ncap > 0 ) {
01851         for ( j = 0; j < ncap; j++ ) {
01852             mmCurCapBegin[j] = EmptyCapture;
01853             mmCurCapEnd[j] = EmptyCapture;
01854         }
01855     }
01856 #endif
01857 
01858 #ifndef QT_NO_REGEXP_BACKREF
01859     int *zzZ = 0;
01860 
01861     while ( (ncur > 0 || !mmSleeping.isEmpty()) && i <= mmLen - mmPos &&
01862             !stop )
01863 #else
01864     while ( ncur > 0 && i <= mmLen - mmPos && !stop )
01865 #endif
01866     {
01867         int ch = ( i < mmLen - mmPos ) ? mmIn[mmPos + i].unicode() : 0;
01868         for ( j = 0; j < ncur; j++ ) {
01869             int cur = mmCurStack[j];
01870             State *scur = s[cur];
01871             QMemArray<int>& outs = scur->outs;
01872             for ( k = 0; k < (int) outs.size(); k++ ) {
01873                 int next = outs[k];
01874                 State *snext = s[next];
01875                 bool in = TRUE;
01876 #ifndef QT_NO_REGEXP_BACKREF
01877                 int needSomeSleep = 0;
01878 #endif
01879 
01880                 /*
01881                   First, check if the anchors are anchored properly.
01882                 */
01883                 if ( scur->anchors != 0 ) {
01884                     int a = at( *scur->anchors, next );
01885                     if ( a != 0 && !testAnchor(i, a, mmCurCapBegin + j * ncap) )
01886                         in = FALSE;
01887                 }
01888                 /*
01889                   If indeed they are, check if the input character is
01890                   correct for this transition.
01891                 */
01892                 if ( in ) {
01893                     m = snext->match;
01894                     if ( (m & (CharClassBit | BackRefBit)) == 0 ) {
01895                         if ( cs )
01896                             in = ( m == ch );
01897                         else
01898                             in = ( QChar(m).lower() == QChar(ch).lower() );
01899                     } else if ( next == FinalState ) {
01900                         mmMatchLen = i;
01901                         stop = mmMinimal;
01902                         in = TRUE;
01903                     } else if ( (m & CharClassBit) != 0 ) {
01904 #ifndef QT_NO_REGEXP_CCLASS
01905                         const CharClass *cc = cl[m ^ CharClassBit];
01906                         if ( cs )
01907                             in = cc->in( ch );
01908                         else if ( cc->negative() )
01909                             in = cc->in( QChar(ch).lower() ) &&
01910                                  cc->in( QChar(ch).upper() );
01911                         else
01912                             in = cc->in( QChar(ch).lower() ) ||
01913                                  cc->in( QChar(ch).upper() );
01914 #endif
01915 #ifndef QT_NO_REGEXP_BACKREF
01916                     } else { /* ( (m & BackRefBit) != 0 ) */
01917                         int bref = m ^ BackRefBit;
01918                         int ell = j * ncap + ( bref - 1 );
01919 
01920                         in = bref <= ncap && mmCurCapBegin[ell] != EmptyCapture;
01921                         if ( in ) {
01922                             if ( cs )
01923                                 in = ( mmIn[mmPos + mmCurCapBegin[ell]]
01924                                        == QChar(ch) );
01925                             else
01926                                 in = ( mmIn[mmPos + mmCurCapBegin[ell]].lower()
01927                                        == QChar(ch).lower() );
01928                         }
01929 
01930                         if ( in ) {
01931                             int delta;
01932                             if ( mmCurCapEnd[ell] == EmptyCapture )
01933                                 delta = i - mmCurCapBegin[ell];
01934                             else
01935                                 delta = mmCurCapEnd[ell] - mmCurCapBegin[ell];
01936 
01937                             in = ( delta <= mmLen - (mmPos + i) );
01938                             if ( in && delta > 1 ) {
01939                                 int n = 1;
01940                                 if ( cs ) {
01941                                     while ( n < delta ) {
01942                                         if ( mmIn[mmPos +
01943                                                   mmCurCapBegin[ell] + n] !=
01944                                              mmIn[mmPos + i + n] )
01945                                             break;
01946                                         n++;
01947                                     }
01948                                 } else {
01949                                     while ( n < delta ) {
01950                                         QChar a = mmIn[mmPos +
01951                                                        mmCurCapBegin[ell] + n];
01952                                         QChar b = mmIn[mmPos + i + n];
01953                                         if ( a.lower() != b.lower() )
01954                                             break;
01955                                         n++;
01956                                     }
01957                                 }
01958                                 in = ( n == delta );
01959                                 if ( in )
01960                                     needSomeSleep = delta - 1;
01961                             }
01962                         }
01963 #endif
01964                     }
01965                 }
01966 
01967                 /*
01968                   We must now update our data structures.
01969                 */
01970                 if ( in ) {
01971 #ifndef QT_NO_REGEXP_CAPTURE
01972                     int *capBegin, *capEnd;
01973 #endif
01974                     /*
01975                       If the next state was not encountered yet, all
01976                       is fine.
01977                     */
01978                     if ( (m = mmInNextStack[next]) == -1 ) {
01979                         m = nnext++;
01980                         mmNextStack[m] = next;
01981                         mmInNextStack[next] = m;
01982 #ifndef QT_NO_REGEXP_CAPTURE
01983                         capBegin = mmNextCapBegin + m * ncap;
01984                         capEnd = mmNextCapEnd + m * ncap;
01985 
01986                     /*
01987                       Otherwise, we'll first maintain captures in
01988                       temporary arrays, and decide at the end whether
01989                       it's best to keep the previous capture zones or
01990                       the new ones.
01991                     */
01992                     } else {
01993                         capBegin = mmTempCapBegin;
01994                         capEnd = mmTempCapEnd;
01995 #endif
01996                     }
01997 
01998 #ifndef QT_NO_REGEXP_CAPTURE
01999                     /*
02000                       Updating the capture zones is much of a task.
02001                     */
02002                     if ( ncap > 0 ) {
02003                         memcpy( capBegin, mmCurCapBegin + j * ncap,
02004                                 ncap * sizeof(int) );
02005                         memcpy( capEnd, mmCurCapEnd + j * ncap,
02006                                 ncap * sizeof(int) );
02007                         int c = scur->atom, n = snext->atom;
02008                         int p = -1, q = -1;
02009                         int cap;
02010 
02011                         /*
02012                           Lemma 1. For any x in the range [0..nf), we
02013                           have f[x].parent < x.
02014 
02015                           Proof. By looking at startAtom(), it is
02016                           clear that cf < nf holds all the time, and
02017                           thus that f[nf].parent < nf.
02018                         */
02019 
02020                         /*
02021                           If we are reentering an atom, we empty all
02022                           capture zones inside it.
02023                         */
02024                         if ( scur->reenter != 0 &&
02025                              (q = at(*scur->reenter, next)) != 0 ) {
02026                             QBitArray b;
02027                             b.fill( FALSE, nf );
02028                             b.setBit( q, TRUE );
02029                             for ( int ell = q + 1; ell < nf; ell++ ) {
02030                                 if ( b.testBit(f[ell].parent) ) {
02031                                     b.setBit( ell, TRUE );
02032                                     cap = f[ell].capture;
02033                                     if ( cap >= 0 ) {
02034                                         capBegin[cap] = EmptyCapture;
02035                                         capEnd[cap] = EmptyCapture;
02036                                     }
02037                                 }
02038                             }
02039                             p = f[q].parent;
02040 
02041                         /*
02042                           Otherwise, close the capture zones we are
02043                           leaving. We are leaving f[c].capture,
02044                           f[f[c].parent].capture,
02045                           f[f[f[c].parent].parent].capture, ...,
02046                           until f[x].capture, with x such that
02047                           f[x].parent is the youngest common ancestor
02048                           for c and n.
02049 
02050                           We go up along c's and n's ancestry until
02051                           we find x.
02052                         */
02053                         } else {
02054                             p = c;
02055                             q = n;
02056                             while ( p != q ) {
02057                                 if ( p > q ) {
02058                                     cap = f[p].capture;
02059                                     if ( cap >= 0 ) {
02060                                         if ( capBegin[cap] == i ) {
02061                                             capBegin[cap] = EmptyCapture;
02062                                             capEnd[cap] = EmptyCapture;
02063                                         } else {
02064                                             capEnd[cap] = i;
02065                                         }
02066                                     }
02067                                     p = f[p].parent;
02068                                 } else {
02069                                     q = f[q].parent;
02070                                 }
02071                             }
02072                         }
02073 
02074                         /*
02075                           In any case, we now open the capture zones
02076                           we are entering. We work upwards from n
02077                           until we reach p (the parent of the atom we
02078                           reenter or the youngest common ancestor).
02079                         */
02080                         while ( n > p ) {
02081                             cap = f[n].capture;
02082                             if ( cap >= 0 ) {
02083                                 capBegin[cap] = i;
02084                                 capEnd[cap] = EmptyCapture;
02085                             }
02086                             n = f[n].parent;
02087                         }
02088                         /*
02089                           If the next state was already in
02090                           mmNextStack, we must choose carefully which
02091                           capture zones we want to keep.
02092                         */
02093                         if ( capBegin == mmTempCapBegin &&
02094                              isBetterCapture(capBegin, capEnd,
02095                                              mmNextCapBegin + m * ncap,
02096                                              mmNextCapEnd + m * ncap) ) {
02097                             memcpy( mmNextCapBegin + m * ncap, capBegin,
02098                                     ncap * sizeof(int) );
02099                             memcpy( mmNextCapEnd + m * ncap, capEnd,
02100                                     ncap * sizeof(int) );
02101                         }
02102                     }
02103 #ifndef QT_NO_REGEXP_BACKREF
02104                     /*
02105                       We are done with updating the capture zones.
02106                       It's now time to put the next state to sleep,
02107                       if it needs to, and to remove it from
02108                       mmNextStack.
02109                     */
02110                     if ( needSomeSleep > 0 ) {
02111                         zzZ = new int[1 + 2 * ncap];
02112                         zzZ[0] = next;
02113                         if ( ncap > 0 ) {
02114                             memcpy( zzZ + 1, capBegin, ncap * sizeof(int) );
02115                             memcpy( zzZ + 1 + ncap, capEnd,
02116                                     ncap * sizeof(int) );
02117                         }
02118                         mmInNextStack[mmNextStack[--nnext]] = -1;
02119                         mmSleeping.insert( i + needSomeSleep, zzZ );
02120                     }
02121 #endif
02122 #endif
02123                 }
02124             }
02125         }
02126 #ifndef QT_NO_REGEXP_CAPTURE
02127         /*
02128           If we reached the final state, hurray! Copy the captured
02129           zone.
02130         */
02131         if ( ncap > 0 && (m = mmInNextStack[FinalState]) != -1 ) {
02132             memcpy( mmCapBegin, mmNextCapBegin + m * ncap, ncap * sizeof(int) );
02133             memcpy( mmCapEnd, mmNextCapEnd + m * ncap, ncap * sizeof(int) );
02134         }
02135 #ifndef QT_NO_REGEXP_BACKREF
02136         /*
02137           It's time to wake up the sleepers.
02138         */
02139         if ( !mmSleeping.isEmpty() ) {
02140             while ( (zzZ = mmSleeping.take(i)) != 0 ) {
02141                 int next = zzZ[0];
02142                 int *capBegin = zzZ + 1;
02143                 int *capEnd = zzZ + 1 + ncap;
02144                 bool copyOver = TRUE;
02145 
02146                 if ( (m = mmInNextStack[zzZ[0]]) == -1 ) {
02147                     m = nnext++;
02148                     mmNextStack[m] = next;
02149                     mmInNextStack[next] = m;
02150                 } else {
02151                     copyOver = isBetterCapture( mmNextCapBegin + m * ncap,
02152                                                 mmNextCapEnd + m * ncap,
02153                                                 capBegin, capEnd );
02154                 }
02155                 if ( copyOver ) {
02156                     memcpy( mmNextCapBegin + m * ncap, capBegin,
02157                             ncap * sizeof(int) );
02158                     memcpy( mmNextCapEnd + m * ncap, capEnd,
02159                             ncap * sizeof(int) );
02160                 }
02161                 delete[] zzZ;
02162             }
02163         }
02164 #endif
02165 #endif
02166         for ( j = 0; j < nnext; j++ )
02167             mmInNextStack[mmNextStack[j]] = -1;
02168 
02169         // avoid needless iteration that confuses mmMatchedLen
02170         if ( nnext == 1 && mmNextStack[0] == FinalState
02171 #ifndef QT_NO_REGEXP_BACKREF
02172              && mmSleeping.isEmpty()
02173 #endif
02174              )
02175             stop = TRUE;
02176 
02177         qSwap( mmCurStack, mmNextStack );
02178 #ifndef QT_NO_REGEXP_CAPTURE
02179         qSwap( mmCurCapBegin, mmNextCapBegin );
02180         qSwap( mmCurCapEnd, mmNextCapEnd );
02181 #endif
02182         ncur = nnext;
02183         nnext = 0;
02184         i++;
02185     }
02186 
02187 #ifndef QT_NO_REGEXP_BACKREF
02188     /*
02189       If minimal matching is enabled, we might have some sleepers
02190       left.
02191     */
02192     while ( !mmSleeping.isEmpty() ) {
02193         zzZ = mmSleeping.take( *QIntDictIterator<int>(mmSleeping) );
02194         delete[] zzZ;
02195     }
02196 #endif
02197 
02198     mmMatchedLen = i - 1;
02199     return ( mmMatchLen >= 0 );
02200 }
02201 
02202 #ifndef QT_NO_REGEXP_CCLASS
02203 
02204 QRegExpEngine::CharClass::CharClass()
02205     : c( 0 ), n( FALSE )
02206 {
02207 #ifndef QT_NO_REGEXP_OPTIM
02208     occ1.fill( NoOccurrence, NumBadChars );
02209 #endif
02210 }
02211 
02212 QRegExpEngine::CharClass& QRegExpEngine::CharClass::operator=(
02213         const CharClass& cc )
02214 {
02215     c = cc.c;
02216     r = cc.r.copy();
02217     n = cc.n;
02218 #ifndef QT_NO_REGEXP_OPTIM
02219     occ1 = cc.occ1;
02220 #endif
02221     return *this;
02222 }
02223 
02224 void QRegExpEngine::CharClass::clear()
02225 {
02226     c = 0;
02227     r.resize( 0 );
02228     n = FALSE;
02229 }
02230 
02231 void QRegExpEngine::CharClass::setNegative( bool negative )
02232 {
02233     n = negative;
02234 #ifndef QT_NO_REGEXP_OPTIM
02235     occ1.fill( 0, NumBadChars );
02236 #endif
02237 }
02238 
02239 void QRegExpEngine::CharClass::addCategories( int cats )
02240 {
02241     c |= cats;
02242 #ifndef QT_NO_REGEXP_OPTIM
02243     occ1.fill( 0, NumBadChars );
02244 #endif
02245 }
02246 
02247 void QRegExpEngine::CharClass::addRange( ushort from, ushort to )
02248 {
02249     if ( from > to )
02250         qSwap( from, to );
02251     int m = r.size();
02252     r.resize( m + 1 );
02253     r[m].from = from;
02254     r[m].to = to;
02255 
02256 #ifndef QT_NO_REGEXP_OPTIM
02257     int i;
02258 
02259     if ( to - from < NumBadChars ) {
02260         occ1.detach();
02261         if ( from % NumBadChars <= to % NumBadChars ) {
02262             for ( i = from % NumBadChars; i <= to % NumBadChars; i++ )
02263                 occ1[i] = 0;
02264         } else {
02265             for ( i = 0; i <= to % NumBadChars; i++ )
02266                 occ1[i] = 0;
02267             for ( i = from % NumBadChars; i < NumBadChars; i++ )
02268                 occ1[i] = 0;
02269         }
02270     } else {
02271         occ1.fill( 0, NumBadChars );
02272     }
02273 #endif
02274 }
02275 
02276 bool QRegExpEngine::CharClass::in( QChar ch ) const
02277 {
02278 #ifndef QT_NO_REGEXP_OPTIM
02279     if ( occ1[BadChar(ch)] == NoOccurrence )
02280         return n;
02281 #endif
02282 
02283     if ( c != 0 && (c & (1 << (int) ch.category())) != 0 )
02284         return !n;
02285     for ( int i = 0; i < (int) r.size(); i++ ) {
02286         if ( ch.unicode() >= r[i].from && ch.unicode() <= r[i].to )
02287             return !n;
02288     }
02289     return n;
02290 }
02291 
02292 #if defined(QT_DEBUG)
02293 void QRegExpEngine::CharClass::dump() const
02294 {
02295     int i;
02296     qDebug( "    %stive character class", n ? "nega" : "posi" );
02297 #ifndef QT_NO_REGEXP_CCLASS
02298     if ( c != 0 )
02299         qDebug( "      categories 0x%.8x", c );
02300 #endif
02301     for ( i = 0; i < (int) r.size(); i++ )
02302         qDebug( "      0x%.4x through 0x%.4x", r[i].from, r[i].to );
02303 }
02304 #endif
02305 #endif
02306 
02307 QRegExpEngine::Box::Box( QRegExpEngine *engine )
02308     : eng( engine ), skipanchors( 0 )
02309 #ifndef QT_NO_REGEXP_OPTIM
02310       , earlyStart( 0 ), lateStart( 0 ), maxl( 0 )
02311 #endif
02312 {
02313 #ifndef QT_NO_REGEXP_OPTIM
02314     occ1.fill( NoOccurrence, NumBadChars );
02315 #endif
02316     minl = 0;
02317 }
02318 
02319 QRegExpEngine::Box& QRegExpEngine::Box::operator=( const Box& b )
02320 {
02321     eng = b.eng;
02322     ls = b.ls;
02323     rs = b.rs;
02324     lanchors = b.lanchors;
02325     ranchors = b.ranchors;
02326     skipanchors = b.skipanchors;
02327 #ifndef QT_NO_REGEXP_OPTIM
02328     earlyStart = b.earlyStart;
02329     lateStart = b.lateStart;
02330     str = b.str;
02331     leftStr = b.leftStr;
02332     rightStr = b.rightStr;
02333     maxl = b.maxl;
02334     occ1 = b.occ1;
02335 #endif
02336     minl = b.minl;
02337     return *this;
02338 }
02339 
02340 void QRegExpEngine::Box::set( QChar ch )
02341 {
02342     ls.resize( 1 );
02343     ls[0] = eng->createState( ch );
02344     rs = ls;
02345     rs.detach();
02346 #ifndef QT_NO_REGEXP_OPTIM
02347     str = ch;
02348     leftStr = ch;
02349     rightStr = ch;
02350     maxl = 1;
02351     occ1.detach();
02352     occ1[BadChar(ch)] = 0;
02353 #endif
02354     minl = 1;
02355 }
02356 
02357 void QRegExpEngine::Box::set( const CharClass& cc )
02358 {
02359     ls.resize( 1 );
02360     ls[0] = eng->createState( cc );
02361     rs = ls;
02362     rs.detach();
02363 #ifndef QT_NO_REGEXP_OPTIM
02364     maxl = 1;
02365     occ1 = cc.firstOccurrence();
02366 #endif
02367     minl = 1;
02368 }
02369 
02370 #ifndef QT_NO_REGEXP_BACKREF
02371 void QRegExpEngine::Box::set( int bref )
02372 {
02373     ls.resize( 1 );
02374     ls[0] = eng->createState( bref );
02375     rs = ls;
02376     rs.detach();
02377     if ( bref >= 1 && bref <= MaxBackRefs )
02378         skipanchors = Anchor_BackRef0Empty << bref;
02379 #ifndef QT_NO_REGEXP_OPTIM
02380     maxl = InftyLen;
02381 #endif
02382     minl = 0;
02383 }
02384 #endif
02385 
02386 void QRegExpEngine::Box::cat( const Box& b )
02387 {
02388     eng->addCatTransitions( rs, b.ls );
02389     addAnchorsToEngine( b );
02390     if ( minl == 0 ) {
02391         mergeInto( &lanchors, b.lanchors );
02392         if ( skipanchors != 0 ) {
02393             for ( int i = 0; i < (int) b.ls.size(); i++ ) {
02394                 int a = eng->anchorConcatenation( at(lanchors, b.ls[i]),
02395                                                   skipanchors );
02396                 lanchors.insert( b.ls[i], a );
02397             }
02398         }
02399         mergeInto( &ls, b.ls );
02400     }
02401     if ( b.minl == 0 ) {
02402         mergeInto( &ranchors, b.ranchors );
02403         if ( b.skipanchors != 0 ) {
02404             for ( int i = 0; i < (int) rs.size(); i++ ) {
02405                 int a = eng->anchorConcatenation( at(ranchors, rs[i]),
02406                                                   b.skipanchors );
02407                 ranchors.insert( rs[i], a );
02408             }
02409         }
02410         mergeInto( &rs, b.rs );
02411     } else {
02412         ranchors = b.ranchors;
02413         rs = b.rs;
02414     }
02415 
02416 #ifndef QT_NO_REGEXP_OPTIM
02417     if ( maxl != InftyLen ) {
02418         if ( rightStr.length() + b.leftStr.length() >
02419              QMAX(str.length(), b.str.length()) ) {
02420             earlyStart = minl - rightStr.length();
02421             lateStart = maxl - rightStr.length();
02422             str = rightStr + b.leftStr;
02423         } else if ( b.str.length() > str.length() ) {
02424             earlyStart = minl + b.earlyStart;
02425             lateStart = maxl + b.lateStart;
02426             str = b.str;
02427         }
02428     }
02429 
02430     if ( (int) leftStr.length() == maxl )
02431         leftStr += b.leftStr;
02432     if ( (int) b.rightStr.length() == b.maxl )
02433         rightStr += b.rightStr;
02434     else
02435         rightStr = b.rightStr;
02436 
02437     if ( maxl == InftyLen || b.maxl == InftyLen )
02438         maxl = InftyLen;
02439     else
02440         maxl += b.maxl;
02441 
02442     occ1.detach();
02443     for ( int i = 0; i < NumBadChars; i++ ) {
02444         if ( b.occ1[i] != NoOccurrence && minl + b.occ1[i] < occ1[i] )
02445             occ1[i] = minl + b.occ1[i];
02446     }
02447 #endif
02448 
02449     minl += b.minl;
02450     if ( minl == 0 )
02451         skipanchors = eng->anchorConcatenation( skipanchors, b.skipanchors );
02452     else
02453         skipanchors = 0;
02454 }
02455 
02456 void QRegExpEngine::Box::orx( const Box& b )
02457 {
02458     mergeInto( &ls, b.ls );
02459     mergeInto( &lanchors, b.lanchors );
02460     mergeInto( &rs, b.rs );
02461     mergeInto( &ranchors, b.ranchors );
02462 
02463     if ( b.minl == 0 ) {
02464         if ( minl == 0 )
02465             skipanchors = eng->anchorAlternation( skipanchors, b.skipanchors );
02466         else
02467             skipanchors = b.skipanchors;
02468     }
02469 
02470 #ifndef QT_NO_REGEXP_OPTIM
02471     occ1.detach();
02472     for ( int i = 0; i < NumBadChars; i++ ) {
02473         if ( occ1[i] > b.occ1[i] )
02474             occ1[i] = b.occ1[i];
02475     }
02476     earlyStart = 0;
02477     lateStart = 0;
02478     str = QString();
02479     leftStr = QString();
02480     rightStr = QString();
02481     if ( b.maxl > maxl )
02482         maxl = b.maxl;
02483 #endif
02484     if ( b.minl < minl )
02485         minl = b.minl;
02486 }
02487 
02488 void QRegExpEngine::Box::plus( int atom )
02489 {
02490 #ifndef QT_NO_REGEXP_CAPTURE
02491     eng->addPlusTransitions( rs, ls, atom );
02492 #else
02493     Q_UNUSED( atom );
02494     eng->addCatTransitions( rs, ls );
02495 #endif
02496     addAnchorsToEngine( *this );
02497 #ifndef QT_NO_REGEXP_OPTIM
02498     maxl = InftyLen;
02499 #endif
02500 }
02501 
02502 void QRegExpEngine::Box::opt()
02503 {
02504 #ifndef QT_NO_REGEXP_OPTIM
02505     earlyStart = 0;
02506     lateStart = 0;
02507     str = QString();
02508     leftStr = QString();
02509     rightStr = QString();
02510 #endif
02511     skipanchors = 0;
02512     minl = 0;
02513 }
02514 
02515 void QRegExpEngine::Box::catAnchor( int a )
02516 {
02517     if ( a != 0 ) {
02518         for ( int i = 0; i < (int) rs.size(); i++ ) {
02519             a = eng->anchorConcatenation( at(ranchors, rs[i]), a );
02520             ranchors.insert( rs[i], a );
02521         }
02522         if ( minl == 0 )
02523             skipanchors = eng->anchorConcatenation( skipanchors, a );
02524     }
02525 }
02526 
02527 #ifndef QT_NO_REGEXP_OPTIM
02528 void QRegExpEngine::Box::setupHeuristics()
02529 {
02530     eng->setupGoodStringHeuristic( earlyStart, lateStart, str );
02531 
02532     /*
02533       A regular expression such as 112|1 has occ1['2'] = 2 and minl =
02534       1 at this point. An entry of occ1 has to be at most minl or
02535       infinity for the rest of the algorithm to go well.
02536 
02537       We waited until here before normalizing these cases (instead of
02538       doing it in Box::orx()) because sometimes things improve by
02539       themselves. Consider for example (112|1)34.
02540     */
02541     for ( int i = 0; i < NumBadChars; i++ ) {
02542         if ( occ1[i] != NoOccurrence && occ1[i] >= minl )
02543             occ1[i] = minl;
02544     }
02545     eng->setupBadCharHeuristic( minl, occ1 );
02546 
02547     eng->heuristicallyChooseHeuristic();
02548 }
02549 #endif
02550 
02551 #if defined(QT_DEBUG)
02552 void QRegExpEngine::Box::dump() const
02553 {
02554     int i;
02555     qDebug( "Box of at least %d character%s", minl, minl == 1 ? "" : "s" );
02556     qDebug( "  Left states:" );
02557     for ( i = 0; i < (int) ls.size(); i++ ) {
02558         if ( at(lanchors, ls[i]) == 0 )
02559             qDebug( "    %d", ls[i] );
02560         else
02561             qDebug( "    %d [anchors 0x%.8x]", ls[i], lanchors[ls[i]] );
02562     }
02563     qDebug( "  Right states:" );
02564     for ( i = 0; i < (int) rs.size(); i++ ) {
02565         if ( at(ranchors, rs[i]) == 0 )
02566             qDebug( "    %d", rs[i] );
02567         else
02568             qDebug( "    %d [anchors 0x%.8x]", rs[i], ranchors[rs[i]] );
02569     }
02570     qDebug( "  Skip anchors: 0x%.8x", skipanchors );
02571 }
02572 #endif
02573 
02574 void QRegExpEngine::Box::addAnchorsToEngine( const Box& to ) const
02575 {
02576     for ( int i = 0; i < (int) to.ls.size(); i++ ) {
02577         for ( int j = 0; j < (int) rs.size(); j++ ) {
02578             int a = eng->anchorConcatenation( at(ranchors, rs[j]),
02579                                               at(to.lanchors, to.ls[i]) );
02580             eng->addAnchors( rs[j], to.ls[i], a );
02581         }
02582     }
02583 }
02584 
02585 int QRegExpEngine::getChar()
02586 {
02587     return ( yyPos == yyLen ) ? EOS : yyIn[yyPos++].unicode();
02588 }
02589 
02590 int QRegExpEngine::getEscape()
02591 {
02592 #ifndef QT_NO_REGEXP_ESCAPE
02593     const char tab[] = "afnrtv"; // no b, as \b means word boundary
02594     const char backTab[] = "\a\f\n\r\t\v";
02595     ushort low;
02596     int i;
02597 #endif
02598     ushort val;
02599     int prevCh = yyCh;
02600 
02601     if ( prevCh == EOS ) {
02602         error( RXERR_END );
02603         return Tok_Char | '\\';
02604     }
02605     yyCh = getChar();
02606 #ifndef QT_NO_REGEXP_ESCAPE
02607     if ( (prevCh & ~0xff) == 0 ) {
02608         const char *p = strchr( tab, prevCh );
02609         if ( p != 0 )
02610             return Tok_Char | backTab[p - tab];
02611     }
02612 #endif
02613 
02614     switch ( prevCh ) {
02615 #ifndef QT_NO_REGEXP_ESCAPE
02616     case '0':
02617         val = 0;
02618         for ( i = 0; i < 3; i++ ) {
02619             if ( yyCh >= '0' && yyCh <= '7' )
02620                 val = ( val << 3 ) | ( yyCh - '0' );
02621             else
02622                 break;
02623             yyCh = getChar();
02624         }
02625         if ( (val & ~0377) != 0 )
02626             error( RXERR_OCTAL );
02627         return Tok_Char | val;
02628 #endif
02629 #ifndef QT_NO_REGEXP_ESCAPE
02630     case 'B':
02631         return Tok_NonWord;
02632 #endif
02633 #ifndef QT_NO_REGEXP_CCLASS
02634     case 'D':
02635         // see QChar::isDigit()
02636         yyCharClass->addCategories( 0x7fffffef );
02637         return Tok_CharClass;
02638     case 'S':
02639         // see QChar::isSpace()
02640         yyCharClass->addCategories( 0x7ffff87f );
02641         yyCharClass->addRange( 0x0000, 0x0008 );
02642         yyCharClass->addRange( 0x000e, 0x001f );
02643         yyCharClass->addRange( 0x007f, 0x009f );
02644         return Tok_CharClass;
02645     case 'W':
02646         // see QChar::isLetterOrNumber()
02647         yyCharClass->addCategories( 0x7fe07f8f );
02648         yyCharClass->addRange( 0x203f, 0x2040 );
02649         yyCharClass->addSingleton( 0x2040 );
02650         yyCharClass->addSingleton( 0x30fb );
02651         yyCharClass->addRange( 0xfe33, 0xfe34 );
02652         yyCharClass->addRange( 0xfe4d, 0xfe4f );
02653         yyCharClass->addSingleton( 0xff3f );
02654         yyCharClass->addSingleton( 0xff65 );
02655         return Tok_CharClass;
02656 #endif
02657 #ifndef QT_NO_REGEXP_ESCAPE
02658     case 'b':
02659         return Tok_Word;
02660 #endif
02661 #ifndef QT_NO_REGEXP_CCLASS
02662     case 'd':
02663         // see QChar::isDigit()
02664         yyCharClass->addCategories( 0x00000010 );
02665         return Tok_CharClass;
02666     case 's':
02667         // see QChar::isSpace()
02668         yyCharClass->addCategories( 0x00000380 );
02669         yyCharClass->addRange( 0x0009, 0x000d );
02670         return Tok_CharClass;
02671     case 'w':
02672         // see QChar::isLetterOrNumber()
02673         yyCharClass->addCategories( 0x000f8070 );
02674         yyCharClass->addSingleton( 0x005f ); // '_'
02675         return Tok_CharClass;
02676 #endif
02677 #ifndef QT_NO_REGEXP_ESCAPE
02678     case 'x':
02679         val = 0;
02680         for ( i = 0; i < 4; i++ ) {
02681             low = QChar( yyCh ).lower();
02682             if ( low >= '0' && low <= '9' )
02683                 val = ( val << 4 ) | ( low - '0' );
02684             else if ( low >= 'a' && low <= 'f' )
02685                 val = ( val << 4 ) | ( low - 'a' + 10 );
02686             else
02687                 break;
02688             yyCh = getChar();
02689         }
02690         return Tok_Char | val;
02691 #endif
02692     default:
02693         if ( prevCh >= '1' && prevCh <= '9' ) {
02694 #ifndef QT_NO_REGEXP_BACKREF
02695             val = prevCh - '0';
02696             while ( yyCh >= '0' && yyCh <= '9' ) {
02697                 val = ( val *= 10 ) | ( yyCh - '0' );
02698                 yyCh = getChar();
02699             }
02700             return Tok_BackRef | val;
02701 #else
02702             error( RXERR_DISABLED );
02703 #endif
02704         }
02705         return Tok_Char | prevCh;
02706     }
02707 }
02708 
02709 #ifndef QT_NO_REGEXP_INTERVAL
02710 int QRegExpEngine::getRep( int def )
02711 {
02712     if ( yyCh >= '0' && yyCh <= '9' ) {
02713         int rep = 0;
02714         do {
02715             rep = 10 * rep + yyCh - '0';
02716             if ( rep >= InftyRep ) {
02717                 error( RXERR_REPETITION );
02718                 rep = def;
02719             }
02720             yyCh = getChar();
02721         } while ( yyCh >= '0' && yyCh <= '9' );
02722         return rep;
02723     } else {
02724         return def;
02725     }
02726 }
02727 #endif
02728 
02729 #ifndef QT_NO_REGEXP_LOOKAHEAD
02730 void QRegExpEngine::skipChars( int n )
02731 {
02732     if ( n > 0 ) {
02733         yyPos += n - 1;
02734         yyCh = getChar();
02735     }
02736 }
02737 #endif
02738 
02739 void QRegExpEngine::error( const char *msg )
02740 {
02741     if ( yyError.isEmpty() )
02742         yyError = QString::fromLatin1( msg );
02743 }
02744 
02745 void QRegExpEngine::startTokenizer( const QChar *rx, int len )
02746 {
02747     yyIn = rx;
02748     yyPos0 = 0;
02749     yyPos = 0;
02750     yyLen = len;
02751     yyCh = getChar();
02752     yyCharClass = new CharClass;
02753     yyMinRep = 0;
02754     yyMaxRep = 0;
02755     yyError = QString();
02756 }
02757 
02758 int QRegExpEngine::getToken()
02759 {
02760 #ifndef QT_NO_REGEXP_CCLASS
02761     ushort pendingCh = 0;
02762     bool charPending;
02763     bool rangePending;
02764     int tok;
02765 #endif
02766     int prevCh = yyCh;
02767 
02768     yyPos0 = yyPos - 1;
02769 #ifndef QT_NO_REGEXP_CCLASS
02770     yyCharClass->clear();
02771 #endif
02772     yyMinRep = 0;
02773     yyMaxRep = 0;
02774     yyCh = getChar();
02775     switch ( prevCh ) {
02776     case EOS:
02777         yyPos0 = yyPos;
02778         return Tok_Eos;
02779     case '$':
02780         return Tok_Dollar;
02781     case '(':
02782         if ( yyCh == '?' ) {
02783             prevCh = getChar();
02784             yyCh = getChar();
02785             switch ( prevCh ) {
02786 #ifndef QT_NO_REGEXP_LOOKAHEAD
02787             case '!':
02788                 return Tok_NegLookahead;
02789             case '=':
02790                 return Tok_PosLookahead;
02791 #endif
02792             case ':':
02793                 return Tok_MagicLeftParen;
02794             default:
02795                 error( RXERR_LOOKAHEAD );
02796                 return Tok_MagicLeftParen;
02797             }
02798         } else {
02799             return Tok_LeftParen;
02800         }
02801     case ')':
02802         return Tok_RightParen;
02803     case '*':
02804         yyMinRep = 0;
02805         yyMaxRep = InftyRep;
02806         return Tok_Quantifier;
02807     case '+':
02808         yyMinRep = 1;
02809         yyMaxRep = InftyRep;
02810         return Tok_Quantifier;
02811     case '.':
02812 #ifndef QT_NO_REGEXP_CCLASS
02813         yyCharClass->setNegative( TRUE );
02814 #endif
02815         return Tok_CharClass;
02816     case '?':
02817         yyMinRep = 0;
02818         yyMaxRep = 1;
02819         return Tok_Quantifier;
02820     case '[':
02821 #ifndef QT_NO_REGEXP_CCLASS
02822         if ( yyCh == '^' ) {
02823             yyCharClass->setNegative( TRUE );
02824             yyCh = getChar();
02825         }
02826         charPending = FALSE;
02827         rangePending = FALSE;
02828         do {
02829             if ( yyCh == '-' && charPending && !rangePending ) {
02830                 rangePending = TRUE;
02831                 yyCh = getChar();
02832             } else {
02833                 if ( charPending && !rangePending ) {
02834                     yyCharClass->addSingleton( pendingCh );
02835                     charPending = FALSE;
02836                 }
02837                 if ( yyCh == '\\' ) {
02838                     yyCh = getChar();
02839                     tok = getEscape();
02840                     if ( tok == Tok_Word )
02841                         tok = '\b';
02842                 } else {
02843                     tok = Tok_Char | yyCh;
02844                     yyCh = getChar();
02845                 }
02846                 if ( tok == Tok_CharClass ) {
02847                     if ( rangePending ) {
02848                         yyCharClass->addSingleton( '-' );
02849                         yyCharClass->addSingleton( pendingCh );
02850                         charPending = FALSE;
02851                         rangePending = FALSE;
02852                     }
02853                 } else if ( (tok & Tok_Char) != 0 ) {
02854                     if ( rangePending ) {
02855                         yyCharClass->addRange( pendingCh, tok ^ Tok_Char );
02856                         charPending = FALSE;
02857                         rangePending = FALSE;
02858                     } else {
02859                         pendingCh = tok ^ Tok_Char;
02860                         charPending = TRUE;
02861                     }
02862                 } else {
02863                     error( RXERR_CHARCLASS );
02864                 }
02865             }
02866         }  while ( yyCh != ']' && yyCh != EOS );
02867         if ( rangePending )
02868             yyCharClass->addSingleton( '-' );
02869         if ( charPending )
02870             yyCharClass->addSingleton( pendingCh );
02871         if ( yyCh == EOS )
02872             error( RXERR_END );
02873         else
02874             yyCh = getChar();
02875         return Tok_CharClass;
02876 #else
02877         error( RXERR_END );
02878         return Tok_Char | '[';
02879 #endif
02880     case '\\':
02881         return getEscape();
02882     case ']':
02883         error( RXERR_LEFTDELIM );
02884         return Tok_Char | ']';
02885     case '^':
02886         return Tok_Caret;
02887     case '{':
02888 #ifndef QT_NO_REGEXP_INTERVAL
02889         yyMinRep = getRep( 0 );
02890         yyMaxRep = yyMinRep;
02891         if ( yyCh == ',' ) {
02892             yyCh = getChar();
02893             yyMaxRep = getRep( InftyRep );
02894         }
02895         if ( yyMaxRep < yyMinRep )
02896             qSwap( yyMinRep, yyMaxRep );
02897         if ( yyCh != '}' )
02898             error( RXERR_REPETITION );
02899         yyCh = getChar();
02900         return Tok_Quantifier;
02901 #else
02902         error( RXERR_DISABLED );
02903         return Tok_Char | '{';
02904 #endif
02905     case '|':
02906         return Tok_Bar;
02907     case '}':
02908         error( RXERR_LEFTDELIM );
02909         return Tok_Char | '}';
02910     default:
02911         return Tok_Char | prevCh;
02912     }
02913 }
02914 
02915 int QRegExpEngine::parse( const QChar *pattern, int len )
02916 {
02917     valid = TRUE;
02918     startTokenizer( pattern, len );
02919     yyTok = getToken();
02920 #ifndef QT_NO_REGEXP_CAPTURE
02921     yyMayCapture = TRUE;
02922 #else
02923     yyMayCapture = FALSE;
02924 #endif
02925 
02926 #ifndef QT_NO_REGEXP_CAPTURE
02927     int atom = startAtom( FALSE );
02928 #endif
02929     CharClass anything;
02930     Box box( this ); // create InitialState
02931     box.set( anything );
02932     Box rightBox( this ); // create FinalState
02933     rightBox.set( anything );
02934 
02935     Box middleBox( this );
02936     parseExpression( &middleBox );
02937 #ifndef QT_NO_REGEXP_CAPTURE
02938     finishAtom( atom );
02939 #endif
02940 #ifndef QT_NO_REGEXP_OPTIM
02941     middleBox.setupHeuristics();
02942 #endif
02943     box.cat( middleBox );
02944     box.cat( rightBox );
02945     delete yyCharClass;
02946     yyCharClass = 0;
02947 
02948     officialncap = ncap;
02949 #ifndef QT_NO_REGEXP_BACKREF
02950     if ( nbrefs > ncap )
02951         ncap = nbrefs;
02952 #endif
02953 
02954     mmCaptured.resize( 2 + 2 * officialncap );
02955     mmCapturedNoMatch.fill( -1, 2 + 2 * officialncap );
02956 
02957     /*
02958       We use one QMemArray<int> for all the big data used a lot in
02959       matchHere() and friends.
02960     */
02961 #ifndef QT_NO_REGEXP_OPTIM
02962     mmSlideTabSize = QMAX( minl + 1, 16 );
02963 #else
02964     mmSlideTabSize = 0;
02965 #endif
02966     mmBigArray.resize( (3 + 4 * ncap) * ns + 4 * ncap + mmSlideTabSize );
02967 
02968     mmInNextStack = mmBigArray.data();
02969     memset( mmInNextStack, -1, ns * sizeof(int) );
02970     mmCurStack = mmInNextStack + ns;
02971     mmNextStack = mmInNextStack + 2 * ns;
02972 
02973     mmCurCapBegin = mmInNextStack + 3 * ns;
02974     mmNextCapBegin = mmCurCapBegin + ncap * ns;
02975     mmCurCapEnd = mmCurCapBegin + 2 * ncap * ns;
02976     mmNextCapEnd = mmCurCapBegin + 3 * ncap * ns;
02977 
02978     mmTempCapBegin = mmCurCapBegin + 4 * ncap * ns;
02979     mmTempCapEnd = mmTempCapBegin + ncap;
02980     mmCapBegin = mmTempCapBegin + 2 * ncap;
02981     mmCapEnd = mmTempCapBegin + 3 * ncap;
02982 
02983     mmSlideTab = mmTempCapBegin + 4 * ncap;
02984 
02985     if ( !yyError.isEmpty() )
02986         return -1;
02987 
02988 #ifndef QT_NO_REGEXP_OPTIM
02989     State *sinit = s[InitialState];
02990     caretAnchored = ( sinit->anchors != 0 );
02991     if ( caretAnchored ) {
02992         QMap<int, int>& anchors = *sinit->anchors;
02993         QMap<int, int>::ConstIterator a;
02994         for ( a = anchors.begin(); a != anchors.end(); ++a ) {
02995 #ifndef QT_NO_REGEXP_ANCHOR_ALT
02996             if ( (*a & Anchor_Alternation) != 0 )
02997                 break;
02998 #endif
02999             if ( (*a & Anchor_Caret) == 0 ) {
03000                 caretAnchored = FALSE;
03001                 break;
03002             }
03003         }
03004     }
03005 #endif
03006     return yyPos0;
03007 }
03008 
03009 void QRegExpEngine::parseAtom( Box *box )
03010 {
03011 #ifndef QT_NO_REGEXP_LOOKAHEAD
03012     QRegExpEngine *eng = 0;
03013     bool neg;
03014     int len;
03015 #endif
03016 
03017     switch ( yyTok ) {
03018     case Tok_Dollar:
03019         box->catAnchor( Anchor_Dollar );
03020         break;
03021     case Tok_Caret:
03022         box->catAnchor( Anchor_Caret );
03023         break;
03024 #ifndef QT_NO_REGEXP_LOOKAHEAD
03025     case Tok_PosLookahead:
03026     case Tok_NegLookahead:
03027         neg = ( yyTok == Tok_NegLookahead );
03028         eng = new QRegExpEngine( cs );
03029         len = eng->parse( yyIn + yyPos - 1, yyLen - yyPos + 1 );
03030         if ( len >= 0 )
03031             skipChars( len );
03032         else
03033             error( RXERR_LOOKAHEAD );
03034         box->catAnchor( addLookahead(eng, neg) );
03035         yyTok = getToken();
03036         if ( yyTok != Tok_RightParen )
03037             error( RXERR_LOOKAHEAD );
03038         break;
03039 #endif
03040 #ifndef QT_NO_REGEXP_ESCAPE
03041     case Tok_Word:
03042         box->catAnchor( Anchor_Word );
03043         break;
03044     case Tok_NonWord:
03045         box->catAnchor( Anchor_NonWord );
03046         break;
03047 #endif
03048     case Tok_LeftParen:
03049     case Tok_MagicLeftParen:
03050         yyTok = getToken();
03051         parseExpression( box );
03052         if ( yyTok != Tok_RightParen )
03053             error( RXERR_END );
03054         break;
03055     case Tok_CharClass:
03056         box->set( *yyCharClass );
03057         break;
03058     case Tok_Quantifier:
03059         error( RXERR_REPETITION );
03060         break;
03061     default:
03062         if ( (yyTok & Tok_Char) != 0 )
03063             box->set( QChar(yyTok ^ Tok_Char) );
03064 #ifndef QT_NO_REGEXP_BACKREF
03065         else if ( (yyTok & Tok_BackRef) != 0 )
03066             box->set( yyTok ^ Tok_BackRef );
03067 #endif
03068         else
03069             error( RXERR_DISABLED );
03070     }
03071     yyTok = getToken();
03072 }
03073 
03074 void QRegExpEngine::parseFactor( Box *box )
03075 {
03076 #ifndef QT_NO_REGEXP_CAPTURE
03077     int atom = startAtom( yyMayCapture && yyTok == Tok_LeftParen );
03078 #else
03079     static const int atom = 0;
03080 #endif
03081 
03082 #ifndef QT_NO_REGEXP_INTERVAL
03083 #define YYREDO() \
03084         yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \
03085         *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok
03086 
03087     const QChar *in = yyIn;
03088     int pos0 = yyPos0;
03089     int pos = yyPos;
03090     int len = yyLen;
03091     int ch = yyCh;
03092     CharClass charClass;
03093     if ( yyTok == Tok_CharClass )
03094         charClass = *yyCharClass;
03095     int tok = yyTok;
03096     bool mayCapture = yyMayCapture;
03097 #endif
03098 
03099     parseAtom( box );
03100 #ifndef QT_NO_REGEXP_CAPTURE
03101     finishAtom( atom );
03102 #endif
03103 
03104     if ( yyTok == Tok_Quantifier ) {
03105         if ( yyMaxRep == InftyRep ) {
03106             box->plus( atom );
03107 #ifndef QT_NO_REGEXP_INTERVAL
03108         } else if ( yyMaxRep == 0 ) {
03109             box->clear();
03110 #endif
03111         }
03112         if ( yyMinRep == 0 )
03113             box->opt();
03114 
03115 #ifndef QT_NO_REGEXP_INTERVAL
03116         yyMayCapture = FALSE;
03117         int alpha = ( yyMinRep == 0 ) ? 0 : yyMinRep - 1;
03118         int beta = ( yyMaxRep == InftyRep ) ? 0 : yyMaxRep - ( alpha + 1 );
03119 
03120         Box rightBox( this );
03121         int i;
03122 
03123         for ( i = 0; i < beta; i++ ) {
03124             YYREDO();
03125             Box leftBox( this );
03126             parseAtom( &leftBox );
03127             leftBox.cat( rightBox );
03128             leftBox.opt();
03129             rightBox = leftBox;
03130         }
03131         for ( i = 0; i < alpha; i++ ) {
03132             YYREDO();
03133             Box leftBox( this );
03134             parseAtom( &leftBox );
03135             leftBox.cat( rightBox );
03136             rightBox = leftBox;
03137         }
03138         rightBox.cat( *box );
03139         *box = rightBox;
03140 #endif
03141         yyTok = getToken();
03142 #ifndef QT_NO_REGEXP_INTERVAL
03143         yyMayCapture = mayCapture;
03144 #endif
03145     }
03146 #undef YYREDO
03147 }
03148 
03149 void QRegExpEngine::parseTerm( Box *box )
03150 {
03151 #ifndef QT_NO_REGEXP_OPTIM
03152     if ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar )
03153         parseFactor( box );
03154 #endif
03155     while ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar ) {
03156         Box rightBox( this );
03157         parseFactor( &rightBox );
03158         box->cat( rightBox );
03159     }
03160 }
03161 
03162 void QRegExpEngine::parseExpression( Box *box )
03163 {
03164     parseTerm( box );
03165     while ( yyTok == Tok_Bar ) {
03166         Box rightBox( this );
03167         yyTok = getToken();
03168         parseTerm( &rightBox );
03169         box->orx( rightBox );
03170     }
03171 }
03172 
03173 /*
03174   The struct QRegExpPrivate contains the private data of a regular
03175   expression other than the automaton. It makes it possible for many
03176   QRegExp objects to use the same QRegExpEngine object with different
03177   QRegExpPrivate objects.
03178 */
03179 struct QRegExpPrivate
03180 {
03181     QString pattern; // regular-expression or wildcard pattern
03182     QString rxpattern; // regular-expression pattern
03183 #ifndef QT_NO_REGEXP_WILDCARD
03184     bool wc; // wildcard mode?
03185 #endif
03186     bool min; // minimal matching? (instead of maximal)
03187 #ifndef QT_NO_REGEXP_CAPTURE
03188     QString t; // last string passed to QRegExp::search() or searchRev()
03189     QStringList capturedCache; // what QRegExp::capturedTexts() returned last
03190 #endif
03191     QMemArray<int> captured; // what QRegExpEngine::search() returned last
03192 
03193     QRegExpPrivate() { captured.fill( -1, 2 ); }
03194 };
03195 
03196 #ifndef QT_NO_REGEXP_OPTIM
03197 static QCache<QRegExpEngine> *engineCache = 0;
03198 static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache;
03199 #endif
03200 
03201 static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
03202 {
03203 #ifndef QT_NO_REGEXP_OPTIM
03204     if ( engineCache != 0 ) {
03205 #ifdef QT_THREAD_SUPPORT
03206         QMutexLocker locker( qt_global_mutexpool ?
03207                              qt_global_mutexpool->get( &engineCache ) : 0 );
03208 #endif
03209         QRegExpEngine *eng = engineCache->take( pattern );
03210         if ( eng == 0 || eng->caseSensitive() != caseSensitive ) {
03211             delete eng;
03212         } else {
03213             eng->ref();
03214             return eng;
03215         }
03216     }
03217 #endif
03218     return new QRegExpEngine( pattern, caseSensitive );
03219 }
03220 
03221 static void derefEngine( QRegExpEngine *eng, const QString& pattern )
03222 {
03223 #ifdef QT_THREAD_SUPPORT
03224     QMutexLocker locker( qt_global_mutexpool ?
03225                          qt_global_mutexpool->get( &engineCache ) : 0 );
03226 #endif
03227     if ( eng != 0 && eng->deref() ) {
03228 #ifndef QT_NO_REGEXP_OPTIM
03229         if ( engineCache == 0 ) {
03230             engineCache = new QCache<QRegExpEngine>;
03231             engineCache->setAutoDelete( TRUE );
03232             cleanup_cache.set( &engineCache );
03233         }
03234         if ( !pattern.isNull() &&
03235              engineCache->insert(pattern, eng, 4 + pattern.length() / 4) )
03236             return;
03237 #else
03238         Q_UNUSED( pattern );
03239 #endif
03240         delete eng;
03241     }
03242 }
03243 
03265 QRegExp::QRegExp()
03266 {
03267     eng = new QRegExpEngine( TRUE );
03268     priv = new QRegExpPrivate;
03269 #ifndef QT_NO_REGEXP_WILDCARD
03270     priv->wc = FALSE;
03271 #endif
03272     priv->min = FALSE;
03273     compile( TRUE );
03274 }
03275 
03285 QRegExp::QRegExp( const QString& pattern, bool caseSensitive, bool wildcard )
03286 {
03287     eng = 0;
03288     priv = new QRegExpPrivate;
03289     priv->pattern = pattern;
03290 #ifndef QT_NO_REGEXP_WILDCARD
03291     priv->wc = wildcard;
03292 #endif
03293     priv->min = FALSE;
03294     compile( caseSensitive );
03295 }
03296 
03302 QRegExp::QRegExp( const QRegExp& rx )
03303 {
03304     eng = 0;
03305     priv = new QRegExpPrivate;
03306     operator=( rx );
03307 }
03308 
03312 QRegExp::~QRegExp()
03313 {
03314     derefEngine( eng, priv->rxpattern );
03315     delete priv;
03316 }
03317 
03323 QRegExp& QRegExp::operator=( const QRegExp& rx )
03324 {
03325     rx.eng->ref();
03326     derefEngine( eng, priv->rxpattern );
03327     eng = rx.eng;
03328     priv->pattern = rx.priv->pattern;
03329     priv->rxpattern = rx.priv->rxpattern;
03330 #ifndef QT_NO_REGEXP_WILDCARD
03331     priv->wc = rx.priv->wc;
03332 #endif
03333     priv->min = rx.priv->min;
03334 #ifndef QT_NO_REGEXP_CAPTURE
03335     priv->t = rx.priv->t;
03336     priv->capturedCache = rx.priv->capturedCache;
03337 #endif
03338     priv->captured = rx.priv->captured;
03339     return *this;
03340 }
03341 
03350 bool QRegExp::operator==( const QRegExp& rx ) const
03351 {
03352     return priv->pattern == rx.priv->pattern &&
03353            eng->caseSensitive() == rx.eng->caseSensitive() &&
03354 #ifndef QT_NO_REGEXP_WILDCARD
03355            priv->wc == rx.priv->wc &&
03356 #endif
03357            priv->min == rx.priv->min;
03358 }
03359 
03384 bool QRegExp::isEmpty() const
03385 {
03386     return priv->pattern.isEmpty();
03387 }
03388 
03402 bool QRegExp::isValid() const
03403 {
03404     return eng->isValid();
03405 }
03406 
03414 QString QRegExp::pattern() const
03415 {
03416     return priv->pattern;
03417 }
03418 
03425 void QRegExp::setPattern( const QString& pattern )
03426 {
03427     if ( priv->pattern != pattern ) {
03428         priv->pattern = pattern;
03429         compile( caseSensitive() );
03430     }
03431 }
03432 
03439 bool QRegExp::caseSensitive() const
03440 {
03441     return eng->caseSensitive();
03442 }
03443 
03452 void QRegExp::setCaseSensitive( bool sensitive )
03453 {
03454     if ( sensitive != eng->caseSensitive() )
03455         compile( sensitive );
03456 }
03457 
03458 #ifndef QT_NO_REGEXP_WILDCARD
03459 
03465 bool QRegExp::wildcard() const
03466 {
03467     return priv->wc;
03468 }
03469 
03483 void QRegExp::setWildcard( bool wildcard )
03484 {
03485     if ( wildcard != priv->wc ) {
03486         priv->wc = wildcard;
03487         compile( caseSensitive() );
03488     }
03489 }
03490 #endif
03491 
03498 bool QRegExp::minimal() const
03499 {
03500     return priv->min;
03501 }
03502 
03520 void QRegExp::setMinimal( bool minimal )
03521 {
03522     priv->min = minimal;
03523 }
03524 
03545 bool QRegExp::exactMatch( const QString& str ) const
03546 {
03547 #ifndef QT_NO_REGEXP_CAPTURE
03548     priv->t = str;
03549     priv->capturedCache.clear();
03550 #endif
03551 
03552     priv->captured = eng->match( str, 0, priv->min, TRUE, 0 );
03553     if ( priv->captured[1] == (int) str.length() ) {
03554         return TRUE;
03555     } else {
03556         priv->captured.detach();
03557         priv->captured[0] = 0;
03558         priv->captured[1] = eng->matchedLength();
03559         return FALSE;
03560     }
03561 }
03562 
03563 #ifndef QT_NO_COMPAT
03564 
03580 int QRegExp::match( const QString& str, int index, int *len,
03581                     bool indexIsStart ) const
03582 {
03583     int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero );
03584     if ( len != 0 )
03585         *len = matchedLength();
03586     return pos;
03587 }
03588 #endif // QT_NO_COMPAT
03589 
03590 int QRegExp::search( const QString& str, int offset ) const
03591 {
03592     return search( str, offset, CaretAtZero );
03593 }
03594 
03629 int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const
03630 {
03631     if ( offset < 0 )
03632         offset += str.length();
03633 #ifndef QT_NO_REGEXP_CAPTURE
03634     priv->t = str;
03635     priv->capturedCache.clear();
03636 #endif
03637     priv->captured = eng->match( str, offset, priv->min, FALSE,
03638                                  caretIndex(offset, caretMode) );
03639     return priv->captured[0];
03640 }
03641 
03642 
03643 int QRegExp::searchRev( const QString& str, int offset ) const
03644 {
03645     return searchRev( str, offset, CaretAtZero );
03646 }
03647 
03668 int QRegExp::searchRev( const QString& str, int offset,
03669                         CaretMode caretMode ) const
03670 {
03671     if ( offset < 0 )
03672         offset += str.length();
03673 #ifndef QT_NO_REGEXP_CAPTURE
03674     priv->t = str;
03675     priv->capturedCache.clear();
03676 #endif
03677     if ( offset < 0 || offset > (int) str.length() ) {
03678         priv->captured.detach();
03679         priv->captured.fill( -1 );
03680         return -1;
03681     }
03682 
03683     while ( offset >= 0 ) {
03684         priv->captured = eng->match( str, offset, priv->min, TRUE,
03685                                      caretIndex(offset, caretMode) );
03686         if ( priv->captured[0] == offset )
03687             return offset;
03688         offset--;
03689     }
03690     return -1;
03691 }
03692 
03699 int QRegExp::matchedLength() const
03700 {
03701     return priv->captured[1];
03702 }
03703 
03704 #ifndef QT_NO_REGEXP_CAPTURE
03705 
03708 int QRegExp::numCaptures() const
03709 {
03710     return eng->numCaptures();
03711 }
03712 
03713 
03714 
03770 QStringList QRegExp::capturedTexts()
03771 {
03772     if ( priv->capturedCache.isEmpty() ) {
03773         for ( int i = 0; i < (int) priv->captured.size(); i += 2 ) {
03774             QString m;
03775             if ( priv->captured[i + 1] == 0 )
03776                 m = QString::fromLatin1( "" );
03777             else if ( priv->captured[i] >= 0 )
03778                 m = priv->t.mid( priv->captured[i],
03779                                  priv->captured[i + 1] );
03780             priv->capturedCache.append( m );
03781         }
03782         priv->t = QString::null;
03783     }
03784     return priv->capturedCache;
03785 }
03786 
03829 QString QRegExp::cap( int nth )
03830 {
03831     if ( nth < 0 || nth >= (int) priv->captured.size() / 2 )
03832         return QString::null;
03833     else
03834         return capturedTexts()[nth];
03835 }
03836 
03857 int QRegExp::pos( int nth )
03858 {
03859     if ( nth < 0 || nth >= (int) priv->captured.size() / 2 )
03860         return -1;
03861     else
03862         return priv->captured[2 * nth];
03863 }
03864 
03871 QString QRegExp::errorString()
03872 {
03873     if ( isValid() ) {
03874         return QString( RXERR_OK );
03875     } else {
03876         return eng->errorString();
03877     }
03878 }
03879 #endif
03880 
03899 QString QRegExp::escape( const QString& str )
03900 {
03901     static const char meta[] = "$()*+.?[\\]^{|}";
03902     QString quoted = str;
03903     int i = 0;
03904 
03905     while ( i < (int) quoted.length() ) {
03906         if ( strchr(meta, quoted[i].latin1()) != 0 )
03907             quoted.insert( i++, "\\" );
03908         i++;
03909     }
03910     return quoted;
03911 }
03912 
03913 void QRegExp::compile( bool caseSensitive )
03914 {
03915     derefEngine( eng, priv->rxpattern );
03916 #ifndef QT_NO_REGEXP_WILDCARD
03917     if ( priv->wc )
03918         priv->rxpattern = wc2rx( priv->pattern );
03919     else
03920 #endif
03921         priv->rxpattern = priv->pattern.isNull() ? QString::fromLatin1( "" )
03922                           : priv->pattern;
03923     eng = newEngine( priv->rxpattern, caseSensitive );
03924 #ifndef QT_NO_REGEXP_CAPTURE
03925     priv->t = QString();
03926     priv->capturedCache.clear();
03927 #endif
03928     priv->captured.detach();
03929     priv->captured.fill( -1, 2 + 2 * eng->numCaptures() );
03930 }
03931 
03932 int QRegExp::caretIndex( int offset, CaretMode caretMode )
03933 {
03934     if ( caretMode == CaretAtZero ) {
03935         return 0;
03936     } else if ( caretMode == CaretAtOffset ) {
03937         return offset;
03938     } else { // CaretWontMatch
03939         return -1;
03940     }
03941 }
03942 
03943 #endif // QT_NO_REGEXP

Generated on Sat Nov 5 16:18:32 2005 for OPIE by  doxygen 1.4.2