Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

qregexp3.cpp

Go to the documentation of this file.
00001 /****************************************************************************
00002 ** $Id: qregexp3.cpp,v 1.3 2005/01/21 19:56:17 ar Exp $
00003 **
00004 ** Implementation of QRegExp class
00005 **
00006 ** Created : 950126
00007 **
00008 ** Copyright (C) 1992-2000 Trolltech AS.  All rights reserved.
00009 **
00010 ** This file is part of the tools module of the Qt GUI Toolkit.
00011 **
00012 ** This file may be distributed under the terms of the Q Public License
00013 ** as defined by Trolltech AS of Norway and appearing in the file
00014 ** LICENSE.QPL included in the packaging of this file.
00015 **
00016 ** This file may be distributed and/or modified under the terms of the
00017 ** GNU General Public License version 2 as published by the Free Software
00018 ** Foundation and appearing in the file LICENSE.GPL included in the
00019 ** packaging of this file.
00020 **
00021 ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
00022 ** licenses may use this file in accordance with the Qt Commercial License
00023 ** Agreement provided with the Software.
00024 **
00025 ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
00026 ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
00027 **
00028 ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
00029 **   information about Qt Commercial License Agreements.
00030 ** See http://www.trolltech.com/qpl/ for QPL licensing information.
00031 ** See http://www.trolltech.com/gpl/ for GPL licensing information.
00032 **
00033 ** Contact info@trolltech.com if any conditions of this licensing are
00034 ** not clear to you.
00035 **
00036 **********************************************************************/
00037 #if QT_VERSION >= 0x030000
00038 #error QRegExp3 is now in QT 3 use QRegExp instead
00039 #endif
00040 
00041 #if QT_VERSION < 0x030000
00042 #include "./qregexp3.h"
00043 #else
00044 #include "qregexp.h"
00045 #endif
00046 
00047 /* OPIE */
00048 #include <opie2/odebug.h>
00049 
00050 /* QT */
00051 #include <qarray.h>
00052 #include <qbitarray.h>
00053 #include <qcache.h>
00054 #include <qintdict.h>
00055 #include <qmap.h>
00056 #include <qstring.h>
00057 #include <qtl.h>
00058 #include <qvector.h>
00059 
00060 /* STD */
00061 #include <limits.h>
00062 
00063 /*
00064   WARNING!  Be sure to read qregexp.tex before modifying this file.
00065 */
00066 
00662 static const int NumBadChars = 128;
00663 #define BadChar( ch ) ( (ch).cell() % NumBadChars )
00664 
00665 static const int NoOccurrence = INT_MAX;
00666 static const int EmptyCapture = INT_MAX;
00667 static const int InftyLen = INT_MAX;
00668 static const int InftyRep = 1000;
00669 static const int EOS = -1;
00670 
00671 #ifndef QT_NO_REGEXP_OPTIM
00672 static int engCount = 0;
00673 static QArray<int> *noOccurrences = 0;
00674 static QArray<int> *firstOccurrenceAtZero = 0;
00675 #endif
00676 
00677 /*
00678   Merges two QArrays of ints and puts the result into the first one.
00679 */
00680 static void mergeInto( QArray<int> *a, const QArray<int>& b )
00681 {
00682     int asize = a->size();
00683     int bsize = b.size();
00684     if ( asize == 0 ) {
00685     *a = b.copy();
00686 #ifndef QT_NO_REGEXP_OPTIM
00687     } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) {
00688     a->resize( asize + 1 );
00689     (*a)[asize] = b[0];
00690 #endif
00691     } else if ( bsize >= 1 ) {
00692     int csize = asize + bsize;
00693     QArray<int> c( csize );
00694     int i = 0, j = 0, k = 0;
00695     while ( i < asize ) {
00696         if ( j < bsize ) {
00697         if ( (*a)[i] == b[j] ) {
00698             i++;
00699             csize--;
00700         } else if ( (*a)[i] < b[j] ) {
00701             c[k++] = (*a)[i++];
00702         } else {
00703             c[k++] = b[j++];
00704         }
00705         } else {
00706         memcpy( c.data() + k, (*a).data() + i,
00707             (asize - i) * sizeof(int) );
00708         break;
00709         }
00710     }
00711     c.resize( csize );
00712     if ( j < bsize )
00713         memcpy( c.data() + k, b.data() + j, (bsize - j) * sizeof(int) );
00714     *a = c;
00715     }
00716 }
00717 
00718 /*
00719   Merges two disjoint QMaps of (int, int) pairs and puts the result into the
00720   first one.
00721 */
00722 static void mergeInto( QMap<int, int> *a, const QMap<int, int>& b )
00723 {
00724     QMap<int, int>::ConstIterator it;
00725     for ( it = b.begin(); it != b.end(); ++it )
00726     a->insert( it.key(), *it );
00727 }
00728 
00729 /*
00730   Returns the value associated to key k in QMap m of (int, int) pairs, or 0 if
00731   no such value is explicitly present.
00732 */
00733 static int at( const QMap<int, int>& m, int k )
00734 {
00735     QMap<int, int>::ConstIterator it = m.find( k );
00736     if ( it == m.end() )
00737     return 0;
00738     else
00739     return *it;
00740 }
00741 
00742 #ifndef QT_NO_REGEXP_WILDCARD
00743 /*
00744   Translates a wildcard pattern to an equivalent regular expression pattern
00745   (e.g., *.cpp to .*\.cpp).
00746 */
00747 static QString wc2rx( const QString& wc )
00748 {
00749     int wclen = wc.length();
00750     QString rx = QString::fromLatin1( "" );
00751     int i = 0;
00752     while ( i < wclen ) {
00753     QChar c = wc[i++];
00754     switch ( c.unicode() ) {
00755     case '*':
00756         rx += QString::fromLatin1( ".*" );
00757         break;
00758     case '?':
00759         rx += QChar( '.' );
00760         break;
00761     case '$':
00762     case '(':
00763     case ')':
00764     case '+':
00765     case '.':
00766     case '\\':
00767     case '^':
00768     case '{':
00769     case '|':
00770     case '}':
00771         rx += QChar( '\\' );
00772         rx += c;
00773         break;
00774     case '[':
00775         rx += c;
00776         if ( wc[i] == QChar('^') )
00777         rx += wc[i++];
00778         if ( i < wclen ) {
00779         if ( rx[i] == ']' )
00780             rx += wc[i++];
00781         while ( i < wclen && wc[i] != QChar(']') ) {
00782             if ( wc[i] == '\\' )
00783             rx += QChar( '\\' );
00784             rx += wc[i++];
00785         }
00786         }
00787         break;
00788     default:
00789         rx += c;
00790     }
00791     }
00792     return rx;
00793 }
00794 #endif
00795 
00796 /*
00797   The class QRegExpEngine encapsulates a modified nondeterministic finite
00798   automaton (NFA).
00799 */
00800 class QRegExpEngine : public QShared
00801 {
00802 public:
00803 #ifndef QT_NO_REGEXP_CCLASS
00804     /*
00805       The class CharClass represents a set of characters, such as can be found
00806       in regular expressions (e.g., [a-z] denotes the set {a, b, ..., z}).
00807     */
00808     class CharClass
00809     {
00810     public:
00811     CharClass();
00812     CharClass( const CharClass& cc ) { operator=( cc ); }
00813 
00814     CharClass& operator=( const CharClass& cc );
00815 
00816     void clear();
00817     bool negative() const { return n; }
00818     void setNegative( bool negative );
00819     void addCategories( int cats );
00820     void addRange( ushort from, ushort to );
00821     void addSingleton( ushort ch ) { addRange( ch, ch ); }
00822 
00823     bool in( QChar ch ) const;
00824 #ifndef QT_NO_REGEXP_OPTIM
00825     const QArray<int>& firstOccurrence() const { return occ1; }
00826 #endif
00827 
00828 #if defined(QT_DEBUG)
00829     void dump() const;
00830 #endif
00831 
00832     private:
00833     /*
00834       The struct Range represents a range of characters (e.g., [0-9] denotes
00835       range 48 to 57).
00836     */
00837     struct Range
00838     {
00839         ushort from; // 48
00840         ushort to; // 57
00841     };
00842 
00843     int c; // character classes
00844     QArray<Range> r; // character ranges
00845     bool n; // negative?
00846 #ifndef QT_NO_REGEXP_OPTIM
00847     QArray<int> occ1; // first-occurrence array
00848 #endif
00849     };
00850 #else
00851     struct CharClass
00852     {
00853     int x; // dummy
00854 
00855 #ifndef QT_NO_REGEXP_OPTIM
00856     const QArray<int>& firstOccurrence() const {
00857         return *firstOccurrenceAtZero;
00858     }
00859 #endif
00860     };
00861 #endif
00862 
00863     QRegExpEngine( bool caseSensitive ) { setup( caseSensitive ); }
00864     QRegExpEngine( const QString& rx, bool caseSensitive );
00865 #ifndef QT_NO_REGEXP_OPTIM
00866     ~QRegExpEngine();
00867 #endif
00868 
00869     bool isValid() const { return valid; }
00870     bool caseSensitive() const { return cs; }
00871     int numCaptures() const { return realncap; }
00872     QArray<int> match( const QString& str, int pos, bool minimal,
00873                bool oneTest );
00874     int matchedLength() const { return mmMatchedLen; }
00875 
00876     int createState( QChar ch );
00877     int createState( const CharClass& cc );
00878 #ifndef QT_NO_REGEXP_BACKREF
00879     int createState( int bref );
00880 #endif
00881 
00882     void addCatTransitions( const QArray<int>& from, const QArray<int>& to );
00883 #ifndef QT_NO_REGEXP_CAPTURE
00884     void addPlusTransitions( const QArray<int>& from, const QArray<int>& to,
00885                  int atom );
00886 #endif
00887 
00888 #ifndef QT_NO_REGEXP_ANCHOR_ALT
00889     int anchorAlternation( int a, int b );
00890     int anchorConcatenation( int a, int b );
00891 #else
00892     int anchorAlternation( int a, int b ) { return a & b; }
00893     int anchorConcatenation( int a, int b ) { return a | b; }
00894 #endif
00895     void addAnchors( int from, int to, int a );
00896 
00897 #ifndef QT_NO_REGEXP_OPTIM
00898     void setupGoodStringHeuristic( int earlyStart, int lateStart,
00899                    const QString& str );
00900     void setupBadCharHeuristic( int minLen, const QArray<int>& firstOcc );
00901     void heuristicallyChooseHeuristic();
00902 #endif
00903 
00904 #if defined(QT_DEBUG)
00905     void dump() const;
00906 #endif
00907 
00908 private:
00909     enum { CharClassBit = 0x10000, BackRefBit = 0x20000 };
00910 
00911     /*
00912       The struct State represents one state in a modified NFA.  The input
00913       characters matched are stored in the state instead of on the transitions,
00914       something possible for an automaton constructed from a regular expression.
00915     */
00916     struct State
00917     {
00918 #ifndef QT_NO_REGEXP_CAPTURE
00919     int atom; // which atom does this state belong to?
00920 #endif
00921     int match; // what does it match? (see CharClassBit and BackRefBit)
00922     QArray<int> outs; // out-transitions
00923     QMap<int, int> *reenter; // atoms reentered when transiting out
00924     QMap<int, int> *anchors; // anchors met when transiting out
00925 
00926 #ifndef QT_NO_REGEXP_CAPTURE
00927     State( int a, int m )
00928         : atom( a ), match( m ), reenter( 0 ), anchors( 0 ) { }
00929 #else
00930     State( int m )
00931         : match( m ), reenter( 0 ), anchors( 0 ) { }
00932 #endif
00933     ~State() { delete reenter; delete anchors; }
00934     };
00935 
00936 #ifndef QT_NO_REGEXP_LOOKAHEAD
00937     /*
00938       The struct Lookahead represents a lookahead a la Perl (e.g., (?=foo) and
00939       (?!bar)).
00940     */
00941     struct Lookahead
00942     {
00943     QRegExpEngine *eng; // NFA representing the embedded regular expression
00944     bool neg; // negative lookahead?
00945 
00946     Lookahead( QRegExpEngine *eng0, bool neg0 )
00947         : eng( eng0 ), neg( neg0 ) { }
00948     ~Lookahead() { delete eng; }
00949     };
00950 #endif
00951 
00952 #ifndef QT_NO_REGEXP_CAPTURE
00953     /*
00954       The struct Atom represents one node in the hierarchy of regular expression
00955       atoms.
00956     */
00957     struct Atom
00958     {
00959     int parent; // index of parent in array of atoms
00960     int capture; // index of capture, from 1 to ncap
00961     };
00962 #endif
00963 
00964 #ifndef QT_NO_REGEXP_ANCHOR_ALT
00965     /*
00966       The struct AnchorAlternation represents a pair of anchors with OR
00967       semantics.
00968     */
00969     struct AnchorAlternation
00970     {
00971     int a; // this anchor...
00972     int b; // ...or this one
00973     };
00974 #endif
00975 
00976     enum { InitialState = 0, FinalState = 1 };
00977     void setup( bool caseSensitive );
00978     int setupState( int match );
00979 
00980     /*
00981       Let's hope that 13 lookaheads and 14 back-references are enough.
00982      */
00983     enum { MaxLookaheads = 13, MaxBackRefs = 14 };
00984     enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002,
00985        Anchor_Word = 0x00000004, Anchor_NonWord = 0x00000008,
00986        Anchor_FirstLookahead = 0x00000010,
00987        Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads,
00988        Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1,
00989        Anchor_Alternation = Anchor_BackRef1Empty << MaxBackRefs,
00990 
00991        Anchor_LookaheadMask = ( Anchor_FirstLookahead - 1 ) ^
00992            ( (Anchor_FirstLookahead << MaxLookaheads) - 1 ) };
00993 #ifndef QT_NO_REGEXP_CAPTURE
00994     int startAtom( bool capture );
00995     void finishAtom( int atom ) { cf = f[atom].parent; }
00996 #endif
00997 
00998 #ifndef QT_NO_REGEXP_LOOKAHEAD
00999     int addLookahead( QRegExpEngine *eng, bool negative );
01000 #endif
01001 
01002 #ifndef QT_NO_REGEXP_CAPTURE
01003     bool isBetterCapture( const int *begin1, const int *end1, const int *begin2,
01004               const int *end2 );
01005 #endif
01006     bool testAnchor( int i, int a, const int *capBegin );
01007 
01008 #ifndef QT_NO_REGEXP_OPTIM
01009     bool goodStringMatch();
01010     bool badCharMatch();
01011 #else
01012     bool bruteMatch();
01013 #endif
01014     bool matchHere();
01015 
01016     QVector<State> s; // array of states
01017     int ns; // number of states
01018 #ifndef QT_NO_REGEXP_CAPTURE
01019     QArray<Atom> f; // atom hierarchy
01020     int nf; // number of atoms
01021     int cf; // current atom
01022 #endif
01023     int realncap; // number of captures, seen from the outside
01024     int ncap; // number of captures, seen from the inside
01025 #ifndef QT_NO_REGEXP_CCLASS
01026     QVector<CharClass> cl; // array of character classes
01027 #endif
01028 #ifndef QT_NO_REGEXP_LOOKAHEAD
01029     QVector<Lookahead> ahead; // array of lookaheads
01030 #endif
01031 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01032     QArray<AnchorAlternation> aa; // array of (a, b) pairs of anchors
01033 #endif
01034 #ifndef QT_NO_REGEXP_OPTIM
01035     bool caretAnchored; // does the regexp start with ^?
01036 #endif
01037     bool valid; // is the regular expression valid?
01038     bool cs; // case sensitive?
01039 #ifndef QT_NO_REGEXP_BACKREF
01040     int nbrefs; // number of back-references
01041 #endif
01042 
01043 #ifndef QT_NO_REGEXP_OPTIM
01044     bool useGoodStringHeuristic; // use goodStringMatch? otherwise badCharMatch
01045 
01046     int goodEarlyStart; // the index where goodStr can first occur in a match
01047     int goodLateStart; // the index where goodStr can last occur in a match
01048     QString goodStr; // the string that any match has to contain
01049 
01050     int minl; // the minimum length of a match
01051     QArray<int> occ1; // first-occurrence array
01052 #endif
01053 
01054     /*
01055       The class Box is an abstraction for a regular expression fragment.  It can
01056       also be seen as one node in the syntax tree of a regular expression with
01057       synthetized attributes.
01058 
01059       It's interface is ugly for performance reasons.
01060     */
01061     class Box
01062     {
01063     public:
01064     Box( QRegExpEngine *engine );
01065     Box( const Box& b ) { operator=( b ); }
01066 
01067     Box& operator=( const Box& b );
01068 
01069     void clear() { operator=(Box(eng)); }
01070     void set( QChar ch );
01071     void set( const CharClass& cc );
01072 #ifndef QT_NO_REGEXP_BACKREF
01073     void set( int bref );
01074 #endif
01075 
01076     void cat( const Box& b );
01077     void orx( const Box& b );
01078     void plus( int atom );
01079     void opt();
01080     void catAnchor( int a );
01081 #ifndef QT_NO_REGEXP_OPTIM
01082     void setupHeuristics();
01083 #endif
01084 
01085 #if defined(QT_DEBUG)
01086     void dump() const;
01087 #endif
01088 
01089     private:
01090     void addAnchorsToEngine( const Box& to ) const;
01091 
01092     QRegExpEngine *eng; // the automaton under construction
01093     QArray<int> ls; // the left states (firstpos)
01094     QArray<int> rs; // the right states (lastpos)
01095     QMap<int, int> lanchors; // the left anchors
01096     QMap<int, int> ranchors; // the right anchors
01097     int skipanchors; // the anchors to match if the box is skipped
01098 
01099 #ifndef QT_NO_REGEXP_OPTIM
01100     int earlyStart; // the index where str can first occur
01101     int lateStart; // the index where str can last occur
01102     QString str; // a string that has to occur in any match
01103     QString leftStr; // a string occurring at the left of this box
01104     QString rightStr; // a string occurring at the right of this box
01105     int maxl; // the maximum length of this box (possibly InftyLen)
01106 #endif
01107 
01108     int minl; // the minimum length of this box
01109 #ifndef QT_NO_REGEXP_OPTIM
01110     QArray<int> occ1; // first-occurrence array
01111 #endif
01112     };
01113     friend class Box;
01114 
01115     /*
01116       This is the lexical analyzer for regular expressions.
01117     */
01118     enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen,
01119        Tok_PosLookahead, Tok_NegLookahead, Tok_RightParen, Tok_CharClass,
01120        Tok_Caret, Tok_Quantifier, Tok_Bar, Tok_Word, Tok_NonWord,
01121        Tok_Char = 0x10000, Tok_BackRef = 0x20000 };
01122     int getChar();
01123     int getEscape();
01124 #ifndef QT_NO_REGEXP_INTERVAL
01125     int getRep( int def );
01126 #endif
01127 #ifndef QT_NO_REGEXP_LOOKAHEAD
01128     void skipChars( int n );
01129 #endif
01130     void startTokenizer( const QChar *rx, int len );
01131     int getToken();
01132 
01133     const QChar *yyIn; // a pointer to the input regular expression pattern
01134     int yyPos0; // the position of yyTok in the input pattern
01135     int yyPos; // the position of the next character to read
01136     int yyLen; // the length of yyIn
01137     int yyCh; // the last character read
01138     CharClass *yyCharClass; // attribute for Tok_CharClass tokens
01139     int yyMinRep; // attribute for Tok_Quantifier
01140     int yyMaxRep; // ditto
01141     bool yyError; // syntax error or overflow during parsing?
01142 
01143     /*
01144       This is the syntactic analyzer for regular expressions.
01145     */
01146     int parse( const QChar *rx, int len );
01147     void parseAtom( Box *box );
01148     void parseFactor( Box *box );
01149     void parseTerm( Box *box );
01150     void parseExpression( Box *box );
01151 
01152     int yyTok; // the last token read
01153     bool yyMayCapture; // set this to FALSE to disable capturing
01154 
01155     /*
01156       This is the engine state during matching.
01157     */
01158     const QString *mmStr; // a pointer to the input QString
01159     const QChar *mmIn; // a pointer to the input string data
01160     int mmPos; // the current position in the string
01161     int mmLen; // the length of the input string
01162     bool mmMinimal; // minimal matching?
01163     QArray<int> mmCaptured; // an array of pairs (start, len)
01164     QArray<int> mmCapturedNoMatch; // an array of pairs (-1, -1)
01165     QArray<int> mmBigArray; // big QArray<int> array
01166     int *mmInNextStack; // is state is mmNextStack?
01167     int *mmCurStack; // stack of current states
01168     int *mmNextStack; // stack of next states
01169     int *mmCurCapBegin; // start of current states' captures
01170     int *mmNextCapBegin; // start of next states' captures
01171     int *mmCurCapEnd; // end of current states' captures
01172     int *mmNextCapEnd; // end of next states' captures
01173     int *mmTempCapBegin; // start of temporary captures
01174     int *mmTempCapEnd; // end of temporary captures
01175     int *mmCapBegin; // start of captures for a next state
01176     int *mmCapEnd; // end of captures for a next state
01177     int *mmSlideTab; // bump-along slide table for bad-character heuristic
01178     int mmSlideTabSize; // size of slide table
01179 #ifndef QT_NO_REGEXP_BACKREF
01180     QIntDict<int> mmSleeping; // dictionary of back-reference sleepers
01181 #endif
01182     int mmMatchedLen; // length of match or of matched string for partial match
01183 };
01184 
01185 QRegExpEngine::QRegExpEngine( const QString& rx, bool caseSensitive )
01186 #ifndef QT_NO_REGEXP_BACKREF
01187     : mmSleeping( 101 )
01188 #endif
01189 {
01190     setup( caseSensitive );
01191     valid = ( parse(rx.unicode(), rx.length()) == (int) rx.length() );
01192 }
01193 
01194 #ifndef QT_NO_REGEXP_OPTIM
01195 QRegExpEngine::~QRegExpEngine()
01196 {
01197     if ( --engCount == 0 ) {
01198     delete noOccurrences;
01199     noOccurrences = 0;
01200     delete firstOccurrenceAtZero;
01201     firstOccurrenceAtZero = 0;
01202     }
01203 }
01204 #endif
01205 
01206 /*
01207   Tries to match in str and returns an array of (begin, length) pairs for
01208   captured text.  If there is no match, all pairs are (-1, -1).
01209 */
01210 QArray<int> QRegExpEngine::match( const QString& str, int pos, bool minimal,
01211                   bool oneTest )
01212 {
01213     mmStr = &str;
01214     mmIn = str.unicode();
01215     if ( mmIn == 0 )
01216     mmIn = &QChar::null;
01217     mmPos = pos;
01218     mmLen = str.length();
01219     mmMinimal = minimal;
01220     mmMatchedLen = 0;
01221 
01222     bool matched = FALSE;
01223     if ( valid && mmPos >= 0 && mmPos <= mmLen ) {
01224 #ifndef QT_NO_REGEXP_OPTIM
01225     if ( mmPos <= mmLen - minl ) {
01226         if ( caretAnchored || oneTest )
01227         matched = matchHere();
01228         else if ( useGoodStringHeuristic )
01229         matched = goodStringMatch();
01230         else
01231         matched = badCharMatch();
01232     }
01233 #else
01234     matched = oneTest ? matchHere() : bruteMatch();
01235 #endif
01236     }
01237 
01238     if ( matched ) {
01239     mmCaptured.detach();
01240     mmCaptured[0] = mmPos;
01241     mmCaptured[1] = mmMatchedLen;
01242     for ( int j = 0; j < realncap; j++ ) {
01243         int len = mmCapEnd[j] - mmCapBegin[j];
01244         mmCaptured[2 + 2 * j] = len > 0 ? mmPos + mmCapBegin[j] : 0;
01245         mmCaptured[2 + 2 * j + 1] = len;
01246     }
01247     return mmCaptured;
01248     } else {
01249     return mmCapturedNoMatch;
01250     }
01251 }
01252 
01253 /*
01254   The three following functions add one state to the automaton and return the
01255   number of the state.
01256 */
01257 
01258 int QRegExpEngine::createState( QChar ch )
01259 {
01260     return setupState( ch.unicode() );
01261 }
01262 
01263 int QRegExpEngine::createState( const CharClass& cc )
01264 {
01265 #ifndef QT_NO_REGEXP_CCLASS
01266     int n = cl.size();
01267     cl.resize( n + 1 );
01268     cl.insert( n, new CharClass(cc) );
01269     return setupState( CharClassBit | n );
01270 #else
01271     Q_UNUSED( cc );
01272     return setupState( CharClassBit );
01273 #endif
01274 }
01275 
01276 #ifndef QT_NO_REGEXP_BACKREF
01277 int QRegExpEngine::createState( int bref )
01278 {
01279     if ( bref > nbrefs ) {
01280     nbrefs = bref;
01281     if ( nbrefs > MaxBackRefs ) {
01282         yyError = TRUE;
01283         return 0;
01284     }
01285     }
01286     return setupState( BackRefBit | bref );
01287 }
01288 #endif
01289 
01290 /*
01291   The two following functions add a transition between all pairs of states
01292   (i, j) where i is fond in from, and j is found in to.
01293 
01294   Cat-transitions are distinguished from plus-transitions for capturing.
01295 */
01296 
01297 void QRegExpEngine::addCatTransitions( const QArray<int>& from,
01298                        const QArray<int>& to )
01299 {
01300     for ( int i = 0; i < (int) from.size(); i++ ) {
01301     State *st = s[from[i]];
01302     mergeInto( &st->outs, to );
01303     }
01304 }
01305 
01306 #ifndef QT_NO_REGEXP_CAPTURE
01307 void QRegExpEngine::addPlusTransitions( const QArray<int>& from,
01308                     const QArray<int>& to, int atom )
01309 {
01310     for ( int i = 0; i < (int) from.size(); i++ ) {
01311     State *st = s[from[i]];
01312     QArray<int> oldOuts = st->outs.copy();
01313     mergeInto( &st->outs, to );
01314     if ( f[atom].capture >= 0 ) {
01315         if ( st->reenter == 0 )
01316         st->reenter = new QMap<int, int>;
01317         for ( int j = 0; j < (int) to.size(); j++ ) {
01318         if ( !st->reenter->contains(to[j]) &&
01319              oldOuts.bsearch(to[j]) < 0 )
01320             st->reenter->insert( to[j], atom );
01321         }
01322     }
01323     }
01324 }
01325 #endif
01326 
01327 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01328 /*
01329   Returns an anchor that means a OR b.
01330 */
01331 int QRegExpEngine::anchorAlternation( int a, int b )
01332 {
01333     if ( ((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0 )
01334     return a & b;
01335 
01336     int n = aa.size();
01337     aa.resize( n + 1 );
01338     aa[n].a = a;
01339     aa[n].b = b;
01340     return Anchor_Alternation | n;
01341 }
01342 
01343 /*
01344   Returns an anchor that means a AND b.
01345 */
01346 int QRegExpEngine::anchorConcatenation( int a, int b )
01347 {
01348     if ( ((a | b) & Anchor_Alternation) == 0 )
01349     return a | b;
01350     if ( (b & Anchor_Alternation) != 0 )
01351     qSwap( a, b );
01352     int aprime = anchorConcatenation( aa[a ^ Anchor_Alternation].a, b );
01353     int bprime = anchorConcatenation( aa[a ^ Anchor_Alternation].b, b );
01354     return anchorAlternation( aprime, bprime );
01355 }
01356 #endif
01357 
01358 /*
01359   Adds anchor a on a transition caracterised by its from state and its to state.
01360 */
01361 void QRegExpEngine::addAnchors( int from, int to, int a )
01362 {
01363     State *st = s[from];
01364     if ( st->anchors == 0 )
01365     st->anchors = new QMap<int, int>;
01366     if ( st->anchors->contains(to) )
01367     a = anchorAlternation( (*st->anchors)[to], a );
01368     st->anchors->insert( to, a );
01369 }
01370 
01371 #ifndef QT_NO_REGEXP_OPTIM
01372 /*
01373   The two following functions provide the engine with the information needed by
01374   its matching heuristics.
01375 */
01376 
01377 void QRegExpEngine::setupGoodStringHeuristic( int earlyStart, int lateStart,
01378                           const QString& str )
01379 {
01380     goodEarlyStart = earlyStart;
01381     goodLateStart = lateStart;
01382     goodStr = cs ? str : str.lower();
01383 }
01384 
01385 void QRegExpEngine::setupBadCharHeuristic( int minLen,
01386                        const QArray<int>& firstOcc )
01387 {
01388     minl = minLen;
01389     occ1 = cs ? firstOcc : *firstOccurrenceAtZero;
01390 }
01391 
01392 /*
01393   This function chooses between the good-string and the bad-character
01394   heuristics.  It computes two scores and chooses the heuristic with the highest
01395   score.
01396 
01397   Here are some common-sense constraints on the scores that should be respected
01398   if the formulas are ever modified:  (1) If goodStr is empty, the good-string
01399   heuristic scores 0.  (2) If the search is case insensitive, the good-string
01400   heuristic should be used, unless it scores 0.  (Case insensitivity
01401   turns all entries of occ1 to 0.)  (3) If (goodLateStart - goodEarlyStart) is
01402   big, the good-string heuristic should score less.
01403 */
01404 void QRegExpEngine::heuristicallyChooseHeuristic()
01405 {
01406     int i;
01407 
01408     if ( minl == 0 )
01409     return;
01410 
01411     /*
01412       Magic formula:  The good string has to constitute a good proportion of the
01413       minimum-length string, and appear at a more-or-less known index.
01414     */
01415     int goodStringScore = ( 64 * goodStr.length() / minl ) -
01416               ( goodLateStart - goodEarlyStart );
01417 
01418     /*
01419       Less magic formula:  We pick a couple of characters at random, and check
01420       whether they are good or bad.
01421     */
01422     int badCharScore = 0;
01423     int step = QMAX( 1, NumBadChars / 32 );
01424     for ( i = 1; i < NumBadChars; i += step ) {
01425     if ( occ1[i] == NoOccurrence )
01426         badCharScore += minl;
01427     else
01428         badCharScore += occ1[i];
01429     }
01430     badCharScore /= minl;
01431 
01432     useGoodStringHeuristic = ( goodStringScore > badCharScore );
01433 }
01434 #endif
01435 
01436 #if defined(QT_DEBUG)
01437 void QRegExpEngine::dump() const
01438 {
01439     int i, j;
01440     odebug << "Case " << (cs ? "" : "in") << "sensitive engine" << oendl;
01441     odebug << "  States" << oendl;
01442     for ( i = 0; i < ns; i++ ) {
01443     odebug << "  " << i
01444            << (i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "") << oendl;
01445 
01446 #ifndef QT_NO_REGEXP_CAPTURE
01447     odebug << "    in atom " << s[i]->atom << oendl;
01448 #endif
01449     int m = s[i]->match;
01450     if ( (m & CharClassBit) != 0 ) {
01451         odebug << "    match character class " << (m ^ CharClassBit) << oendl;
01452 #ifndef QT_NO_REGEXP_CCLASS
01453         cl[m ^ CharClassBit]->dump();
01454 #else
01455         odebug << "    negative character class" << oendl;
01456 #endif
01457     } else if ( (m & BackRefBit) != 0 ) {
01458         odebug << "    match back-reference " << (m ^ BackRefBit) << oendl;
01459     } else if ( m >= 0x20 && m <= 0x7e ) {
01460         odebug << "    match " << QString().sprintf( "0x%.4x", m) << " (" << m << ")" << oendl;
01461 
01462     } else {
01463         odebug << "    match " << QString().sprintf( "0x%.4x", m) << oendl;
01464     }
01465     for ( j = 0; j < (int) s[i]->outs.size(); j++ ) {
01466         int next = s[i]->outs[j];
01467         odebug << "    -> " << next << oendl;
01468         if ( s[i]->reenter != 0 && s[i]->reenter->contains(next) )
01469         odebug << "       [reenter " << (*s[i]->reenter)[next] << "]" << oendl;
01470         if ( s[i]->anchors != 0 && at(*s[i]->anchors, next) != 0 )
01471         odebug << "       [anchors " << QString().sprintf( "0x%.8x]", (*s[i]->anchors)[next] ) << oendl;
01472     }
01473     }
01474 #ifndef QT_NO_REGEXP_CAPTURE
01475     if ( nf > 0 ) {
01476     odebug << "  Atom    Parent  Capture" << oendl;
01477     for ( i = 0; i < nf; i++ )
01478         odebug << QString().sprintf("  %6d  %6d  %6d", i, f[i].parent, f[i].capture ) << oendl;
01479     }
01480 #endif
01481 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01482     for ( i = 0; i < (int) aa.size(); i++ )
01483     odebug << QString().sprintf("  Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b ) << oendl;
01484 #endif
01485 }
01486 #endif
01487 
01488 void QRegExpEngine::setup( bool caseSensitive )
01489 {
01490 #ifndef QT_NO_REGEXP_OPTIM
01491     if ( engCount++ == 0 ) {
01492     noOccurrences = new QArray<int>( NumBadChars );
01493     firstOccurrenceAtZero = new QArray<int>( NumBadChars );
01494     noOccurrences->fill( NoOccurrence );
01495     firstOccurrenceAtZero->fill( 0 );
01496     }
01497 #endif
01498     s.setAutoDelete( TRUE );
01499     s.resize( 32 );
01500     ns = 0;
01501 #ifndef QT_NO_REGEXP_CAPTURE
01502     f.resize( 32 );
01503     nf = 0;
01504     cf = -1;
01505 #endif
01506     realncap = 0;
01507     ncap = 0;
01508 #ifndef QT_NO_REGEXP_CCLASS
01509     cl.setAutoDelete( TRUE );
01510 #endif
01511 #ifndef QT_NO_REGEXP_LOOKAHEAD
01512     ahead.setAutoDelete( TRUE );
01513 #endif
01514 #ifndef QT_NO_REGEXP_OPTIM
01515     caretAnchored = TRUE;
01516 #endif
01517     valid = FALSE;
01518     cs = caseSensitive;
01519 #ifndef QT_NO_REGEXP_BACKREF
01520     nbrefs = 0;
01521 #endif
01522 #ifndef QT_NO_REGEXP_OPTIM
01523     useGoodStringHeuristic = FALSE;
01524     minl = 0;
01525     occ1 = *firstOccurrenceAtZero;
01526 #endif
01527     mmCapturedNoMatch.fill( -1, 2 );
01528 }
01529 
01530 int QRegExpEngine::setupState( int match )
01531 {
01532     if ( (ns & (ns + 1)) == 0 && ns + 1 >= (int) s.size() )
01533     s.resize( (ns + 1) << 1 );
01534 #ifndef QT_NO_REGEXP_CAPTURE
01535     s.insert( ns, new State(cf, match) );
01536 #else
01537     s.insert( ns, new State(match) );
01538 #endif
01539     return ns++;
01540 }
01541 
01542 #ifndef QT_NO_REGEXP_CAPTURE
01543 /*
01544   Functions startAtom() and finishAtom() should be called to delimit atoms.
01545   When a state is created, it is assigned to the current atom.  The information
01546   is later used for capturing.
01547 */
01548 int QRegExpEngine::startAtom( bool capture )
01549 {
01550     if ( (nf & (nf + 1)) == 0 && nf + 1 >= (int) f.size() )
01551     f.resize( (nf + 1) << 1 );
01552     f[nf].parent = cf;
01553     cf = nf++;
01554     f[cf].capture = capture ? ncap++ : -1;
01555     return cf;
01556 }
01557 #endif
01558 
01559 #ifndef QT_NO_REGEXP_LOOKAHEAD
01560 /*
01561   Creates a lookahead anchor.
01562 */
01563 int QRegExpEngine::addLookahead( QRegExpEngine *eng, bool negative )
01564 {
01565     int n = ahead.size();
01566     if ( n == MaxLookaheads ) {
01567     yyError = TRUE;
01568     return 0;
01569     }
01570     ahead.resize( n + 1 );
01571     ahead.insert( n, new Lookahead(eng, negative) );
01572     return Anchor_FirstLookahead << n;
01573 }
01574 #endif
01575 
01576 #ifndef QT_NO_REGEXP_CAPTURE
01577 /*
01578   We want the longest leftmost captures.
01579 */
01580 bool QRegExpEngine::isBetterCapture( const int *begin1, const int *end1,
01581                      const int *begin2, const int *end2 )
01582 {
01583     for ( int i = 0; i < ncap; i++ ) {
01584     int delta = begin2[i] - begin1[i]; // it has to start early...
01585     if ( delta == 0 )
01586         delta = end1[i] - end2[i]; // ...and end late (like a party)
01587 
01588     if ( delta != 0 )
01589         return delta > 0;
01590     }
01591     return FALSE;
01592 }
01593 #endif
01594 
01595 /*
01596   Returns TRUE if anchor a matches at position mmPos + i in the input string,
01597   otherwise FALSE.
01598 */
01599 bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
01600 {
01601     int j;
01602 
01603 #ifndef QT_NO_REGEXP_ANCHOR_ALT
01604     if ( (a & Anchor_Alternation) != 0 ) {
01605     return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) ||
01606            testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin );
01607     }
01608 #endif
01609 
01610     if ( (a & Anchor_Caret) != 0 ) {
01611     if ( mmPos + i != 0 )
01612         return FALSE;
01613     }
01614     if ( (a & Anchor_Dollar) != 0 ) {
01615     if ( mmPos + i != mmLen )
01616         return FALSE;
01617     }
01618 #ifndef QT_NO_REGEXP_ESCAPE
01619     if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) {
01620     bool before = FALSE, after = FALSE;
01621     if ( mmPos + i != 0 )
01622         before = mmIn[mmPos + i - 1].isLetterOrNumber();
01623     if ( mmPos + i != mmLen )
01624         after = mmIn[mmPos + i].isLetterOrNumber();
01625     if ( (a & Anchor_Word) != 0 && (before == after) )
01626         return FALSE;
01627     if ( (a & Anchor_NonWord) != 0 && (before != after) )
01628         return FALSE;
01629     }
01630 #endif
01631 #ifndef QT_NO_REGEXP_LOOKAHEAD
01632     bool catchx = TRUE;
01633 
01634     if ( (a & Anchor_LookaheadMask) != 0 ) {
01635     QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i,
01636                        mmLen - mmPos - i );
01637     for ( j = 0; j < (int) ahead.size(); j++ ) {
01638         if ( (a & (Anchor_FirstLookahead << j)) != 0 ) {
01639         catchx = ( ahead[j]->eng->match(cstr.string(), 0, TRUE,
01640                         TRUE)[0] == 0 );
01641         if ( catchx == ahead[j]->neg )
01642             return FALSE;
01643         }
01644     }
01645     }
01646 #endif
01647 #ifndef QT_NO_REGEXP_CAPTURE
01648 #ifndef QT_NO_REGEXP_BACKREF
01649     for ( j = 0; j < nbrefs; j++ ) {
01650     if ( (a & (Anchor_BackRef1Empty << j)) != 0 ) {
01651         if ( capBegin[j] != EmptyCapture )
01652         return FALSE;
01653     }
01654     }
01655 #endif
01656 #endif
01657     return TRUE;
01658 }
01659 
01660 #ifndef QT_NO_REGEXP_OPTIM
01661 /*
01662   The three following functions are what Jeffrey Friedl would call transmissions
01663   (or bump-alongs).  Using one or the other should make no difference, except
01664   in performance.
01665 */
01666 
01667 bool QRegExpEngine::goodStringMatch()
01668 {
01669     int k = mmPos + goodEarlyStart;
01670 
01671     while ( (k = mmStr->find(goodStr, k, cs)) != -1 ) {
01672     int from = k - goodLateStart;
01673     int to = k - goodEarlyStart;
01674     if ( from > mmPos )
01675         mmPos = from;
01676 
01677     while ( mmPos <= to ) {
01678         if ( matchHere() )
01679         return TRUE;
01680         mmPos++;
01681     }
01682     k++;
01683     }
01684     return FALSE;
01685 }
01686 
01687 bool QRegExpEngine::badCharMatch()
01688 {
01689     int slideHead = 0;
01690     int slideNext = 0;
01691     int i;
01692     int lastPos = mmLen - minl;
01693     memset( mmSlideTab, 0, mmSlideTabSize * sizeof(int) );
01694 
01695     /*
01696       Set up the slide table, used for the bad-character heuristic, using
01697       the table of first occurrence of each character.
01698     */
01699     for ( i = 0; i < minl; i++ ) {
01700     int sk = occ1[BadChar(mmIn[mmPos + i])];
01701     if ( sk == NoOccurrence )
01702         sk = i + 1;
01703     if ( sk > 0 ) {
01704         int k = i + 1 - sk;
01705         if ( k < 0 ) {
01706         sk = i + 1;
01707         k = 0;
01708         }
01709         if ( sk > mmSlideTab[k] )
01710         mmSlideTab[k] = sk;
01711     }
01712     }
01713 
01714     if ( mmPos > lastPos )
01715     return FALSE;
01716 
01717     while ( TRUE ) {
01718     if ( ++slideNext >= mmSlideTabSize )
01719         slideNext = 0;
01720     if ( mmSlideTab[slideHead] > 0 ) {
01721         if ( mmSlideTab[slideHead] - 1 > mmSlideTab[slideNext] )
01722         mmSlideTab[slideNext] = mmSlideTab[slideHead] - 1;
01723         mmSlideTab[slideHead] = 0;
01724     } else {
01725         if ( matchHere() )
01726         return TRUE;
01727     }
01728 
01729     if ( mmPos == lastPos )
01730         break;
01731 
01732     /*
01733       Update the slide table.  This code has much in common with the
01734       initialization code.
01735     */
01736     int sk = occ1[BadChar(mmIn[mmPos + minl])];
01737     if ( sk == NoOccurrence ) {
01738         mmSlideTab[slideNext] = minl;
01739     } else if ( sk > 0 ) {
01740         int k = slideNext + minl - sk;
01741         if ( k >= mmSlideTabSize )
01742         k -= mmSlideTabSize;
01743         if ( sk > mmSlideTab[k] )
01744         mmSlideTab[k] = sk;
01745     }
01746     slideHead = slideNext;
01747     mmPos++;
01748     }
01749     return FALSE;
01750 }
01751 #else
01752 bool QRegExpEngine::bruteMatch()
01753 {
01754     while ( mmPos <= mmLen ) {
01755     if ( matchHere() )
01756         return TRUE;
01757     mmPos++;
01758     }
01759     return FALSE;
01760 }
01761 #endif
01762 
01763 /*
01764   Here's the core of the engine.  It tries to do a match here and now.
01765 */
01766 bool QRegExpEngine::matchHere()
01767 {
01768     int ncur = 1, nnext = 0;
01769     int i = 0, j, k, m;
01770     bool match = FALSE;
01771 
01772     mmMatchedLen = -1;
01773     mmCurStack[0] = InitialState;
01774 
01775 #ifndef QT_NO_REGEXP_CAPTURE
01776     if ( ncap > 0 ) {
01777     for ( j = 0; j < ncap; j++ ) {
01778         mmCurCapBegin[j] = EmptyCapture;
01779         mmCurCapEnd[j] = EmptyCapture;
01780     }
01781     }
01782 #endif
01783 
01784 #ifndef QT_NO_REGEXP_BACKREF
01785     int *zzZ = 0;
01786 
01787     while ( (ncur > 0 || mmSleeping.count() > 0) && i <= mmLen - mmPos &&
01788         !match )
01789 #else
01790     while ( ncur > 0 && i <= mmLen - mmPos && !match )
01791 #endif
01792     {
01793     int ch = ( i < mmLen - mmPos ) ? mmIn[mmPos + i].unicode() : 0;
01794     for ( j = 0; j < ncur; j++ ) {
01795         int cur = mmCurStack[j];
01796         State *scur = s[cur];
01797         QArray<int>& outs = scur->outs;
01798         for ( k = 0; k < (int) outs.size(); k++ ) {
01799         int next = outs[k];
01800         State *snext = s[next];
01801         bool in = TRUE;
01802 #ifndef QT_NO_REGEXP_BACKREF
01803         int needSomeSleep = 0;
01804 #endif
01805 
01806         /*
01807           First, check if the anchors are anchored properly.
01808         */
01809         if ( scur->anchors != 0 ) {
01810             int a = at( *scur->anchors, next );
01811             if ( a != 0 && !testAnchor(i, a, mmCurCapBegin + j * ncap) )
01812             in = FALSE;
01813         }
01814         /*
01815           If indeed they are, check if the input character is correct
01816           for this transition.
01817         */
01818         if ( in ) {
01819             m = snext->match;
01820             if ( (m & (CharClassBit | BackRefBit)) == 0 ) {
01821             if ( cs )
01822                 in = ( m == ch );
01823             else
01824                 in = ( QChar(m).lower() == QChar(ch).lower() );
01825             } else if ( next == FinalState ) {
01826             mmMatchedLen = i;
01827             match = mmMinimal;
01828             in = TRUE;
01829             } else if ( (m & CharClassBit) != 0 ) {
01830 #ifndef QT_NO_REGEXP_CCLASS
01831             const CharClass *cc = cl[m ^ CharClassBit];
01832             if ( cs )
01833                 in = cc->in( ch );
01834             else if ( cc->negative() )
01835                 in = cc->in( QChar(ch).lower() ) &&
01836                  cc->in( QChar(ch).upper() );
01837             else
01838                 in = cc->in( QChar(ch).lower() ) ||
01839                  cc->in( QChar(ch).upper() );
01840 #endif
01841 #ifndef QT_NO_REGEXP_BACKREF
01842             } else { /* ( (m & BackRefBit) != 0 ) */
01843             int bref = m ^ BackRefBit;
01844             int ell = j * ncap + ( bref - 1 );
01845 
01846             in = bref <= ncap && mmCurCapBegin[ell] != EmptyCapture;
01847             if ( in ) {
01848                 if ( cs )
01849                 in = ( mmIn[mmPos + mmCurCapBegin[ell]]
01850                        == QChar(ch) );
01851                 else
01852                 in = ( mmIn[mmPos + mmCurCapBegin[ell]].lower()
01853                        == QChar(ch).lower() );
01854             }
01855 
01856             if ( in ) {
01857                 int delta;
01858                 if ( mmCurCapEnd[ell] == EmptyCapture )
01859                 delta = i - mmCurCapBegin[ell];
01860                 else
01861                 delta = mmCurCapEnd[ell] - mmCurCapBegin[ell];
01862 
01863                 in = ( delta <= mmLen - mmPos );
01864                 if ( in && delta > 1 ) {
01865                 int n;
01866                 if ( cs ) {
01867                     for ( n = 1; n < delta; n++ ) {
01868                     if ( mmIn[mmPos +
01869                           mmCurCapBegin[ell] + n] !=
01870                          mmIn[mmPos + i + n] )
01871                         break;
01872                     }
01873                 } else {
01874                     for ( n = 1; n < delta; n++ ) {
01875                     QChar a = mmIn[mmPos +
01876                                mmCurCapBegin[ell] + n];
01877                     QChar b = mmIn[mmPos + i + n];
01878                     if ( a.lower() != b.lower() )
01879                         break;
01880                     }
01881                 }
01882                 in = ( n == delta );
01883                 if ( in )
01884                     needSomeSleep = delta - 1;
01885                 }
01886             }
01887 #endif
01888             }
01889         }
01890 
01891         /*
01892           All is right.  We must now update our data structures.
01893         */
01894         if ( in ) {
01895 #ifndef QT_NO_REGEXP_CAPTURE
01896             int *capBegin, *capEnd;
01897 #endif
01898             /*
01899               If the next state was not encountered yet, all is fine.
01900             */
01901             if ( (m = mmInNextStack[next]) == -1 ) {
01902             m = nnext++;
01903             mmNextStack[m] = next;
01904             mmInNextStack[next] = m;
01905 #ifndef QT_NO_REGEXP_CAPTURE
01906             capBegin = mmNextCapBegin + m * ncap;
01907             capEnd = mmNextCapEnd + m * ncap;
01908 
01909             /*
01910               Otherwise, we'll first maintain captures in temporary
01911               arrays, and decide at the end whether it's best to keep
01912               the previous capture zones or the new ones.
01913             */
01914             } else {
01915             capBegin = mmTempCapBegin;
01916             capEnd = mmTempCapEnd;
01917 #endif
01918             }
01919 
01920 #ifndef QT_NO_REGEXP_CAPTURE
01921             /*
01922               Updating the capture zones is much of a task.
01923             */
01924             if ( ncap > 0 ) {
01925             memcpy( capBegin, mmCurCapBegin + j * ncap,
01926                 ncap * sizeof(int) );
01927             memcpy( capEnd, mmCurCapEnd + j * ncap,
01928                 ncap * sizeof(int) );
01929             int c = scur->atom, n = snext->atom;
01930             int p = -1, q = -1;
01931             int cap;
01932 
01933             /*
01934               Lemma 1.  For any x in the range [0..nf), we have
01935               f[x].parent < x.
01936 
01937               Proof.  By looking at startAtom(), it is clear that
01938               cf < nf holds all the time, and thus that
01939               f[nf].parent < nf.
01940             */
01941 
01942             /*
01943               If we are reentering an atom, we empty all capture
01944               zones inside it.
01945             */
01946             if ( scur->reenter != 0 &&
01947                  (q = at(*scur->reenter, next)) != 0 ) {
01948                 QBitArray b;
01949                 b.fill( FALSE, nf );
01950                 b.setBit( q, TRUE );
01951                 for ( int ell = q + 1; ell < nf; ell++ ) {
01952                 if ( b.testBit(f[ell].parent) ) {
01953                     b.setBit( ell, TRUE );
01954                     cap = f[ell].capture;
01955                     if ( cap >= 0 ) {
01956                     capBegin[cap] = EmptyCapture;
01957                     capEnd[cap] = EmptyCapture;
01958                     }
01959                 }
01960                 }
01961                 p = f[q].parent;
01962 
01963             /*
01964               Otherwise, close the capture zones we are leaving.
01965               We are leaving f[c].capture, f[f[c].parent].capture,
01966               f[f[f[c].parent].parent].capture, ..., until
01967               f[x].capture, with x such that f[x].parent is the
01968               youngest common ancestor for c and n.
01969 
01970               We go up along c's and n's ancestry until we find x.
01971             */
01972             } else {
01973                 p = c;
01974                 q = n;
01975                 while ( p != q ) {
01976                 if ( p > q ) {
01977                     cap = f[p].capture;
01978                     if ( cap >= 0 ) {
01979                     if ( capBegin[cap] == i ) {
01980                         capBegin[cap] = EmptyCapture;
01981                         capEnd[cap] = EmptyCapture;
01982                     } else {
01983                         capEnd[cap] = i;
01984                     }
01985                     }
01986                     p = f[p].parent;
01987                 } else {
01988                     q = f[q].parent;
01989                 }
01990                 }
01991             }
01992 
01993             /*
01994               In any case, we now open the capture zones we are
01995               entering.  We work upwards from n until we reach p
01996               (the parent of the atom we reenter or the youngest
01997               common ancestor).
01998             */
01999             while ( n > p ) {
02000                 cap = f[n].capture;
02001                 if ( cap >= 0 ) {
02002                 capBegin[cap] = i;
02003                 capEnd[cap] = EmptyCapture;
02004                 }
02005                 n = f[n].parent;
02006             }
02007             /*
02008               If the next state was already in mmNextStack, we must
02009               choose carefully which capture zones we want to keep.
02010             */
02011             if ( capBegin == mmTempCapBegin &&
02012                  isBetterCapture(capBegin, capEnd,
02013                          mmNextCapBegin + m * ncap,
02014                          mmNextCapEnd + m * ncap) ) {
02015                 memcpy( mmNextCapBegin + m * ncap, capBegin,
02016                     ncap * sizeof(int) );
02017                 memcpy( mmNextCapEnd + m * ncap, capEnd,
02018                     ncap * sizeof(int) );
02019             }
02020             }
02021 #ifndef QT_NO_REGEXP_BACKREF
02022             /*
02023               We are done with updating the capture zones.  It's now
02024               time to put the next state to sleep, if it needs to, and
02025               to remove it from mmNextStack.
02026             */
02027             if ( needSomeSleep > 0 ) {
02028             zzZ = new int[1 + 2 * ncap];
02029             zzZ[0] = next;
02030             if ( ncap > 0 ) {
02031                 memcpy( zzZ + 1, capBegin, ncap * sizeof(int) );
02032                 memcpy( zzZ + 1 + ncap, capEnd,
02033                     ncap * sizeof(int) );
02034             }
02035             mmInNextStack[mmNextStack[--nnext]] = -1;
02036             mmSleeping.insert( i + needSomeSleep, zzZ );
02037             }
02038 #endif
02039 #endif
02040         }
02041         }
02042     }
02043 #ifndef QT_NO_REGEXP_CAPTURE
02044     /*
02045       If we reached the final state, hurray!  Copy the captured zone.
02046     */
02047     if ( ncap > 0 && (m = mmInNextStack[FinalState]) != -1 ) {
02048         memcpy( mmCapBegin, mmNextCapBegin + m * ncap, ncap * sizeof(int) );
02049         memcpy( mmCapEnd, mmNextCapEnd + m * ncap, ncap * sizeof(int) );
02050     }
02051 #ifndef QT_NO_REGEXP_BACKREF
02052     /*
02053       It's time to wake up the sleepers.
02054     */
02055     if ( mmSleeping.count() > 0 ) {
02056         while ( (zzZ = mmSleeping.take(i)) != 0 ) {
02057         int next = zzZ[0];
02058         int *capBegin = zzZ + 1;
02059         int *capEnd = zzZ + 1 + ncap;
02060         bool copyOver = TRUE;
02061 
02062         if ( (m = mmInNextStack[zzZ[0]]) == -1 ) {
02063             m = nnext++;
02064             mmNextStack[m] = next;
02065             mmInNextStack[next] = m;
02066         } else {
02067             copyOver = isBetterCapture( mmNextCapBegin + m * ncap,
02068                         mmNextCapEnd + m * ncap,
02069                         capBegin, capEnd );
02070         }
02071         if ( copyOver ) {
02072             memcpy( mmNextCapBegin + m * ncap, capBegin,
02073                 ncap * sizeof(int) );
02074             memcpy( mmNextCapEnd + m * ncap, capEnd,
02075                 ncap * sizeof(int) );
02076         }
02077         delete[] zzZ;
02078         }
02079     }
02080 #endif
02081 #endif
02082     for ( j = 0; j < nnext; j++ )
02083         mmInNextStack[mmNextStack[j]] = -1;
02084 
02085     qSwap( mmCurStack, mmNextStack );
02086 #ifndef QT_NO_REGEXP_CAPTURE
02087     qSwap( mmCurCapBegin, mmNextCapBegin );
02088     qSwap( mmCurCapEnd, mmNextCapEnd );
02089 #endif
02090     ncur = nnext;
02091     nnext = 0;
02092     i++;
02093     }
02094 
02095 #ifndef QT_NO_REGEXP_BACKREF
02096     /*
02097       If minimal matching is enabled, we might have some sleepers left.
02098     */
02099     while ( !mmSleeping.isEmpty() ) {
02100     zzZ = mmSleeping.take( *QIntDictIterator<int>(mmSleeping) );
02101     delete[] zzZ;
02102     }
02103 #endif
02104 
02105     match = ( mmMatchedLen >= 0 );
02106     if ( !match )
02107     mmMatchedLen = i - 1;
02108     return match;
02109 }
02110 
02111 #ifndef QT_NO_REGEXP_CCLASS
02112 
02113 QRegExpEngine::CharClass::CharClass()
02114     : c( 0 ), n( FALSE )
02115 #ifndef QT_NO_REGEXP_OPTIM
02116       , occ1( *noOccurrences )
02117 #endif
02118 {
02119 }
02120 
02121 QRegExpEngine::CharClass& QRegExpEngine::CharClass::operator=(
02122     const CharClass& cc )
02123 {
02124     c = cc.c;
02125     r = cc.r.copy();
02126     n = cc.n;
02127 #ifndef QT_NO_REGEXP_OPTIM
02128     occ1 = cc.occ1;
02129 #endif
02130     return *this;
02131 }
02132 
02133 void QRegExpEngine::CharClass::clear()
02134 {
02135     c = 0;
02136     r.resize( 0 );
02137     n = FALSE;
02138 }
02139 
02140 void QRegExpEngine::CharClass::setNegative( bool negative )
02141 {
02142     n = negative;
02143 #ifndef QT_NO_REGEXP_OPTIM
02144     occ1 = *firstOccurrenceAtZero;
02145 #endif
02146 }
02147 
02148 void QRegExpEngine::CharClass::addCategories( int cats )
02149 {
02150     c |= cats;
02151 #ifndef QT_NO_REGEXP_OPTIM
02152     occ1 = *firstOccurrenceAtZero;
02153 #endif
02154 }
02155 
02156 void QRegExpEngine::CharClass::addRange( ushort from, ushort to )
02157 {
02158     if ( from > to )
02159     qSwap( from, to );
02160     int n = r.size();
02161     r.resize( n + 1 );
02162     r[n].from = from;
02163     r[n].to = to;
02164 
02165 #ifndef QT_NO_REGEXP_OPTIM
02166     int i;
02167 
02168     if ( to - from < NumBadChars ) {
02169     occ1.detach();
02170     if ( from % NumBadChars <= to % NumBadChars ) {
02171         for ( i = from % NumBadChars; i <= to % NumBadChars; i++ )
02172         occ1[i] = 0;
02173     } else {
02174         for ( i = 0; i <= to % NumBadChars; i++ )
02175         occ1[i] = 0;
02176         for ( i = from % NumBadChars; i < NumBadChars; i++ )
02177         occ1[i] = 0;
02178     }
02179     } else {
02180     occ1 = *firstOccurrenceAtZero;
02181     }
02182 #endif
02183 }
02184 
02185 bool QRegExpEngine::CharClass::in( QChar ch ) const
02186 {
02187 #ifndef QT_NO_REGEXP_OPTIM
02188     if ( occ1[BadChar(ch)] == NoOccurrence )
02189     return n;
02190 #endif
02191 
02192     if ( c != 0 && (c & (1 << (int) ch.category())) != 0 )
02193     return !n;
02194     for ( int i = 0; i < (int) r.size(); i++ ) {
02195     if ( ch.unicode() >= r[i].from && ch.unicode() <= r[i].to )
02196         return !n;
02197     }
02198     return n;
02199 }
02200 
02201 #if defined(QT_DEBUG)
02202 void QRegExpEngine::CharClass::dump() const
02203 {
02204     int i;
02205     odebug << "    " << (n ? "nega" : "posi") << "tive character class" << oendl;
02206 #ifndef QT_NO_REGEXP_CCLASS
02207     if ( c != 0 )
02208     odebug << QString().sprintf("      categories 0x%.8x", c ) << oendl;
02209 #endif
02210     for ( i = 0; i < (int) r.size(); i++ )
02211     odebug << QString().sprintf("      0x%.4x through 0x%.4x", r[i].from, r[i].to ) << oendl;
02212 }
02213 #endif
02214 #endif
02215 
02216 QRegExpEngine::Box::Box( QRegExpEngine *engine )
02217     : eng( engine ), skipanchors( 0 )
02218 #ifndef QT_NO_REGEXP_OPTIM
02219       , earlyStart( 0 ), lateStart( 0 ), maxl( 0 ), occ1( *noOccurrences )
02220 #endif
02221 {
02222     minl = 0;
02223 }
02224 
02225 QRegExpEngine::Box& QRegExpEngine::Box::operator=( const Box& b )
02226 {
02227     eng = b.eng;
02228     ls = b.ls;
02229     rs = b.rs;
02230     lanchors = b.lanchors;
02231     ranchors = b.ranchors;
02232     skipanchors = b.skipanchors;
02233 #ifndef QT_NO_REGEXP_OPTIM
02234     earlyStart = b.earlyStart;
02235     lateStart = b.lateStart;
02236     str = b.str;
02237     leftStr = b.leftStr;
02238     rightStr = b.rightStr;
02239     maxl = b.maxl;
02240     occ1 = b.occ1;
02241 #endif
02242     minl = b.minl;
02243     return *this;
02244 }
02245 
02246 void QRegExpEngine::Box::set( QChar ch )
02247 {
02248     ls.resize( 1 );
02249     ls[0] = eng->createState( ch );
02250     rs = ls;
02251     rs.detach();
02252 #ifndef QT_NO_REGEXP_OPTIM
02253     str = ch;
02254     leftStr = ch;
02255     rightStr = ch;
02256     maxl = 1;
02257     occ1.detach();
02258     occ1[BadChar(ch)] = 0;
02259 #endif
02260     minl = 1;
02261 }
02262 
02263 void QRegExpEngine::Box::set( const CharClass& cc )
02264 {
02265     ls.resize( 1 );
02266     ls[0] = eng->createState( cc );
02267     rs = ls;
02268     rs.detach();
02269 #ifndef QT_NO_REGEXP_OPTIM
02270     maxl = 1;
02271     occ1 = cc.firstOccurrence();
02272 #endif
02273     minl = 1;
02274 }
02275 
02276 #ifndef QT_NO_REGEXP_BACKREF
02277 void QRegExpEngine::Box::set( int bref )
02278 {
02279     ls.resize( 1 );
02280     ls[0] = eng->createState( bref );
02281     rs = ls;
02282     rs.detach();
02283     if ( bref >= 1 && bref <= MaxBackRefs )
02284     skipanchors = Anchor_BackRef0Empty << bref;
02285 #ifndef QT_NO_REGEXP_OPTIM
02286     maxl = InftyLen;
02287 #endif
02288     minl = 0;
02289 }
02290 #endif
02291 
02292 void QRegExpEngine::Box::cat( const Box& b )
02293 {
02294     eng->addCatTransitions( rs, b.ls );
02295     addAnchorsToEngine( b );
02296     if ( minl == 0 ) {
02297     mergeInto( &lanchors, b.lanchors );
02298     if ( skipanchors != 0 ) {
02299         for ( int i = 0; i < (int) b.ls.size(); i++ ) {
02300         int a = eng->anchorConcatenation( at(lanchors, b.ls[i]),
02301                           skipanchors );
02302         lanchors.insert( b.ls[i], a );
02303         }
02304     }
02305     mergeInto( &ls, b.ls );
02306     }
02307     if ( b.minl == 0 ) {
02308     mergeInto( &ranchors, b.ranchors );
02309     if ( b.skipanchors != 0 ) {
02310         for ( int i = 0; i < (int) rs.size(); i++ ) {
02311         int a = eng->anchorConcatenation( at(ranchors, rs[i]),
02312                           b.skipanchors );
02313         ranchors.insert( rs[i], a );
02314         }
02315     }
02316     mergeInto( &rs, b.rs );
02317     } else {
02318     ranchors = b.ranchors;
02319     rs = b.rs;
02320     }
02321 
02322 #ifndef QT_NO_REGEXP_OPTIM
02323     if ( maxl != InftyLen ) {
02324     if ( rightStr.length() + b.leftStr.length() >
02325          QMAX(str.length(), b.str.length()) ) {
02326         earlyStart = minl - rightStr.length();
02327         lateStart = maxl - rightStr.length();
02328         str = rightStr + b.leftStr;
02329     } else if ( b.str.length() > str.length() ) {
02330         earlyStart = minl + b.earlyStart;
02331         lateStart = maxl + b.lateStart;
02332         str = b.str;
02333     }
02334     }
02335 
02336     if ( (int) leftStr.length() == maxl )
02337     leftStr += b.leftStr;
02338     if ( (int) b.rightStr.length() == b.maxl )
02339     rightStr += b.rightStr;
02340     else
02341     rightStr = b.rightStr;
02342 
02343     if ( maxl == InftyLen || b.maxl == InftyLen )
02344     maxl = InftyLen;
02345     else
02346     maxl += b.maxl;
02347 
02348     occ1.detach();
02349     for ( int i = 0; i < NumBadChars; i++ ) {
02350     if ( b.occ1[i] != NoOccurrence && minl + b.occ1[i] < occ1[i] )
02351         occ1[i] = minl + b.occ1[i];
02352     }
02353 #endif
02354 
02355     minl += b.minl;
02356     if ( minl == 0 )
02357     skipanchors = eng->anchorConcatenation( skipanchors, b.skipanchors );
02358     else
02359     skipanchors = 0;
02360 }
02361 
02362 void QRegExpEngine::Box::orx( const Box& b )
02363 {
02364     mergeInto( &ls, b.ls );
02365     mergeInto( &lanchors, b.lanchors );
02366     mergeInto( &rs, b.rs );
02367     mergeInto( &ranchors, b.ranchors );
02368     skipanchors = eng->anchorAlternation( skipanchors, b.skipanchors );
02369 
02370 #ifndef QT_NO_REGEXP_OPTIM
02371     occ1.detach();
02372     for ( int i = 0; i < NumBadChars; i++ ) {
02373     if ( occ1[i] > b.occ1[i] )
02374         occ1[i] = b.occ1[i];
02375     }
02376     earlyStart = 0;
02377     lateStart = 0;
02378     str = QString::null;
02379     leftStr = QString::null;
02380     rightStr = QString::null;
02381     if ( b.maxl > maxl )
02382     maxl = b.maxl;
02383 #endif
02384     if ( b.minl < minl )
02385     minl = b.minl;
02386 }
02387 
02388 void QRegExpEngine::Box::plus( int atom )
02389 {
02390 #ifndef QT_NO_REGEXP_CAPTURE
02391     eng->addPlusTransitions( rs, ls, atom );
02392 #else
02393     Q_UNUSED( atom );
02394     eng->addCatTransitions( rs, ls );
02395 #endif
02396     addAnchorsToEngine( *this );
02397 #ifndef QT_NO_REGEXP_OPTIM
02398     maxl = InftyLen;
02399 #endif
02400 }
02401 
02402 void QRegExpEngine::Box::opt()
02403 {
02404 #ifndef QT_NO_REGEXP_OPTIM
02405     earlyStart = 0;
02406     lateStart = 0;
02407     str = QString::null;
02408     leftStr = QString::null;
02409     rightStr = QString::null;
02410 #endif
02411     skipanchors = 0;
02412     minl = 0;
02413 }
02414 
02415 void QRegExpEngine::Box::catAnchor( int a )
02416 {
02417     if ( a != 0 ) {
02418     for ( int i = 0; i < (int) rs.size(); i++ ) {
02419         a = eng->anchorConcatenation( at(ranchors, rs[i]), a );
02420         ranchors.insert( rs[i], a );
02421     }
02422     if ( minl == 0 )
02423         skipanchors = eng->anchorConcatenation( skipanchors, a );
02424     }
02425 }
02426 
02427 #ifndef QT_NO_REGEXP_OPTIM
02428 void QRegExpEngine::Box::setupHeuristics()
02429 {
02430     eng->setupGoodStringHeuristic( earlyStart, lateStart, str );
02431 
02432     /*
02433       A regular expression such as 112|1 has occ1['2'] = 2 and minl = 1 at this
02434       point.  An entry of occ1 has to be at most minl or infinity for the rest
02435       of the algorithm to go well.
02436 
02437       We waited until here before normalizing these cases (instead of doing it
02438       in Box::orx()) because sometimes things improve by themselves; consider
02439       (112|1)34.
02440     */
02441     for ( int i = 0; i < NumBadChars; i++ ) {
02442     if ( occ1[i] != NoOccurrence && occ1[i] >= minl )
02443         occ1[i] = minl;
02444     }
02445     eng->setupBadCharHeuristic( minl, occ1 );
02446 
02447     eng->heuristicallyChooseHeuristic();
02448 }
02449 #endif
02450 
02451 #if defined(QT_DEBUG)
02452 void QRegExpEngine::Box::dump() const
02453 {
02454     int i;
02455     odebug << "Box of at least " << minl << " character" << (minl == 1 ? "" : "s") << oendl;
02456     odebug << "  Left states:" << oendl;
02457     for ( i = 0; i < (int) ls.size(); i++ ) {
02458     if ( at(lanchors, ls[i]) == 0 )
02459         odebug << "    " << ls[i] << oendl;
02460     else
02461         odebug << "    " << ls[i] << QString().sprintf(" [anchors 0x%.8x]", lanchors[ls[i]]) << oendl;
02462     }
02463     odebug << "  Right states:" << oendl;
02464     for ( i = 0; i < (int) rs.size(); i++ ) {
02465     if ( at(ranchors, ls[i]) == 0 )
02466         odebug << "    " << rs[i] << oendl;
02467     else
02468         odebug << "    " << rs[i] << QString().sprintf(" [anchors 0x%.8x]", ranchors[rs[i]]) << oendl;
02469     }
02470     odebug << QString().sprintf("  Skip anchors: 0x%.8x", skipanchors) << oendl;
02471 }
02472 #endif
02473 
02474 void QRegExpEngine::Box::addAnchorsToEngine( const Box& to ) const
02475 {
02476     for ( int i = 0; i < (int) to.ls.size(); i++ ) {
02477     for ( int j = 0; j < (int) rs.size(); j++ ) {
02478         int a = eng->anchorConcatenation( at(ranchors, rs[j]),
02479                           at(to.lanchors, to.ls[i]) );
02480         eng->addAnchors( rs[j], to.ls[i], a );
02481     }
02482     }
02483 }
02484 
02485 int QRegExpEngine::getChar()
02486 {
02487     return ( yyPos == yyLen ) ? EOS : yyIn[yyPos++].unicode();
02488 }
02489 
02490 int QRegExpEngine::getEscape()
02491 {
02492 #ifndef QT_NO_REGEXP_ESCAPE
02493     const char tab[] = "afnrtv"; // no b, as \b means word boundary
02494     const char backTab[] = "\a\f\n\r\t\v";
02495     ushort low;
02496     int i;
02497 #endif
02498     ushort val;
02499     int prevCh = yyCh;
02500 
02501     if ( prevCh == EOS ) {
02502     yyError = TRUE;
02503     return Tok_Char | '\\';
02504     }
02505     yyCh = getChar();
02506 #ifndef QT_NO_REGEXP_ESCAPE
02507     if ( (prevCh & ~0xff) == 0 ) {
02508     const char *p = strchr( tab, prevCh );
02509     if ( p != 0 )
02510         return Tok_Char | backTab[p - tab];
02511     }
02512 #endif
02513 
02514     switch ( prevCh ) {
02515 #ifndef QT_NO_REGEXP_ESCAPE
02516     case '0':
02517     val = 0;
02518     for ( i = 0; i < 3; i++ ) {
02519         if ( yyCh >= '0' && yyCh <= '7' )
02520         val = ( val << 3 ) | ( yyCh - '0' );
02521         else
02522         break;
02523         yyCh = getChar();
02524     }
02525     if ( (val & ~0377) != 0 )
02526         yyError = TRUE;
02527     return Tok_Char | val;
02528 #endif
02529 #ifndef QT_NO_REGEXP_ESCAPE
02530     case 'B':
02531     return Tok_NonWord;
02532 #endif
02533 #ifndef QT_NO_REGEXP_CCLASS
02534     case 'D':
02535     // see QChar::isDigit()
02536     yyCharClass->addCategories( 0x7fffffef );
02537     return Tok_CharClass;
02538     case 'S':
02539     // see QChar::isSpace()
02540     yyCharClass->addCategories( 0x7ffff87f );
02541     yyCharClass->addRange( 0x0000, 0x0008 );
02542     yyCharClass->addRange( 0x000e, 0x001f );
02543     yyCharClass->addRange( 0x007f, 0x009f );
02544     return Tok_CharClass;
02545     case 'W':
02546     // see QChar::isLetterOrNumber()
02547     yyCharClass->addCategories( 0x7ff07f8f );
02548     return Tok_CharClass;
02549 #endif
02550 #ifndef QT_NO_REGEXP_ESCAPE
02551     case 'b':
02552     return Tok_Word;
02553 #endif
02554 #ifndef QT_NO_REGEXP_CCLASS
02555     case 'd':
02556     // see QChar::isDigit()
02557     yyCharClass->addCategories( 0x00000010 );
02558     return Tok_CharClass;
02559     case 's':
02560     // see QChar::isSpace()
02561     yyCharClass->addCategories( 0x00000380 );
02562     yyCharClass->addRange( 0x0009, 0x000d );
02563     return Tok_CharClass;
02564     case 'w':
02565     // see QChar::isLetterOrNumber()
02566     yyCharClass->addCategories( 0x000f8070 );
02567     return Tok_CharClass;
02568 #endif
02569 #ifndef QT_NO_REGEXP_ESCAPE
02570     case 'x':
02571     val = 0;
02572     for ( i = 0; i < 4; i++ ) {
02573         low = QChar( yyCh ).lower();
02574         if ( low >= '0' && low <= '9' )
02575         val = ( val << 4 ) | ( low - '0' );
02576         else if ( low >= 'a' && low <= 'f' )
02577         val = ( val << 4 ) | ( low - 'a' + 10 );
02578         else
02579         break;
02580         yyCh = getChar();
02581     }
02582     return Tok_Char | val;
02583 #endif
02584     default:
02585     if ( prevCh >= '1' && prevCh <= '9' ) {
02586 #ifndef QT_NO_REGEXP_BACKREF
02587         val = prevCh - '0';
02588         while ( yyCh >= '0' && yyCh <= '9' ) {
02589         val = ( val *= 10 ) | ( yyCh - '0' );
02590         yyCh = getChar();
02591         }
02592         return Tok_BackRef | val;
02593 #else
02594         yyError = TRUE;
02595 #endif
02596     }
02597     return Tok_Char | prevCh;
02598     }
02599 }
02600 
02601 #ifndef QT_NO_REGEXP_INTERVAL
02602 int QRegExpEngine::getRep( int def )
02603 {
02604     if ( yyCh >= '0' && yyCh <= '9' ) {
02605     int rep = 0;
02606     do {
02607         rep = 10 * rep + yyCh - '0';
02608         if ( rep >= InftyRep ) {
02609         yyError = TRUE;
02610         rep = def;
02611         }
02612         yyCh = getChar();
02613     } while ( yyCh >= '0' && yyCh <= '9' );
02614     return rep;
02615     } else {
02616     return def;
02617     }
02618 }
02619 #endif
02620 
02621 #ifndef QT_NO_REGEXP_LOOKAHEAD
02622 void QRegExpEngine::skipChars( int n )
02623 {
02624     if ( n > 0 ) {
02625     yyPos += n - 1;
02626     yyCh = getChar();
02627     }
02628 }
02629 #endif
02630 
02631 void QRegExpEngine::startTokenizer( const QChar *rx, int len )
02632 {
02633     yyIn = rx;
02634     yyPos0 = 0;
02635     yyPos = 0;
02636     yyLen = len;
02637     yyCh = getChar();
02638     yyCharClass = new CharClass;
02639     yyMinRep = 0;
02640     yyMaxRep = 0;
02641     yyError = FALSE;
02642 }
02643 
02644 int QRegExpEngine::getToken()
02645 {
02646 #ifndef QT_NO_REGEXP_CCLASS
02647     ushort pendingCh = 0;
02648     bool charPending;
02649     bool rangePending;
02650     int tok;
02651 #endif
02652     int prevCh = yyCh;
02653 
02654     yyPos0 = yyPos - 1;
02655 #ifndef QT_NO_REGEXP_CCLASS
02656     yyCharClass->clear();
02657 #endif
02658     yyMinRep = 0;
02659     yyMaxRep = 0;
02660     yyCh = getChar();
02661     switch ( prevCh ) {
02662     case EOS:
02663     yyPos0 = yyPos;
02664     return Tok_Eos;
02665     case '$':
02666     return Tok_Dollar;
02667     case '(':
02668     if ( yyCh == '?' ) {
02669         prevCh = getChar();
02670         yyCh = getChar();
02671         switch ( prevCh ) {
02672 #ifndef QT_NO_REGEXP_LOOKAHEAD
02673         case '!':
02674         return Tok_NegLookahead;
02675         case '=':
02676         return Tok_PosLookahead;
02677 #endif
02678         case ':':
02679         return Tok_MagicLeftParen;
02680         default:
02681         yyError = TRUE;
02682         return Tok_MagicLeftParen;
02683         }
02684     } else {
02685         return Tok_LeftParen;
02686     }
02687     case ')':
02688     return Tok_RightParen;
02689     case '*':
02690     yyMinRep = 0;
02691     yyMaxRep = InftyRep;
02692     return Tok_Quantifier;
02693     case '+':
02694     yyMinRep = 1;
02695     yyMaxRep = InftyRep;
02696     return Tok_Quantifier;
02697     case '.':
02698 #ifndef QT_NO_REGEXP_CCLASS
02699     yyCharClass->setNegative( TRUE );
02700 #endif
02701     return Tok_CharClass;
02702     case '?':
02703     yyMinRep = 0;
02704     yyMaxRep = 1;
02705     return Tok_Quantifier;
02706     case '[':
02707 #ifndef QT_NO_REGEXP_CCLASS
02708     if ( yyCh == '^' ) {
02709         yyCharClass->setNegative( TRUE );
02710         yyCh = getChar();
02711     }
02712     charPending = FALSE;
02713     rangePending = FALSE;
02714     do {
02715         if ( yyCh == '-' && charPending && !rangePending ) {
02716         rangePending = TRUE;
02717         yyCh = getChar();
02718         } else {
02719         if ( charPending && !rangePending ) {
02720             yyCharClass->addSingleton( pendingCh );
02721             charPending = FALSE;
02722         }
02723         if ( yyCh == '\\' ) {
02724             yyCh = getChar();
02725             tok = getEscape();
02726             if ( tok == Tok_Word )
02727             tok = '\b';
02728         } else {
02729             tok = Tok_Char | yyCh;
02730             yyCh = getChar();
02731         }
02732         if ( tok == Tok_CharClass ) {
02733             if ( rangePending ) {
02734             yyCharClass->addSingleton( '-' );
02735             yyCharClass->addSingleton( pendingCh );
02736             charPending = FALSE;
02737             rangePending = FALSE;
02738             }
02739         } else if ( (tok & Tok_Char) != 0 ) {
02740             if ( rangePending ) {
02741             yyCharClass->addRange( pendingCh, tok ^ Tok_Char );
02742             charPending = FALSE;
02743             rangePending = FALSE;
02744             } else {
02745             pendingCh = tok ^ Tok_Char;
02746             charPending = TRUE;
02747             }
02748         } else {
02749             yyError = TRUE;
02750         }
02751         }
02752     }  while ( yyCh != ']' && yyCh != EOS );
02753     if ( rangePending )
02754         yyCharClass->addSingleton( '-' );
02755     if ( charPending )
02756         yyCharClass->addSingleton( pendingCh );
02757     if ( yyCh == EOS )
02758         yyError = TRUE;
02759     else
02760         yyCh = getChar();
02761     return Tok_CharClass;
02762 #else
02763     yyError = TRUE;
02764     return Tok_Char | '[';
02765 #endif
02766     case '\\':
02767     return getEscape();
02768     case ']':
02769     yyError = TRUE;
02770     return Tok_Char | ']';
02771     case '^':
02772     return Tok_Caret;
02773 #ifndef QT_NO_REGEXP_INTERVAL
02774     case '{':
02775     yyMinRep = getRep( 0 );
02776     yyMaxRep = yyMinRep;
02777     if ( yyCh == ',' ) {
02778         yyCh = getChar();
02779         yyMaxRep = getRep( InftyRep );
02780     }
02781     if ( yyMaxRep < yyMinRep )
02782         qSwap( yyMinRep, yyMaxRep );
02783     if ( yyCh != '}' )
02784         yyError = TRUE;
02785     yyCh = getChar();
02786     return Tok_Quantifier;
02787 #else
02788     yyError = TRUE;
02789     return Tok_Char | '{';
02790 #endif
02791     case '|':
02792     return Tok_Bar;
02793     case '}':
02794     yyError = TRUE;
02795     return Tok_Char | '}';
02796     default:
02797     return Tok_Char | prevCh;
02798     }
02799 }
02800 
02801 int QRegExpEngine::parse( const QChar *pattern, int len )
02802 {
02803     valid = TRUE;
02804     startTokenizer( pattern, len );
02805     yyTok = getToken();
02806 #ifndef QT_NO_REGEXP_CAPTURE
02807     yyMayCapture = TRUE;
02808 #else
02809     yyMayCapture = FALSE;
02810 #endif
02811 
02812 #ifndef QT_NO_REGEXP_CAPTURE
02813     int atom = startAtom( FALSE );
02814 #endif
02815     CharClass anything;
02816     Box box( this ); // create InitialState
02817     box.set( anything );
02818     Box rightBox( this ); // create FinalState
02819     rightBox.set( anything );
02820 
02821     Box middleBox( this );
02822     parseExpression( &middleBox );
02823 #ifndef QT_NO_REGEXP_CAPTURE
02824     finishAtom( atom );
02825 #endif
02826 #ifndef QT_NO_REGEXP_OPTIM
02827     middleBox.setupHeuristics();
02828 #endif
02829     box.cat( middleBox );
02830     box.cat( rightBox );
02831     delete yyCharClass;
02832     yyCharClass = 0;
02833 
02834     realncap = ncap;
02835 #ifndef QT_NO_REGEXP_BACKREF
02836     if ( nbrefs > ncap )
02837     ncap = nbrefs;
02838 #endif
02839 
02840     mmCaptured.resize( 2 + 2 * realncap );
02841     mmCapturedNoMatch.fill( -1, 2 + 2 * realncap );
02842 
02843     /*
02844       We use one QArray<int> for all the big data used a lot in matchHere() and
02845       friends.
02846     */
02847 #ifndef QT_NO_REGEXP_OPTIM
02848     mmSlideTabSize = QMAX( minl + 1, 16 );
02849 #else
02850     mmSlideTabSize = 0;
02851 #endif
02852     mmBigArray.resize( (3 + 4 * ncap) * ns + 4 * ncap + mmSlideTabSize );
02853 
02854     mmInNextStack = mmBigArray.data();
02855     memset( mmInNextStack, -1, ns * sizeof(int) );
02856     mmCurStack = mmInNextStack + ns;
02857     mmNextStack = mmInNextStack + 2 * ns;
02858 
02859     mmCurCapBegin = mmInNextStack + 3 * ns;
02860     mmNextCapBegin = mmCurCapBegin + ncap * ns;
02861     mmCurCapEnd = mmCurCapBegin + 2 * ncap * ns;
02862     mmNextCapEnd = mmCurCapBegin + 3 * ncap * ns;
02863 
02864     mmTempCapBegin = mmCurCapBegin + 4 * ncap * ns;
02865     mmTempCapEnd = mmTempCapBegin + ncap;
02866     mmCapBegin = mmTempCapBegin + 2 * ncap;
02867     mmCapEnd = mmTempCapBegin + 3 * ncap;
02868 
02869     mmSlideTab = mmTempCapBegin + 4 * ncap;
02870 
02871     if ( yyError )
02872     return -1;
02873 
02874 #ifndef QT_NO_REGEXP_OPTIM
02875     State *sinit = s[InitialState];
02876     caretAnchored = ( sinit->anchors != 0 );
02877     if ( caretAnchored ) {
02878     QMap<int, int>& anchors = *sinit->anchors;
02879     QMap<int, int>::ConstIterator a;
02880     for ( a = anchors.begin(); a != anchors.end(); ++a ) {
02881 #ifndef QT_NO_REGEXP_ANCHOR_ALT
02882         if ( (*a & Anchor_Alternation) != 0 )
02883         break;
02884 #endif
02885         if ( (*a & Anchor_Caret) == 0 ) {
02886         caretAnchored = FALSE;
02887         break;
02888         }
02889     }
02890     }
02891 #endif
02892     return yyPos0;
02893 }
02894 
02895 void QRegExpEngine::parseAtom( Box *box )
02896 {
02897 #ifndef QT_NO_REGEXP_LOOKAHEAD
02898     QRegExpEngine *eng = 0;
02899     bool neg;
02900     int len;
02901 #endif
02902 
02903     switch ( yyTok ) {
02904     case Tok_Dollar:
02905     box->catAnchor( Anchor_Dollar );
02906     break;
02907     case Tok_Caret:
02908     box->catAnchor( Anchor_Caret );
02909     break;
02910 #ifndef QT_NO_REGEXP_LOOKAHEAD
02911     case Tok_PosLookahead:
02912     case Tok_NegLookahead:
02913     neg = ( yyTok == Tok_NegLookahead );
02914     eng = new QRegExpEngine( cs );
02915     len = eng->parse( yyIn + yyPos - 1, yyLen - yyPos + 1 );
02916     if ( len >= 0 )
02917         skipChars( len );
02918     else
02919         yyError = TRUE;
02920     box->catAnchor( addLookahead(eng, neg) );
02921     yyTok = getToken();
02922     if ( yyTok != Tok_RightParen )
02923         yyError = TRUE;
02924     break;
02925 #endif
02926 #ifndef QT_NO_REGEXP_ESCAPE
02927     case Tok_Word:
02928     box->catAnchor( Anchor_Word );
02929     break;
02930     case Tok_NonWord:
02931     box->catAnchor( Anchor_NonWord );
02932     break;
02933 #endif
02934     case Tok_LeftParen:
02935     case Tok_MagicLeftParen:
02936     yyTok = getToken();
02937     parseExpression( box );
02938     if ( yyTok != Tok_RightParen )
02939         yyError = TRUE;
02940     break;
02941     case Tok_CharClass:
02942     box->set( *yyCharClass );
02943     break;
02944     default:
02945     if ( (yyTok & Tok_Char) != 0 )
02946         box->set( QChar(yyTok ^ Tok_Char) );
02947 #ifndef QT_NO_REGEXP_BACKREF
02948     else if ( (yyTok & Tok_BackRef) != 0 )
02949         box->set( yyTok ^ Tok_BackRef );
02950 #endif
02951     else
02952         yyError = TRUE;
02953     }
02954     yyTok = getToken();
02955 }
02956 
02957 void QRegExpEngine::parseFactor( Box *box )
02958 {
02959 #ifndef QT_NO_REGEXP_CAPTURE
02960     int atom = startAtom( yyMayCapture && yyTok == Tok_LeftParen );
02961 #else
02962     static const int atom = 0;
02963 #endif
02964 
02965 #ifndef QT_NO_REGEXP_INTERVAL
02966 #define YYREDO() \
02967     yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \
02968     *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok
02969 
02970     const QChar *in = yyIn;
02971     int pos0 = yyPos0;
02972     int pos = yyPos;
02973     int len = yyLen;
02974     int ch = yyCh;
02975     CharClass charClass;
02976     if ( yyTok == Tok_CharClass )
02977     charClass = *yyCharClass;
02978     int tok = yyTok;
02979     bool mayCapture = yyMayCapture;
02980 #endif
02981 
02982     parseAtom( box );
02983 #ifndef QT_NO_REGEXP_CAPTURE
02984     finishAtom( atom );
02985 #endif
02986 
02987     if ( yyTok == Tok_Quantifier ) {
02988     if ( yyMaxRep == InftyRep ) {
02989         box->plus( atom );
02990 #ifndef QT_NO_REGEXP_INTERVAL
02991     } else if ( yyMaxRep == 0 ) {
02992         box->clear();
02993 #endif
02994     }
02995     if ( yyMinRep == 0 )
02996         box->opt();
02997 
02998 #ifndef QT_NO_REGEXP_INTERVAL
02999     yyMayCapture = FALSE;
03000     int alpha = ( yyMinRep == 0 ) ? 0 : yyMinRep - 1;
03001     int beta = ( yyMaxRep == InftyRep ) ? 0 : yyMaxRep - ( alpha + 1 );
03002 
03003     Box rightBox( this );
03004     int i;
03005 
03006     for ( i = 0; i < beta; i++ ) {
03007         YYREDO();
03008         Box leftBox( this );
03009         parseAtom( &leftBox );
03010         leftBox.cat( rightBox );
03011         leftBox.opt();
03012         rightBox = leftBox;
03013     }
03014     for ( i = 0; i < alpha; i++ ) {
03015         YYREDO();
03016         Box leftBox( this );
03017         parseAtom( &leftBox );
03018         leftBox.cat( rightBox );
03019         rightBox = leftBox;
03020     }
03021     rightBox.cat( *box );
03022     *box = rightBox;
03023 #endif
03024     yyTok = getToken();
03025 #ifndef QT_NO_REGEXP_INTERVAL
03026     yyMayCapture = mayCapture;
03027 #endif
03028     }
03029 #undef YYREDO
03030 }
03031 
03032 void QRegExpEngine::parseTerm( Box *box )
03033 {
03034 #ifndef QT_NO_REGEXP_OPTIM
03035     if ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar )
03036     parseFactor( box );
03037 #endif
03038     while ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar ) {
03039     Box rightBox( this );
03040     parseFactor( &rightBox );
03041     box->cat( rightBox );
03042     }
03043 }
03044 
03045 void QRegExpEngine::parseExpression( Box *box )
03046 {
03047     parseTerm( box );
03048     while ( yyTok == Tok_Bar ) {
03049     Box rightBox( this );
03050     yyTok = getToken();
03051     parseTerm( &rightBox );
03052     box->orx( rightBox );
03053     }
03054 }
03055 
03056 /*
03057   The class QRegExpPrivate contains the private data of a regular expression
03058   other than the automaton.  It makes it possible for many QRegExp objects to
03059   use the same QRegExpEngine object with different QRegExpPrivate objects.
03060 */
03061 struct QRegExpPrivate
03062 {
03063     QString pattern; // regular-expression or wildcard pattern
03064     QString rxpattern; // regular-expression pattern
03065 #ifndef QT_NO_REGEXP_WILDCARD
03066     bool wc; // wildcard mode?
03067 #endif
03068     bool min; // minimal matching? (instead of maximal)
03069 #ifndef QT_NO_REGEXP_CAPTURE
03070     QString t; // last string passed to QRegExp::search() or searchRev()
03071     QStringList capturedCache; // what QRegExp::capturedTexts() returned last
03072 #endif
03073     QArray<int> captured; // what QRegExpEngine::search() returned last
03074 
03075     QRegExpPrivate() { captured.fill( -1, 2 ); }
03076 };
03077 
03078 #ifndef QT_NO_REGEXP_OPTIM
03079 static QCache<QRegExpEngine> *engineCache = 0;
03080 #endif
03081 
03082 static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
03083 {
03084 #ifndef QT_NO_REGEXP_OPTIM
03085     if ( engineCache != 0 ) {
03086     QRegExpEngine *eng = engineCache->take( pattern );
03087     if ( eng == 0 || eng->caseSensitive() != caseSensitive ) {
03088         delete eng;
03089     } else {
03090         eng->ref();
03091         return eng;
03092     }
03093     }
03094 #endif
03095     return new QRegExpEngine( pattern, caseSensitive );
03096 }
03097 
03098 static void derefEngine( QRegExpEngine *eng, const QString& pattern )
03099 {
03100     if ( eng != 0 && eng->deref() ) {
03101 #ifndef QT_NO_REGEXP_OPTIM
03102     if ( engineCache == 0 ) {
03103         engineCache = new QCache<QRegExpEngine>;
03104         engineCache->setAutoDelete( TRUE );
03105     }
03106     if ( !pattern.isNull() &&
03107          engineCache->insert(pattern, eng, 4 + pattern.length() / 4) )
03108         return;
03109 #else
03110     Q_UNUSED( pattern );
03111 #endif
03112     delete eng;
03113     }
03114 }
03115 
03121 QRegExp3::QRegExp3()
03122 {
03123     eng = new QRegExpEngine( TRUE );
03124     priv = new QRegExpPrivate;
03125     priv->pattern = QString::null;
03126 #ifndef QT_NO_REGEXP_WILDCARD
03127     priv->wc = FALSE;
03128 #endif
03129     priv->min = FALSE;
03130     compile( TRUE );
03131 }
03132 
03142 QRegExp3::QRegExp3( const QString& pattern, bool caseSensitive, bool wildcard )
03143 {
03144     eng = 0;
03145     priv = new QRegExpPrivate;
03146     priv->pattern = pattern;
03147 #ifndef QT_NO_REGEXP_WILDCARD
03148     priv->wc = wildcard;
03149 #endif
03150     priv->min = FALSE;
03151     compile( caseSensitive );
03152 }
03153 
03159 QRegExp3::QRegExp3( const QRegExp3& rx )
03160 {
03161     eng = 0;
03162     priv = new QRegExpPrivate;
03163     operator=( rx );
03164 }
03165 
03169 QRegExp3::~QRegExp3()
03170 {
03171     derefEngine( eng, priv->rxpattern );
03172     delete priv;
03173 }
03174 
03180 QRegExp3& QRegExp3::operator=( const QRegExp3& rx )
03181 {
03182     rx.eng->ref();
03183     derefEngine( eng, priv->rxpattern );
03184     eng = rx.eng;
03185     priv->pattern = rx.priv->pattern;
03186     priv->rxpattern = rx.priv->rxpattern;
03187 #ifndef QT_NO_REGEXP_WILDCARD
03188     priv->wc = rx.priv->wc;
03189 #endif
03190     priv->min = rx.priv->min;
03191 #ifndef QT_NO_REGEXP_CAPTURE
03192     priv->t = rx.priv->t;
03193     priv->capturedCache = rx.priv->capturedCache;
03194 #endif
03195     priv->captured = rx.priv->captured;
03196     return *this;
03197 }
03198 
03207 bool QRegExp3::operator==( const QRegExp3& rx ) const
03208 {
03209     return priv->pattern == rx.priv->pattern &&
03210        eng->caseSensitive() == rx.eng->caseSensitive() &&
03211 #ifndef QT_NO_REGEXP_WILDCARD
03212        priv->wc == rx.priv->wc &&
03213 #endif
03214        priv->min == rx.priv->min;
03215 }
03216 
03239 bool QRegExp3::isEmpty() const
03240 {
03241     return priv->pattern.isEmpty();
03242 }
03243 
03255 bool QRegExp3::isValid() const
03256 {
03257     return eng->isValid();
03258 }
03259 
03266 QString QRegExp3::pattern() const
03267 {
03268     return priv->pattern;
03269 }
03270 
03278 void QRegExp3::setPattern( const QString& pattern )
03279 {
03280     if ( priv->pattern != pattern ) {
03281     priv->pattern = pattern;
03282     compile( caseSensitive() );
03283     }
03284 }
03285 
03292 bool QRegExp3::caseSensitive() const
03293 {
03294     return eng->caseSensitive();
03295 }
03296 
03305 void QRegExp3::setCaseSensitive( bool sensitive )
03306 {
03307     if ( sensitive != eng->caseSensitive() )
03308     compile( sensitive );
03309 }
03310 
03311 #ifndef QT_NO_REGEXP_WILDCARD
03312 
03318 bool QRegExp3::wildcard() const
03319 {
03320     return priv->wc;
03321 }
03322 
03334 void QRegExp3::setWildcard( bool wildcard )
03335 {
03336     if ( wildcard != priv->wc ) {
03337     priv->wc = wildcard;
03338     compile( caseSensitive() );
03339     }
03340 }
03341 #endif
03342 
03348 bool QRegExp3::minimal() const
03349 {
03350     return priv->min;
03351 }
03352 
03370 void QRegExp3::setMinimal( bool minimal )
03371 {
03372     priv->min = minimal;
03373 }
03374 
03391 bool QRegExp3::exactMatch( const QString& str )
03392 {
03393 #ifndef QT_NO_REGEXP_CAPTURE
03394     priv->t = str;
03395     priv->capturedCache.clear();
03396 #endif
03397 
03398     priv->captured = eng->match( str, 0, priv->min, TRUE );
03399     if ( priv->captured[1] == (int) str.length() ) {
03400     return TRUE;
03401     } else {
03402     priv->captured.detach();
03403     priv->captured[0] = 0;
03404     priv->captured[1] = eng->matchedLength();
03405     return FALSE;
03406     }
03407 }
03408 
03413 bool QRegExp3::exactMatch( const QString& str ) const
03414 {
03415    return eng->match(str, 0, priv->min, TRUE)[0] == 0 &&
03416       eng->matchedLength() == (int) str.length();
03417 }
03418 
03442 #ifndef QT_NO_COMPAT
03443 int QRegExp3::match( const QString& str, int index, int *len,
03444             bool indexIsStart )
03445 {
03446     int pos;
03447     if ( indexIsStart ) {
03448     pos = search( str.mid(index) );
03449     if ( pos >= 0 ) {
03450         pos += index;
03451         if ( len != 0 )
03452         *len = matchedLength();
03453     } else {
03454         if ( len != 0 )
03455         *len = 0;
03456     }
03457     } else {
03458     pos = search( str, index );
03459     if ( len != 0 )
03460         *len = matchedLength();
03461     }
03462     return pos;
03463 }
03464 #endif
03465 
03491 // QChar versions
03492 
03493 #ifdef QCHAR_SUPPORT
03494 const QString makeString(const QChar *str)
03495 {
03496 // A sentinel value checked in case the QChar *ptr is never null terminated
03497     const uint MAXLENGTH=65535;
03498 
03499     const QChar *s=str;
03500     uint i=0;
03501     while(i < MAXLENGTH && *s != QChar::null) {  i++;s++ ;}
03502     return QString(str,i);
03503 
03504 }
03505 int QRegExp3::search(const QChar *str,int start)
03506 {
03507     return search(makeString(str),start);
03508 }
03509 int QRegExp3::search(const QChar *str,int start) const
03510 {
03511     return search(makeString(str),start);
03512 }
03513 int QRegExp3::searchRev(const QChar *str,int start)
03514 {
03515     return searchRev(makeString(str),start);
03516 }
03517 int QRegExp3::searchRev(const QChar *str,int start) const
03518 {
03519     return searchRev(makeString(str),start);
03520 }
03521 bool QRegExp3::exactMatch(const QChar *str)
03522 {
03523     return exactMatch(makeString(str));
03524 }
03525 bool QRegExp3::exactMatch(const QChar *str) const
03526 {
03527     return exactMatch(makeString(str));
03528 }
03529 #endif // QCHAR_SUPPORT
03530 
03531 int QRegExp3::search( const QString& str, int start )
03532 {
03533     if ( start < 0 )
03534     start += str.length();
03535 #ifndef QT_NO_REGEXP_CAPTURE
03536     priv->t = str;
03537     priv->capturedCache.clear();
03538 #endif
03539     priv->captured = eng->match( str, start, priv->min, FALSE );
03540     return priv->captured[0];
03541 }
03542 
03547 int QRegExp3::search( const QString& str, int start ) const
03548 {
03549     if ( start < 0 )
03550     start += str.length();
03551     return eng->match( str, start, priv->min, FALSE )[0];
03552 }
03553 
03565 int QRegExp3::searchRev( const QString& str, int start )
03566 {
03567     if ( start < 0 )
03568     start += str.length();
03569 #ifndef QT_NO_REGEXP_CAPTURE
03570     priv->t = str;
03571     priv->capturedCache.clear();
03572 #endif
03573     if ( start < 0 || start > (int) str.length() ) {
03574     priv->captured.detach();
03575     priv->captured.fill( -1 );
03576     return -1;
03577     }
03578 
03579     while ( start >= 0 ) {
03580     priv->captured = eng->match( str, start, priv->min, TRUE );
03581     if ( priv->captured[0] == start )
03582         return start;
03583     start--;
03584     }
03585     return -1;
03586 }
03587 
03592 int QRegExp3::searchRev( const QString& str, int start ) const
03593 {
03594     if ( start < 0 )
03595     start += str.length();
03596     if ( start < 0 || start > (int) str.length() )
03597     return -1;
03598 
03599     while ( start >= 0 ) {
03600     if ( eng->match(str, start, priv->min, TRUE)[0] == start )
03601         return start;
03602     start--;
03603     }
03604     return -1;
03605 }
03606 
03612 int QRegExp3::matchedLength()
03613 {
03614     return priv->captured[1];
03615 }
03616 
03617 #ifndef QT_NO_REGEXP_CAPTURE
03618 
03662 QStringList QRegExp3::capturedTexts()
03663 {
03664     if ( priv->capturedCache.isEmpty() ) {
03665     for ( int i = 0; i < (int) priv->captured.size(); i += 2 ) {
03666         QString m;
03667         if ( priv->captured[i + 1] == 0 )
03668         m = QString::fromLatin1( "" );
03669         else if ( priv->captured[i] >= 0 )
03670         m = priv->t.mid( priv->captured[i],
03671                  priv->captured[i + 1] );
03672         priv->capturedCache.append( m );
03673     }
03674     priv->t = QString::null;
03675     }
03676     return priv->capturedCache;
03677 }
03678 
03720 QString QRegExp3::cap( int nth )
03721 {
03722     if ( nth < 0 || nth >= (int) priv->captured.size() / 2 )
03723     return QString::null;
03724     else
03725     return capturedTexts()[nth];
03726 }
03727 
03747 int QRegExp3::pos( int nth )
03748 {
03749     if ( nth < 0 || nth >= (int) priv->captured.size() / 2 )
03750     return -1;
03751     else
03752     return priv->captured[2 * nth];
03753 }
03754 #endif
03755 
03756 void QRegExp3::compile( bool caseSensitive )
03757 {
03758     derefEngine( eng, priv->rxpattern );
03759 #ifndef QT_NO_REGEXP_WILDCARD
03760     if ( priv->wc )
03761     priv->rxpattern = wc2rx( priv->pattern );
03762     else
03763 #endif
03764     priv->rxpattern = priv->pattern.isNull() ? QString::fromLatin1( "" )
03765               : priv->pattern;
03766     eng = newEngine( priv->rxpattern, caseSensitive );
03767 #ifndef QT_NO_REGEXP_CAPTURE
03768     priv->t = QString::null;
03769     priv->capturedCache.clear();
03770 #endif
03771     priv->captured.detach();
03772     priv->captured.fill( -1, 2 + 2 * eng->numCaptures() );
03773 }

Generated on Sat Nov 5 16:17:12 2005 for OPIE by  doxygen 1.4.2