Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

numberh.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002 ** Copyright (C) 2000-2002 Trolltech AS.  All rights reserved.
00003 **
00004 ** This file is part of Qt Linguist.
00005 **
00006 ** This file may be distributed and/or modified under the terms of the
00007 ** GNU General Public License version 2 as published by the Free Software
00008 ** Foundation and appearing in the file LICENSE.GPL included in the
00009 ** packaging of this file.
00010 **
00011 ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
00012 ** licenses may use this file in accordance with the Qt Commercial License
00013 ** Agreement provided with the Software.
00014 **
00015 ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
00016 ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
00017 **
00018 ** See http://www.trolltech.com/gpl/ for GPL licensing information.
00019 ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
00020 **   information about Qt Commercial License Agreements.
00021 **
00022 ** Contact info@trolltech.com if any conditions of this licensing are
00023 ** not clear to you.
00024 **
00025 **********************************************************************/
00026 
00027 #include <metatranslator.h>
00028 
00029 #include <qmemarray.h>
00030 #include <qcstring.h>
00031 #include <qmap.h>
00032 #include <qstringlist.h>
00033 
00034 #include <ctype.h>
00035 
// Messages indexed by a QCString lookup key; applyNumberHeuristic() keeps its
// "translated" and "untranslated" tables in maps of this type.
typedef QMap<QCString, MetaTranslatorMessage> TMM;
// Plain list of messages; applyNumberHeuristic() stores tor->messages() in it.
typedef QValueList<MetaTranslatorMessage> TML;
00038 
/*
  Returns true if 'c', narrowed to an unsigned char, is classified as
  punctuation by ispunct() or as whitespace by isspace().
  numberLength() treats such characters as possible separators inside
  a number.
*/
static bool isDigitFriendly( int c )
{
    // The <ctype.h> classifiers require a value representable as
    // unsigned char, so narrow first.
    const unsigned char ch = static_cast<unsigned char>( c );

    if ( ispunct(ch) )
        return true;
    return isspace( ch ) != 0;
}
00043 
00044 static int numberLength( const char *s )
00045 {
00046     int i = 0;
00047 
00048     if ( isdigit((uchar)s[0]) ) {
00049         do {
00050             i++;
00051         } while (isdigit((uchar)s[i]) ||
00052                  (isDigitFriendly(s[i]) &&
00053                   (isdigit((uchar)s[i + 1]) ||
00054                    (isDigitFriendly(s[i + 1]) && isdigit((uchar)s[i + 2])))));
00055     }
00056     return i;
00057 }
00058 
00059 /*
00060   Returns a version of 'key' where all numbers have been replaced by zeroes.  If
00061   there were none, returns "".
00062 */
00063 static QCString zeroKey( const char *key )
00064 {
00065     QCString zeroed( strlen(key) + 1 );
00066     char *z = zeroed.data();
00067     int i = 0, j = 0;
00068     int len;
00069     bool metSomething = FALSE;
00070 
00071     while ( key[i] != '\0' ) {
00072         len = numberLength( key + i );
00073         if ( len > 0 ) {
00074             i += len;
00075             z[j++] = '0';
00076             metSomething = TRUE;
00077         } else {
00078             z[j++] = key[i++];
00079         }
00080     }
00081     z[j] = '\0';
00082 
00083     if ( metSomething )
00084         return zeroed;
00085     else
00086         return "";
00087 }
00088 
00089 static QString translationAttempt( const QString& oldTranslation,
00090                                    const char *oldSource,
00091                                    const char *newSource )
00092 {
00093     int p = zeroKey( oldSource ).contains( '0' );
00094     int oldSourceLen = qstrlen( oldSource );
00095     QString attempt;
00096     QStringList oldNumbers;
00097     QStringList newNumbers;
00098     QMemArray<bool> met( p );
00099     QMemArray<int> matchedYet( p );
00100     int i, j;
00101     int k = 0, ell, best;
00102     int m, n;
00103     int pass;
00104 
00105     /*
00106       This algorithm is hard to follow, so we'll consider an example
00107       all along: oldTranslation is "XeT 3.0", oldSource is "TeX 3.0"
00108       and newSource is "XeT 3.1".
00109 
00110       First, we set up two tables: oldNumbers and newNumbers. In our
00111       example, oldNumber[0] is "3.0" and newNumber[0] is "3.1".
00112     */
00113     for ( i = 0, j = 0; i < oldSourceLen; i++, j++ ) {
00114         m = numberLength( oldSource + i );
00115         n = numberLength( newSource + j );
00116         if ( m > 0 ) {
00117             oldNumbers.append( QCString(oldSource + i, m + 1) );
00118             newNumbers.append( QCString(newSource + j, n + 1) );
00119             i += m;
00120             j += n;
00121             met[k] = FALSE;
00122             matchedYet[k] = 0;
00123             k++;
00124         }
00125     }
00126 
00127     /*
00128       We now go over the old translation, "XeT 3.0", one letter at a
00129       time, looking for numbers found in oldNumbers. Whenever such a
00130       number is met, it is replaced with its newNumber equivalent. In
00131       our example, the "3.0" of "XeT 3.0" becomes "3.1".
00132     */
00133     for ( i = 0; i < (int) oldTranslation.length(); i++ ) {
00134         attempt += oldTranslation[i];
00135         for ( k = 0; k < p; k++ ) {
00136             if ( oldTranslation[i] == oldNumbers[k][matchedYet[k]] )
00137                 matchedYet[k]++;
00138             else
00139                 matchedYet[k] = 0;
00140         }
00141 
00142         /*
00143           Let's find out if the last character ended a match. We make
00144           two passes over the data. In the first pass, we try to
00145           match only numbers that weren't matched yet; if that fails,
00146           the second pass does the trick. This is useful in some
00147           suspicious cases, flagged below.
00148         */
00149         for ( pass = 0; pass < 2; pass++ ) {
00150             best = p; // an impossible value
00151             for ( k = 0; k < p; k++ ) {
00152                 if ( (!met[k] || pass > 0) &&
00153                      matchedYet[k] == (int) oldNumbers[k].length() &&
00154                      numberLength(oldTranslation.latin1() + (i + 1) -
00155                                   matchedYet[k]) == matchedYet[k] ) {
00156                     // the longer the better
00157                     if ( best == p || matchedYet[k] > matchedYet[best] )
00158                         best = k;
00159                 }
00160             }
00161             if ( best != p ) {
00162                 attempt.truncate( attempt.length() - matchedYet[best] );
00163                 attempt += newNumbers[best];
00164                 met[best] = TRUE;
00165                 for ( k = 0; k < p; k++ )
00166                     matchedYet[k] = 0;
00167                 break;
00168             }
00169         }
00170     }
00171 
00172     /*
00173       We flag two kinds of suspicious cases. They are identified as
00174       such with comments such as "{2000?}" at the end.
00175 
00176       Example of the first kind: old source text "TeX 3.0" translated
00177       as "XeT 2.0" is flagged "TeX 2.0 {3.0?}", no matter what the
00178       new text is.
00179     */
00180     for ( k = 0; k < p; k++ ) {
00181         if ( !met[k] )
00182             attempt += QString( " {" ) + newNumbers[k] + QString( "?}" );
00183     }
00184 
00185     /*
00186       Example of the second kind: "1 of 1" translated as "1 af 1",
00187       with new source text "1 of 2", generates "1 af 2 {1 or 2?}"
00188       because it's not clear which of "1 af 2" and "2 af 1" is right.
00189     */
00190     for ( k = 0; k < p; k++ ) {
00191         for ( ell = 0; ell < p; ell++ ) {
00192             if ( k != ell && oldNumbers[k] == oldNumbers[ell] &&
00193                     newNumbers[k] < newNumbers[ell] )
00194                 attempt += QString( " {" ) + newNumbers[k] + QString( " or " ) +
00195                            newNumbers[ell] + QString( "?}" );
00196         }
00197     }
00198     return attempt;
00199 }
00200 
00201 /*
00202   Augments a MetaTranslator with translations easily derived from
00203   similar existing (probably obsolete) translations.
00204 
00205   For example, if "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1"
00206   has no translation, "XeT 3.1" is added to the translator and is
00207   marked Unfinished.
00208 */
00209 void applyNumberHeuristic( MetaTranslator *tor, bool verbose )
00210 {
00211     TMM translated, untranslated;
00212     TMM::Iterator t, u;
00213     TML all = tor->messages();
00214     TML::Iterator it;
00215     int inserted = 0;
00216 
00217     for ( it = all.begin(); it != all.end(); ++it ) {
00218         if ( (*it).type() == MetaTranslatorMessage::Unfinished ) {
00219             if ( (*it).translation().isEmpty() )
00220                 untranslated.insert(QCString((*it).context()) + "\n" + (*it).sourceText() + "\n"
00221                                     + (*it).comment(), *it);
00222         } else if ( !(*it).translation().isEmpty() ) {
00223             translated.insert( zeroKey((*it).sourceText()), *it );
00224         }
00225     }
00226 
00227     for ( u = untranslated.begin(); u != untranslated.end(); ++u ) {
00228         t = translated.find( zeroKey((*u).sourceText()) );
00229         if ( t != translated.end() && !t.key().isEmpty() &&
00230              qstrcmp((*t).sourceText(), (*u).sourceText()) != 0 ) {
00231             MetaTranslatorMessage m( *u );
00232             m.setTranslation(translationAttempt((*t).translation(), (*t).sourceText(),
00233                                                 (*u).sourceText()));
00234             tor->insert( m );
00235             inserted++;
00236         }
00237     }
00238     if ( verbose && inserted != 0 )
00239         fprintf( stderr, " number heuristic provided %d translation%s\n",
00240                  inserted, inserted == 1 ? "" : "s" );
00241 }

Generated on Sat Nov 5 16:15:58 2005 for OPIE by  doxygen 1.4.2