Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

LanguageModel.h

Go to the documentation of this file.
00001 // LanguageModel.h
00002 //
00004 //
00005 // Copyright (c) 2001-2002 David Ward
00006 //
00008 
00009 // Abstract language model class
00010 // See PPMModel for an example implementation
00011 
00012 // Contexts are identified by a unique unsigned integer
00013 
00014 #ifndef __LanguageModel_h__
00015 #define __LanguageModel_h__
00016 
00017 #include "MSVC_Unannoy.h"
00018 #include <vector>
00019 #include <string>
00020 
00021 #include "Alphabet.h"
00022 #include "Context.h"
00023 
00024 namespace Dasher {class CLanguageModel;}
00025 class Dasher::CLanguageModel
00026 {
00027 public:
00028         CLanguageModel(CAlphabet* Alphabet, int Normalization);
00029 
00030         // Interface for the Dasher code
00031         // --------------------------------------------------------------------------
00032         class CNodeContext {
00033         public:
00034                 CNodeContext() {};
00035                 virtual ~CNodeContext() {};
00036         };
00037         
00038         // return the model's normalization - what the probabilities sum to
00039         const int normalization() const { return m_iNorm;}
00040         
00041         CNodeContext* GetRootNodeContext();
00042         CNodeContext* CloneNodeContext(CNodeContext* NodeContext);
00043         void ReleaseNodeContext(CNodeContext* NodeContext);
00044         void EnterNodeSymbol(CNodeContext* NodeContext, symbol Symbol);
00045         void LearnNodeSymbol(CNodeContext* NodeContext, symbol Symbol);
00046         void EnterText(CNodeContext* NodeContext, std::string TheText);
00047         void LearnText(CNodeContext* NodeContext, std::string* TheText, bool IsMore);
00048         bool GetNodeProbs(CNodeContext* Context, std::vector<symbol> &NewSymbols,
00049                 std::vector<unsigned int> &Groups, std::vector<unsigned int> &Probs, double AddProb);
00050         
00051         // Alphabet pass-through functions for widely needed information
00052         symbol GetSpaceSymbol() {return m_Alphabet->GetSpaceSymbol();}
00053         
00054         int GetColour(int character);
00055 
00056 protected:
00057         int GetNumberModelChars() {return m_Alphabet->GetNumberSymbols();}
00058         
00059         // Generic language model functions to be implemented 
00060         // --------------------------------------------------------------------------
00061         typedef unsigned int modelchar;
00062         
00063         // return the id for the root context:
00064         virtual CContext* GetRootContext()=0;
00065         // clone a context and return the new id:
00066         virtual CContext* CloneContext(CContext*)=0;
00067         // delete a context:
00068         virtual void ReleaseContext(CContext*)=0;
00069         // diagnostic info:
00070         virtual void dump()=0;
00071         // add character to the language model:
00072         virtual void LearnSymbol(CContext* Context, modelchar Symbol)=0;
00073         // update context with a character:
00074         virtual void EnterSymbol(CContext* context, modelchar Symbol)=0;
00075         // get the probability distrubution at the given context:
00076         virtual bool GetProbs(CContext* Context, std::vector<unsigned int> &Probs, double AddProb)=0;
00077         
00078 private:
00079         CAlphabet *m_Alphabet;
00080         int m_iModelChars; // number of charater in the model 1...ModelChars
00081         int m_iNorm;       // normalization of probabilities
00082 };
00083 
00084 using namespace Dasher;
00085 
00087 
00088 inline CLanguageModel::CNodeContext* CLanguageModel::GetRootNodeContext()
00089 {
00090         return (CNodeContext*) GetRootContext();
00091 }
00092 
00094 
00095 inline CLanguageModel::CNodeContext* CLanguageModel::CloneNodeContext(CNodeContext* NodeContext)
00096 {
00097         return (CNodeContext*) CloneContext((CContext*) NodeContext);
00098 }
00099 
00101 
00102 inline void CLanguageModel::ReleaseNodeContext(CNodeContext* NodeContext)
00103 {
00104         ReleaseContext((CContext*) NodeContext);
00105 }
00106 
00108 
00109 inline void CLanguageModel::EnterNodeSymbol(CNodeContext* NodeContext, symbol Symbol)
00110 {
00111         EnterSymbol((CContext*) NodeContext, (modelchar) Symbol);
00112 }
00113 
00115 
00116 inline void CLanguageModel::LearnNodeSymbol(CNodeContext* NodeContext, symbol Symbol)
00117 {
00118         LearnSymbol((CContext*) NodeContext, (modelchar) Symbol);
00119 }
00120 
00121 
00122 
00123 #endif /* #ifndef __LanguageModel_h__ */

Generated on Sat Nov 5 16:16:00 2005 for OPIE by  doxygen 1.4.2