Version 4.1.5
Main Page | Class Hierarchy | Class List | File List | Class Members | Related Pages

PMarkov.h

Go to the documentation of this file.
00001 /* seqpp/PMarkov.h
00002  *
00003  * Copyright (C) 2003 Laboratoire Statistique & Génome
00004  *
00005  * This program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 2 of the License, or (at
00008  * your option) any later version.
00009  *
00010  * This program is distributed in the hope that it will be useful, but
00011  * WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program; if not, write to the Free Software
00017  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00018  */
00019 
00028 #ifndef SEQPP_PMARKOV_H
00029 #define SEQPP_PMARKOV_H
00030 
00031 #include <seqpp/Markov.h>
00032 #include <seqpp/PhasedPMarkov.h>
00033 #include <seqpp/pmm_tree.h>
00034 
00045 class PMarkov :  public Markov
00046 {
00047  protected :
00048   pmm_forest * _p_f;
00049 
00050  public:
00052 
00060   PMarkov( Partition& p,
00061            const SequenceSet & seqset,
00062            const string& prior_alpha_file = string(),
00063            bool motif_prior = true,
00064            double penalty = 0.,
00065            const string & xmlfile = string() )
00066     :  Markov( seqset.tell_alphabet_size(), seqset.tell_order() )
00067   { 
00068     seqset.count_occurencies(); 
00069     if (!this->load_prior_alpha( prior_alpha_file ))
00070       this->default_prior_alpha();
00071     _p_f = new pmm_forest( p, _size, _order, _prior_alpha[0], motif_prior,  penalty );
00072     select( seqset.get_count(), true, seqset.get_translator(), xmlfile );
00073   }
00074 
00076 
00085   PMarkov( Partition& p,
00086            const Sequence & seq, 
00087            const string& prior_alpha_file = string(),
00088            bool motif_prior = true,
00089            double penalty = 0.,
00090            const Translator & trans = Translator(),
00091            const string & xmlfile = string() )
00092     : Markov(  seq.tell_alphabet_size(), seq.tell_order() )
00093   { 
00094     seq.count_occurencies();
00095     if (!this->load_prior_alpha( prior_alpha_file ))
00096       this->default_prior_alpha();
00097     _p_f = new pmm_forest( p, _size, _order, _prior_alpha[0], motif_prior, penalty ); 
00098     select( seq.get_count(), true, trans, xmlfile );
00099   }
00100 
00101 
00103 
00114   PMarkov( Partition& p,
00115            unsigned long * count,
00116            short size, short order,
00117            const string& prior_alpha_file = string(),
00118            bool motif_prior = true,
00119            double penalty = 0.,            
00120            const Translator & trans = Translator(),        
00121            const string & xmlfile = string() )
00122     : Markov( size, order )    
00123   {
00124     if (!this->load_prior_alpha( prior_alpha_file ))
00125       this->default_prior_alpha();
00126     _p_f = new pmm_forest( p, _size, _order, _prior_alpha[0], motif_prior, penalty );  
00127     select( count, false, trans, xmlfile );
00128   }
00129 
00131 
00142     PMarkov( Partition& part,
00143              const string & count_file,
00144              short size, short order,
00145              const string& prior_alpha_file = string(),
00146              bool motif_prior = true,
00147              double penalty = 0.,
00148              const Translator & trans = Translator(),
00149              const string & xmlfile = string() )
00150       :  Markov( size, order )
00151       {         
00152          unsigned long * count = new unsigned long[_nPi];    
00153         if (! this->load_count( count_file, &count )){
00154           cerr<<"Bad count file"<<endl;
00155           exit(1);
00156         }
00157         if (!this->load_prior_alpha( prior_alpha_file ))
00158           this->default_prior_alpha();
00159         _p_f = new pmm_forest( part, _size, _order, _prior_alpha[0], motif_prior, penalty );      
00160         select( count, false, trans, xmlfile );
00161         
00162         delete [] count;
00163       }
00164 
00166 
00175   PMarkov( Partition& part,
00176            short size,
00177            short order,
00178            const string& prior_alpha_file = string(),
00179            bool motif_prior = true,
00180            double penalty = 0.,
00181            bool alloc = true )
00182     : Markov( size, order, alloc )
00183   {
00184     if (!this->load_prior_alpha( prior_alpha_file ))
00185       this->default_prior_alpha();
00186     _p_f = new pmm_forest( part, _size, _order, _prior_alpha[0], motif_prior, penalty );
00187   }
00188 
00189 
00191 
00197   void select( unsigned long * count, bool decal_required,
00198                const Translator & trans = Translator(), 
00199                const string & xmlfile = string() )
00200   {    
00201    _nb_param = 0; 
00202    long decal = 0;
00203    if (decal_required)
00204      decal = _jump;
00205       
00206 #ifdef HAVE_LIBXML2
00207    if (xmlfile.size() != 0){
00208      xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00209      xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00210      xmlDocSetRootElement(doc,root_node);
00211 
00212      pmm_tree p_t = _p_f->select( count+decal );
00213      p_t.tree_to_matrix( _Pi );
00214      _nb_param += p_t.nb_leaves() * (_size-1);
00215 
00216      std::stringstream treenamestream;
00217      p_t.save( trans, root_node, treenamestream.str() );
00218      xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00219      xmlFreeDoc(doc); 
00220    }
00221    else{
00222 
00223      pmm_tree p_t = _p_f->select( count+decal );     
00224      p_t.tree_to_matrix( _Pi );
00225      _nb_param += p_t.nb_leaves() * (_size-1);
00226 
00227    }
00228 #endif
00229 #ifndef HAVE_LIBXML2
00230    if (xmlfile.size() != 0){
00231      cerr<<"in PMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00232    } 
00233 
00234    pmm_tree p_t = _p_f->select( count+decal );   
00235    p_t.tree_to_matrix( _Pi );
00236    _nb_param += p_t.nb_leaves() * (_size-1);
00237 
00238 #endif      
00239   }
00240 
00241   
00243 
00249   template <class TSeq1, class TSeq2>
00250     double mean_post_log_likelihood( const TSeq1& tseq_train, const TSeq2& tseq_eval,
00251                                      short initial_phase_train = 0, short initial_phase_eval = 0 )
00252   {
00253     tseq_train.count_occurencies();
00254     tseq_eval.count_occurencies();
00255     return this->mean_post_log_likelihood( tseq_train.get_count(), true,
00256                                            tseq_eval.get_count(), true ); 
00257   }
00258   
00260 
00264   template <class TSeq>
00265     double mean_post_log_likelihood( const TSeq& tseq_eval,
00266                                      short initial_phase_eval = 0 )
00267     {     
00268       tseq_eval.count_occurencies();
00269       return this->mean_post_log_likelihood( tseq_eval.get_count(), true );
00270     }
00271   
00272 
00274 
00280   double mean_post_log_likelihood( unsigned long * count_train, bool decal_required_t,
00281                                    unsigned long * count_eval, bool decal_required_e )
00282   {              
00283     if (decal_required_t)        
00284       _p_f->extended_tree( count_train+_jump );
00285     else
00286       _p_f->extended_tree( count_train );    
00287    
00288     if (decal_required_e)     
00289       return _p_f->mean_post_log_likelihood( count_eval+_jump );
00290     else   
00291       return _p_f->mean_post_log_likelihood( count_eval );
00292   }
00293 
00295 
00299   double mean_post_log_likelihood( unsigned long * count_eval, bool decal_required_e )
00300   {
00301     if (decal_required_e) 
00302       return _p_f->mean_post_log_likelihood( count_eval+_jump );
00303     else
00304       return _p_f->mean_post_log_likelihood( count_eval );
00305   }
00306 
00308   double mean_post_log_likelihood(){
00309     _p_f->extended_tree();
00310     return  _p_f->mean_post_log_likelihood( );
00311   }
00312 
00313   //---------------------------------------
00315 
00318   template <class TSeq>
00319     double post_log_likelihood( const TSeq& tseq_eval )
00320   {    
00321     if ( tseq_eval.tell_order() != _order ){
00322       cerr << "Order incompatibility beetween  sequence and model " << endl;
00323       exit(-1);
00324     }
00325     // A FAIRE attention seq > order...
00326     
00327     tseq_eval.count_occurencies();     
00328     
00329     return post_log_likelihood( tseq_eval.get_count(), true );
00330   }
00331 
00333 
00337   double post_log_likelihood( unsigned long * count_eval, bool decal_required_e  )
00338   {
00339     if (decal_required_e)
00340       return _p_f->post_log_likelihood( count_eval+_jump );
00341   }
00342 
00343   //--------------------------------------
00344 
00346 
00353   void draw( unsigned long * count, bool decal_required,
00354              gsl_rng * r,
00355              const Translator & trans = Translator(), 
00356              const string & xmlfile = string() )
00357   {
00358    _nb_param = 0;   
00359 #ifdef HAVE_LIBXML2
00360    if (xmlfile.size() != 0){
00361      xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00362      xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00363      xmlDocSetRootElement(doc,root_node);
00364      if (decal_required)
00365        _p_f->extended_tree( count+_jump );
00366      else 
00367        _p_f->extended_tree( count );
00368        
00369      pmm_tree p_t = _p_f->draw(r);   
00370      p_t.tree_to_matrix( _Pi );
00371      _nb_param += p_t.nb_leaves() * (_size-1);
00372 
00373      std::stringstream treenamestream;
00374      p_t.save( trans, root_node, treenamestream.str() );
00375      xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00376      xmlFreeDoc(doc); 
00377    }
00378    else{
00379      if (decal_required)
00380        _p_f->extended_tree( count+_jump );
00381      else
00382        _p_f->extended_tree( count );
00383        
00384      pmm_tree p_t = _p_f->draw(r);        
00385      p_t.tree_to_matrix( _Pi );
00386      _nb_param += p_t.nb_leaves() * (_size-1);
00387 
00388    }
00389 #endif
00390 #ifndef HAVE_LIBXML2
00391    if (xmlfile.size() != 0){
00392      cerr<<"in PMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00393    } 
00394    if (decal_required)
00395        _p_f->extended_tree( count+_jump );
00396      else
00397        _p_f->extended_tree( count );
00398        
00399    pmm_tree p_t = _p_f->draw(r); 
00400    p_t.tree_to_matrix( _Pi );
00401    _nb_param += p_t.nb_leaves() * (_size-1);
00402 
00403 #endif   
00404 
00405    if (_Mu != NULL)
00406      this->compute_stat_law( true );   
00407   }  
00408   
00410 
00415   void draw( gsl_rng * r,
00416              const Translator & trans, 
00417              const string & xmlfile = string() )
00418   {
00419    _nb_param = 0;   
00420 #ifdef HAVE_LIBXML2
00421    if (xmlfile.size() != 0){
00422      xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00423      xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00424      xmlDocSetRootElement(doc,root_node);
00425 
00426      pmm_tree p_t = _p_f->draw(r); 
00427      p_t.tree_to_matrix( _Pi );
00428      _nb_param += p_t.nb_leaves() * (_size-1);
00429 
00430      std::stringstream treenamestream;
00431      p_t.save( trans, root_node, treenamestream.str() );
00432      xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00433      xmlFreeDoc(doc); 
00434    }
00435    else{
00436 
00437      pmm_tree p_t = _p_f->draw(r);      
00438      p_t.tree_to_matrix( _Pi );
00439      _nb_param += p_t.nb_leaves() * (_size-1);
00440 
00441    }
00442 #endif
00443 #ifndef HAVE_LIBXML2
00444    if (xmlfile.size() != 0){
00445      cerr<<"in PMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00446    } 
00447    
00448    pmm_tree p_t = _p_f->draw(r); 
00449    p_t.tree_to_matrix( _Pi );
00450    _nb_param += p_t.nb_leaves() * (_size-1);
00451 
00452 #endif 
00453 
00454    if (_Mu != NULL)
00455      this->compute_stat_law( true );   
00456   }
00457 
00459   void info_nb_leaves() const{
00460     cout<<_p_f->current_pmm_tree().nb_leaves()<<" leaves"
00461         <<endl;
00462   }
00463 
00464   void set_penalty( double penalty ){
00465     _p_f->set_penalty( penalty);
00466   }
00467 
00468   void default_prior_alpha(){
00469     vector<double> tmp;
00470     tmp.assign(_size, 1.);
00471     _prior_alpha.assign(1, tmp);
00472   }
00473 
00474   ~PMarkov(){
00475     //cout<<"destr.Pmarkov"<<endl;
00476   }
00477 };
00478 #endif



Download seq++ 4.1.5
Download previous versions
Statistique & Genome Home


Generated on Thu Aug 4 18:34:04 2005 for seqpp by doxygen 1.3.9.1