Version 4.1.5
Main Page | Class Hierarchy | Class List | File List | Class Members | Related Pages

PhasedPMarkov.h

Go to the documentation of this file.
00001 /* seqpp/PhasedPMarkov.h
00002  *
00003  * Copyright (C) 2003 Laboratoire Statistique & Génome
00004  *
00005  * This program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 2 of the License, or (at
00008  * your option) any later version.
00009  *
00010  * This program is distributed in the hope that it will be useful, but
00011  * WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program; if not, write to the Free Software
00017  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00018  */
00019 
00028 #ifndef SEQPP_PHASEDPMARKOV_H
00029 #define SEQPP_PHASEDPMARKOV_H
00030 
00031 #include <seqpp/PhasedMarkov.h>
00032 
00033 #include <seqpp/pmm_forest.h>
00034 
00047 class PhasedPMarkov : public PhasedMarkov
00048 {
00049  protected : 
00051   vector<pmm_forest*> _p_f;
00052   
00053  public:
00055 
00065   PhasedPMarkov( Partition& part,
00066                  const SequenceSet & seqset,
00067                  short phase, short initial_phase = 0,
00068                  const string& prior_alpha_file = string(),
00069                  bool motif_prior = true,
00070                  double penalty = 0.,
00071                  const string & xmlfile = string() )
00072     :  PhasedMarkov(  seqset.tell_alphabet_size(), seqset.tell_order(), phase )
00073   { 
00074     seqset.count_p_occurencies( phase, initial_phase );
00075     if (!this->load_prior_alpha( prior_alpha_file ))
00076       this->default_prior_alpha();
00077     for (int i=0; i<_phase; i++)
00078       _p_f.push_back( new pmm_forest( part, _size, _order, _prior_alpha[i], motif_prior, penalty ) );
00079     select( seqset.get_p_count(), true, seqset.get_translator(), xmlfile );
00080   }
00081 
00083 
00094   PhasedPMarkov( Partition& part,
00095                  const Sequence & seq,
00096                  short phase, short initial_phase = 0,
00097                  const string& prior_alpha_file = string(),
00098                  bool motif_prior = true,
00099                  double penalty = 0., 
00100                  const Translator & trans = Translator(),
00101                  const string & xmlfile = string() )
00102     : PhasedMarkov(  seq.tell_alphabet_size(), seq.tell_order(), phase )
00103   {
00104     seq.count_p_occurencies( phase, initial_phase );
00105     if (!this->load_prior_alpha( prior_alpha_file ))
00106       this->default_prior_alpha();
00107     for (int i=0; i<_phase; i++)
00108       _p_f.push_back( new pmm_forest( part, _size, _order, _prior_alpha[i], motif_prior, penalty ) ); 
00109     select( seq.get_p_count(), true, trans, xmlfile );
00110   }
00111   
00113 
00125   PhasedPMarkov( Partition& part,
00126                  unsigned long * * count,
00127                  short size, short order,
00128                  short phase, 
00129                  const string& prior_alpha_file = string(),
00130                  bool motif_prior = true,
00131                  double penalty = 0.,
00132                  const Translator & trans = Translator(),
00133                  const string & xmlfile = string() )
00134     :  PhasedMarkov( size, order, phase )
00135   { 
00136     if (!this->load_prior_alpha( prior_alpha_file ))
00137       this->default_prior_alpha();
00138     for (int i=0; i<_phase; i++)
00139       _p_f.push_back( new pmm_forest( part, _size, _order, _prior_alpha[i], motif_prior, penalty ) );  
00140     select( count, false, trans, xmlfile );
00141   }
00142 
00144 
00156   PhasedPMarkov( Partition& part,
00157                  const string & count_file,
00158                  short size, short order,
00159                  short phase, 
00160                  const string& prior_alpha_file = string(),
00161                  bool motif_prior = true,
00162                  double penalty = 0.,
00163                  const Translator & trans = Translator(),
00164                  const string & xmlfile = string() )
00165     :  PhasedMarkov( size, order, phase )
00166   { 
00167     unsigned long ** count = new unsigned long*[_phase];
00168     for (short p=0; p<_phase; p++ )
00169       count[p] = new unsigned long[_nPi];    
00170     if (! this->load_count( count_file, count )){
00171       cerr<<"Bad count file"<<endl;
00172       exit(1);
00173     }
00174 
00175     if (!this->load_prior_alpha( prior_alpha_file ))
00176       this->default_prior_alpha();
00177     for (int i=0; i<_phase; i++)
00178       _p_f.push_back( new pmm_forest( part, _size, _order, _prior_alpha[i], motif_prior, penalty ) );
00179     select( count, false, trans, xmlfile );
00180 
00181     for (short p=0; p<_phase; p++ )
00182       delete[] count[p];
00183     delete [] count;
00184   }
00185   
00187 
00197   PhasedPMarkov( Partition& part,
00198                  short size,
00199                  short order,
00200                  short phase, 
00201                  const string& prior_alpha_file = string(),
00202                  bool motif_prior = true,
00203                  double penalty = 0.,
00204                  bool alloc = true )
00205     : PhasedMarkov( size, order, phase, alloc )
00206   {
00207     if (!this->load_prior_alpha( prior_alpha_file ))
00208       this->default_prior_alpha();
00209     for (int i=0; i<_phase; i++)
00210       _p_f.push_back( new pmm_forest( part, _size, _order, _prior_alpha[i], motif_prior, penalty ) );
00211   }
00212 
00214 
00220   void select( unsigned long * * count, bool decal_required,
00221                const Translator & trans = Translator(),
00222                const string & xmlfile = string()  )
00223   {
00224    _nb_param = 0;
00225    long decal = 0;
00226    if (decal_required)
00227      decal = _jump;
00228    
00229 #ifdef HAVE_LIBXML2
00230    if (xmlfile.size() != 0){
00231      xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00232      xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00233      xmlDocSetRootElement(doc,root_node); 
00234      for (int i=0; i<_phase; i++){
00235          pmm_tree p_t = _p_f[i]->select( count[i]+decal );
00236          
00237        p_t.tree_to_matrix( _Pis[i] );
00238        _nb_param += p_t.nb_leaves() * (_size-1);
00239 
00240        std::stringstream treenamestream;
00241        treenamestream <<"phase "<<i;
00242        p_t.save( trans, root_node, treenamestream.str() );
00243 
00244 
00245      }
00246      xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00247      xmlFreeDoc(doc); 
00248    }
00249    else{
00250      for (int i=0; i<_phase; i++){
00251 
00252        pmm_tree p_t = _p_f[i]->select( count[i]+decal );
00253        p_t.tree_to_matrix( _Pis[i] );
00254        _nb_param += p_t.nb_leaves() * (_size-1);
00255 
00256      }
00257    }
00258 #endif
00259 #ifndef HAVE_LIBXML2
00260    if (xmlfile.size() != 0){
00261      cerr<<"in PhasedPMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00262    }
00263    for (int i=0; i<_phase; i++){    
00264 
00265      pmm_tree p_t = _p_f[i]->select( count[i]+decal );
00266      p_t.tree_to_matrix( _Pis[i] );
00267      _nb_param += p_t.nb_leaves() * (_size-1);
00268        
00269      
00270    }
00271 #endif
00272  }
00273 
00274 
00281   template <class TSeq1, class TSeq2>
00282     double mean_post_log_likelihood( const TSeq1& tseq_train, const TSeq2& tseq_eval,
00283                                      short initial_phase_train = 0, short initial_phase_eval = 0 )
00284   {
00285     tseq_train.count_p_occurencies(_phase, initial_phase_train);
00286     tseq_eval.count_p_occurencies(_phase, initial_phase_eval);
00287     return this->mean_post_log_likelihood( tseq_train.get_p_count(), true,
00288                                            tseq_eval.get_p_count(), true ); 
00289   }
00290 
00292 
00298   double mean_post_log_likelihood( unsigned long * * count_train, bool decal_required_t,
00299                                    unsigned long * * count_eval, bool decal_required_e )
00300   {
00301     double mean_logv = 0;
00302     for (int i=0; i<_phase; i++){             
00303       if (decal_required_t)
00304         _p_f[i]->extended_tree( count_train[i]+_jump );
00305       else
00306         _p_f[i]->extended_tree( count_train[i] );
00307       if (decal_required_e)
00308         mean_logv += _p_f[i]->mean_post_log_likelihood( count_eval[i]+_jump );
00309       else
00310         mean_logv += _p_f[i]->mean_post_log_likelihood( count_eval[i] );
00311     }
00312     return mean_logv;
00313   }
00314 
00319   template <class TSeq>
00320     double mean_post_log_likelihood( const TSeq& tseq_eval,short initial_phase_eval = 0 )
00321     {     
00322       tseq_eval.count_p_occurencies(_phase, initial_phase_eval);
00323       return this->mean_post_log_likelihood( tseq_eval.get_p_count(), true );
00324     }
00325   
00326   
00327 
00329 
00333   double mean_post_log_likelihood( unsigned long * * count_eval, bool decal_required_e )
00334   {
00335     double mean_logv = 0;
00336     for (int i=0; i<_phase; i++){              
00337       if (decal_required_e)
00338         mean_logv += _p_f[i]->mean_post_log_likelihood( count_eval[i]+_jump );
00339       else
00340         mean_logv += _p_f[i]->mean_post_log_likelihood( count_eval[i]);
00341     }
00342     return mean_logv;
00343   }
00344 
00346   double mean_post_log_likelihood(){
00347     double sum = 0.;
00348     for (short i=0; i<_phase;i++){
00349       _p_f[i]->extended_tree();
00350       sum += _p_f[i]->mean_post_log_likelihood( );
00351     }
00352     return sum;
00353   }
00354   
00355 
00356   
00357  //---------------------------------------
00359 
00363   template <class TSeq>
00364     double post_log_likelihood( const TSeq& tseq_eval, 
00365                                 short initial_phase_eval = 0 )
00366   {    
00367     if ( tseq_eval.tell_order() != _order ){
00368       cerr << "Order incompatibility beetween  sequence and model " << endl;
00369       exit(-1);
00370     }
00371     // A FAIRE attention seq > order...
00372     
00373     tseq_eval.count_p_occurencies(_phase, initial_phase_eval);     
00374     
00375     return post_log_likelihood( tseq_eval.get_p_count(), true );
00376   }
00377 
00379 
00383   double post_log_likelihood( unsigned long * * count_eval, bool decal_required_e  )
00384   {
00385     double sum=0.;
00386   
00387     for (short p=0; p<_phase; p++){
00388       if (decal_required_e)
00389         sum += _p_f[p]->post_log_likelihood( count_eval[p]+_jump );
00390       else
00391         sum += _p_f[p]->post_log_likelihood( count_eval[p] );
00392     }
00393     return sum;
00394   }
00395 
00396 
00397   //---------------------------------------
00399 
00406   void draw( unsigned long * * count, bool decal_required,
00407              gsl_rng * r,
00408              const Translator & trans = Translator(),
00409              const string & xmlfile = string() )
00410   {  
00411    _nb_param = 0;   
00412 #ifdef HAVE_LIBXML2
00413    if (xmlfile.size() != 0){
00414      xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00415      xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00416      xmlDocSetRootElement(doc,root_node); 
00417      for (int i=0; i<_phase; i++){
00418        if (decal_required)
00419          _p_f[i]->extended_tree( count[i]+_jump );
00420        else 
00421          _p_f[i]->extended_tree( count[i] );
00422          
00423        pmm_tree p_t = _p_f[i]->draw(r);                
00424        p_t.tree_to_matrix( _Pis[i] );
00425        _nb_param += p_t.nb_leaves() * (_size-1);
00426 
00427          std::stringstream treenamestream;
00428          treenamestream <<"phase "<<i;
00429          p_t.save( trans, root_node, treenamestream.str() );
00430      }
00431      xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00432      xmlFreeDoc(doc); 
00433    }
00434    else{
00435      for (int i=0; i<_phase; i++){
00436 
00437        if (decal_required)
00438          _p_f[i]->extended_tree( count[i]+_jump );
00439        else
00440           _p_f[i]->extended_tree( count[i] );
00441        pmm_tree p_t = _p_f[i]->draw(r);          
00442        p_t.tree_to_matrix( _Pis[i] );
00443        _nb_param += p_t.nb_leaves() * (_size-1);
00444     
00445      }
00446    }
00447 #endif
00448 #ifndef HAVE_LIBXML2
00449    if (xmlfile.size() != 0){
00450      cerr<<"in PhasedPMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00451    }
00452    for (int i=0; i<_phase; i++){        
00453      if (decal_required)
00454        _p_f[i]->extended_tree( count[i]+_jump );
00455      else
00456        _p_f[i]->extended_tree( count[i] );
00457      pmm_tree p_t = _p_f[i]->draw(r);        
00458      p_t.tree_to_matrix( _Pis[i] );
00459      _nb_param += p_t.nb_leaves() * (_size-1);
00460      
00461    }
00462 #endif
00463 
00464    if (_Mus != NULL)
00465      this->compute_stat_laws( true );
00466   }
00467 
00468 
00470 
00475   void draw( gsl_rng * r,
00476              const Translator & trans = Translator(),
00477              const string & xmlfile = string()  )
00478   {
00479     if (!_p_f[0]->extended_ok()){
00480       cerr<<"PhasedPMarkov::draw = count required for a first draw"<<endl;
00481       exit(1);
00482     }
00483 
00484    _nb_param = 0;   
00485 #ifdef HAVE_LIBXML2
00486    if (xmlfile.size() != 0){
00487      xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00488      xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00489      xmlDocSetRootElement(doc,root_node); 
00490      for (int i=0; i<_phase; i++){
00491        
00492        pmm_tree p_t = _p_f[i]->draw(r);                
00493        p_t.tree_to_matrix( _Pis[i] );
00494        _nb_param += p_t.nb_leaves() * (_size-1);
00495 
00496          std::stringstream treenamestream;
00497          treenamestream <<"phase "<<i;
00498          p_t.save( trans, root_node, treenamestream.str() );
00499      }
00500      xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00501      xmlFreeDoc(doc); 
00502    }
00503    else{
00504      for (int i=0; i<_phase; i++){
00505        
00506        pmm_tree p_t = _p_f[i]->draw(r);     
00507        p_t.tree_to_matrix( _Pis[i] );   
00508        _nb_param += p_t.nb_leaves() * (_size-1);
00509 
00510      }
00511    }
00512 #endif
00513 #ifndef HAVE_LIBXML2
00514    if (xmlfile.size() != 0){
00515      cerr<<"in PhasedPMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00516    }
00517    for (int i=0; i<_phase; i++){        
00518      
00519      pmm_tree p_t = _p_f[i]->draw(r);        
00520      p_t.tree_to_matrix( _Pis[i] );
00521      _nb_param += p_t.nb_leaves() * (_size-1); 
00522     
00523    }
00524 #endif   
00525    
00526    if (_Mus != NULL)
00527      this->compute_stat_laws( true );
00528   }
00529 
00530 
00532   void info_nb_leaves() const{
00533     for (short p=0; p<_phase; p++){
00534       cout<<"Phase "<<p<<": "
00535           <<_p_f[p]->current_pmm_tree().nb_leaves()<<" leaves"
00536           <<endl;
00537     }
00538   }
00539 
00540   void set_penalty( double penalty ){
00541     for (int i=0; i<_phase; i++) 
00542       _p_f[i]->set_penalty( penalty);
00543   }
00544 
00545   void default_prior_alpha(){
00546     vector<double> tmp;
00547     tmp.assign(_size, 1.);
00548     _prior_alpha.assign(_phase, tmp);
00549   }
00550 
00551 
00553   ~PhasedPMarkov(){
00554     for (int i=0; i<_phase; i++) 
00555       if (_p_f[i])
00556         delete _p_f[i];
00557   } 
00558 };
00559 #endif



Download seq++ 4.1.5
Download previous versions
Statistique & Genome Home


Generated on Thu Aug 4 18:34:04 2005 for seqpp by doxygen 1.3.9.1