00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00028 #ifndef SEQPP_PHASEDPMARKOV_H
00029 #define SEQPP_PHASEDPMARKOV_H
00030
00031 #include <seqpp/PhasedMarkov.h>
00032
00033 #include <seqpp/pmm_forest.h>
00034
00047 class PhasedPMarkov : public PhasedMarkov
00048 {
00049 protected :
00051 vector<pmm_forest*> _p_f;
00052
00053 public:
00055
00065 PhasedPMarkov( Partition& part,
00066 const SequenceSet & seqset,
00067 short phase, short initial_phase = 0,
00068 const string& prior_alpha_file = string(),
00069 bool motif_prior = true,
00070 double penalty = 0.,
00071 const string & xmlfile = string() )
00072 : PhasedMarkov( seqset.tell_alphabet_size(), seqset.tell_order(), phase )
00073 {
00074 seqset.count_p_occurencies( phase, initial_phase );
00075 if (!this->load_prior_alpha( prior_alpha_file ))
00076 this->default_prior_alpha();
00077 for (int i=0; i<_phase; i++)
00078 _p_f.push_back( new pmm_forest( part, _size, _order, _prior_alpha[i], motif_prior, penalty ) );
00079 select( seqset.get_p_count(), true, seqset.get_translator(), xmlfile );
00080 }
00081
00083
00094 PhasedPMarkov( Partition& part,
00095 const Sequence & seq,
00096 short phase, short initial_phase = 0,
00097 const string& prior_alpha_file = string(),
00098 bool motif_prior = true,
00099 double penalty = 0.,
00100 const Translator & trans = Translator(),
00101 const string & xmlfile = string() )
00102 : PhasedMarkov( seq.tell_alphabet_size(), seq.tell_order(), phase )
00103 {
00104 seq.count_p_occurencies( phase, initial_phase );
00105 if (!this->load_prior_alpha( prior_alpha_file ))
00106 this->default_prior_alpha();
00107 for (int i=0; i<_phase; i++)
00108 _p_f.push_back( new pmm_forest( part, _size, _order, _prior_alpha[i], motif_prior, penalty ) );
00109 select( seq.get_p_count(), true, trans, xmlfile );
00110 }
00111
00113
00125 PhasedPMarkov( Partition& part,
00126 unsigned long * * count,
00127 short size, short order,
00128 short phase,
00129 const string& prior_alpha_file = string(),
00130 bool motif_prior = true,
00131 double penalty = 0.,
00132 const Translator & trans = Translator(),
00133 const string & xmlfile = string() )
00134 : PhasedMarkov( size, order, phase )
00135 {
00136 if (!this->load_prior_alpha( prior_alpha_file ))
00137 this->default_prior_alpha();
00138 for (int i=0; i<_phase; i++)
00139 _p_f.push_back( new pmm_forest( part, _size, _order, _prior_alpha[i], motif_prior, penalty ) );
00140 select( count, false, trans, xmlfile );
00141 }
00142
00144
00156 PhasedPMarkov( Partition& part,
00157 const string & count_file,
00158 short size, short order,
00159 short phase,
00160 const string& prior_alpha_file = string(),
00161 bool motif_prior = true,
00162 double penalty = 0.,
00163 const Translator & trans = Translator(),
00164 const string & xmlfile = string() )
00165 : PhasedMarkov( size, order, phase )
00166 {
00167 unsigned long ** count = new unsigned long*[_phase];
00168 for (short p=0; p<_phase; p++ )
00169 count[p] = new unsigned long[_nPi];
00170 if (! this->load_count( count_file, count )){
00171 cerr<<"Bad count file"<<endl;
00172 exit(1);
00173 }
00174
00175 if (!this->load_prior_alpha( prior_alpha_file ))
00176 this->default_prior_alpha();
00177 for (int i=0; i<_phase; i++)
00178 _p_f.push_back( new pmm_forest( part, _size, _order, _prior_alpha[i], motif_prior, penalty ) );
00179 select( count, false, trans, xmlfile );
00180
00181 for (short p=0; p<_phase; p++ )
00182 delete[] count[p];
00183 delete [] count;
00184 }
00185
00187
00197 PhasedPMarkov( Partition& part,
00198 short size,
00199 short order,
00200 short phase,
00201 const string& prior_alpha_file = string(),
00202 bool motif_prior = true,
00203 double penalty = 0.,
00204 bool alloc = true )
00205 : PhasedMarkov( size, order, phase, alloc )
00206 {
00207 if (!this->load_prior_alpha( prior_alpha_file ))
00208 this->default_prior_alpha();
00209 for (int i=0; i<_phase; i++)
00210 _p_f.push_back( new pmm_forest( part, _size, _order, _prior_alpha[i], motif_prior, penalty ) );
00211 }
00212
00214
00220 void select( unsigned long * * count, bool decal_required,
00221 const Translator & trans = Translator(),
00222 const string & xmlfile = string() )
00223 {
00224 _nb_param = 0;
00225 long decal = 0;
00226 if (decal_required)
00227 decal = _jump;
00228
00229 #ifdef HAVE_LIBXML2
00230 if (xmlfile.size() != 0){
00231 xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00232 xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00233 xmlDocSetRootElement(doc,root_node);
00234 for (int i=0; i<_phase; i++){
00235 pmm_tree p_t = _p_f[i]->select( count[i]+decal );
00236
00237 p_t.tree_to_matrix( _Pis[i] );
00238 _nb_param += p_t.nb_leaves() * (_size-1);
00239
00240 std::stringstream treenamestream;
00241 treenamestream <<"phase "<<i;
00242 p_t.save( trans, root_node, treenamestream.str() );
00243
00244
00245 }
00246 xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00247 xmlFreeDoc(doc);
00248 }
00249 else{
00250 for (int i=0; i<_phase; i++){
00251
00252 pmm_tree p_t = _p_f[i]->select( count[i]+decal );
00253 p_t.tree_to_matrix( _Pis[i] );
00254 _nb_param += p_t.nb_leaves() * (_size-1);
00255
00256 }
00257 }
00258 #endif
00259 #ifndef HAVE_LIBXML2
00260 if (xmlfile.size() != 0){
00261 cerr<<"in PhasedPMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00262 }
00263 for (int i=0; i<_phase; i++){
00264
00265 pmm_tree p_t = _p_f[i]->select( count[i]+decal );
00266 p_t.tree_to_matrix( _Pis[i] );
00267 _nb_param += p_t.nb_leaves() * (_size-1);
00268
00269
00270 }
00271 #endif
00272 }
00273
00274
00281 template <class TSeq1, class TSeq2>
00282 double mean_post_log_likelihood( const TSeq1& tseq_train, const TSeq2& tseq_eval,
00283 short initial_phase_train = 0, short initial_phase_eval = 0 )
00284 {
00285 tseq_train.count_p_occurencies(_phase, initial_phase_train);
00286 tseq_eval.count_p_occurencies(_phase, initial_phase_eval);
00287 return this->mean_post_log_likelihood( tseq_train.get_p_count(), true,
00288 tseq_eval.get_p_count(), true );
00289 }
00290
00292
00298 double mean_post_log_likelihood( unsigned long * * count_train, bool decal_required_t,
00299 unsigned long * * count_eval, bool decal_required_e )
00300 {
00301 double mean_logv = 0;
00302 for (int i=0; i<_phase; i++){
00303 if (decal_required_t)
00304 _p_f[i]->extended_tree( count_train[i]+_jump );
00305 else
00306 _p_f[i]->extended_tree( count_train[i] );
00307 if (decal_required_e)
00308 mean_logv += _p_f[i]->mean_post_log_likelihood( count_eval[i]+_jump );
00309 else
00310 mean_logv += _p_f[i]->mean_post_log_likelihood( count_eval[i] );
00311 }
00312 return mean_logv;
00313 }
00314
00319 template <class TSeq>
00320 double mean_post_log_likelihood( const TSeq& tseq_eval,short initial_phase_eval = 0 )
00321 {
00322 tseq_eval.count_p_occurencies(_phase, initial_phase_eval);
00323 return this->mean_post_log_likelihood( tseq_eval.get_p_count(), true );
00324 }
00325
00326
00327
00329
00333 double mean_post_log_likelihood( unsigned long * * count_eval, bool decal_required_e )
00334 {
00335 double mean_logv = 0;
00336 for (int i=0; i<_phase; i++){
00337 if (decal_required_e)
00338 mean_logv += _p_f[i]->mean_post_log_likelihood( count_eval[i]+_jump );
00339 else
00340 mean_logv += _p_f[i]->mean_post_log_likelihood( count_eval[i]);
00341 }
00342 return mean_logv;
00343 }
00344
00346 double mean_post_log_likelihood(){
00347 double sum = 0.;
00348 for (short i=0; i<_phase;i++){
00349 _p_f[i]->extended_tree();
00350 sum += _p_f[i]->mean_post_log_likelihood( );
00351 }
00352 return sum;
00353 }
00354
00355
00356
00357
00359
00363 template <class TSeq>
00364 double post_log_likelihood( const TSeq& tseq_eval,
00365 short initial_phase_eval = 0 )
00366 {
00367 if ( tseq_eval.tell_order() != _order ){
00368 cerr << "Order incompatibility beetween sequence and model " << endl;
00369 exit(-1);
00370 }
00371
00372
00373 tseq_eval.count_p_occurencies(_phase, initial_phase_eval);
00374
00375 return post_log_likelihood( tseq_eval.get_p_count(), true );
00376 }
00377
00379
00383 double post_log_likelihood( unsigned long * * count_eval, bool decal_required_e )
00384 {
00385 double sum=0.;
00386
00387 for (short p=0; p<_phase; p++){
00388 if (decal_required_e)
00389 sum += _p_f[p]->post_log_likelihood( count_eval[p]+_jump );
00390 else
00391 sum += _p_f[p]->post_log_likelihood( count_eval[p] );
00392 }
00393 return sum;
00394 }
00395
00396
00397
00399
00406 void draw( unsigned long * * count, bool decal_required,
00407 gsl_rng * r,
00408 const Translator & trans = Translator(),
00409 const string & xmlfile = string() )
00410 {
00411 _nb_param = 0;
00412 #ifdef HAVE_LIBXML2
00413 if (xmlfile.size() != 0){
00414 xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00415 xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00416 xmlDocSetRootElement(doc,root_node);
00417 for (int i=0; i<_phase; i++){
00418 if (decal_required)
00419 _p_f[i]->extended_tree( count[i]+_jump );
00420 else
00421 _p_f[i]->extended_tree( count[i] );
00422
00423 pmm_tree p_t = _p_f[i]->draw(r);
00424 p_t.tree_to_matrix( _Pis[i] );
00425 _nb_param += p_t.nb_leaves() * (_size-1);
00426
00427 std::stringstream treenamestream;
00428 treenamestream <<"phase "<<i;
00429 p_t.save( trans, root_node, treenamestream.str() );
00430 }
00431 xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00432 xmlFreeDoc(doc);
00433 }
00434 else{
00435 for (int i=0; i<_phase; i++){
00436
00437 if (decal_required)
00438 _p_f[i]->extended_tree( count[i]+_jump );
00439 else
00440 _p_f[i]->extended_tree( count[i] );
00441 pmm_tree p_t = _p_f[i]->draw(r);
00442 p_t.tree_to_matrix( _Pis[i] );
00443 _nb_param += p_t.nb_leaves() * (_size-1);
00444
00445 }
00446 }
00447 #endif
00448 #ifndef HAVE_LIBXML2
00449 if (xmlfile.size() != 0){
00450 cerr<<"in PhasedPMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00451 }
00452 for (int i=0; i<_phase; i++){
00453 if (decal_required)
00454 _p_f[i]->extended_tree( count[i]+_jump );
00455 else
00456 _p_f[i]->extended_tree( count[i] );
00457 pmm_tree p_t = _p_f[i]->draw(r);
00458 p_t.tree_to_matrix( _Pis[i] );
00459 _nb_param += p_t.nb_leaves() * (_size-1);
00460
00461 }
00462 #endif
00463
00464 if (_Mus != NULL)
00465 this->compute_stat_laws( true );
00466 }
00467
00468
00470
00475 void draw( gsl_rng * r,
00476 const Translator & trans = Translator(),
00477 const string & xmlfile = string() )
00478 {
00479 if (!_p_f[0]->extended_ok()){
00480 cerr<<"PhasedPMarkov::draw = count required for a first draw"<<endl;
00481 exit(1);
00482 }
00483
00484 _nb_param = 0;
00485 #ifdef HAVE_LIBXML2
00486 if (xmlfile.size() != 0){
00487 xmlDocPtr doc=xmlNewDoc(BAD_CAST "1.0");
00488 xmlNodePtr root_node=xmlNewNode(NULL,BAD_CAST "treeset");
00489 xmlDocSetRootElement(doc,root_node);
00490 for (int i=0; i<_phase; i++){
00491
00492 pmm_tree p_t = _p_f[i]->draw(r);
00493 p_t.tree_to_matrix( _Pis[i] );
00494 _nb_param += p_t.nb_leaves() * (_size-1);
00495
00496 std::stringstream treenamestream;
00497 treenamestream <<"phase "<<i;
00498 p_t.save( trans, root_node, treenamestream.str() );
00499 }
00500 xmlSaveFormatFileEnc(xmlfile.c_str(),doc,"ISO-8859-1",1);
00501 xmlFreeDoc(doc);
00502 }
00503 else{
00504 for (int i=0; i<_phase; i++){
00505
00506 pmm_tree p_t = _p_f[i]->draw(r);
00507 p_t.tree_to_matrix( _Pis[i] );
00508 _nb_param += p_t.nb_leaves() * (_size-1);
00509
00510 }
00511 }
00512 #endif
00513 #ifndef HAVE_LIBXML2
00514 if (xmlfile.size() != 0){
00515 cerr<<"in PhasedPMarkov: no libxml2 detected to fill "<<xmlfile<<endl;
00516 }
00517 for (int i=0; i<_phase; i++){
00518
00519 pmm_tree p_t = _p_f[i]->draw(r);
00520 p_t.tree_to_matrix( _Pis[i] );
00521 _nb_param += p_t.nb_leaves() * (_size-1);
00522
00523 }
00524 #endif
00525
00526 if (_Mus != NULL)
00527 this->compute_stat_laws( true );
00528 }
00529
00530
00532 void info_nb_leaves() const{
00533 for (short p=0; p<_phase; p++){
00534 cout<<"Phase "<<p<<": "
00535 <<_p_f[p]->current_pmm_tree().nb_leaves()<<" leaves"
00536 <<endl;
00537 }
00538 }
00539
00540 void set_penalty( double penalty ){
00541 for (int i=0; i<_phase; i++)
00542 _p_f[i]->set_penalty( penalty);
00543 }
00544
00545 void default_prior_alpha(){
00546 vector<double> tmp;
00547 tmp.assign(_size, 1.);
00548 _prior_alpha.assign(_phase, tmp);
00549 }
00550
00551
00553 ~PhasedPMarkov(){
00554 for (int i=0; i<_phase; i++)
00555 if (_p_f[i])
00556 delete _p_f[i];
00557 }
00558 };
00559 #endif