![]() |
Version 4.1.5 |
#include <seqpp/PhasedMarkov.h>
Inheritance diagram for PhasedMarkov:
Public Member Functions | |
PhasedMarkov (const string &markov_file, bool calc_rank=false) | |
Constructor 1 : read a configuration file. | |
PhasedMarkov (const SequenceSet &seqset, short phase, short initial_phase=0, bool calc_rank=false, const string &prior_alpha_file=string()) | |
Constructor 2 : Estimate the transition matrices on the sequences of seqset. | |
PhasedMarkov (const Sequence &seq, short phase, short initial_phase=0, bool calc_rank=false, const string &prior_alpha_file=string()) | |
Constructor 3 : Estimate the transition matrices on the sequence seq. | |
PhasedMarkov (const PhasedMarkov &phm) | |
Constructor 4 : Copy constructor. | |
PhasedMarkov () | |
Constructor 5 : Default constructor. | |
PhasedMarkov (short size, short order, short phase, bool alloc=true, const string &prior_alpha_file=string()) | |
Constructor 6 : Minimal Constructor. | |
PhasedMarkov (const PhasedMarkov &M1, const PhasedMarkov &M2, const float p) | |
Constructor 7 : Creation of a "mixed" Markov chain M = p*M1 + (1-p)*M2. |
PhasedMarkov (const SequenceSet &seqset, const vector< int > &Indseq, short phase, short initial_phase=0, bool calc_rank=false, const string &prior_alpha_file=string()) | |
Constructor 8 : Estimation of the transition matrix based on the sequences of seqset given in Indseq. | |
PhasedMarkov (const gsl_rng *r, short size, short order, short phase, bool calc_rank=false) | |
Constructor 9 : random markov matrices. | |
PhasedMarkov (unsigned long **count, short size, short order, short phase, short initial_phase=0, bool calc_rank=false, const string &prior_alpha_file=string()) | |
Constructor 10 : Estimate the transition matrices on a word-count. |
virtual | ~PhasedMarkov () |
Destructor. | |
template<class TSeq> | |
void | estimate (const TSeq &tseq, short phase, short initial_phase, unsigned long beg, unsigned long end, bool calc_rank=false, bool count_again=true) |
Estimate the transition matrices on the sequence/sequenceset tseq. | |
void | estimate (const string &count_file, bool calc_rank=false) |
Estimate the transition matrices from a file containing the count. | |
void | estimate (unsigned long **count, bool decal_required, bool calc_rank=false) |
Estimate the transition matrices from a word count. | |
const double ** | markov_matrices () const |
access to the markov matrix(ces) | |
const double * | markov_matrix (short numphase) const |
access to the numphase-th markov matrix | |
void | draw_markov_matrices (const gsl_rng *r) |
draw at random the markov matrices | |
virtual void | new_markov_matrices () |
allocate the memory for markov matrices | |
virtual void | free_markov_matrices () |
free the memory allocated for markov matrices | |
double | total_variation (const PhasedMarkov &M) |
Total variation distance between *this and M. | |
void | compute_stat_laws (bool force=false) |
Compute the stationary laws. |
const double * | stat_law (short numphase=0) const |
access to the stationary distribution in phase numphase |
void | free_stat_laws () |
free the memory allocated for stationary laws |
void | compute_init_law (double *MuInit, const SequenceSet &seqset) const |
Get the empirical relative frequency of the first order+1 letters on the set of sequences "seqset". | |
virtual int | compute_rank () |
Computes the rank of convergence of the Markov Chain. | |
virtual long | nb_parameters () const |
return the number of effective parameters | |
void | link_to_translator (const Translator &trans) |
link to a Translator object to use proba methods with strings | |
double | proba_c (const string &word, Coder &coder, short numphase=0) const |
Stationary probability of a word (size greater than _order) conditionally on its first letters (call link_to_translator first!). |
double | proba (const string &word, Coder &coder, short numphase=0) const |
Stationary probability of a word. |
double | proba_c (const vector< short > &word, Coder &coder, short numphase=0) const |
Stationary probability of a word (size greater than _order) conditionally on its first letters. |
double | proba (const vector< short > &word, Coder &coder, short numphase=0) const |
Stationary probability of a word. |
double | proba_c (long word, int lw=-1, long jump=-1, short numphase=0) const |
Stationary probability of a word (size greater than _order) conditionally on its first letters. |
double | proba (long word, int lw=-1, long jump=-1, short numphase=0) const |
Stationary probability of a word. |
double | proba_c (const long *seq, long tbeg, long tend, short numphase=0) const |
Stationary probability of the word seq[tbeg...tend] (size greater than _order) conditionally on its first letters, with seq[tend] in phase numphase. |
double | proba (const long *seq, long tbeg, long tend, short numphase=0) const |
Stationary probability of the word seq[tbeg...tend] with seq[tend] in phase numphase. |
double | log_likelihood (const SequenceSet &seqset, short initial_phase=0, short numphase=-1) const |
log-likelihood of a set of sequences |
double | log_ratio_likelihood (const SequenceSet &seqset, const PhasedMarkov &M, short initial_phase1=0, short initial_phase2=0) const |
Calculation of the logarithm of the ratio of the probabilities of observing "seqset" under "this" distribution and under "M". |
double | log_likelihood (const Sequence &seq, short initial_phase=0, short numphase=-1) const |
loglikelihood of a sequence | |
double | log_ratio_likelihood (const Sequence &seq, const PhasedMarkov &M, short initial_phase1=0, short initial_phase2=0) const |
Calculation of the logarithm of the ratio of the probability of observing "seq" under "this" distribution and "M". | |
template<class TSeq> | |
double | BIC (const TSeq &tseq, short initial_phase=0) const |
BIC of sequences (BIC = -2*loglikelihood + nbparam*log(length)). | |
template<class TSeq> | |
double | AIC (const TSeq &tseq, short initial_phase=0) const |
AIC of a set of sequences (AIC = -2*loglikelihood + 2*nbparam). | |
template<class TSeq1, class TSeq2> | |
double | post_log_likelihood (const TSeq1 &tseq_train, const TSeq2 &tseq_eval, bool force=false, short initial_phase_train=0, short initial_phase_eval=0) |
compute the mean posterior likelihood over the parameters | |
void | print (const string &FileOut) |
Print a summary of the object. | |
void | print (ofstream &Out) const |
Print a summary of the object. | |
int | tell_size () const |
Returns the alphabet size. | |
int | tell_rank () const |
Returns the convergence rank. | |
int | tell_order () const |
Returns the order. | |
int | tell_phase () const |
Returns the phase. | |
int | nMu () const |
size of the stat law vector | |
int | nPi () const |
size of the matrix | |
double | Pi (int index, int p=0) const |
Access to Markov matrix Pi. | |
double & | operator() (int index, int p=0) |
() operator for Markov matrix Pi elements | |
double | Mu (int index, int p=0) const |
Access to stationary vector Mu elements. |
bool | isPis () const |
_Pis != NULL ? | |
bool | isMus () const |
_Mus != NULL ? | |
short | nextPhase (short p) const |
Give the phase following p. | |
short | prevPhase (short p) const |
Give the phase preceding p. | |
bool | Stochasticity () |
Verify stochasticity of the _Pis[] and rescale them if necessary. |
void | file_to_count (const string &src_file, unsigned long **dest_count) |
fill a count from a file | |
Protected Member Functions | |
bool | isNextPhase () const |
_nextPhase != NULL ? | |
bool | isPrevPhase () const |
_prevPhase != NULL ? | |
Protected Attributes | |
short | _phase |
Phase of the model. | |
double ** | _Pis |
Pointer to "Matrices" (in vector format) of transition probabilities for each phase. | |
double ** | _containers |
Container of "Matrices" (in vector format) of transition probabilities for each phase. | |
double ** | _Mus |
Vector of stationary probabilities for each phase. |
short | _size |
Size of the alphabet. | |
short | _order |
Order of the model (the same at each phase). | |
long | _nPi |
Dim of Pi :_size^(_order+1). | |
long | _nMu |
Dim of Mu :_size^_order. | |
long | _nb_param |
number of effective parameters | |
int | _rank |
How many steps to converge to Mu ? | |
long | _jump |
jump to the codes of _order+1 letters when Sequence-like code | |
short * | _nextPhase |
(Optimization) For each phase, give the next phase | |
short * | _prevPhase |
(Optimization) For each phase, give the previous phase | |
const Translator * | _trans |
link to a translator object for the use of proba methods | |
double | _postloglike |
current posterior likelihood from a training [set of] sequence | |
vector< vector< double > > | _prior_alpha |
prior on the counts, one value per alphabet element, and for each phase |
This is a generalization of a Markov chain that uses different transition matrices depending on the position considered in the sequence. The number of phases is variable.
For example, if we consider 3 phases and denote the three transition matrices by Pi1, Pi2 and Pi3 respectively, the Markov sequences will be generated by using the matrices in the cyclic order 123123123123... In DNA modelling (genomics), this is useful to take into account the fact that a coding region is read in blocks of 3 bases. The order of the Markov model, i.e. the number of previous states necessary to determine the distribution of the current state, is variable. It is assumed here that this order is the same in all the phases.
Methods are implemented for Markovian transition matrix estimation, stationary distribution computation, word probabilities, total variation distance between two Markovian matrices, and more. The efficiency of the eigenproblem computations is ensured by the use of the implicitly restarted Arnoldi algorithm.
Simulations are also possible.
|
Constructor 2 : Estimate the transition matrices on the sequences of seqset.
|
|
Constructor 3 : Estimate the transition matrices on the sequence seq.
|
|
Constructor 6 : Minimal Constructor. Initialises the constants of the model but not the matrices nor the stat laws
|
|
Constructor 7 : Creation of a "mixed" Markov chain M = p*M1 + (1-p)*M2.
|
|
Constructor 8 : Estimation of the transition matrix based on the sequences of seqset given in Indseq.
|
|
Constructor 9 : random markov matrices.
const gsl_rng_type * T;
// Choose a default generator and seed
// from environment variables
gsl_rng_env_setup();
// Create a new instance of the generator
T = gsl_rng_default;
gsl_rng * r = gsl_rng_alloc (T);
// Initialize/seed the random number generator
gsl_rng_set( r, (long)getpid() );
...
...
gsl_rng_free( r ); |
|
Constructor 10 : Estimate the transition matrices on a word-count.
|
|
AIC of a set of sequences (AIC = -2*loglikelihood + 2*nbparam).
|
|
BIC of sequences (BIC = -2*loglikelihood + nbparam*log(length)).
|
|
draw at random the markov matrices
const gsl_rng_type * T;
// Choose a default generator and seed
// from environment variables
gsl_rng_env_setup();
// Create a new instance of the generator
T = gsl_rng_default;
gsl_rng * r = gsl_rng_alloc (T);
// Initialize/seed the random number generator
gsl_rng_set( r, (long)getpid() );
...
...
gsl_rng_free( r ); |
|
Estimate the transition matrices from a word count.
|
|
Estimate the transition matrices from a file containing the count.
Reimplemented in Markov. |
|
Estimate the transition matrices on the sequence/sequenceset tseq.
|
|
fill a count from a file
|
|
loglikelihood of a sequence
|
|
log-likelihood of a set of sequences
|
|
Calculation of the logarithm of the ratio of the probability of observing "seq" under "this" distribution and "M".
|
|
Calculation of the logarithm of the ratio of the probability of observing "seq" under "this" distribution and "M".
|
|
Access to stationary vector Mu elements.
|
|
() operator for Markov matrix Pi elements
|
|
Access to Markov matrix Pi.
|
|
compute the mean posterior likelihood over the parameters
|
|
Print a summary of the object. The estimation results can be saved in such a representation: # 1 <- Order of the phased Markov chain # 2 <- Phase # 4 <- Alphabet size # 19 steps <- Convergence to the stationnary distribution # Phase n°0 # Transition matrix: 0.3945322543 0.1652811616 0.1535033485 0.2866832356 etc........... # Stationnary Probability: 0.3127105148 0.2114684268 0.1783495332 0.2974715251 # Phase n°1 # Transition matrix: 0.3923961961 0.163516403 0.1521005152 0.2919868858 etc................ # Stationnary Probability: 0.3135417652 0.2089660861 0.1771006767 0.300391472 |
|
Stationary probability of the word seq[tbeg...tend] with seq[tend] in phase numphase.
|
|
Stationary probability of a word.
|
|
Stationary probability of a word.
|
|
Stationary probability of a word.
|
|
Stationary probability of the word seq[tbeg...tend] (size greater than _order) conditionally on its first letters, with seq[tend] in phase numphase.
|
|
Stationary probability of a word (size greater than _order) conditionally on its first letters.
|
|
Stationary probability of a word (size greater than _order) conditionally on its first letters.
|
|
Stationary probability of a word (size greater than _order) conditionally on its first letters (call link_to_translator first!).
|
Download seq++ 4.1.5 |
Download previous versions |
Statistique & Genome Home |
Contributors : M.Baudry, P.Y.Bourguignon, M.Hoebeke, V.Miele, P.Nicolas, G.Nuel, H.Richard, D.Robelin |