/*****************************************************************
        Copyright by Rockefeller University,
can not be reproduced or distributed without written permission of
copyright holder.  Version of October 2003.

Written by Saurabh Sinha (contact person), Erik van Nimwegen, and 
Eric Siggia.

The program stubb (and its relatives) implement an algorithm for
finding likely cis-regulatory modules, described in the following
paper:
"A Probabilistic Method to Detect Regulatory Modules"
by Saurabh Sinha, Erik van Nimwegen and Eric Siggia. 
Eleventh International Conference on Intelligent Systems for
Molecular Biology, Brisbane, Australia, July 2003, pg 292-301.

The file sample/gap_wtmx that comes with this distribution includes 
a sample set of transcription factor weight matrices (PWM's) that 
were reported in :
"Computational detection of genomic cis-regulatory modules applied
to body patterning in the early Drosophila embryo"
by N. Rajewsky, M. Vergassola, U. Gaul and E. Siggia.
BMC Bioinformatics 3 (30) 2002.
******************************************************************/

#include "parameters.h"
#include <math.h>
#include <stdexcept>
#include "util.h"

// Default constructor: produces an empty cache that owns no matrix list and
// no probability table, covers a zero-length range starting at 0, and is not
// tied to any window.
Parameters::probabilitycache::probabilitycache()
{
  // no window association
  _associatedCurrentWindow = NULL;

  // empty extent
  _length = 0;
  _start = 0;

  // no matrices and no per-matrix probability rows
  _numWM = 0;
  _wms = NULL;
  _prob = NULL;
}

// Copy assignment: releases whatever this cache currently owns and replaces
// it with a copy of `pc` (see Copy() for what is deep- vs shallow-copied).
// Self-assignment is a no-op.
// Returns a reference to *this on every path; the previous version fell off
// the end of the function after Copy(), which is undefined behaviour for a
// function with a non-void return type.
Parameters::probabilitycache& Parameters::probabilitycache::operator=(const probabilitycache &pc)
{
  if (this != &pc) {
    Destroy();
    Copy(pc);
  }
  return *this;
}

// Copy constructor: every member is populated by Copy(), so no prior
// initialisation of this object is required.
Parameters::probabilitycache::probabilitycache(const probabilitycache &pc)
{
  this->Copy(pc);
}

// Destructor: hands all cleanup to Destroy(), which frees the probability
// table and the matrix-pointer array.
Parameters::probabilitycache::~probabilitycache()
{
  this->Destroy();
}

void Parameters::probabilitycache::Copy(const probabilitycache &pc)
{
  const probabilitycache *pcptr = &pc;
  if (pcptr==NULL) {
    printf("Error: copy called for null probabilitycache\n");
    exit(1);
  }

  int i;
  _numWM = pcptr->_numWM;
  _start = pcptr->_start;
  _length = pcptr->_length;

  Window *pcptr_window = pcptr->_associatedCurrentWindow;
  _associatedCurrentWindow = pcptr_window;   // shallow copy

  if (pcptr->_wms) {
    _wms = new WtMx*[_numWM];
    for (i=0; i<_numWM; i++) 
      _wms[i] = pcptr->_wms[i];
  }
  else _wms = NULL;

  if (pcptr->_prob) {
    _prob = new DTYPE  *[_numWM];
    for (int j=0; j<_numWM; j++) {
      _prob[j] = new DTYPE [_length];
      for (int l=0; l<_length; l++) 
	_prob[j][l] = pcptr->_prob[j][l];
    }
  }
  else _prob = NULL;

}

// Frees the storage owned by this cache and resets the owning pointers to
// NULL so a second call is harmless. The WtMx objects referenced from _wms
// are not deleted; only the pointer array is.
void Parameters::probabilitycache::Destroy()
{
  if (_prob != NULL) {
    // one row per weight matrix
    int row = 0;
    while (row < _numWM) {
      if (_prob[row] != NULL) {
        delete [] _prob[row];
      }
      ++row;
    }
    delete [] _prob;
    _prob = NULL;
  }

  if (_wms != NULL) {
    delete [] _wms;
    _wms = NULL;
  }
}

// Default constructor: no species and no per-species mu array yet.
Parameters::Phylogeny::Phylogeny()
{
  _numSpecies = 0;
  _mu = NULL;
}

// Returns the product, over every sequence listed in `arrayofchar`, of the
// probability of that sequence's character given ancestral base `a`:
//     mu * Frequency(index, ch [, history word]) + (ch == a ? 1 - mu : 0)
// where mu is this phylogeny's _mu entry for the sequence.
//
//   wm          weight matrix supplying Frequency() and GetForwardBias()
//   index       position within the matrix
//   orientation see the comment below the signature; orientation 2 mixes the
//               forward and reverse terms with the matrix's forward bias and
//               is only permitted for matrices of length <= 1 (otherwise the
//               program prints an error and exits)
//   a           ancestral base index that each character is compared against
//   arrayofchar packed column: first byte is the number of sequences, then
//               one (sequence index, character index) byte pair per sequence
//   history     optional array indexed by sequence index; an entry of -1
//               means "no history for this sequence", any other value is
//               passed to the three-argument Frequency() as the history word.
//               When NULL, no history is used for any sequence.
DTYPE  Parameters::Phylogeny::ComputeProbabilityGivenAncestor(WtMx *wm, int index, int orientation, int a, char *arrayofchar, int *history)
  // orientation: 0 => forward only, 1 => backward only, 2 => both
{
  char numseq = *arrayofchar;   // leading byte: how many (seqindex, char) pairs follow
  DTYPE  term = 1;
  if (history == NULL) {
    // No history supplied at all: always use the two-argument Frequency().
    char *ptr = arrayofchar; ptr++;
    for (int j=0; j<numseq; j++) {
      char seqindex = *ptr++;
      float mu = _mu[seqindex];
      char indch = *ptr++;
      // indices 0..3 are mirrored (3-indch) for the reverse strand; larger
      // indices are left unchanged
      char indrevch = (indch <= 3 ? 3-indch : indch);
      if (orientation==0) 
	term *= (mu*wm->Frequency(index,indch) + (indch==a?1-mu:0));
      if (orientation==1) 
	term *= (mu*wm->Frequency(index,indrevch) + (indrevch==a?1-mu:0));
      if (orientation==2) {
	if (wm->Length() > 1) {
	  printf("Error: Shouldnt be calling function ComputeProbabilityGivenAncestor\n");
	  exit(1);
	}
	// mixture of forward and reverse terms weighted by the forward bias
	float fbias = wm->GetForwardBias();
	term *= fbias*(mu*wm->Frequency(index,indch) + (indch==a?1-mu:0)) + (1-fbias)*(mu*wm->Frequency(index,indrevch) + (indrevch==a?1-mu:0));
      }
    }
    return term;  
  }
  else {
    // History array supplied: decide per sequence whether to use it.
    char *ptr = arrayofchar; ptr++;
    for (int j=0; j<numseq; j++) {
      char seqindex = *ptr++;
      float mu = _mu[seqindex];
      char indch = *ptr++;
      char indrevch = (indch <= 3 ? 3-indch : indch);
      int hword = history[seqindex];
      if (hword==-1) {  // dont use history
	if (orientation==0) 
	  term *= (mu*wm->Frequency(index,indch) + (indch==a?1-mu:0));
	if (orientation==1)
	  term *= (mu*wm->Frequency(index,indrevch) + (indrevch==a?1-mu:0));
	if (orientation==2) {
	  if (wm->Length() > 1) {
	    printf("Error: Shouldnt be calling function ComputeProbabilityGivenAncestor\n");
	    exit(1);
	  }
	  float fbias = wm->GetForwardBias();
	  term *= fbias*(mu*wm->Frequency(index,indch) + (indch==a?1-mu:0)) + (1-fbias)*(mu*wm->Frequency(index,indrevch) + (indrevch==a?1-mu:0));
	}
      }
      else {            // use history
	// same formulas as above, but Frequency() is conditioned on hword
	if (orientation==0) 
	  term *= (mu*wm->Frequency(index,indch,hword) + (indch==a?1-mu:0));
	if (orientation==1)
	  term *= (mu*wm->Frequency(index,indrevch,hword) + (indrevch==a?1-mu:0));
	if (orientation==2) {
	  if (wm->Length() > 1) {
	    printf("Error: Shouldnt be calling function ComputeProbabilityGivenAncestor\n");
	    exit(1);
	  }	  
	  float fbias = wm->GetForwardBias();
	  term *= fbias*(mu*wm->Frequency(index,indch,hword) + (indch==a?1-mu:0)) + (1-fbias)*(mu*wm->Frequency(index,indrevch,hword) + (indrevch==a?1-mu:0));
	}
      }
    }
    return term;
  }
}

// Probability of one alignment column under `wm` at matrix position `index`,
// obtained by summing over the four possible ancestral bases:
//     sum_a  Frequency(index, a) * P(column | ancestor = a)
// `rev_orientation` selects orientation 1 (reverse) instead of 0 (forward)
// for the per-ancestor term; `arrayofchar` and `history` are forwarded
// unchanged to ComputeProbabilityGivenAncestor().
//
// This is capable of handling multiple sequences, but only under an
// independence assumption. Do not use it when multiple sequences and a
// higher-order background matrix must be handled accurately.
DTYPE  Parameters::Phylogeny::ComputeProbability(WtMx *wm, int index, bool rev_orientation, char *arrayofchar, int *history)
{
  DTYPE  total = 0;
  int ancestor = 0;   // ancestral character at this position
  while (ancestor < 4) {
    total += wm->Frequency(index,ancestor)*ComputeProbabilityGivenAncestor(wm,index,(rev_orientation?1:0),ancestor,arrayofchar,history);
    ++ancestor;
  }
  return total;
}

// Static member data of the (virtual) Parameters class has to be explicitly defined.
// Both start out NULL: _currentwindowcache is filled in by TrainBackground() /
// CacheBackgroundProbabilities() (one cache per window), and _phy is created
// by SetPhylogeny(). Being static, they are shared by all Parameters objects.
struct Parameters::probabilitycache *Parameters::_currentwindowcache = NULL;
struct Parameters::Phylogeny *Parameters::_phy = NULL;

// Maps an upper-case nucleotide code to its complement:
//   A<->T, C<->G, R<->Y; S, W and N map to themselves.
// Any other character (including lower-case letters) yields 'N'.
char Parameters::ReverseChar(char ch)
{
  char complement = 'N';   // fallback for anything not listed below
  switch(ch) {
  case 'A': complement = 'T'; break;
  case 'T': complement = 'A'; break;
  case 'C': complement = 'G'; break;
  case 'G': complement = 'C'; break;
  case 'R': complement = 'Y'; break;
  case 'Y': complement = 'R'; break;
  case 'S':
  case 'W':
    complement = ch;         // self-complementary codes
    break;
  default:
    break;
  }
  return complement;
}

// Builds a length-1 background weight matrix named "Background" from the
// base frequencies of `context` and returns it; the caller owns the result.
// Exits with an error message if `context` is NULL.
//
// Two compile-time variants:
//   - without _MARKOV: a plain 4x1 matrix filled from ComputeBaseFrequencies().
//   - with _MARKOV:    a Markov background of order MARKOV_ORDER (0 if the
//                      macro is undefined), built from
//                      ComputeBaseFrequenciesWithHistory() plus the plain
//                      base frequencies.
// All temporary arrays allocated here are freed before returning.
WtMx *Parameters::TrainWtMx(Window *context)
{
  if (context==NULL) {
    printf("Error: TrainWtMx called on null window\n");
    exit(1);
  }

  int i;
  WtMx *bkgwm;

#ifndef _MARKOV
  // Zeroth-order background: one frequency per base.
  float *bkg = new float[4];
  context->ComputeBaseFrequencies(bkg);

  // Repack as a 4 x 1 matrix (base x position) for the WtMx constructor.
  float **bkgmat = new float *[4];
  for (i=0; i<4; i++) {
    bkgmat[i] = new float[1];
    bkgmat[i][0] = bkg[i];
  }

  bkgwm = new WtMx(bkgmat,1,"Background");  // create a length 1 wm for background 

  // The temporaries are released right after construction.
  // NOTE(review): this presumes the WtMx constructor copies the matrix — confirm.
  for (i=0; i<4; i++) 
    delete [] bkgmat[i];
  delete [] bkgmat;
  delete [] bkg;
#else
#ifndef MARKOV_ORDER
  int morder = 0;
#else 
  int morder = MARKOV_ORDER;
#endif
  // now train a Markov chain.
  // powmorder = 4^morder = number of distinct history words.
  // NOTE(review): relies on pow() returning an exactly representable value
  // before truncation to int — confirm on the target platform.
  int powmorder = (int)(pow(4,morder));
  float **bkg = new float *[powmorder];
  for (i=0; i<powmorder; i++) bkg[i] = new float[4];
  context->ComputeBaseFrequenciesWithHistory(bkg,morder);

  // Repack as [history word][base][position] with a single position.
  float ***bkgmat = new float **[powmorder];
  for (i=0; i<powmorder; i++) {
    bkgmat[i] = new float *[4];
    for (int j=0; j<4; j++) {
      bkgmat[i][j] = new float[1];
      bkgmat[i][j][0] = bkg[i][j];
    }
  }

  // Plain (history-free) base frequencies, passed alongside the
  // history-conditioned table.
  float *basicbkg = new float[4];
  context->ComputeBaseFrequencies(basicbkg);

  float **basicbkgmat = new float *[4];
  for (i=0; i<4; i++) {
    basicbkgmat[i] = new float[1];
    basicbkgmat[i][0] = basicbkg[i];
  }

  bkgwm = new WtMx(bkgmat,basicbkgmat,1,"Background",morder);

  // Release every temporary table.
  for (i=0; i<powmorder; i++) {
    for (int j=0; j<4; j++) {
      delete [] bkgmat[i][j];
    }
    delete [] bkgmat[i];
    delete [] bkg[i];
  }
  delete [] bkgmat;
  delete [] bkg;
  for (i=0; i<4; i++) {
    delete [] basicbkgmat[i];
  }
  delete [] basicbkg;
  delete [] basicbkgmat;
#endif 
  
  return bkgwm;
}

// (Re)creates the background weight matrix _bkgwm and rebuilds the
// per-window background probability caches.
//
//   context  window to train the background on. If NULL and no
//            global_background is set, a context is derived from the first
//            window (widened by CONTEXT_WIDTH_FACTOR times its length) and
//            deleted again before this function returns. A caller-supplied
//            context is never deleted here.
//
// Behaviour:
//   - global_background set: _bkgwm becomes a copy of it; `context` is unused.
//   - otherwise: _bkgwm is trained with TrainWtMx(). Under
//     _MULTIPLE_SEQUENCES_BACKGROUND the training input is a list holding the
//     derived context followed by windows 1..N-1.
//     NOTE(review): in that configuration a caller-supplied (non-NULL)
//     context is not added to the list, so TrainWtMx() receives an empty
//     list — confirm that callers always pass NULL in this mode.
//   - the previous _currentwindowcache array is deleted and a new one is
//     built with one entry per window; each window is assigned its position
//     through SetIndex().
//
// Fix over the previous version: the context list used to be heap-allocated
// unconditionally but freed only when the context had been derived here, so
// it leaked whenever a context was passed in or global_background was set.
// It now lives on the stack. The list never owned the windows it points to.
void Parameters::TrainBackground(Window *context)
{
#ifdef _MULTIPLE_SEQUENCES_BACKGROUND
  vector<Window *> context_list;   // non-owning list of training windows
#endif

  bool context_was_null = false;
  if (global_background==NULL) {
    if (context==NULL) {
      context = (*_windows)[0]->Context(int(CONTEXT_WIDTH_FACTOR)*(*_windows)[0]->Length());
      context_was_null = true;
#ifdef _MULTIPLE_SEQUENCES_BACKGROUND
      context_list.push_back(context);
      const int numTrainingWindows = int(_windows->size());
      for (int windex=1; windex < numTrainingWindows; windex++)
        context_list.push_back((*_windows)[windex]);
#endif
    }

    if (_bkgwm) delete _bkgwm;
#ifdef _MULTIPLE_SEQUENCES_BACKGROUND
    _bkgwm = TrainWtMx(&context_list);
#else
    _bkgwm = TrainWtMx(context);
#endif
  }
  else {
    if (_bkgwm) delete _bkgwm;
    _bkgwm = new WtMx(global_background);
  }

  // if this function created the context window, then delete it
  if (context_was_null) {
    delete context;
  }

  // destroy the cache if present
  if (_currentwindowcache) delete [] _currentwindowcache;
  _currentwindowcache = NULL;

  // create an array of caches, one for each window
  const int numWindows = int(_windows->size());
  struct probabilitycache *currentpc = new struct probabilitycache[numWindows];
  for (int i=0; i<numWindows; i++) {
    // the index stored here is what ComputeSequenceProbability() uses to
    // find this window's slot in _currentwindowcache
    (*_windows)[i]->SetIndex(i);
    struct probabilitycache *pc = CacheWindowBackgroundProbabilities((*_windows)[i],_bkgwm);
    currentpc[i] = *pc;   // deep copy into the array slot
    delete pc;
  }
  _currentwindowcache = currentpc;
}

// Default constructor: an untrained, uninitialised parameter set with no
// windows, no matrix collection and no background matrix attached.
Parameters::Parameters()
{
  // nothing attached yet
  _windows = NULL;
  _wmc = NULL;
  _wm_len = NULL;
  _bkgwm = NULL;
  _free_emission_probabilities = NULL;

  // counters and tunables
  _numWM = 0;
  _num_iterations = 0;
  _initialbias = 10000;

  // state flags
  _is_initialized = false;
  _is_trained = false;

  // -1 marks "not computed"
  _free_energy = -1;
  _free_energy_differential = -1;
}

// Probability of the window positions [start, stop] (window-relative,
// inclusive) under weight matrix `wm`.
//
//   win               window supplying the characters and alignment info
//   start, stop       first and last window-relative positions
//   wm                weight matrix (motif or background)
//   both_orientations if true, the result is
//                       fbias*P(forward) + (1-fbias)*P(reverse complement)
//                     with fbias = wm->GetForwardBias(); if false, only the
//                     forward probability is returned. With BKG_FORWARD_ONLY
//                     defined, length-1 matrices are always forward only.
//
// Lookup order:
//   1. the sequence-level cache stored in the sequence's user data,
//   2. the per-window cache in _currentwindowcache,
//   3. direct computation.
// A cache entry is only used when the request spans exactly wm->Length()
// positions and lies inside the cached range.
//
// Returns 0 (unless _OPTIMIZE_CAREFULCSP is defined) when start > stop or
// the requested span is longer than the matrix. In the zeroth-order path,
// matrices longer than 1 also yield 0 when the span crosses an alignment
// boundary (unless _OVERLAPS is defined).
// For matrices with MarkovOrder() > 0 only single-position spans are
// supported and the column must not be an aligned (multi-sequence) column;
// violations print an error and exit.
DTYPE  Parameters::ComputeSequenceProbability(Window *win, int start, int stop, WtMx *wm, bool both_orientations)
{
#ifndef _OPTIMIZE_CAREFULCSP
  // Argument sanity checks; compiled out when the caller is trusted.
  if (start>stop) return 0;
  if (stop-start+1 > wm->Length()) {
#ifdef WARNINGS
    Warn("Warning: Sequence longer then motif length, cant compute probability\n");
#endif
    return 0;
  }
#endif

  int history_length = wm->MarkovOrder();
  int wm_len = wm->Length();

  // check the global cache kept in the sequence object
#ifdef _OPTIMIZE_PROTECTED_ACCESS
  Sequence *seq = win->_seq;
  int win_start = win->_start;
  struct probabilitycache *pc = (struct probabilitycache *)(seq->_udata);
#else
  Sequence *seq = win->Seq();
  int win_start = win->Start();
  struct probabilitycache *pc = (struct probabilitycache *)(seq->GetUserData());
#endif
  if (pc != NULL) {
    // Find which row of the cache belongs to wm.
    int index = -1;
#ifndef _OPTIMIZE_WMINDEX
    for (int i=0; i<pc->_numWM; i++) {
      if (pc->_wms[i]==wm) {
	index = i;
	break;
      }
    }
#else
    // The row index was stored in the matrix's user data when the cache was
    // built (see CacheSubsequenceProbabilities).
#ifdef _OPTIMIZE_PROTECTED_ACCESS
    index = wm->_udata;
#else
    index = wm->GetUserData();
#endif
#endif
    // if found, fetch the prob from the cache
    // (positions are converted to sequence coordinates via win_start and
    // then made relative to the cache's own start)
    if (index >= 0) {
      if (stop+win_start < pc->_start+pc->_length && stop-start+1==wm_len) {
	return pc->_prob[index][start+win_start-pc->_start];
      }
    }
  }
  
  // also check the current window cache kept locally
  if (_currentwindowcache != NULL) {
    int window_index = win->GetIndex();
    pc = &(_currentwindowcache[window_index]);
    // only trust the slot if it was built for exactly this window
    if (pc->_associatedCurrentWindow==win) {
      int index = -1;
#ifndef _OPTIMIZE_WMINDEX
      for (int i=0; i<pc->_numWM; i++) {
	if (pc->_wms[i]==wm) {
	  index = i;
	  break;
	}
      }
#else
      // use the fact that if the background prob is cached, itll be in the first (and only) location in _wms
      if (pc->_wms[0] == wm) index = 0;
#endif
      // if found, fetch the prob from the cache
      // we dont substract pc->_start since this is the local window cache and has pc->_start implicitly zero.
      if (index >= 0) {
	if (stop < pc->_length && stop-start+1==wm_len) {
	  return pc->_prob[index][start];
	}
      }
    }
  }

  // if reached here, nothing found in cache
  int ss_len = stop-start+1;
  // Output argument of IndexOfCharAt().
  // NOTE(review): presumably only meaningful when IndexOfCharAt() returns -1
  // (aligned column) — confirm against Window.
  char *arrayofchar;
#ifdef BKG_FORWARD_ONLY 
  if (wm_len == 1) both_orientations = false;
#endif

  // first handle the simple case of no markov history
  // in the case of multiple sequences, do not allow wm to overlap an alignment boundary

  if (history_length == 0) {
    // Forward strand: product of per-position frequencies.
    DTYPE  prob = 1;
    for (int i=0; i<ss_len; i++) {
      if (wm_len > 1) {
#ifndef _OVERLAPS
	if (i > 0 && win->AlignmentBeginsAt(start+i)) return 0;
	if (i<wm_len-1 && win->AlignmentEndsAt(start+i)) return 0;
#endif
      }
      char ch = win->IndexOfCharAt(start+i,arrayofchar);
      // -1 marks a column handled through the phylogeny model
      if (ch == -1) prob *= _phy->ComputeProbability(wm,i,false,arrayofchar);
      else prob *= wm->Frequency(i,ch);
    }
    
    if (!both_orientations) {
      return prob;
    }
    
    // Reverse strand: matrix positions are read back to front and base
    // indices 0..3 are mirrored (3-indch); other indices are used as is.
    DTYPE  prob_rc = 1;
    for (int i=0; i<ss_len; i++) {
      char indch = win->IndexOfCharAt(start+i,arrayofchar);
      if (indch == -1) prob_rc *= _phy->ComputeProbability(wm,wm_len-1-i,true,arrayofchar);
      else {
	if (indch >= 0 && indch <= 3) {
	  prob_rc *= wm->Frequency(wm_len-1-i,3-indch);
	}
	else {
	  prob_rc *= wm->Frequency(wm_len-1-i,indch);
	}
      }
    }

    float fbias = wm->GetForwardBias();
    return fbias*prob+(1-fbias)*prob_rc;
    // return max(prob,prob_rc);
  }

  // else history required
  // first construct the initial history word
  // only allow 1 sequence (the reference sequence) ... this function cannot be used for multiple sequences
  // only allow unit-length matrix

  if (ss_len > 1) {
    printf("Error: ComputeSequenceProbability called with history and non-unit length matrix\n");
    exit(1);
  }
  int history[MAX_ALIGNED_SEQUENCES];
  for (int i=0; i<MAX_ALIGNED_SEQUENCES; i++) history[i] = -1;
  int this_seq_index = seq->GetSpeciesIndex();
  int last_ambiguous_seen_at = -1;
  // Walk the history_length positions preceding `start`, packing their base
  // indices into a base-4 word. Running off the window start or hitting an
  // ambiguous character abandons the history.
  for (int i=0; i<history_length; i++) {    
    int hpos = start-history_length+i;
    if (hpos < 0) {
      history[this_seq_index] = -1;
      last_ambiguous_seen_at = hpos;
      break;
    }
    if (win->AmbiguousCharAt(hpos)) {
      history[this_seq_index] = -1; 
      last_ambiguous_seen_at = hpos;
      break;
    }
    char ch = win->IndexOfCharAt(hpos,arrayofchar);
    if (ch == -1) {
      printf("Error: ComputeSequenceProbability called for horder and multiple sequence\n");
      exit(1);
    }
    if (i==0) { // first iteration : initialize the history array if needed
      history[this_seq_index] = ch;
    }
    else {
      history[this_seq_index] = history[this_seq_index]*4+ch;;
    }
  }
    
  // NOTE(review): `mask` is computed but not used in the rest of this function.
  int mask = int(pow(4,history_length-1));
  DTYPE  prob = 1;
  DTYPE  prob_rc = 1;
  for (int i=0; i<ss_len; i++) {
    int start_plus_i = start+i;
    // retrieve the current character and multiply prob by its probability
    char ch = win->IndexOfCharAt(start_plus_i,arrayofchar);
    // fall back to the history-free frequency when the preceding
    // history_length positions are not all usable
    bool not_enough_history = (start_plus_i-last_ambiguous_seen_at <= history_length);
    if (not_enough_history) {
      prob *= wm->Frequency(i,ch);
    }
    else {
      prob *= wm->Frequency(i,ch,history[this_seq_index]);
    }
      
    if (both_orientations) {
      // reverse strand: mirrored base index for 0..3, unchanged otherwise
      if (ch >= 0 && ch <= 3) {
	if (not_enough_history) 
	  prob_rc *= wm->Frequency(wm_len-1-i,3-ch);
	else 
	  prob_rc *= wm->Frequency(wm_len-1-i,3-ch,history[this_seq_index]);
      }
      else {
	if (not_enough_history) 
	  prob_rc *= wm->Frequency(wm_len-1-i,ch);
	else 	  
	  prob_rc *= wm->Frequency(wm_len-1-i,ch,history[this_seq_index]);
      }
    }
  }
  
  if (!both_orientations) {
    return prob;
  }
  else {
    float fbias = wm->GetForwardBias();
    return fbias*prob+(1-fbias)*prob_rc;
    // return max(prob,prob_rc);
  }
}

// Builds the per-window background probability caches for the current
// background matrix and publishes them in _currentwindowcache.
// It is an error (message + exit) to call this while a cache already exists.
// Unlike TrainBackground(), this does not assign window indices.
void Parameters::CacheBackgroundProbabilities()
{
  if (_currentwindowcache != NULL) {
    printf("Error: Request to cache background probabilities when cache exists\n");
    exit(1);
  }

  struct probabilitycache *caches = new struct probabilitycache[_windows->size()];
  int slot = 0;
  while (slot < _windows->size()) {
    // build a temporary cache for this window, deep-copy it into the array
    // slot, then discard the temporary
    struct probabilitycache *fresh = CacheWindowBackgroundProbabilities((*_windows)[slot],_bkgwm);
    caches[slot] = *fresh;
    delete fresh;
    ++slot;
  }
  _currentwindowcache = caches;
}


// Computes the background probability at every position of `win` under the
// background matrix `wm` and returns it in a newly allocated
// probabilitycache (one matrix, win->Length() entries). The caller owns the
// returned object. The cache is tagged with `win`; its _start keeps the
// constructor default of 0, i.e. positions are window-relative.
//
// Two computation paths:
//   - higher-order background (MarkovOrder() > 0) together with a phylogeny
//     that has at least one species: a forward recursion over the window
//     that tracks, for each position, a table `aprob` over the 4^morder
//     ancestral history words. Requires a unit-length matrix.
//   - otherwise: one ComputeSequenceProbability() call per position.
//
// Exits with an error if `wm` is NULL or if the higher-order path is asked
// to handle a matrix whose length is not 1.
Parameters::probabilitycache *Parameters::CacheWindowBackgroundProbabilities(Window *win, WtMx *wm)
  // PRECONDITION: wm is the weight matrix for background
{
  int numWM = 1;
  WtMx **wms = new WtMx *[1];
  wms[0] = wm;
  if (wm==NULL) {
    printf("Error: Cant cache probabilities for null matrix\n");
    exit(1);
  }

  int length = win->Length();
  int morder = wm->MarkovOrder();
  int powmorder = 1;   // will be set later if required
  int mask;            // 4^(morder-1); only set on the higher-order path
  int this_seq_index;  // species index of the window's own sequence; ditto
  bool horderandms = (morder > 0 && _phy && _phy->_numSpecies > 0); // if this is true, we need to ...
                                                                    // use ancestor probs in computing prob
  
  int wm_len = wms[0]->Length();
  DTYPE  **aprob;      // aprob[l][a]: per-position table over history words a
  int history[MAX_ALIGNED_SEQUENCES];

  if (horderandms) {
    if (wm_len != 1) {             // assume unit length
      printf("Error: Higher order wm with non-unit length not supported\n");
      exit(1);
    }
    aprob = new DTYPE  *[length];
    powmorder = int(pow(4,morder));
    mask = int(pow(4,morder-1));
    this_seq_index = win->Seq()->GetSpeciesIndex();
  }

  // Single-row probability table (only the background matrix is cached).
  DTYPE  **prob = new DTYPE  *[1];
  prob[0] = new DTYPE  [length];

#ifdef BKG_FORWARD_ONLY 
  int orientation = 0;  // this means background will be considered in forward orientation only
#else
  int orientation = 2;
#endif

  char *arrayofchar; char ch;
  if (horderandms) {
    for (int j=0; j<MAX_ALIGNED_SEQUENCES; j++) history[j] = -1;
  }
  int last_ambiguous_seen_at = -1;
  // pga[al]: probability of the most recently processed column given
  // ancestral base al. It is written near the bottom of each iteration and
  // read at the top of the next one.
  float pga[4];
  
  if (horderandms) {
    for (int l=0; l<length-wm_len+1; l++) {
      // Update aprob
      aprob[l] = new DTYPE [powmorder];     // assign aprob[l]
	
      if (l - last_ambiguous_seen_at <= morder) {  // use default values for aprob, dont look at sequence
	for (int a=0; a<powmorder; a++) {   // assign aprob[l][a] TODO: minor bug here, not looking at sequence
	  aprob[l][a] = wm->HigherOrderFrequency(0,a);
	}
      }
      else {
	// Recursion from position l-1. This branch is only reached with
	// l >= morder >= 1 (last_ambiguous_seen_at starts at -1), so
	// aprob[l-1], prob[0][l-1] and pga from the previous iteration exist.
	for (int a=0; a<powmorder; a++) {   // assign aprob[l][a]
	  DTYPE  sum = 0;
	  int aprefix = a/4;
	  int asuffix = a%4;
	  for (int am1=0; am1<4; am1++) {   // sum over all a_{-1}
	    DTYPE  term = 1;
	    int ahis = aprefix + int(am1<<(2*(morder-1))); // ahis is the k-mer a_{-1}
	    term *= wm->Frequency(0,asuffix,ahis);
	    term *= pga[ahis%4];
	    term *= aprob[l-1][ahis]/prob[0][l-1];
	    sum += term;
	  }
	  aprob[l][a] = sum;
	}
      }
      
      // Update history 
      // (`ch` and `arrayofchar` here still describe column l-1; they are
      // refreshed for column l further down)
      if (l>0) {
	if (ch == -1) {
	  // previous column was aligned: update every listed sequence
	  char *ptr = arrayofchar;
	  char num_seq = *ptr++;
	  for (int j=0; j<num_seq; j++) {
	    char seq_index = *ptr++;
	    char ch_index = *ptr++;
	    if (win->AmbiguousChar(ch_index)) history[seq_index] = -1;
	    else {
	      if (history[seq_index]==-1) history[seq_index] = ch_index;
	      // drop the oldest base (modulo mask) and append the new one
	      else history[seq_index] = (history[seq_index]%mask)*4 + ch_index;
	    }
	  }
	}
	else {
	  char *tmp; char ch2;
	  if (l>=2 && (ch2 = win->IndexOfCharAt(l-2,tmp))==-1) { // l-1 not aligned, l-2 aligned, i.e., algnmt ends.
	    // forget the histories of all other sequences
	    for (int j=0; j<MAX_ALIGNED_SEQUENCES; j++) {
	      if (j==this_seq_index) continue;
	      history[j] = -1;
	    }
	  }
	  if (win->AmbiguousChar(ch)) history[this_seq_index] = -1;
	  else {
	    if (history[this_seq_index] == -1) history[this_seq_index] = ch;
	    else history[this_seq_index] = (history[this_seq_index]%mask)*4 + ch;
	  }
	}
      }
      
      // look at current column and update prob
      if (win->AmbiguousCharAt(l)) {
	last_ambiguous_seen_at = l;
      }

      ch = win->IndexOfCharAt(l,arrayofchar);	
      if (ch != -1) {
	// Unaligned column: build a temporary one-sequence packed column
	// (count, sequence index, character) so the same phylogeny routine
	// can be used; it is freed again below.
	arrayofchar = new char[3];
	arrayofchar[0] = 1; arrayofchar[1] = this_seq_index; arrayofchar[2] = ch;
      }
      
      float sum = 0;
      for (int al=0; al<4; al++) {
	// without enough clean history, call the history-free variant
	if (l-last_ambiguous_seen_at <= morder) {
	  pga[al] = _phy->ComputeProbabilityGivenAncestor(wms[0],0,orientation,al,arrayofchar);
	}
	else {
	  pga[al] = _phy->ComputeProbabilityGivenAncestor(wms[0],0,orientation,al,arrayofchar,history);
	}
	int powmorderm1 = mask; // pow(4,morder-1)
	// sum aprob over all history words whose last base is al
	float term = 0;
	for (int ap=0; ap<powmorderm1; ap++) {
	  term += aprob[l][ap*4+al];
	}	 
	sum += term*pga[al];
      }      
      
      if (ch != -1) delete [] arrayofchar;
      
      prob[0][l] = sum;
    }
    
    for (int l=0; l<length-wm_len+1; l++) {
      delete [] aprob[l];
    }
  }
  else {
    // Simple path: delegate each position to ComputeSequenceProbability().
    bool both_orientations = (orientation==0?false:true);
    for (int l=0; l<length-wm_len+1; l++) {
      prob[0][l] = Parameters::ComputeSequenceProbability(win, l, l+wm_len-1, wms[0], both_orientations);
    }
  }
  
  if (horderandms) delete [] aprob;

  // Hand ownership of wms and prob to the new cache object.
  struct probabilitycache *pc = new struct probabilitycache;
  pc->_numWM = numWM;
  pc->_wms = wms;
  pc->_length = length;
  pc->_prob = prob;
  pc->_associatedCurrentWindow = win;

  return pc;
}

// Precomputes, for every weight matrix in `wmc`, the probability of each
// placement of that matrix inside seq[start, start+cache_length-1] and
// stores the result as a probabilitycache in the sequence's user data. Any
// cache previously attached to the sequence is deleted first.
//
//   seq              sequence to cache for
//   wmc              collection of weight matrices; one cache row each
//   start            first sequence position covered by the cache
//   cache_length     requested number of positions; clipped to the end of
//                    the sequence
//   lookAtAlignments if true and the sequence has alignments, the aligned
//                    windows of other sequences are attached to the
//                    temporary window before probabilities are computed
//
// With _OPTIMIZE_WMINDEX each matrix's user data is set to its row index.
// With _WTMX_BIAS each matrix's forward bias is re-estimated before the
// cache is filled (see below).
// For a matrix of length wm_len only the first length-wm_len+1 entries of
// its row are written; the remaining entries are left unset.
void Parameters::CacheSubsequenceProbabilities(Sequence *seq, WtMxCollection *wmc, int start, int cache_length, bool lookAtAlignments)
{
  // Drop any existing cache attached to this sequence.
  struct probabilitycache *oldpc = (struct probabilitycache *)(seq->GetUserData());
  if (oldpc != NULL) delete oldpc;
  seq->SetUserData(NULL);

  int numWM = wmc->Size();
  WtMx **wms = new WtMx *[numWM];
  for (int i=0; i<numWM; i++) {
    WtMx *wm = wmc->WM(i);
    wms[i] = wm;
#ifdef _OPTIMIZE_WMINDEX
    // remember the row so ComputeSequenceProbability() can skip the search
    wm->SetUserData(i);
#endif
  }

  int length = cache_length;
  // clip the cached range to the end of the sequence
  if (start+length-1 >= seq->Length()) length = seq->Length()-start;
  Window *win = new Window(seq,start,start+length-1);
  if (lookAtAlignments && seq->_alignments != NULL) {
    struct AlignmentNode *ndlist = seq->_alignments->GetAlignmentNodeList(seq,start,start+length-1);
    for (struct AlignmentNode *ptr = ndlist; ptr != NULL; ptr = ptr->_next) {
      // the temporary window on the other sequence is deleted right after
      // AlignWindow()
      // NOTE(review): presumes AlignWindow() does not keep the pointer — confirm.
      Window *owin = new Window(ptr->_otherSeq, ptr->_l2, ptr->_r2);
      win->AlignWindow(owin,ptr->_l1);
      delete owin;
    }
  }

#ifdef _WTMX_BIAS
  // Estimate a forward bias for each matrix: count placements where the
  // forward-only (fcount) or reverse-only (bcount) probability beats a
  // zeroth-order background built from this window, then set the bias to
  // (fcount+1)/(fcount+bcount+2).
  float bkg[4]; win->ComputeBaseFrequencies(bkg);
  float **bkgmat = new float *[4];
  for (int i=0; i<4; i++) {
    bkgmat[i] = new float[1];
    bkgmat[i][0] = bkg[i];
  }
  WtMx *bkgwm = new WtMx(bkgmat,1,"full_background");
  for (int i=0; i<4; i++) delete [] bkgmat[i];
  delete [] bkgmat;

  int win_length = win->Length();
  for (int i=0; i<wmc->Size(); i++) {
    WtMx *w_tmp = wmc->WM(i);
    int wt_length = w_tmp->Length();

    float fcount = 0;
    float bcount = 0;
    for (int l=0; l<win_length-wt_length; l++) {
      // bias 1 => pure forward probability, bias 0 => pure reverse
      w_tmp->SetForwardBias(1);
      float fprob = Parameters::ComputeSequenceProbability(win,l,l+wt_length-1,w_tmp,true);
      w_tmp->SetForwardBias(0);
      float bprob = Parameters::ComputeSequenceProbability(win,l,l+wt_length-1,w_tmp,true);

      // background probability of the same stretch, position by position
      float bkgprob = 1; 
      for (int l1=0; l1<wt_length; l1++) 
	bkgprob *= Parameters::ComputeSequenceProbability(win,l+l1,l+l1,bkgwm); 
      if (bkgprob > 0 && fprob > bkgprob) fcount++;
      if (bkgprob > 0 && bprob > bkgprob) bcount++;
    }

    // if (fcount + bcount < 10) wmc->WM(i)->SetForwardBias(0.5);
    wmc->WM(i)->SetForwardBias((fcount+1)/(fcount+bcount+2));
  }

  delete bkgwm;
#endif

  // Fill one row per matrix with the probability of each full placement.
  DTYPE  **prob = new DTYPE  *[numWM];
  for (int j=0; j<numWM; j++) {
    int wm_len = wms[j]->Length();
    prob[j] = new DTYPE  [length];
    for (int l=0; l<length-wm_len+1; l++) {
      prob[j][l] = Parameters::ComputeSequenceProbability(win, l, l+wm_len-1, wms[j], true);
    }
  }
  delete win;

  // Package everything; the cache object takes ownership of wms and prob.
  struct probabilitycache *pc = new struct probabilitycache;
  pc->_numWM = numWM;
  pc->_wms = wms;
  pc->_start = start;
  pc->_length = length;
  pc->_prob = prob;
  pc->_associatedCurrentWindow = NULL;  // this is the global sequence cache, not associated with any particular window

  seq->SetUserData((void *)pc);
}

// Deletes the probability cache attached to `seq` (if any) and clears the
// sequence's user-data pointer. Does nothing when no cache is attached.
void Parameters::DeleteCacheSubsequenceProbabilities(Sequence *seq)
{
  struct probabilitycache *cached = (struct probabilitycache *)(seq->GetUserData());
  if (cached != NULL) {
    delete cached;
    seq->SetUserData(NULL);
  }
}

// Returns the negative log-probability of all windows under the background
// matrix alone: -sum over windows, over non-ambiguous positions i, of
// log P(position i | _bkgwm).
//
//   both_orientations  forwarded to ComputeSequenceProbability()
//
// Exits with an error if the parameters have not been initialised.
// A position whose probability is 0 contributes log(0) to the sum.
//
// Changes from the previous version: the unused local holding
// window->Start() is gone, and the window loop compares against an int count
// rather than mixing int with the container's unsigned size. The per-window
// partial sum is kept so the floating-point summation order is unchanged.
DTYPE  Parameters::EvaluateFreeEnergyBackground(bool both_orientations)
{
  if (!_is_initialized) {
    printf("Error: EvaluateFreeEnergyBackground called without complete initialization\n");
    exit(1);
  }

  const int numWindows = int(_windows->size());
  DTYPE  logP = 0;
  for (int wi=0; wi<numWindows; wi++) {
    Window *window = (*_windows)[wi];
    const int length = window->Length();

    // accumulate per window first, then add to the grand total
    DTYPE  windowLogP = 0;
    for (int i=0; i<length; i++) {
      if (window->AmbiguousCharAt(i)) continue;   // ambiguous positions are skipped
      DTYPE  prob = ComputeSequenceProbability(window,i,i,_bkgwm,both_orientations);
      windowLogP += log(prob);
    }
    logP += windowLogP;
  }
  return -logP;
}

// Returns the stored free energy differential once the parameters have been
// trained; otherwise returns -1 (after a warning when WARNINGS is defined).
float Parameters::Free_Energy_Differential()
{
  if (!_is_trained) {
#ifdef WARNINGS
    Warn("Warning: free energy differential computed on untrained parameters\n");
#endif
    return -1;
  }
  return _free_energy_differential;
}

// Returns the stored free energy once the parameters have been trained;
// otherwise returns -1 (after a warning when WARNINGS is defined).
float Parameters::Free_Energy()
{
  if (!_is_trained) {
#ifdef WARNINGS
    Warn("Warning: free energy computed on untrained parameters\n");
#endif
    return -1;
  }
  return _free_energy;
}

// Multiplies the stored free energy differential by `scalefactor` in place.
// No check is made that the parameters have been trained.
void Parameters::Scale_Free_Energy_Differential(float scalefactor)
{
  _free_energy_differential = _free_energy_differential * scalefactor;
}

// Accessor for the initial bias value (set to 10000 by the default
// constructor).
int Parameters::InitialBias()
{
  const int bias = _initialbias;
  return bias;
}

// Accessor for the stored iteration count (0 after default construction).
int Parameters::NumIterations()
{
  const int iterations = _num_iterations;
  return iterations;
}

// True once the parameters have been marked as trained.
bool Parameters::IsTrained() 
{
  const bool trained = _is_trained;
  return trained;
}

// True once the parameters have been marked as initialised.
bool Parameters::IsInitialized()
{
  const bool initialized = _is_initialized;
  return initialized;
}

// Index at which the background matrix sits among this object's matrices:
// 0 when there is no matrix collection, otherwise _numWM-1 (the last slot).
// Exits with an error when called before initialisation.
int Parameters::BackgroundIndex()
{
  if (!_is_initialized) {
    printf("Error: background index computed on uninitialized parameters\n");
    exit(1);
  }
  return (_wmc==NULL) ? 0 : _numWM-1;
}

// Background index for an arbitrary matrix collection: the slot just past
// the collection's own matrices, i.e. wmc->Size(). `wmc` must not be NULL.
int Parameters::BackgroundIndex(WtMxCollection *wmc)
{
  const int slotAfterMotifs = wmc->Size();
  return slotAfterMotifs;
}

void Parameters::PrintBackground(FILE *fp)
{
  _bkgwm->Print(fp);
}

void Parameters::PrintWM(FILE *fp, int i)
{
  _wmc->WM(i)->Print(fp);
}

// Fraction of the non-ambiguous positions of the FIRST window for which
// IndexOfCharAt() reports -1 (the value used elsewhere in this file for
// columns handled through the alignment/phylogeny model).
// Only window 0 is examined. Returns 0 when the window has no non-ambiguous
// position.
DTYPE Parameters::PID()
{
  int specific = 0;   // non-ambiguous positions
  int aligned = 0;    // of those, positions reported as -1

  Window *win = (*_windows)[0];
  for (int pos = 0; pos < win->Length(); pos++) {
    if (!win->AmbiguousCharAt(pos)) {
      ++specific;
      char *columnData;   // output argument, not inspected here
      if (win->IndexOfCharAt(pos,columnData) == -1) {
        ++aligned;
      }
    }
  }

  if (specific == 0) {
    return 0;
  }
  return float(aligned)/float(specific);
}

void Parameters::PrintPID(FILE *fp)
{
  int totalLen = 0;
  for (int wi=0; wi<_windows->size(); wi++) {
    totalLen += (*_windows)[wi]->Length();
  }

  DTYPE pid = PID();
  fprintf(fp,"%d\t",(*_windows)[0]->Start());
#ifdef _PRINT_SEQNAME_IN_FENFILE
  char seqname[1024]; (*_windows)[0]->Seq()->Name(seqname);
  fprintf(fp,"%.4f\t%.4f\t%d\t%d\t%s\n",pid,pid,totalLen,_num_iterations,seqname);
#else
  fprintf(fp,"%.4f\t%.4f\t%d\t%d\n",pid,pid,totalLen,_num_iterations);
#endif
  
  return;
}

// Accessor for the stored matrix count (0 after default construction).
int Parameters::NumWM()
{
  const int count = _numWM;
  return count;
}

// Copies the four base frequencies at position 0 of the background matrix
// into the caller-provided array `bkg` (which must hold at least 4 floats).
// Leaves `bkg` untouched when no background matrix is set.
void Parameters::GetBackground(float *&bkg)
{
  if (_bkgwm != NULL) {
    int base = 0;
    while (base < 4) {
      bkg[base] = _bkgwm->Frequency(0,base);
      ++base;
    }
  }
}

// Replaces the background matrix with a new length-1 matrix named
// "Background" built from the four frequencies in `bkg`, and discards the
// per-window probability caches (they are NOT rebuilt here).
void Parameters::SetBackground(float *bkg)
{
  // drop the previous background matrix, if any
  if (_bkgwm != NULL) delete _bkgwm;

  // 4 x 1 scratch matrix (base x position) for the WtMx constructor
  float **scratch = new float *[4];
  for (int base = 0; base < 4; ++base) {
    scratch[base] = new float[1];
    scratch[base][0] = bkg[base];
  }

  _bkgwm = new WtMx(scratch,1,"Background");  // create a length 1 wm for background 

  for (int base = 0; base < 4; ++base) {
    delete [] scratch[base];
  }
  delete [] scratch;

  // dont cache probabilities !! why ??
  if (_currentwindowcache != NULL) delete [] _currentwindowcache;
  _currentwindowcache = NULL;
}

// Hands out the internal background matrix pointer through `bkg`. No copy
// is made: the matrix stays owned by this object and may be NULL.
void Parameters::GetBackground(WtMx *&bkg)
{
  WtMx *current = _bkgwm;
  bkg = current;
}

// Installs `bkg` as the background matrix and takes ownership of it; the
// previously held matrix is deleted. The per-window probability caches are
// discarded and not rebuilt.
//
// Passing the matrix that is already installed (e.g. the pointer obtained
// from GetBackground(WtMx *&)) is now safe. Previously that deleted the
// matrix and then stored the dangling pointer.
void Parameters::SetBackground(WtMx *bkg)
{
  if (_bkgwm != bkg) {
    if (_bkgwm) delete _bkgwm;
    _bkgwm = bkg;
  }

  // cached probabilities belong to the old background; throw them away
  if (_currentwindowcache) delete [] _currentwindowcache;
  _currentwindowcache = NULL;
}

void Parameters::SetPhylogeny(float *mu, int numSpecies)
{
  if (_phy != NULL) delete _phy;
  _phy = new Parameters::Phylogeny;
  _phy->_numSpecies = numSpecies; 
  if (_phy->_mu) delete [] _phy->_mu;
  _phy->_mu = new float[numSpecies];
  for (int i=0; i<numSpecies; i++) _phy->_mu[i] = mu[i];
}

// Reports the current phylogeny. Sets `numSpecies` to the species count and
// returns a read-only pointer to the internal mu array (not a copy). When no
// phylogeny is set, or it has fewer than one species, sets `numSpecies` to 0
// and returns NULL.
const float *Parameters::GetPhylogeny(int &numSpecies)
{
  const bool havePhylogeny = (_phy != NULL && _phy->_numSpecies >= 1);
  if (havePhylogeny) {
    numSpecies = _phy->_numSpecies;
    return _phy->_mu;
  }
  numSpecies = 0;
  return NULL;
}

void Parameters::FreeEmissionProbabilities(int wmindex, int offset)
{
  if (offset >= 0) {
    _free_emission_probabilities[wmindex][offset] = true;
  }
  else {
    WtMx *w;
    if (wmindex == BackgroundIndex()) w = _bkgwm;
    else w = _wmc->WM(wmindex);
    for (int i=0; i<w->Length(); i++) {
      _free_emission_probabilities[wmindex][i] = true;
    }
  }
  return;
}

bool Parameters::IsEmissionProbabilityFree(int wmindex, int offset) 
{
  if (!_is_initialized) {
    printf("IsEmissionProbabilityFree called without complete initialization\n");
    exit(1);
  }
  if (offset >= 0) {
    return _free_emission_probabilities[wmindex][offset];
  }
  else {
    bool isfree = false;
    WtMx *w;
    if (wmindex == BackgroundIndex(_wmc)) w = _bkgwm;
    else w = _wmc->WM(wmindex);
    for (int i=0; i<w->Length(); i++) {
      if (_free_emission_probabilities[wmindex][i]) {
	isfree = true;
	break;
      }
    }
    return isfree;
  }
}

void Parameters::FixEmissionProbabilities(int wmindex, int offset)
{
  if (offset >= 0) {
    _free_emission_probabilities[wmindex][offset] = false;
  }
  else {
    WtMx *w;
    if (wmindex == BackgroundIndex()) w = _bkgwm;
    else w = _wmc->WM(wmindex);
    for (int i=0; i<w->Length(); i++) {
      _free_emission_probabilities[wmindex][i] = false;
    }
  }
  return;
}

