/*****************************************************************
        Copyright by Rockefeller University,
can not be reproduced or distributed without written permission of
copyright holder.  Version of October 2003.

Written by Saurabh Sinha (contact person), Erik van Nimwegen, and 
Eric Siggia.

The program stubb (and its relatives) implement an algorithm for
finding likely cis-regulatory modules, described in the following
paper:
"A Probabilistic Method to Detect Regulatory Modules"
by Saurabh Sinha, Erik van Nimwegen and Eric Siggia. 
Eleventh International Conference on Intelligent Systems for
Molecular Biology, Brisbane, Australia, July 2003, pg 292-301.

The file sample/gap_wtmx that comes with this distribution includes 
a sample set of transcription factor weight matrices (PWM's) that 
were reported in :
"Computational detection of genomic cis-regulatory modules applied
to body patterning in the early Drosophila embryo"
by N. Rajewsky, M. Vergassola, U. Gaul and E. Siggia.
BMC Bioinformatics 3 (30) 2002.
******************************************************************/

#include "parameters.h"
#include <math.h>
#include <stdexcept>
#include "util.h"

void Parameters_H0::Train(bool differential)
{
  if (!_is_initialized) {
    printf("Training attempted without initialization\n");
    _is_trained = false;
    return;
  }

  DTYPE freeEnergyBackground =  EvaluateFreeEnergyBackground();

  DTYPE  previousFreeEnergy = INF_FREE_ENERGY;
  _free_energy = EvaluateFreeEnergy();

  int count = 0;
  do {
    count++;
    previousFreeEnergy = _free_energy;
    Update();
    _free_energy = EvaluateFreeEnergy();
    _free_energy_differential = freeEnergyBackground - _free_energy;
    if (count > CHECK_ITERATION_THRESHOLD && _free_energy_differential < CHECK_FEN_THRESHOLD) break;
    if (count > MAX_TRAINING_ITERATIONS) break;    
  } while (previousFreeEnergy - _free_energy > THRESHOLD);

  _num_iterations = count;        // record how many iterations were needed

  if (differential) {
    _free_energy_differential = freeEnergyBackground - _free_energy;
    if (_free_energy_differential < 0) _free_energy_differential = 0;
  }
  else _free_energy_differential = 0;

  int numSpecific = 0;
  int numTotal = 0;
  for (int i=0; i<_windows->size(); i++) {
    Window *win = (*_windows)[i];
    numSpecific += win->NumSpecificCharacters();
    numTotal += win->Length();
  }

#ifdef _NORMALIZE_SPACERS
  if (numSpecific == 0) _free_energy_differential = 0;
  else _free_energy_differential *= float(numTotal)/float(numSpecific);
#endif

  _free_energy_perlength = _free_energy_differential / float(numTotal);

  _is_trained = true;
}

void Parameters_H0::TrainWithFixedParameters(bool differential)
{
  if (!_is_initialized) {
    printf("Training attempted without initialization\n");
    _is_trained = false;
    return;
  }

  DTYPE freeEnergyBackground =  EvaluateFreeEnergyBackground();

  // PrepareForUpdate();
  _free_energy = EvaluateFreeEnergy();
  _num_iterations = 0;        // record how many iterations were needed

  if (differential) {
    _free_energy_differential = freeEnergyBackground - _free_energy;
    if (_free_energy_differential < 0) _free_energy_differential = 0;
  }
  else _free_energy_differential = 0;

  int numSpecific = 0;
  int numTotal = 0;
  for (int i=0; i<_windows->size(); i++) {
    Window *win = (*_windows)[i];
    numSpecific += win->NumSpecificCharacters();
    numTotal += win->Length();
  }

#ifdef _NORMALIZE_SPACERS
  if (numSpecific == 0) _free_energy_differential = 0;
  else _free_energy_differential *= float(numTotal)/float(numSpecific);
#endif

  _free_energy_perlength = _free_energy_differential / float(numTotal);

  _is_trained = true;
}

// Convenience overload: records initialbias and forwards to the
// seed-based Initialize().  A negative initialbias means "no seed"; a
// non-negative value builds a temporary Parameters_H0(wmc, initialbias)
// and uses it as the seed.
void Parameters_H0::Initialize(vector<Window *> *wl, WtMxCollection *wmc, int initialbias)
{
  _initialbias = initialbias;

  if (initialbias >= 0) {
    Parameters_H0 biasedSeed(wmc, initialbias);
    Initialize(wl, wmc, &biasedSeed);
    return;
  }

  // the cast selects the Parameters* overload
  Initialize(wl, wmc, (Parameters *)NULL);
}

// Binds this object to a set of windows and a weight-matrix collection
// and allocates all working storage.
//
// wl:       windows to be scored; the pointer is stored, not copied.
// wmc:      weight matrices; the pointer is stored, not copied.
// initseed: optional seed (treated as a Parameters_H0); when NULL the
//           transition probabilities start out uniform, otherwise they
//           are copied from the seed's _pi.
//
// Any previous initialization is released first through Destroy().
// Exits the process if no weight matrix or no window is available.
void Parameters_H0::Initialize(vector<Window *> *wl, WtMxCollection *wmc, Parameters *initseed)
{
  Destroy();                      // drop whatever an earlier call allocated

  _windows = wl;
  _wmc = wmc;
  _numWM = _wmc->Size()+1;        // one extra slot for the background matrix
  _wm_len = new int[_numWM];

  Parameters_H0 *seed = (Parameters_H0 *)initseed;
  const int backgroundSlot = BackgroundIndex(_wmc);
  TrainBackground();

  // per-matrix, per-column flags: which emission probabilities are trainable
  _free_emission_probabilities = new bool *[_numWM];
  int longestMatrix = -1;
  for (int m = 0; m < _numWM; m++) {
    WtMx *matrix;
    if (m != backgroundSlot)
      matrix = _wmc->WM(m);
    else
      matrix = _bkgwm;

    const int columns = matrix->Length();
    _wm_len[m] = columns;
    if (longestMatrix < columns) longestMatrix = columns;

    bool *columnFlags = new bool[columns];
    _free_emission_probabilities[m] = columnFlags;
    for (int col = 0; col < matrix->Length(); col++) {
      columnFlags[col] = false;   // nothing is free to be trained
    }
  }
  if (longestMatrix < 0) {
    printf("Error: maximum wm length < 0\n");
    exit(1);
  }

  // transition probabilities, plus the backup used by Revert()
  _pi = new float[_numWM];
  _oldpi = new float[_numWM];
  for (int m = 0; m < _numWM; m++) {
    if (seed != NULL) {
      _pi[m] = seed->_pi[m];      // start from the seed's values
    }
    else {
      _pi[m] = 1/float(_numWM);   // start from the uniform distribution
    }
    _oldpi[m] = _pi[m];
  }

  // per-window accumulators; also find the longest window
  _max_window_length = -1;
  const int windowCount = wl->size();
  _Ai = new DTYPE *[windowCount];
  _fringe_corrections = new DTYPE[windowCount];
  for (int w = 0; w < windowCount; w++) {
    const int windowLength = (*wl)[w]->Length();

    DTYPE *row = new DTYPE[_numWM];
    for (int m = 0; m < _numWM; m++) {
      row[m] = 0;
    }
    _Ai[w] = row;

    if (windowLength > _max_window_length) _max_window_length = windowLength;
    _fringe_corrections[w] = -1.0;  // marks "not computed yet"
  }
  if (_max_window_length < 0) {
    printf("Error: maximum window length < 0\n");
    exit(1);
  }

  // scratch buffers, sized for the longest window and reused for every window
  _Ail_window = new DTYPE *[_numWM];
  for (int m = 0; m < _numWM; m++) {
    _Ail_window[m] = new DTYPE[_max_window_length];
  }

  _alpha = new DTYPE[_max_window_length];
  _beta = new DTYPE[_max_window_length];
  _c = new DTYPE[_max_window_length];
  _cij = new DTYPE *[_max_window_length];
  for (int pos = 0; pos < _max_window_length; pos++) {
    _c[pos] = 1;
    _cij[pos] = new DTYPE[longestMatrix];
  }

  _is_initialized = true;
}


// Runs Forward(), Backward() and PrepareForUpdate() on every window and
// returns the negative of the accumulated log term, i.e. the free energy
// under the current parameters.
//
// Side effects: _currentWindow is left on the last window, the scratch
// arrays hold that window's values, and _fringe_corrections[wi] is set to
// 1/term for every window.  Exits the process if called before
// initialization.
DTYPE  Parameters_H0::EvaluateFreeEnergy()
{
  if (!_is_initialized) {
    printf("Error: EvaluateFreeEnergy attempted without complete initialization\n");
    exit(1);
  }

  // matrix lookup table, with the background matrix in its own slot
  const int backgroundSlot = BackgroundIndex();
  vector<WtMx *> matrices(_numWM);
  for (int m = 0; m < _numWM; m++) {
    if (m == backgroundSlot) {
      matrices[m] = _bkgwm;
    }
    else {
      matrices[m] = _wmc->WM(m);
    }
  }

  DTYPE logLikelihood = 0;
  const int windowCount = _windows->size();
  for (int w = 0; w < windowCount; w++) {
    Window *current = (*_windows)[w];
    _currentWindow = current;
    Forward();                        // refreshes alpha, c and cij
    Backward();                       // refreshes beta
    PrepareForUpdate();               // refreshes Ail

    const int span = current->Length();

    // the scaling constants carry the log term of the window
    DTYPE windowLogP = 0;
    for (int pos = 0; pos < span; pos++) {
      windowLogP -= log(_c[pos]);
    }

    // add motifs that start inside the window but run off its right end
    DTYPE fringeTerm = 1.;
    for (int m = 0; m < _numWM; m++) {
      if (m == backgroundSlot || _wm_len[m] == 1) continue;
      for (int covered = 1; covered < _wm_len[m]; covered++) {
        const int first = span - covered;      // where the partial motif begins
        if (first < 1) continue;               // needs an alpha at first-1
        if (current->AmbiguousCharAt(first)) continue;
        const DTYPE scale = _cij[first][covered-1];
        const DTYPE contribution = _alpha[first-1]*scale*_pi[m]*ComputeSequenceProbability(current,first,span-1,matrices[m]);
        fringeTerm += contribution;
      }
    }
    _fringe_corrections[w] = 1/fringeTerm;
    windowLogP += log(fringeTerm);

    logLikelihood += windowLogP;      // this window's share
  }

  return -logLikelihood;
}

// Returns the fringe correction stored for window `index`.
// Exits the process when the object is not initialized, when the index is
// not below the number of windows, or when the stored value is still the
// "not computed" marker (-1).
DTYPE  Parameters_H0::FringeCorrectionFactor(int index)
{
  if (!_is_initialized) {
    printf("Error: FringeCorrectionFactor attempted without complete initialization\n");
    exit(1);
  }

  if (index >=  _windows->size()) {
    printf("Error: FringeCorrectionFactor attempted on non-existing window\n");
    exit(1);
  }

  // slots hold -1 until a value has been stored, so anything not above
  // -0.5 means "no information yet"
  if (!(_fringe_corrections[index] > -0.5)) {
    printf("Error: FringeCorrectionFactor doesnt have required information\n");
    exit(1);
  }

  return _fringe_corrections[index];
}

// Forward pass over _currentWindow.
// Fills, for positions 0 .. length-1 of the window:
//   _alpha[l]   the scaled forward variable at position l,
//   _c[l]       the scaling constant applied at l (1/alpha[l], or 1 when
//               the unscaled alpha is below SMALL_FLOAT),
//   _cij[i][k]  the product _c[i]*_c[i+1]*...*_c[i+k], maintained only for
//               spans up to the longest weight matrix.
// Ambiguous positions simply carry the previous alpha forward.
void Parameters_H0::Forward()
{
  int i;
  int bkgIndex = BackgroundIndex();  // the background matrix corresponds to last index
  // wm[i] is the matrix for slot i (background in slot bkgIndex)
  WtMx **wm = new WtMx *[_numWM];
  int max_wm_length = 0;
  for (i=0; i<_numWM; i++) {
    if (i!=bkgIndex) {
      wm[i] = _wmc->WM(i);
    }
    else {
      wm[i] = _bkgwm;
    }
    if (_wm_len[i] > max_wm_length) max_wm_length = _wm_len[i];
  }

  Window *window = _currentWindow;
  int length = window->Length();

                                       // Clear up previous information
  for (int l=0; l<length; l++) {
    _alpha[l] = 0;
    _c[l] = 0;
  }

                                       // Base conditions (l==0) :-
  if (window->AmbiguousCharAt(0)) {    // 'N' or 'X' at position 0, alpha[0] = 1
    _alpha[0] = 1;
  }
  else {                               // usual case: a known nucleotide at position 0
    DTYPE  sum = 0;
    for (i=0; i<_numWM; i++) {
      if (_wm_len[i]==1) {             // only length-1 matrices can end at position 0
	sum += _pi[i]*ComputeSequenceProbability(window,0,0,wm[i]);
      }
    }
    _alpha[0] = sum;
  }
                                       // Scale the _alpha  
  _c[0] = _alpha[0];;
  if (_c[0] < SMALL_FLOAT) _c[0] = 1;  // avoid dividing by a vanishing alpha
  else _c[0] = 1/_c[0];
  _cij[0][0] = _c[0];
  _alpha[0] *= _c[0];
                                       // Recurrences :-
  for (int l=1; l<length; l++) {       // compute alpha[l]
    if (window->AmbiguousCharAt(l)) {  // 'N' or 'X' here - propagate the previous alpha
      _alpha[l] = _alpha[l-1];
    }
    else {                             // usual case: known nucleotide at this position
      DTYPE  sum = 0;
      for (int i=0; i<_numWM; i++) {
	int wm_len_i = _wm_len[i];
	if (l-wm_len_i+1<0)            // matrix i would have to start before the window
	  continue;
	if (l-wm_len_i+1==0) {         // matrix i starts at position 0: no alpha precedes it
	  DTYPE  scale_factor = _cij[0][l-1];
	  sum += scale_factor*_pi[i]*ComputeSequenceProbability(window,0,l,wm[i]);
	  continue;
	}
	                               // matrix i covers l-wm_len_i+1 .. l; rescale by the c's of the positions before l
	DTYPE  scale_factor = (wm_len_i>1?_cij[l-wm_len_i+1][wm_len_i-2]:1);
	sum += _alpha[l-wm_len_i]*scale_factor*_pi[i]*ComputeSequenceProbability(window,l-wm_len_i+1,l,wm[i]);
      }
      _alpha[l] = sum;
    }                       
                                       // Scale the _alphas
    _c[l] = _alpha[l];
    if (_c[l] < SMALL_FLOAT) _c[l] = 1;
    else _c[l] = 1/_c[l];
                                       // extend every running product that can still be needed so it ends at l
    for (int i=max(0,l-max_wm_length+1); i<=l-1; i++) {
      _cij[i][l-i] = _cij[i][l-i-1]*_c[l];
    }
    _cij[l][0] = _c[l];
    _alpha[l] *= _c[l];
  }

                                      // Clean up
  delete [] wm;
}

// Backward pass over _currentWindow.
// Fills _beta[l] for l = length-1 down to 1, using the scaling constants
// (_c, _cij) left behind by Forward(), so Forward() must have been run on
// the same window first.  _beta[0] is cleared but never computed here.
void Parameters_H0::Backward()
{
  int i;
  int bkgIndex = BackgroundIndex(); // the background matrix corresponds to last index
  // wm[i] is the matrix for slot i (background in slot bkgIndex)
  WtMx **wm = new WtMx *[_numWM];
  for (i=0; i<_numWM; i++) {
    if (i!=bkgIndex) {
      wm[i] = _wmc->WM(i);
    }
    else {
      wm[i] = _bkgwm;
    }
  }

  Window *window = _currentWindow;
  int length = window->Length();
  
                                    // Clear up previous information
  for (int l=0; l<length; l++) {
    _beta[l] = 0;
  }
                                    // Base conditions :-
  if (window->AmbiguousCharAt(length-1)) {   // 'N' or 'X' here
    _beta[length-1] = 1*_c[length-1];
  }
  else {                            // usual case: known nucleotide at this position
    DTYPE  sum = 0;
    for (int i=0; i<_numWM; i++) {  // every matrix, whatever its length, may start at the last position
      sum += _pi[i]*ComputeSequenceProbability(window,length-1,length-1,wm[i]);
    }
    _beta[length-1] = sum*_c[length-1];
  }
                                    // Recurrences :-
  for (int l=length-2; l>=1; l--) { // need beta only for length-1 .. 1
    if (window->AmbiguousCharAt(l)) {// 'N' or 'X' here, propagate the previous _beta
      _beta[l] = _c[l]*_beta[l+1];
      continue;
    }
                                    // usual case: known nucleotide at this position
    DTYPE  sum = 0;
    for (i=0; i<_numWM; i++) {      // any wm_i (including the background) may start at position l
      int wm_len_i = _wm_len[i];
      if (l+wm_len_i<length) {     // wm_i starts and ends before length - 1, so there is a following beta also
	DTYPE  scale_factor = (wm_len_i>1?_cij[l+1][wm_len_i-2]:1);
	sum += scale_factor*_pi[i]*ComputeSequenceProbability(window,l,l+wm_len_i-1,wm[i])
	  *_beta[l+wm_len_i];
      }
      else {                        // wm_i starts here but either it doesnt end, or ends at last position
	                            // rescale by the c's of positions l+1 .. length-1
	DTYPE  scale_factor = _cij[l+1][length-2-l];
	sum += scale_factor*_pi[i]*ComputeSequenceProbability(window,l,length-1,wm[i]);
      }
    }
    _beta[l] = sum*_c[l];
  }
                                   // Clean up
  delete [] wm;
}

// Combines the results of Forward() and Backward() for _currentWindow.
// For every matrix i and start position l it stores the (scaled) weight
// of "matrix i starts at l" in _Ail_window[i][l], and the sum over l in
// _Ai[wi][i], where wi is the window's own index (window->GetIndex()).
// Exits the process if that index does not map back to the window.
void Parameters_H0::PrepareForUpdate()
{
  int i;
  int bkgIndex = BackgroundIndex(); // the background matrix corresponds to last index
  // wm[i] is the matrix for slot i (background in slot bkgIndex)
  WtMx **wm = new WtMx *[_numWM];
  for (i=0; i<_numWM; i++) {
    if (i!=bkgIndex) {
      wm[i] = _wmc->WM(i);
    }
    else {
      wm[i] = _bkgwm;
    }
  }

  Window *window = _currentWindow;
  int length = window->Length();

  int wi = window->GetIndex();
  if ((*_windows)[wi] != window) {  // sanity check: the window must sit at its own index
    printf("Error: Couldnt retrieve index of window correctly\n");
    exit(1);
  }
                                    // Clear up previous information
  for (i=0; i<_numWM; i++) {
    _Ai[wi][i] = -1;
  }
  
    /*********** Note: **********************
    Ail computed here is actually $\sum_{P|Xil(P)=1} Pr[P,S|\theta]*(\prod c[l])$,
    which is approximately but not exactly equal to $\sum_{P|Xil(P)=1} Pr[P,S|\theta]/Pr[S|\theta]$
    This difference does not matter in the computation of the updated probabilities, since the
    correction factor is the same in the numerator and denominator. 
    *****************************************/
                                  
  for (i=0; i<_numWM; i++) {        // compute _Ai[i] ...
    _Ai[wi][i] = 0;
    int wm_len_i = _wm_len[i];
    for (int l=0; l<length; l++) {  // ... by summing _Aij[i][l] over all l
      if (window->AmbiguousCharAt(l)) { // 'N' or 'X' at this position, cant have pij at this position
	_Ail_window[i][l] = 0;
	continue;
      }
      if (l==0) {                   // case 1: no alpha term defined for l-1
	// NOTE(review): this branch is taken before the end-of-window test
	// below, so it reads _beta[wm_len_i] even when wm_len_i >= length;
	// presumably windows are always longer than every matrix - confirm.
	_Ail_window[i][l] = _cij[l][wm_len_i-1]*_pi[i]
	  *ComputeSequenceProbability(window,l,l+wm_len_i-1,wm[i])
	  *_beta[l+wm_len_i];
	_Ai[wi][i] += _Ail_window[i][l];
	continue;
      }
      if (l+wm_len_i>=length) {     // case 2: no beta term defined for l+wm_len[i]
	_Ail_window[i][l] = _alpha[l-1]*_cij[l][length-1-l]*_pi[i]
	  *ComputeSequenceProbability(window,l,length-1,wm[i]);
	_Ai[wi][i] += _Ail_window[i][l];
	continue;
      }
	                            // general condition: use both alpha and beta
      _Ail_window[i][l] = _alpha[l-1]*_cij[l][wm_len_i-1]*_pi[i]
	*ComputeSequenceProbability(window,l,l+wm_len_i-1,wm[i])
	*_beta[l+wm_len_i];
      _Ai[wi][i] += _Ail_window[i][l];	
    }  
  }
                                    // Clean up
  delete [] wm;
}

// Placeholder: re-estimation of the weight-matrix (emission) columns is
// not supported.  The function optionally emits a warning (only when
// WARNINGS is defined) and returns without changing anything.
// The draft implementation below is compiled out with #if 0; it refers to
// a three-dimensional _Ail that is not the _Ail_window used elsewhere in
// this file.
void Parameters_H0::UpdateEmissionProbabilities()
{
#ifdef WARNINGS
  Warn("Warning: UpdateEmissionProbabilities not yet supported\n");
#endif
  return;

#if 0
  // ---- disabled draft, kept for reference ----
  int i;
  int bkgIndex = BackgroundIndex(); // the background matrix corresponds to last index
  WtMx **wm = new WtMx *[_numWM];
  for (i=0; i<_numWM; i++) {
    if (i!=bkgIndex) {
      wm[i] = _wmc->WM(i);
    }
    else {
      wm[i] = _bkgwm;
    }
  }

  int numWindows = _windows->size();

  for (i=0; i<_numWM; i++) {
    for (int j=0; j<_wm_len[i]; j++) {
      if (!_free_emission_probabilities[i][j]) continue;
                                    // else recompute emission probabilities
      DTYPE  *Eij = new DTYPE [4];  // one accumulator per nucleotide
      for (int k=0; k<4; k++) Eij[k] = 0;
      for (int wi=0; wi<numWindows; wi++) {
	Window *window = (*_windows)[wi];
	int length = window->Length();
             
	for (int l=0; l<length; l++) {
	  if (l-j < 0) continue;
	  if (!window->AmbiguousCharAtInReferenceSequence(l))
	    Eij[window->IndexOfCharAtInReferenceSequence(l)] += _Ail[wi][i][l-j];
	}
      }
                                    // normalize to a distribution when anything was counted
      DTYPE  sum = 0;
      for (int k=0; k<4; k++) sum += Eij[k];
      if (sum > 0) {
	for (int k=0; k<4; k++) Eij[k] /= sum;
      }
      wm[i]->UpdateFrequency(j,Eij);

      delete [] Eij;
    }
  }
                                    // Clean up
  delete [] wm;
#endif
}

void Parameters_H0::UpdateTransitionProbabilities()
{
  int i;
                                    // save current pij
  for (i=0; i<_numWM; i++) {
    _oldpi[i] = _pi[i];
  }
 
  DTYPE  *ai = new DTYPE [_numWM];
  DTYPE  sum = 0;

  int numWindows = _windows->size();
  for (i=0; i<_numWM; i++) {
    ai[i] = 0; 
    for (int wi=0; wi<numWindows; wi++) {
      DTYPE  correction = FringeCorrectionFactor(wi);
      ai[i] += _Ai[wi][i]*correction;
    }
    sum += ai[i];
  }
  if (sum > 0) {
    for (i=0; i<_numWM; i++) {
      _pi[i] = float(ai[i]/sum);
    }
  }
  delete [] ai;
}

void Parameters_H0::Update()
{
  UpdateEmissionProbabilities();
  UpdateTransitionProbabilities();
}

// Restores the transition probabilities from the backup in _oldpi.
void Parameters_H0::Revert()
{
  int m = 0;
  while (m < _numWM) {
    _pi[m] = _oldpi[m];
    ++m;
  }
}

// Writes the training result to fp.
//
// verbose == false: one tab-separated line - start of the first window,
//   score, score per length, total window length, iteration count (and
//   the sequence name when _PRINT_SEQNAME_IN_FENFILE is defined).
// verbose == true:  every window's own Print(), a summary line, the
//   background matrix's Print(), and the transition probabilities.
void Parameters_H0::Print(FILE *fp, bool verbose)
{
  // total length over all windows
  int combinedLength = 0;
  for (size_t w = 0; w < _windows->size(); ++w) {
    combinedLength += (*_windows)[w]->Length();
  }

  if (verbose) {
    for (size_t w = 0; w < _windows->size(); ++w)
      (*_windows)[w]->Print(verbose);
    fprintf(fp,"Score = %.4f\tTotalLen = %d\tIterations = %d\n",_free_energy_differential,combinedLength,_num_iterations);

    _bkgwm->Print();
    fprintf(fp,"Transition probabilities (p_i):\n");   // print the _pi values
    for (int m = 0; m < _numWM; m++) {
      fprintf(fp,"%g ",_pi[m]);
    }
    fprintf(fp,"\n");
    return;
  }

  // terse form
  fprintf(fp,"%d\t",(*_windows)[0]->Start());
#ifdef _PRINT_SEQNAME_IN_FENFILE
  char seqname[1024];
  (*_windows)[0]->Seq()->Name(seqname);
  fprintf(fp,"%.4f\t%.4f\t%d\t%d\t%s\n",_free_energy_differential,_free_energy_perlength,combinedLength,_num_iterations,seqname);
#else
  fprintf(fp,"%.4f\t%.4f\t%d\t%d\n",_free_energy_differential,_free_energy_perlength,combinedLength,_num_iterations);
#endif
}

// Writes the transition probabilities to fp on one line, four decimals
// each, separated by spaces.  The verbose flag is not used.
void Parameters_H0::PrintProbabilities(FILE *fp, bool verbose)
{
  int m = 0;
  while (m < _numWM) {
    fprintf(fp,"%.4f ",_pi[m]);
    ++m;
  }
  fprintf(fp,"\n");
}

// Returns the fringe-corrected total of _Ai[wi][i] over all windows for
// weight matrix i.
DTYPE  Parameters_H0::ComputeAverageCount(int i)
{
  const int windowCount = _windows->size();

  DTYPE total = 0;
  int w = 0;
  while (w < windowCount) {
    const DTYPE raw = _Ai[w][i];
    const DTYPE fringe = FringeCorrectionFactor(w);
    total += raw*fringe;
    ++w;
  }

  return total;
}

// Computes, for every window, the expected count of weight matrix i and
// returns the sum over all windows.
//
// i:            index of the weight matrix of interest.
// expectations: output; set to a freshly allocated array (new[]) with one
//               entry per window.  The caller owns it and must delete [] it.
//
// For a window with L specific characters the expectation is
// _pi[i] * sum_{l=0}^{L-1} alpha[l], with alpha[0] = 1 and
// alpha[l] = sum_k alpha[l - len_k] * _pi[k].
//
// A window with no specific characters contributes 0; the previous code
// wrote alpha[0] into a zero-length array and read alphasum[-1] in that
// case.  The unused per-call matrix table has been dropped.
DTYPE  Parameters_H0::ComputeExpectedAverageCount(int i, DTYPE  *&expectations)
{
  DTYPE  mean = 0;
  int numWindows = _windows->size();
  expectations = new DTYPE [numWindows];

  for (int wi=0; wi<numWindows; wi++) {
    Window *window = (*_windows)[wi];
    int length = window->NumSpecificCharacters();

    if (length <= 0) {               // nothing to place a motif on
      expectations[wi] = 0;
      continue;
    }

    vector<DTYPE> alpha(length);
    alpha[0] = 1;
    DTYPE  alphasum = alpha[0];      // running sum of alpha[0..l]
    for (int l=1; l<length; l++) {
      alpha[l] = 0;
      for (int k=0; k<_numWM; k++) {
        if (l-_wm_len[k]<0) continue;   // matrix k does not fit before l
        alpha[l] += alpha[l-_wm_len[k]]*_pi[k];
      }
      alphasum += alpha[l];
    }
    expectations[wi] = _pi[i]*alphasum;
    mean += expectations[wi];
  }

  return mean;
}

// Returns the sum over all windows of
//   expectations[wi] + 2*_pi[i]^2*termIJ - expectations[wi]^2.
//
// i:            index of the weight matrix of interest.
// expectations: per-window expected counts, one entry per window (as
//               produced by ComputeExpectedAverageCount()).
//
// NOTE: the pairwise term termIJ was never implemented and is fixed at 0,
// exactly as before.  The matrix table and the alpha recursion that the
// previous code built were never read, so they have been removed; that
// also removes an out-of-bounds write to alpha[0] for windows with no
// specific characters.
DTYPE   Parameters_H0::ComputeVarianceOfCount(int i, DTYPE  *expectations)
{
  DTYPE  variance = 0;
  int numWindows = _windows->size();
  for (int wi=0; wi<numWindows; wi++) {
    // TODO: compute termIJ
    DTYPE  termIJ = 0;
    variance += (expectations[wi] + 2*pow(_pi[i],2)*termIJ - pow(expectations[wi],2));
  }

  return variance;
}

void Parameters_H0::PrintAverageCounts(FILE *fp, bool verbose)
{
  int bkgIndex = BackgroundIndex(); // the background matrix corresponds to last index
  WtMx **wm = new WtMx *[_numWM];
  for (int i=0; i<_numWM; i++) {
    if (i!=bkgIndex) {
      wm[i] = _wmc->WM(i);
    }
    else {
      wm[i] = _bkgwm;
    }
  }
  
  int numWindows = _windows->size();
  
  for (int i=0; i<_numWM; i++) {
    DTYPE  totalaverage = 0;
    for (int wi=0; wi<numWindows; wi++) {
      Window *window = (*_windows)[wi];
      int length = window->Length();
      DTYPE  correction = FringeCorrectionFactor(wi);      	
      DTYPE  average = _Ai[wi][i]*correction;
      totalaverage += average;
    }
    fprintf(fp,"%.4f ",totalaverage);      
  }    
  fprintf(fp,"\n");
  
  delete [] wm;

  return;
}

#include <list>
// Scratch record for one motif occurrence found while scanning a window
// (used by PrintProfile and GetLastMotifs).
struct tmpstruc1 {
  int mindex;   // index of the weight matrix
  int offset;   // PrintProfile: next 1-based motif position to print;
                // GetLastMotifs: start position of the motif in the window
  DTYPE ail;    // occurrence weight recorded for this motif
};

void Parameters_H0::PrintProfile(FILE *fp, FILE *dict, float occurrence_threshold)
{
  int bkgIndex = BackgroundIndex(); // the background matrix corresponds to last index
  WtMx **wm = new WtMx *[_numWM];
  for (int i=0; i<_numWM; i++) {
    if (i!=bkgIndex) {
      wm[i] = _wmc->WM(i);
    }
    else {
      wm[i] = _bkgwm;
    }
  }
  
  int numWindows = _windows->size();
  for (int wi=0; wi<numWindows; wi++) {
    Window *window = (*_windows)[wi];
    char name[1024];
    window->Seq()->Name(name);
    fprintf(fp,">\nSequence : %s\tPosition %d\n",name,window->Start());
    fprintf(dict,">%s\nPosition: %d\tNucl: %d\tWord_av_length: 0.00\tFree Energy: %.4f\n",name,window->Start(),window->NumSpecificCharacters(),_free_energy_differential);

    _currentWindow = window;
    Forward();
    Backward();
    PrepareForUpdate();

    int length = window->Length();
    DTYPE  correction = FringeCorrectionFactor(wi);      	
    list<tmpstruc1 *> worklist;

    for (int l=0; l<length; l++) {
      fprintf(fp,"%d\t%c\n",window->Start()+l,window->Seq()->CharAt(window->Start()+l));
      if (window->AmbiguousCharAt(l)) {
	fprintf(fp,"N\t\t\t\t1.0\n");
	continue;
      }

      // process the working list
      for (list<tmpstruc1 *>::iterator it = worklist.begin(); it != worklist.end(); ) {
	tmpstruc1 *&current = *it;
	char name[1024]; wm[current->mindex]->Name(name,15);
	if (current->offset <= wm[current->mindex]->Length()) {
	  fprintf(fp,"%s\t+\t%d\t%.4f\n",name,current->offset,current->ail);
	}
	current->offset ++;
	if (current->offset > wm[current->mindex]->Length()) {
	  it = worklist.erase(it);
	  delete current;
	}
	else it++;
      }      

      // see if there's a new motif starting here
      for (int i=0; i<_numWM; i++) {
	if (l >= length-_wm_len[i]+1) continue;
	DTYPE  ail = _Ail_window[i][l]*correction;
	if (ail > occurrence_threshold) {
	  char name[1024]; wm[i]->Name(name,15);
	  if (i==bkgIndex) {
	    fprintf(fp,"%s\t\t\t%.4f\n",name,ail);
	  }
	  else {
	    fprintf(fp,"%s\t+\t1\t%.4f\n",name,ail);
	    // also put it in a list
	    tmpstruc1 *tmp = new tmpstruc1;
	    tmp->mindex = i;
	    tmp->offset = 2;
	    tmp->ail    = ail;
	    worklist.push_back(tmp);
	  }
	}
      }      
    }

    // delete the working list
    for (list<tmpstruc1 *>::iterator it = worklist.begin(); it != worklist.end();) {
      tmpstruc1 *&current = *it;
      it = worklist.erase(it);
      delete current;
    }      

    // write the dictionary
    for (int i=0; i<_numWM; i++) {
      DTYPE average = _Ai[wi][i]*correction;
      if (average > occurrence_threshold) {
	char name[1024]; wm[i]->Name(name,15);
#ifdef _WTMX_BIAS
	float fbias = wm[i]->GetForwardBias();
	fprintf(dict,"%s\t%.4f\t%.4f\t%.2f\n",name,_pi[i],average,fbias);
#else 
	fprintf(dict,"%s\t%.4f\t%.4f\n",name,_pi[i],average);
#endif	
      }
    }

    fprintf(dict,"<\n");
    fprintf(fp,"<\n");
  }    
  
  delete [] wm;
  return;
}

#ifdef _CYCLIC_WINDOWS

float **Parameters_H0::GetLastMotifs()
{
  int bkgIndex = BackgroundIndex(); // the background matrix corresponds to last index
  WtMx **wm = new WtMx *[_numWM];
  for (int i=0; i<_numWM; i++) {
    if (i!=bkgIndex) {
      wm[i] = _wmc->WM(i);
    }
    else {
      wm[i] = _bkgwm;
    }
  }
  
  int numWindows = _windows->size();
  int numWM = _numWM;

  float **initial;
  initial = new float *[numWindows];
  for (int wi=0; wi<numWindows; wi++) {
    initial[wi] = new float[numWM];
    for (int j=0; j<numWM; j++) {
      initial[wi][j] = 0;
    }

    Window *window = (*_windows)[wi];
    _currentWindow = window;
    Forward();
    Backward();
    PrepareForUpdate();

    int length = window->Length();
    DTYPE  correction = FringeCorrectionFactor(wi);      	
    list<tmpstruc1 *> worklist;

    int last_motif_starts_at = -1;
    float remaining_probability = 1;
    for (int l=length-1; l>=0; l--) {
      if (window->AmbiguousCharAt(l)) {
	continue;
      }

      // see if there's a new motif starting here
      for (int i=0; i<_numWM; i++) {
	if (i==bkgIndex) continue;
	if (l >= length-_wm_len[i]+1) continue;
	DTYPE  ail = _Ail_window[i][l]*correction;
	if (ail < SMALL_MOTIF_OCCURRENCE_THRESHOLD) continue;
	tmpstruc1 *tmp = new tmpstruc1;
	tmp->mindex = i;
	tmp->offset = l;
	tmp->ail    = ail*remaining_probability;
	worklist.push_back(tmp);
	remaining_probability -= tmp->ail;
      }
      if (remaining_probability < SMALL_FLOAT) break;
    }

    // process the working list
    int nummotifsfound = 0;
    for (list<tmpstruc1 *>::iterator it = worklist.begin(); it != worklist.end();) {
      tmpstruc1 *&current = *it;
      initial[wi][current->mindex] += current->ail;
      nummotifsfound++;
      it++;
    }    
    float sum = 0;
    for (int j=0; j<numWM; j++) sum += initial[wi][j];
    if (sum < 1) initial[wi][bkgIndex] = 1-sum;
    else {
      for (int j=0; j<numWM; j++) initial[wi][j] /= sum;
    }

    // delete the working list
    for (list<tmpstruc1 *>::iterator it = worklist.begin(); it != worklist.end();) {
      tmpstruc1 *&current = *it;
      it = worklist.erase(it);
      delete current;
    }      
  }

  return initial;
}

void Parameters_H0::DeleteSpaceForLastMotifs(float **initial)
{
  int numWindows = _windows->size();
  int numWM = _numWM;

  if (initial) {
    for (int wi=0; wi<numWindows; wi++) 
      delete [] initial[wi];
    delete [] initial;
  }

  return;
}
#endif


// Overwrites the transition probabilities with the first _numWM entries
// of p.
void Parameters_H0::SetParameters(DTYPE  *p)
{
  int m = 0;
  while (m < _numWM) {
    _pi[m] = p[m];
    ++m;
  }
}

// Returns the transition probability of weight matrix wmindex.
// No range check is done on wmindex.
DTYPE Parameters_H0::GetParameter(int wmindex)
{
  DTYPE probability = _pi[wmindex];
  return probability;
}


#define MINIMUM_OVERLAP 1

// Looks in the first window for motifs that start to the left of
// al->_l1 and reach it (by at least MINIMUM_OVERLAP positions), with a
// fringe-corrected weight above occurrence_threshold.
//
// Returns how far left of al->_l1 the leftmost such motif starts, or 0
// when there is none or when al->_l1 lies outside the window.
//
// Forward(), Backward() and PrepareForUpdate() are re-run on the first
// window, so _currentWindow and the scratch arrays are overwritten.
//
// The previous version ended by fetching the best matrix's name into a
// 32-byte buffer purely for a commented-out debug print; that unbounded
// copy has been removed together with the now-unused index.
int Parameters_H0::MaximumLeftOverlap(AlignmentNode *al, float occurrence_threshold)
{
  int position = al->_l1;
  Window *win = (*_windows)[0];
  int length = win->Length();
  int rel_position = position - win->Start();   // position relative to the window
  if (rel_position < 0) return 0;
  if (rel_position >= length) return 0;

  _currentWindow = win;
  Forward();
  Backward();
  PrepareForUpdate();
  DTYPE  correction = FringeCorrectionFactor(0);

  int bkgIndex = BackgroundIndex();
  int best_so_far = rel_position;               // leftmost qualifying start seen
  for (int i=0; i<_numWM; i++) {
    if (i==bkgIndex) continue;
    int wm_len = _wmc->WM(i)->Length();
    if (wm_len == 1) continue;                  // a single column cannot overlap
    for (int l=rel_position-wm_len+MINIMUM_OVERLAP; l<rel_position; l++) {
      if (l < 0) continue;
      DTYPE  ail = _Ail_window[i][l]*correction;
      if (ail > occurrence_threshold && best_so_far > l) {
        best_so_far = l;
        break;                                  // starts are scanned left to right
      }
    }
  }
  return rel_position-best_so_far;
}

// Looks in the first window for motifs that cover al->_r1 and extend to
// its right, lie entirely inside the window, and have a fringe-corrected
// weight above occurrence_threshold.
//
// Returns how far right of al->_r1 the rightmost such motif ends, or 0
// when there is none or when al->_r1 lies outside the window.
//
// Forward(), Backward() and PrepareForUpdate() are re-run on the first
// window, so _currentWindow and the scratch arrays are overwritten.
//
// The previous version ended by fetching the best matrix's name into a
// 32-byte buffer purely for a commented-out debug print; that unbounded
// copy has been removed together with the now-unused index.
int Parameters_H0::MaximumRightOverlap(AlignmentNode *al, float occurrence_threshold)
{
  int position = al->_r1;
  Window *win = (*_windows)[0];
  int length = win->Length();
  int rel_position = position - win->Start();   // position relative to the window
  if (rel_position < 0) return 0;
  if (rel_position >= length) return 0;

  _currentWindow = win;
  Forward();
  Backward();
  PrepareForUpdate();
  DTYPE  correction = FringeCorrectionFactor(0);

  int bkgIndex = BackgroundIndex();
  int best_so_far = rel_position;               // rightmost qualifying end seen
  for (int i=0; i<_numWM; i++) {
    if (i==bkgIndex) continue;
    int wm_len = _wmc->WM(i)->Length();
    if (wm_len == 1) continue;                  // a single column cannot overlap
    for (int l=rel_position-wm_len+2; l<=rel_position-MINIMUM_OVERLAP+1; l++) {
      if (l < 0) continue;
      if (l+wm_len-1 >= length) continue;       // motif must end inside the window
      DTYPE  ail = _Ail_window[i][l]*correction;
      if (ail > occurrence_threshold && best_so_far < l+wm_len-1) {
        best_so_far = l+wm_len-1;
      }
    }
  }
  return best_so_far-rel_position;
}

// Default constructor: marks the probability arrays and the scratch
// buffers as not allocated.
// NOTE(review): the other members that Destroy() inspects (_windows,
// _bkgwm, _wm_len, _free_emission_probabilities, ...) are not set here;
// presumably they are initialized elsewhere - confirm.
Parameters_H0::Parameters_H0()
{
  _oldpi = NULL;
  _pi = NULL;

  _beta = NULL;
  _alpha = NULL;

  _cij = NULL;
  _Ai = NULL;
  _Ail_window = NULL;
  _c = NULL;
}

// Destructor: all owned storage is released by Destroy().
Parameters_H0::~Parameters_H0()
{
  this->Destroy();
}

// Releases everything this object owns and resets the owning pointers to
// NULL, so that calling Destroy() again (for example Initialize() or
// operator= followed later by the destructor) is harmless.
//
// Previously _pi, _oldpi, _free_emission_probabilities, _bkgwm and
// _wm_len were deleted but left dangling, so a second call freed them
// twice.
//
// As before, nothing that depends on the window list is touched when
// _windows is NULL.
void Parameters_H0::Destroy()
{
  int i;

  if (_pi) {
    delete [] _pi;
    _pi = NULL;
  }

  if (_oldpi) {
    delete [] _oldpi;
    _oldpi = NULL;
  }

  if (_free_emission_probabilities) {
    for (i=0; i<_numWM; i++) {
      delete [] _free_emission_probabilities[i];
    }
    delete [] _free_emission_probabilities;
    _free_emission_probabilities = NULL;
  }

  if (_bkgwm) {
    delete _bkgwm;
    _bkgwm = NULL;
  }
  if (_wm_len) {
    delete [] _wm_len;
    _wm_len = NULL;
  }

  if (_windows==NULL) return;       // nothing window-dependent to release

  if (_Ai) {
    int numWindows = _windows->size();
    for (int wi=0; wi<numWindows; wi++) {
      if (_Ai[wi]) {
        delete [] _Ai[wi];
      }
    }
    delete [] _Ai;
    _Ai = NULL;
  }

  if (_Ail_window) {
    for (i=0; i<_numWM; i++) {
      if (_Ail_window[i]) {
        delete [] _Ail_window[i];
      }
    }
    delete [] _Ail_window;
    _Ail_window = NULL;
  }

  if (_alpha) {
    delete [] _alpha;
    _alpha = NULL;
  }
  if (_beta) {
    delete [] _beta;
    _beta = NULL;
  }
  if (_c) {
    delete [] _c;
    _c = NULL;
  }

  if (_cij) {
    for (i=0; i<_max_window_length; i++) {
      if (_cij[i]) delete [] _cij[i];
    }
    delete [] _cij;
    _cij = NULL;
  }

  if (_fringe_corrections) {
    delete [] _fringe_corrections;
    _fringe_corrections = NULL;
  }

  // destroy the cache if present
  if (_currentwindowcache) {
    int numWindows = _windows->size();
    for (int wi=0; wi<numWindows; wi++) _currentwindowcache[wi].Destroy();
    delete [] _currentwindowcache;
    _currentwindowcache = NULL;
  }

  return;
}

// Copy constructor: all the work is done by Copy().
Parameters_H0::Parameters_H0(const Parameters_H0 &p)
{
  this->Copy(p);
}

// Copy assignment: releases the current contents, then copies p.
// Self-assignment is a no-op.  Returns *this.
//
// The previous version had no return statement on the normal path, which
// is undefined behaviour for a function returning a reference.
Parameters_H0& Parameters_H0::operator=(const Parameters_H0 &p)
{
  if (this == &p) return *this;
  Destroy();
  Copy(p);
  return *this;
}

// Makes this object a copy of p.
//
//  - _wmc and _windows are copied as pointers (shared with p).
//  - _pi, _oldpi, _free_emission_probabilities, _Ai and _bkgwm are duplicated
//    into freshly allocated storage.
//  - _currentWindow, _alpha, _beta, _cij, _Ail_window and _c are reset to NULL
//    rather than copied.
//
// Existing buffers are overwritten without being freed, so the caller must
// invoke this only on a fresh or already Destroy()ed object.
//
// NOTE(review): _wm_len, _fringe_corrections, _currentwindowcache and
// _max_window_length are not assigned here.
void Parameters_H0::Copy(const Parameters_H0 &p)
{
  _wmc = p._wmc;
  _numWM = p._numWM;
  _windows = p._windows;
  _is_initialized = p._is_initialized;
  _is_trained = p._is_trained;

  int i;

  if (_numWM && p._pi) {
    _pi = new float [_numWM];
    for (i=0; i < _numWM; i++) {
      _pi[i] = p._pi[i];
    }
  }
  else _pi = NULL;

  if (_numWM && p._oldpi) {
    _oldpi = new float [_numWM];
    for (i=0; i< _numWM; i++) {
      _oldpi[i] = p._oldpi[i];
    }
  }
  else _oldpi = NULL;

  int bkgIndex = BackgroundIndex();
  if (_numWM && p._free_emission_probabilities) {
    _free_emission_probabilities = new bool *[_numWM];
    WtMx *w;
    for (i=0; i<_numWM; i++) {
      // row length is that of the matching matrix; the background matrix is
      // held separately from the collection
      if (i==bkgIndex) w = p._bkgwm;
      else w = p._wmc->WM(i);
      _free_emission_probabilities[i] = new bool[w->Length()];
      for (int j=0; j<w->Length(); j++) {
        _free_emission_probabilities[i][j] = p._free_emission_probabilities[i][j];
      }
    }
  }
  else _free_emission_probabilities = NULL;

  if (_windows && _numWM && p._Ai) {
    // one row of _numWM entries per window
    int numWindows = _windows->size();
    _Ai = new DTYPE  *[numWindows];
    for (int wi=0; wi<numWindows; wi++) {
      _Ai[wi] = new DTYPE  [_numWM];
      for (int m=0; m<_numWM; m++) {
        _Ai[wi][m] = p._Ai[wi][m];
      }
    }
  }
  else _Ai = NULL;

  _currentWindow = NULL;
  _alpha = _beta = NULL;
  _cij = NULL;
  _Ail_window = NULL;

  // NOTE(review): p._bkgwm is NULL for objects built by the
  // (WtMxCollection*, int) constructor -- confirm WtMx(WtMx*) accepts NULL.
  _bkgwm = new WtMx(p._bkgwm);
  _c = NULL;
  _free_energy = p._free_energy;
  _free_energy_differential = p._free_energy_differential;
  _initialbias = p._initialbias;
  _num_iterations = p._num_iterations;

  return;
}

// Builds a parameter set over the matrices of wmc plus one extra slot
// (_numWM = wmc->Size()+1), with the weight vector biased towards one entry:
// _pi[extreme] is ALMOST_ONE and the remainder is spread evenly over the
// other entries. _oldpi starts as a copy of _pi.
//
// No windows or background matrix are attached. Every pointer that Destroy()
// inspects is given a defined value, since the destructor runs Destroy() on
// objects built this way (e.g. stack temporaries).
Parameters_H0::Parameters_H0(WtMxCollection *wmc, int extreme)
{
  int i;

  _wmc = wmc;
  _windows = NULL;
  _numWM = _wmc->Size()+1;
  _max_window_length = 0;

  _pi = new float[_numWM];
  _oldpi = new float[_numWM];
  for (i=0; i<_numWM; i++) {
    if (i==extreme) {
      _pi[i] = ALMOST_ONE;
    }
    else {
      _pi[i] = (1-ALMOST_ONE)/(_numWM-1);
    }
    _oldpi[i] = _pi[i];
  }

  _Ai = NULL;
  _currentWindow = NULL;
  _alpha = _beta = NULL;
  _cij = NULL;
  _Ail_window = NULL;

  _free_emission_probabilities = NULL;
  _wm_len = NULL;
  _fringe_corrections = NULL;
  _currentwindowcache = NULL;

  _bkgwm = NULL;
  _c = NULL;
  _free_energy = 0;
  _free_energy_differential = 0;
  _num_iterations = 0;
  _is_trained = false;
  _is_initialized = true;
}


// Returns the Euclidean distance between _pi and _oldpi, divided by _numWM.
DTYPE  Parameters_H0::Norm_of_parameter_difference()
{
  DTYPE  squaredDistance = 0;
  int k = 0;
  while (k < _numWM) {
    // kept in float so the product is formed exactly as in (a-b)*(a-b)
    float delta = _pi[k]-_oldpi[k];
    squaredDistance += delta*delta;
    k++;
  }
  return sqrt(squaredDistance)/_numWM;
}

// Generates a random sequence of `length` characters.
//
// Repeatedly draws a weight-matrix index from the distribution given by
// _pi[] (via its cumulative sums) and emits one sample of that matrix, column
// by column. Successive plantings of the same matrix alternate orientation:
// even-numbered ones are emitted as drawn, odd-numbered ones are read from
// the last column backwards and passed through ReverseChar(). A planting that
// would run past `length` is truncated.
//
//   length   - number of characters to generate (negative is treated as 0)
//   randseed - seed handed to srandom()
//   verbose  - if true, print how many times each matrix was planted
//
// Returns a new[]-allocated, NUL-terminated buffer of length+1 chars; the
// caller owns it and must delete [] it.
char *Parameters_H0::CreateSequence(int length, int randseed, bool verbose)
{
  int i;

  // a negative length would index the buffer out of bounds below
  if (length < 0) length = 0;

  char *sequence = new char[length+1];
  int bkgIndex = BackgroundIndex(); // the background matrix corresponds to last index
  WtMx **wm = new WtMx *[_numWM];
  for (i=0; i<_numWM; i++) {
    wm[i] = (i!=bkgIndex) ? _wmc->WM(i) : _bkgwm;
  }

  // cumulative distribution over matrix indices
  float *cum = new float[_numWM];
  cum[0] = _pi[0];
  for (i=1; i<_numWM; i++) {
    cum[i] = cum[i-1] + _pi[i];
  }

  int *count = new int[_numWM];
  for (i=0; i<_numWM; i++) count[i] = 0;

  // POSIX random() returns values in [0, 2^31-1] whatever RAND_MAX (which
  // describes rand()) happens to be, so scale by that fixed bound.
  const float randomUpperBound = 2147483647.0f;

  srandom(randseed);
  int pos = 0;
  while (pos < length) {
    float r = random()/randomUpperBound;

    // first index whose cumulative weight exceeds r; fall back to the last
    // index if rounding leaves r at or above the final cumulative sum
    int rindex = _numWM-1;
    for (int j=0; j<_numWM; j++) {
      if (r < cum[j]) {
        rindex = j;
        break;
      }
    }

    // plant a wm[rindex]
    // but first decide orientation:
    int orientation = count[rindex]%2;
    for (int l=0; l<_wm_len[rindex]; l++) {
      if (orientation==0)
        sequence[pos] = wm[rindex]->GetRandomChar(l);
      else
        sequence[pos] = ReverseChar(wm[rindex]->GetRandomChar(_wm_len[rindex]-1-l));
      pos++;
      if (pos >= length) break;
    }
    count[rindex]++;
  }

  if (verbose) {
    printf("Planting stats: ");
    for (i=0; i<_numWM; i++) printf("%d %d, ",i,count[i]);
    printf("\n");
  }

  sequence[length] = 0;

  delete [] count;
  delete [] cum;
  delete [] wm;

  return sequence;
}

// Initializes this object from a Parameters_H01 instance: the _pi weights of
// init are copied into a temporary Parameters_H0 that seeds Initialize(), then
// a copy of init's background matrix is installed via SetBackground() and the
// background probabilities are cached.
void Parameters_H0::DowngradeInitialize(vector<Window *> *wl, WtMxCollection *wmc, Parameters_H01 *init)
{
  Parameters_H0 seed(wmc,0);  // 0 is for bias towards first motif. Doesnt matter, since it'll be reset

  // seed._pi holds seed._numWM entries; never write past that even if init
  // reports a larger count.
  int numWM = init->NumWM();
  if (numWM > seed._numWM) numWM = seed._numWM;
  for (int i=0; i<numWM; i++) {
    seed._pi[i] = init->_pi[i];
  }
  Initialize(wl,wmc,&seed);

  // give the out-pointer a defined value before handing it to GetBackground()
  WtMx *bkgwm = NULL;
  init->GetBackground(bkgwm);
  SetBackground(new WtMx(bkgwm));
  CacheBackgroundProbabilities();
}



