/*****************************************************************
        Copyright by Rockefeller University,
can not be reproduced or distributed without written permission of
copyright holder.  Version of October 2003.

Written by Saurabh Sinha (contact person), Erik van Nimwegen, and 
Eric Siggia.

The program stubb (and its relatives) implement an algorithm for
finding likely cis-regulatory modules, described in the following
paper:
"A Probabilistic Method to Detect Regulatory Modules"
by Saurabh Sinha, Erik van Nimwegen and Eric Siggia. 
Eleventh International Conference on Intelligent Systems for
Molecular Biology, Brisbane, Australia, July 2003, pg 292-301.

The file sample/gap_wtmx that comes with this distribution includes 
a sample set of transcription factor weight matrices (PWM's) that 
were reported in :
"Computational detection of genomic cis-regulatory modules applied
to body patterning in the early Drosophila embryo"
by N. Rajewsky, M. Vergassola, U. Gaul and E. Siggia.
BMC Bioinformatics 3 (30) 2002.
******************************************************************/

#ifndef _wtmx_h_ 
#define _wtmx_h_

#include "typedefs.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Default pseudo-count: supplies the default value of the `psd` argument of
// both matrix-based WtMx constructors declared in this header.
#define PSEUDO_COUNT 0.5
// Declared here, defined in some other translation unit. Nothing in this
// header reads or writes it.
// NOTE(review): purpose is not visible from this file -- confirm at the definition.
extern int globalid;

// The Weight matrix class

// A single weight matrix (PWM), optionally with a Markov order > 0.
//
// Only declarations live in this header; all member functions are defined in
// the implementation file, so the notes below describe the interface as it can
// be read from the signatures and the original field comments.
//
// NOTE(review): the class holds raw pointers and declares a destructor, but it
// does not declare a copy constructor or copy assignment operator, so the
// compiler-generated member-wise versions are available. Whether copying a
// WtMx by value is safe depends on what ~WtMx() releases -- confirm in the
// implementation file. WtMx(WtMx *) is a constructor taking a pointer, not a
// copy constructor.
class WtMx {
  float    ***_wtmx;          // matrix data (three levels of indirection)
  int      _length;
  bool     _is_normalized;
  char     _name[1024];       // fixed-size name buffer
  void     Normalize();

  // In-class initialisation of a non-integral static data member requires
  // `constexpr`; the former `static const float X = 1e-10;` spelling is only a
  // compiler extension and is rejected by current GCC/Clang in their default
  // language modes. The `f` suffix keeps the initialiser a float literal; the
  // stored value is unchanged.
  static constexpr float SMALL_FREQUENCY = 1e-10f;

  bool     _is_special;
  char     _special_char;

  int      _Morder;     // Markov order: how many previous characters this sees
  int      _powMorder;  // pow(4,_Morder)
  float    **_basicwtmx;// used when Morder > 0 and history word not known
  float    *_higherwtmx;// as above, except this is for k-mers

  float    _pseudo_count;
  float    _forward_bias;
  int      _udata;      // presumably the value handled by Set/GetUserData -- confirm
public:
  // Construct from a matrix `w` with `len` positions and name `nm`.
  // `psd` defaults to PSEUDO_COUNT.
  WtMx(float **w, int len, char *nm, float psd = PSEUDO_COUNT);
  // As above, for a matrix with an explicit Markov order `Morder`; `bw` is a
  // second, two-level matrix (presumably the source of _basicwtmx -- confirm).
  WtMx(float ***w, float **bw, int len, char *nm, int Morder, float psd = PSEUDO_COUNT);
  // Construct from a single character (presumably the "special" matrix
  // described by _is_special/_special_char -- confirm).
  // Not `explicit`: a char converts implicitly to a WtMx.
  WtMx(char ch);
  // Construct from another matrix given by pointer.
  WtMx(WtMx *w);
  ~WtMx();

  // Print to `fp`; defaults to stdout.
  void     Print(FILE *fp = stdout);
  // Name accessor taking a caller-supplied buffer `str`; `pad_to` defaults
  // to 16. NOTE(review): buffer-size requirements are not visible here.
  void     Name(char *str, int pad_to = 16);
  // Frequency lookups. The overloads differ in how the history is supplied:
  // none, as an int, or as a character buffer plus its length.
  float    Frequency(int offset, int index);
  float    Frequency(int offset, int index, int history);
  float    Frequency(int offset, int index, char *history, int history_length);
  float    HigherOrderFrequency(int offset, int index);
  int      MarkovOrder();
  int      Length();
  // DTYPE comes from typedefs.h.
  void     UpdateFrequency(int offset, DTYPE *freq);
  // Random-character draws; the overloads mirror those of Frequency().
  char     GetRandomChar(int offset);
  char     GetRandomChar(int offset, int history);
  char     GetRandomChar(int offset, char *history, int history_length);
  void     SetUserData(int d);
  int      GetUserData();
  void     SetForwardBias(float bias);
  float    GetForwardBias();

  // When _OPTIMIZE_PROTECTED_ACCESS is defined, class Parameters is granted
  // direct access to the private members above.
#ifdef _OPTIMIZE_PROTECTED_ACCESS
  friend class Parameters;
#endif
};

// Collection of weight matrices

#include <vector>
using namespace std;

class WtMxCollection {
  vector<WtMx *>  _vec;
  vector<int>     _valid;
  int             _numValid;
public:
  WtMxCollection();
  WtMxCollection(char *flname);
  ~WtMxCollection();

  int  Add(WtMx *w);           // add a new matrix to collection
  WtMx *Remove(int index);     // delete the (index+1)th valid matrix from collection
  WtMx *WM(int index);         // return the (index+1)th valid matrix
  int  Size();                 // number of valid matrices currently
  int  TotalSize();            // total number of matrices currently
  int  MaxLength();
  void Print();
};

// Not referenced anywhere in this header.
// NOTE(review): presumably an upper bound on motif (weight-matrix) length used
// by code that includes this file -- confirm against its users.
#define MAX_MOTIF_LENGTH 32

#endif


