/*****************************************************************
        Copyright by Rockefeller University,
can not be reproduced or distributed without written permission of
copyright holder.  Version of October 2003.

Written by Saurabh Sinha (contact person), Erik van Nimwegen, and 
Eric Siggia.

The program stubb (and its relatives) implement an algorithm for
finding likely cis-regulatory modules, described in the following
paper:
"A Probabilistic Method to Detect Regulatory Modules"
by Saurabh Sinha, Erik van Nimwegen and Eric Siggia. 
Eleventh International Conference on Intelligent Systems for
Molecular Biology, Brisbane, Australia, July 2003, pg 292-301.

The file sample/gap_wtmx that comes with this distribution includes 
a sample set of transcription factor weight matrices (PWM's) that 
were reported in :
"Computational detection of genomic cis-regulatory modules applied
to body patterning in the early Drosophila embryo"
by N. Rajewsky, M. Vergassola, U. Gaul and E. Siggia.
BMC Bioinformatics 3 (30) 2002.
******************************************************************/

#ifndef _sequence_h_
#define _sequence_h_

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "wtmx.h"
#include <vector>

// Judging by the name, the maximum number of sequences that take part in one
// alignment -- TODO confirm against the code that uses this macro.
#define MAX_ALIGNED_SEQUENCES 2

// Global counter defined in another translation unit; presumably the number
// of sequences read so far -- verify against its definition.
extern int  numsequencesread;
// Forward declaration: Sequence holds a static Alignment pointer, and the
// Alignment class itself is declared further down in this header.
class Alignment;

// Sequence
//
// A named character sequence.  Next to the raw character buffer the class
// keeps a second per-position buffer (_indexseq), an integer id and an opaque
// user-data pointer.  Judging by the parameter names, it can be built either
// from a file name or from an in-memory buffer.  Every member function is
// defined outside this header.
//
// NOTE(review): a destructor is declared but no copy constructor or copy
// assignment operator; if the destructor releases _seq/_indexseq, copying a
// Sequence would be unsafe -- confirm against the implementation.
class Sequence {
private:
  // --- state (declaration order is deliberately left untouched) ---
  char  *_seq;          // sequence characters
  char  *_indexseq;     // presumably per-position index of each character
                        // (see IndexOfCharAt) -- TODO confirm
  int   _length;        // value reported by Length(), presumably
  char  _name[1024];    // fixed-size name buffer

  int   _id;
  void  *_udata;        // opaque pointer, see SetUserData/GetUserData

  // Presumably reads one FASTA record from fp -- verify in the definition.
  char  *ReadFasta(FILE *fp);

public:
  // Size limits exposed to the rest of the program.
  static const int MAX_LINE_LENGTH = 1024;
  static const int MAX_SEQ_LENGTH  = 40000000;

  // Construction / destruction.
  Sequence(char *flname);
  Sequence(char *seq, int length, char *name);
  ~Sequence();

  // Per-position accessors and printing.
  int   Length();
  char  CharAt(int index);
  char  IndexOfCharAt(int index);
  bool  AmbiguousCharAt(int index);
  void  Print(int start, int stop);
  // NOTE(review): whether `name` is an input or an output buffer cannot be
  // told from the declaration -- check the definition.
  void  Name(char *name);

  // Species index accessors.
  void  SetSpeciesIndex(int i);
  int   GetSpeciesIndex();

  // Opaque user-data accessors.
  void  SetUserData(void *udata);
  void  *GetUserData();

  // Single Alignment pointer shared by all Sequence objects (static member,
  // defined in another translation unit).
  static class Alignment *_alignments;

  // The window iterators are granted access to the private members above.
  friend class WindowIterator;
  friend class MSWindowIterator;
#ifdef _OPTIMIZE_PROTECTED_ACCESS
  // Parameters gets private access only in builds that define this macro.
  friend class Parameters;
#endif
};

struct AlignmentNode;

// Window
//
// A [start, stop] slice of a Sequence, created from a Sequence pointer and
// two integer bounds.  Besides the bounds it carries several per-position
// buffers and an integer window index that callers can get and set.  All
// member functions except GetIndex/SetIndex are defined outside this header.
//
// NOTE(review): the class holds raw pointer members and declares a
// destructor but no copy operations -- confirm ownership in the definition.
class Window
{
private:
  // --- state (declaration order is deliberately left untouched) ---
  Sequence *_seq;
  char     *_indexofchar;
  bool     *_ambiguouschar;
  char     *_alignmentbdry;   // presumably marks alignment begin/end columns
                              // (see AlignmentBeginsAt/AlignmentEndsAt)
  int      _start;
  int      _stop;
  int      _length;
  int      _window_index;     // read/written by GetIndex/SetIndex

  // Extra storage so that a window can cover multiple sequences.
  char     **_arrayofchar;

  bool  AmbiguousChar(int index);

public:
  Window(Sequence *seq,int start, int stop);
  ~Window();

  Sequence *Seq();
  Window *Context(int size);

  // Window w will be aligned starting at position `offset` of this window.
  void  AlignWindow(Window *w, int offset);

  // Composition statistics.
  int   NumSpecificCharacters();
  void  ComputeBaseFrequencies(float *bkg);
  void  ComputeBaseFrequenciesWithHistory(float **bkg, int history_length);

  // Bounds.
  int   Start();
  int   Stop();

  // Per-position queries.
  char  IndexOfCharAt(int index, char *&arrayofchar);
  char  IndexOfCharAtInReferenceSequence(int index);
  bool  AmbiguousCharAt(int index);
  bool  AmbiguousCharAtInReferenceSequence(int index);
  bool  AlignmentBeginsAt(int index);
  bool  AlignmentEndsAt(int index);

  int   Length();
  void  Print(bool verbose = false);

  // Accessors for the caller-assigned window index.
  int   GetIndex()
  {
    return _window_index;
  }
  void  SetIndex(int i)
  {
    _window_index = i;
  }

#ifdef _OPTIMIZE_PROTECTED_ACCESS
  // Parameters gets private access only in builds that define this macro.
  friend class Parameters;
#endif
};

class WindowIterator {
  int _window_size;
  int _shift_size;

  Sequence *_seq;
  int _current;

public:
  WindowIterator();
  WindowIterator(Sequence *seq);

  bool Begin(int window_size, int shift_size);
  bool End();
  void Next();


  Window *Current();
  void CurrentWindowList(vector<Window *> *&wl, int min_window_length=0);
};

class MSWindowIterator {
  int _window_size;
  int _shift_size;

  int _numS;
  Sequence **_seqs;
  int *_currentbegins;
  int *_currentends;
  int *_nextalignedcolumn;
  int *_alignedwith;

 public:
  MSWindowIterator();
  MSWindowIterator(Sequence **seqs, int numSequences);
  ~MSWindowIterator();

  virtual bool Begin(int window_size, int shift_size);
  virtual void Next() = 0;
  virtual void CurrentWindowList(vector<Window *> *&wl, int min_window_length=0) = 0;
  bool End();

  friend class MSWindowIteratorFixedShift;
  friend class MSWindowIteratorAlignmentPunctuated;
};

class MSWindowIteratorFixedShift: public MSWindowIterator {
  bool _ref_seq_only;
  void ComputeEndsGivenBegins();
 public:
  MSWindowIteratorFixedShift(Sequence **seqs, int numSequences, bool ReferenceSequenceOnly = true);
  virtual bool Begin(int window_size, int shift_size);
  virtual void Next();
  virtual void CurrentWindowList(vector<Window *> *&wl, int min_window_length=0);
  void CurrentWindowListEW(vector<Window *> *&wl, int min_window_length=0);
};

class MSWindowIteratorAlignmentPunctuated: public MSWindowIterator {
  void ComputeEndsGivenBegins();
 public:
  MSWindowIteratorAlignmentPunctuated(Sequence **seqs, int numSequences);
  virtual bool Begin(int window_size, int shift_size);
  virtual void Next();
  virtual void CurrentWindowList(vector<Window *> *&wl, int min_window_length=0);
};

// AlignmentNode
//
// One element of a doubly linked list (_next / _prev) that pairs an interval
// on one sequence with an interval on another.  Judging by the constructor
// parameter names, (_l1, _r1) are the left/right bounds on _thisSeq and
// (_l2, _r2) the bounds on _otherSeq -- TODO confirm in the definition.
struct AlignmentNode {
  // First sequence and its interval.
  Sequence *_thisSeq;
  int _l1;
  int _r1;

  // Second sequence and its interval.
  Sequence *_otherSeq;
  int _l2;
  int _r2;

  // List links.
  AlignmentNode *_next;
  AlignmentNode *_prev;

  AlignmentNode(Sequence *seq, int l1, int r1, Sequence *seq2, int l2, int r2);

  // Presumably grow the node towards the given coordinate -- verify against
  // the definitions.
  void ExtendToLeft(int left);
  void ExtendToRight(int right);
};

// Alignment
//
// Holds AlignmentNode pointers in a heap-allocated array of vectors
// (_alist) together with a sequence count (_num_seq).  All member functions
// are defined outside this header.
//
// NOTE(review): Alignment(Alignment *) takes a pointer, so it is not a copy
// constructor; the class declares a destructor but no copy operations --
// confirm ownership of _alist in the definition.
class Alignment {
  // The vector type is spelled std::vector so that this header does not
  // depend on a using-directive leaking out of an earlier include.
  std::vector<struct AlignmentNode *> *_alist;
  int _num_seq;

 public:
  Alignment();
  Alignment(int num_seq);
  Alignment(Alignment *a);
  ~Alignment();
  
  void AddAlignmentNode(AlignmentNode *nd);
  struct AlignmentNode *GetAlignmentNodeList(Sequence *seq, int l, int r);
  // Both printers write to stdout unless another stream is supplied.
  void Print(FILE *fp = stdout);
  void PrintAnchs(FILE *fp = stdout);
  // `mutation` and `total` are passed by reference; presumably they receive
  // the counts behind the returned rate -- verify in the definition.
  float MutationRateInAlignments(Sequence *f, Sequence *c, int &mutation, int &total);
};

#endif


