/*****************************************************************
        Copyright by Rockefeller University,
can not be reproduced or distributed without written permission of
copyright holder.  Version of October 2003.

Written by Saurabh Sinha (contact person), Erik van Nimwegen, and 
Eric Siggia.

The program stubb (and its relatives) implement an algorithm for
finding likely cis-regulatory modules, described in the following
paper:
"A Probabilistic Method to Detect Regulatory Modules"
by Saurabh Sinha, Erik van Nimwegen and Eric Siggia. 
Eleventh International Conference on Intelligent Systems for
Molecular Biology, Brisbane, Australia, July 2003, pg 292-301.

The file sample/gap_wtmx that comes with this distribution includes 
a sample set of transcription factor weight matrices (PWM's) that 
were reported in :
"Computational detection of genomic cis-regulatory modules applied
to body patterning in the early Drosophila embryo"
by N. Rajewsky, M. Vergassola, U. Gaul and E. Siggia.
BMC Bioinformatics 3 (30) 2002.
******************************************************************/

#include "sequence.h"
#include "util.h"

// Creates an iterator that is not attached to any sequence yet.  All fields
// carry "not set" markers (NULL / -1) until a sequence is supplied and
// Begin() is called.
WindowIterator::WindowIterator()
{
  _current = -1;
  _shift_size = -1;
  _window_size = -1;
  _seq = NULL;
}

// Creates an empty multi-sequence iterator: no sequences, no window geometry
// and none of the arrays released by the destructor allocated yet.
MSWindowIterator::MSWindowIterator()
{
  _numS = 0;
  _seqs = NULL;

  _shift_size = -1;
  _window_size = -1;

  _alignedwith = NULL;
  _nextalignedcolumn = NULL;
  _currentends = NULL;
  _currentbegins = NULL;
}

// Releases the per-sequence begin/end arrays and the two alignment lookup
// tables.  delete[] applied to a NULL pointer does nothing, so the pointers
// need no explicit NULL test before being freed.
MSWindowIterator::~MSWindowIterator()
{
  delete [] _currentbegins;
  _currentbegins = NULL;

  delete [] _currentends;
  _currentends = NULL;

  delete [] _nextalignedcolumn;
  _nextalignedcolumn = NULL;

  delete [] _alignedwith;
  _alignedwith = NULL;
}

// Creates an iterator over `seq`.  Window and shift sizes as well as the
// current position stay at -1 until Begin() supplies real values.
WindowIterator::WindowIterator(Sequence *seq)
{
  _current = -1;
  _shift_size = -1;
  _window_size = -1;
  _seq = seq;
}

// Creates a two-sequence window iterator.  seqs[0] is the sequence that
// windows are laid out on; seqs[1] is the other sequence.
//
// Two tables indexed by position in seqs[0] are precomputed from the
// alignment nodes whose _otherSeq is seqs[1]:
//   _nextalignedcolumn[i] : i itself when i lies inside such a node's
//                           [_l1,_r1] range; otherwise the _l1 of the next
//                           node, or the length of seqs[0] if none follows.
//   _alignedwith[i]       : _l2 + (i - _l1) for positions inside a node,
//                           -1 for positions outside every node.
//
// Terminates the program (exit(1)) when numSequences is not 2, or when the
// sequence array, its first sequence, or that sequence's alignment
// information is missing.
MSWindowIterator::MSWindowIterator(Sequence **seqs, int numSequences)
{
  if (numSequences != 2) {
    printf("Multiple sequence window iterator only supported for two species\n");
    exit(1);
  }

  // Check everything that is dereferenced below *before* allocating or
  // touching it, so that bad input ends in a clean error, not a crash.
  if (seqs == NULL || seqs[0] == NULL || seqs[0]->Length() < 0 ||
      seqs[0]->_alignments == NULL) {
    printf("Error: cannot create window iterator\n");
    exit(1);
  }

  _seqs = seqs;
  _numS = numSequences;
  _window_size = -1;
  _shift_size = -1;
  _currentbegins = new int[_numS];
  _currentends = new int [_numS];

  int length = _seqs[0]->Length();
  _nextalignedcolumn = new int[length];
  _alignedwith = new int[length];

  // NOTE(review): ownership of the returned node list is not visible from
  // this file -- confirm that it does not have to be released here.
  struct AlignmentNode *ndlist = _seqs[0]->_alignments->GetAlignmentNodeList(_seqs[0],0,length-1);
  int curptr = 0;
  for (struct AlignmentNode *ptr = ndlist; ptr!=NULL; ptr = ptr->_next) {
    if (ptr->_otherSeq != _seqs[1]) continue;
    const int l1 = ptr->_l1;
    const int r1 = ptr->_r1;

    // Positions in front of this node: unaligned, next aligned column is l1.
    for (; curptr < l1 && curptr < length; curptr++) {
      _nextalignedcolumn[curptr] = l1;
      _alignedwith[curptr] = -1;
    }

    // Positions covered by this node: aligned with the matching offset in
    // the other sequence.
    for (; curptr <= r1 && curptr < length; curptr++) {
      _nextalignedcolumn[curptr] = curptr;
      _alignedwith[curptr] = ptr->_l2 + (curptr-l1);
    }
  }

  // Tail after the last node: unaligned, no further aligned column.
  for (; curptr < length; curptr++) {
    _nextalignedcolumn[curptr] = length;
    _alignedwith[curptr] = -1;
  }
}

// Fixed-shift flavour of the two-sequence iterator.  The base-class
// constructor does all the set-up; this class only remembers whether the
// window lists should skip the extra windows taken from the second sequence.
MSWindowIteratorFixedShift::MSWindowIteratorFixedShift(Sequence **seqs,
                                                       int numSequences,
                                                       bool ReferenceSequenceOnly)
  : MSWindowIterator(seqs, numSequences)
{
  this->_ref_seq_only = ReferenceSequenceOnly;
}

// Alignment-punctuated flavour of the two-sequence iterator.  It adds no
// state of its own; all set-up happens in the base-class constructor.
MSWindowIteratorAlignmentPunctuated::MSWindowIteratorAlignmentPunctuated(Sequence **seqs,
                                                                         int numSequences)
  : MSWindowIterator(seqs, numSequences)
{
  // nothing to do beyond the base-class initialisation
}

// Positions the iterator on the first window (start index 0).
//   window_size : number of positions per window
//   shift_size  : distance by which Next() moves the window
// Returns false, after printing an error, when the window is longer than the
// sequence.  Terminates the program when no sequence is attached.
bool WindowIterator::Begin(int window_size, int shift_size)
{
  if (!_seq) {
    printf("WindowIterator called on null sequence\n");
    exit(1);
  }

  if (window_size <= _seq->Length()) {
    _current = 0;
    _shift_size = shift_size;
    _window_size = window_size;
    return true;
  }

  // The requested window does not fit: report which sequence is too short.
  char seqname[1024];
  _seq->Name(seqname);
  printf("Error: Cannot create windows larger than sequence \"%s\" of length %d\n",seqname,_seq->Length());
  return false;
}

// Records the window and shift sizes for a new iteration.  Derived classes
// call this first and then set up their own start positions.
//   window_size : number of positions of the first sequence per window
//   shift_size  : step size stored for use by Next()
// Returns false, after printing an error, when the window is longer than the
// first sequence.  Terminates the program when the iterator has no sequences
// (e.g. it was default-constructed) or its first sequence is NULL.
bool MSWindowIterator::Begin(int window_size, int shift_size)
{
  // _seqs itself is NULL for a default-constructed iterator, so it has to be
  // tested before _seqs[0] may be looked at.
  if (_seqs == NULL || _seqs[0] == NULL) {
    printf("WindowIterator called on null sequence\n");
    exit(1);
  }
  if (window_size > _seqs[0]->Length()) {
    char seqname[1024]; _seqs[0]->Name(seqname); 
    printf("Error: Cannot create windows larger than sequence \"%s\" of length %d\n",seqname,_seqs[0]->Length());
    return false;
  }
  _window_size = window_size;
  _shift_size = shift_size;
  return true;
}

bool MSWindowIteratorFixedShift::Begin(int window_size, int shift_size)
{
  if (!this->MSWindowIterator::Begin(window_size, shift_size)) return false;
  _currentbegins[0] = 0;
  ComputeEndsGivenBegins();
  return true;
}

bool MSWindowIteratorAlignmentPunctuated::Begin(int window_size, int shift_size)
{
  if (!this->MSWindowIterator::Begin(window_size, shift_size)) return false;
  _currentbegins[0] = 0;
  _currentbegins[1] = 0;
  ComputeEndsGivenBegins();
  return true;
}

// True once the last position of the current window falls beyond the end of
// the sequence, i.e. there is no complete window left.
bool WindowIterator::End()
{
  const int lastpos = _current + _window_size - 1;
  return !(lastpos < _seq->Length());
}

// True once the recorded window end on the first sequence is at or beyond
// that sequence's length.
bool MSWindowIterator::End()
{
  const int lastpos = _currentends[0];
  return !(lastpos < _seqs[0]->Length());
}

// Moves the window start forward by the shift size given to Begin().
void WindowIterator::Next()
{
  _current = _current + _shift_size;
}

// Moves the window start on the first sequence forward by the shift size and
// recomputes the window end.  When the new start is already past the end of
// the sequence, the end is set past it as well, so that End() reports true.
void MSWindowIteratorFixedShift::Next()
{
    const int seqlen = _seqs[0]->Length();
    const int newbegin = _currentbegins[0] + _shift_size;

    _currentbegins[0] = newbegin;
    if (newbegin < seqlen) {
      ComputeEndsGivenBegins();
    }
    else {
      // Past the end: callers are expected to stop before getting here.
      _currentends[0] = newbegin + _window_size - 1;
    }
}

// Moves the window start to the next position of the first sequence at which
// the aligned/unaligned state changes (position i counts as aligned when
// _nextalignedcolumn[i] == i), sets the matching start in the second
// sequence and recomputes the window ends.
//
// When no such position exists, or when the iterator is already past the end
// of the first sequence, begin and end are placed past the end so that End()
// reports true.
void MSWindowIteratorAlignmentPunctuated::Next()
{
    const int length = _seqs[0]->Length();
    int beginat = _currentbegins[0];

    // Only scan when the current start is a valid index; a call made after
    // the iteration has finished must not read the tables out of bounds.
    if (beginat < length) {
      const bool alignment_state = (beginat == _nextalignedcolumn[beginat]);
      for (++beginat; beginat < length; ++beginat) {
        const bool new_alignment_state = (beginat == _nextalignedcolumn[beginat]);
        if (new_alignment_state != alignment_state) break;
      }
    }

    if (beginat >= length) {
      _currentbegins[0] = length;
      _currentends[0] = length+_window_size-1;
      return;   // this will be the last iteration
    }

    _currentbegins[0] = beginat;
    if (_nextalignedcolumn[beginat] == beginat) {
      // New start is aligned: begin at its partner in the second sequence.
      _currentbegins[1] = _alignedwith[beginat];
    }
    else {
      // New start is unaligned, so the position before it was aligned: begin
      // right after that position's partner in the second sequence.
      _currentbegins[1] = _alignedwith[beginat-1]+1;
    }

    ComputeEndsGivenBegins();
}

Window *WindowIterator::Current()
  // OBSOLETE
  // Returns a newly allocated Window covering the current position range;
  // the caller receives the raw pointer.  Terminates the program when the
  // sequence carries alignment information.
{
  Window *result = new Window(_seq, _current, _current + _window_size - 1);
  if (_seq->_alignments != NULL) {
    printf("Error: Single sequence window iterator incompatible with alignment information\n");
    exit(1);
  }
  return result;
}

void WindowIterator::CurrentWindowList(vector<Window *> *&wl, int min_window_length)
{
  int start = _current;
  int ptr = _current;
  while (ptr <= _current+_window_size-1) {
    if (_seq->CharAt(ptr) == '$') {
      if (ptr > start && ptr-start > min_window_length) {
	Window *window = new Window(_seq,start,ptr-1);
	if (window->Length() >= min_window_length)
	   wl->push_back(window);
	else delete window;
      }
      start = ptr+1;
    }
    ptr++;
  }
  if (ptr > start && ptr-start > min_window_length) {
    Window *window = new Window(_seq,start,ptr-1);
    if (window->Length() >= min_window_length)
       wl->push_back(window);
    else delete window;
  }    
}

// Appends the windows for the current iterator position to `wl`.
//
// The first window appended covers [_currentbegins[0], _currentends[0]] of
// the first sequence; every alignment node overlapping that range is
// attached to it via AlignWindow().  Unless the iterator was created with
// ReferenceSequenceOnly, the stretches of the second sequence lying between
// two consecutive alignment nodes are appended as separate windows as well.
// Nothing is appended when the first window is shorter than
// min_window_length; in-between windows shorter than that are skipped.
void MSWindowIteratorFixedShift::CurrentWindowList(vector<Window *> *&wl, int min_window_length)
{
  Window *win = new Window(_seqs[0],_currentbegins[0],_currentends[0]);
  if (win->Length() < min_window_length) {
    delete win;
    return;
  }
  wl->push_back(win);

  int otherleft = -1;   // first second-sequence position after the previous node
  // NOTE(review): ownership of the returned node list is not visible from
  // this file -- confirm that it does not have to be released here.
  struct AlignmentNode *ndlist = _seqs[0]->_alignments->GetAlignmentNodeList(_seqs[0],_currentbegins[0],_currentends[0]);
  for (struct AlignmentNode *ptr = ndlist; ptr != NULL; ptr = ptr->_next) {
    {
      // The aligned piece is only needed for the duration of the call, so a
      // scoped stack object replaces the former new/delete pair.
      Window owin(ptr->_otherSeq, ptr->_l2, ptr->_r2);
      win->AlignWindow(&owin,ptr->_l1-_currentbegins[0]);
    }

    if (!_ref_seq_only) {
      const int otherright = ptr->_l2 - 1;
      if (otherleft >= 0 && otherright >= otherleft) {
        Window *enclosed_window = new Window(_seqs[1],otherleft,otherright);
        if (enclosed_window->Length() >= min_window_length)
          wl->push_back(enclosed_window);
        else
          delete enclosed_window;
      }
    }
    otherleft = ptr->_r2+1;
  }
}

// Variant of CurrentWindowList() in which the second-sequence material is
// extended to match the first-sequence window: besides the stretches between
// consecutive alignment nodes, a "left hanging" piece in front of the first
// node and a "right hanging" piece behind the last node are appended.  Each
// hanging piece is as long as the part of the first-sequence window that
// sticks out beyond the node, clipped to the bounds of the second sequence.
//
// Terminates the program when the iterator was created with
// ReferenceSequenceOnly.  Nothing is appended when the first-sequence window
// is shorter than min_window_length; other pieces shorter than that are
// skipped.
void MSWindowIteratorFixedShift::CurrentWindowListEW(vector<Window *> *&wl, int min_window_length)
{
  if (_ref_seq_only) {
    printf("Error: not supported when ref_seq is true\n");
    exit(1);
  }

  Window *win = new Window(_seqs[0],_currentbegins[0],_currentends[0]);
  if (win->Length() < min_window_length) {
    delete win;
    return;
  }
  wl->push_back(win);

  int otherleft = -1;       // first second-sequence position after the previous node
  int lastthisright = -1;   // right end of the previous node on the first sequence
  // NOTE(review): ownership of the returned node list is not visible from
  // this file -- confirm that it does not have to be released here.
  struct AlignmentNode *ndlist = _seqs[0]->_alignments->GetAlignmentNodeList(_seqs[0],_currentbegins[0],_currentends[0]);
  for (struct AlignmentNode *ptr = ndlist; ptr != NULL; ptr = ptr->_next) {
    {
      // Only needed for the duration of the call, hence a scoped stack object.
      Window owin(ptr->_otherSeq, ptr->_l2, ptr->_r2);
      win->AlignWindow(&owin,ptr->_l1-_currentbegins[0]);
    }

    const int otherright = ptr->_l2 - 1;
    if (otherleft >= 0) {
      // Stretch of the second sequence between the previous node and this one.
      if (otherright >= otherleft) {
        Window *enclosed_window = new Window(_seqs[1],otherleft,otherright);
        if (enclosed_window->Length() >= min_window_length)
          wl->push_back(enclosed_window);
        else
          delete enclosed_window;
      }
    }
    else {
      // No node seen so far: add the left most hanging piece from the other
      // sequence, if the window sticks out to the left of this node and the
      // other sequence has positions in front of the node.
      const int overhang = ptr->_l1-_currentbegins[0];
      if (overhang > 0 && otherright >= 0) {
        int lefthangingotherleft = ptr->_l2-overhang;
        if (lefthangingotherleft < 0) lefthangingotherleft = 0;
        const int lefthangingsize = otherright - lefthangingotherleft + 1;
        if (lefthangingsize >= min_window_length)
          wl->push_back(new Window(_seqs[1],lefthangingotherleft,otherright));
      }
    }

    otherleft = ptr->_r2+1;
    lastthisright = ptr->_r1;
  }

  // Right hanging piece: the part of the window behind the last node.
  if (lastthisright >= 0) {
    const int overhang = _currentends[0] - lastthisright;
    const int otherlength = _seqs[1]->Length();
    if (overhang > 0 && otherleft < otherlength) {
      int righthangingotherright = otherleft+overhang-1;
      if (righthangingotherright >= otherlength) righthangingotherright = otherlength-1;
      const int righthangingsize = righthangingotherright - otherleft + 1;
      if (righthangingsize >= min_window_length)
        wl->push_back(new Window(_seqs[1],otherleft,righthangingotherright));
    }
  }
}

// Appends the windows for the current iterator position to `wl`.
//
// Candidates, in order: the first-sequence window
// [_currentbegins[0], _currentends[0]] with every overlapping alignment node
// attached via AlignWindow(), followed by the stretches of the second
// sequence inside [_currentbegins[1], _currentends[1]] that are not covered
// by any of those nodes.  Each candidate of at least min_window_length
// positions is appended; the others are deleted.
void MSWindowIteratorAlignmentPunctuated::CurrentWindowList(vector<Window *> *&wl, int min_window_length)
{
  // Candidates are collected first and filtered at the end, because the
  // first-sequence window must stay alive while nodes are attached to it.
  vector<Window *> candidates;
  Window *refwin = new Window(_seqs[0],_currentbegins[0],_currentends[0]);
  candidates.push_back(refwin);

  int otherleft = _currentbegins[1];
  // NOTE(review): ownership of the returned node list is not visible from
  // this file -- confirm that it does not have to be released here.
  struct AlignmentNode *ndlist = _seqs[0]->_alignments->GetAlignmentNodeList(_seqs[0],_currentbegins[0],_currentends[0]);
  for (struct AlignmentNode *ptr = ndlist; ptr != NULL; ptr = ptr->_next) {
    Window owin(ptr->_otherSeq, ptr->_l2, ptr->_r2);
    refwin->AlignWindow(&owin,ptr->_l1-_currentbegins[0]);

    // also add the uncorrelated part to the left of owin
    const int otherright = ptr->_l2 - 1;
    if (otherright >= otherleft) candidates.push_back(new Window(_seqs[1],otherleft,otherright));
    otherleft = ptr->_r2+1;
  }
  // and the uncorrelated part to the right of the last owin, if any
  if (otherleft <= _currentends[1]) candidates.push_back(new Window(_seqs[1],otherleft,_currentends[1]));

  for (vector<Window *>::size_type i = 0; i < candidates.size(); i++) {
    Window *candidate = candidates[i];
    if (candidate->Length() >= min_window_length) wl->push_back(candidate);
    else delete candidate;
  }
}

// Derives the window end on the first sequence from its begin and the window
// size.  A window that would run past the last position gets the sequence
// length as its end, which is the value End() tests for.
void MSWindowIteratorFixedShift::ComputeEndsGivenBegins()
{
  const int seqlen = _seqs[0]->Length();
  const int lastpos = _currentbegins[0] + _window_size - 1;

  _currentends[0] = (lastpos >= seqlen) ? seqlen : lastpos;
}

// Derives the window ends in both sequences from the current begin on the
// first sequence.
//
// Starting from the nominal end (begin + window size - 1), the end is pushed
// right up to the next position where the aligned/unaligned state changes
// (position i counts as aligned when _nextalignedcolumn[i] == i); the window
// stops just before that position.  A window whose nominal end lies past the
// sequence gets the sequence length as its first-sequence end, which is the
// value End() tests for; the second-sequence end is left untouched then.
void MSWindowIteratorAlignmentPunctuated::ComputeEndsGivenBegins()
{
  const int seqlen = _seqs[0]->Length();

  int boundary = _currentbegins[0] + _window_size - 1;
  if (boundary >= seqlen) {
    _currentends[0] = seqlen;
    return;
  }

  // Walk right until the alignment state differs from the one at the
  // nominal end, or the sequence is exhausted.
  const bool state_at_end = (_nextalignedcolumn[boundary] == boundary);
  for (++boundary; boundary < seqlen; ++boundary) {
    const bool state_here = (_nextalignedcolumn[boundary] == boundary);
    if (state_here != state_at_end) break;
  }

  if (boundary >= seqlen) {
    // No state change before the end: the window takes the rest of both
    // sequences.
    _currentends[0] = seqlen - 1;
    _currentends[1] = _seqs[1]->Length() - 1;
    return;
  }

  _currentends[0] = boundary - 1;
  if (_nextalignedcolumn[boundary] == boundary) {
    // An aligned run starts at `boundary`: stop just before its partner.
    _currentends[1] = _alignedwith[boundary] - 1;
  }
  else {
    // An aligned run ended at `boundary - 1`: stop at its partner.
    _currentends[1] = _alignedwith[boundary - 1];
  }
}


