/*****************************************************************
        Copyright by Rockefeller University,
can not be reproduced or distributed without written permission of
copyright holder.  Version of October 2003.

Written by Saurabh Sinha (contact person), Erik van Nimwegen, and 
Eric Siggia.

The program stubb (and its relatives) implement an algorithm for
finding likely cis-regulatory modules, described in the following
paper:
"A Probabilistic Method to Detect Regulatory Modules"
by Saurabh Sinha, Erik van Nimwegen and Eric Siggia. 
Eleventh International Conference on Intelligent Systems for
Molecular Biology, Brisbane, Australia, July 2003, pg 292-301.

The file sample/gap_wtmx that comes with this distribution includes 
a sample set of transcription factor weight matrices (PWM's) that 
were reported in :
"Computational detection of genomic cis-regulatory modules applied
to body patterning in the early Drosophila embryo"
by N. Rajewsky, M. Vergassola, U. Gaul and E. Siggia.
BMC Bioinformatics 3 (30) 2002.
******************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fastafile.h"

// Construct an empty collection: the sequence count is zero and every
// slot of the sequence table is cleared to NULL.
FastaFile::FastaFile()
{
  _numSequences = 0;

  int slot = 0;
  while (slot < MAX_NUM_SEQUENCES) {
    _sequences[slot] = NULL;
    ++slot;
  }
}

void FastaFile::ReadFasta(char *filename)
{
  FILE *fp = fopen(filename,"r");
  if (fp == NULL) {
    printf("Error reading file %s\n",filename);
    exit(1);
  }

  int numSequences = -1; int ptr;
  char *sequence = new char[Sequence::MAX_SEQ_LENGTH+1];
  char name[1024];
  char prevname[1024];

  int ch = 0;
  while ((ch = fgetc(fp)) != EOF){
    if (ch == '>') {	
				// skip to end of line and copy as name
      int nptr = 0;
      while ((ch = fgetc(fp)) != EOF && ch != '\n' && ch != '\r') name[nptr++] = ch;
      if (ch == EOF) break;
      name[nptr] = 0; 
				// get ready for a new region
      if (numSequences > -1) {  // already seen a sequence ...
	sequence[ptr] = 0;
	_sequences[numSequences++] = new Sequence(sequence,ptr,prevname);
	ptr = 0;
      }
      else {                    // this is the first sequence
	numSequences = 0;
	ptr = 0;
      }
      strcpy(prevname,name);
    }
    if (ch != '\n' && ch != '\r') {
      if (ptr==Sequence::MAX_SEQ_LENGTH) continue;
      sequence[ptr++] = ch;
    }
  }

  if (numSequences > -1) {
    sequence[ptr] = 0;
    _sequences[numSequences++] = new Sequence(sequence,ptr,name);
  }

  _numSequences = numSequences;

  fclose(fp);
  delete [] sequence;
}

// Destroy the Sequence objects held in the first _numSequences slots of
// the sequence table, in increasing slot order.
FastaFile::~FastaFile()
{
  int slot = 0;
  while (slot < _numSequences) {
    delete _sequences[slot];
    ++slot;
  }
}

int FastaFile::Size()
{
  return _numSequences;
}

// Return the pointer stored in slot `index` of the sequence table.
//
// Returns NULL when `index` lies outside the table (negative, or not
// below MAX_NUM_SEQUENCES) instead of reading outside the array.  Slots
// inside the table that were never filled also yield NULL, because the
// constructor clears every slot.
Sequence *FastaFile::operator[] (int index)
{
  if (index < 0 || index >= MAX_NUM_SEQUENCES) return NULL;
  return _sequences[index];
}
