/*****************************************************************
        Copyright by Rockefeller University,
can not be reproduced or distributed without written permission of
copyright holder.  Version of October 2003.

Written by Saurabh Sinha (contact person), Erik van Nimwegen, and 
Eric Siggia.

The program stubb (and its relatives) implement an algorithm for
finding likely cis-regulatory modules, described in the following
paper:
"A Probabilistic Method to Detect Regulatory Modules"
by Saurabh Sinha, Erik van Nimwegen and Eric Siggia. 
Eleventh International Conference on Intelligent Systems for
Molecular Biology, Brisbane, Australia, July 2003, pg 292-301.

The file sample/gap_wtmx that comes with this distribution includes 
a sample set of transcription factor weight matrices (PWM's) that 
were reported in :
"Computational detection of genomic cis-regulatory modules applied
to body patterning in the early Drosophila embryo"
by N. Rajewsky, M. Vergassola, U. Gaul and E. Siggia.
BMC Bioinformatics 3 (30) 2002.
******************************************************************/

#include "sequence.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>

#define LAGAN_LINELEN 1024

Sequence **ReadLaganOutput(char *fastafile, char *anchsfile, int &numSequences)
{
  char **sequences = new char *[MAX_ALIGNED_SEQUENCES];
  int *seq_lengths = new int[MAX_ALIGNED_SEQUENCES];
  char **seq_names = new char *[MAX_ALIGNED_SEQUENCES];
  numSequences = 0;

  char line[LAGAN_LINELEN];
  char *sequence = NULL;
  int last_seq_len = 0;
  char name[LAGAN_LINELEN];

  // Read the sequences
  bool first_sequence_encountered = false;
  FILE *fp = fopen(fastafile,"r");
  if (fp == NULL) {
    printf("Error: file %s could not be opened\n",fastafile);
    exit(1);
  }
  while(fgets(line,LAGAN_LINELEN-1,fp)) {
    if (line[0]!='>' && !first_sequence_encountered) continue;
    if (line[0]=='>' && !first_sequence_encountered) first_sequence_encountered = true;

    // chomp
    int last_pos = strlen(line)-1;
    if (line[last_pos]=='\n' || line[last_pos]=='\r') line[last_pos] = 0;

    if (line[0]=='>') {
      if (sequence!=NULL) {
	sequences[numSequences] = new char[last_seq_len+1];
	strcpy(sequences[numSequences],sequence);
	seq_names[numSequences] = new char[strlen(name)+1];
	strcpy(seq_names[numSequences],name);
	seq_lengths[numSequences] = last_seq_len;
	delete [] sequence;

	numSequences++;
      }

      strcpy(name,line);
      sequence = new char[Sequence::MAX_SEQ_LENGTH+1];
      sequence[0] = 0;
      continue;
    }
      
    // concatenate
    strcat(sequence,line);
    last_seq_len = strlen(sequence);
  }
  fclose(fp);

  if (sequence) {
    sequences[numSequences] = new char[last_seq_len+1];
    strcpy(sequences[numSequences],sequence);
    seq_names[numSequences] = new char[strlen(name)+1];
    strcpy(seq_names[numSequences],name);
    seq_lengths[numSequences] = last_seq_len;
    delete [] sequence;
    sequence = NULL;
    numSequences++;
  }

  if (numSequences != 2) {
    printf("Error: Only two species supported\n");
    exit(1);
  }

  // Create the sequence objects
  Sequence **seqs = new Sequence *[numSequences];
  for (int i=0; i<numSequences; i++) {
    seqs[i] = new Sequence(sequences[i],seq_lengths[i],seq_names[i]);
    seqs[i]->SetSpeciesIndex(i);
  }

  // Create the alignment object
  FILE *afp = fopen(anchsfile,"r");
  if (afp == NULL) {
    printf("Error: file %s could not be opened\n",anchsfile);
    exit(1);
  }
  Alignment *alignments = new Alignment(numSequences);
  while (fgets(line,LAGAN_LINELEN-1,afp)) {
    int l1,l2,r1,r2;
    if (sscanf(line,"(%d %d)=(%d %d)", &l1,&r1,&l2,&r2) < 4) break;
    l1--; l2--; r1--; r2--;
    if (r1-l1 != r2-l2) {
#ifdef WARNINGS
      printf("Warning: Unequal length aligned block encountered and discarded\n");
#endif
      continue;
    }
    alignments->AddAlignmentNode(new AlignmentNode(seqs[0],l1,r1,seqs[1],l2,r2));
    alignments->AddAlignmentNode(new AlignmentNode(seqs[1],l2,r2,seqs[0],l1,r1));
  }

  // alignments->Print();

  for (int i=0; i<numSequences; i++) {
    delete [] seq_names[i];
    delete [] sequences[i];
  }
  delete [] seq_lengths;
  delete [] seq_names;
  delete [] sequences;

  seqs[0]->_alignments = alignments;
  return seqs;
}


