/*                        PhyloGibbs                                  */

/*   Algorithm developed by Rahul Siddharthan, Erik van Nimwegen      * 
 *   and Eric D. Siggia at The Rockefeller University, New York       * 
 *                                                                    *
 *   This code copyright (C) 2004 Rahul Siddharthan <rsidd@online.fr> * 
 *   Licensed under the GNU General Public License (see COPYING)      */ 

/* 
 * $Author: rsidd $  
 * $Date: 2005/05/02 08:54:09 $ 
 * $Id: inittrackedbins.c,v 1.1 2005/05/02 08:54:09 rsidd Exp $ 
 */

#include "interspecies.h"
#include <stdio.h>
#include <string.h>
#include <glib.h>
#include <assert.h>
#include <ctype.h>


/*
 * Strip a line terminator from the end of S, in place.
 *
 * At most one trailing '\n' is removed, and then at most one trailing
 * '\r', so "text\n", "text\r\n" and "text\r" all become "text".
 * An empty string is left untouched: the length is checked before the
 * last character is inspected, so s[-1] is never read or written.
 */
void mychomp(char *s) {
    size_t len = strlen(s);

    if (len > 0 && s[len-1] == '\n')
        s[--len] = '\0';
    if (len > 0 && s[len-1] == '\r')
        s[--len] = '\0';
}


/*
 * Report whether S is a decimal digit character.
 *
 * Returns 1 for '0' through '9' and 0 for anything else.
 */
int myisnumber(char s)
{
  /* C guarantees that '0'..'9' have consecutive values, so a range
     test is equivalent to comparing against each digit in turn. */
  return (s >= '0' && s <= '9') ? 1 : 0;
}
  



int readmotiffile(params *v) 
{
  char s[1024];
  int lastpos,thispos,motifnum,inmotif,pos,thislen,tmp;
  double na,nc,ng,nt,zero;
  GPtrArray *thispriorbinbase = NULL; /***base counts for a WM ***/
  GString *thisname = NULL;
  GArray *onebinbase; /**array with the base-counts in a color***/
  FILE* inputfile;
  
  zero = 0.0;
  /***this routine reads a set of WMs from a file and initializes the pseudocount of their colors
      to these WMs**************/
  
  v->priorbinbase=g_ptr_array_new(); /**array with all WMs**/
  v->priorbinname=g_ptr_array_new(); /**array with names of the WMs**/
  inputfile=fopen(v->motiffile,"r");
  
  motifnum = 0;
  lastpos = 0;
  inmotif = 0;
  
  if (inputfile==NULL) {
    fprintf(stderr, "Motif file %s does not exist or cannot be opened.\n",v->motiffile);
    return 1;
  }
  /**reading the file****/
  while (fgets(s,1024,inputfile)) /**get line from the file***/
    {
      /***line with start of motif indicator***/
      if(inmotif == 0 && s[0] == '/' && s[1] == '/')
	{
	  thispriorbinbase=g_ptr_array_new();
	  /**set empty name***/
	  thisname = g_string_new("");
	  ++motifnum;
	  lastpos = 0;
	  inmotif = 1;
	  thislen = 0;
	}
      /**name line, already in motif***/
      else if((s[0] == 'N' && s[1] == 'A' && inmotif == 1) || (s[0] == 'I' && s[1] == 'D' && inmotif == 1))
	{
	  /**set name line***/
	  mychomp(s);
	  pos = 2;
	  while(isspace(s[pos]) && s[pos] != '\0'){
	    ++pos;
	  }
	  if(s[pos] == '\0'){
	    --pos;
	  }
	  if(pos < 2){
	    pos = 2;
	  }
	  g_string_append(thisname,s+pos);
	}
      /**name line starting a new motif****/
      else if((s[0] == 'N' && s[1] == 'A' && inmotif == 0) || (s[0] == 'I' && s[1] == 'D' && inmotif == 0))
	{
	  thispriorbinbase=g_ptr_array_new();
	  /**set empty name***/
	  thisname = g_string_new("");
	  ++motifnum;
	  lastpos = 0;
	  inmotif = 1;
	  thislen = 0;
	  /**set name line***/
	  mychomp(s);
	  pos = 2;
	  while(isspace(s[pos]) && s[pos] != '\0'){
	    ++pos;
	  }
	  if(s[pos] == '\0'){
	    --pos;
	  }
	  if(pos < 2){
	    pos = 2;
	  }
	  g_string_append(thisname,s+pos);
	}
      /***line with counts***/
      else if(myisnumber(s[0]) &&( myisnumber(s[1]) || isblank(s[1])))
	{
	  if(inmotif == 0)
	    {
	      fprintf(stderr, "error: encountered a line with motif counts without having read ID (identifier) or NA (name) for motif %d\n",motifnum);
	      return 1;
	    }
	  sscanf(s,"%d %lf %lf %lf %lf",&thispos,&na,&nc,&ng,&nt);
	  if(thispos-lastpos != 1 && !(thispos == 0 && lastpos == 0))
	    {
	      fprintf(stderr,"Warning: position numbering went from %d to %d in input motif %s number %d\n",lastpos,thispos,thisname->str,motifnum);
	    }
	  lastpos = thispos;
	  onebinbase=g_array_new(TRUE,TRUE,sizeof(double));
	  g_array_append_val(onebinbase,na);
	  g_array_append_val(onebinbase,nc);
	  g_array_append_val(onebinbase,ng);
	  g_array_append_val(onebinbase,nt);
	  /***add column to counts for this WM***/
	  g_ptr_array_add(thispriorbinbase,onebinbase);
	  ++thislen;
	}
      /**end of a WM***/
      else if( s[0] == '/' && s[1] == '/' && inmotif == 1)
	{
	  if(thislen < v->wwidth)
	    {
	      fprintf(stderr,"Warning: width of motif in motiffile smaller than window length\n");
	      fprintf(stderr,"motif %s number %d has length %d whereas window length is %d\n",thisname->str,motifnum,thislen,v->wwidth);
	      fprintf(stderr,"the program will pad the motif with zero-count columns\n");
	      while(thislen < v->wwidth){
		onebinbase=g_array_new(TRUE,TRUE,sizeof(double));
		g_array_append_val(onebinbase,zero);
		g_array_append_val(onebinbase,zero);
		g_array_append_val(onebinbase,zero);
		g_array_append_val(onebinbase,zero);
		/***add column to counts for this WM***/
		g_ptr_array_add(thispriorbinbase,onebinbase);
		++thislen;
	      }
	    }
	  else if(thislen > v->wwidth)
	    {
	      printf("Warning: width of motif in motiffile larger than window length\n");
	      printf("motif %s number %d has length %d whereas window length is %d\n",thisname->str,motifnum,thislen,v->wwidth);
	      printf("the program will ignore the last %d positions in the motif\n",thislen-(v->wwidth));
	    }
	  /**add the WM to the list of WMs***/
	  g_ptr_array_add((v->priorbinbase),thispriorbinbase);
	  g_ptr_array_add((v->priorbinname),thisname);
	  /**check width and issue a warning or error if not matching***/
	  inmotif = 0;
	}
      /***line with total number of TFs****/
      else if(s[0] == 'N' && s[1] == 'U' && s[2] == 'M' && s[3] == 'T' && s[4] == 'F')
        {
          sscanf(s,"NUMTF %d\n",&tmp);
          v->numtfs = tmp;
        }

    }
  fclose(inputfile);


  /**if no numtfs specified return an error***/
  if(v->numtfs < 0)
    {
      fprintf(stderr,"Warning: No total number of WMs found. Will assume no additional motifs possible\n");
      v->numtfs = 0;
    }


  /*motif ended without a // **/
  if(inmotif)
    {
      if(thislen < v->wwidth)
	{
	  fprintf(stderr,"Warning: width of motif in motiffile smaller than window length\n");
	  fprintf(stderr,"motif number %d has length %d whereas window length is %d\n",motifnum,lastpos,v->wwidth);
	  fprintf(stderr,"the program will pad the motif with zero-count columns\n");
	  while(thislen < v->wwidth){
	    onebinbase=g_array_new(TRUE,TRUE,sizeof(double));
	    g_array_append_val(onebinbase,zero);
	    g_array_append_val(onebinbase,zero);
	    g_array_append_val(onebinbase,zero);
	    g_array_append_val(onebinbase,zero);
	    /***add column to counts for this WM***/
	    g_ptr_array_add(thispriorbinbase,onebinbase);
	    ++thislen;
	  }
	}
      else if(thislen > v->wwidth)
	{
	  fprintf(stderr,"Warning: width of motif in motiffile larger than window length\n");
	  fprintf(stderr,"motif number %d has length %d whereas window length is %d\n",motifnum,thislen,v->wwidth);
	  fprintf(stderr,"the program will ignore the last %d positions in the motif\n",thislen-(v->wwidth));
	}
      g_ptr_array_add((v->priorbinbase),thispriorbinbase);
      g_ptr_array_add((v->priorbinname),thisname);
    }
  /*printf("read %d motifs from %s\n",motifnum,v->motiffile);*/

  /*for(n=0;n<(v->priorbinbase)->len;++n)
    {
      printf("MOTIF %d\n", n);
      thispriorbinbase = g_ptr_array_index(v->priorbinbase,n);
      thisname = g_ptr_array_index(v->priorbinname,n);
      printf("name of motif %d is %s\n",n,thisname->str);
      for(m=0;m<(thispriorbinbase->len);++m)
	{
	  onebinbase = g_ptr_array_index(thispriorbinbase,m);
	  na = g_array_index(onebinbase,double,0);
	  nc = g_array_index(onebinbase,double,1);
	  ng = g_array_index(onebinbase,double,2);
	  nt = g_array_index(onebinbase,double,3);
	  printf("counts %lf %lf %lf %lf\n",na,nc,ng,nt);
	}
	}*/
  return 0;
}
