/*                        PhyloGibbs                                  */

/*   Algorithm developed by Rahul Siddharthan, Erik van Nimwegen      * 
 *   and Eric D. Siggia at The Rockefeller University, New York       * 
 *                                                                    *
 *   This code copyright (C) 2004 Rahul Siddharthan <rsidd@online.fr> * 
 *   Licensed under the GNU General Public License (see COPYING)      */ 

/* 
 * $Author: rsidd $  
 * $Date: 2005/05/22 13:29:54 $ 
 * $Id: binprint.c,v 1.6 2005/05/22 13:29:54 rsidd Exp $ 
 */


#include <stdio.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include <gsl/gsl_permutation.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_sort.h>
#include <gsl/gsl_sort_vector.h>
#include <gsl/gsl_matrix.h>

#include "interspecies.h"
#include "binbasecount.h"
#include "fullbinweight.h"
#include "windowscore.h"
#include "commonroutines.h"
#include "tree_routines.h"

/*
 * Return the complement of an upper-case IUPAC nucleotide code.
 *
 * The fifteen codes A C G T S W R Y M K D H B V N are mapped to their
 * complementary code (S, W and N are their own complement).  Any other
 * character, including lower-case letters, is returned unchanged.
 */
char rcchar(char a) {
    /* plain[i] pairs with paired[i]; both tables have the same length. */
    static const char plain[]  = "ACGTSWRYMKDHBVN";
    static const char paired[] = "TGCASWYRKMHDVBN";
    int idx;

    /* The scan stops at the table terminator, so '\0' falls through. */
    for (idx = 0; plain[idx] != '\0'; idx++) {
        if (plain[idx] == a)
            return paired[idx];
    }
    return a;
}
    

/*
 * Extract a short name from a header string.
 *
 * Leading '>' characters and whitespace in seq are skipped; the following
 * run of non-whitespace characters is then copied to outseq, truncated to
 * at most count characters, and NUL-terminated.
 *
 *   seq     NUL-terminated input string (not modified).
 *   outseq  destination buffer; must hold at least count+1 bytes.
 *   count   maximum number of name characters to copy; a value <= 0
 *           yields an empty string.
 *
 * At most count+1 bytes of outseq are written, and seq is never read
 * beyond its terminating NUL.
 */
void getnameshortstr(char *seq, char *outseq, int count) {
  int nlow,nlen;

  /* Skip the '>' marker(s) and any leading blanks.  The terminating NUL
   * is neither, so this loop cannot run off the end of seq.  The cast
   * keeps isspace() defined for negative char values. */
  nlow=0;
  while ((seq[nlow]=='>')||(isspace((unsigned char)seq[nlow])))
    nlow++;

  /* Measure the name: stop at the length limit, end of string, or the
   * first whitespace character (bounds are tested before the byte). */
  nlen=0;
  while ((nlen<count)&&(seq[nlow+nlen]!=0)
         &&(!isspace((unsigned char)seq[nlow+nlen])))
    nlen++;

  memcpy(outseq,seq+nlow,(size_t)nlen);
  outseq[nlen]=0;
}


/*
 * Write a text report of the current assignment of windows to bins
 * ("colours"): a run header, then for every non-empty bin its aligned
 * windows followed by the weight matrix of absolute base counts.
 *
 *   v          run state.  v->binnums is replaced by a fresh int array
 *              that receives the bin indices in the order they are
 *              ranked here (empty bins included), and
 *              makebinbasecount(v) is called before anything is printed.
 *   printfile  output path.  If it is the empty string, v->outfile is
 *              used instead, where the literal name "stdout" selects the
 *              standard output stream.
 *
 * Returns 0 after writing the report, 1 if v->bin holds a single entry
 * (nothing is printed), and -1 if the output file cannot be opened.
 *
 * Bins are ranked by the smallest windowscore() among their windows, and
 * windows inside a bin are listed by ascending windowscore().  Windows
 * flagged with dir are printed reverse-complemented.  Each site is shown
 * in upper case with wwidth/2 flanking positions on either side in lower
 * case; flank positions outside the sequence are printed as blanks.
 */
int binprint(params *v, char *printfile) 
{

    int l,m,m1,n,n1,n2,n3,currpos,thisstart;

    /* tempstr receives a short name of up to 10 characters from
     * getnameshortstr(); it is sized with slack beyond 10+NUL. */
    char *dataptr, outchar, tempstr[12];
    GPtrArray *binwin1;
    dialignseq tempseq;
    window *currwin1;
    FILE *outputfile;
    GArray *thisbasecountset;
    GPtrArray *thispriorbinbase; /***base counts for a WM ***/
    GArray *onebinbase; /**array with the base-counts in a color***/
    GString *thisname;
    gsl_permutation *win_permut, *bin_permut;
    gsl_vector *winscorevec, *binscorevec;
    gsl_matrix *basecounts;
    double thiswinscore,thisbinscore,fbw,nA,nC,nG,nT,thisscore;
    thispriorbinbase = NULL;

    /**start from an empty list of printed bin numbers***/
    if ((v->binnums)!=NULL)
        g_array_free((v->binnums),TRUE);
    v->binnums=g_array_new(TRUE,TRUE,sizeof(int));
    
    
    makebinbasecount(v);

    /**empty predictions, simply return***/
    if ((v->bin)->len==1)
        return 1;


    /**bin 0 is not reported, so the per-bin arrays have len-1 entries***/
    basecounts=gsl_matrix_alloc(v->wwidth,4);
    bin_permut=gsl_permutation_alloc((v->bin)->len-1);
    binscorevec=gsl_vector_alloc((v->bin)->len-1);
    /**get score of best scoring window for each color***/
    for (n=1; n<(v->bin)->len; n++) {
        binwin1=g_ptr_array_index((v->bin),n);
        /**a bin without windows keeps this starting value***/
        thisbinscore=100.0;
        for (m=0; m<binwin1->len; m++) {
            currwin1=g_ptr_array_index(binwin1,m);
            thiswinscore=windowscore(v,currwin1);
            if (thiswinscore<thisbinscore)
                thisbinscore=thiswinscore;
        }
        gsl_vector_set(binscorevec,n-1,thisbinscore);
    }
    /**sort the bins according to which has best scoring window****/
    gsl_sort_vector_index(bin_permut, binscorevec);

    /**open output file***/
    if (strlen(printfile)>0)
        outputfile=fopen(printfile,"wt");
    else if (strcmp(v->outfile,"stdout")==0) 
        outputfile=stdout;
    else
        outputfile=fopen(v->outfile,"wt");

    /**give up cleanly rather than printing to a NULL stream***/
    if (outputfile==NULL) {
        fprintf(stderr,"binprint: cannot open output file \"%s\" for writing\n",
                (strlen(printfile)>0) ? printfile : v->outfile);
        gsl_vector_free(binscorevec);
        gsl_permutation_free(bin_permut);
        gsl_matrix_free(basecounts);
        return -1;
    }
    
    /**run header: arguments, input sequences, seed, move counts***/
    fprintf(outputfile,"Command-line arguments: %s\n",v->arguments);
    
    for (n=0; n<(v->seqarray)->len; n++) {
        tempseq=g_array_index((v->seqarray),dialignseq,n);
        fprintf(outputfile,"Seq %3d: %s Length %d\n",n,tempseq.name->str,tempseq.barelength);
    }
    fprintf(outputfile,"\n");
    fprintf(outputfile, "GSL Random number seed: %ld\n", v->seed);
    fbw=fullbinweight(v);
    fprintf(outputfile,"No. of moves: colour %ld, single window %ld, shift %ld, total %ld\n",
            v->tcsteps,v->twsteps,v->tssteps,v->tcsteps+v->twsteps+v->tssteps);
    fprintf(outputfile, "Log-posterior probability of the reference state: %f\n\n",fbw/(v->beta));

    fprintf(outputfile,"======== Reference state obtained through annealing. ========\n\n");

    /**n is the rank of the bin in the report, n1 its index in v->bin***/
    for (n=1; n<(v->bin)->len; n++) 
      {
        n1=gsl_permutation_get(bin_permut,n-1)+1;
        g_array_append_val((v->binnums),n1);
        binwin1=g_ptr_array_index((v->bin),n1);
        /**empty bins are recorded in binnums above but not printed***/
        if (binwin1->len == 0) 
          continue;
        /**check if it has prior motif****/
        if((v->priorbinbase)->len < n1)
          {
            fprintf(outputfile,"Motif %d.\nNumber of windows = %d  Top window score= %g \n\n",
                    n, binwin1->len, gsl_vector_get(binscorevec,n1-1));
          }
        else
          {
            thisname = g_ptr_array_index(v->priorbinname,n1-1);
            fprintf(outputfile,"Motif %d, Reference Motif %d %s.\nNumber of windows = %d  Top window score= %g \n\n",n,n1, thisname->str,binwin1->len, gsl_vector_get(binscorevec,n1-1));
          }

        /**score every window of this bin and order them by score***/
        winscorevec=gsl_vector_alloc(binwin1->len);
        win_permut=gsl_permutation_alloc(binwin1->len);
        gsl_matrix_set_zero(basecounts);
        for (m=0; m<binwin1->len; m++) 
          {
            currwin1=g_ptr_array_index(binwin1,m);
            gsl_vector_set(winscorevec,m,windowscore(v,currwin1));
          }
        if (binwin1->len > 1)
          gsl_sort_vector_index(win_permut, winscorevec);
        for (m=0; m<binwin1->len; m++) 
          {
            /**win_permut is only filled in when there was something to sort***/
            if (binwin1->len > 1)
              m1=gsl_permutation_get(win_permut,m);
            else
              m1=0;
            currwin1=g_ptr_array_index(binwin1,m1);

            /***update base counts from this window****/
            for(l=0;l<v->wwidth;++l)
              {
                if(currwin1->dir)
                  {
                    /**reversed window: read columns back to front and
                       swap each base with the one at index 3-n3***/
                    thisbasecountset=g_ptr_array_index(currwin1->basecount,v->wwidth-1-l);
                    for (n3=0; n3<4; n3++)
                      gsl_matrix_set(basecounts,l,n3,gsl_matrix_get(basecounts,l,n3)+g_array_index(thisbasecountset,double,3-n3));
                  }
                else
                  {
                    thisbasecountset=g_ptr_array_index(currwin1->basecount,l);
                    for(n3=0;n3<4;n3++)
                      gsl_matrix_set(basecounts,l,n3,gsl_matrix_get(basecounts,l,n3)+g_array_index(thisbasecountset,double,n3));
                  }
              }
            /**run over all sequences in the window*****/
            for (n2=0; n2<currwin1->seq->len; n2++) 
              {
                /**reverse complemented window*****/
                if(currwin1->dir)
                  {
                    /**run backward****/
                    for (l=v->wwidth-1+v->wwidth/2; l>=-v->wwidth/2; l--)
                      {
                        /**'x' is what gets printed for a character the
                           switch below does not recognise***/
                        outchar='x';
                        /**in the flank, outside the site: blank it if the
                           position falls off either end of the sequence***/
                        if (!((l<=v->wwidth-1)&&(l>=0)))
                          {
                            currpos=g_array_index(currwin1->start,int,n2)+l;
                            tempseq=g_array_index((v->seqarray),dialignseq,
                                                  g_array_index(currwin1->seq,int,n2));
                            if ((currpos<0) || (currpos>=tempseq.bareseq->len))
                              outchar=' ';
                          }
                        /***print if there is a character at this position**/
                        if (outchar != ' ')
                          {
                            dataptr=g_array_index(currwin1->data,char*,n2);
                            switch (*(dataptr+l))
                              {
                              case 'a': case 'A': outchar='T'; break;
                              case 'c': case 'C': outchar='G'; break;
                              case 'g': case 'G': outchar='C'; break;
                              case 't': case 'T': outchar='A'; break;
                              case 'r': case 'R': outchar='Y'; break;
                              case 'y': case 'Y': outchar='R'; break;
                              case 'm': case 'M': outchar='K'; break;
                              case 'k': case 'K': outchar='M'; break;
                              case 's': case 'S': outchar='S'; break;
                              case 'w': case 'W': outchar='W'; break;
                              case 'b': case 'B': outchar='V'; break;
                              case 'v': case 'V': outchar='B'; break;
                              case 'd': case 'D': outchar='H'; break;
                              case 'h': case 'H': outchar='D'; break;
                              case 'n': case 'N': outchar='N'; break;
                              default: break; /**outchar stays 'x'***/
                              }
                            /**flanks in lower case, the site in upper case***/
                            if (!((l<=v->wwidth-1)&&(l>=0)))
                              outchar=tolower((unsigned char)outchar);
                          }
                        fputc(outchar,outputfile);
                      }
                    /***print rev-complement and place in alignment***/
                    if (n2==0)
                      fprintf(outputfile," -- [rev] ");
                    else if (n2==currwin1->seq->len-1)
                      fprintf(outputfile," `- [rev] ");
                    else
                      fprintf(outputfile," |- [rev] ");
                  }
                /***on forward strand*****/
                else
                  {
                    for (l=-v->wwidth/2; l<v->wwidth+v->wwidth/2; l++)
                      {
                        if ((l>=0)&&(l<v->wwidth))
                          {
                            /**inside the site: upper case***/
                            dataptr=g_array_index(currwin1->data,char*,n2);
                            outchar=toupper((unsigned char)*(dataptr+l));
                            fputc(outchar,outputfile);
                          }
                        else
                          {
                            /**flank: lower case, or a blank when the
                               position is outside the sequence***/
                            currpos=g_array_index(currwin1->start,int,n2)+l;
                            tempseq=g_array_index((v->seqarray),dialignseq,
                                                  g_array_index(currwin1->seq,int,n2));
                            if ((currpos>=0) && (currpos<tempseq.bareseq->len))
                              {
                                dataptr=g_array_index(currwin1->data,char*,n2);
                                outchar=tolower((unsigned char)*(dataptr+l));
                                fputc(outchar,outputfile);
                              }
                            else
                              {
                                outchar=' ';
                                fputc(outchar,outputfile);
                              }
                          }
                      }
                    if (n2==0)
                      fprintf(outputfile," -- [fwd] ");
                    else if (n2==currwin1->seq->len-1)
                      fprintf(outputfile," `- [fwd] ");
                    else
                      fprintf(outputfile," |- [fwd] ");
                  }
                
                /**sequence number, short name and position of this row***/
                tempseq=g_array_index((v->seqarray),dialignseq,g_array_index(currwin1->seq,int,n2));
                getnameshortstr(tempseq.name->str,tempstr,10);
                thisstart= g_array_index(currwin1->start,int,n2);
                thisscore = gsl_vector_get(winscorevec,m1);
                /**with printreverse the position is reported as a negative
                   offset: start minus the bare sequence length***/
                if (v->printreverse)
                  thisstart=-(tempseq.barelength-thisstart);
                /**only the first row of a window carries the score***/
                if (n2==0)
                  fprintf(outputfile," seq  %3d %11s  pos  %4d score %9.4g ",
                          g_array_index(currwin1->seq,int,0), tempstr,
                          thisstart,thisscore);
                else
                  fprintf(outputfile," seq  %3d %11s  ",
                          g_array_index(currwin1->seq,int,n2), tempstr);

                if (n2!=0)
                  fprintf(outputfile, "pos  %4d  ",thisstart);
                
                fprintf(outputfile,"\n");
              }
          }
        fprintf(outputfile,"-------- Weight matrix for this motif (absolute base counts)---------\n");
        fprintf(outputfile,"//\n");
        if((v->priorbinbase)->len < n1)
          {
            fprintf(outputfile,"NA Motif_%d\n",n);
          }
        else
          {
            thisname = g_ptr_array_index(v->priorbinname,n1-1);
            fprintf(outputfile,"NA Motif_%d, Reference %d %s\n",n,n1,thisname->str);
            thispriorbinbase = g_ptr_array_index(v->priorbinbase,n1-1);
          }
        fprintf(outputfile,"%2s    %6c     %6c     %6c     %6c     %6s      %6s\n","PO",'A','C','G','T',"cons","inf");
        for (m=0; m<v->wwidth; m++) 
          {
            nA=gsl_matrix_get(basecounts,m,0);
            nC=gsl_matrix_get(basecounts,m,1);
            nG=gsl_matrix_get(basecounts,m,2);
            nT=gsl_matrix_get(basecounts,m,3);
            /**add prior counts if existent****/
            if((v->priorbinbase)->len >= n1)
              {
                onebinbase = g_ptr_array_index(thispriorbinbase,m);
                nA += g_array_index(onebinbase,double,0);
                nC += g_array_index(onebinbase,double,1);
                nG += g_array_index(onebinbase,double,2);
                nT += g_array_index(onebinbase,double,3);
              }
            fprintf(outputfile,"%2s    %6.2f     %6.2f     %6.2f     %6.2f     %6c      %6.2f\n",
                    twodigitstr(m+1), nA,nC,nG,nT,
                    consensus(nA,nC,nG,nT),
                    infscore(nA,nC,nG,nT));
          }
        fprintf(outputfile,"//\n");
        fprintf(outputfile, "==============================\n");
        gsl_vector_free(winscorevec);
        gsl_permutation_free(win_permut);
      }

    fprintf(outputfile, "\n");
    
    /**close every stream opened here; stdout is left to the caller***/
    if (outputfile!=stdout)
      fclose(outputfile);
    
    gsl_vector_free(binscorevec);
    gsl_permutation_free(bin_permut);
    gsl_matrix_free(basecounts);
        
    return 0;

}
