/*                        PhyloGibbs                                  */

/*   Algorithm developed by Rahul Siddharthan, Erik van Nimwegen      * 
 *   and Eric D. Siggia at The Rockefeller University, New York       * 
 *                                                                    *
 *   This code copyright (C) 2004 Rahul Siddharthan <rsidd@online.fr> * 
 *   Licensed under the GNU General Public License (see COPYING)      */ 

/* 
 * $Author: rsidd $
 * $Date: 2005/05/22 12:01:41 $
 * $Id: commonroutines.c,v 1.6 2005/05/22 12:01:41 rsidd Exp $
 */

#include <string.h>
#include <math.h>
#include "interspecies.h"
#include "fasta.h"

/* Decimal digit characters indexed by value (digits[7] is '7').
 * The array holds exactly ten chars, leaving no room for a terminating
 * NUL, so it must be indexed and never passed to string functions. */
const char digits[10]="0123456789";


/* Build a copy of a sequence set with every dash ('-') removed from the
 * sequence text; intended for use in setbgcounts.
 *
 * seqs: GArray of fastaseq records; it is only read, never modified.
 *
 * Returns a newly created GArray of fastaseq.  For each input record the
 * output record has:
 *   - header:  copied by plain assignment from the input record (this
 *              function does not duplicate whatever it refers to),
 *   - seq:     a freshly allocated GString with the non-dash characters
 *              in their original order,
 *   - seqnums: NULL.
 * Any other fastaseq fields are not set here. */
GArray* stripdash(GArray *seqs)
{
    GArray *stripped = g_array_new(TRUE, TRUE, sizeof(fastaseq));
    fastaseq src, dst;
    const gchar *p, *end;
    guint i;

    for (i = 0; i < seqs->len; i++) {
        src = g_array_index(seqs, fastaseq, i);

        dst.header = src.header;
        /* Reserve room for the full input length; the result can only be
         * the same size or shorter. */
        dst.seq = g_string_sized_new(src.seq->len);

        /* Walk by length rather than to a NUL so the whole GString
         * payload is scanned. */
        end = src.seq->str + src.seq->len;
        for (p = src.seq->str; p != end; p++) {
            if (*p != '-')
                g_string_append_c(dst.seq, *p);
        }

        dst.seqnums = NULL;
        g_array_append_val(stripped, dst);
    }
    return stripped;
}
        
    

/* Tell whether s1 occurs as a contiguous substring of s2.
 *
 * s1: NUL-terminated pattern to look for.
 * s2: NUL-terminated text to search in.
 *
 * Returns 1 if s1 is found anywhere in s2, 0 otherwise.  An empty s1 is
 * found in every s2, including the empty string.
 *
 * Implemented with strstr() rather than a manual strncmp() scan: the
 * result is the same, but string lengths are no longer narrowed to int. */
int substring(char *s1, char *s2)
{
    return strstr(s2, s1) != NULL;
}

/* Convert an integer to a two-character, zero-padded decimal string.
 *
 * m: value to format.
 *
 * Returns a newly malloc()ed NUL-terminated string that the caller must
 * free():
 *   - "00" .. "99" when 0 <= m <= 99,
 *   - "EE" when m is outside that range.
 * Returns NULL if the allocation fails. */
char* twodigitstr(int m)
{
    char *c;

    /* Two characters plus the terminator are used; the buffer size is
     * kept at the 4 bytes this function has always allocated. */
    c = malloc(4);
    if (c == NULL)
        return NULL;

    if (m < 0 || m > 99) {
        c[0] = 'E';
        c[1] = 'E';
    } else {
        /* The C standard guarantees '0'..'9' are consecutive. */
        c[0] = (char) ('0' + m / 10);
        c[1] = (char) ('0' + m % 10);
    }
    c[2] = '\0';
    return c;
}


/* Collapse four base counts into a single consensus nucleotide code.
 *
 * There is apparently no standard rule for this, so the choice made here
 * is admittedly arbitrary.  With N = na+nc+ng+nt, the first test that
 * succeeds decides the result:
 *   1. one base alone exceeds 0.51*N          -> A, C, G or T
 *   2. two bases together exceed (3/4)*N      -> M, R, W, S, Y or K
 *   3. three bases together exceed (7/8)*N    -> V, H, D or B
 *   4. none of the above                      -> N ("any" base)
 *
 * The single-base cutoff is 0.51 rather than 0.5 because of a pathology:
 * with equally divided bases, roundoff error determined which base was
 * picked.
 *
 * Within each step the candidates are tried in a fixed order (the order
 * of the tables below), so when several combinations pass the cutoff the
 * earliest one listed is returned. */
char consensus(double na, double nc, double ng, double nt)
{
    /* Indices into count[]: 0 = A, 1 = C, 2 = G, 3 = T. */
    static const char single_code[4] = { 'A', 'C', 'G', 'T' };
    static const struct { int a, b; char code; } pair_code[6] = {
        { 0, 1, 'M' }, { 0, 2, 'R' }, { 0, 3, 'W' },
        { 1, 2, 'S' }, { 1, 3, 'Y' }, { 2, 3, 'K' }
    };
    static const struct { int a, b, c; char code; } triple_code[4] = {
        { 0, 1, 2, 'V' }, { 0, 1, 3, 'H' },
        { 0, 2, 3, 'D' }, { 1, 2, 3, 'B' }
    };
    double count[4];
    double total, single_cut, pair_cut, triple_cut;
    int k;

    count[0] = na;
    count[1] = nc;
    count[2] = ng;
    count[3] = nt;

    total = na + nc + ng + nt;
    single_cut = 0.51 * total;
    pair_cut = 3.0 * total / 4.0;
    triple_cut = 7.0 * total / 8.0;

    for (k = 0; k < 4; k++) {
        if (count[k] > single_cut)
            return single_code[k];
    }
    for (k = 0; k < 6; k++) {
        if (count[pair_code[k].a] + count[pair_code[k].b] > pair_cut)
            return pair_code[k].code;
    }
    for (k = 0; k < 4; k++) {
        if (count[triple_code[k].a] + count[triple_code[k].b]
                + count[triple_code[k].c] > triple_cut)
            return triple_code[k].code;
    }
    return 'N';
}

/* Information score, in bits, of four bases with basecounts na, nc, ng, nt: */
/* I = 2 - H, where H = - \sum p_i log_2 p_i, p_i = n_i/N and i=a,c,g,t */
double nln(double n) 
{
    if (fabs(n)<0.000000001)
        return 0.0;
    else
        return n*log(n);
}

    
/* Information score, in bits, of one column with base counts
 * na, nc, ng, nt.
 *
 * The counts are normalised to frequencies p_i = n_i/N, the entropy
 * H = -sum p_i log2(p_i) is computed, and 2 - H is returned.  A negative
 * result is clamped to 0.0.
 *
 * If the counts sum to exactly zero there are no frequencies to compute;
 * 0.0 is returned instead of dividing by zero, which would otherwise
 * produce NaN and slip past the negative-value clamp. */
double infscore(double na, double nc, double ng, double nt)
{
    double ntot, entropy_nats, inf;

    ntot = na + nc + ng + nt;
    if (ntot == 0.0)
        return 0.0;

    /* nln(p) is p*ln(p), so the negated sum is the entropy in nats. */
    entropy_nats = -(nln(na / ntot) + nln(nc / ntot)
                     + nln(ng / ntot) + nln(nt / ntot));

    /* Dividing by ln 2 converts nats to bits. */
    inf = 2.0 - entropy_nats / log(2.0);
    if (inf < 0.0)
        inf = 0.0;
    return inf;
}

/* Free a GPtrArray whose elements are GArray pointers, along with every
 * GArray it holds, then reset the caller's pointer to NULL.
 *
 * gptr: address of the GPtrArray pointer to destroy.  If gptr or *gptr
 *       is NULL the call does nothing, so it is safe to call this twice
 *       on the same variable or on an array that was never created.
 *
 * Each element is released with g_array_free(..., TRUE) and the pointer
 * array itself with g_ptr_array_free(..., TRUE). */
void destroy_gptrarray_array(GPtrArray **gptr)
{
    guint n;
    GArray *garr;

    if (gptr == NULL || *gptr == NULL)
        return;

    for (n = 0; n < (*gptr)->len; n++) {
        garr = g_ptr_array_index(*gptr, n);
        g_array_free(garr, TRUE);
    }
    g_ptr_array_free(*gptr, TRUE);
    *gptr = NULL;
}
