/*                        PhyloGibbs                                  */

/*   Algorithm developed by Rahul Siddharthan, Erik van Nimwegen      * 
 *   and Eric D. Siggia at The Rockefeller University, New York       * 
 *                                                                    *
 *   This code copyright (C) 2004 Rahul Siddharthan <rsidd@online.fr> * 
 *   Licensed under the GNU General Public License (see COPYING)      */ 

/* 
 * $Author: rsidd $  
 * $Date: 2005/05/22 12:01:41 $ 
 * $Id: setwinbasecount.c,v 1.5 2005/05/22 12:01:41 rsidd Exp $ 
 */

/*
 * 2005/04/14 Erik van Nimwegen
 * Significant changes made to this routine.
 *
 * There are now 3 different methods implemented for setting the lambdas
 * Method 1 uses pseudocounts zero for calculating the exact means and variances
 * and finds the overall scale lambda by averaging over all 16 second moments.
 * This is currently the default.
 * Method 2 uses the pseudocount specified by the user, sets negative lambda_a
 * to zero and averages the scale lambda over the 10 unique second moments.
 * Method 3 also uses pseudocount zero but sets the scale lambda by minimizing
 * the square deviation of the second moments. 
 *
 * Earlier changes include setting the prefactor c of the approximation such that for 
 * a single coloured window the approximation is exact.
 */

/* 2005/05/17 - 2005/05/21 Rahul Siddharthan
 * implemented general phylogenies
 */

#include "interspecies.h"
#include <math.h>
#include <assert.h>
#include "base2num.h"
#include "wminteg.h"
#include "commonroutines.h"
#include "tree_routines.h"

/*
 * Build the list of monomial terms for one alignment column, given that the
 * ancestral base is `ancbase`.
 *
 *   bases[0..nbases-1]  base index of every leaf; used to index a 4-entry
 *                       table, so values must be 0..3
 *   mus[0..nbases-1]    per-leaf weight (presumably the probability that the
 *                       leaf is an unmutated copy of the ancestor - confirm
 *                       against the callers)
 *
 * Each leaf equal to ancbase contributes either mus[i] or (1-mus[i]); the
 * products are grouped by the number k of (1-mus[i]) factors taken.  Each
 * leaf different from ancbase always contributes (1-mus[i]).
 *
 * On return *outbasenums is a freshly allocated GPtrArray holding, for
 * k = 0 .. (number of leaves equal to ancbase), one GArray of five doubles:
 *   [0]     prefactor of the term
 *   [1..4]  base counts: k+1 for ancbase, and for every other base the
 *           number of leaves that carry it
 * A list already present in *outbasenums is destroyed first.
 */
void getbasenumlist_one(int *bases, double *mus, int nbases,GPtrArray **outbasenums, int ancbase)
{
  GArray *term, *poly, *prevpoly, *swap;
  int nmatch, seen, i, k, othercounts[4];
  double zero, mismatchfac;

  zero = 0.0;
  for (k = 0; k < 4; k++)
    othercounts[k] = 0;

  /* throw away whatever list the caller passed in */
  if (*outbasenums != NULL)
    destroy_gptrarray_array(outbasenums);
  *outbasenums = g_ptr_array_new();

  poly = g_array_new(TRUE, TRUE, sizeof(double));
  prevpoly = g_array_new(TRUE, TRUE, sizeof(double));

  /* how many leaves agree with the ancestor? */
  nmatch = 0;
  for (i = 0; i < nbases; i++)
    if (bases[i] == ancbase)
      nmatch++;

  /* one zeroed 5-entry term per k, and nmatch+1 zeroed coefficients in
     each of the two work arrays */
  for (k = 0; k <= nmatch; k++)
    {
      term = g_array_new(TRUE, TRUE, sizeof(double));
      for (i = 0; i < 5; i++)
        g_array_append_val(term, zero);
      g_ptr_array_add(*outbasenums, term);

      g_array_append_val(poly, zero);
      g_array_append_val(prevpoly, zero);
    }

  g_array_index(prevpoly, double, 0) = 1.0;

  seen = 0;
  mismatchfac = 1.0;
  for (i = 0; i < nbases; i++)
    {
      if (bases[i] != ancbase)
        {
          /* leaf differs from the ancestor: only the (1-mu) factor */
          mismatchfac *= 1.0-mus[i];
          othercounts[bases[i]] += 1;
          continue;
        }

      /* leaf equals the ancestor: multiply the coefficient list by
         (mu + (1-mu) x), writing the result into the spare array */
      g_array_index(poly, double, 0) = g_array_index(prevpoly, double, 0) * mus[i];
      g_array_index(poly, double, seen+1) = g_array_index(prevpoly, double, seen) * (1.0-mus[i]);
      for (k = 1; k <= seen; k++)
        g_array_index(poly, double, k) = g_array_index(prevpoly, double, k)*mus[i]+ g_array_index(prevpoly, double, k-1)*(1.0-mus[i]);

      /* the freshly written array becomes the current one */
      swap = prevpoly;
      prevpoly = poly;
      poly = swap;
      seen++;
    }

  /* fill in prefactor and base counts of every term */
  for (k = 0; k <= nmatch; k++)
    {
      term = g_ptr_array_index(*outbasenums, k);
      g_array_index(term, double, 0) = mismatchfac * g_array_index(prevpoly, double, k);
      for (i = 0; i < 4; i++)
        {
          if (i == ancbase)
            g_array_index(term, double, i+1) = 1.0*(k+1);
          else
            g_array_index(term, double, i+1) = 1.0*othercounts[i];
        }
    }

  g_array_free(poly, TRUE);
  g_array_free(prevpoly, TRUE);
}



/*
 * Build the complete list of monomial terms for one alignment column by
 * concatenating the lists produced by getbasenumlist_one() for each of the
 * four possible ancestral bases (0..3, in that order).
 *
 *   *bases  GArray of int, base index of every leaf
 *   *mus    GArray of double, same length, per-leaf weight
 *
 * A list already present in *outbasenums is destroyed; on return it points
 * to a new GPtrArray that owns all the term arrays.
 */
void getbasenumlist(GArray **bases, GArray **mus, GPtrArray **outbasenums) 
{
    GPtrArray *perbase;
    int ancbase, k;

    if (*outbasenums != NULL)
        destroy_gptrarray_array(outbasenums);
    *outbasenums = g_ptr_array_new();

    perbase = NULL;
    for (ancbase = 0; ancbase < 4; ancbase++) {
        getbasenumlist_one((int *)(*bases)->data, (double *)(*mus)->data,
                           (*bases)->len, &perbase, ancbase);

        /* hand the terms over to the combined list ... */
        for (k = 0; k < perbase->len; k++)
            g_ptr_array_add(*outbasenums, g_ptr_array_index(perbase, k));

        /* ... and release only the container that carried them */
        g_ptr_array_free(perbase, TRUE);
        perbase = NULL;
    }
}




/*
 * Sum, over all monomial terms in *outbasenums, of
 *     prefactor * exp(precise_wminteg(n0+offset, n1+offset, n2+offset, n3+offset))
 * where n0..n3 are the term's four base counts.
 *
 *   newbase, newbase2  if > -1 (valid range 0..3), one extra count is added
 *                      to that base before integrating; pass -1 for "none".
 *                      The two may name the same base, which then receives
 *                      two extra counts.
 *   offset             added to every count (e.g. to remove a pseudocount)
 *   outbasenums        list of 5-double terms: [0] prefactor, [1..4] counts
 *
 * The term list is only read.  The extra counts are applied to a local copy:
 * adding 1.0 to the stored value and subtracting it afterwards does not
 * restore a non-integer count exactly, so doing it in place would slowly
 * perturb the caller's data over repeated calls.
 */
double exactwminteg(int newbase, int newbase2,double offset, GPtrArray **outbasenums) 
{
    double integ, prefac, cnt[4];
    GArray *onebasenum;
    int n, b;

    integ=0.0;
    /**run over all the monomial terms***/
    for (n=0; n<(*outbasenums)->len; n++) {
        onebasenum = g_ptr_array_index((*outbasenums),n);
        /**prefactor of this monomial term***/
        prefac = g_array_index(onebasenum,double,0);

        /**private copy of the counts, so the stored term stays untouched**/
        for (b=0; b<4; b++)
            cnt[b] = g_array_index(onebasenum,double,b+1);
        if (newbase > -1)
            cnt[newbase] += 1.0;
        if (newbase2 > -1)
            cnt[newbase2] += 1.0;

        integ += prefac * exp(precise_wminteg(cnt[0]+offset,
                                              cnt[1]+offset,
                                              cnt[2]+offset,
                                              cnt[3]+offset));
    }
    return integ;
}

    


/*
 * Shortcut used when no exact calculation is needed: lambda[b] becomes the
 * number of entries of *bases (GArray of int, values 0..3) equal to b, and
 * the prefactor *c is set to 1.
 */
void setquicklambdas(GArray **bases,double lambda[4],double *c) 
{
	int i, b;

	for (b=0; b<4; b++)
	  lambda[b]=0.0;
	*c=1.0;

	/* plain tally of the leaf bases */
	for (i=0; i<(*bases)->len; i++) {
	  b=g_array_index((*bases),int,i);
	  lambda[b] += 1.0;
	}
}



/*
 * Helper for setlambdas(): compute the normalisation and the normalised
 * first and second moments of the term list at the given count offset.
 * Fills m1[0..3] and the upper triangle (n2 >= n) of m2; returns the
 * normalisation exactwminteg(-1,-1,offset,...).
 */
static double setlambdas_moments(double offset, GPtrArray **outbasenums,
                                 double m1[4], double m2[4][4])
{
    double norm;
    int n,n2;

    norm=exactwminteg(-1,-1,offset,outbasenums);
    for (n=0; n<4; n++) {
        m1[n]=exactwminteg(n,-1,offset,outbasenums)/norm;
        for (n2=n; n2<4; n2++)
            m2[n][n2]=exactwminteg(n,n2,offset,outbasenums)/norm;
    }
    return norm;
}

/*
 * Helper for setlambdas(): sum of the per-pair estimates of the total
 * lambda over the upper triangle of the second moments.  Off-diagonal
 * pairs are weighted by offdiag_weight (2 counts each of them twice,
 * 1 counts each unique pair once).
 */
static double setlambdas_pairsum(double m1[4], double m2[4][4],
                                 double offdiag_weight)
{
    double sum;
    int n,n2;

    sum=0.0;
    for (n=0; n<4; n++) {
        for (n2=n; n2<4; n2++) {
            if (n==n2)
                sum += (m1[n]-m2[n][n2])/(m2[n][n2]-m1[n]*m1[n2]);
            else
                sum -= offdiag_weight*m2[n][n2]/(m2[n][n2]-m1[n]*m1[n2]);
        }
    }
    return sum;
}

/*
 * Approximate the sum of monomial terms in *outbasenums by a single term
 * with prefactor *c and base counts lambda[0..3].
 *
 *   bases, mus   not used by this routine
 *   priors       GArray of double; entry 0 is used as the pseudocount that
 *                methods 1 and 3 remove, entries 0..3 and 4 are the per-base
 *                and total pseudocounts subtracted by method 2
 *   lambda, c    outputs
 *   outbasenums  list of 5-double terms: [0] prefactor, [1..4] counts
 *   method       1: moments at (almost) zero pseudocount, total lambda
 *                   averaged over all 16 second moments
 *                2: moments at the given pseudocount, total lambda averaged
 *                   over the 10 unique second moments, negative lambdas
 *                   set to zero
 *                3: moments at (almost) zero pseudocount, total lambda from
 *                   a least-squares fit to the second moments
 *                Any other value is treated as method 1 (the default named
 *                in the file header).  Previously such a value left lambda
 *                unset and applied exp() to whatever the caller had in *c.
 *
 * If the list holds exactly one term, that term is returned unchanged.
 * Otherwise *c is chosen so that the approximation reproduces the exact
 * normalisation of the list at offset 0.
 */
void setlambdas(GArray **bases, GArray **mus, GArray **priors,
                double lambda[4], double *c, GPtrArray **outbasenums,
                int method) 
{
    double totlambda,Aback;
    double m1[4],m2[4][4];
    double offset;
    int n,n2;
    double numerator, denominator;
    GArray *onebasenum;
    
    /* If outbasenums has only one element, just return that */
    if ((*outbasenums)->len == 1) {
        onebasenum=g_ptr_array_index((*outbasenums),0);
        *c = g_array_index(onebasenum,double,0);
        for (n=0; n<4; n++)
            lambda[n]=g_array_index(onebasenum,double,n+1);
        return;
    }

    /* never run with an unrecognised method: use the default */
    if (method != 2 && method != 3)
        method = 1;

    if (method == 2) {
        /**same pseudocount throughout: moments and normalization
           are both taken at offset zero***/
        Aback=setlambdas_moments(0.0,outbasenums,m1,m2);
    }
    else {
        /**subtract the pseudocount when doing the moment integrals;
           the small constant keeps the counts strictly above zero
           (presumably to keep the integral finite - confirm)***/
        offset = -g_array_index(*priors,double,0)+0.00001;
        setlambdas_moments(offset,outbasenums,m1,m2);
        /**get real normalization for window***/
        Aback=exactwminteg(-1,-1,0.0,outbasenums);
    }

    if (method == 1) {
        /**off-diagonal pairs counted twice, hence 16 pairs in total***/
        totlambda=setlambdas_pairsum(m1,m2,2.0)/16.0;
        for (n=0; n<4; ++n)
            lambda[n] = m1[n] * totlambda;
    }
    else if (method == 2) {
        /**each unique pair counted once, hence 10 pairs***/
        totlambda=setlambdas_pairsum(m1,m2,1.0)/10.0;
        /**subtract 4q; it is added back below, the two steps are kept
           so the arithmetic matches the established results exactly****/
        totlambda -= g_array_index(*priors,double,4);

        for (n=0; n<4; ++n) {
            lambda[n] = m1[n] * (totlambda+g_array_index(*priors,double,4))-g_array_index(*priors,double,n);
            if (lambda[n] < 0.0)
                lambda[n] = 0.0;
        }
    }
    else {
        /***least square calculation for lambda_tot***/
        numerator = 0;
        denominator = 0;
        for (n=0; n<4; ++n) {
            /**diagonal term**/
            numerator += (m1[n]*m1[n]-m2[n][n])*m1[n]*(1.0-m1[n]);
            denominator += m1[n]*m1[n]*(1.0-m1[n])*(1.0-m1[n]);
            for (n2=(n+1); n2<4; ++n2) {
                numerator -= 2*(m1[n]*m1[n2]-m2[n][n2])*m1[n]*m1[n2];
                denominator += 2*m1[n]*m1[n]*m1[n2]*m1[n2];
            }
        }
        totlambda = -1-denominator/numerator;

        for (n=0; n<4; ++n)
            lambda[n] = m1[n]*totlambda;
    }

    /**prefactor such that the single term has the exact normalization***/
    *c= -wminteg(lambda[0],lambda[1],lambda[2],lambda[3]) + log(Aback);
    *c=exp(*c);

    return;
}


/*
 * Counterpart of getbasenumlist() for non-star topologies.
 *
 * basetreelist is a set of reduced tree products; every entry carries a
 * prefactor `c` and a list `F` of species-index sets, each of which is
 * treated as a star.  For every entry one 5-double term is appended to
 * *outbasenums:
 *   [0]     entry->c multiplied by the prefactor setlambdas() (method 1)
 *           returns for each of its stars
 *   [1..4]  the lambdas of all its stars, added up per base
 * The final answer is a sum over these terms.
 *
 *   realbases  GArray of int indexed by species number; the entry of every
 *              species appearing in an F set must not be -1 (asserted)
 *   v          supplies seqmuset (per-species weight) and priors
 *
 * A list already present in *outbasenums is destroyed first.
 */
void getbasenumlist_tree(params *v, GArray *realbases,
                         GPtrArray *basetreelist, GPtrArray **outbasenums)
{
    base_tree *tree;
    GArray *term, *leafbases, *leafmus, *members;
    GPtrArray *starterms;
    int t, f, k, b, leafbase, sp;
    double leafmu, weight, starweight, total[4], starlambda[4];

    if (*outbasenums != NULL)
        destroy_gptrarray_array(outbasenums);
    *outbasenums = g_ptr_array_new();

    for (t = 0; t < basetreelist->len; t++) {
        tree = g_ptr_array_index(basetreelist, t);
        weight = tree->c;
        for (b = 0; b < 4; b++)
            total[b] = 0.0;

        for (f = 0; f < tree->F->len; f++) {
            members = g_ptr_array_index(tree->F, f);

            /* collect base and weight of every species in this star */
            leafbases = g_array_new(TRUE, TRUE, sizeof(int));
            leafmus = g_array_new(TRUE, TRUE, sizeof(double));
            for (k = 0; k < members->len; k++) {
                sp = g_array_index(members, int, k);
                leafbase = g_array_index(realbases, int, sp);
                assert(leafbase != -1);
                leafmu = g_array_index(v->seqmuset, double, sp);
                g_array_append_val(leafbases, leafbase);
                g_array_append_val(leafmus, leafmu);
            }

            /* reduce the star to a single (prefactor, lambdas) term */
            starterms = NULL;
            getbasenumlist(&leafbases, &leafmus, &starterms);
            setlambdas(&leafbases, &leafmus, &(v->priors), starlambda,
                       &starweight, &starterms, 1);
            destroy_gptrarray_array(&starterms);

            weight *= starweight;
            for (b = 0; b < 4; b++)
                total[b] += starlambda[b];

            g_array_free(leafbases, TRUE);
            g_array_free(leafmus, TRUE);
        }

        term = g_array_new(TRUE, TRUE, sizeof(double));
        g_array_append_val(term, weight);
        for (b = 0; b < 4; b++)
            g_array_append_val(term, total[b]);
        g_ptr_array_add(*outbasenums, term);
    }
}


/*
 * Helper for getbgcount(): look up a background probability in `table`
 * for the character at `pos`, whose absolute position in its sequence is
 * abspos.
 *
 * If at least v->nbgc characters precede it, the entry selected by
 * base2num(pos, -v->nbgc) is returned.  Otherwise only the abspos available
 * characters are used for the index and the result is the plain average of
 * the table entries over all possibilities for the missing ones.
 */
static double bg_context_prob(params *v, GArray *table, char *pos, int abspos)
{
    int index, stride, nmissing, q;
    double sum;

    if (abspos >= v->nbgc) {
        index = base2num(pos, -(v->nbgc));
        return g_array_index(table, double, index);
    }

    index = base2num(pos, -abspos);
    stride = 1;
    nmissing = 1;
    for (q = 1; q <= (v->nbgc); q++) {
        if ((abspos-q) >= 0)
            stride *= 4;     /* this context position exists */
        else
            nmissing *= 4;   /* this one falls off the left end */
    }
    sum = 0.0;
    for (q = 0; q < nmissing; q++)
        sum += g_array_index(table, double, q*stride+index);
    return sum/(1.0*nmissing);
}

/*
 * Background count for column n of a window, summed over the four possible
 * ancestral bases.
 *
 * Since the background for non-star topologies can be calculated from
 * star-type subtrees, seqsubset acts as a mask: only window sequences m
 * with a non-zero entry take part.
 *
 *   v          supplies bgcount (one probability table per base) and nbgc
 *   currwin    supplies the sequence data and start offsets
 *   seqsubset  GArray of int, one entry per window sequence
 *   mus        GArray of double, per window sequence weight
 *   nseq       divisor used to average the ancestor probability over the
 *              included sequences
 *   n          column within the window
 *
 * For each ancestral base the term is
 *   (average over included leaves of the ancestor-table probability in the
 *    leaf's context)
 *   * product over included leaves of
 *       ( [leaf base == ancestor] * mu  +  (1-mu) * leaf probability )
 * Characters other than A/C/G/T (either case) are handled crudely: the
 * leaf probability is taken as 0.25 and the leaf never matches the ancestor.
 */
double getbgcount(params *v, window *currwin, GArray *seqsubset,
                  GArray *mus, int nseq, int n) 
{
    int ancbase, leafbase, m, start;
    double ancprob, term, factor, leafprob, total;
    GArray *anctable, *leaftable;
    char *seq;

    total = 0.0;
    for (ancbase = 0; ancbase < 4; ++ancbase) {
        /* table of probabilities of this ancestor base, by context */
        anctable = g_ptr_array_index((v->bgcount), ancbase);

        /* ancestor probability: average over the leaves' contexts */
        ancprob = 0.0;
        for (m = 0; m < currwin->seq->len; m++) {
            if (!(g_array_index(seqsubset, int, m) > 0))
                continue;
            start = g_array_index(currwin->start, int, m);
            seq = g_array_index(currwin->data, char*, m);
            ancprob += bg_context_prob(v, anctable, seq+n, n+start);
        }
        ancprob /= ((double) (nseq));

        /* multiply in the contribution of every included leaf */
        term = ancprob;
        for (m = 0; m < currwin->seq->len; m++) {
            if (!g_array_index(seqsubset, int, m))
                continue;
            start = g_array_index(currwin->start, int, m);
            seq = g_array_index(currwin->data, char*, m);

            switch (*(seq+n)) {
            case 'A': case 'a': leafbase = 0; break;
            case 'C': case 'c': leafbase = 1; break;
            case 'G': case 'g': leafbase = 2; break;
            case 'T': case 't': leafbase = 3; break;
            default:            leafbase = -1; break;
            }

            if (leafbase < 0) {
                /* for now, handle other bases crudely */
                leafprob = 0.25;
                factor = 0;
            }
            else {
                /* the "copied from ancestor" part only exists when the
                   leaf actually shows the ancestor base */
                if (leafbase == ancbase)
                    factor = g_array_index(mus, double, m);
                else
                    factor = 0;
                leaftable = g_ptr_array_index((v->bgcount), leafbase);
                leafprob = bg_context_prob(v, leaftable, seq+n, n+start);
            }

            factor += (1.0-g_array_index(mus, double, m))*leafprob;
            term *= factor;
        }
        total += term;
    }
    return total;
}


/*
 * Background count for column n of a window when a list of reduced trees
 * is used (the caller in this file takes this route when v->labeltree is
 * set).
 *
 * For every entry of basetreelist the entry's prefactor `c` is multiplied
 * by getbgcount() evaluated on each of its species sets F[i]; the mask
 * handed to getbgcount() marks the window sequences whose species number
 * occurs in that set, and the set's size is passed as nseq.  The products
 * are summed over all entries.
 *
 *   mus  GArray of double, per window sequence weight (passed through)
 */
double getbgcount_tree(params *v, window *currwin,
                       GArray *mus, int n, GPtrArray *basetreelist) 
{
    base_tree *tree;
    GArray *mask, *members;
    int t, f, s, k, zero;
    double total, product;

    /* mask with one (initially cleared) entry per window sequence */
    zero = 0;
    mask = g_array_new(TRUE, TRUE, sizeof(int));
    for (s = 0; s < currwin->seq->len; s++)
        g_array_append_val(mask, zero);

    total = 0.0;
    for (t = 0; t < basetreelist->len; t++) {
        tree = g_ptr_array_index(basetreelist, t);
        product = tree->c;

        for (f = 0; f < tree->F->len; f++) {
            members = g_ptr_array_index(tree->F, f);

            for (k = 0; k < mask->len; k++)
                g_array_index(mask, int, k) = 0;

            /* switch on every window sequence whose species is a member */
            for (s = 0; s < currwin->species->len; s++) {
                k = 0;
                while (k < members->len
                       && g_array_index(currwin->species, int, s)
                          != g_array_index(members, int, k))
                    k++;
                if (k < members->len)
                    g_array_index(mask, int, s) = 1;
            }

            product *= getbgcount(v, currwin, mask, mus, members->len, n);
        }
        total += product;
    }

    g_array_free(mask, TRUE);
    return total;
}


/*
 * Compute the per-column base counts and background scores of a window and
 * add the window's score to v->mu.
 *
 * Preconditions: currwin->basecount must be NULL (asserted).
 *
 * Effects on success:
 *   currwin->basecount  new GPtrArray with v->wwidth GArrays of 4 doubles
 *                       (counts for A,C,G,T).  A single-sequence window gets
 *                       fixed fractional counts per (possibly ambiguous)
 *                       character; otherwise the counts come from
 *                       setquicklambdas() when all per-sequence weights are
 *                       zero and from setlambdas() (method 1) in all other
 *                       cases.
 *   currwin->bgcount    new GArray of v->wwidth doubles: the log background
 *                       probability of the column (single sequence), or
 *                       log(background count) - log(prefactor of the column's
 *                       base counts) otherwise.
 *   v->mu               increased by exp(sum over columns of
 *                       wminteg(counts) - bgcount).
 *
 * Returns 0 on success.  Returns 1, after printing a message to stderr, if
 * a window with more than one sequence contains a character other than
 * A/C/G/T (either case).  In that case currwin->basecount holds only the
 * columns completed so far, and currwin->bgcount and v->mu are not touched.
 *
 * All temporaries are released on both the success and the error path.
 */
int setwinbasecount(window *currwin, params *v) 
{
    int n,nn,m,pn,q,thisseq,thisstart,index,indincr,indtot,allmuzero;
    int bmone,bzero,bone,btwo,bthree,get_leafprob,*qval;
    int status;
    GArray *winbasecount, *correlcounts;
    char *data;
    double zer=0.0;
    double thismu,leafprob,*c_prefac,thisbgcount,lscore;
    double count[4];
    GArray *bases, *mus, *realbases, *seqsubset, *oneFlist, *tempbinbase;
    GPtrArray *outbasenums,*basetreelist, *bigFlist,*qvalues;
    base_tree *basetree;
    
    status=0;
    correlcounts=NULL;
    leafprob=0.0;
    realbases=NULL;
    bases=NULL;
    mus=NULL;
    basetreelist=NULL;
    qvalues=NULL;

    /* g_array_append_val needs addressable values */
    bmone= -1;
    bzero=0;
    bone=1;
    btwo=2;
    bthree=3;

    /**the counts must not have been set already***/
    assert(currwin->basecount == NULL);
  
    currwin->basecount=g_ptr_array_new();

    /**we need an array of prefactors of the wm integral***/
    c_prefac = (double *) calloc(v->wwidth,sizeof(double));

    /**mask that includes every sequence of the window***/
    seqsubset=g_array_new(TRUE,TRUE,sizeof(int));
    for (m=0; m<currwin->seq->len; m++)
        g_array_append_val(seqsubset,bone);

    if (v->labeltree != NULL) {
        realbases=g_array_new(TRUE,TRUE,sizeof(int));
        for (nn=0; nn<v->seqmuset->len; nn++)
            g_array_append_val(realbases,bmone);
        basetree =make_base_tree(v,currwin);
        qvalues=g_ptr_array_new();
        get_qvalues(basetree->tree,&qvalues);
        basetreelist=g_ptr_array_new();
        g_ptr_array_add(basetreelist,basetree);
        prune_trees(&basetreelist);
    }

    /**run over all columns***/
    for (pn=0; pn<v->wwidth; pn++) {
        for (m=0; m<4; m++)
            count[m]=0.0; 

        /**if single window**/
        if (currwin->start->len==1) {
            data=g_array_index(currwin->data,char*,0);
            switch (*(data+pn)) {
            case 'A': case 'a': count[0]=1.0; break;
            case 'C': case 'c': count[1]=1.0; break;
            case 'G': case 'g': count[2]=1.0; break;
            case 'T': case 't': count[3]=1.0; break;
            case 'S': case 's': count[1]=0.5; count[2]=0.5; break;
            case 'W': case 'w': count[0]=0.5; count[3]=0.5; break;
            case 'R': case 'r': count[0]=0.5; count[2]=0.5; break;
            case 'Y': case 'y': count[1]=0.5; count[3]=0.5; break;
            case 'M': case 'm': count[0]=0.5; count[1]=0.5; break;
            case 'K': case 'k': count[2]=0.5; count[3]=0.5; break;
            case 'B': case 'b': count[1]=0.333; count[2]=0.333; count[3]=0.333; break;
            case 'D': case 'd': count[0]=0.333; count[2]=0.333; count[3]=0.333; break;
            case 'H': case 'h': count[0]=0.333; count[1]=0.333; count[3]=0.333; break;
            case 'V': case 'v': count[0]=0.333; count[1]=0.333; count[2]=0.333; break;
            case 'N': case 'n': count[0]=0.25; count[1]=0.25; count[2]=0.25; count[3]=0.25; break;
            default: break;   /* anything else contributes no counts */
            }
            c_prefac[pn]=1.0;
        }
        /***otherwise multiple windows****/
        else {
            /* release the previous column's arrays before building new
             * ones; the last column's mus is still needed by the
             * background loop below and is freed at cleanup */
            if (bases != NULL)
                g_array_free(bases,TRUE);
            if (mus != NULL)
                g_array_free(mus,TRUE);
            /***array with bases at the leafs**/
            bases=g_array_new(TRUE,TRUE,sizeof(int));
            mus=g_array_new(TRUE,TRUE,sizeof(double));
            allmuzero=1;
            /**run over all sequence that are members of the window***/
            for (q=0; q<currwin->start->len; q++) {
                data=g_array_index(currwin->data,char*,q);
                thisseq=g_array_index(currwin->species,int,q);
                if ((thisseq < (v->seqmuset->len)) && (thisseq >=0 ))
                    thismu=g_array_index((v->seqmuset),double,thisseq);
                else
                    thismu=v->nomutprob;
                if (thismu!=0.0)
                    allmuzero=0;
                g_array_append_val(mus,thismu);
                switch (*(data+pn)) {
                case 'A': case 'a': g_array_append_val(bases,bzero); break;
                case 'C': case 'c': g_array_append_val(bases,bone); break;
                case 'G': case 'g': g_array_append_val(bases,btwo); break;
                case 'T': case 't': g_array_append_val(bases,bthree); break;
                default:
                    fprintf(stderr,"unknown character %c at position %d\n",*(data+pn),pn);
                    status=1;
                    goto cleanup;
                }
            }
            if (allmuzero)
                setquicklambdas(&bases,count,c_prefac+pn);
            else {
                outbasenums=NULL;
                if (v->labeltree != NULL) {
                    /* realbases contains bases indexed by species number
                     * instead of in order in window; missing species are
                     * -1 */
                    for (nn=0; nn<v->seqmuset->len; nn++)
                        g_array_index(realbases,int,nn)= -1;
                    for (nn=0; nn<currwin->species->len; nn++) 
                        g_array_index(realbases,int,
                                      g_array_index(currwin->species,int,nn))
                            =g_array_index(bases,int,nn);
                    getbasenumlist_tree(v,realbases,basetreelist,&outbasenums);
                } else {
                    getbasenumlist(&bases,&mus,&outbasenums);
                }
                setlambdas(&bases,&mus,&(v->priors),count,c_prefac+pn,
                           &outbasenums,1);
                destroy_gptrarray_array(&outbasenums);
            }
        }
        winbasecount=g_array_new(TRUE,TRUE,sizeof(double));
        for (m=0; m<4; m++)
            g_array_append_val(winbasecount,count[m]);
        g_ptr_array_add(currwin->basecount,winbasecount);
    }

    /**set bg count**/
    currwin->bgcount=g_array_new(TRUE,TRUE,sizeof(double));
    
    /***run over all columns***/
    for (n=0; n<v->wwidth; n++) {
        g_array_append_val(currwin->bgcount,zer);
        if (currwin->start->len==1) {
            get_leafprob = 1;
            thisstart=g_array_index(currwin->start,int,0);
            data=g_array_index(currwin->data,char*,0);
            /**get appropriate vector of bg probs**/
            if(*(data+n) == 'A' || *(data+n) == 'a')
                correlcounts=g_ptr_array_index((v->bgcount),0);
            else if(*(data+n) == 'C' || *(data+n) == 'c')
                correlcounts=g_ptr_array_index((v->bgcount),1);
            else if(*(data+n) == 'G' || *(data+n) == 'g')
                correlcounts=g_ptr_array_index((v->bgcount),2);
            else if(*(data+n) == 'T' || *(data+n) == 't')
                correlcounts=g_ptr_array_index((v->bgcount),3);

            /* for now, handle other bases crudely */
            else {
                leafprob=0.25;
                get_leafprob = 0;
            }
            if(get_leafprob)
            {
                /**check that there is enough sequence to the left of current position***/
                if ((n+thisstart)>=v->nbgc) {
                    index=base2num(data+n,-(v->nbgc));
                    leafprob = g_array_index(correlcounts,double,index);
                }
                else {
                    /* too close to the left end: average the table over
                     * every possibility for the missing context */
                    index=base2num(data+n,-(n+thisstart));
                    indincr=1;
                    indtot=1;
                    for (q=1; q<=(v->nbgc); q++) {
                        if ((n+thisstart-q)>=0)
                            indincr *= 4;
                        else
                            indtot *= 4;
                    }
                    leafprob=0.0;
                    for (q=0; q<indtot; q++)
                        leafprob += g_array_index(correlcounts,double,q*indincr+index);
                    leafprob = leafprob/(1.0*indtot);
                }
            }
            g_array_index(currwin->bgcount,double,n) = log(leafprob);
        }
        else {
            /* mus is the array built for the last column above; it is
             * filled from per-sequence values only, so it does not depend
             * on the column */
            if (v->labeltree != NULL) {
                thisbgcount=getbgcount_tree(v,currwin,mus,n,basetreelist);
            }
            else
                thisbgcount = getbgcount(v,currwin,seqsubset,mus,
                                     currwin->seq->len,n);

            g_array_index(currwin->bgcount,double,n)
                = log(thisbgcount)-log(c_prefac[n]);
        }
    }
    
    /***get the log-score of this window***/
    lscore = 0.0;
    for(n=0;n<v->wwidth;++n)
      {
        lscore -= g_array_index(currwin->bgcount,double,n);
        tempbinbase = g_ptr_array_index(currwin->basecount,n);
        for(m=0;m<4;++m){
          count[m] = g_array_index(tempbinbase,double,m);
        }
        lscore += wminteg(count[0],count[1],count[2],count[3]);
      }
    lscore = exp(lscore);
    v->mu += lscore;

cleanup:
    /* per-column temporaries (only allocated for multi-sequence windows) */
    if (bases != NULL)
        g_array_free(bases,TRUE);
    if (mus != NULL)
        g_array_free(mus,TRUE);
    g_array_free(seqsubset,TRUE);

    if (v->labeltree) {
        /* collect each F list exactly once (remove-then-add keeps a
         * single copy of a pointer) before freeing it */
        bigFlist=g_ptr_array_new();
        for (n=0; n<basetreelist->len; n++) {
            basetree=g_ptr_array_index(basetreelist,n);
            for (m=0; m<basetree->F->len; m++) {
                oneFlist=g_ptr_array_index(basetree->F,m);
                g_ptr_array_remove(bigFlist,oneFlist);
                g_ptr_array_add(bigFlist,oneFlist);
            }
            g_ptr_array_free(basetree->F,TRUE);
            free(basetree);
        }
        for (n=0; n<bigFlist->len; n++) {
            oneFlist=g_ptr_array_index(bigFlist,n);
            g_array_free(oneFlist,TRUE);
        }
        for (n=0; n<qvalues->len; n++) {
            qval=g_ptr_array_index(qvalues,n);
            free(qval);
        }
        g_ptr_array_free(qvalues,TRUE);
        g_ptr_array_free(bigFlist,TRUE);
        g_ptr_array_free(basetreelist,TRUE);
        g_array_free(realbases,TRUE);
    }
    
    free(c_prefac);
    return(status);
}
