/*                        PhyloGibbs                                  */

/*   Algorithm developed by Rahul Siddharthan, Erik van Nimwegen      * 
 *   and Eric D. Siggia at The Rockefeller University, New York       * 
 *                                                                    *
 *   This code copyright (C) 2004 Rahul Siddharthan <rsidd@online.fr> * 
 *   Licensed under the GNU General Public License (see COPYING)      */ 

#include <glib.h>
#include <assert.h>
#include "base2num.h"
#include "fasta.h"

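/* The GPtrArray *bgprobs holds four GArrays of doubles (one per base)
 * containing conditional probabilities for finding that base at site n
 * given the bases at previous sites n-1, n-2, ...
 *
 * If ncorrel==-1, use 1/4 for each base.  If ncorrel==0, use raw
 * single-site frequencies.  Otherwise, conditional probs given the
 * ncorrel preceding bases.  Only forward-strand counts are accumulated
 * here; no reverse-complement symmetrisation is done in this function.
 */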

/* Map an ambiguous base code (anything above 3) onto a real base by
 * cycling through 0,1,2,3 in turn.  *cycle holds the next substitute
 * and is advanced only when a substitution is actually made. */
static int bg_resolve_base(int base, int *cycle)
{
    if (base > 3) {
        base = *cycle;
        *cycle = (*cycle + 1) % 4;
    }
    return base;
}

/* Build the background model from the sequences in bgseqs.
 *
 * ncorrel    context length: -1 gives a flat 1/4 per base, 0 gives the
 *            single-base frequencies, >0 gives probabilities conditioned
 *            on the context number returned by base2num(..., -ncorrel).
 * bgseqs     GArray of fastaseq; each entry's seqnums (ints) and seq
 *            (characters) are read, seq->len giving the length.
 * bgprobs    output.  *bgprobs is overwritten with a newly created
 *            GPtrArray holding four GArrays of double, one per base.
 *            Each has one entry for ncorrel<=0, or 4^ncorrel entries
 *            indexed by context number otherwise.  Whatever *bgprobs
 *            pointed to before is not freed here.
 * bgpscount  pseudocount weight used when ncorrel>0: each context gets
 *            bgpscount pseudo-observations distributed according to the
 *            single-base frequencies.  Values below 0.00001 are treated
 *            as "no pseudocount".
 *
 * Base codes above 3 in seqnums are replaced by 0,1,2,3 in rotation.
 *
 * Degenerate cases never produce NaN: with no background bases at all
 * the single-base frequencies default to 1/4, and a context that was
 * never observed (with no pseudocount) falls back to the single-base
 * frequencies, which is also what the pseudocount formula yields for an
 * unobserved context.
 */
void setbgcount(int ncorrel, GArray *bgseqs, GPtrArray **bgprobs,double bgpscount)
{
    GArray *correlcounts;
    int n,m,l,base,basenum,tmpbase=0;
    fastaseq oneseq;
    double c, rawcounts[4];

    (*bgprobs)=g_ptr_array_new();

    /* flat background: one entry of 1/4 per base */
    if (ncorrel == -1) {
        c=0.25;
        for (n=0; n<4; n++) {
            correlcounts=g_array_new(TRUE,TRUE,sizeof(double));
            g_array_append_val(correlcounts,c);
            g_ptr_array_add((*bgprobs),correlcounts);
        }
        return;
    }

    /* single-base counts; needed for every remaining case */
    for (n=0; n<4; n++)
        rawcounts[n] = 0.0;
    for (n=0; n<bgseqs->len; n++) {
        oneseq=g_array_index(bgseqs,fastaseq,n);
        for (m=0; m<oneseq.seq->len; m++) {
            base=bg_resolve_base(g_array_index(oneseq.seqnums,int,m),&tmpbase);
            rawcounts[base] += 1.0;
        }
    }

    /* normalise to frequencies; an empty background would give 0/0,
     * so use the uniform distribution in that case */
    c=0.0;
    for (m=0; m<4; m++)
        c += rawcounts[m];
    for (m=0; m<4; m++) {
        if (c > 0.0)
            rawcounts[m] /= c;
        else
            rawcounts[m] = 0.25;
    }

    /* no context: the single-base frequencies are the result
     * (bgpscount is not used here) */
    if (ncorrel == 0) {
        for (n=0; n<4; n++) {
            correlcounts=g_array_new(TRUE,TRUE,sizeof(double));
            g_array_append_val(correlcounts,rawcounts[n]);
            g_ptr_array_add((*bgprobs),correlcounts);
        }
        return;
    }

    /* nonzero context: l = 4^ncorrel is the number of contexts, i.e.
     * the length of each per-base vector */
    l=1;
    for (m=0; m<ncorrel; m++)
        l *= 4;

    /* one zero-filled vector of l counts per base */
    for (n=0; n<4; n++) {
        correlcounts=g_array_new(TRUE,TRUE,sizeof(double));
        c=0.0;
        for (m=0; m<l; m++)
            g_array_append_val(correlcounts,c);
        g_ptr_array_add((*bgprobs),correlcounts);
    }

    /* accumulate forward-strand counts of (context, base) pairs; the
     * first ncorrel positions of each sequence lack a full context and
     * are skipped */
    for (n=0; n<bgseqs->len; n++) {
        oneseq=g_array_index(bgseqs,fastaseq,n);
        for (m=ncorrel; m<oneseq.seq->len; m++) {
            base=bg_resolve_base(g_array_index(oneseq.seqnums,int,m),&tmpbase);
            /* vector of counts for this base */
            correlcounts=g_ptr_array_index((*bgprobs),base);
            /* context number, computed from the sequence characters */
            basenum=base2num((char *)(oneseq.seq->str+m),-ncorrel);
            /* NOTE(review): only the upper bound is checked; a negative
             * return from base2num would index out of bounds below --
             * confirm base2num never returns < 0 for this offset. */
            assert(basenum<l);
            g_array_index(correlcounts,double,basenum) += 1.0;
        }
    }

    /* turn the counts of each context into probabilities over the
     * four bases */
    for (n=0; n<l; n++) {
        c=0.0;
        for (m=0; m<4; m++) {
            correlcounts=g_ptr_array_index((*bgprobs),m);
            c += g_array_index(correlcounts,double,n);
        }
        for (m=0; m<4; m++) {
            correlcounts=g_ptr_array_index((*bgprobs),m);
            if (bgpscount<0.00001) {
                if (c > 0.0)
                    g_array_index(correlcounts,double,n) /= c;
                else
                    /* context never observed: avoid 0/0 and use the
                     * context-free frequency instead */
                    g_array_index(correlcounts,double,n) = rawcounts[m];
            }
            else
                g_array_index(correlcounts,double,n) = (g_array_index(correlcounts,double,n) +bgpscount*rawcounts[m])/(c+bgpscount);
        }
    }
    return;
}

