/*                        PhyloGibbs                                  */

/*   Algorithm developed by Rahul Siddharthan, Erik van Nimwegen      * 
 *   and Eric D. Siggia at The Rockefeller University, New York       * 
 *                                                                    *
 *   This code copyright (C) 2004 Rahul Siddharthan <rsidd@online.fr> * 
 *   Licensed under the GNU General Public License (see COPYING)      */ 

/* 
 * $Author: rsidd $  
 * $Date: 2005/05/22 05:12:30 $ 
 * $Id: initwindows.c,v 1.7 2005/05/22 05:12:30 rsidd Exp $ 
 */

#include "interspecies.h"
#include <gsl/gsl_matrix.h>
#include "expandwindow.h"
#include "checkwindow.h"
#include "renumberwindowseqs.h"
#include "splitwindow.h"
#include "setwinbasecount.h"
#include "commonroutines.h"
#include <math.h>

/*  Initialize the window structure.
 *
 *  An array of windows, each of which contains info about:
 *
 *    o its starting and ending points on each sequence 
 *
 *    o flags for whether it's forward or backward (dir), whether it's
 *      available or blocked because of some other window (blocked);
 *
 *    o windows it would conflict with (blockedwins)
 *
 *    o windows which are presently blocking it (blockers -- if this
 *      array is zero-length, the "blocked" flag should be 0 ie it's
 *      free)
 *  
 *  This routine initializes the whole structure.  Afterwards,
 *  whenever a window is placed, it should be verified first that it
 *  is not blocked by another window; after placing it, other windows
 *  should be blocked/modified, and their "blockers/modifiers" lists
 *  should be updated.
 *
 *  When the window is removed, again the "blockers/modifiers" lists
 *  of its blocked windows should be updated.
 *
 *  Basically the "blockedwins" are never changed,
 *  since the concerned windows would always be blocked by
 *  this one.  The "blockers" are changed every time you
 *  do something to this one.
 *
 */  

int initwindows(params *v) 
{
    int nseq,lseq,n,m,s,l,lstop,n2,l2,expok,s1,s2,winok,nprog,
        tempwinastart,tempwinaseq,thisseq,thisgroup,curgroup,seqpos,zer,smax,recurselevel,thistaken, exists, thisstart,thisstop,num_match;
    GArray *splitwinset;
    gsl_matrix_int * taken;
    
    char * dataptr;
    
    fflush(NULL);
    
    
    window tempwin1;
    window *tempwina, *tempwinb, *tempwinc;
    
    GPtrArray *seqwin1;
    
    dialignseq tempseq,tempseq2;
    
    smax=v->wwidth/2;
    
    zer=0;
    /**the total number of sequences from all sequence groups summed together***/
    nseq=(v->seqarray)->len;
    v->seqwin=g_ptr_array_new();

    if (!(v->quiet)) {
        printf("Parsing sequence, ");
        fflush(NULL);
    }
    
    /**set the windows at each position of each sequence to zero***/
    lseq=0;/**longest bare sequence **/
    for (n=0; n<nseq; n++) {
        tempseq=g_array_index(v->seqarray,dialignseq,n);
        /**keep track of maximum length bare sequences**/
        if (tempseq.barelength>lseq)
            lseq=tempseq.barelength;
        
        /* initialize seqwin with all pointers set to NULL */
        seqwin1=g_ptr_array_new();
        for (m=0; m<tempseq.barelength; m++) {
            tempwina=NULL;
            g_ptr_array_add(seqwin1,tempwina);
        }
        /**add to the large array of all seq windows 
           (referring to the windows occurring at each position of each sequence**/
        g_ptr_array_add((v->seqwin),seqwin1);
    }
    
    v->win=g_array_new(FALSE, FALSE, sizeof(window));
    
    tempwin1.blockedwins=g_ptr_array_new();
    tempwin1.blockers=g_ptr_array_new();
    tempwin1.dir=FALSE;
    tempwin1.blocked=FALSE;
    tempwin1.colour=0;   
    
    /**a matrix with all pos in all seq set taken to zero**/
    taken=gsl_matrix_int_alloc (nseq,lseq);
    gsl_matrix_int_set_zero(taken);
    /*run over all sequences***/
    for (n=0; n<nseq; n++) {
        tempseq2=g_array_index(v->seqarray,dialignseq,n);
        lseq=tempseq2.barelength;
        /**run over all positions where one can start a windows**/
        for (l=0; l<lseq-(v->wwidth)+1; l++) {
            /*  Strategy: check if this site (n,l) is
             *  already taken as the starting site of an
             *  earlier window.  If not, start a window
             *  here; if it contains capital letters, extend
             *  it down as far as it will
             *  go.  (Not up, because if that were
             *  possible, the window would already be
             *  taken.)  This is done by expandwindow()
             *
             *  Align level 1:
             *    Check fully window for violation of constraints;
             *    if not ok, split into acceptable smaller windows.
             *    This is done by splitwindow()
             *  
             *  Align level 2:
             *    Check fully constructed window for
             *    violations of constraints.  If not valid,
             *    reject window altogether.  This is done
             *    by checkwindow()
             *
             *  Second pass will set the blocked, blockers
             *  stuff etc.  This pass will only make an
             *  array of windows.
             *
             */
            if (gsl_matrix_int_get(taken,n,l)==0) {
                /* construct window starting here  */
                tempwin1.seq=g_array_new(FALSE,FALSE,sizeof(int));
                tempwin1.start=g_array_new(FALSE,FALSE,sizeof(int));
                tempwin1.stop=g_array_new(FALSE,FALSE,sizeof(int));
		tempwin1.data=g_array_new(FALSE,FALSE,sizeof(char*));
                
                g_array_append_val(tempwin1.seq,n);
                g_array_append_val(tempwin1.start,l);
                lstop=l+(v->wwidth)-1;
                g_array_append_val(tempwin1.stop,lstop);
		/***check if capitals appear and record starts, stops, and bareseq lengths of all other members that are aligned in****/
                expok=expandwindow(&(v->seqarray),&(v->seqgroups),&tempwin1.seq, &tempwin1.start,&tempwin1.stop);  
                /**run over all members and record where sequence data can be gotten ***/
                for (l2=0; l2<tempwin1.seq->len; l2++) {
                    tempseq=g_array_index(v->seqarray,dialignseq,g_array_index(tempwin1.seq,int,l2));
                    dataptr=(char *)tempseq.bareseq->data;
                    dataptr=dataptr+g_array_index(tempwin1.start,int,l2);
                    g_array_append_val(tempwin1.data,dataptr);
                }
                /***if there were capitals***/
                if (expok) 
		  {
                    if (v->usedialign==1) 
		      {
                        recurselevel=0;
                        splitwindow(v->wwidth,&tempwin1,&(v->seqarray),&splitwinset,&recurselevel);
                        if (splitwinset->len > 0) 
			  {
                            /* It can happen that a sequence in one of these windows is
                               already taken; if so, chuck it out, for lack of better ideas */
                            for (n2=splitwinset->len-1; n2>=0; n2--) 
			      {
                                tempwina=&g_array_index(splitwinset,window,n2);
                                thistaken=0;
                                for (l2=0; l2<tempwina->seq->len; l2++)
				  if (gsl_matrix_int_get(taken,g_array_index(tempwina->seq,int,l2),g_array_index(tempwina->start,int,l2))!=0) {
				    thistaken=1;
				    break;
				  }
                                if (thistaken)
				  g_array_remove_index_fast(splitwinset,n2);
			      }
                            g_array_append_vals(v->win,splitwinset->data,splitwinset->len);
                            for (n2=0; n2<splitwinset->len; n2++) 
			      {
				tempwina=&g_array_index(splitwinset,window,n2);
			      for (l2=0; l2<tempwina->seq->len; l2++) 
				{
				  assert(gsl_matrix_int_get(taken,g_array_index(tempwina->seq,int,l2),g_array_index(tempwina->start,int,l2))==0);
				  gsl_matrix_int_set(taken,g_array_index(tempwina->seq,int,l2),g_array_index(tempwina->start,int,l2),1);
                                }
			      }	
			  }
                        g_array_free(splitwinset,TRUE);
                    } 
		    else
		      { /* usedialign==2 */
                        winok=checkwindow(v->wwidth,&tempwin1,&(v->seqarray));
                        if (winok) 
			  {
                            g_array_append_val(v->win,tempwin1);
                            for (l2=0; l2<tempwin1.seq->len; l2++) 
                                gsl_matrix_int_set(taken,g_array_index(tempwin1.seq,int,l2),g_array_index(tempwin1.start,int,l2),1);
			  }
		      }
		  }
		else 
		  { /*no capitals in this window**/
                    winok=checkwindow(v->wwidth,&tempwin1,&(v->seqarray));
                    if (winok) 
		      {
                        g_array_append_val(v->win,tempwin1);
                        for (l2=0; l2<tempwin1.seq->len; l2++) 
			  gsl_matrix_int_set(taken,g_array_index(tempwin1.seq,int,l2),g_array_index(tempwin1.start,int,l2),1);
		      }
		  }
            }
        }
    }
    
    /***at this point we no longer need di2bare and bare2di*****/
    /***clean them to save memory*****/
    for(n=0;n<((v->seqarray)->len);++n)
      {
	tempseq=g_array_index(v->seqarray,dialignseq,n);
	g_array_free(tempseq.di2bare,TRUE);
	g_array_free(tempseq.bare2di,TRUE);
      }
    
    /*
     * seqwinarray setup, and further bookkeeping inside window set
     */
    /***numbers of the windows where groups start***/
    /***initialize the first value with 1****/
    v->groupstarts=g_array_new(TRUE,TRUE,sizeof(int));
    curgroup = 0;
    g_array_append_val((v->groupstarts),zer);
    l=(v->win)->len;
    for (n=0; n<l; n++) 
      {
        tempwina=&g_array_index(v->win,window,n);
        tempwina->left=NULL;
        tempwina->right=NULL;
        tempwina->basecount=NULL;
	tempwina->trackedocc=NULL;
	tempwina->trackedrev=NULL;
	tempwina->blocked=0;
        tempwina->permblocked=0;
        tempwina->shiftblocked=0;
        tempwina->occ=0.0;

	/***stop is guaranteed to be start+wwidth-1. Can be removed***/
	g_array_free(tempwina->stop,TRUE);
	
	/***first sequence in this window***/
	thisseq = g_array_index((tempwina->seq),int,0);
	/***group of this sequence***/
	thisgroup = g_array_index((v->seqgroups),int,thisseq);
	/**a new group****/
	if(thisgroup>curgroup)
	  {
	    /**put the number of the window****/
	    g_array_append_val((v->groupstarts),n);
	    curgroup = thisgroup;
	  }

        for (l2=0; l2<(*tempwina).seq->len; l2++) 
	  {
            thisseq=g_array_index((tempwina->seq),int,l2);
            seqwin1=g_ptr_array_index((v->seqwin),thisseq);
            seqpos=g_array_index((tempwina->start),int,l2);
            assert(g_ptr_array_index(seqwin1,seqpos)==NULL);
            g_ptr_array_index(seqwin1,seqpos)=tempwina;
	  }
      }
    /***final group at end of list****/
    g_array_append_val((v->groupstarts),l);

    /*
     * Blocked windows: based on real estate
     * considerations
     */
    
    l=(v->win)->len;
    /***alternative strategy. Go through all places where a blocking window
        may start, look up what window it corresponds to and add that
        window to the list of blockers if not yet blocking****/
    for(n=0;n<l;n++) {
        tempwina=&g_array_index(v->win,window,n);
        tempwina->blockedwins=g_ptr_array_new();
        tempwina->blockers=g_ptr_array_new();
        for (s1=0; s1<tempwina->seq->len; s1++) {
            thisseq=g_array_index((tempwina->seq),int,s1);
            thisstart = (g_array_index(tempwina->start,int,s1)-(v->wwidth)+1);
            if(thisstart < 0)
                thisstart = 0;
            thisstop = g_array_index(tempwina->start,int,s1)+v->wwidth-1;
            tempseq=g_array_index(v->seqarray,dialignseq,thisseq);
            if(thisstop > (tempseq.barelength-(v->wwidth)))
                thisstop = (tempseq.barelength-(v->wwidth));
            /***run over all places where a blocking window may start**/
            for (seqpos=thisstart;seqpos<=thisstop;++seqpos) {
                /***get the window starting at that position****/
                seqwin1=g_ptr_array_index((v->seqwin),thisseq);
                tempwinb = g_ptr_array_index(seqwin1,seqpos);
                if (tempwinb != NULL) {
                    /**check if it already exists in the list of blockers***/
                    exists = 0;
                    /**loop over previous blockers**/
                    for (m=0;m<(tempwina->blockedwins->len);++m) {
                        tempwinc = g_ptr_array_index(tempwina->blockedwins,m);
                        if(tempwinc == tempwinb) {
                            exists = 1;
                        }
                    }
                    if(exists == 0 && tempwinb != tempwina) {
                        g_ptr_array_add(tempwina->blockedwins,tempwinb);
                    }
                }
            }
        }
    }
    
    
    /* left/right pointers */
    
    
    l=(v->win)->len;
    for (n=0; n<l; n++) {
        tempwina=&g_array_index(v->win,window,n);
        /**first sequence in window**/
        tempwinaseq=g_array_index((tempwina->seq),int,0);
        /**position where window start in first sequence of window**/
        tempwinastart=g_array_index((tempwina->start),int,0);
        
        if(tempwinastart > 0){
            /**take window that starts in same sequence one base to the left***/
            seqwin1=g_ptr_array_index((v->seqwin),tempwinaseq);
            tempwinb = g_ptr_array_index(seqwin1,tempwinastart-1);
            if(tempwinb == NULL) 
                exists = 0;
            else {
                /**check window b has same sequences as window a****/
                exists = 1;
                if((tempwina->seq->len) == (tempwinb->seq->len)) {
                    num_match = 0;
                    for(s1=0; s1<tempwina->seq->len; ++s1) {
                        for(s2=0;s2<tempwinb->seq->len;++s2) {
                            if(g_array_index((tempwina->seq),int,s1) == g_array_index((tempwinb->seq),int,s2)) {
                                ++num_match;
                            }
                        }
                    }
                    if(num_match < (tempwina->seq->len))
                        exists = 0;
                } else {
                    exists = 0;
                }
            }
            /**window b exists and has same set of sequences, add pointers**/
            if (exists) {
                if(tempwinb->right==NULL) {
                    tempwinb->right=g_ptr_array_new();
                    g_ptr_array_add(tempwinb->right,tempwina);
                }
                if (tempwina->left==NULL) {
                    tempwina->left=g_ptr_array_new();
                    g_ptr_array_add(tempwina->left,tempwinb);
                }
            }
        }
        
        /***now the window one to the right of current window a**/
        tempseq=g_array_index(v->seqarray,dialignseq,tempwinaseq);
        if(tempwinastart < (tempseq.barelength-(v->wwidth))) {
            seqwin1=g_ptr_array_index((v->seqwin),tempwinaseq);
            tempwinb = g_ptr_array_index(seqwin1,tempwinastart+1);
            if(tempwinb == NULL)
                exists = 0;
            else {
                /**check window b has same sequences as window a****/
                exists = 1;
                if((tempwina->seq->len) == (tempwinb->seq->len)) {
                    num_match = 0;
                    for(s1=0; s1<tempwina->seq->len; ++s1) {
                        for(s2=0;s2<tempwinb->seq->len;++s2) {
                            if(g_array_index((tempwina->seq),int,s1) == g_array_index((tempwinb->seq),int,s2)) 
                                ++num_match;
                        }
                    }
                    if(num_match < (tempwina->seq->len))
                        exists = 0;
                }
                else
                    exists = 0;
            }
            /**window b exists and has same set of sequences, add pointers**/
            if(exists) {
                if(tempwinb->left==NULL) {
                    tempwinb->left=g_ptr_array_new();
                    g_ptr_array_add(tempwinb->left,tempwina);
                }
                if (tempwina->right==NULL) {
                    tempwina->right=g_ptr_array_new();
                    g_ptr_array_add(tempwina->right,tempwinb);
                }
            }
        }
    }

    if (!(v->quiet))
      printf("initialising windows...\n");
    
    for (n=0; n<(v->win)->len; n++) {
        if (!(v->quiet)) {
            show_progress_bar(n,((v->win)->len-1));
        }
        tempwina=&g_array_index(v->win,window,n);

        if (v->usedialign > 0) 
	  /* renumber all sequences in this window */
	  if (renumberwindowseqs(v,tempwina))
	    return 1;


        /* set basecounts for each window, needs to be done only once */
	/* if there was an error returns 1****/
        if(setwinbasecount(tempwina,v))
	  return 1;
        
        /* set remaining left pointers, up to smax=wwidth/2 */
        if (tempwina->left==NULL) {
            tempwina->left=g_ptr_array_new();
            tempwinb=NULL;
            g_ptr_array_add(tempwina->left,tempwinb);
        }
        assert(tempwina->left->len==1);
        tempwinb=g_ptr_array_index(tempwina->left,0);
        for (s=0; s<smax; s++) {
            if ((tempwinb!=NULL)&&(tempwinb->left!=NULL))
                tempwinb=g_ptr_array_index(tempwinb->left,0);
            g_ptr_array_add(tempwina->left,tempwinb);
        }
        
        /* and right pointers, same way */
        if (tempwina->right==NULL) {
            tempwina->right=g_ptr_array_new();
            tempwinb=NULL;
            g_ptr_array_add(tempwina->right,tempwinb);
        }
        assert(tempwina->right->len==1);
        tempwinb=g_ptr_array_index(tempwina->right,0);
        for (s=0; s<smax; s++) {
            if ((tempwinb!=NULL)&&(tempwinb->right!=NULL))
                tempwinb=g_ptr_array_index(tempwinb->right,0);
            g_ptr_array_add(tempwina->right,tempwinb);
        }
    }
    /**average foreground score of all windows***/
    /**this can be used in estimatepriorparams or it may be overruled there***/
    v->mu = log((v->mu)/((double) ((v->win)->len)));
    
    if (!(v->quiet))
        printf("| Done.\n");
    
    gsl_matrix_int_free(taken);
    
    return 0;
}
