/*                        PhyloGibbs                                  */

/*   Algorithm developed by Rahul Siddharthan, Erik van Nimwegen      * 
 *   and Eric D. Siggia at The Rockefeller University, New York       * 
 *                                                                    *
 *   This code copyright (C) 2004 Rahul Siddharthan <rsidd@online.fr> * 
 *   Licensed under the GNU General Public License (see COPYING)      */ 

/* 
 * $Author: rsidd $  
 * $Date: 2005/05/02 08:54:09 $ 
 * $Id: expandwindow.c,v 1.1 2005/05/02 08:54:09 rsidd Exp $ 
 */



#include "interspecies.h"
#include <ctype.h>

/* If the window has no capital letters, it doesn't span multiple
 * sequences.  So we want to know whether it has capitals, and if it
 * does, which other sequences it's aligned with, ie what are the
 * start and stop points of the other sequences.  This function does
 * that.  Only used by initwindows() probably, but separated out for
 * cleanness.
 */

/*
 * expandwindow: grow a window so that it covers every sequence of its
 * group that is aligned with it.
 *
 * Parameters
 *   seq        array of dialignseq records, one per sequence.
 *   seqgroups  array of int, indexed like *seq, giving each sequence's
 *              group number.  Members of a group are expected to be
 *              stored contiguously.
 *   outseqs, outstarts, outstops
 *              parallel int arrays.  On entry element 0 describes the
 *              window: sequence index, first and last position
 *              (inclusive) in that sequence's bare sequence.  On return
 *              one extra element has been appended for every other
 *              sequence that was found to be aligned with the window.
 *
 * While scanning, a capital letter in a bare sequence that has no capital
 * counterpart at the same alignment column in any other member of the
 * group is converted to lower case in place.
 *
 * Returns 1 if the last capital letter scanned had a capital counterpart
 * in another member of the group, 0 otherwise (also 0 when no capital
 * letter was scanned at all).
 * NOTE(review): only the *last* capital scanned determines the result;
 * confirm that "any aligned capital in the window" was not intended.
 */
int expandwindow(GArray **seq, GArray **seqgroups,
		GArray **outseqs, GArray **outstarts, GArray **outstops) {

  dialignseq cur, other;
  GArray *seq_added;
  int m, n, k, nseq, start, stop, start2, stop2, curr2, dicurr2;
  int thisgroup, group_first, group_end, oldseqlen, nocaps, cap1, zer;
  char c;

  zer = 0;
  cap1 = 0;

  nseq = g_array_index(*outseqs, int, 0);           /* sequence holding the window */
  thisgroup = g_array_index(*seqgroups, int, nseq); /* its group number */

  /* seq_added[i] != 0 once sequence i is part of the window. */
  seq_added = g_array_new(FALSE, FALSE, sizeof(int));
  for (n = 0; n < (int)(*seq)->len; n++)
    g_array_append_val(seq_added, zer);
  g_array_index(seq_added, int, nseq) = 1;

  /* Repeat until a full pass over the window entries adds no sequence:
   * entries appended during one pass are scanned in the next one. */
  do
    {
      oldseqlen = (int)(*outseqs)->len;
      for (m = 0; m < oldseqlen; m++)
        {
          nseq = g_array_index(*outseqs, int, m);
          start = g_array_index(*outstarts, int, m);
          stop = g_array_index(*outstops, int, m);

          /* Fetch the sequence BEFORE clamping, so that the limits are
           * checked against the sequence that is actually scanned. */
          cur = g_array_index(*seq, dialignseq, nseq);

          if (start < 0)
            start = 0;
          /* The comparison is unsigned, so a negative stop is also mapped
           * to the last position. */
          if ((guint)stop >= cur.bareseq->len)
            stop = (int)cur.bareseq->len - 1;

          /* Locate the contiguous run [group_first, group_end) of
           * sequences around nseq that carry the window's group number.
           * The index is range-checked before the group array is read. */
          group_first = nseq;
          while (group_first >= 0
                 && g_array_index(*seqgroups, int, group_first) == thisgroup)
            group_first--;
          group_first++;
          group_end = group_first;
          while (group_end < (int)(*seq)->len
                 && g_array_index(*seqgroups, int, group_end) == thisgroup)
            group_end++;

          for (n = start; n <= stop; n++)
            {
              c = g_array_index(cur.bareseq, char, n);
              if (!isupper((unsigned char)c))
                continue;

              nocaps = 1; /* no capital counterpart seen so far */
              for (k = group_first; k < group_end; k++)
                {
                  if (k == nseq)
                    continue;
                  other = g_array_index(*seq, dialignseq, k);
                  /* alignment column of position n of the current sequence */
                  dicurr2 = g_array_index(cur.bare2di, int, n);
                  if (dicurr2 >= other.dialignlength
                      || !isupper((unsigned char)
                                  g_array_index(other.dialignseq, char, dicurr2)))
                    continue;

                  nocaps = 0;
                  if (g_array_index(seq_added, int, k))
                    continue; /* already part of the window */

                  /* Map the column to the other bare sequence and shift
                   * the window limits by the same offset. */
                  curr2 = g_array_index(other.di2bare, int, dicurr2);
                  start2 = curr2 + start - n;
                  stop2 = curr2 + stop - n;
                  if (stop2 < other.barelength)
                    {
                      g_array_index(seq_added, int, k) = 1;
                      g_array_append_val(*outseqs, k);
                      g_array_append_val(*outstarts, start2);
                      g_array_append_val(*outstops, stop2);
                    }
                }

              cap1 = !nocaps;
              if (nocaps)
                {
                  /* Capital with no aligned capital elsewhere: demote it. */
                  g_array_index(cur.bareseq, char, n) =
                    (char)tolower((unsigned char)c);
                }
            }
        }
    } while (oldseqlen < (int)(*outseqs)->len); /* did the window grow? */

  g_array_free(seq_added, TRUE); /* scratch array only; release it */

  return cap1;
}
	
