/*                        PhyloGibbs                                  */

/*   Algorithm developed by Rahul Siddharthan, Erik van Nimwegen      * 
 *   and Eric D. Siggia at The Rockefeller University, New York       * 
 *                                                                    *
 *   This code copyright (C) 2004 Rahul Siddharthan <rsidd@online.fr> * 
 *   Licensed under the GNU General Public License (see COPYING)      */ 

/* 
 * $Author: rsidd $
 * $Date: 2005/07/05 12:48:51 $
 * $Id: renumberwindowseqs.c,v 1.4 2005/07/05 12:48:51 rsidd Exp $
 */

#include <string.h>
#include "interspecies.h"
#include "commonroutines.h"
#include "tree_routines.h"


/* renumberwindowseqs: build win->species for one window.
 *
 * Initially the entries of win->seq are sequence numbers relative to
 * the whole original input set, not relative to the group the window
 * belongs to.  This routine creates win->species, a parallel array
 * holding, for each entry of win->seq, a number that identifies the
 * species within the present window.  The number is obtained in one of
 * two ways:
 *
 *   - if v->labels is NULL, by subtracting the sequence number of the
 *     first sequence of the window's group from each entry of win->seq;
 *   - otherwise, by finding the (unique) entry of v->labels that
 *     matches the sequence's name (as decided by substring()) and
 *     using that label's index.  A sequence matching no label gets -1
 *     when v->nomutprob is not below -0.001.
 *
 * win->species is always (re)allocated here with g_array_new(); any
 * previous value of that pointer is overwritten without being freed.
 *
 * Returns 0 on success.  Returns 1, after printing a message to
 * stderr, when the phylogeny/label configuration is inconsistent with
 * the sequences in the window or when memory cannot be allocated; in
 * that case win->species may be only partially filled.
 */


int renumberwindowseqs(params *v, window *win)
{
    int n,m,lab,match,thisgroup,species;
    int status = 0;
    char *onelabel;
    dialignseq thisseq;
    int *labelstaken = NULL;

    win->species=g_array_new(TRUE,TRUE,sizeof(int));

    if (v->labels == NULL) { /* just subtract sequence number of
                                first sequence in group */
      /* An empty window has nothing to renumber; bail out before
       * reading element 0 of win->seq. */
      if (win->seq->len == 0)
        return 0;

      /***first sequence in this window ***/
      n=g_array_index(win->seq,int,0);
      /**group of sequence number n ***/
      thisgroup=g_array_index(v->seqgroups,int,n);
      /***find the number of first sequence in group: walk backwards
       *  while still inside the same group, then step forward once ***/
      while ((n>=0) && (g_array_index(v->seqgroups,int,n)==thisgroup))
        n--;
      n++;

      for (m=0; m<win->seq->len; m++) {
        species = g_array_index(win->seq,int,m) - n;
        g_array_append_val(win->species,species);
        /* NOTE(review): a negative nomutprob presumably means that
         * phylogeny parameters are required -- confirm with caller. */
        if ((species >= v->seqmuset->len) && (v->nomutprob < -0.001)) {
          if (v->seqmuset->len ==0)
            fprintf(stderr,"\rError: No phylogenetic relationship specified\n       See manual pages for options -H, -l, -G, -L\n");
          else
            fprintf(stderr,"\rError: %d phylogenies specified via -H but some aligned sequence groups\n       contain more sequences.  Check your -H option (or use -G)\n",(int)v->seqmuset->len);
          return 1;
        }
      }
      return 0;
    }

    /* use the labels to assign the sequence number */

    /* NOTE(review): this threshold is -0.0001 whereas the other
     * nomutprob tests in this routine use -0.001; kept as found. */
    if ((v->labels->len > v->seqmuset->len)&&(v->nomutprob < -0.0001)) {
      if (v->seqmuset->len == 0)
        fprintf(stderr,"\rError: %d labels (-l) specified but phylogenetic parameters (-H) not given\n       See manual pages for -H, -G options\n",(int)v->labels->len);
      else
        fprintf(stderr,"\rError: %d labels (-l) but only %d phylogenetic parameters (-H) specified\n       Check your -H parameters (or use -G)\n",(int)v->labels->len,(int)v->seqmuset->len);
      return 1;
    }

    /* labelstaken[i] becomes 1 once label i has been matched by some
     * sequence of this window, so a second match can be rejected. */
    labelstaken=calloc(v->labels->len,sizeof(int));
    /* calloc may legitimately return NULL for a zero-element request;
     * only a failed non-empty request is an error. */
    if ((labelstaken == NULL) && (v->labels->len > 0)) {
      fprintf(stderr,"\rError: out of memory in renumberwindowseqs\n");
      return 1;
    }

    for (m=0; m<win->seq->len; m++) {
      n = g_array_index(win->seq,int,m);
      thisseq=g_array_index(v->seqarray,dialignseq,n);
      match = -1;
      for (lab=0; lab<v->labels->len; lab++) {
        onelabel=g_ptr_array_index(v->labels,lab);
        if (!substring(onelabel,thisseq.name->str))
          continue;
        if (match > -1) {
          fprintf(stderr,"\rError: sequence header has matches to multiple labels!  Make sure that\n       headers have one and only one match for the labels in -l or -L\n");
          status = 1;
          goto cleanup;
        }
        if (labelstaken[lab]) {
          fprintf(stderr,"\rError: label found in multiple sequences! Make sure that no label appears\n       in more than one header\n");
          status = 1;
          goto cleanup;
        }
        match=lab;
        labelstaken[lab]=1;
      }
      if ((match == -1) && (v->nomutprob < -0.001)) {
        if (v->labeltree)
          fprintf(stderr,"\rError: none of specified labels found in input sequence with name %s\n       The string supplied with -L must be complete and correct!\n",thisseq.name->str);
        else
          fprintf(stderr,"\rError: none of specified labels found in an input sequence\n       To specify a default (fallback) phylogeny parameter for sequences\n       lacking given labels, use -G\n");
        status = 1;
        goto cleanup;
      }
      g_array_append_val(win->species,match);
    }

cleanup:
    /* single exit for the label branch so the scratch array is
     * released on error paths as well as on success */
    free(labelstaken);
    return status;
}
