#include "procse.h"
#include "procse_State.h"

#define NO_LGAMMA_IN_MATH_LIBRARY

#ifdef NO_LGAMMA_IN_MATH_LIBRARY
#include "log_gamma.h"
#define procse_lgamma log_gamma
#else
#define procse_lgamma lgamma
#endif

/* global functions */

/***calculates an approximation to -log[(Exp(-beta*H_wm)+Exp(-beta*H_bg))/2]/beta***/
/*ERIK This is a replacement for what we had previously and should be faster*/
/*you might look into applying some tricks to make it faster still */

double get_column_score_beta_global = 1.0;
extern struct MainParameters mp;

/* get_column_score() - fast approximation of
	-log[(exp(-beta*H_wm) + exp(-beta*H_bg))/2] / beta
	where beta is read from get_column_score_beta_global.
	With "lower" the smaller of the two beta-scaled entropies and "gap"
	their absolute difference, three regimes are used:
	  gap < 2.1 : truncated alternating series in gap (more terms as gap grows)
	  gap < 5.0 : log(2) minus an approximation of 2^(-gap/log 2)
	  otherwise : lower + log(2), the remaining correction is dropped
*/
inline double get_column_score(double H_wm,double H_bg){
  const double beta = get_column_score_beta_global;
  const double ln2 = 0.693147181;

  H_wm *= beta;
  H_bg *= beta;

  /* order the pair: keep the smaller value and the size of the difference */
  double gap;
  double lower;
  if (H_wm >= H_bg) {
    gap = H_wm - H_bg;
    lower = H_bg;
  } else {
    gap = H_bg - H_wm;
    lower = H_wm;
  }

  if (gap < 2.1) {
    /* small-gap expansion; each threshold adds one more term */
    double series = gap/2.0;
    if (gap < 0.04)
      return ((lower + series)/beta);

    double term = series;
    term *= gap/4.0;
    series -= term;
    if (gap < 0.927)
      return ((lower + series)/beta);

    term *= gap*gap/24;
    series += term;
    if (gap < 1.613)
      return ((lower + series)/beta);

    term *= gap*gap/15;
    series -= term;
    return ((lower + series)/beta);
  }

  if (gap < 5.0) {
    /* express the gap in units of log(2), then build 2^(-bits) */
    const double bits = gap/ln2;
    const int whole = int(bits);
    /* fractional part, recentred on 0.5 */
    const double frac = bits - whole - 0.5;
    /* 1 / 2^whole */
    double pow2 = 1.0/((double) (1 << whole));
    /* quadratic approximation for the fractional part */
    pow2 *= (0.707107 - frac*(0.490129-0.169865792*frac));
    return ((lower + ln2 - pow2)/beta);
  }

  return ((lower + ln2)/beta);
}

/***first function get ratios of Stirling numbers
    of the second kind******/
/***I assume the space for vec is already allocated***/
/***we want vec[n] to contain vec[n]= log(S[N,n]/S[N,n+1])***/
/**The Stirling numbers of the second kind S[N,n] follow
   the recursion relation:
   S[N,n] = S[N-1,n-1] + n S[N-1,n]
***************************************/
/* Fills vec[1..N-1] with vec[n] = log(S[N,n]/S[N,n+1]), S being the
   Stirling numbers of the second kind. vec must already have room for
   index N-1; vec[0] is not written.
   Ratios X[n,m] = S[n,m]/S[n-1,m] are carried from one m to the next so
   that large N does not overflow: X[n,m] = m + prod_{i=m}^{n-1} X[i,m-1]/X[i,m],
   with X[n,m] = 1 for n <= m. */
void stirling_number_ratios(int N, double *vec)
{
  double *prev_row = new double[N+1];  /* X[.,m-1] */
  double *next_row = new double[N+1];  /* X[.,m], being built */

  /* X[n,1] = 1 for every n; the row under construction starts cleared */
  for (int k = 1; k <= N; ++k) {
    prev_row[k] = 1.0;
    next_row[k] = 0.0;
  }

  for (int m = 2; m <= N; ++m) {
    next_row[m] = 1.0;
    double running = prev_row[m]/next_row[m];
    for (int n = m + 1; n <= N; ++n) {
      next_row[n] = m + running;
      running = running*prev_row[n]/next_row[n];
    }
    /* the accumulated product is S[N,m-1]/S[N,m] */
    vec[m-1] = log(running);

    /* the row just built becomes the reference for the next m */
    double *swap_tmp = prev_row;
    prev_row = next_row;
    next_row = swap_tmp;
  }

  delete[] prev_row;
  delete[] next_row;
}
	  
/**put vec[m] = log[p_m(N)/p_{m+1}(N)] ****/
/**calculate p_m(N) using the recursion relation
   p_m(n) = p_{m-1}(n-1) + p_{m}(n-m)
****************/
/* Fills vec[1..N-1] with vec[m] = log(p_m(N)/p_{m+1}(N)), where p_m(n) is
   the number of partitions of n into exactly m parts, obtained from
   p_m(n) = p_{m-1}(n-1) + p_m(n-m).
   vec must already have room for index N-1; vec[0] is not written.
   For N < 1 there is nothing to compute and vec is left untouched. */
void partition_number_ratios(int N,double *vec)
{
  /* guard: the tables below are indexed up to N and [1] is always written */
  if (N < 1)
    return;

  double *partnum = new double[N+1];    /* p_m(N) for each m */
  double *prev_row = new double[N+1];   /* p_{m-1}(n) for all n */
  double *next_row = new double[N+1];   /* p_m(n) for the m in progress */

  for (int i = 0; i <= N; ++i) {
    partnum[i] = 0.0;
    prev_row[i] = 0.0;
    next_row[i] = 0.0;
  }

  /* p_1(n) = 1 for all n >= 1, and in particular p_1(N) = 1 */
  for (int i = 1; i <= N; ++i)
    prev_row[i] = 1.0;
  partnum[1] = 1.0;

  for (int m = 2; m <= N; ++m) {
    /* p_m(n) = 0 for n < m, p_m(m) = 1 */
    for (int n = 0; n < m; ++n)
      next_row[n] = 0.0;
    next_row[m] = 1.0;

    for (int n = m + 1; n <= N; ++n) {
      next_row[n] = prev_row[n-1];        /* p_{m-1}(n-1) */
      if (n - m >= m)
        next_row[n] += next_row[n-m];     /* p_m(n-m) */
    }
    partnum[m] = next_row[N];

    /* the row just built becomes the reference for the next m */
    double *swap_tmp = prev_row;
    prev_row = next_row;
    next_row = swap_tmp;
  }

  for (int m = 1; m < N; ++m)
    vec[m] = log(partnum[m]/partnum[m+1]);

  delete[] partnum;
  delete[] prev_row;
  delete[] next_row;
}

/* fgetline_stretch() - reads a line from an input FILE into a buffer.
	buffer must be dynamically allocated (using 'new'). This function
	can resize the buffer as needed by deallocating and reallocating
	new memory. buffer must be passed in as a pointer to a pointer to
	char so that function can redirect the pointer in the caller to
	point to newly allocated memory. Buffer length is also required
	and can be modified by this function. The length of the line that
	is read is returned in the last argument, and the string is also
	null terminated in the buffer. Line termination occurs at newline
	or EOF.
*/
/* Reads one line (terminated by '\n' or EOF) from `in` into *buffer.
	in         - stream to read from
	buffer     - address of a buffer allocated with new[]; may be replaced
	             by a larger new[] allocation (the old one is deleted)
	buffer_len - in: capacity of *buffer; out: capacity after any growth
	line_len   - out: number of characters stored, excluding the terminator
   The stored string is always null terminated and never contains the
   newline. Capacity doubles whenever only the terminator slot is left,
   which happens before the next character is read. A capacity below 2
   (including a null *buffer with capacity 0) is first raised to 2 instead
   of being written through.
*/
void fgetline_stretch(FILE *in, char **buffer, int *buffer_len, int *line_len)
{
	*line_len = 0;
	while (true) {
		/* keep one free slot for the next character plus one for '\0' */
		if (*line_len + 1 >= *buffer_len) {
			const int grown_len = (*buffer_len < 1) ? 2 : *buffer_len * 2;
			char *newbuf = new char[grown_len];
			/* NOTE(review): plain new throws rather than returning 0; the check
			   is kept to match the error handling used throughout this file */
			if (newbuf == 0) {
				fprintf(stderr, "out of memory reading line from input file\n");
				exit(1);
			}
			for (int copied = 0; copied < *line_len; copied++) {
				newbuf[copied] = (*buffer)[copied];
			}
			delete[] *buffer;
			*buffer = newbuf;
			*buffer_len = grown_len;
		}
		const int charbuf = fgetc(in);
		if (charbuf == EOF || charbuf == '\n') {
			(*buffer)[*line_len] = '\0';
			break;
		}
		(*buffer)[*line_len] = (char) charbuf;
		(*line_len)++;
	}
}

/* right_trim() - removes whitespace from the right edge of a cstring 
*/
/* Strips trailing whitespace (space, \n, \t, \r, \v, \f) from buffer.
	buffer   - string holding *line_len characters
	line_len - in: current length; out: length after trimming
   Every removed character is overwritten with '\0'. Trimming stops at the
   start of the buffer, so an all-whitespace line ends up with length 0 and
   nothing in front of buffer is read or written.
*/
void right_trim(char *buffer, int *line_len)
{
	while (*line_len > 0) {
		const char last = buffer[*line_len - 1];
		const bool is_space = (last == ' ' || last == '\n' || last == '\t' ||
				last == '\r' || last == '\v' || last == '\f');
		if (!is_space) break;
		*line_len -= 1;
		buffer[*line_len] = '\0';
	}
}

/* int_compare_global - comparison function for qsort() over an array of
	indices. Each element being sorted is an index into the global array
	int_sort_values[], and elements are ordered by the value stored there.
	int_sort_values must point at the value array before sorting starts.
	Returns a negative, zero or positive value (-1, 0 or 1).
*/
int *int_sort_values = 0;

int int_compare_global(const void *p1, const void *p2) {
	const int lhs = int_sort_values[*(const int *)p1];
	const int rhs = int_sort_values[*(const int *)p2];
	/* sign of the comparison; lhs - rhs could overflow for distant values */
	return (lhs > rhs) - (lhs < rhs);
}

/* int_compare - qsort() comparison function for plain int elements.
	Returns -1, 0 or 1 according to the ordering of the two pointed-to
	values; computed without subtraction so it cannot overflow.
*/
int int_compare(const void *p1, const void *p2) {
	const int lhs = *(const int *)p1;
	const int rhs = *(const int *)p2;
	return (lhs > rhs) - (lhs < rhs);
}

/* TGA_Raw implementation */

/* TGA_Raw constructor - allocates space for name and distribution and copies into object
*/
TGA_Raw::TGA_Raw(const char *a_name, int a_id, int a_num_tracts, int a_tract_len, int a_base_distrib[][NUM_BASES])
{
	/* private copy of the name */
	name = new char[strlen(a_name)+1];
	if (name == 0) {
		fprintf(stderr,"out of memory while creating TGA_Raw object\n");
		exit(1);
	}
	strcpy(name,a_name);

	/* scalar attributes */
	id = a_id;
	num_tracts = a_num_tracts;
	tract_len = a_tract_len;

	/* private copy of the per-position base counts */
	base_distrib = new int[tract_len][NUM_BASES];
	if (base_distrib == 0) {
		fprintf(stderr,"out of memory while creating TGA_Raw object\n");
		exit(1);
	}
	for (int pos = 0; pos < tract_len; pos++) {
		int column_total = 0;
		for (int base = 0; base < NUM_BASES; base++) {
			const int count = a_base_distrib[pos][base];
			base_distrib[pos][base] = count;
			column_total += count;
		}
		/* every position must account for each tract exactly once */
		if (column_total == num_tracts) continue;
		fprintf(stderr,"Error in creating tga(%s): ",name);
		fprintf(stderr,"distribution does not sum to number of tracts\n");
		exit(1);
	}
}

/* TGA_Raw destructor - deallocates memory
*/
TGA_Raw::~TGA_Raw()
{
	/* both arrays were allocated with new[] in the constructor */
	delete[] base_distrib;
	delete[] name;
}

/* TGA_Raw_Vector implementation */

/* node for link list -- used in TGA_Raw_Vector constructor
*/
struct TGA_Raw_Node {
	TGA_Raw *item;       /* tga carried by this node */
	TGA_Raw_Node *next;  /* following node in the list */
};

/* TGA_Raw_Vector constructor - reads tga from a user-formatted input file
	containing a sequence of tga. Each tga begins with a name line
	(starting with ">") and is followed by one or more tract lines. All
	tract lines in a tga are of equal length (>=1). Blank lines and lines
	beginning with "#" are ignored. Memory is allocated dynamically so as
	to accommodate an arbitrary number of tga, with an arbitrary number of
	tracts of arbitrary length.
*/
/* Parses every tract group from `in` and stores them, in file order, in the
   tga array; size receives the number of groups. Groups are first collected
   in a temporary linked list because their number is not known in advance.
   Any format error (or a null stream) prints a message to stderr and calls
   exit(1).
*/
TGA_Raw_Vector::TGA_Raw_Vector(FILE *in)
{
	if (in == 0) {
		fprintf(stderr,"attept to read tga's from an invalid input file\n");
		exit(1);
	}
	size = 0;
	TGA_Raw_Node *first_node = 0;
	int linebuffersize = 100;
	char *linebuffer = new char[linebuffersize];
	if (linebuffer == 0) {
		fprintf(stderr,"out of memory while reading tga from input file\n");
		exit(1);
	}
	int linelength;
	/* state of the group currently being read */
	int tract_length = 0;
	int num_tracts = 0;
	char *namebuffer = 0;	/* 0 until a name line has been seen */
	int (*distbuffer)[TGA_Raw::NUM_BASES] = 0;	/* 0 until the group's first tract line */
	while (!feof(in)){
		fgetline_stretch(in,&linebuffer,&linebuffersize,&linelength);
		right_trim(linebuffer,&linelength);
		/* step 1: a non-empty line that is neither a name nor a comment is a tract */
		if (*linebuffer != '>' && *linebuffer != '#' && linelength > 0) {
			/* process tract line */
			if (namebuffer == 0) {
				fprintf(stderr, "error in input file format:");
				fprintf(stderr, " file does not begin with a tract group name\n");
				exit(1);
			}
			if (distbuffer == 0) {
				/* first tract */
				/* its length fixes the length required of the group's other tracts */
				tract_length = linelength;
				distbuffer = new int[tract_length][TGA_Raw::NUM_BASES];
				if (distbuffer == 0) {
					fprintf(stderr,"out of memory while reading tga from input file\n");
					exit(1);
				}
				for (int pos1 = 0; pos1 < tract_length; pos1++) {
					for (int base1 = 0; base1 < TGA_Raw::NUM_BASES; base1++) {
						distbuffer[pos1][base1] = 0;
					}
				}
			} 
			if (linelength != tract_length) {
				fprintf(stderr, "error in input file format:");
				fprintf(stderr, " tract group '%s' has unequal tract lengths\n", namebuffer);
				exit(1);
			}
			/* count each character into column 0..3 for a/c/g/t (case-insensitive) */
			for (int pos2 = 0; pos2 < tract_length; pos2++) {
				switch (linebuffer[pos2]) {
				case 'a':
				case 'A':
					distbuffer[pos2][0] += 1; break;
				case 'c':
				case 'C':
					distbuffer[pos2][1] += 1; break;
				case 'g':
				case 'G':
					distbuffer[pos2][2] += 1; break;
				case 't':
				case 'T':
					distbuffer[pos2][3] += 1; break;
				case 'n':
				case 'N':
					/* column chosen by int_ranged_rand(4) - presumably a random base in 0..3; confirm against its definition */
					distbuffer[pos2][int_ranged_rand(4)] += 1; break;
				default:
					fprintf(stderr, "error in input file: unrecognized character");
					fprintf(stderr, " '%c' in tract of group '%s'\n",
							linebuffer[pos2], namebuffer);
					exit(1);
				}
			}
			num_tracts++;
		}
		/* step 2: a new name line (with a group already open) or end of file closes the current group */
		if ((*linebuffer == '>' && namebuffer != 0) || feof(in)) {
			/* end of the node - write it */
			if (namebuffer == 0 && feof(in)) {
				fprintf(stderr,"error in input file format:");
				fprintf(stderr," no tract groups found\n");
				exit(1);
			}
			if (distbuffer == 0) {
				fprintf(stderr,"error in input file format:");
				fprintf(stderr," tract group '%s' missing tract data.\n", namebuffer);
				exit(1);
			}
			/* the group's id is its position in the file (current value of size) */
			TGA_Raw *new_tga = new TGA_Raw(namebuffer, size, num_tracts, tract_length, distbuffer);
			if (new_tga == 0) {
				fprintf(stderr,"out of memory creating new tga\n");
				exit(1);
			}
			TGA_Raw_Node *new_node = new TGA_Raw_Node();
			if (new_node == 0) {
				fprintf(stderr,"out of memory creating new tga\n");
				exit(1);
			}
			new_node -> item = new_tga;
			new_node -> next = 0;
			TGA::tract_len_seen(tract_length);
			/* check for uniqueness of names and insert */
			if (first_node == 0) {
				first_node = new_node;
			} else {
				/* walk to the tail, comparing names on the way */
				TGA_Raw_Node *node_scan = first_node;
				while (node_scan -> next != 0) {
					if (strcmp(node_scan->item->get_name(),
							new_node->item->get_name()) == 0) {
						fprintf(stderr, "error in input file:");
						fprintf(stderr, " duplicate tract group");
						fprintf(stderr, " name: %s\n",new_node->item->get_name());
						exit(1);
					}
					node_scan = node_scan -> next;
				}
				/* the loop stops on the tail node without comparing it, so check it here */
				if (strcmp(node_scan->item->get_name(),
						new_node->item->get_name()) == 0) {
					fprintf(stderr, "error in input file: duplicate tract group");
					fprintf(stderr, " name: %s\n",new_node->item->get_name());
					exit(1);
				}
				node_scan -> next = new_node;
			}
			size++;
			/* release the per-group buffers and reset the group state */
			delete[] namebuffer;
			delete[] distbuffer;
			namebuffer = 0;
			distbuffer = 0;
			num_tracts = 0;
			tract_length = 0;
		}
		/* step 3: a name line opens the next group (runs after step 2 has closed the previous one) */
		if (*linebuffer == '>') {
			/* record name */
			if (feof(in)) {
				fprintf(stderr,"error in input file format:");
				fprintf(stderr," tract group '%s' missing tract data.\n", linebuffer);
				exit(1);
			}
			/* the name drops the leading '>', so linelength bytes hold it plus the terminator */
			namebuffer = new char[linelength];
			if (namebuffer == 0) {
				fprintf(stderr,"out of memory while reading tga from input file\n");
				exit(1);
			}
			strcpy(namebuffer,linebuffer + 1);
		}
	}
	delete[] linebuffer;
	/* move the collected items into the final array, freeing list nodes as we go */
	TGA_Raw_Node *del_node;
	tga = new TGA_Raw *[size];
	if (tga == 0) {
		fprintf(stderr,"out of memory while reading tga from input file\n");
		exit(1);
	}
	for (int pos3 = 0; pos3 < size; pos3++) {
		del_node = first_node;
		tga[pos3] = first_node->item;
		first_node = first_node->next;
		delete del_node;
	}
}

/* TGA_Raw_Vector destructor - deletes all TGA_Raw elements and array object
*/
TGA_Raw_Vector::~TGA_Raw_Vector()
{
	/* destroy each element, then the pointer array that held them */
	int pos = 0;
	while (pos < size) {
		delete tga[pos];
		pos++;
	}
	delete[] tga;
}

/* TGA_Raw_Vector subscript operator
*/
const TGA_Raw *TGA_Raw_Vector::operator[](int index) const
{
	/* bounds-checked read access; an invalid index terminates the program */
	const bool in_range = (index >= 0) && (index < size);
	if (!in_range) {
		fprintf(stderr,"Out of bounds access in TGA_Raw_Vector[]\n");
		exit(1);
	}
	return tga[index];
}

/* Returns the sum of get_num_tracts() over every stored tga. */
int TGA_Raw_Vector::get_total_tracts() const
{
	int total = 0;
	int pos = 0;
	while (pos < size) {
		total += tga[pos]->get_num_tracts();
		++pos;
	}
	return total;
}

/* TGA implementation */

/* alignment window length; INT_MAX until tract_len_seen() or set_aw_len() assigns it */
int TGA::aw_len = INT_MAX;
/* upper bound for the window length; lowered by tract_len_seen() and set_aw_len() */
int TGA::aw_len_max = INT_MAX;

/* Records a tract length; when it is shorter than the current maximum, both
   the maximum and the current window length shrink to it. */
void TGA::tract_len_seen(int len) {
	if (len >= aw_len_max) return;
	aw_len = len;
	aw_len_max = len;
}

/* Sets the alignment window length. Values outside [1, aw_len_max] are
   reported on stderr and terminate the program. */
void TGA::set_aw_len(int new_aw_len) {
	const bool allowed = (new_aw_len >= 1) && (new_aw_len <= aw_len_max);
	if (!allowed) {
		fprintf(stderr,"Attept to set alignment window length out of bounds.\n");
		fprintf(stderr,"Value: %d, Allowable bounds: [1,%d].\n",new_aw_len,aw_len_max);
		exit(1);
	}
	/* the maximum follows the new value: window can never grow - avoids aw_left_max errors */
	aw_len_max = new_aw_len;
	aw_len = new_aw_len;
}

/* Wraps a raw tga: forward sense, window placed at half of the largest
   possible left offset, not yet assigned to a cluster. */
TGA::TGA(const TGA_Raw *a_core) {
	core = a_core;
	cluster_num = -1; /* unassigned */
	aw_sense = true;
	/* largest left offset that keeps the window inside the tract */
	aw_left_max = core->get_tract_len() - aw_len;
	aw_left = aw_left_max / 2;
}

/* Copies every field of *tga; the raw core is shared, not duplicated. */
TGA::TGA(const TGA *tga) {
	const TGA &src = *tga;
	core = src.core;
	cluster_num = src.cluster_num;
	aw_sense = src.aw_sense;
	aw_left = src.aw_left;
	aw_left_max = src.aw_left_max;
}

/* Returns a newly allocated copy of this TGA, built with the TGA(const TGA *) constructor. */
TGA *TGA::create_clone()
{
	TGA *copy = new TGA(this);
	return copy;
}

/* Prints this tga to `out`: a header line with name and window offset and,
   when verbose, one row of counts per base. Rows are left-padded with
   five-space cells so that windows line up against aw_left_max_max. For the
   reverse-complement sense the base rows and the positions are both
   printed in reverse order.
*/
void TGA::write(FILE *out, bool verbose, int aw_left_max_max)
{
	if (aw_sense)
		fprintf(out,">%s    (offset:%d)\n",get_name(),aw_left);
	else
		fprintf(out,">%s    (offset:%d,rev-cmp)\n",get_name(),aw_left);
	if (!verbose) return;

	const int pad_cells = aw_sense
			? aw_left_max_max - aw_left
			: aw_left_max_max - (aw_left_max - aw_left);
	const int last_base = TGA_Raw::NUM_BASES - 1;
	for (int row = 0; row <= last_base; row++) {
		const int base = aw_sense ? row : last_base - row;
		for (int i = 0; i < pad_cells; i++) fprintf(out,"     ");
		const int last_pos = TGA::core->get_tract_len() - 1;
		for (int col = 0; col <= last_pos; col++) {
			const int pos = aw_sense ? col : last_pos - col;
			fprintf(out,"%5d",get_distrib()[pos][base]);
		}
		fprintf(out,"\n");
	}
}

/* Cluster implementation */

/* Creates an empty cluster: no tga, no tracts, zero entropy and an all-zero
   count table with one row per alignment-window position. */
Cluster::Cluster(int a_id)
{
	id = a_id;
	num_tga = 0;
	num_tracts = 0;
	entropy = 0.0;
	base_distrib = new int[TGA::get_aw_len()][TGA_Raw::NUM_BASES];
	if (base_distrib == 0) {
		fprintf(stderr,"out of memory while creating cluster\n");
		exit(1);
	}
	int row = 0;
	while (row < TGA::get_aw_len()) {
		for (int col = 0; col < TGA_Raw::NUM_BASES; col++)
			base_distrib[row][col] = 0;
		row++;
	}
}

/* Copies *cluster, including a freshly allocated duplicate of its count table. */
Cluster::Cluster(const Cluster *cluster)
{
	id = cluster->id;
	num_tga = cluster->num_tga;
	num_tracts = cluster->num_tracts;
	entropy = cluster->entropy;
	base_distrib = new int[TGA::get_aw_len()][TGA_Raw::NUM_BASES];
	if (base_distrib == 0) {
		fprintf(stderr,"out of memory while cloning cluster\n");
		exit(1);
	}
	int row = 0;
	while (row < TGA::get_aw_len()) {
		for (int col = 0; col < TGA_Raw::NUM_BASES; col++)
			base_distrib[row][col] = cluster->base_distrib[row][col];
		row++;
	}
}

Cluster::~Cluster()
{
	/* release the count table allocated by the constructors */
	delete[] base_distrib;
}

/* Adds tga to this cluster: bumps the tga and tract counters, adds the
   counts under the tga's alignment window to the cluster table (walking the
   table backwards when the tga is in reverse sense), stores the
   caller-supplied entropy and stamps the tga with this cluster's id.
*/
void Cluster::insert(TGA *tga, double new_entropy)
{
	num_tga++;
	num_tracts += tga->get_num_tracts();

	const int aw_len = TGA::get_aw_len();
	const int (*window)[TGA_Raw::NUM_BASES] = tga->get_aw_distrib();
	const int *src = window[0];
	const int * const src_end = window[aw_len];

	/* forward sense fills from the first cell up, reverse from the last cell down */
	const bool forward = tga->aw_sense;
	const int step = forward ? 1 : -1;
	int *dst = forward ? &base_distrib[0][0]
			: &base_distrib[aw_len - 1][TGA_Raw::NUM_BASES - 1];
	for (; src < src_end; ++src, dst += step) {
		*dst += *src;
	}

	entropy = new_entropy;
	tga->cluster_num = id;
}

/* Overwrites the whole count table with the values read consecutively from
   distrib_start (aw_len * NUM_BASES ints) and stores a_entropy. */
void Cluster::set_distrib_and_entropy(int *distrib_start, double a_entropy)
{
	int *dst = &base_distrib[0][0];
	/* one past the final cell of the last row */
	int * const dst_end = &base_distrib[TGA::get_aw_len()-1][TGA_Raw::NUM_BASES];
	while (dst != dst_end) {
		*dst = *distrib_start;
		++dst;
		++distrib_start;
	}
	entropy = a_entropy;
}

/* Prints a summary line followed by one row of counts per base; each row is
   framed on both sides by aw_left_max_max dashed cells. */
void Cluster::write(FILE *out, int aw_left_max_max)
{
	fprintf(out,"Cluster %d has %d tga, %d tracts, %f entropy\n", id, num_tga, num_tracts, entropy);
	int base = 0;
	while (base < TGA_Raw::NUM_BASES) {
		int i;
		for (i = 0; i < aw_left_max_max; i++) fprintf(out,"-----");
		for (int pos = 0; pos < TGA::get_aw_len(); pos++)
			fprintf(out,"%5d",base_distrib[pos][base]);
		for (i = 0; i < aw_left_max_max; i++) fprintf(out,"-----");
		fprintf(out,"\n");
		base++;
	}
}

/* ClusterPlain implementation */

/* empty cluster with the given id; all setup is done by Cluster(int) */
ClusterPlain::ClusterPlain(int a_id)
	: Cluster(a_id)
{
}

/* copy of *cluster; all copying is done by Cluster(const Cluster *) */
ClusterPlain::ClusterPlain(const ClusterPlain *cluster)
	: Cluster(cluster)
{
}

/* Returns a newly allocated ClusterPlain copy of this cluster. */
Cluster *ClusterPlain::create_clone()
{
	ClusterPlain *copy = new ClusterPlain(this);
	return copy;
}

/* Computes, for every window offset 0..aw_left_max of tga, the entropy this
   cluster would have if tga were inserted at that offset. The cluster
   itself is not modified.
	tga               - candidate whose full tract distribution is scanned
	alignment_entropy - output vector; (aw_left_max + 1) forward-sense
	                    values are written starting at *vector_pos,
	                    immediately followed by (aw_left_max + 1)
	                    backward-sense values
	vector_pos        - in: first index to write; out: one past the last
	                    index written
   When mp.single_strand is set, the backward slots receive the forward
   values instead.
*/
void ClusterPlain::get_insert_entropy_profile(TGA *tga,
		double *alignment_entropy, int *vector_pos) const
{

	/* TODO initially, this is done so that the window is always sampled
	probabilistically (even for singlets - singles always have best alignment? */

	const int aw_len = TGA::get_aw_len();
	const int aw_left_max = tga->get_aw_left_max();
	const int (*tga_start)[TGA_Raw::NUM_BASES] = tga->get_distrib();
	/* term shared by every offset: depends only on the combined tract count */
	const double positive_entropy = aw_len * LGNC::lgnor4q(num_tracts + tga->get_num_tracts());
	/* first and last cell of the cluster table, start points of the two walks */
	const int * const cluster_base_first = &base_distrib[0][0];
	const int * const cluster_base_last = &base_distrib[aw_len - 1][TGA_Raw::NUM_BASES - 1];
	/* outer loop variables */
	const int (*tga_left)[TGA_Raw::NUM_BASES];
	const int (* const tga_left_last)[TGA_Raw::NUM_BASES] = &tga_start[aw_left_max];
	int vector_pos_forw;
	int vector_pos_back;
	/* inner loop variables */
	const int *tga_base;
	const int *tga_base_stop;
	const int *cluster_base_forw;
	const int *cluster_base_back;
	double try_entropy_forw;
	double try_entropy_back;
	/* forward results go first, backward results start right after them */
	vector_pos_forw = *vector_pos;
	vector_pos_back = *vector_pos + aw_left_max + 1;
	/* consider forward and backward window senses, insert entropies into vector */
	for (tga_left = tga_start; tga_left <= tga_left_last; tga_left++) {
		/* the window covers rows tga_left[0] .. tga_left[aw_len - 1] */
		tga_base = tga_left[0];
		tga_base_stop = tga_left[aw_len];
		cluster_base_forw = cluster_base_first;
		cluster_base_back = cluster_base_last;
		try_entropy_forw = positive_entropy;
		try_entropy_back = positive_entropy;
		while (tga_base < tga_base_stop) {
			/* pair each tga count with the cluster cell met walking up (forward) and walking down (backward) */
			const int tga_base_count = *tga_base++;
			try_entropy_forw -= LGNC::lgnorq(tga_base_count + *cluster_base_forw++);
			try_entropy_back -= LGNC::lgnorq(tga_base_count + *cluster_base_back--);
		}
		alignment_entropy[vector_pos_forw++] = try_entropy_forw;
		if(mp.single_strand == false){
		  alignment_entropy[vector_pos_back++] = try_entropy_back;
		}
		else{
		  /* single-strand mode: the backward slot duplicates the forward value */
		  alignment_entropy[vector_pos_back++] = try_entropy_forw;
		}
	}
	*vector_pos = vector_pos_back;
}

/* Adds tga to this cluster at its current window and sense: updates the
   counters, adds the window counts to the cluster table (walking the table
   backwards for reverse sense), recomputes the entropy from the updated
   table and stamps the tga with this cluster's id.
*/
void ClusterPlain::insert(TGA *tga)
{
	num_tga++;
	num_tracts += tga->get_num_tracts();

	/* entropy starts from the tract-count term; each cell then subtracts its share */
	entropy = TGA::get_aw_len() * LGNC::lgnor4q(num_tracts);

	const int aw_len = TGA::get_aw_len();
	const int (*window)[TGA_Raw::NUM_BASES] = tga->get_aw_distrib();
	const int *src = window[0];
	const int * const src_end = window[aw_len];

	const bool forward = tga->aw_sense;
	const int step = forward ? 1 : -1;
	int *dst = forward ? &base_distrib[0][0]
			: &base_distrib[aw_len - 1][TGA_Raw::NUM_BASES - 1];
	for (; src < src_end; ++src, dst += step) {
		const int merged = *dst + *src;
		*dst = merged;
		entropy -= LGNC::lgnorq(merged);
	}

	tga->cluster_num = id;
}

/* removes a tga from a cluster, and adjusts the entropy */
/* Takes tga out of this cluster: updates the counters, then either clears
   the table and entropy (no tracts left) or subtracts the tga's window
   counts (walking the table backwards for reverse sense) and recomputes the
   entropy from the updated table.
*/
void ClusterPlain::remove(TGA *tga)
{
	num_tga--;
	num_tracts -= tga->get_num_tracts();

	if (num_tracts == 0) {
		/* cluster is now empty: reset rather than subtract */
		entropy = 0;
		for (int pos = 0; pos < TGA::get_aw_len(); pos++)
			for (int base = 0; base < TGA_Raw::NUM_BASES; base++)
				base_distrib[pos][base] = 0;
		return;
	}

	entropy = TGA::get_aw_len() * LGNC::lgnor4q(num_tracts);

	const int aw_len = TGA::get_aw_len();
	const int (*window)[TGA_Raw::NUM_BASES] = tga->get_aw_distrib();
	const int *src = window[0];
	const int * const src_end = window[aw_len];

	const bool forward = tga->aw_sense;
	const int step = forward ? 1 : -1;
	int *dst = forward ? &base_distrib[0][0]
			: &base_distrib[aw_len - 1][TGA_Raw::NUM_BASES - 1];
	for (; src < src_end; ++src, dst += step) {
		const int remaining = *dst - *src;
		*dst = remaining;
		entropy -= LGNC::lgnorq(remaining);
	}
}

/* ClusterRandom implementation */

/* empty cluster with the given id; all setup is done by Cluster(int) */
ClusterRandom::ClusterRandom(int a_id)
	: Cluster(a_id)
{
}

/* copy of *cluster; all copying is done by Cluster(const Cluster *) */
ClusterRandom::ClusterRandom(const ClusterRandom *cluster)
	: Cluster(cluster)
{
}

/* Returns a newly allocated ClusterRandom copy of this cluster. */
Cluster *ClusterRandom::create_clone()
{
	ClusterRandom *copy = new ClusterRandom(this);
	return copy;
}

/* Random-columns counterpart of ClusterPlain::get_insert_entropy_profile.
   For every window offset 0..aw_left_max of tga it sums, over the window's
   columns, get_column_score(H_wm, H_bg), where H_wm is the column's
   weight-matrix entropy and H_bg its background entropy, both computed as if
   tga were inserted at that offset. The cluster itself is not modified.
	tga               - candidate whose full tract distribution is scanned
	alignment_entropy - output vector; (aw_left_max + 1) forward-sense
	                    values are written starting at *vector_pos,
	                    immediately followed by (aw_left_max + 1)
	                    backward-sense values
	vector_pos        - in: first index to write; out: one past the last
	                    index written
   When mp.single_strand is set, the backward slots receive the forward
   values instead.
*/
void ClusterRandom::get_insert_entropy_profile(TGA *tga,
		double *alignment_entropy, int *vector_pos) const
{
  const int aw_len = TGA::get_aw_len();
  const int aw_left_max = tga->get_aw_left_max();
  const int (*tga_start)[TGA_Raw::NUM_BASES] = tga->get_distrib();
  /* note that this is the constant for a single column */
  const double positive_entropy = 
    LGNC::lgnor4q(num_tracts + tga->get_num_tracts());

   /** these are minus the logarithms of the background frequencies***/
   /** (all four entries are 1.386294361, i.e. -log(1/4): a uniform background) **/
  static double MinLogBgFreq[4] = {1.386294361,1.386294361,1.386294361,1.386294361};
  /***ERIK probably for optimization you want to do this differently?***/

  /** i.e. I assume that I have available variables MinLogBgFreq[0] through MinLogBgFreq[3]**/
  /** with A=0,C=1,G=2,T=3**/

  /*** starting value of each offset's running total (currently zero) ****/
  const double base_entropy = 0;
  
  /* first and last cell of the cluster table, start points of the two walks */
  const int * const cluster_base_first = &base_distrib[0][0];
  const int * const cluster_base_last = &base_distrib[aw_len - 1][TGA_Raw::NUM_BASES - 1];
  /* outer loop variables */
  const int (*tga_left)[TGA_Raw::NUM_BASES];
  const int (* const tga_left_last)[TGA_Raw::NUM_BASES] = &tga_start[aw_left_max];
  int vector_pos_forw;
  int vector_pos_back;
  /* inner loop variables */
  const int *tga_base;
  const int *tga_base_stop;
  const int *cluster_base_forw;
  const int *cluster_base_back;
  double try_entropy_forw;
  double try_entropy_back;
  /***this counter checks if we have gotten all the bases for a column***/
  int modcount;
  double H_wm_forw;
  double H_wm_back;
  double H_bg_forw;
  double H_bg_back;
  /* forward results go first, backward results start right after them */
  vector_pos_forw = *vector_pos;
  vector_pos_back = *vector_pos + aw_left_max + 1;
  /* consider forward and backward window senses, insert entropies into vector */
  for (tga_left = tga_start; tga_left <= tga_left_last; tga_left++) {
    /* the window covers rows tga_left[0] .. tga_left[aw_len - 1] */
    tga_base = tga_left[0];
    tga_base_stop = tga_left[aw_len];
    cluster_base_forw = cluster_base_first;
    cluster_base_back = cluster_base_last;
    /** start both running totals from base_entropy (zero) **/
    try_entropy_forw = base_entropy;
    try_entropy_back = base_entropy;
    
    modcount = 0;
    /***these are going to contain the WM and BG entropies for a single column**/
    H_wm_forw = positive_entropy;
    H_bg_forw = 0;
    H_wm_back = positive_entropy;
    H_bg_back = 0;
    while (tga_base < tga_base_stop) {
      const int tga_base_count = *tga_base++;
      H_wm_forw -= LGNC::lgnorq(tga_base_count + *cluster_base_forw);
      H_wm_back -= LGNC::lgnorq(tga_base_count + *cluster_base_back);
      /* the backward walk visits the bases of a column in reverse order, hence index 3-modcount */
      H_bg_forw += (tga_base_count + *cluster_base_forw) * MinLogBgFreq[modcount];
      H_bg_back += (tga_base_count + *cluster_base_back) * MinLogBgFreq[3-modcount];
      ++cluster_base_forw;
      --cluster_base_back;
      ++modcount;  
      
      /**we have completed the score for a column***/
      if(modcount == 4){
        modcount = 0;
        try_entropy_forw += get_column_score(H_wm_forw,H_bg_forw);
        try_entropy_back += get_column_score(H_wm_back,H_bg_back);
        /**reset values**/
        H_wm_forw = positive_entropy;
        H_bg_forw = 0;
        H_wm_back = positive_entropy;
        H_bg_back = 0;
      }
    }
    alignment_entropy[vector_pos_forw++] = try_entropy_forw;
    if(mp.single_strand == false){
      alignment_entropy[vector_pos_back++] = try_entropy_back;
    }
    else{
      /* single-strand mode: the backward slot duplicates the forward value */
      alignment_entropy[vector_pos_back++] = try_entropy_forw;
    }
  }
  *vector_pos = vector_pos_back;
}

/**ERIK copy of the insert function for random columns case**/
/* Random-columns version of insert: updates the counters, adds the tga's
   window counts to the cluster table (walking the table backwards for
   reverse sense) and rebuilds the entropy as the sum of
   get_column_score(H_wm, H_bg) over the window's columns, then stamps the
   tga with this cluster's id.
*/
void ClusterRandom::insert(TGA *tga)
{
  /* minus log of the background frequency of each base */
  static double MinLogBgFreq[4] = {1.386294361,1.386294361,1.386294361,1.386294361};

  num_tga++;
  num_tracts += tga->get_num_tracts();

  entropy = 0;
  /* starting weight-matrix entropy of every column */
  const double column_start = LGNC::lgnor4q(num_tracts);

  const int aw_len = TGA::get_aw_len();
  const int (*window)[TGA_Raw::NUM_BASES] = tga->get_aw_distrib();
  const int *src = window[0];
  const int * const src_end = window[aw_len];

  const bool forward = tga->aw_sense;
  const int step = forward ? 1 : -1;
  int *dst = forward ? &base_distrib[0][0]
                     : &base_distrib[aw_len - 1][TGA_Raw::NUM_BASES - 1];

  double H_wm = column_start;
  double H_bg = 0;
  int in_column = 0;   /* cells of the current column handled so far */
  for (; src < src_end; ++src, dst += step) {
    const int merged = *dst + *src;
    *dst = merged;
    H_wm -= LGNC::lgnorq(merged);
    /* a backward walk meets the bases of a column in reverse order */
    H_bg += merged * MinLogBgFreq[forward ? in_column : 3 - in_column];
    if (++in_column < 4)
      continue;
    /* column complete: score it and start the next one */
    entropy += get_column_score(H_wm,H_bg);
    H_wm = column_start;
    H_bg = 0;
    in_column = 0;
  }

  tga->cluster_num = id;
}

/**ERIK The random columns version of the remove function**/
/* Random-columns version of remove: updates the counters, then either
   clears the table and entropy (no tracts left) or subtracts the tga's
   window counts (walking the table backwards for reverse sense) and
   rebuilds the entropy as the sum of get_column_score(H_wm, H_bg) over the
   window's columns.
*/
void ClusterRandom::remove(TGA *tga)
{
  num_tga--;
  num_tracts -= tga->get_num_tracts();

  if (num_tracts == 0) {
    /* cluster has become empty: reset rather than subtract */
    entropy = 0;
    for (int pos = 0; pos < TGA::get_aw_len(); pos++)
      for (int base = 0; base < TGA_Raw::NUM_BASES; base++)
        base_distrib[pos][base] = 0;
    return;
  }

  entropy = 0;
  /* starting weight-matrix entropy of every column */
  const double column_start = LGNC::lgnor4q(num_tracts);
  /* minus log of the background frequency of each base */
  static double MinLogBgFreq[4] = {1.386294361,1.386294361,1.386294361,1.386294361};

  const int aw_len = TGA::get_aw_len();
  const int (*window)[TGA_Raw::NUM_BASES] = tga->get_aw_distrib();
  const int *src = window[0];
  const int * const src_end = window[aw_len];

  const bool forward = tga->aw_sense;
  const int step = forward ? 1 : -1;
  int *dst = forward ? &base_distrib[0][0]
                     : &base_distrib[aw_len - 1][TGA_Raw::NUM_BASES - 1];

  double H_wm = column_start;
  double H_bg = 0;
  int in_column = 0;   /* cells of the current column handled so far */
  for (; src < src_end; ++src, dst += step) {
    const int remaining = *dst - *src;
    *dst = remaining;
    H_wm -= LGNC::lgnorq(remaining);
    /* a backward walk meets the bases of a column in reverse order */
    H_bg += remaining * MinLogBgFreq[forward ? in_column : 3 - in_column];
    if (++in_column < 4)
      continue;
    /* column complete: score it and start the next one */
    entropy += get_column_score(H_wm,H_bg);
    in_column = 0;
    H_wm = column_start;
    H_bg = 0;
  }
}

/* MoveSet implementation */

/* Starts with no moves, flagged as already applied, and a one-element
   neglogpostprob vector. */
MoveSet::MoveSet(int a_state_size) : state_size(a_state_size)
{
	applied = true;
	num_moves = 0;
	neglogpostprob = new double[1];
	if (neglogpostprob == 0) {
		fprintf(stderr,"Error: out of memory during MoveSet Constructor\n");
		fflush(0);
		exit(1);
	}
	neglogpostprob_size = 1;
}

MoveSet::~MoveSet()
{
	/* release the neglogpostprob vector */
	delete[] neglogpostprob;
}

/* InsertMoveSet implementation */

/* Starts with no tga, donor or destination selected and a one-element
   inserted_entropy vector; keeps the caller's cluster-size histogram
   pointer and configures LC for a_state_size + 1. */
InsertMoveSet::InsertMoveSet(int a_state_size, int *a_cluster_size_hist,
		double *a_stirling_partition, double a_chemical_potential) :
		MoveSet(a_state_size), stirling_partition(a_stirling_partition),
		chemical_potential(a_chemical_potential)
{
	cluster_size_hist = a_cluster_size_hist;
	tga = 0;
	donor = 0;
	destination = donor;
	inserted_entropy = new double[1];
	if (inserted_entropy == 0) {
		fprintf(stderr,"Error: out of memory during InsertMoveSet Constructor\n");
		fflush(0);
		exit(1);
	}
	inserted_entropy_size = 1;
	LC::configure(a_state_size + 1);
}

InsertMoveSet::~InsertMoveSet()
{
	/* release the inserted_entropy vector */
	delete[] inserted_entropy;
}

/* Sets up the insert moves for taking a_tga out of a_donor and placing it
   either back into a_donor or into a_acceptor.  The tga is removed from the
   donor here (cluster_size_hist is kept in step), so the move is only half
   done until apply() is called.  Every placement reported by the two
   clusters' insert entropy profiles gets a score in neglogpostprob:
   inserted entropy minus the cluster's current entropy minus the prior term
   from get_prior_nlpp(). */
void InsertMoveSet::canabalize(Cluster *a_donor, Cluster *a_acceptor, TGA *a_tga)
{
	const int candidate_count = 2;	/* the donor itself plus the one acceptor */
	tga = a_tga;
	donor = a_donor;
	/* two groups of (aw_left_max + 1) placements for each of the two clusters */
	num_moves = 4 * (tga->get_aw_left_max() + 1);
	acceptor.grow(candidate_count);
	acceptor[0] = donor;
	acceptor[1] = a_acceptor;
	/* enlarge the per-move buffers when this proposal needs more slots */
	if (inserted_entropy_size < num_moves) {
		delete[] inserted_entropy;
		inserted_entropy = new double[num_moves];
		if (inserted_entropy == 0) {
			fprintf(stderr,"Error: out of memory during InsertMoveSet::canabalize\n");
			fflush(0);
			exit(1);
		}
		inserted_entropy_size = num_moves;
	}
	if (neglogpostprob_size < num_moves) {
		delete[] neglogpostprob;
		neglogpostprob = new double[num_moves];
		if (neglogpostprob == 0) {
			fprintf(stderr,"Error: out of memory during InsertMoveSet::canabalize\n");
			fflush(0);
			exit(1);
		}
		neglogpostprob_size = num_moves;
	}
	/* pull the tga out of the donor and move the donor to its new histogram bin */
	cluster_size_hist[donor->get_num_tga()]--;
	donor->remove(tga);
	cluster_size_hist[donor->get_num_tga()]++;
	int filled = 0;	/* slots written so far by the entropy profiles */
	int scored = 0;	/* slots already turned into scores */
	for (int which = 0; which < candidate_count; ++which) {
		Cluster *candidate = acceptor[which];
		candidate->get_insert_entropy_profile(tga,inserted_entropy,&filled);
		const double entropy_before = candidate->get_entropy();
		const double prior_term = get_prior_nlpp(acceptor[which]);
		while (scored < filled) {
			neglogpostprob[scored] = inserted_entropy[scored] - entropy_before;
			neglogpostprob[scored] -= prior_term;
			++scored;
		}
	}
	/* nothing chosen yet: the tga nominally still belongs to the donor */
	destination = donor;
	applied = false;
}

/* Completes the pending insert using move number take_move (an index into
   the scores prepared by canabalize).  Moves are grouped per cluster; inside
   a group the first aw_left_max + 1 entries set aw_sense to true and the rest
   set it to false, unless mp.single_strand forces aw_sense to true.
   Exits the program if the move set was already applied. */
void InsertMoveSet::apply(int take_move)
{
	if (completed()) {
		fprintf(stderr,"Error: InsertMove object was applied twice\n");
		exit(1);
	}
	const int positions = tga->get_aw_left_max() + 1;
	const int per_cluster = 2 * positions;
	destination = acceptor[take_move / per_cluster];
	const int within_cluster = take_move % per_cluster;
	tga->aw_left = within_cluster % positions;
	if (mp.single_strand == false) {
		tga->aw_sense = within_cluster < positions;
	} else {
		tga->aw_sense = true;
	}
	/* move the destination cluster from its old size bin to its new one */
	cluster_size_hist[destination->get_num_tga()]--;
	destination->insert(tga,inserted_entropy[take_move]);
	cluster_size_hist[destination->get_num_tga()]++;
	applied = true;
}

/* Insert move set variant; all arguments are forwarded to InsertMoveSet. */
InsertMoveSetSANone::InsertMoveSetSANone(int a_state_size, int *a_cluster_size_hist,
		double *a_stirling_partition, double a_chemical_potential)
	: InsertMoveSet(a_state_size, a_cluster_size_hist,
			a_stirling_partition, a_chemical_potential)
{
}

/* Prior term for inserting into `cluster`: the chemical potential when the
   cluster is empty, zero otherwise. */
double InsertMoveSetSANone::get_prior_nlpp(Cluster *cluster) const
{
	if (cluster->empty()) {
		return chemical_potential;
	}
	return 0.0;
}

/* Insert move set variant; all arguments are forwarded to InsertMoveSet. */
InsertMoveSetSACount::InsertMoveSetSACount(int a_state_size, int *a_cluster_size_hist,
		double *a_stirling_partition, double a_chemical_potential)
	: InsertMoveSet(a_state_size, a_cluster_size_hist,
			a_stirling_partition, a_chemical_potential)
{
}

/* Prior term for inserting into `cluster`.  For an empty cluster this is the
   stirling_partition entry indexed by (state_size - number of empty
   clusters) plus the chemical potential; for a non-empty cluster it is zero. */
double InsertMoveSetSACount::get_prior_nlpp(Cluster *cluster) const
{
	if (!cluster->empty()) {
		return 0.0;
	}
	return stirling_partition[state_size - cluster_size_hist[0]] + chemical_potential;
}

/* Insert move set variant; all arguments are forwarded to InsertMoveSet. */
InsertMoveSetSACountAndSize::InsertMoveSetSACountAndSize(int a_state_size, int *a_cluster_size_hist,
		double *a_stirling_partition, double a_chemical_potential)
	: InsertMoveSet(a_state_size, a_cluster_size_hist,
			a_stirling_partition, a_chemical_potential)
{
}

double InsertMoveSetSACountAndSize::get_prior_nlpp(Cluster *cluster) const
{
        if (cluster->empty()) {
                return stirling_partition[state_size - cluster_size_hist[0]] + LC::log(cluster_size_hist[1]+1) + chemical_potential;
	} else {
                int s = cluster->get_num_tga() + 1;
                return LC::log(s) +
			LC::log(cluster_size_hist[s] + 1) -
			LC::log(cluster_size_hist[s-1]);
	}
}

/* CoherentShiftMoveSet implementation */

/* Creates a coherent-shift move set bound to the caller's TGA array (held by
   reference).  The column entropy buffer starts with one slot and is enlarged
   by canabalize_fill_base_distrib() on demand. */
CoherentShiftMoveSet::CoherentShiftMoveSet(int a_state_size, TGA **&a_tga) :
		MoveSet(a_state_size), tga(a_tga)
{
	/* no cluster selected yet */
	cluster = 0;
	num_member_tga = 0;
	max_left_shift = 0;
	base_distrib_width = 0;
	double *first_buffer = new double[1];
	if (first_buffer == 0) {
		fprintf(stderr,"Error: out of memory during CoherentShiftMoveSet Constructor\n");
		fflush(0);
		exit(1);
	}
	column_entropy = first_buffer;
	column_entropy_size = 1;
}

/* Frees the column entropy buffer. */
CoherentShiftMoveSet::~CoherentShiftMoveSet()
{
	delete [] this->column_entropy;
}

/* Gathers the TGAs that belong to `cluster`, determines how far the whole
   cluster can shift left and right, sizes neglogpostprob / column_entropy /
   base_distrib for that range, and accumulates the members' base counts into
   base_distrib over the widened window of
   max_left_shift + max_right_shift + aw_len columns.
   Members with aw_sense false are read back to front (last base of the last
   column first).
   Exits the program if the number of matching TGAs differs from
   cluster->get_num_tga().
   NOTE(review): with zero member TGAs both shift limits would stay at INT_MAX
   and the sums below would overflow -- confirm callers never pass an empty
   cluster. */
void CoherentShiftMoveSet::canabalize_fill_base_distrib()
{
	int cluster_id = cluster->get_id();
//printf("propose c-s on cluster %d\n",cluster_id);
	/* find tga's that are members of cluster, and shift limits */
	num_member_tga = cluster->get_num_tga();
//printf("cluster has %d tga:\n",num_member_tga);
	member_tga.grow(num_member_tga);
	max_left_shift = INT_MAX;
	int max_right_shift = INT_MAX;
	int member_tga_id = 0;
	int tga_id;
	for (tga_id = 0; tga_id < state_size; tga_id++) {
		if (tga[tga_id]->cluster_num == cluster_id) {
//tga[tga_id]->write(stdout,true,6);
			member_tga[member_tga_id++] = tga[tga_id];
			int mls;	/* this member's room to shift left */
			int mrs;	/* this member's room to shift right */
			/* windows with opposite senses shift in opposite directions */
			if (tga[tga_id]->aw_sense) {
				mls = tga[tga_id]->aw_left;
				mrs = tga[tga_id]->get_aw_left_max() - tga[tga_id]->aw_left;
			} else {
				mls = tga[tga_id]->get_aw_left_max() - tga[tga_id]->aw_left;
				mrs = tga[tga_id]->aw_left;
			}
			/* the cluster can only shift as far as its most constrained member */
			if (mls < max_left_shift) max_left_shift = mls; 
			if (mrs < max_right_shift) max_right_shift = mrs;
		}
	}
//printf("max left shift: %d  max right shift: %d\n",max_left_shift,max_right_shift);
	if (member_tga_id != num_member_tga) {
		fprintf(stderr,"Error: mismatch in tga count in CoherentShiftMoveSet::canabalize\n");
		exit(1);
	}
	/* resize vectors */
	const int aw_len = TGA::get_aw_len();
	/* one move per reachable window position, including "no shift" */
	num_moves = max_left_shift + max_right_shift + 1;
//printf("number of possible moves: %d\n",num_moves);
	if (num_moves > neglogpostprob_size) {
		delete[] neglogpostprob;
		neglogpostprob = new double[num_moves];
		if (!neglogpostprob) {
			fprintf(stderr,"Error: out of memory during CoherentShiftMoveSet::canabalize_fill_base_distrib\n");
			fflush(0);
			exit(1);
		}
		neglogpostprob_size = num_moves;
	}
	base_distrib_width = max_left_shift + max_right_shift + aw_len;
//printf("base distribution width: %d\n",base_distrib_width);
	base_distrib.grow(base_distrib_width);
	if (base_distrib_width > column_entropy_size) {
		delete[] column_entropy;
		column_entropy= new double[base_distrib_width];
		if (!column_entropy) {
			fprintf(stderr,"Error: out of memory during CoherentShiftMoveSet::canabalize_fill_base_distrib\n");
			fflush(0);
			exit(1);
		}
		column_entropy_size = base_distrib_width;
	}
	/* construct expanded distribution */
	int column;
	int row;
	for (column = 0; column < base_distrib_width; column++) {
		for (row = 0; row < TGA_Raw::NUM_BASES; row++) {
			base_distrib[column][row] = 0;
		}
	}
	/* The scans below walk raw pointers from the first to the last cell and
	   cross column boundaries with ++/--, so they rely on consecutive columns
	   of both distributions being adjacent in memory. */
	int * dist_first = &base_distrib[0][0];
	int * dist_last = &base_distrib[base_distrib_width - 1][TGA_Raw::NUM_BASES - 1];
	int * base_scan;
	const int * tga_base_scan;
	for (member_tga_id = 0; member_tga_id < num_member_tga; member_tga_id++) {
		TGA *mtga = member_tga[member_tga_id];
		base_scan = dist_first;
		if (mtga->aw_sense) {
			/* start max_left_shift columns to the left of the member's window */
			tga_base_scan = &(mtga->get_distrib())[mtga->aw_left - max_left_shift][0];
			while (base_scan != dist_last) *base_scan++ += *tga_base_scan++;
		} else {
			/* start at the last base of the column max_left_shift past the
			   window's right edge and read backwards */
			tga_base_scan = &(mtga->get_distrib())
					[mtga->aw_left + aw_len - 1 + max_left_shift]
					[TGA_Raw::NUM_BASES - 1];
			while (base_scan != dist_last) *base_scan++ += *tga_base_scan--;
		}
		/* the loops stop one cell early; add the final cell here */
		*base_scan += *tga_base_scan;
//printf("accumulated:\n");
//int rr,cc;
//for (rr = 0; rr < TGA_Raw::NUM_BASES; rr++) {
//int spaces = 12 - 2 * max_left_shift;
//int s; for (s=0; s<spaces;s++) printf(" ");
//for (cc = 0; cc < base_distrib_width; cc++) {
//printf("%2d",base_distrib[cc][rr]);
//}
//printf("\n");
//}
	}
}

void CoherentShiftMoveSet::canabalize_set_neglogpostprob()
{
	double window_entropy = 0.0;
	const int aw_len = TGA::get_aw_len();
	int column;
	for (column = 0; column < aw_len - 1; column++) {
		window_entropy += column_entropy[column];
	}
	int neglogpostprob_pos;
	for (neglogpostprob_pos = 0; neglogpostprob_pos < num_moves; neglogpostprob_pos++) {
		window_entropy += column_entropy[column++];
		neglogpostprob[neglogpostprob_pos] = window_entropy;
//printf("entropy for window %d-%d: %3.3f\n",neglogpostprob_pos,neglogpostprob_pos + aw_len - 1,neglogpostprob[neglogpostprob_pos]);
		window_entropy -= column_entropy[neglogpostprob_pos];
	}
	applied = false;
}

void CoherentShiftMoveSet::apply(int take_move)
{
	if (completed()) {
		fprintf(stderr,"Error: CoherentShiftMove object was applied twice\n");
		exit(1);
	}
	/* adjust tga's aw_left */
	int leftward_shift = max_left_shift - take_move;
	int member_tga_id;
	for (member_tga_id = 0; member_tga_id < num_member_tga; member_tga_id++) {
//printf("aw_left -- old: %d",member_tga[member_tga_id]->aw_left);
		if (member_tga[member_tga_id]->aw_sense) {
			member_tga[member_tga_id]->aw_left -= leftward_shift;
		} else {
			member_tga[member_tga_id]->aw_left += leftward_shift;
		}
//printf(", new: %d\n",member_tga[member_tga_id]->aw_left); fflush(0);
	}
	/* adjust cluster distribution and entropy */
	cluster->set_distrib_and_entropy(&base_distrib[take_move][0],neglogpostprob[take_move]);
//printf("coherent shift done: picked %d (leftshift:%d)\n",take_move,max_left_shift - take_move);
	applied = true;
}

/* CoherentShiftMoveSetPlain implementation */

/* Coherent-shift variant; arguments are forwarded to CoherentShiftMoveSet. */
CoherentShiftMoveSetPlain::CoherentShiftMoveSetPlain(int a_state_size, TGA **&a_tga)
	: CoherentShiftMoveSet(a_state_size, a_tga)
{
}

/* Prepares the coherent-shift moves for a_cluster.  Each column's entropy is
   LGNC::lgnor4q(number of tracts) minus the sum of LGNC::lgnorq over the
   column's base counts; the window scores are then derived from those. */
void CoherentShiftMoveSetPlain::canabalize(Cluster *a_cluster)
{
	cluster = a_cluster;
	canabalize_fill_base_distrib();
	const double column_base = LGNC::lgnor4q(cluster->get_num_tracts());
	for (int col = 0; col < base_distrib_width; ++col) {
		double &slot = column_entropy[col];
		slot = column_base;
		for (int base = 0; base < TGA_Raw::NUM_BASES; ++base) {
			slot -= LGNC::lgnorq(base_distrib[col][base]);
		}
	}
	canabalize_set_neglogpostprob();
}

/* CoherentShiftMoveSetRandom implementation */

/* Coherent-shift variant; arguments are forwarded to CoherentShiftMoveSet. */
CoherentShiftMoveSetRandom::CoherentShiftMoveSetRandom(int a_state_size, TGA **&a_tga)
	: CoherentShiftMoveSet(a_state_size, a_tga)
{
}

/* Prepares the coherent-shift moves for a_cluster.  For every column two
   quantities are built -- H_wm (LGNC::lgnor4q of the tract count minus the
   lgnorq terms of the base counts) and H_bg (base counts weighted by
   MinLogBgFreq) -- and combined with get_column_score(). */
void CoherentShiftMoveSetRandom::canabalize(Cluster *a_cluster)
{
	/* per-base weights for the H_bg term; each entry is about ln 4 */
	static const double MinLogBgFreq[4] = {1.386294361,1.386294361,1.386294361,1.386294361};
	cluster = a_cluster;
	canabalize_fill_base_distrib();
	const double column_base = LGNC::lgnor4q(cluster->get_num_tracts());
	for (int col = 0; col < base_distrib_width; ++col) {
		double H_wm = column_base;
		double H_bg = 0.0;
		for (int base = 0; base < TGA_Raw::NUM_BASES; ++base) {
			H_wm -= LGNC::lgnorq(base_distrib[col][base]);
			H_bg += MinLogBgFreq[base] * base_distrib[col][base];
		}
		column_entropy[col] = get_column_score(H_wm,H_bg);
	}
	canabalize_set_neglogpostprob();
}

/* State implementation */

/* Builds the common part of an initial state from the raw tracts in trv:
   one TGA per raw entry, an (unfilled) array of cluster pointers, and the
   stirling_partition table.  Exits the program when initial_cluster_size is
   outside 1..size.  The cluster objects and the cluster size histogram are
   left for the derived class constructor to create. */
State::State(const TGA_Raw_Vector &trv, int initial_cluster_size, double a_coherentshiftperiod, double a_chemical_potential) :
		coherentshiftperiod(a_coherentshiftperiod), chemical_potential(a_chemical_potential)
{
	size = trv.get_size();
	const bool cluster_size_ok = initial_cluster_size >= 1 && initial_cluster_size <= size;
	if (!cluster_size_ok) {
		fprintf(stderr,"illegal inital cluster size (%d) while constructing initial state\n",initial_cluster_size);
		exit(1);
	}
	tga = new TGA*[size];
	if (!tga) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
	int slot = 0;
	while (slot < size) {
		tga[slot] = new TGA(trv[slot]);
		if (!tga[slot]) {
			fprintf(stderr,"out of memory while creating state\n");
			exit(1);
		}
		++slot;
	}
	cluster = new Cluster *[size];
	if (!cluster) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
	/* the table depends on size only, so it is filled in here */
	stirling_partition = new double[size];
	stirling_number_ratios(size,stirling_partition);
	partition_number_ratios(size,stirling_partition);
	insertmove = true;
	/* start the countdown to the first coherent shift */
	coherentshiftcount = coherentshiftperiod;
}

/* Deep-copy constructor: clones every TGA and cluster of `state` and copies
   the stirling_partition table, the move bookkeeping and the cluster size
   histogram (size + 1 entries). */
State::State(const State *state) :
		coherentshiftperiod(state->coherentshiftperiod),
		chemical_potential(state->chemical_potential)
{
	size = state->size;
	tga = new TGA*[size];
	if (!tga) {
		fprintf(stderr,"out of memory while cloning state\n");
		exit(1);
	}
	cluster = new Cluster*[size];
	if (!cluster) {
		fprintf(stderr,"out of memory while cloning state\n");
		exit(1);
	}
	for (int slot = 0; slot < size; ++slot) {
		tga[slot] = state->tga[slot]->create_clone();	/* deep copy construction */
		if (!tga[slot]) {
			fprintf(stderr,"out of memory while cloning state\n");
			exit(1);
		}
		cluster[slot] = state->cluster[slot]->create_clone();
		if (!cluster[slot]) {
			fprintf(stderr,"out of memory while cloning state\n");
			exit(1);
		}
	}
	stirling_partition = new double[size];
	for (int k = 0; k < size; ++k) {
		stirling_partition[k] = state->stirling_partition[k];
	}
	insertmove = state->insertmove;
	coherentshiftcount = state->coherentshiftcount;
	cluster_size_hist = new int[size+1];
	if (cluster_size_hist == 0) {
		fprintf(stderr,"Error: out of memory during State deep copy constructor\n");
		fflush(0);
		exit(1);
	}
	for (int bin = 0; bin <= size; ++bin) {
		cluster_size_hist[bin] = state->cluster_size_hist[bin];
	}
}

/* Destroys every TGA and cluster object, then the arrays that held them,
   the cluster size histogram and the stirling_partition table. */
State::~State()
{
	int slot = 0;
	while (slot < size) {
		delete tga[slot];
		delete cluster[slot];
		++slot;
	}
	delete [] tga;
	delete [] cluster;
	delete [] cluster_size_hist;
	delete [] stirling_partition;
}

/* Returns the sum of get_entropy() over all `size` cluster slots. */
double State::get_entropy() const
{
	double sum = 0.0;
	for (int slot = 0; slot < size; ++slot) {
		sum += cluster[slot]->get_entropy();
	}
	return sum;
}

/* Returns how many of the `size` cluster slots are not empty. */
int State::get_num_cluster() const
{
	int occupied = 0;
	for (int slot = 0; slot < size; ++slot) {
		if (cluster[slot]->empty()) continue;
		++occupied;
	}
	return occupied;
}

/* Proposes the next move set.
   When the coherent-shift countdown has reached zero it is reset and a
   coherent shift is prepared for a non-empty cluster: the search starts at a
   random slot, scans upward, and wraps to slot 0 if it runs off the end.
   Otherwise the countdown is decremented and an insert move is prepared for
   a random TGA and a random target cluster other than its current one.
   An empty target is always rejected when the source cluster is a singlet;
   when it is not, an empty target is kept only if
   int_ranged_rand(number of empty clusters) returns 0.
   Returns the prepared move set and records its kind in `insertmove`. */
MoveSet *State::propose_moveset() {
	if (coherentshiftcount == 0) {
		coherentshiftcount = coherentshiftperiod;
		int pick = int_ranged_rand(size);
		while (pick < size && cluster[pick]->empty()) ++pick;
		if (pick == size) {
			/* nothing non-empty from the start slot upward: rescan from 0 */
			pick = 0;
			while (cluster[pick]->empty()) ++pick;
		}
		coherentshiftmoveset->canabalize(cluster[pick]);
		insertmove = false;
		return coherentshiftmoveset;
	}
	coherentshiftcount -= 1.0;
	/* insert move: choose the TGA first, then look for an acceptable target */
	const int tga_num = int_ranged_rand(size);
	const int from_clus = tga[tga_num]->cluster_num;
	int to_clus;
	bool reject;
	do {
		/* draw from size - 1 values and step over the source cluster */
		to_clus = int_ranged_rand(size - 1);
		if (to_clus >= from_clus) ++to_clus;
		if (cluster[from_clus]->singlet()) {
			reject = cluster[to_clus]->empty() ? true : false;
		} else {
			reject = (cluster[to_clus]->empty() && int_ranged_rand(cluster_size_hist[0]) != 0);
		}
	} while (reject);
	insertmoveset->canabalize(cluster[from_clus], cluster[to_clus], tga[tga_num]);
	insertmove = true;
	return insertmoveset;
}

/* State::rearrange_to_pairwise_assignment() -- this function prepares
	a state for further pairwise postprocessing and report generation.
	DO NOT CONTINUE TO USE THE STATE FOR MONTE CARLO EXPLORATION
	AFTER YOU CALL THIS FUNCTION
*/
void State::rearrange_to_pairwise_assignment(const int *assignments)
{
	int max_cluster = -1;
	/* empty clusters */
	int i;
	for (i = 0; i < size; i++) {
		cluster[tga[i]->cluster_num]->remove(tga[i]);
		/* pairwise clusters have uncertain alignments, so skip the
		adding of weight matricies for now. Just set cluster_num */
		tga[i]->cluster_num = assignments[i];
		if (tga[i]->cluster_num > max_cluster) max_cluster = tga[i]->cluster_num;
	}
}

/* Compares this (current) state with a reference state and accumulates
   per-reference-cluster match statistics.
   For every reference cluster that has at least one TGA, the current cluster
   sharing the most TGAs with it is found; ties are broken in favour of the
   higher purity (shared TGAs / size of the current cluster).
     cluster_sig_presence_hist[ref][n]  += 1  where n is the shared TGA count
     cluster_sig_purity_hist[ref][bin]  += 1  where bin is purity * 50,
                                              capped at 49
     cluster_sig_tga[ref * size + tga]  += 1  for every TGA currently in the
                                              best-matching cluster; only done
                                              for reference clusters with two
                                              or more TGAs
   Exits the program if the sort keys cannot be represented in an int or if a
   reference cluster with two or more TGAs has no recorded best match. */
void State::accumulate_significance_metrics(State *state,
		double **cluster_sig_presence_hist,
		double **cluster_sig_purity_hist,
		double *cluster_sig_tga)
{
	/* find which cluster most closely matches each reference cluster */
	/* create list of values for each tga, showing the pairing of the
	cluster number in the reference cluster to the cluster number in the
	current cluster. Then sort the list primarily according to reference
	cluster number, and secondarily according to current cluster number */
	if (size <= 0) return;	/* no tga, nothing to accumulate */
	/* keys are ref_cluster * size + cluster, so the largest possible key is
	   size * size - 1; refuse sizes for which that overflows an int */
	if (size > INT_MAX / size) {
		fprintf(stderr, "Error : too many clusters to sort in accumulate_significance_metrics\n");
		exit(1);
	}
	int *cluster_mapping = new int[size];
	int *best_current_for_ref = new int[size];
	if (!cluster_mapping || !best_current_for_ref) {
		fprintf(stderr,"Error : out of memory in accumulate_significance_metrics\n");
		exit(1);
	}
	int tga_id;
	for (tga_id = 0; tga_id < size; tga_id++) {
		cluster_mapping[tga_id] = 
				state->tga[tga_id]->cluster_num * size +
				tga[tga_id]->cluster_num;
		best_current_for_ref[tga_id] = -1; /* reset for each cluster (tga_id = cluster_id) */
	}
	qsort(cluster_mapping,size,sizeof(int),&int_compare);
	int ref_cluster_scan = -1;	/* current ref cluster for which best match is sought */
	int cluster_scan = -1;		/* current cluster being compared */
	int best_match = -1;
	int best_match_presence = -1;
	double best_match_purity = 0.0;
	int cluster_presence = 0;
	int map_scan = 0;
	/* decode the first key back into its (reference, current) pair */
	int ref_cluster_id = cluster_mapping[0] / size;
	int cluster_id = cluster_mapping[0] - ref_cluster_id * size;
	while (map_scan < size) {
		if (ref_cluster_id != ref_cluster_scan) {
			/* moving to new ref_cluster */
			ref_cluster_scan = ref_cluster_id;
			best_match_presence = -1;
			best_match_purity = -1.0;
			cluster_scan = -1;
		}
		if (cluster_id != cluster_scan) {
			/* moving to new cluster */
			cluster_scan = cluster_id;
			cluster_presence = 0;
		}
		/* count this tga; better presence wins, equal presence needs
		   strictly better purity */
		if (++cluster_presence > best_match_presence || 
				(cluster_presence == best_match_presence &&
				!((double)cluster_presence / get_cluster_size(cluster_id) <=
					best_match_purity))) {
			/* new best found */
			best_match = cluster_id;
			best_match_presence = cluster_presence;
			best_match_purity = (double)cluster_presence / get_cluster_size(cluster_id);
		}
		if (++map_scan < size) {
			ref_cluster_id = cluster_mapping[map_scan] / size;
			cluster_id = cluster_mapping[map_scan] - ref_cluster_id * size;
		}
		if (map_scan == size || ref_cluster_id != ref_cluster_scan) {
			/* register results for finished ref_cluster */
			double *csprh = cluster_sig_presence_hist[ref_cluster_scan];
			csprh[best_match_presence]+=1.0;
			double *cspuh = cluster_sig_purity_hist[ref_cluster_scan];
			/* 50 purity bins; a purity of 1.0 goes into the top bin */
			cspuh[best_match_purity >= 1.0 ? 49 : (int)(best_match_purity * 50.0)]+=1.0;
			best_current_for_ref[ref_cluster_scan] = best_match;
		}
	}
	for (ref_cluster_id = 0; ref_cluster_id < size; ref_cluster_id++) {
		if (state->cluster[ref_cluster_id]->get_num_tga() < 2) continue; /* ignore empty & singlet */
		int best_cluster_id = best_current_for_ref[ref_cluster_id];
		if (best_cluster_id == -1) {
			/* empties should have been ignored */
			fprintf(stderr,"Error in accumulate significance stats: Logical Bug\n");
			exit(1);
		}
/*TODO: This is horrible (double looping like this) .. this must be optimized */
		for (tga_id = 0; tga_id < size; tga_id++) {
			int cluster_in = tga[tga_id]->cluster_num;
			if (cluster_in == best_cluster_id) {
				cluster_sig_tga[ref_cluster_id * size + tga_id]+=1.0;
			}
		}
	}
	delete[] cluster_mapping;
	delete[] best_current_for_ref;
}

/* Writes the whole state to `out`: a total-entropy line, then for every
   non-empty cluster the cluster itself followed by its member TGAs and a
   blank line.  TGA indices are sorted through qsort with int_compare_global,
   using keys cluster_num * size + index stored in int_sort_values
   (presumably that comparator orders indices by those keys -- it is defined
   elsewhere).  The largest aw_left_max over all TGAs is passed to both write
   calls. */
void State::write(FILE *out, bool verbose)
{
	int *order = new int[size];
	delete[] int_sort_values;	/* deleting a null pointer does nothing */
	int_sort_values = new int[size];
	double entropy_sum = 0.0;
	int widest_aw_left_max = 0;
	for (int idx = 0; idx < size; ++idx) {
		order[idx] = idx;
		int_sort_values[idx] = tga[idx]->cluster_num * size + idx;
		entropy_sum += cluster[idx]->get_entropy();
		const int this_max = tga[idx]->get_aw_left_max();
		if (this_max > widest_aw_left_max) {
			widest_aw_left_max = this_max;
		}
	}
	qsort((void *)order, size, sizeof(int), &int_compare_global);
	fprintf(out,"TOTAL SYSTEM ENTROPY: %f\n",entropy_sum);
	int cursor = 0;	/* position in the sorted TGA order */
	for (int c_num = 0; c_num < size; ++c_num) {
		if (!cluster[c_num]->empty()) {
			cluster[c_num]->write(out,widest_aw_left_max);
			for (; cursor < size && tga[order[cursor]]->cluster_num == c_num; ++cursor) {
				tga[order[cursor]]->write(out,verbose,widest_aw_left_max);
			}
			fprintf(out,"\n");
		}
	}
	delete[] order;
	delete[] int_sort_values;
	int_sort_values = 0;
}

void State::dump_cluster_sizes(FILE *out)
{
	int cluster_id, num_nonempty = 0;
	for (cluster_id=0; cluster_id < size; cluster_id++) {
		int cluster_size = cluster[cluster_id]->get_num_tga();
		if (cluster_size > 0) {
		  fprintf(out, "%d ",cluster_size);
		  ++num_nonempty;
		}
	}
	fprintf(out,"\ntotal: %d\n",num_nonempty);
	
}

/* Lists the member TGA names of every cluster that holds two or more TGAs.
   TGA indices are sorted through qsort with int_compare_global, using keys
   cluster_num * size + index stored in int_sort_values (presumably that
   comparator orders indices by those keys -- it is defined elsewhere), so the
   members of one cluster form a consecutive run.  Each run is labelled with
   the cluster number actually carried by its TGAs; runs of length one
   (singlets) are skipped. */
void State::write_membership_summary(FILE *out)
{
	int *sortorder = new int[size];
	if (int_sort_values != 0) delete[] int_sort_values;
	int_sort_values = new int[size];
	for (int i=0; i<size; i++) {
		sortorder[i] = i;
		int_sort_values[i] = tga[i]->cluster_num * size + i;
	}
	qsort((void *)sortorder, size, sizeof(int), &int_compare_global);
	int run_start = 0;
	while (run_start < size) {
		/* find the end of the run of TGAs sharing this cluster number */
		const int c_num = tga[sortorder[run_start]]->cluster_num;
		int run_end = run_start + 1;
		while (run_end < size && tga[sortorder[run_end]]->cluster_num == c_num) {
			run_end++;
		}
		if (run_end - run_start > 1) {
			fprintf(out,"TGA members of cluster %d:\n",c_num);
			for (int member = run_start; member < run_end; member++) {
				fprintf(out,"\t%s\n",tga[sortorder[member]]->get_name());
			}
			fprintf(out,"\n");
		}
		run_start = run_end;
	}
	delete[] sortorder;
	delete[] int_sort_values;
	int_sort_values = 0;
}

/* Returns, for one column of NUM_BASES (possibly fractional) base counts n_i
   with total N:
     lgamma(N + 4q) + 4 lgamma(q) - lgamma(4q) - sum_i lgamma(n_i + q)
   where lgamma is procse_lgamma. */
double State::get_column_entropy_plain(const double *column, double q) const
{
	double count_sum = 0.0;
	for (int b = 0; b < TGA_Raw::NUM_BASES; ++b) {
		count_sum += column[b];
	}
	double entropy = procse_lgamma(count_sum + 4.0 * q);
	entropy += 4.0 * procse_lgamma(q);
	entropy -= procse_lgamma(4.0 * q);
	for (int b = 0; b < TGA_Raw::NUM_BASES; ++b) {
		entropy -= procse_lgamma(column[b] + q);
	}
	return entropy;
}

/* Scores one column of NUM_BASES (possibly fractional) base counts by
   combining two terms with get_column_score():
     H_wm - the value of get_column_entropy_plain(column, q)
     H_bg - the counts weighted by MinLogBgFreq, i.e. sum_i n_i * MinLogBgFreq[i]
   H_bg is linear in the counts, matching how the integer-count code paths in
   this file build it (count * MinLogBgFreq); no log-gamma is applied to it. */
double State::get_column_entropy_random(const double *column, double q) const
{
	/* per-base weights for the H_bg term; each entry is about ln 4 */
	static const double MinLogBgFreq[4] = {1.386294361,1.386294361,1.386294361,1.386294361};
	const double H_wm = get_column_entropy_plain(column,q);
	double H_bg = 0.0;
	for (int row = 0; row < TGA_Raw::NUM_BASES; row++) {
		H_bg += column[row] * MinLogBgFreq[row];
	}
	return get_column_score(H_wm,H_bg);
}

/* Rebuilds a weight matrix from significance-weighted TGAs.
   TGAs are visited in the order given by sorted_tga_id, each weighted by the
   matching entry of sorted_sig.  For every TGA all candidate offsets (and,
   unless mp.single_strand is set, both senses) are tried against the counts
   accumulated so far; the placement whose best aw_len-column window has the
   lowest summed column entropy is kept (ties go to the larger overlap) and
   the TGA's weighted counts are added to the accumulated distribution.
   Outputs:
     weight_matrix  - aw_len columns of NUM_BASES counts, indexed
                      pos*NUM_BASES+base, copied from the accumulated
                      distribution starting at the last chosen window position
     tga_aw_left,
     tga_aw_sense   - per-TGA window position and sense, indexed by tga id
     final_entropy  - best window entropy found while inserting the last TGA
   q is passed through to get_column_entropy().
   Exits the program if consistant() is false or an allocation check fails. */
void State::calculate_cluster_weight_matrix(double *weight_matrix, int *tga_aw_left,
		bool *tga_aw_sense, double *final_entropy,
		const int *sorted_tga_id, const double *sorted_sig, double q) const
{
	if (!consistant()) {
		fprintf(stderr,"Error: calculate_cluster_weight_matrix called with inconsistant state\n");
		fflush(0);
		exit(1);
	}
	const int aw_len = TGA::get_aw_len();
	/* find aw_left_max_max (the largest aw_left_max) */
	int aw_left_max_max = -1;
	int sorted_tga_index;
	for (sorted_tga_index = 0; sorted_tga_index < size; sorted_tga_index++) {
		int tga_id = sorted_tga_id[sorted_tga_index];
		if (tga[tga_id]->get_aw_left_max() > aw_left_max_max) {
			aw_left_max_max = tga[tga_id]->get_aw_left_max();
		}
	}
	int base_distrib_width = aw_left_max_max + aw_len;
	/* member probablility weighted base distribution matrix */
	/* both matrices are flat arrays indexed column*NUM_BASES+row */
	double *base_distrib = new double[base_distrib_width*TGA_Raw::NUM_BASES];
	double *test_distrib = new double[base_distrib_width*TGA_Raw::NUM_BASES];
	double *column_entropy = new double[base_distrib_width];
	if (!base_distrib || !test_distrib || !column_entropy) {
		fprintf(stderr,"Error: out of memory in State::calculate_cluster_weight_matrix\n");
		fflush(0);
		exit(1);
	}
	int column;
	int row;
	for (column = 0; column < base_distrib_width; column++) {
		for (row = 0; row < TGA_Raw::NUM_BASES; row++) {
			base_distrib[column*TGA_Raw::NUM_BASES+row] = 0.0;
		}
	}
	/* allow maximum "play" in alignment window at first */
	int aw_left_max = aw_left_max_max;
	int aw_left_min = 0;
	int aw_left = 0;
	/* start main loop of adding / shifting */
	double best_insertion_entropy = DBL_MAX;
	int best_insertion_left = 0;
	int best_insertion_sense = 0;
	for (sorted_tga_index = 0; sorted_tga_index < size; sorted_tga_index++) {
		int tga_id = sorted_tga_id[sorted_tga_index];
		const int (*tga_base_distrib)[TGA_Raw::NUM_BASES] = tga[tga_id]->get_distrib();
		const int tga_aw_left_max = tga[tga_id]->get_aw_left_max();
		double tga_sig = sorted_sig[sorted_tga_index];
		int insertion_left;
		/* the best score is searched afresh for every tga */
		best_insertion_entropy = DBL_MAX;
		int best_insertion_overlap = 0;
		bool insertion_sense = true; /* forward sense first */
#undef DEBUG_WEIGHT_MATRIX_RECONSTRUCTION
#ifdef DEBUG_WEIGHT_MATRIX_RECONSTRUCTION
fprintf(out,"inserting tga #%d, sig: %g\n",tga_id,tga_sig);
printf("start distrib:\n");
for (int base=0; base<TGA_Raw::NUM_BASES; base++) {
for (int pos=0; pos<base_distrib_width; pos++) {
printf("%2.4f ",base_distrib[pos*TGA_Raw::NUM_BASES+base]);
}
printf("\n");
}
#endif
		/* one pass per sense: forward first, then (unless single strand) reverse */
		do {
#ifdef DEBUG_WEIGHT_MATRIX_RECONSTRUCTION
printf("tga distrib:\n");
for (int base=0; base<TGA_Raw::NUM_BASES; base++) {
for (int pos=0; pos<tga[tga_id]->get_tract_len(); pos++) {
printf("%2.4f ",tga[tga_id]->get_distrib()[pos][base] * tga_sig);
}
printf("\n");
}
#endif
			for (insertion_left = tga_aw_left_max;	/* start with maximum left shift */
					insertion_left >= aw_left_min - aw_left_max;	/* most negative value */
					insertion_left--) {
#ifdef DEBUG_WEIGHT_MATRIX_RECONSTRUCTION
printf("  evaluating insertion offset %d%s\n",insertion_left,insertion_sense?"":"rev-cmp");
#endif
				/* create test_distrib */
				/* trial copies of the play limits; the real ones are only
				   narrowed after the best placement is known */
				int test_aw_left_min = aw_left_min;
				int test_aw_left_max = aw_left_max;
				if (insertion_sense) {
					/* determine play limits for forward test */
					if (insertion_left < 0) test_aw_left_min -= insertion_left;
					int test_aw_left_max_proposed = test_aw_left_min - insertion_left + tga_aw_left_max;
					if (test_aw_left_max_proposed < test_aw_left_max) test_aw_left_max = test_aw_left_max_proposed;
				} else {
					/* determine play limits for reversed test */
					if (insertion_left < 0) test_aw_left_max += insertion_left;
					int test_aw_left_min_proposed = test_aw_left_max + insertion_left - tga_aw_left_max;
					if (test_aw_left_min_proposed > test_aw_left_min) test_aw_left_min = test_aw_left_min_proposed;
				}
				int num_columns_in_overlap = test_aw_left_max - test_aw_left_min + aw_len;
#ifdef DEBUG_WEIGHT_MATRIX_RECONSTRUCTION
printf("  test_aw_left_min: %d  test_aw_left_max: %d  num_columns_in_overlap: %d\n",test_aw_left_min,test_aw_left_max,num_columns_in_overlap);
/* overwrite test_distrib */
for (int base=0; base<TGA_Raw::NUM_BASES; base++) {
for (int pos=0; pos<base_distrib_width; pos++) {
test_distrib[pos*TGA_Raw::NUM_BASES+base] = 0.0;
}
}
#endif
				int tga_start_offset = insertion_left;
				if (tga_start_offset < 0) tga_start_offset = 0;
				/* only the overlap columns of test_distrib and column_entropy
				   are written; columns outside it keep earlier contents */
				for (column = 0; column < num_columns_in_overlap; column++) {
					for (row = 0; row < TGA_Raw::NUM_BASES; row++) {
						if (insertion_sense) {
							test_distrib[(test_aw_left_min + column)*TGA_Raw::NUM_BASES+row] =
								base_distrib[(test_aw_left_min + column)*TGA_Raw::NUM_BASES+row] +
								tga_base_distrib[tga_start_offset + column][row] * tga_sig;
						} else {
							/* reverse sense: columns and base rows are both read in
							   reverse order */
							test_distrib[(test_aw_left_min + column)*TGA_Raw::NUM_BASES+row] =
								base_distrib[(test_aw_left_min + column)*TGA_Raw::NUM_BASES+row] +
								tga_base_distrib
									[tga_start_offset + num_columns_in_overlap - 1 - column]
									[TGA_Raw::NUM_BASES - 1 - row] * tga_sig;
						}
					}
					column_entropy[test_aw_left_min + column] =
						get_column_entropy(test_distrib+(test_aw_left_min + column)*TGA_Raw::NUM_BASES,q);
				}
#ifdef DEBUG_WEIGHT_MATRIX_RECONSTRUCTION
printf("test distrib:\n");
for (int base=0; base<TGA_Raw::NUM_BASES; base++) {
for (int pos=0; pos<base_distrib_width; pos++) {
printf("%2.4f ",test_distrib[pos*TGA_Raw::NUM_BASES+base]);
}
printf("\n");
}
#endif
				/* search for best alignment window entropy */
				/* sliding sum over aw_len columns: preload aw_len - 1 columns,
				   then add one on the right and drop one on the left per step */
				double insertion_entropy = 0.0;
				for (column = 0; column < aw_len - 1; column++) {
					insertion_entropy += column_entropy[test_aw_left_min + column];
				}
				while (column < num_columns_in_overlap) {
					insertion_entropy += column_entropy[test_aw_left_min + column];
					/* lower entropy wins; on an exact tie the larger overlap wins */
					if (insertion_entropy < best_insertion_entropy || 
							(insertion_entropy == best_insertion_entropy &&
							num_columns_in_overlap > best_insertion_overlap)) {
						best_insertion_entropy = insertion_entropy;
						best_insertion_overlap = num_columns_in_overlap;
						best_insertion_left = insertion_left;
						best_insertion_sense = insertion_sense;
						aw_left = test_aw_left_min + column - aw_len + 1;
#ifdef DEBUG_WEIGHT_MATRIX_RECONSTRUCTION
printf("new best window entropy ending at %d: %f (aw_left = %d)\n",column,insertion_entropy,aw_left);
#endif
					}
					column++;
					insertion_entropy -= column_entropy[test_aw_left_min + column - aw_len];
				}
			}
			/* flip to the reverse sense for a second pass; in single-strand
			   mode the sense stays true and the loop ends after one pass */
			if(mp.single_strand == false){
			  insertion_sense = !insertion_sense;
			}
		} while (insertion_sense == false);
		/* now we have best_insertion_left and best_insertion_sense - limit shift range */
		if (best_insertion_sense) {
			/* determine play limits for forward test */
			if (best_insertion_left < 0) aw_left_min -= best_insertion_left;
			int aw_left_max_proposed = aw_left_min - best_insertion_left + tga_aw_left_max;
			if (aw_left_max_proposed < aw_left_max) aw_left_max = aw_left_max_proposed;
		} else {
			/* determine play limits for reversed test */
			if (best_insertion_left < 0) aw_left_max += best_insertion_left;
			int aw_left_min_proposed = aw_left_max + best_insertion_left - tga_aw_left_max;
			if (aw_left_min_proposed > aw_left_min) aw_left_min = aw_left_min_proposed;
		}

#ifdef DEBUG_WEIGHT_MATRIX_RECONSTRUCTION
fprintf(out,"inserted with offset: %d%s,\taw_left_min: %d\t aw_left_max: %d\n",best_insertion_left,best_insertion_sense?"":"rev-cmp",aw_left_min,aw_left_max);
#endif
		/* insert tga */
		int num_columns_in_overlap = aw_left_max - aw_left_min + aw_len;
		int tga_start_offset = best_insertion_left;
		if (tga_start_offset < 0) tga_start_offset = 0;
		tga_aw_sense[tga_id] = best_insertion_sense;
		/* provisional positions in accumulated-distribution coordinates; they
		   are converted relative to the final aw_left after the main loop */
		if (best_insertion_sense) {
			tga_aw_left[tga_id] = aw_left_min - tga_start_offset;	/* for later verbose printout */
		} else {
			tga_aw_left[tga_id] = aw_left_min + tga_start_offset + num_columns_in_overlap - 1;
		}
		for (column = 0; column < num_columns_in_overlap; column++) {
			for (row = 0; row < TGA_Raw::NUM_BASES; row++) {
				if (best_insertion_sense) {
					base_distrib[(aw_left_min + column)*TGA_Raw::NUM_BASES+row] +=
						tga_base_distrib[tga_start_offset + column][row] * tga_sig;
				} else {
					base_distrib[(aw_left_min + column)*TGA_Raw::NUM_BASES+row] +=
						tga_base_distrib
							[tga_start_offset + num_columns_in_overlap - 1 - column]
							[TGA_Raw::NUM_BASES - 1 - row] * tga_sig;
				}
			}
		}
	}
	/* copy alignment window */
	int base;
	int pos;
	for (base=0; base<TGA_Raw::NUM_BASES; base++) {
		for (pos=0; pos<aw_len; pos++) {
			weight_matrix[pos*TGA_Raw::NUM_BASES+base] = base_distrib[(aw_left + pos)*TGA_Raw::NUM_BASES+base];
		}
	}
	/* adjust tga_aw_left according to aw_left */
	for (sorted_tga_index = 0; sorted_tga_index < size; sorted_tga_index++) {
		int tga_id = sorted_tga_id[sorted_tga_index];
		double tga_sig = sorted_sig[sorted_tga_index];
		/* stops at the first zero weight, leaving later entries unadjusted;
		   presumably sorted_sig is in descending order -- TODO confirm */
		if (tga_sig == 0.0) break;
		if (tga_aw_sense[tga_id]) tga_aw_left[tga_id] = aw_left - tga_aw_left[tga_id];
		else tga_aw_left[tga_id] = tga_aw_left[tga_id] - aw_left - (aw_len - 1);
	}
	/* reports the score found for the last tga inserted */
	*final_entropy = best_insertion_entropy;
	delete[] column_entropy;
	delete[] test_distrib;
	delete[] base_distrib;
#ifdef DEBUG_WEIGHT_MATRIX_RECONSTRUCTION
fprintf(out,"final aw_left: %d\n",aw_left);
#endif
}

/*
 * Write a text report of a reconstructed cluster weight matrix to `out`.
 *
 * weight_matrix   aw_len positions of TGA_Raw::NUM_BASES weights each; the
 *                 weight of `base` at `pos` is at index pos*NUM_BASES + base
 * tga_aw_left     per-TGA first position of the printed window, indexed by tga id
 * tga_aw_sense    per-TGA flag, indexed by tga id; when false the TGA's window is
 *                 printed with positions and bases in reverse order and the
 *                 header is tagged "rev-cmp"
 * final_entropy   points at the entropy value printed on the first line
 * out             stream that receives the report
 * verbose         when true, the contributing TGAs are listed after the summary
 * sorted_tga_id   tga ids in listing order
 * sorted_sig      significance of each listed TGA (same order as sorted_tga_id);
 *                 the listing stops at the first entry equal to 0.0
 * q               not used by this function
 *
 * Terminates the process with exit(1) when consistant() reports false.
 *
 * A column whose total weight is exactly 0 is printed with all fractions as 0
 * rather than dividing 0 by 0 (which would print a platform-dependent NaN);
 * such a column still shows '*' in the majority summary string.
 */
void State::write_cluster_weight_matrix(const double *weight_matrix, const int *tga_aw_left,
		const bool *tga_aw_sense, const double *final_entropy,
		FILE *out, bool verbose, const int *sorted_tga_id, const double *sorted_sig, double q) const
{
	if (!consistant()) {
		fprintf(stderr,"Error: write_cluster_weight_matrix called with inconsistant state\n");
		fflush(0);
		exit(1);
	}
	const int aw_len = TGA::get_aw_len();
	/* total weight of each column, used to normalise the printed matrix */
	double *weight_column_total = new double[aw_len];
	if (weight_column_total == 0) {
		fprintf(stderr, "out of memory while presenting weight matrix\n");
		exit(1);
	}
	int base;
	int pos;
	for (pos=0; pos<aw_len; pos++) {
		weight_column_total[pos] = 0.0;
		for (base=0; base<TGA_Raw::NUM_BASES; base++) {
			weight_column_total[pos] += weight_matrix[pos*TGA_Raw::NUM_BASES+base];
		}
	}
	/* print reconstructed weight matrix distribution: one output row per base */
	fprintf(out,"%f entropy\n", *final_entropy);
	fprintf(out,"Reconstructed Weight Matrix:\n");
	for (base=0; base<TGA_Raw::NUM_BASES; base++) {
		for (pos=0; pos<aw_len; pos++) {
			const double total = weight_column_total[pos];
			/* an all-zero column has no defined fractions; print 0 instead of 0/0 */
			const double fraction = (total != 0.0)
				? weight_matrix[pos*TGA_Raw::NUM_BASES+base] / total
				: 0.0;
			fprintf(out,"%9.4f ",fraction);
		}
		fprintf(out,"\n");
	}
	/* print column totals */
	fprintf(out,"Column Totals (contributing bases):\n");
	for (pos=0; pos<aw_len; pos++) {
		fprintf(out,"%9.4f ", weight_column_total[pos]);
	}
	fprintf(out,"\n");
	/* print gross summary letters */
	fprintf(out,"Majority Weight Summary String: (* = no base has majority of weight)\n");
	static const char base_letter[] = "ACGT";
	for (pos=0; pos<aw_len; pos++) {
		const double total = weight_column_total[pos];
		for (base=0; base<TGA_Raw::NUM_BASES; base++) {
			/* a column without weight cannot have a majority base */
			if (total != 0.0 && weight_matrix[pos*TGA_Raw::NUM_BASES+base] / total > 0.5) {
				fprintf(out,"%c",base_letter[base]);
				break;
			}
		}
		/* the inner loop ran to completion: no base exceeded half the weight */
		if (base == TGA_Raw::NUM_BASES) fprintf(out,"*");
	}
	fprintf(out,"\n");
	delete[] weight_column_total;
	if (!verbose) return;
	fprintf(out,"\nTGA members:\n");
	int sorted_tga_index;
	for (sorted_tga_index = 0; sorted_tga_index < size; sorted_tga_index++) {
		int tga_id = sorted_tga_id[sorted_tga_index];
		double tga_sig = sorted_sig[sorted_tga_index];
		/* the first zero significance ends the listing */
		if (tga_sig == 0.0) break;
		const int window_left = tga_aw_left[tga_id];
		if (tga_aw_sense[tga_id]) {
			fprintf(out,">%s    (offset:%d)  (significance:%f)\n",tga[tga_id]->get_name(),window_left,tga_sig);
			for (int row=0; row<TGA_Raw::NUM_BASES; row++) {
				for (int col=window_left; col<window_left+aw_len; col++) {
					fprintf(out,"%9.4f ",tga[tga_id]->get_distrib()[col][row] * tga_sig);
				}
				fprintf(out,"\n");
			}
		} else {
			fprintf(out,">%s    (offset:%d,rev-cmp)  (significance:%f)\n",tga[tga_id]->get_name(),window_left,tga_sig);
			/* walk bases and window positions backwards */
			for (int row=TGA_Raw::NUM_BASES - 1; row >= 0; row--) {
				for (int col=window_left+aw_len-1; col>=window_left; col--) {
					fprintf(out,"%9.4f ",tga[tga_id]->get_distrib()[col][row] * tga_sig);
				}
				fprintf(out,"\n");
			}
		}
	}
}

/* StatePlainSANone implementation */

/*
 * Build a state from the raw TGA vector using ClusterPlain clusters, an
 * InsertMoveSetSANone insert move set and a CoherentShiftMoveSetPlain coherent
 * shift move set.  Exits the process if any allocation yields a null pointer.
 */
StatePlainSANone::StatePlainSANone(const TGA_Raw_Vector &trv,
		int initial_cluster_size, double a_coherentshiftperiod, double a_chemical_potential) :
		State(trv, initial_cluster_size, a_coherentshiftperiod, a_chemical_potential)
{
	/* allocate cluster k, then file TGA k under cluster k / initial_cluster_size */
	for (int k = 0; k < size; ++k) {
		cluster[k] = new ClusterPlain(k);
		if (!cluster[k]) {
			fprintf(stderr,"out of memory while creating state\n");
			exit(1);
		}
		cluster[k / initial_cluster_size]->insert(tga[k]);
	}
	/* histogram with size+1 bins, bumped once per cluster at its get_num_tga() value */
	cluster_size_hist = new int[size+1];
	if (cluster_size_hist == 0) {
		fprintf(stderr,"Error: out of memory during StatePlainSANone Constructor\n");
		fflush(0);
		exit(1);
	}
	for (int bin = 0; bin <= size; ++bin) {
		cluster_size_hist[bin] = 0;
	}
	for (int c = 0; c < size; ++c) {
		++cluster_size_hist[cluster[c]->get_num_tga()];
	}
	/* move sets: SANone insert moves, plain coherent shifts */
	insertmoveset = new InsertMoveSetSANone(trv.get_size(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetPlain(trv.get_size(),tga);
	if (insertmoveset == 0 || coherentshiftmoveset == 0) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Clone constructor: the State base is built from `state`, then this object
 * gets its own InsertMoveSetSANone and CoherentShiftMoveSetPlain.
 */
StatePlainSANone::StatePlainSANone(const StatePlainSANone *state) :
		State(state)
{
	insertmoveset = new InsertMoveSetSANone(state->get_num_tga(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetPlain(state->get_num_tga(),tga);
	/* both move sets must exist before the state is usable */
	const bool allocated = insertmoveset && coherentshiftmoveset;
	if (!allocated) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Free the two move sets that the constructors allocate with new.
 * NOTE(review): cluster[] and cluster_size_hist are not released here --
 * presumably the State base destructor does that; confirm.
 */
StatePlainSANone::~StatePlainSANone()
{
	delete insertmoveset;
	delete coherentshiftmoveset;
}

/* Allocate a copy of this state with new and hand it back as a State pointer. */
State *StatePlainSANone::create_clone()
{
	return new StatePlainSANone(this);
}

/* StatePlainSACount implementation */

/*
 * Build a state from the raw TGA vector using ClusterPlain clusters, an
 * InsertMoveSetSACount insert move set and a CoherentShiftMoveSetPlain coherent
 * shift move set.  Exits the process if any allocation yields a null pointer.
 */
StatePlainSACount::StatePlainSACount(const TGA_Raw_Vector &trv,
		int initial_cluster_size, double a_coherentshiftperiod,
		double a_chemical_potential) :
		State(trv, initial_cluster_size, a_coherentshiftperiod, a_chemical_potential)
{
	/* allocate cluster k, then file TGA k under cluster k / initial_cluster_size */
	for (int k = 0; k < size; ++k) {
		cluster[k] = new ClusterPlain(k);
		if (!cluster[k]) {
			fprintf(stderr,"out of memory while creating state\n");
			exit(1);
		}
		cluster[k / initial_cluster_size]->insert(tga[k]);
	}
	/* histogram with size+1 bins, bumped once per cluster at its get_num_tga() value */
	cluster_size_hist = new int[size+1];
	if (cluster_size_hist == 0) {
		fprintf(stderr,"Error: out of memory during StatePlainSACount Constructor\n");
		fflush(0);
		exit(1);
	}
	for (int bin = 0; bin <= size; ++bin) {
		cluster_size_hist[bin] = 0;
	}
	for (int c = 0; c < size; ++c) {
		++cluster_size_hist[cluster[c]->get_num_tga()];
	}
	/* move sets: SACount insert moves, plain coherent shifts */
	insertmoveset = new InsertMoveSetSACount(trv.get_size(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetPlain(trv.get_size(),tga);
	if (insertmoveset == 0 || coherentshiftmoveset == 0) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Clone constructor: the State base is built from `state`, then this object
 * gets its own InsertMoveSetSACount and CoherentShiftMoveSetPlain.
 */
StatePlainSACount::StatePlainSACount(const StatePlainSACount *state) :
		State(state)
{
	insertmoveset = new InsertMoveSetSACount(state->get_num_tga(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetPlain(state->get_num_tga(),tga);
	/* both move sets must exist before the state is usable */
	const bool allocated = insertmoveset && coherentshiftmoveset;
	if (!allocated) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Free the two move sets that the constructors allocate with new.
 * NOTE(review): cluster[] and cluster_size_hist are not released here --
 * presumably the State base destructor does that; confirm.
 */
StatePlainSACount::~StatePlainSACount()
{
	delete insertmoveset;
	delete coherentshiftmoveset;
}

/* Allocate a copy of this state with new and hand it back as a State pointer. */
State *StatePlainSACount::create_clone()
{
	StatePlainSACount *copy = new StatePlainSACount(this);
	return copy;
}

/* StatePlainSACountAndSize implementation */

/*
 * Build a state from the raw TGA vector using ClusterPlain clusters, an
 * InsertMoveSetSACountAndSize insert move set and a CoherentShiftMoveSetPlain
 * coherent shift move set.  Exits the process if any allocation yields a null
 * pointer.
 */
StatePlainSACountAndSize::StatePlainSACountAndSize(const TGA_Raw_Vector &trv,
		int initial_cluster_size, double a_coherentshiftperiod,
		double a_chemical_potential) :
		State(trv, initial_cluster_size, a_coherentshiftperiod, a_chemical_potential)
{
	/* allocate cluster k, then file TGA k under cluster k / initial_cluster_size */
	for (int k = 0; k < size; ++k) {
		cluster[k] = new ClusterPlain(k);
		if (!cluster[k]) {
			fprintf(stderr,"out of memory while creating state\n");
			exit(1);
		}
		cluster[k / initial_cluster_size]->insert(tga[k]);
	}
	/* histogram with size+1 bins, bumped once per cluster at its get_num_tga() value */
	cluster_size_hist = new int[size+1];
	if (cluster_size_hist == 0) {
		fprintf(stderr,"Error: out of memory during StatePlainSACountAndSize Constructor\n");
		fflush(0);
		exit(1);
	}
	for (int bin = 0; bin <= size; ++bin) {
		cluster_size_hist[bin] = 0;
	}
	for (int c = 0; c < size; ++c) {
		++cluster_size_hist[cluster[c]->get_num_tga()];
	}
	/* move sets: SACountAndSize insert moves, plain coherent shifts */
	insertmoveset = new InsertMoveSetSACountAndSize(trv.get_size(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetPlain(trv.get_size(),tga);
	if (insertmoveset == 0 || coherentshiftmoveset == 0) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Clone constructor: the State base is built from `state`, then this object
 * gets its own InsertMoveSetSACountAndSize and CoherentShiftMoveSetPlain.
 */
StatePlainSACountAndSize::StatePlainSACountAndSize(const StatePlainSACountAndSize *state) :
		State(state)
{
	insertmoveset = new InsertMoveSetSACountAndSize(state->get_num_tga(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetPlain(state->get_num_tga(),tga);
	/* both move sets must exist before the state is usable */
	const bool allocated = insertmoveset && coherentshiftmoveset;
	if (!allocated) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Free the two move sets that the constructors allocate with new.
 * NOTE(review): cluster[] and cluster_size_hist are not released here --
 * presumably the State base destructor does that; confirm.
 */
StatePlainSACountAndSize::~StatePlainSACountAndSize()
{
	delete insertmoveset;
	delete coherentshiftmoveset;
}

/* Allocate a copy of this state with new and hand it back as a State pointer. */
State *StatePlainSACountAndSize::create_clone()
{
	StatePlainSACountAndSize *copy = new StatePlainSACountAndSize(this);
	return copy;
}

/* StateRandomSANone implementation */

/*
 * Build a state from the raw TGA vector using ClusterRandom clusters, an
 * InsertMoveSetSANone insert move set and a CoherentShiftMoveSetRandom coherent
 * shift move set.  Exits the process if any allocation yields a null pointer.
 */
StateRandomSANone::StateRandomSANone(const TGA_Raw_Vector &trv,
		int initial_cluster_size, double a_coherentshiftperiod, double a_chemical_potential) :
		State(trv, initial_cluster_size, a_coherentshiftperiod, a_chemical_potential)
{
	/* allocate cluster k, then file TGA k under cluster k / initial_cluster_size */
	for (int k = 0; k < size; ++k) {
		cluster[k] = new ClusterRandom(k);
		if (!cluster[k]) {
			fprintf(stderr,"out of memory while creating state\n");
			exit(1);
		}
		cluster[k / initial_cluster_size]->insert(tga[k]);
	}
	/* histogram with size+1 bins, bumped once per cluster at its get_num_tga() value */
	cluster_size_hist = new int[size+1];
	if (cluster_size_hist == 0) {
		fprintf(stderr,"Error: out of memory during StateRandomSANone Constructor\n");
		fflush(0);
		exit(1);
	}
	for (int bin = 0; bin <= size; ++bin) {
		cluster_size_hist[bin] = 0;
	}
	for (int c = 0; c < size; ++c) {
		++cluster_size_hist[cluster[c]->get_num_tga()];
	}
	/* move sets: SANone insert moves, random coherent shifts */
	insertmoveset = new InsertMoveSetSANone(trv.get_size(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetRandom(trv.get_size(),tga);
	if (insertmoveset == 0 || coherentshiftmoveset == 0) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Clone constructor: the State base is built from `state`, then this object
 * gets its own InsertMoveSetSANone and CoherentShiftMoveSetRandom.
 */
StateRandomSANone::StateRandomSANone(const StateRandomSANone *state) :
		State(state)
{
	insertmoveset = new InsertMoveSetSANone(state->get_num_tga(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetRandom(state->get_num_tga(),tga);
	/* both move sets must exist before the state is usable */
	const bool allocated = insertmoveset && coherentshiftmoveset;
	if (!allocated) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Free the two move sets that the constructors allocate with new.
 * NOTE(review): cluster[] and cluster_size_hist are not released here --
 * presumably the State base destructor does that; confirm.
 */
StateRandomSANone::~StateRandomSANone()
{
	delete insertmoveset;
	delete coherentshiftmoveset;
}

/* Allocate a copy of this state with new and hand it back as a State pointer. */
State *StateRandomSANone::create_clone()
{
	return new StateRandomSANone(this);
}

/* StateRandomSACount implementation */

/*
 * Build a state from the raw TGA vector using ClusterRandom clusters, an
 * InsertMoveSetSACount insert move set and a CoherentShiftMoveSetRandom coherent
 * shift move set.  Exits the process if any allocation yields a null pointer.
 */
StateRandomSACount::StateRandomSACount(const TGA_Raw_Vector &trv,
		int initial_cluster_size, double a_coherentshiftperiod,
		double a_chemical_potential) :
		State(trv, initial_cluster_size, a_coherentshiftperiod, a_chemical_potential)
{
	/* allocate cluster k, then file TGA k under cluster k / initial_cluster_size */
	for (int k = 0; k < size; ++k) {
		cluster[k] = new ClusterRandom(k);
		if (!cluster[k]) {
			fprintf(stderr,"out of memory while creating state\n");
			exit(1);
		}
		cluster[k / initial_cluster_size]->insert(tga[k]);
	}
	/* histogram with size+1 bins, bumped once per cluster at its get_num_tga() value */
	cluster_size_hist = new int[size+1];
	if (cluster_size_hist == 0) {
		fprintf(stderr,"Error: out of memory during StateRandomSACount Constructor\n");
		fflush(0);
		exit(1);
	}
	for (int bin = 0; bin <= size; ++bin) {
		cluster_size_hist[bin] = 0;
	}
	for (int c = 0; c < size; ++c) {
		++cluster_size_hist[cluster[c]->get_num_tga()];
	}
	/* move sets: SACount insert moves, random coherent shifts */
	insertmoveset = new InsertMoveSetSACount(trv.get_size(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetRandom(trv.get_size(),tga);
	if (insertmoveset == 0 || coherentshiftmoveset == 0) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Clone constructor: the State base is built from `state`, then this object
 * gets its own InsertMoveSetSACount and CoherentShiftMoveSetRandom.
 */
StateRandomSACount::StateRandomSACount(const StateRandomSACount *state) :
		State(state)
{
	insertmoveset = new InsertMoveSetSACount(state->get_num_tga(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetRandom(state->get_num_tga(),tga);
	/* both move sets must exist before the state is usable */
	const bool allocated = insertmoveset && coherentshiftmoveset;
	if (!allocated) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Free the two move sets that the constructors allocate with new.
 * NOTE(review): cluster[] and cluster_size_hist are not released here --
 * presumably the State base destructor does that; confirm.
 */
StateRandomSACount::~StateRandomSACount()
{
	delete insertmoveset;
	delete coherentshiftmoveset;
}

/* Allocate a copy of this state with new and hand it back as a State pointer. */
State *StateRandomSACount::create_clone()
{
	StateRandomSACount *copy = new StateRandomSACount(this);
	return copy;
}

/* StateRandomSACountAndSize implementation */

/*
 * Build a state from the raw TGA vector using ClusterRandom clusters, an
 * InsertMoveSetSACountAndSize insert move set and a CoherentShiftMoveSetRandom
 * coherent shift move set.  Exits the process if any allocation yields a null
 * pointer.
 */
StateRandomSACountAndSize::StateRandomSACountAndSize(const TGA_Raw_Vector &trv,
		int initial_cluster_size, double a_coherentshiftperiod,
		double a_chemical_potential) :
		State(trv, initial_cluster_size, a_coherentshiftperiod, a_chemical_potential)
{
	/* allocate cluster k, then file TGA k under cluster k / initial_cluster_size */
	for (int k = 0; k < size; ++k) {
		cluster[k] = new ClusterRandom(k);
		if (!cluster[k]) {
			fprintf(stderr,"out of memory while creating state\n");
			exit(1);
		}
		cluster[k / initial_cluster_size]->insert(tga[k]);
	}
	/* histogram with size+1 bins, bumped once per cluster at its get_num_tga() value */
	cluster_size_hist = new int[size+1];
	if (cluster_size_hist == 0) {
		fprintf(stderr,"Error: out of memory during StateRandomSACountAndSize Constructor\n");
		fflush(0);
		exit(1);
	}
	for (int bin = 0; bin <= size; ++bin) {
		cluster_size_hist[bin] = 0;
	}
	for (int c = 0; c < size; ++c) {
		++cluster_size_hist[cluster[c]->get_num_tga()];
	}
	/* move sets: SACountAndSize insert moves, random coherent shifts */
	insertmoveset = new InsertMoveSetSACountAndSize(trv.get_size(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetRandom(trv.get_size(),tga);
	if (insertmoveset == 0 || coherentshiftmoveset == 0) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Clone constructor: the State base is built from `state`, then this object
 * gets its own InsertMoveSetSACountAndSize and CoherentShiftMoveSetRandom.
 */
StateRandomSACountAndSize::StateRandomSACountAndSize(const StateRandomSACountAndSize *state) :
		State(state)
{
	insertmoveset = new InsertMoveSetSACountAndSize(state->get_num_tga(), cluster_size_hist, stirling_partition, chemical_potential);
	coherentshiftmoveset = new CoherentShiftMoveSetRandom(state->get_num_tga(),tga);
	/* both move sets must exist before the state is usable */
	const bool allocated = insertmoveset && coherentshiftmoveset;
	if (!allocated) {
		fprintf(stderr,"out of memory while creating state\n");
		exit(1);
	}
}

/*
 * Free the two move sets that the constructors allocate with new.
 * NOTE(review): cluster[] and cluster_size_hist are not released here --
 * presumably the State base destructor does that; confirm.
 */
StateRandomSACountAndSize::~StateRandomSACountAndSize()
{
	delete insertmoveset;
	delete coherentshiftmoveset;
}

/* Allocate a copy of this state with new and hand it back as a State pointer. */
State *StateRandomSACountAndSize::create_clone()
{
	StateRandomSACountAndSize *copy = new StateRandomSACountAndSize(this);
	return copy;
}
