#include <stdio.h>
#include <string>
#include <limits.h>
#define __THROW_BAD_ALLOC

/* __THROW_BAD_ALLOC is needed for some stl implementations (g++) */
/* otherwise, cerr, operator <<, and endl are referenced in <map> */
#include <map>
#include "spa.h"
#include "dnaSeq.h"
#include "locus.h"

using namespace std;

/* TYPE DECLARATIONS */

/* One genoFind hit: a tile of the query found at a position in the target.
 * Hits are chained into singly linked lists through `next`. */
struct gfHit
{
	gfHit *next;			/* Next hit in the list, or 0 at the end. */
	unsigned int qStart;		/* Offset of the tile in the query. */
	unsigned int tStart;		/* Offset of the tile in the target. */
	unsigned int diagonal;		/* tStart + qSize - qStart. */
};

/* CONSTANTS, STATIC SCOPE DEFINITIONS AND INITIALIZERS */

/* Bit masks derived from an all-ones word, so they are platform independent:
 *   zerobit     == 0x0000
 *   rightbit    == 0x0001 (lowest bit)
 *   rightnibble == 0x000F (lowest four bits)
 *   rightbyte   == 0x00FF (lowest eight bits) */
static const unsigned int zerobit = 0u;
static const unsigned int rightbit = ~(~zerobit << 1);
static const unsigned int rightnibble = ~(~zerobit << 4);
static const unsigned int rightbyte = ~(~zerobit << 8);

/* nib file signatures: as read on a same-endian and on an opposite-endian host */
static const unsigned int NIBSAMEENDIANSIG = 0x6BE93D3Au;
static const unsigned int NIBOPPOSITEENDIANSIG = 0x3A3DE96Bu;

/* Static lookup tables for converting between a nucleotide's numeric value
 * and its character. */

/* Numeric codes for the bases; N_BASE_VAL stands for "any other base". */
#define T_BASE_VAL 0
#define C_BASE_VAL 1
#define A_BASE_VAL 2
#define G_BASE_VAL 3
#define N_BASE_VAL 4   /* Used in 1/2 byte representation. */

/* Nibble value (0..15) -> lowercase base character. Values 0..3 are
 * t, c, a, g; every value from 4 upwards decodes to 'n'. */
static const char val2nt[16] = {
	't','c','a','g','n','n','n','n',
	'n','n','n','n','n','n','n','n'};

/* 7-bit character code -> base value. Only A/a, C/c, G/g and T/t (either
 * case) map to 2, 1, 3 and 0; every other code maps to 4 (N_BASE_VAL).
 * The table has 128 entries, so indices must stay within 0..127. */
static const unsigned int nt2val[128] = {
	4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
	4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
	4,2,4,1,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4,4,0,4,4,4,4,4,4,4,4,4,4,4,
	4,2,4,1,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4,4,0,4,4,4,4,4,4,4,4,4,4,4};

/* ntmatch[x][y] tells whether characters x and y count as matching bases;
 * the table is filled by initialize_ntmatch(). */
static bool ntmatch[128][128]; /* static array to lookup nt "match" */
static bool ntmatch_initialized = false; /* set to true after initialized */

/* Forward declaration: three-way ordering of two diagonal segments,
 * defined further down in this file. */
int compareDiag(diagSeg *in, diagSeg *out);

/* Count the hits in a singly linked hit list.
 *   h - head of the list; may be NULL.
 * Returns the number of nodes reachable through `next` (0 for NULL).
 * Walks the list iteratively so that very long hit lists cannot exhaust
 * the call stack. */
unsigned int count_hit(gfHit *h) {
         unsigned int total = 0;
         for (const gfHit *node = h; node != NULL; node = node->next)
             ++total;
         return total;
}
     

/* Count the segments in a singly linked list of diagonal segments.
 *   d - head of the list; may be NULL.
 * Returns the number of nodes reachable through `next` (0 for NULL).
 * Walks the list iteratively so that very long lists cannot exhaust the
 * call stack. */
unsigned int count_clump_hit(diagSeg *d) {
         unsigned int total = 0;
         for (const diagSeg *node = d; node != NULL; node = node->next)
             ++total;
         return total;
}

void initialize_ntmatch()
{
	for (int x = 0; x < 128; x++)
		for (int y = 0; y < 128; y++)
			if (nt2val[x] == 4 || nt2val[y] == 4 || nt2val[x] == nt2val[y]) {
				ntmatch[x][y] = true;
			} else {
				ntmatch[x][y] = false;
			}
}

/* For each tile start position within one overlay cycle, the distance to the
	next tile start. This is what allows tiles to overlap. */
static unsigned int tileOverlayShift[16];

/* Recompute tileOverlayShift for the given tile size and overlay count.
 * The first tileOverlay entries split tileSize into steps that differ by at
 * most one; the remaining entries up to tileSize are set to UINT_MAX.
 * The last pair of arguments is remembered, so a repeated call with the same
 * values leaves the table untouched. */
static void initTileOverlayShift(int tileSize, int tileOverlay)
{
	static int cachedTileSize = 0;
	static int cachedTileOverlay = 0;

	if (cachedTileSize == tileSize && cachedTileOverlay == tileOverlay)
		return;

	int slot = 0;
	while (slot < tileOverlay) {
		const int thisStart = tileSize * slot / tileOverlay;
		const int nextStart = tileSize * (slot + 1) / tileOverlay;
		tileOverlayShift[slot] = nextStart - thisStart;
		++slot;
	}
	while (slot < tileSize) {
		tileOverlayShift[slot] = UINT_MAX;
		++slot;
	}
	cachedTileSize = tileSize;
	cachedTileOverlay = tileOverlay;
}

/* MEMBER FUNCTION DEFINITIONS */

/* Build a locus from a region of a .nib file and index its tiles.
 *
 * Two nested regions are derived from [a_startOffset, a_endOffset]:
 *   - the loaded region [locusStart0, locusEnd0], widened on both sides by
 *     a_maximumLocusExpand; its bases are read into dna;
 *   - the tiled region [locusStart1, locusEnd1], widened by a_locusExpand;
 *     its tiles are inserted into tilePos keyed by their base string.
 * Both regions are clamped to [0, a_nibFileSize - 1].
 *
 * Parameters:
 *   a_nibFileName        path of the .nib file to read
 *   a_startOffset        first base of the requested locus
 *   a_endOffset          last base of the requested locus (inclusive)
 *   a_tileSize           bases per tile, must lie in [TMIN, TMAX]
 *   a_tileOverlay        tile starts per tile length, in [1, a_tileSize]
 *   a_maximumLocusExpand margin added to obtain the loaded region
 *   a_locusExpand        margin added to obtain the tiled region
 *   a_nibFileSize        number of bases the caller expects in the file
 *
 * The constructor never throws for the failures handled here; the outcome is
 * stored in constructorError (0 on success, a LOCUS_* code otherwise). On
 * failure nibFileName and dna are left null.
 *
 * File layout relied on below: a 4-byte signature, a 4-byte base count, then
 * two bases per byte with the earlier base in the high nibble. */
Locus::Locus(const char *a_nibFileName, unsigned int a_startOffset, unsigned int a_endOffset, unsigned int a_tileSize,
		unsigned int a_tileOverlay,unsigned int a_maximumLocusExpand,  
                unsigned int a_locusExpand, unsigned int a_nibFileSize)
{
	nibFileName = 0;
	dna = 0;
	FILE *f = 0;
try {
	/* Region boundaries, clamped at 0 on the left ... */
	if (a_startOffset >= a_locusExpand)
		locusStart1 = a_startOffset - a_locusExpand;
	else
		locusStart1 = 0;

	if (a_startOffset >= a_maximumLocusExpand)
		locusStart0 = a_startOffset - a_maximumLocusExpand;
	else
		locusStart0 = 0;

	/* ... and at the last base of the file on the right. */
	locusEnd1 = a_endOffset + a_locusExpand;

	if (locusEnd1 > a_nibFileSize - 1)
		locusEnd1 = locusEnd0 = a_nibFileSize - 1;
	else {
		locusEnd0 = a_endOffset + a_maximumLocusExpand;
		if (locusEnd0 > a_nibFileSize - 1)
			locusEnd0 = a_nibFileSize - 1;
	}
	offSet = locusStart1 - locusStart0;

	/* Argument validation. */
	if (a_tileSize > TMAX || a_tileSize < TMIN) {
		constructorError = LOCUS_TILESIZE_ERROR;
		return;
	}
	if (a_tileOverlay > a_tileSize || a_tileOverlay < 1) {
		constructorError = LOCUS_TILEOVERLAY_ERROR; return;
	}
	if (a_endOffset < a_startOffset) {
		constructorError = LOCUS_LOCUSOFFSET_ERROR; return;
	}
	if (!a_nibFileName) {
		constructorError = LOCUS_NIBFILENAME_ERROR; return;
	}

	/* Open the file and check its header. */
	f = fopen(a_nibFileName,"rb");
	if (!f) {
		constructorError = LOCUS_NIBFILENAME_ERROR; return;
	}
	unsigned int sig;
	if (fread(&sig,sizeof(unsigned int),1,f) != 1 ||
			(sig != NIBSAMEENDIANSIG &&
			sig != NIBOPPOSITEENDIANSIG)) {
		fclose(f);
		constructorError = LOCUS_CORRUPTNIB_ERROR; return;
	}
	if (fread(&nibSize,sizeof(unsigned int),1,f) != 1) {
		fclose(f);
		constructorError = LOCUS_CORRUPTNIB_ERROR; return;
	}
	if (sig == NIBOPPOSITEENDIANSIG) {
		/* File was written on a host of the other endianness: swap bytes. */
		nibSize = (nibSize >> 24 & rightbyte) |
				(nibSize >> 16 & rightbyte) << 8 |
				(nibSize >> 8 & rightbyte) << 16 |
				(nibSize & rightbyte) << 24;
	}

	/* First consider the larger (loaded) locus. */
	startOffset = locusStart0;
	endOffset = locusEnd0;
	size = endOffset - startOffset + 1;

	/* Reject regions that fall outside the file, and tiled regions that
	 * are empty or not contained in the loaded region: the tiling loop
	 * below would otherwise read outside dna. */
	if (startOffset > endOffset || endOffset >= nibSize ||
			locusStart1 < locusStart0 || locusEnd1 > locusEnd0 ||
			locusStart1 > locusEnd1) {
		fclose(f);
		constructorError = LOCUS_LOCUSOFFSET_ERROR; return;
	}
	nibFileName = new char[strlen(a_nibFileName)+1];
	strcpy(nibFileName,a_nibFileName);
	dna = new char[size + 1];

	/* Read the loaded region: skip the 8-byte header, two bases per byte. */
	const long fileoffset = (long)(startOffset / 2) + 8;
	bool nibReadOk = (fseek(f,fileoffset,SEEK_SET) == 0);
	unsigned int ntToRead = size;
	char *dnascan = dna;
	int cbuf;
	if (nibReadOk && ntToRead > 0 && (startOffset & rightbit)) {
		/* Odd start offset: the first base is the low nibble. */
		cbuf = fgetc(f);
		if (cbuf == EOF) {
			nibReadOk = false;
		} else {
			*dnascan++ = val2nt[cbuf & rightnibble];
			ntToRead--;
		}
	}
	while (nibReadOk && ntToRead > 1) {
		cbuf = fgetc(f);
		if (cbuf == EOF) {
			nibReadOk = false;
			break;
		}
		*dnascan++ = val2nt[cbuf >> 4];
		*dnascan++ = val2nt[cbuf & rightnibble];
		ntToRead -= 2;
	}
	if (nibReadOk && ntToRead > 0) {
		/* One base left: it is the high nibble of the next byte. */
		cbuf = fgetc(f);
		if (cbuf == EOF)
			nibReadOk = false;
		else
			*dnascan++ = val2nt[cbuf >> 4];
	}
	fclose(f);
	f = 0;
	if (!nibReadOk) {
		delete[] nibFileName;
		nibFileName = 0;
		delete[] dna;
		dna = 0;
		constructorError = LOCUS_CORRUPTNIB_ERROR; return;
	}
	*dnascan = '\0';

	/* create index and allocate */
	tileSize = a_tileSize;
	tileMask = zerobit;
	for (unsigned int tmc = 0; tmc < tileSize; tmc++) {
		/* two mask bits per base */
		tileMask <<= 1;
		tileMask |= rightbit;
		tileMask <<= 1;
		tileMask |= rightbit;
	}
	tileOverlay = a_tileOverlay;
	initTileOverlayShift(tileSize,tileOverlay);

	/* Tiling only a part of the locus */
	startOffset = locusStart1;
	endOffset   = locusEnd1;
	size = endOffset - startOffset + 1;

	/* A region shorter than one tile has nothing to index; without this
	 * guard the unsigned subtraction size - tileSize would wrap. */
	if (size >= tileSize) {
		const char *poly = dna + (locusStart1 - locusStart0);
		const char *polyEnd = poly + (size - tileSize);
		unsigned int offset = startOffset;
		unsigned int tileShiftIndex = 0;
		while (poly <= polyEnd) {
			/* Tiles containing an unknown base are not indexed. */
			bool hasUnknownBase = false;
			for (unsigned int i = 0; i < tileSize; ++i) {
				if (nt2val[(int)(poly[i])] == N_BASE_VAL) {
					hasUnknownBase = true;
					break;
				}
			}
			if (!hasUnknownBase)
				tilePos.insert(make_pair(string(poly, tileSize),offset));
			offset += tileOverlayShift[tileShiftIndex];
			poly += tileOverlayShift[tileShiftIndex];
			if (++tileShiftIndex >= tileOverlay) tileShiftIndex = 0;
		}
	}
	constructorError = 0;
}
catch (const bad_alloc &) {
	/* Out of memory: release whatever was acquired so far. */
	delete[] nibFileName;
	nibFileName = 0;
	delete[] dna;
	dna = 0;
	if (f) fclose(f);
	constructorError = LOCUS_OUTOFMEMORY_ERROR;
}
}

/* Build a locus covering a sub-range of an already loaded locus, with its
 * own tile size and overlay. No file is read: the bases are copied from the
 * source locus.
 *
 * Parameters:
 *   locus          source locus; must have been constructed successfully
 *   a_startOffset  first base of the new locus, >= locus.startOffset
 *   a_endOffset    last base (inclusive), <= locus.endOffset
 *   a_tileSize     bases per tile, must lie in [TMIN, TMAX]
 *   a_tileOverlay  tile starts per tile length, in [1, a_tileSize]
 *
 * The outcome is stored in constructorError (0 on success, a LOCUS_* code
 * otherwise). On failure nibFileName and dna are left null. */
Locus::Locus(const Locus &locus, unsigned int a_startOffset,
		unsigned int a_endOffset, unsigned int a_tileSize,
		unsigned int a_tileOverlay)
{
	nibFileName = 0;
	dna = 0;
try {
	if (a_tileSize > TMAX || a_tileSize < TMIN) {
		constructorError = LOCUS_TILESIZE_ERROR; return;
	}
	if (a_tileOverlay > a_tileSize || a_tileOverlay < 1) {
		constructorError = LOCUS_TILEOVERLAY_ERROR; return;
	}
	if (a_endOffset < a_startOffset) {
		constructorError = LOCUS_LOCUSOFFSET_ERROR; return;
	}
	/* A source whose construction failed holds no sequence and possibly
	 * unset offsets; report its error instead of dereferencing it. */
	if (locus.dna == 0 || locus.nibFileName == 0) {
		constructorError = locus.constructorError != 0 ?
				locus.constructorError : LOCUS_LOCUSOFFSET_ERROR;
		return;
	}
	if (a_startOffset < locus.startOffset) {
		constructorError = LOCUS_LOCUSOFFSET_ERROR; return;
	}
	if (a_endOffset > locus.endOffset) {
		constructorError = LOCUS_LOCUSOFFSET_ERROR; return;
	}
	startOffset = a_startOffset;
	endOffset = a_endOffset;
	size = a_endOffset - a_startOffset + 1;
	/* The loaded and the tiled region coincide here; record that so this
	 * object can itself serve as the source of another sub-locus. */
	locusStart0 = locusStart1 = startOffset;
	locusEnd0 = locusEnd1 = endOffset;
	offSet = 0;
	nibSize = locus.nibSize;
	nibFileName = new char[strlen(locus.nibFileName)+1];
	strcpy(nibFileName,locus.nibFileName);
	dna = new char[size + 1];
	/* The source buffer starts at the source's locusStart0. */
	memcpy(dna, locus.dna + (startOffset - locus.locusStart0), size);
	dna[size] = '\0';

	/* create index and allocate */
	tileSize = a_tileSize;
	tileMask = zerobit;
	for (unsigned int tmc = 0; tmc < tileSize; tmc++) {
		/* two mask bits per base */
		tileMask <<= 1;
		tileMask |= rightbit;
		tileMask <<= 1;
		tileMask |= rightbit;
	}
	tileOverlay = a_tileOverlay;
	initTileOverlayShift(tileSize,tileOverlay);

	/* A locus shorter than one tile has nothing to index; without this
	 * guard the unsigned subtraction size - tileSize would wrap. */
	if (size >= tileSize) {
		const char *poly = dna;
		const char *polyEnd = poly + (size - tileSize);
		unsigned int offset = startOffset;
		unsigned int tileShiftIndex = 0;
		while (poly <= polyEnd) {
			/* Tiles containing an unknown base are not indexed. */
			bool hasUnknownBase = false;
			for (unsigned int i = 0; i < tileSize; ++i) {
				if (nt2val[(int)(poly[i])] == N_BASE_VAL) {
					hasUnknownBase = true;
					break;
				}
			}
			if (!hasUnknownBase)
				tilePos.insert(make_pair(string(poly, tileSize),offset));
			offset += tileOverlayShift[tileShiftIndex];
			poly += tileOverlayShift[tileShiftIndex];
			if (++tileShiftIndex >= tileOverlay) tileShiftIndex = 0;
		}
	}
	constructorError = 0;
}
catch (const bad_alloc &) {
	/* Out of memory: release whatever was acquired so far. */
	delete[] nibFileName;
	nibFileName = 0;
	delete[] dna;
	dna = 0;
	constructorError = LOCUS_OUTOFMEMORY_ERROR;
}
}

/* Release the file name and sequence buffers owned by the locus. */
Locus::~Locus()
{
	/* delete[] on a null pointer is a no-op, so no test is needed. */
	delete[] nibFileName;
	delete[] dna;
}

/* STATIC FUNCTION DEFINITIONS */

static void hitFreeList(gfHit *pList)
/* Delete every node of a list of dynamically allocated hits. The head is
 * passed by value, so the caller's own pointer is left unchanged. */
{
	while (pList != 0) {
		gfHit *doomed = pList;
		pList = pList->next;
		delete doomed;
	}
}

/* C-callable factory: allocate a Locus read from a .nib file.
 *   createdLocus - receives the new object, or 0 when allocation fails
 *   remaining arguments are forwarded unchanged to the Locus constructor.
 * Returns the constructor's error code, or LOCUS_OUTOFMEMORY_ERROR when the
 * object itself could not be allocated. When the constructor reports an
 * error the object is still returned, so the caller remains responsible for
 * passing it to destroyLocus(). */
int createLocus(struct Locus **createdLocus, char *targetNibFilename, unsigned int locusStartOffset,
                unsigned int locusEndOffset, unsigned int tileSize, unsigned int tileOverlay, 
                unsigned int maximumLocusExpand, unsigned int locusExpand, unsigned int nibFileSize) {
	/* Give the output a defined value on every path. */
	*createdLocus = 0;
try {
	*createdLocus = new Locus(targetNibFilename, locusStartOffset, locusEndOffset, tileSize,
                                  tileOverlay, maximumLocusExpand, locusExpand, nibFileSize);
	return (*createdLocus)->get_constructorError();
}
catch (const bad_alloc &) {
	*createdLocus = 0;
	return LOCUS_OUTOFMEMORY_ERROR;
}
}


/* Delete the locus that *doomedLocus points to and reset the caller's
 * pointer to 0. */
void destroyLocus(Locus **doomedLocus)
{
	Locus *victim = *doomedLocus;
	*doomedLocus = 0;
	delete victim;
}

void destroyDefPosList(diagSeg **pList)
/* Delete every node of a list of dynamically allocated diagSeg's and
 * reset the caller's head pointer to 0. */
{
	diagSeg *node = *pList;
	while (node != 0) {
		diagSeg *following = node->next;
		delete node;
		node = following;
	}
	*pList = 0;
}

/* Inline ordering test: true when list[pos_a] sorts after list[pos_b],
 * comparing the diagonal first and tStart when the diagonals are equal. */
static inline bool diagStartCmp( gfHit **list, int pos_a, int pos_b)
{
	const gfHit *a = list[pos_a];
	const gfHit *b = list[pos_b];
	if (a->diagonal != b->diagonal)
		return a->diagonal > b->diagonal;
	return a->tStart > b->tStart;
}

/* Inline swap of the two hit pointers at pos_a and pos_b. */
static inline void hitSwap( gfHit **list, int pos_a, int pos_b)
{
	gfHit *const held = list[pos_b];
	list[pos_b] = list[pos_a];
	list[pos_a] = held;
}

static void gfHitSort2Hack(gfHit **ptArray, gfHit **scratchArray, int n)
/* Fast sorting routines for sorting gfHits on diagonal. 
 * More or less equivalent to system qsort, but with
 * comparison function inline. Worth a little tweaking
 * since this is the bottleneck for the whole procedure.
 *
 *   ptArray      - the n hit pointers to sort in place, ascending by
 *                  diagonal and then by tStart
 *   scratchArray - work buffer for the merge step; the same buffer is
 *                  handed to the recursive calls
 *   n            - number of entries in ptArray
 *
 * This is a top-down merge sort: each half is sorted (directly when it
 * has three or fewer entries, recursively otherwise) and the two halves
 * are then merged through scratchArray. */
{
	gfHit **tmp, **pt1, **pt2;
	int n1, n2;

	/* Divide area to sort in two. */
	n1 = (n>>1); /* first half size */
	n2 = n - n1; /* second half size */
	pt1 = ptArray;
	pt2 = ptArray + n1;

/* Sort each area separately. Handle small case (3 or less elements)
 * here. Otherwise recurse to sort. */ /* TODO: DONT RECURSE! */
	if (n1 > 3) {
		gfHitSort2Hack(pt1, scratchArray, n1);
	} else if (n1 == 3) {
		/* Three entries: move the largest to slot 2, then order 0 and 1. */
		if (diagStartCmp(pt1,0,1)) {
			if (diagStartCmp(pt1,0,2)) {
				hitSwap(pt1,0,2);
				if (diagStartCmp(pt1,0,1)) hitSwap(pt1,0,1);
			} else {
				hitSwap(pt1,0,1);
			}
		} else {
			if (diagStartCmp(pt1,1,2)) {
				hitSwap(pt1,1,2);
				if (diagStartCmp(pt1,0,1)) hitSwap(pt1,0,1);
			}
		}
	} else if (n1 == 2) {
		if (diagStartCmp(pt1,0,1)) hitSwap(pt1,0,1);
	}
	/* Same treatment for the second half (a half of 0 or 1 entries
	 * needs no work). */
	if (n2 > 3) {
		gfHitSort2Hack(pt2, scratchArray, n2);
	} else if (n2 == 3) {
		if (diagStartCmp(pt2,0,1)) {
			if (diagStartCmp(pt2,0,2)) {
				hitSwap(pt2,0,2);
				if (diagStartCmp(pt2,0,1)) hitSwap(pt2,0,1);
			} else {
				hitSwap(pt2,0,1);
			}
		} else {
			if (diagStartCmp(pt2,1,2)) {
				hitSwap(pt2,1,2);
				if (diagStartCmp(pt2,0,1)) hitSwap(pt2,0,1);
			}
		}
	} else if (n2 == 2) {
		if (diagStartCmp(pt2,0,1)) hitSwap(pt2,0,1);
	}
	/* At this point both halves are internally sorted. 
	* Do a merge-sort between two halves copying to temp
	* buffer. Then copy back sorted result to main buffer. */
	tmp = scratchArray;
	while (n1 > 0 && n2 > 0)
	{
		/* Take from the first half only when its head is strictly
		 * smaller; on a tie the second half's head goes first. */
		if (pt1[0]->diagonal < pt2[0]->diagonal ||
				(pt1[0]->diagonal == pt2[0]->diagonal &&
				pt1[0]->tStart < pt2[0]->tStart)) {
			--n1;
			*tmp++ = *pt1++;
		} else {
			--n2;
			*tmp++ = *pt2++;
		}
	}
/* One or both sides are now fully merged. */

/* If some of first side left to merge copy it to end of temp buf. */
	if (n1 > 0) memcpy(tmp, pt1, n1 * sizeof(*tmp));

/* If some of second side left to merge, we finesse it here:
 * simply refrain from copying over it as we copy back temp buf.
 * The n2 entries still unmerged already sit at the end of ptArray,
 * so only the first n - n2 entries are copied back. */
	memcpy(ptArray, scratchArray, (n - n2) * sizeof(*ptArray));
}

static void gfHitSortDiagonalHack(gfHit **pList, int bucketSize)
/* Sort a hit list in place on diagonal, then tStart.
 *   pList      - head of the list; updated to the new first hit
 *   bucketSize - number of hits in the list, as counted by the caller
 * Rethrows bad_alloc after releasing its work arrays; the list has not
 * been modified at that point. */
{
	/* Lists of zero or one hit are already sorted. */
	if (*pList == 0 || (*pList)->next == 0)
		return;

	gfHit **sorted = 0;
	gfHit **scratch = 0;
try {
	sorted = new gfHit *[bucketSize];
	scratch = new gfHit *[bucketSize];

	/* Flatten the list into an array of node pointers. */
	int count = 0;
	for (gfHit *node = *pList; node != 0; node = node->next)
		sorted[count++] = node;

	gfHitSort2Hack(sorted, scratch, bucketSize);

	/* Re-thread the nodes in sorted order. */
	for (int k = 0; k + 1 < bucketSize; ++k)
		sorted[k]->next = sorted[k + 1];
	sorted[bucketSize - 1]->next = 0;
	*pList = sorted[0];

	delete[] sorted;
	delete[] scratch;
}
catch (const bad_alloc &) {
	delete[] sorted;
	delete[] scratch;
	throw;
}
}

static diagSeg *clumpHitsHack(const Locus &locus, gfHit *hitList, int querySize)
/* Turn a list of hits into a list of clumps (diagonal segments).
 *   locus     - the indexed target; supplies size, start offset, tile size
 *   hitList   - hits to clump
 *   querySize - length of the query the hits were found with
 * Hits are distributed over buckets of 64k diagonals, each bucket is sorted
 * on diagonal and tStart, and hits on the same diagonal whose tiles touch or
 * overlap are merged into one diagSeg.
 *
 * Ownership: on success every hit has been deleted and the returned list is
 * owned by the caller. On bad_alloc the exception is rethrown, the partial
 * clump list is destroyed, and all hits are chained behind hitList again so
 * that the caller, who still holds that head, can free them. */
{
	gfHit **buckets = 0;
	int *bucketSize = 0;
	int bucketCount = 0;
	bool hitsInBuckets = false;	/* true once the hits live in the buckets */
	diagSeg *clumpList = 0, *clumpListEnd = 0, *clump = 0;
try {
	gfHit *hit, *hitNext;
	int bucketShift = 16;		/* 64k buckets. */
	int bucketNum;

	bucketCount = ((locus.get_size() + querySize - 1) >> bucketShift) + 1;

	/* Sort hit list into buckets. (each bucket has 64k diagonals) */
	buckets = new gfHit *[bucketCount];
	bucketSize = new int [bucketCount];
	memset(buckets,0,bucketCount * sizeof(gfHit *));
	memset(bucketSize,0,bucketCount * sizeof(int));
	/* Nothing in the distribution loop can throw, so from here on the
	 * buckets hold every hit. */
	hitsInBuckets = true;
	for (hit = hitList; hit != 0; hit = hitNext) {
		bucketNum = (hit->diagonal - locus.get_startOffset()) >> bucketShift;
		hitNext=hit->next;
		hit->next = buckets[bucketNum];
		buckets[bucketNum] = hit;
		bucketSize[bucketNum]++;
	}
	/* Sort each bucket on diagonal and clump. */
	for (bucketNum = 0; bucketNum < bucketCount; ++bucketNum) {
		if (bucketSize[bucketNum] == 0) continue;
		gfHitSortDiagonalHack(buckets + bucketNum,bucketSize[bucketNum]);
		for (hit = buckets[bucketNum]; hit != 0; hit = hit->next) {
			clump = new diagSeg;
			clump->qStart = hit->qStart;
			clump->tStart = hit->tStart;
			clump->diagonal = hit->diagonal;
			/* Absorb following hits on the same diagonal whose tile
			 * starts no later than the end of the current tile. */
			while (hit->next!=0 &&
					hit->next->diagonal == clump->diagonal &&
					hit->tStart + locus.get_tileSize() >= hit->next->tStart)
				hit = hit->next;
			clump->qEnd = hit->qStart + locus.get_tileSize() - 1;
			clump->tEnd = hit->tStart + locus.get_tileSize() - 1;
			clump->next = 0;
			if (clumpList == 0) {
				clumpList = clump;
			} else {
				clumpListEnd->next = clump;
			}
			clumpListEnd = clump;
		}
	}
	/* The clumps carry everything needed; the hits can go. */
	for (int k = 0; k < bucketCount; k++)
		hitFreeList(buckets[k]);
	delete[] buckets;
	delete[] bucketSize;
	return clumpList;
}
catch (const bad_alloc &) {
	if (hitsInBuckets && hitList != 0) {
		/* The caller frees the hits through hitList, but that node is
		 * now somewhere inside a bucket. Rebuild one list with hitList
		 * at its head so that no hit is leaked or freed twice. */
		gfHit *rest = 0;
		for (int b = 0; b < bucketCount; ++b) {
			gfHit *node = buckets[b];
			while (node != 0) {
				gfHit *following = node->next;
				if (node != hitList) {
					node->next = rest;
					rest = node;
				}
				node = following;
			}
		}
		hitList->next = rest;
	}
	delete[] buckets;
	buckets = 0;
	delete[] bucketSize;
	bucketSize = 0;
	destroyDefPosList(&clumpList);
	throw;
}
}

static gfHit *gfFastFindDnaHitsHack(const Locus &locus, dnaSeq *seq)
/* Find hits associated with one sequence. This is a special fast
 * case for DNA that is in an unsegmented index.
 *   locus - indexed target; its tile size and tile map are used
 *   seq   - query sequence (seq->dna, seq->size)
 * Every tile-sized window of the query that contains only a, c, g or t is
 * looked up in the locus' tile map; each occurrence yields one gfHit.
 * Returns the hits as a newly allocated list (0 when there are none), which
 * the caller owns. On bad_alloc the hits built so far are freed and the
 * exception is rethrown. */
{
	gfHit *hitList = 0;
try {
	const unsigned int size = seq->size;
	const unsigned int tileLen = locus.get_tileSize();

	/* A query shorter than one tile cannot contain a tile; scanning it
	 * anyway would read past the end of the sequence. */
	if (tileLen == 0 || size < tileLen)
		return hitList;

	const unsigned int tileSizeMinusOne = tileLen - 1;
	const char *dna = seq->dna;
	unsigned int minTileEnd = 0;	/* first tile end past the last unknown base */
	string tile;
	pair<multimap<string,unsigned int>::const_iterator,
			multimap<string, unsigned int>::const_iterator> range;

	for (unsigned int i = 0; i < size; ++i) {
		/* nt2val only covers 7-bit codes; any other byte is treated
		 * as an unknown base. */
		const unsigned char code = (unsigned char)dna[i];
		const unsigned int bVal = (code < 128) ? nt2val[code] : N_BASE_VAL;
		if (bVal == N_BASE_VAL) {
			minTileEnd = i + tileLen;
		}
		/* Skip positions where no complete, N-free tile ends. */
		if (i < tileSizeMinusOne || i < minTileEnd) continue;

		const unsigned int qStart = i - tileSizeMinusOne;
		tile = string(dna + qStart, tileLen);
		range = locus.get_tilePos().equal_range(tile);
		for (multimap<string,unsigned int>::const_iterator lp = range.first;
				lp != range.second; ++lp) {
			gfHit *hit = new gfHit;
			hit->qStart = qStart;
			hit->tStart = lp->second;
			hit->diagonal = hit->tStart + size - qStart - 1;
			hit->next = hitList;
			hitList = hit;
		}
	}
	return hitList;
}
catch (const bad_alloc &) {
	hitFreeList(hitList);
	throw;
}
}

static diagSeg *gfFindClumpsHack(const Locus &locus, dnaSeq *seq)
/* Find clumps associated with one sequence: collect the tile hits of seq
 * against locus, then merge them into diagonal segments. Returns the clump
 * list; on bad_alloc releases what it holds and rethrows. */
{
	gfHit *hits = 0;
	diagSeg *clumps = 0;

try {
	hits = gfFastFindDnaHitsHack(locus, seq);
	clumps = clumpHitsHack(locus, hits, seq->size);
}
catch (const bad_alloc &) {
	if (hits != 0) hitFreeList(hits);
	if (clumps != 0) destroyDefPosList(&clumps);
	throw;
}
	return clumps;
}

/* Map a base character to a valid index into the 128x128 ntmatch table.
 * Bytes outside the 7-bit range are looked up as 'n', i.e. they behave like
 * every other character that is not a, c, g or t. */
static inline int ntmatchIndex(char base)
{
	const unsigned char code = (unsigned char)base;
	return (code < 128) ? (int)code : (int)'n';
}

/* Extend every clump along its diagonal, in both directions, for as long as
 * the query and target bases match according to ntmatch.
 *   clumpList - clumps to extend in place (qStart/tStart move down,
 *               qEnd/tEnd move up); may be 0
 *   querySeq  - query sequence the clumps refer to
 *   locus     - target; get_dna() is indexed relative to get_startOffset()
 * Extension never leaves the query or the range
 * [get_startOffset(), get_endOffset()] of the locus. */
static void extendClumps(diagSeg *clumpList, dnaSeq *querySeq, const Locus &locus)
{
	const unsigned int cSize = (unsigned int) (querySeq->size);
	for (diagSeg *clump = clumpList; clump != 0; clump = clump->next) {
#undef DEBUG_EXTENSION_PROCESS
#ifdef DEBUG_EXTENSION_PROCESS
		printf("%d\t%d:\t%d\t%d\n",clump->qStart,clump->qEnd,clump->tStart,clump->tEnd);
#endif
		/* Snapshot the original ends: the probes below are relative to
		 * them while the clump itself is being widened. */
		const unsigned int qProbe = clump->qStart;
		const unsigned int tProbe = clump->tStart;
		const unsigned int qHorizon = clump->qEnd;
		const unsigned int tHorizon = clump->tEnd;

		/* Backward: limited by the bases available before the clump in
		 * the target and in the query. */
		const unsigned int tBefore = tProbe - locus.get_startOffset();
		unsigned int winLen = (tBefore < qProbe) ? tBefore : qProbe;
		for (unsigned int i = 1; i <= winLen; i++) {
			if (!ntmatch[ntmatchIndex(querySeq->dna[qProbe - i])]
					[ntmatchIndex(locus.get_dna()[tBefore - i])])
				break;
			clump->qStart--;
			clump->tStart--;
		}

		/* Forward: limited by the bases available after the clump. */
		const unsigned int qAfter = cSize - 1 - qHorizon;
		const unsigned int tAfter = locus.get_endOffset() - tHorizon;
		winLen = (qAfter < tAfter) ? qAfter : tAfter;
		for (unsigned int i = 1; i <= winLen; i++) {
			if (!ntmatch[ntmatchIndex(querySeq->dna[qHorizon + i])]
					[ntmatchIndex(locus.get_dna()[tHorizon - locus.get_startOffset() + i])])
				break;
			clump->qEnd++;
			clump->tEnd++;
		}
	}
}

/* createDefPosList() - Search a locus for hits from a query sequence.
	On success *defPosList receives a list of diagonal segments, the
	defined positions for alignment threading with spa.
	Returns GETDEFPOS_SUCCESS, DEFPOS_EMPTY when nothing was found,
	GETDEFPOS_QUERYSEQ_ERROR for a missing or empty query,
	GETDEFPOS_BAD_LOCUS_ERROR for a missing or failed locus, or
	GETDEFPOS_OUTOFMEMORY_ERROR (with *defPosList emptied). */
int createDefPosList(diagSeg	**defPosList,
		dnaSeq		*querySequence,
		Locus		*locus) {
	*defPosList = 0;

	if (querySequence == 0 || querySequence->size <= 0) {
		return GETDEFPOS_QUERYSEQ_ERROR;
	}
	if (locus == 0 || locus->get_constructorError() != LOCUS_SUCCESS) {
		return GETDEFPOS_BAD_LOCUS_ERROR;
	}
try {
	if (!ntmatch_initialized) initialize_ntmatch();

	/* search target - clump hits and extend */
	*defPosList = gfFindClumpsHack(*locus, querySequence);
	extendClumps(*defPosList, querySequence, *locus);

	if (*defPosList == NULL) {
		return DEFPOS_EMPTY;
	}
	return GETDEFPOS_SUCCESS;
}
catch (const bad_alloc &) {
	destroyDefPosList(defPosList);
	return GETDEFPOS_OUTOFMEMORY_ERROR;
}
}

/* addDefPos() - prepend a single-base defined position to a defPosList.
	The new segment starts and ends at (qOffset, tOffset); its diagonal is
	tOffset + querySize - qOffset - 1.
	Returns GETDEFPOS_SUCCESS, or GETDEFPOS_OUTOFMEMORY_ERROR (list
	unchanged) when the node cannot be allocated. */
int addDefPos(diagSeg **defPosList, unsigned int qOffset,
		unsigned int tOffset, unsigned int querySize) {
	diagSeg *defPos = 0;
try {
	defPos = new diagSeg;
}
catch (const bad_alloc &) {
	return GETDEFPOS_OUTOFMEMORY_ERROR;
}
	defPos->qStart = qOffset;
	defPos->qEnd = qOffset;
	defPos->tStart = tOffset;
	defPos->tEnd = tOffset;
	defPos->diagonal = tOffset + querySize - qOffset - 1;

	/* push on the front of the list */
	defPos->next = *defPosList;
	*defPosList = defPos;
	return GETDEFPOS_SUCCESS;
}

/* normalizeDefPos() - make the retiled defined positions compatible with the previous ones.
   For every segment in *in: qStart and qEnd are shifted by sqStart, and the
   diagonal is shifted by qSize - sqEnd. Target coordinates are untouched. */
void normalizeDefPos(diagSeg **in, unsigned int qSize, unsigned int sqStart, unsigned int sqEnd) {
     for (diagSeg *seg = *in; seg != NULL; seg = seg->next) {
            seg->diagonal = seg->diagonal + qSize - sqEnd;
            seg->qStart += sqStart;
            seg->qEnd += sqStart;
     }
}
             

/* insertDefPos() - merge a list of defined positions into an existing list.
 *   in  - head of the list to merge; its nodes are consumed: each one is
 *         either linked into *out or deleted. *in itself is not modified
 *         and must not be used to free the nodes afterwards.
 *   out - head of the destination list; updated when a node is inserted in
 *         front of it.
 * Both lists are walked front to back in compareDiag order. A node that
 * compareDiag places before the current destination node (-1) is inserted in
 * front of it; one placed after it (1) advances the destination, or, at the
 * end of the destination, appends the whole remainder of the input. Any
 * other result (0 = subsumed, -2 = partial overlap) deletes the input node. */
void insertDefPos(diagSeg **in, diagSeg **out) {
     diagSeg *tmp = *in;

     /* Nothing to compare against: the input becomes the destination. */
     if (*out == NULL) {
         *out = tmp;
         return;
     }

     diagSeg *cur = *out;    /* destination node being compared against */
     diagSeg *last = cur;    /* node in front of cur; == cur while cur is the head */
     while (tmp != NULL) {
            const int order = compareDiag(tmp, cur);
            if (order == -1) {
                /* Unlink the current input node (not *in, which may be
                   long gone) and splice it in front of cur. */
                diagSeg *node = tmp;
                tmp = tmp->next;
                node->next = cur;
                if (last == cur) {
                    *out = node;
                }
                else {
                    last->next = node;
                }
                last = node;
            }
            else if (order == 1) {
                if (cur->next == NULL) {
                    cur->next = tmp;
                    break;
                }
                if (last != cur) {
                    last = last->next;
                }
                cur = cur->next;
            }
            else {
                /* subsumed or overlapping: skip it */
                diagSeg *prev = tmp;
                tmp = tmp->next;
                delete prev;
            }
     }
}

/* Order two diagonal segments.
 * Returns -1 when `in` sorts before `out` (smaller diagonal, or same diagonal
 * and ending before `out` starts), 1 when it sorts after, 0 when `in` lies
 * entirely within `out` on the same diagonal, and -2 when the two overlap
 * on the same diagonal without `in` being contained. */
int compareDiag(diagSeg *in, diagSeg *out) {
  if (in->diagonal != out->diagonal) {
    return (in->diagonal < out->diagonal) ? -1 : 1;
  }

  /* Same diagonal: decide on the target intervals. */
  const bool startsInside = in->tStart >= out->tStart;
  const bool endsInside = in->tEnd <= out->tEnd;
  if (startsInside && endsInside) {
    return 0;       /* in subsumed by out */
  }
  if (in->tEnd < out->tStart) {
    return -1;
  }
  if (in->tStart > out->tEnd) {
    return 1;
  }
  return -2;
}
                     

/* c wrappers for Locus member functions */
/* Return the sequence pointer reported by the locus' get_dna(). */
const char *getLocusDna(struct Locus *locus) {
	const char *bases = locus->get_dna();
	return bases;
}

/* Return the base at `offset`, which is rebased against the locus'
 * get_startOffset() before indexing get_dna(). No bounds check is done. */
char getLocusBase(struct Locus *locus, int offset) {
	const char *bases = locus->get_dna();
	return bases[offset - locus->get_startOffset()];
}
