#!/usr/bin/perl -w

use FileHandle;
autoflush STDOUT;
use Getopt::Long;


# Script reads a file with an ahab/Stubb profile from STDIN and produces GFF output. It needs to know
# the sliding window position(s) to report (-window), the cutoff for binding sites to be included
# in the GFF output (-cutoff), and a file with the original ahab/Stubb input dictionary (-dictionary).
# Output is GFF to STDOUT.

# Print a short usage summary to STDERR and exit with the given status
# (0 when no status is passed).  The option handling below calls this on
# a GetOptions failure and for -help.
sub print_help
{
    my ($status) = @_;
    $status = 0 if (! defined($status));
    print STDERR "usage: $0 -dictionary <file> -window \"<pos> [<pos> ...]\" [-cutoff <prob>] [-help] [profile ...]\n";
    print STDERR "  -dictionary  ahab/Stubb input dictionary (motif lengths are read from it)\n";
    print STDERR "  -window      whitespace-separated window positions to report\n";
    print STDERR "  -cutoff      minimum probability for a binding site to be reported\n";
    print STDERR "  -help        print this message\n";
    print STDERR "The profile is read from the named files or STDIN; GFF is written to STDOUT.\n";
    exit($status);
}

# Legacy Getopt::Long interface: every option value ends up in the package
# variable $opt_<name> declared via "use vars" below.
GetOptions("window=s", "cutoff=s", "help", "dictionary=s") || print_help(1);
use vars qw($opt_window $opt_help $opt_cutoff $opt_table $opt_dictionary);

# -help is accepted by GetOptions, so honour it.
print_help(0) if ($opt_help);

if (! defined($opt_dictionary)){
    die "# you need to specify the dictionary inut file\n";
}
else{
    $wtm_table = $opt_dictionary;
}

# %pos is the set of window positions that should be reported.
%pos = ();
if (! defined($opt_window)){die "# you need to specify window positions via -window option\n";}

# split(' ', ...) skips leading and trailing whitespace, so no empty target
# can sneak in; a value without any whitespace yields a single target.
@targets = split(' ', $opt_window);
if (! @targets){die "# you need to specify window positions via -window option\n";}

foreach (@targets){
    $pos{$_} = 1;
}

#######################  M   A   I   N   ########################



# Motif name -> motif length, read from the dictionary file.
%wtm_length = read_wtm_length($wtm_table);

# Scan the profile: a line starting with '>' introduces a window and is
# followed by a title line carrying the sequence id and the window position.
while(<>){
    next if (! /^>/);

    my $title = <>;
    last if (! defined($title));    # input ended right after the '>' line

    # Match in list context so that a failed match yields undef instead of
    # a stale $1 left over from an earlier successful match.
    my ($id)       = ($title =~ /Sequence \: (\S+)/);
    my ($position) = ($title =~ /Position (\d+)/);
    $id = "" if (! defined($id));
    next if (! defined($position)); # no position, nothing to select on

    if (exists($pos{$position})){
        parse_window($id, $position);
        evaluate_window($id, $position);
    }
}




##################################################################
# Read the dictionary file and return a hash that maps each motif name to
# its length.
#
# Only header lines (starting with '>') are looked at: the first
# whitespace-separated field, minus the leading '>', is the motif name and
# the second field is stored as its length.  Header lines without a second
# field are skipped with a warning, so that a later lookup reports the
# motif as unknown instead of computing with an undefined length.
#
# Dies if the file cannot be opened.
sub read_wtm_length
{
    my ($file) = @_;
    my %wtm_le = ();

    # Three-argument open with a lexical handle: the file name is taken
    # literally (no mode characters, no whitespace stripping) and no global
    # bareword handle is left behind.
    open (my $in, "<", $file) || die "Could not open wtmx_table file $file: $!\n";

    # Read into a lexical so the caller's $_ is left untouched.
    while (my $line = <$in>){
        next if ($line !~ /^>/);
        my ($fac, $le) = split(/\s+/, $line);
        $fac =~ s/^>//;
        if (! defined($le)){
            warn "# no length given for motif $fac in $file (entry skipped)\n";
            next;
        }
        $wtm_le{$fac} = $le;
    }
    close $in;
    return %wtm_le;
}

# Print one GFF line to the currently selected output handle for every
# binding site of the window starting at $position that passes the cutoff.
#
# Parameters:
#   $id        sequence id, used as first GFF column
#   $position  first position of the window
#
# Reads the globals filled elsewhere in this file: $Window (number of
# positions in the window), %X (data lines per position), %Sequence (base
# per position), %wtm_length (motif name -> length) and $opt_cutoff.
#
# Each data line is split into motif, strand, label and probability; only
# lines whose label equals 1 and whose probability is not below the cutoff
# are reported.  The score column is the probability times 10.
#
# Dies if a reported motif has no entry in %wtm_length.
sub evaluate_window
{
    my ($id, $position) = @_;
    our ($Window, %X, %Sequence, %wtm_length, $wtm_table, $opt_cutoff);

    # Without -cutoff every labelled site is reported (cutoff 0).
    my $cutoff = defined($opt_cutoff) ? $opt_cutoff : 0;

    for my $offset (0 .. $Window - 1){
        my $tot = $position + $offset;

        # Skip positions without data instead of autovivifying empty lists.
        next if (! exists($X{$tot}));

        foreach my $dat (@{$X{$tot}}){
            my ($motif, $str, $label, $prob) = split(/\s+/, $dat);
            next if (! $label);
            next if ($label != 1);
            next if ($prob < $cutoff);

            my $start = $tot;
            if (! exists($wtm_length{$motif})){
                die "# motif $motif not in wtm_table $wtm_table (length undetermined!)\n";
            }
            my $stop = $start + $wtm_length{$motif} - 1;

            # A motif may run past the bases stored for this window; such
            # positions contribute nothing to the match string.
            my $match = join("", map { defined($Sequence{$_}) ? $Sequence{$_} : "" } ($start .. $stop));

            $prob = sprintf("%.6f", $prob * 10);

#mark changed moby_dick to ahab for posterity and accuracy 8/31
#ss changed ahab to stubb
            my $gff = join("\t", ($id, "Stubb", $motif, $start, $stop, $prob, $str, ".", "$motif; $match"));
            print "$gff\n";
        }
    }
}

    

########################

# %X stores the list of data lines (motif, strand, label, prob) keyed on global sequence position
# %Sequence stores the sequence base for each position
# $Window holds the window length (number of positions read)

# Read one window from the input stream (<>) into the globals %X,
# %Sequence and $Window.
#
# Parameters ($id, $position) are accepted for symmetry with
# evaluate_window but are not used here.
#
# Input is consumed line by line until a line starting with '<' or the end
# of the input:
#   - a line starting with a digit is "<position> <base>"; it adds one to
#     $Window and stores the base in %Sequence
#   - any other line is appended to the list %X keeps for the most recently
#     read position
sub parse_window
{
    my ($id, $position) = @_;
    our ($Window, %X, %Sequence);
    my ($dat, $base, $pos);

    # Start every window from a clean slate.  Otherwise data lines of an
    # earlier window covering the same positions (overlapping windows, or
    # another sequence) would be reported again with this window.
    %X = ();
    %Sequence = ();
    $Window = 0;

    # Stop at end of input as well: a window that is not terminated by a
    # '<' line must not loop forever on undef reads.
    while (defined($dat = <>)){
        chomp $dat;
        return if ($dat =~ /^</);
        if ($dat =~ /^\d/){
            ($pos, $base) = split(/\s+/, $dat);
            $Window ++;
            $Sequence{$pos} = $base;
            next;
        }

        # Data lines ahead of the first position line belong to no position.
        next if (! defined($pos));
        push (@{$X{$pos}}, $dat);
    }
    return;
}


#########################














