package Intron;

use strict;
use warnings;

# Build an intron length score table from observed length frequencies.
#
# Usage:  Intron->new(\%iData, $Nbin)
#
#   $iData - hash ref mapping a length to its frequency count. The count
#            stored under key 0 is the numerator of score_genome_extension
#            (print_data describes that score as the log probability of NOT
#            starting an intron); positive keys are intron lengths.
#   $Nbin  - number of length bins, must be > 0.
#
# Returns a blessed hash with the keys shortest_intron_length,
# longest_intron_length, intron_length_table_size, score_genome_extension
# and intron (array ref holding one log score per bin).
#
# Dies when the input is undefined, the bin count is missing or not
# positive, no positive length is present, or the count for key 0 is
# missing or not positive (its logarithm would be undefined).
sub new {
    my $that  = shift;
    my $class = ref($that) || $that;
    my $iData = shift;
    my $Nbin  = shift;

    unless (defined($iData)) {
        die "Input Data undefined\n";
    }
    unless ((defined $Nbin) && ($Nbin > 0)) {
        die "Number of bins is missing\n";
    }

    my $extraNumber = 1;    # makes the last bin's upper bound exclusive
    my $sumFreq     = 0;
    my $max         = 0;
    my $min;                # stays undef until a positive length is seen
    foreach my $k (keys %{$iData}) {
        $sumFreq += $iData->{$k};
        # Key 0 is not an intron length, so it never becomes the minimum.
        if ($k > 0 && (!defined($min) || $k < $min)) {
            $min = $k;
        }
        if ($max < $k) {
            $max = $k;
        }
    }

    # Without a positive length the bin boundaries below are meaningless.
    unless (defined($min)) {
        die "Input Data contains no positive intron length\n";
    }
    # log() of a missing or zero count would die with an unhelpful message.
    unless (defined($iData->{0}) && $iData->{0} > 0) {
        die "Input Data has no positive count for length 0\n";
    }

    my $self = {
        shortest_intron_length   => $min,
        longest_intron_length    => $max,
        intron_length_table_size => $Nbin,
        score_genome_extension   => log($iData->{0} / $sumFreq),
        intron                   => [],
    };

    # Bins are equally wide on a log scale: each upper bound is the lower
    # bound multiplied by a constant factor, truncated to an integer.
    my $delta = (log($max + $extraNumber) - log($min)) / $Nbin;
    my $fac   = exp($delta);

    # One pseudo-count per bin (matches the "+1" in each bin score below).
    $sumFreq += $Nbin;

    my $low = $min;
    for (my $i = 0; $i < $Nbin; $i++) {
        my $high = int($low * $fac);
        if ($high == $low) {
            ++$high;    # every bin covers at least one length
        }
        # Guard against round-off: the last bin must reach the longest length.
        if ($i == $Nbin - 1 && $high < $max + $extraNumber) {
            $high = $max + $extraNumber;
        }

        # Total count of the lengths in the half-open range [low, high).
        my $sum = 0;
        for (my $j = $low; $j < $high; $j++) {
            if (exists($iData->{$j})) {
                $sum += $iData->{$j};
            }
        }

        # Smoothed bin probability spread evenly over the bin's lengths.
        push @{ $self->{intron} }, log(($sum + 1) / ($sumFreq * ($high - $low)));
        $low = $high;
    }

    bless($self, $class);
    return $self;
}

# Build an intron length score table from a prior proportional to 1/length
# instead of from observed counts.
#
# Usage:  Intron->new_unbiased($epsilon, $min, $max, $Nbin)
#
#   $epsilon - total probability mass spread over the lengths $min..$max,
#              0 < epsilon < 1; score_genome_extension is log(1 - epsilon).
#   $min     - shortest intron length, must be > 0.
#   $max     - longest intron length, must be > $min.
#   $Nbin    - number of length bins, must be > 0.
#
# Every length j in [$min, $max] gets probability c/j, where
# c = epsilon / get_sum($min, $max), so the per-length probabilities add up
# to epsilon. Each bin score is the log of the average per-length
# probability inside the bin.
#
# Returns a blessed hash with the keys shortest_intron_length,
# longest_intron_length, intron_length_table_size, score_genome_extension
# and intron (array ref holding one log score per bin).
#
# Dies on an invalid argument, or when a bin lies entirely beyond $max
# (more bins requested than the length range can fill).
sub new_unbiased {
    my $that  = shift;
    my $class = ref($that) || $that;

    my $epsilon = shift;
    my $min     = shift;
    my $max     = shift;
    my $Nbin    = shift;

    unless ($epsilon > 0 && $epsilon < 1) {
        die "Intron::new_unbiased -> Bad epsilon value $epsilon\n";
    }
    unless ($min > 0) {
        die "Intron::new_unbiased -> Bad Min $min\n";
    }
    unless ($max > $min) {
        die "Intron::new_unbiased -> Bad Min Max $min-$max\n";
    }
    unless ($Nbin > 0) {
        die "Intron::new_unbiased -> Bad Bin Number $Nbin\n";
    }

    my $extraNumber = 1;    # makes the last bin's upper bound exclusive
    my $self = {
        shortest_intron_length   => $min,
        longest_intron_length    => $max,
        intron_length_table_size => $Nbin,
        score_genome_extension   => log(1.0 - $epsilon),
        intron                   => [],
    };

    # Normalisation constant: sum over j = min..max of c/j equals epsilon.
    my $c = $epsilon / get_sum($min, $max);

    # Bins are equally wide on a log scale: each upper bound is the lower
    # bound multiplied by a constant factor, truncated to an integer.
    my $delta = (log($max + $extraNumber) - log($min)) / $Nbin;
    my $fac   = exp($delta);

    my $low = $min;
    for (my $i = 0; $i < $Nbin; $i++) {
        my $high = int($low * $fac);
        if ($high == $low) {
            ++$high;    # every bin covers at least one length
        }
        # Guard against round-off: the last bin must reach the longest length.
        if ($i == $Nbin - 1 && $high < $max + $extraNumber) {
            $high = $max + $extraNumber;
        }

        # Probability mass of the lengths in [low, high). The mass is
        # defined from $min up to $max, the same range get_sum normalises
        # over, so it must not start at a fixed length.
        my $sum = 0;
        for (my $j = $low; $j < $high && $j <= $max; $j++) {
            $sum += $c / $j;
        }

        # A bin starting beyond $max carries no mass; log(0) would die with
        # an unhelpful message, so report the real cause.
        unless ($sum > 0) {
            die "Intron::new_unbiased -> Bin $i starts at $low beyond Max $max (too many bins for $min-$max)\n";
        }

        push @{ $self->{intron} }, log($sum / ($high - $low));
        $low = $high;
    }

    bless($self, $class);
    return $self;
}
	

# Print every bin score of the intron table, one "score_intron : <value>"
# line per bin, to the currently selected output handle.
sub print {
    my ($self) = @_;

    foreach my $score (@{ $self->{intron} }) {
        print "score_intron : $score\n";
    }
}

# Write the model parameters to the filehandle $out: two comment lines and
# the score_genome_extension value, the shortest and longest intron
# lengths, then one "score_intron <value>" line per bin, followed by
# blank lines.
sub print_data {
    my ($self, $out) = @_;

    my @header = (
        "# score_genome_extension - added to non intron genome jump\n",
        "# = log of probability to NOT start an intron (controls exon-length distribution)\n",
        "score_genome_extension $self->{score_genome_extension}\n\n\n",
        "shortest_intron_length $self->{shortest_intron_length}\n",
        "longest_intron_length $self->{longest_intron_length}\n\n",
    );
    # One print call per line, exactly as many writes as there are lines.
    foreach my $line (@header) {
        print {$out} $line;
    }

    foreach my $score (@{ $self->{intron} }) {
        print {$out} "score_intron $score\n";
    }

    print {$out} "\n\n\n";
}

# Return the sum of 1/i for i = $min, $min+1, ... while i <= $max.
# Returns 0 when $min is greater than $max.
sub get_sum {
    my ($min, $max) = @_;

    my $total   = 0;
    my $current = $min;
    while ($current <= $max) {
        $total += 1 / $current;
        $current++;
    }
    return $total;
}

1;
