package Statistics;

use Clone;
use Parameters;
use POSIX qw(tmpnam);
use IO::File;
use File::Copy;
use strict;

# Constructor.
# Builds a Statistics object whose distribution tables are empty hashes,
# whose remap/file-name lists are empty arrays and whose counters and
# scores are all zero.  Works both as a class method and on an existing
# instance (the new object is blessed into the invocant's class).
sub new {
    my ($invocant) = @_;
    my $class = ref($invocant) || $invocant;

    my %state = (
        # per-length occurrence tables
        insertion      => {},
        deletion       => {},
        intron         => {},
        # per-clone records
        eRateInfo      => {},
        fileInfo       => {},
        # global counters
        matches        => 0,
        mismatches     => 0,
        padding        => 0,
        uniqueId       => 0,
        # remapping bookkeeping
        groupToRemap   => [],
        groupSize      => 0,
        fileNames      => [],
        fileNameCount  => 0,
        mapDir         => '',
        totalNbClones  => 0,
        totNbMisClones => 0,
        # scores
        matchScore     => 0,
        mismatchScore  => 0,
        lmismatchScore => 0,
    );

    return bless(\%state, $class);
}

    
# Add one clone's insertions to the insertion-length table.
#   $self  : Statistics object; $self->{insertion} maps a length to a count
#   $clone : record providing exonsNum, exons[i]{distInsLen, distIns},
#            SENumber, SEdist and nIns0
# Every length listed in an exon's distIns is counted once.  Each length in
# SEdist is then taken back out again (the entry is removed when it reaches
# zero); the sub dies if such a length was never counted.  Finally nIns0 is
# added to the '0' entry.
sub set_insertion_dist {
    my ($self, $clone) = @_;

    # Count the insertion lengths recorded on every exon.
    for my $exonIdx (0 .. $clone->{exonsNum} - 1) {
        my $exon = $clone->{exons}[$exonIdx];
        for my $k (0 .. $exon->{distInsLen} - 1) {
            $self->{insertion}->{ $exon->{distIns}[$k] }++;
        }
    }

    # Remove the lengths listed in SEdist from the table.
    for my $seIdx (0 .. $clone->{SENumber} - 1) {
        my $len = $clone->{SEdist}[$seIdx];
        die "Start-End Insertion expected\n"
            unless exists($self->{insertion}->{$len});
        delete($self->{insertion}->{$len})
            if --$self->{insertion}->{$len} == 0;
    }

    # Accumulate the clone's nIns0 under key '0'.
    $self->{insertion}->{'0'} = exists($self->{insertion}->{'0'})
        ? $self->{insertion}->{'0'} + $clone->{nIns0}
        : $clone->{nIns0};
}

	
# Add one clone's deletions to the deletion-length table.
#   $self  : Statistics object; $self->{deletion} maps a length to a count
#   $clone : record providing exonsNum, exons[i]{distDelLen, distDel}
#            and nDel0
# Every length listed in an exon's distDel is counted once, then nDel0 is
# added to the '0' entry.
sub set_deletion_dist {
    my ($self, $clone) = @_;

    for my $exonIdx (0 .. $clone->{exonsNum} - 1) {
        my $exon = $clone->{exons}[$exonIdx];
        for my $k (0 .. $exon->{distDelLen} - 1) {
            $self->{deletion}->{ $exon->{distDel}[$k] }++;
        }
    }

    # Accumulate the clone's nDel0 under key '0'.
    $self->{deletion}->{'0'} = exists($self->{deletion}->{'0'})
        ? $self->{deletion}->{'0'} + $clone->{nDel0}
        : $clone->{nDel0};
}


# Add one clone's introns to the intron-length table.
#   $self  : Statistics object; $self->{intron} maps a length to a count
#   $clone : record providing exonsNum, introns and nInt0
# The first exonsNum-1 entries of introns are counted once each, then nInt0
# is added to the '0' entry.
sub set_intron_dist {
    my ($self, $clone) = @_;

    for my $idx (0 .. $clone->{exonsNum} - 2) {
        $self->{intron}->{ $clone->{introns}[$idx] }++;
    }

    # Accumulate the clone's nInt0 under key '0'.
    $self->{intron}->{'0'} = exists($self->{intron}->{'0'})
        ? $self->{intron}->{'0'} + $clone->{nInt0}
        : $clone->{nInt0};
}

# Record the error-rate inputs of one clone.
#   $self  : Statistics object
#   $clone : record providing cloneId, matches, insertions, SEunMapPart
#            and mismatches
#   $file  : name of the file the clone came from
# Stores [ file, cloneId, matches, insertions - SEunMapPart + mismatches ]
# in $self->{eRateInfo} under the current uniqueId, then advances uniqueId.
sub set_eRateInfo {
    my ($self, $clone, $file) = @_;

    my $id       = $self->{uniqueId};
    my $unmapped = $clone->{insertions} - $clone->{SEunMapPart} + $clone->{mismatches};

    # Fill slots 0..3 of this clone's record in one go.
    @{ $self->{eRateInfo}->{$id} }[0 .. 3] =
        ($file, $clone->{cloneId}, $clone->{matches}, $unmapped);

    $self->{uniqueId}++;
}

# Estimate a "low" and a "high" error rate from the recorded clones and
# select the clones to remap.
#
# Input is $self->{eRateInfo}, whose records are
# [ file, cloneId, matches, unmapped ] as stored by set_eRateInfo.
# Starting from el = 0.001, eh = 0.006 and pi = 0.5 (fraction of clones in
# the low class), the loop alternates between computing, for every clone, a
# weight for belonging to the low class and re-estimating el, eh and pi from
# those weights, until the summed relative change drops to 0.001 or below.
# (This looks like an EM fit of a two-class mixture -- confirm with the
# method's author before relying on that reading.)
#
# Side effects: sets matchScore (1 - eh), mismatchScore (eh) and
# lmismatchScore (el); appends (file, cloneId) pairs to groupToRemap for
# every clone whose low-class weight is below 0.5; prints a report to STDOUT.
#
# Returns $self->{groupSize}, the number of entries now in groupToRemap.
#
# Robustness: with no recorded clones the sub returns immediately and leaves
# the scores untouched (it used to die with a division by zero).  When a
# class receives no weight its previous rate is kept, and the iteration
# stops as soon as an estimate reaches a value (0 or 1) for which the next
# round's logarithms would be undefined.
sub set_eRate {
    my $self = shift;

    # Work on the clones in numeric id order; $u[$i]/$m[$i]/$post[$i] all
    # refer to the clone whose id is $ids[$i].
    my @ids = sort { $a <=> $b } keys %{$self->{eRateInfo}};
    # @u: per clone, the number of internal unmapped bases (mismatches and
    # internal insertions).  @m: per clone, the number of matches.
    my @u = map { $self->{eRateInfo}->{$_}[3] } @ids;
    my @m = map { $self->{eRateInfo}->{$_}[2] } @ids;

    # number of clones
    my $n = scalar(@ids);
    return $self->{groupSize} if $n == 0;

    # very low initial value for the low rate, high initial value for the
    # high rate, and half of the clones initially assumed to be "low"
    my $el = 0.001;
    my $eh = 0.006;
    my $pi = 0.5;

    my $dif = 10;
    my @post;

    # iterate while the relative difference is bigger than the cut-off
    while ($dif > 0.001) {
        my $x = log($eh/$el);
        my $y = log((1-$eh)/(1-$el));
        my $pirat = log((1.0-$pi)/$pi);

        # per-clone weight of the low class, and the new value of pi
        my $pinew = 0;
        @post = ();
        for my $i (0 .. $n - 1) {
            my $val = $u[$i]*$x + $m[$i]*$y + $pirat;
            $post[$i] = 1.0/(1.0+exp($val));
            $pinew += $post[$i];
        }
        $pinew = $pinew/$n;

        # weighted unmapped/total base counts for both classes
        my ($elsum, $denomel, $ehsum, $denomeh) = (0, 0, 0, 0);
        for my $i (0 .. $n - 1) {
            my $total = $u[$i] + $m[$i];
            $elsum   += $u[$i]*$post[$i];
            $denomel += $total*$post[$i];
            $ehsum   += $u[$i]*(1-$post[$i]);
            $denomeh += $total*(1-$post[$i]);
        }
        # A class without any weight cannot be re-estimated: keep its rate.
        my $elnew = $denomel > 0 ? $elsum/$denomel : $el;
        my $ehnew = $denomeh > 0 ? $ehsum/$denomeh : $eh;

        $dif = 0;
        $dif += 0.5*abs($ehnew-$eh)/($ehnew+$eh);
        $dif += 0.5*abs($elnew-$el)/($elnew+$el);
        $dif += 0.5*abs($pinew-$pi)/($pinew+$pi);
        $el = $elnew;
        $eh = $ehnew;
        $pi = $pinew;

        # The next round takes log(eh/el), log((1-eh)/(1-el)) and
        # log((1-pi)/pi); stop here if any of them would be undefined.
        last if $el <= 0 || $el >= 1
             || $eh <= 0 || $eh >= 1
             || $pi <= 0 || $pi >= 1;
    }

    print "el $el eh $eh pi $pi\n";
    print "Clones To Remap\n";
    $self->{mismatchScore} = $eh;
    $self->{matchScore} = 1 - $eh;
    $self->{lmismatchScore} = $el;
    print "New Match Score : $self->{matchScore}\n";
    print "New Mismatch Score : $self->{mismatchScore}\n";

    for my $i (0 .. $#ids) {
        next unless $post[$i] < 0.5;
        my $info = $self->{eRateInfo}->{$ids[$i]};
        $self->{groupToRemap}[$self->{groupSize}++] = $info->[0];
        $self->{groupToRemap}[$self->{groupSize}++] = $info->[1];
        # The file name is printed as a string: it is derived from a file
        # name and need not be numeric.
        printf "%1.5f\t%10s\t%10d\t%10d\n", $post[$i], $info->[0], $info->[2], $info->[3];
    }
    return $self->{groupSize};
}

# Create a new, empty temporary file in the current directory.
# The name is 'tmp_' followed by a random number; the file is opened with
# O_CREAT|O_EXCL, so an existing file is never reused.
# Returns the list (file name, IO::File handle opened read/write).
# Dies when the file cannot be created for any reason other than a name
# collision (for instance an unwritable directory), or when no unused name
# was found after 100 attempts; retrying forever in those cases would hang
# the program.
sub get_tmp_file {
    for my $attempt (1 .. 100) {
        my $infile = 'tmp_'.rand();
        my $ifh = IO::File->new($infile, O_RDWR|O_CREAT|O_EXCL);
        return ($infile, $ifh) if $ifh;
        # Only "file already exists" is worth another try.
        die "Could not create temporary file $infile : $!\n" unless $!{EEXIST};
    }
    die "Could not find an unused temporary file name\n";
}


# clean_fasta_file subroutine
# Arguments : $dir, $newDir, $num, $file, followed by the ids of the clones
#             to take out of "$dir/$file.fa".
# Effects   : the selected sequences (header line and following lines) are
#             written to "$newDir/$num.fa"; all other sequences are written
#             to a temporary file which then replaces "$dir/$file.fa".  When
#             every sequence was selected the original file is simply
#             removed.
# Note      : each id is searched for from the current read position
#             onwards, so the ids presumably have to be given in the order
#             in which they occur in the file -- confirm against the caller.
# Dies when one of the two files cannot be opened.
sub clean_fasta_file {
    my $dir = shift;
    my $newDir = shift;
    my $num = shift;
    my $file = shift;
    my $fastaFile = $dir.'/'.$file.'.fa';
    my $newFastaFile = $newDir.'/'.$num.'.fa';
    my ($id, $id_file);
    my $line;
    my @lineTab;
    my $ignore = 0;              # 1 while inside a sequence that is being removed
    my ($tmpfile, $tmpfd);
    my $cloneNotToRemap = 0;     # 1 once at least one sequence has been kept

    # Lexical handles and 3-argument open: the file names are never
    # interpreted as open modes and no global handle is shared with other subs.
    open(my $fa, '<', $fastaFile) || die "Could not open $fastaFile\n";
    open(my $out, '>', $newFastaFile) || die "Could not open $newFastaFile : $!\n";

    while (scalar(@_) > 0) {
        $id = shift(@_); # get the clone id to remove
        # $line still holds the header that ended the previous removed
        # sequence; decide where it goes before reading on.
        if (defined($line)) {
            $id_file = $line;
            $id_file =~ s/^>//;
            @lineTab = split(" ", $id_file);
            if ($lineTab[0] eq $id) {
                $ignore = 1;
                print $out $line;
            }
            else {
                if ($cloneNotToRemap == 0) {
                    $cloneNotToRemap = 1;
                    ($tmpfile, $tmpfd) = get_tmp_file();
                }
                print $tmpfd $line;
            }
        }
        while ($line = <$fa>) {
            if ($line =~ /^>/) {
                # A new header ends the sequence being removed: stop here
                # and carry the header over to the next id.
                if ($ignore == 1) {
                    $ignore = 0;
                    last;
                }
                $id_file = $line;
                $id_file =~ s/^>//;
                @lineTab = split(" ", $id_file);
                if ($lineTab[0] eq $id) {
                    $ignore = 1;
                }
                else {
                    if ($cloneNotToRemap == 0) {
                        $cloneNotToRemap = 1;
                        ($tmpfile, $tmpfd) = get_tmp_file();
                    }
                }
            }
            if ($ignore == 0) {
                print $tmpfd $line;
            }
            else {
                print $out $line;
            }
        }
    }

    if ($ignore == 0) {
        # Everything that is left belongs to sequences that are kept.
        if (! defined($tmpfd)) {
            ($tmpfile, $tmpfd) = get_tmp_file();
        }
        # $line is undefined when the end of the file was already reached.
        print $tmpfd $line if defined($line);
        while ($line = <$fa>) {
            print $tmpfd $line;
        }
    }

    # Close every handle before the original file is removed or replaced.
    close($fa);
    close($out);
    $tmpfd->close() if defined($tmpfd);

    unlink($fastaFile);
    if (defined($tmpfile)) {
        move($tmpfile, $fastaFile) or  warn "Couldn't move $tmpfile to $fastaFile: $!\n";
    }
}


# clean_match_file subroutine
# Arguments : $dir, $file, followed by the ids of the clones to take out of
#             "$dir/$file.match".
# Effects   : the records of the selected clones are dropped; everything
#             else is written to a temporary file which then replaces
#             "$dir/$file.match".  When every record was selected the
#             original file is simply removed.
#             A record starts at a line made of 12 whitespace-separated
#             fields whose first field is the clone id.  Lines starting
#             with '#' are skipped while ids are being searched for.
# Note      : each id is searched for from the current read position
#             onwards, so the ids presumably have to be given in the order
#             in which they occur in the file -- confirm against the caller.
# Dies when the match file cannot be opened.
sub clean_match_file {
    my $dir = shift;
    my $file = shift;
    my $matchFile = $dir.'/'.$file.'.match';
    my $id;
    my $line;
    my @lineTab;
    my $ignore = 0;              # 1 while inside a record that is being removed
    my ($tmpfile, $tmpfd);
    my $cloneNotToRemap = 0;     # 1 once at least one record has been kept

    # Lexical handle and 3-argument open: the file name is never
    # interpreted as an open mode and no global handle is left open.
    open(my $ma, '<', $matchFile) || die "Could not open $matchFile\n";

    while (scalar(@_) > 0) {
        $id = shift(@_); # get the clone id to remove
        # $line still holds the record header that ended the previous
        # removed record; decide what to do with it before reading on.
        if (defined($line)) {
            @lineTab = split(" ", $line);
            if ($lineTab[0] eq $id) {
                $ignore = 1;
            }
            else {
                if ($cloneNotToRemap == 0) {
                    $cloneNotToRemap = 1;
                    ($tmpfile, $tmpfd) = get_tmp_file();
                }
                print $tmpfd $line;
            }
        }
        while ($line = <$ma>) {
            if ($line =~ /^#/) {
                next;
            }
            @lineTab = split(" ", $line);
            if (scalar(@lineTab) == 12) {
                # A new record header ends the record being removed: stop
                # here and carry the header over to the next id.
                if ($ignore == 1) {
                    $ignore = 0;
                    last;
                }
                if ($lineTab[0] eq $id) {
                    $ignore = 1;
                }
                else {
                    if ($cloneNotToRemap == 0) {
                        $cloneNotToRemap = 1;
                        ($tmpfile, $tmpfd) = get_tmp_file();
                    }
                }
            }
            if ($ignore == 0) {
                print $tmpfd $line;
            }
        }
    }

    if ($ignore == 0) {
        # Everything that is left belongs to records that are kept.
        if (! defined($tmpfd)) {
            ($tmpfile, $tmpfd) = get_tmp_file();
        }
        # $line is undefined when the end of the file was already reached.
        print $tmpfd $line if defined($line);
        while ($line = <$ma>) {
            print $tmpfd $line;
        }
    }

    # Close every handle before the original file is removed or replaced.
    close($ma);
    $tmpfd->close() if defined($tmpfd);

    unlink($matchFile);
    if (defined($tmpfile)) {
        move($tmpfile, $matchFile) or  warn "Couldn't move $tmpfile to $matchFile: $!\n";
    }
}

# Remove the clones selected for remapping from the fasta and match files.
#   $self   : Statistics object; groupToRemap holds (file, cloneId) pairs
#             and groupSize the number of entries in it
#   $dir    : directory holding the "<file>.fa" / "<file>.match" files
#   $newDir : directory receiving the extracted sequences ("<n>.fa")
# Consecutive pairs naming the same file are handled together: one call to
# clean_fasta_file and clean_match_file per file, numbered from 1, and the
# file name is appended to fileNames.  Afterwards the per-run tables,
# counters and the remap group are reset.
# Returns fileNameCount.
# With an empty remap group no file is touched (the sub used to pass an
# undefined file name on to clean_fasta_file, which died).
sub clean {
    my $self = shift;
    my $dir = shift;
    my $newDir = shift;
    my $num = 1;
    my @basket = ();    # (file, cloneId, cloneId, ...) for the file being collected

    # Process the clones collected for one file.
    my $flush = sub {
        print "Info related to remapping :\n";
        print "$dir\n";
        print @basket, "\n";
        clean_fasta_file($dir, $newDir, $num, @basket);
        clean_match_file($dir, @basket);
        $self->{fileNames}[$self->{fileNameCount}++] = $basket[0];
    };

    for (my $i = 0; $i < $self->{groupSize}; $i += 2) {
        my ($file, $cloneId) = @{ $self->{groupToRemap} }[$i, $i + 1];
        if (@basket && $basket[0] eq $file) {
            # same file as the previous pair: just collect the clone id
            push(@basket, $cloneId);
            next;
        }
        if (@basket) {
            $flush->();
            $num++;
        }
        @basket = ($file, $cloneId);
    }
    $flush->() if @basket;

    $self->{insertion}      = {};
    $self->{deletion}       = {};
    $self->{intron}         = {};
    $self->{eRateInfo}      = {};
    $self->{fileInfo}       = {};
    $self->{matches}        = 0;
    $self->{mismatches}     = 0;
    $self->{padding}        = 0;
    $self->{uniqueId}       = 0;
    $self->{groupToRemap}   = [];
    $self->{groupSize}      = 0;
    return $self->{fileNameCount};
}

# Append the content of $f_in to $f_out, then delete $f_in.
#   $f_out : file to append to (created when missing)
#   $f_in  : file to read; lines starting with '#' are not copied
# Dies when either file cannot be opened or when the appended data cannot
# be flushed to $f_out; in that case $f_in is left in place.
sub cat {
    my $f_out = shift;
    my $f_in  = shift;

    # 3-argument open with lexical handles: file names are taken literally
    # (no mode characters, no whitespace stripping) and neither a global
    # handle nor the caller's $_ is touched.
    open(my $out, '>>', $f_out) || die "Could not open $f_out : $!\n";
    open(my $in, '<', $f_in) || die "Could not open $f_in : $!\n";
    while (my $line = <$in>) {
        next if $line =~ /^\#/;
        print {$out} $line;
    }
    close($in);
    # Buffered write errors only show up at close: check before deleting
    # the only other copy of the data.
    close($out) || die "Could not write $f_out : $!\n";
    unlink($f_in);
}

# Merge the remapping results back into the mapping directory.
#   $self   : Statistics object; fileNames[i] is the file whose remapped
#             clones were written under number i+1
#   $dir    : directory of the original "<name>.fa" / "<name>.match" files
#   $newDir : directory holding the "<n>.fa" / "<n>.match" files
# For both extensions: when the original file still exists the numbered
# file is appended to it with cat() (which also deletes the numbered file);
# otherwise the numbered file is moved into its place.  A failed move is
# reported with a warning, as done elsewhere in this file.
sub merge_mapping {
    my $self = shift;
    my $dir = shift;
    my $newDir = shift;

    for (my $i = 0; $i < $self->{fileNameCount}; $i++) {
        my $num = $i + 1;
        foreach my $ext ('.fa', '.match') {
            my $f1 = $dir.'/'.$self->{fileNames}[$i].$ext;
            my $f2 = $newDir.'/'.$num.$ext;
            if (-e $f1) {
                cat($f1, $f2);
            }
            else {
                move($f2, $f1) or warn "Couldn't move $f2 to $f1: $!\n";
            }
        }
    }
}
	
# Run the mapping command and report how it ended.
#   $self    : Statistics object (not used)
#   $command : command line handed to system()
# Prints a "Status Value" line to STDOUT whenever the command did not end
# with exit status 0.
# Returns the exit status of the command, -1 when the command could not be
# started, or 128 + signal number when it was terminated by a signal.
# (system() returns -1 on a launch failure and keeps the signal number in
# the low 7 bits; shifting the raw value right by 8 alone misreports both.)
sub run_mapping {
    my $self = shift;
    my $command = shift;

    my $raw = system($command);
    if ($raw == -1) {
        print "Status Value :  -1 (could not run command: $!)\n";
        return -1;
    }

    my $signal = $raw & 127;
    if ($signal) {
        print "Status Value :  terminated by signal $signal\n";
        return 128 + $signal;
    }

    my $status = $raw >> 8;
    if ($status != 0) {
        print "Status Value :  $status\n";
    }
    return $status;
}
	
# Write a copy of a parameter file with updated scores.
#   $self  : Statistics object providing matchScore and mismatchScore
#   $iFile : parameter file to read
#   $oFile : parameter file to write (overwritten)
# Every line containing "score_match" is replaced by
# "score_match <matchScore>", every other line containing "score_mismatch"
# by "score_mismatch <mismatchScore>"; all remaining lines are copied as is.
# Dies when a file cannot be opened or the output cannot be written.
sub set_parameters {
    my $self = shift;
    my $iFile = shift;
    my $oFile = shift;

    # 3-argument open with lexical handles: file names are taken literally
    # and no global handle (IN is also used by cat) or $_ is shared.
    open(my $in, '<', $iFile) || die "Could not open $iFile\n";
    open(my $ou, '>', $oFile) || die "Could not open $oFile\n";
    while (my $line = <$in>) {
        if ($line =~ /score_match/) {
            print {$ou} "score_match $self->{matchScore}\n";
        }
        elsif ($line =~ /score_mismatch/) {
            print {$ou} "score_mismatch $self->{mismatchScore}\n";
        }
        else {
            print {$ou} $line;
        }
    }
    close($in);
    # Buffered write errors only show up at close.
    close($ou) || die "Could not write $oFile : $!\n";
}

# Register the file information of one clone.
#   $self  : Statistics object; fileInfo is keyed by "<chromId>-<cloneId>"
#   $clone : clone object providing chromId, cloneId, exonsNum and the
#            add_supplement_info method
#   $file  : name of the match file (without extension) being read
# Dies when the clone was registered before.  Only clones with more than
# one exon are registered: add_supplement_info($file) is called on the
# clone and its fileInfo field is stored.
sub set_fileInfo {
    my ($self, $clone, $file) = @_;

    my $key = join('-', $clone->{chromId}, $clone->{cloneId});

    if (exists($self->{fileInfo}->{$key})) {
        print "Statistics::set_fileInfo -> clone $clone->{cloneId} has already processed\n";
        die "Mutiple copies of the same clone were found in match file $file.match\n";
    }

    if ($clone->{exonsNum} > 1) {
        $clone->add_supplement_info($file);
        $self->{fileInfo}->{$key} = $clone->{fileInfo};
    }
}

# Feed one clone into every statistic kept by the object.
#   $self  : Statistics object
#   $clone : clone record
#   $file  : name of the file the clone came from
# Updates the insertion, deletion and intron tables, the file information
# and the error-rate records (in that order), then adds the clone's
# matches and mismatches to the global counters.
sub set_info {
    my ($self, $clone, $file) = @_;

    # collectors that only need the clone
    for my $collect (\&set_insertion_dist, \&set_deletion_dist, \&set_intron_dist) {
        $collect->($self, $clone);
    }
    # collectors that also need the file name
    for my $record (\&set_fileInfo, \&set_eRateInfo) {
        $record->($self, $clone, $file);
    }

    $self->{matches}    += $clone->{matches};
    $self->{mismatches} += $clone->{mismatches};
}
	

# Read one match file and collect statistics for every clone in it.
#   $self        : Statistics object
#   $filename    : path of the match file to read
#   $file        : name handed on to the collectors
#   $partial_dat : when true only set_eRateInfo is called for each clone,
#                  otherwise set_info
# File layout as read here: lines starting with '#' are skipped; a line
# with 12 whitespace-separated fields starts a clone (Clone->new); lines
# with 10 fields following it are added as exons; a line starting with '/'
# ends the clone, which is then finalised (set_data, check_data,
# set_stat_info) and handed to the collectors.  A clone that is not closed
# by a '/' line is not collected.
# Also counts every clone in totalNbClones and those whose misoriented
# field is '*' in totNbMisClones.
# Dies when the file cannot be opened.
sub collect_info {
    my $self = shift;
    my $filename = shift;
    my $file = shift;
    my $partial_dat = shift;
    my $clone;
    my ($open, $numberG) = (0, 0);

    # Lexical handle and line variable: the Clone methods and collectors
    # called from inside the loop cannot disturb the read position or the
    # current line through a global handle or $_.
    open(my $input, '<', $filename) || die "Cannot open $filename : $!\n";
    while (my $line = <$input>) {
        next if $line =~ /^\#/;

        # end of the clone currently being read
        if ($line =~ /^\// && $open == 1) {
            $open = 0;
            $clone->set_data();
            $clone->check_data();
            $clone->set_stat_info($self->{padding});
            if ($partial_dat) {
                set_eRateInfo($self, $clone, $file);
            }
            else {
                set_info($self, $clone, $file);
            }
            next;
        }

        chomp($line);
        my @lineG = split(" ", $line);

        # start of a new clone
        if (scalar(@lineG) == 12) {
            $numberG = 0;
            $open = 1;
            $clone = Clone->new(@lineG);
            $self->{totalNbClones}++;
            if ($clone->{misoriented} eq '*') {
                $self->{totNbMisClones}++;
            }
            next;
        }

        # exon line of the clone being read
        if ((scalar(@lineG) == 10) && ($open == 1)) {
            $clone->add_exon($numberG, @lineG);
            $numberG++;
        }
    }
    close($input);
}


# Collect statistics from every match file of a directory.
#   $self        : Statistics object
#   $dir         : directory to scan
#   $padding     : padding value, stored in $self->{padding}; must be > 8
#   $partial_dat : handed on to collect_info
# Every directory entry whose name ends in ".match" (any case) and that
# passes the -T text-file test is processed with collect_info, which
# receives the full path and the entry name without its ".match" suffix.
# Dies when $dir is not a directory, when $padding is missing or not
# greater than 8, or when the directory cannot be read.
sub load {
    my $self = shift;
    my $dir  = shift;
    my $padding = shift;
    my $partial_dat = shift;

    unless (-d $dir) {
        die "Spa Directory not found\n";
    }
    unless (defined($padding) && ($padding > 8)) {
        die "Padding must be greater than 8\n";
    }

    $self->{padding} = $padding;
    opendir(my $dh, $dir) or die "Couldn't open $dir for reading: $!\n";
    while (defined(my $file = readdir($dh))) {
        next unless ($file =~ /\.match$/i);
        my $filename = "$dir/$file";
        if (-T $filename) {
            print "Processing $filename ... \n";
            # Strip exactly the suffix that was matched above; an
            # unanchored substitution would cut ".match" out of the middle
            # of a name such as "chr1.matched.match".
            $file =~ s/\.match$//i;
            collect_info($self, $filename, $file, $partial_dat);
        }
    }
    closedir($dh);
}

# Print the insertion-length table.
#   $self : Statistics object
#   $out  : output file handle
# Writes a title line, one "<length> <count>" line per entry in ascending
# numeric order of length, and two empty lines.
sub print_insertion_dist {
    my ($self, $out) = @_;

    print {$out} "Insertion Distribution\n";
    for my $len (sort { $a <=> $b } keys %{ $self->{insertion} }) {
        print {$out} $len . ' ' . $self->{insertion}->{$len} . "\n";
    }
    print {$out} "\n\n";
}

# Print the deletion-length table.
#   $self : Statistics object
#   $out  : output file handle
# Writes a title line, one "<length> <count>" line per entry in ascending
# numeric order of length, and two empty lines.
sub print_deletion_dist {
    my ($self, $out) = @_;

    print {$out} "Deletion Distribution\n";
    for my $len (sort { $a <=> $b } keys %{ $self->{deletion} }) {
        print {$out} $len . ' ' . $self->{deletion}->{$len} . "\n";
    }
    print {$out} "\n\n";
}

# Print the intron-length table.
#   $self : Statistics object
#   $out  : output file handle
# Writes a title line, one "<length> <count>" line per entry in ascending
# numeric order of length, and two empty lines.
sub print_intron_dist {
    my ($self, $out) = @_;

    print {$out} "Intron Length Distribution\n";
    for my $len (sort { $a <=> $b } keys %{ $self->{intron} }) {
        print {$out} $len . ' ' . $self->{intron}->{$len} . "\n";
    }
    print {$out} "\n\n";
}

# Print the registered file information.
#   $self : Statistics object
#   $out  : output file handle
# After a title line, each entry of fileInfo is printed in string order of
# its key: the key, the first three fields of the record (one per line),
# the elements of the list in field 3 on one line and the elements of the
# list in field 4 on the next (every element followed by a blank), then two
# empty lines.
sub print_fileInfo {
    my ($self, $out) = @_;

    print {$out} "FileInfo Information\n";
    for my $key (sort keys %{ $self->{fileInfo} }) {
        print {$out} "$key\n";

        # scalar fields
        for my $field (0 .. 2) {
            print {$out} "$self->{fileInfo}->{$key}[$field]\n";
        }

        # list fields; the second list also closes the entry
        for my $slot (3, 4) {
            for my $j (0 .. $#{ $self->{fileInfo}->{$key}[$slot] }) {
                print {$out} "$self->{fileInfo}->{$key}[$slot][$j] ";
            }
            print {$out} ($slot == 3 ? "\n" : "\n\n\n");
        }
    }
}

# Print the global match counters.
#   $self : Statistics object providing matches and mismatches
#   $out  : output file handle
# Writes a title line, the two counters and two empty lines.
sub print_match {
    my ($self, $out) = @_;

    print {$out} "Matches Information\n";
    print {$out} 'Macthes    : ' . $self->{matches} . "\n";
    print {$out} 'Mismatches : ' . $self->{mismatches} . "\n\n\n";
}

# Release the per-run data of the object.
# The insertion, deletion, intron and eRateInfo tables are undefined, the
# remap group is emptied and the matches, mismatches, padding, groupSize
# and uniqueId counters are set back to 0.  Other fields are left alone.
sub free {
    my ($self) = @_;

    for my $table (qw(insertion deletion intron eRateInfo)) {
        undef($self->{$table});
    }

    $self->{groupToRemap} = [];
    @{$self}{qw(matches mismatches padding groupSize)} = (0) x 4;
    $self->{uniqueId} = 0;
}

# Release the insertion-length table (the field becomes undefined).
sub free_insertion {
    my ($self) = @_;
    undef $self->{insertion};
}

# Release the deletion-length table (the field becomes undefined).
sub free_deletion {
    my ($self) = @_;
    undef $self->{deletion};
}

# Release the intron-length table (the field becomes undefined).
sub free_intron {
    my ($self) = @_;
    undef $self->{intron};
}

# Release the per-clone error-rate records (the field becomes undefined).
sub free_eRateInfo {
    my ($self) = @_;
    undef $self->{eRateInfo};
}

1;
