#include <cstdlib>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

/*
 * One clone record: the clone, the chromosome region it is aligned to,
 * and summary values for the alignment as a whole.
 */
struct clone_info {
  string clone_id;       /* clone identifier, printed as "seq1" */
  string clone_seq;      /* clone sequence, compared column by column with chr_seq */
  int clone_beg;         /* clone_beg of the first exon of the record */
  int clone_end;         /* clone_end of the last exon of the record */
  int clone_len;         /* clone length, printed in "bp" */
  string chr_id;         /* chromosome identifier, printed as "seq2" */
  string chr_seq;        /* chromosome sequence, same column layout as clone_seq */
  int chr_beg;           /* chromosome begin coordinate from the clone line */
  int chr_end;           /* chromosome end coordinate from the clone line */
  int chr_len;           /* chromosome length, printed in "bp" */
  char orientation;      /* '+' is printed as "->", anything else as "<-" */
  int score;             /* score field of the clone line */
  int num_exons;         /* exon count field of the clone line */
  char misorientation;   /* last field of the clone line, stored as a number */
};

/*
 * One exon of a clone alignment: the aligned clone and chromosome
 * segments together with their coordinate ranges.
 */
struct exon {
  string exon_id;          /* exon identifier */
  string clone_id;         /* identifier of the clone the exon belongs to */
  string clone_seq;        /* clone segment; '-' marks a gap column */
  int clone_beg;           /* first clone coordinate of the exon */
  int clone_end;           /* last clone coordinate of the exon */
  string chr_seq;          /* chromosome segment; '-' and '*' mark gap columns */
  unsigned int chr_beg;    /* chromosome coordinate of the first column */
  unsigned int chr_end;    /* chromosome coordinate of the last column */
  char orientation;        /* first character of the orientation field */
  int score;               /* score field of the exon line */
};

/* Parses one tab-separated clone line into a clone_info record. */
clone_info split_clone_line ( string s );
/* Parses one tab-separated exon line into an exon record. */
exon split_exon_line ( string s );
/* Splits s on tab characters and returns the fields in order. */
vector<string> tokenize_string ( string s );
/* Writes the alignment of cur_clone and its exons to outfile; returns 0. */
int print_alignment ( ofstream& outfile, clone_info cur_clone, vector< exon > exons );

using namespace std;

int main ( int argc, char *argv[] ) {

  if ( argc < 3 ) {
  cout << "Usage:" << endl
       << argv[0] << " infile.mach outfile.hum" << endl
       << "where infile.mach is a machine-readable file generated by spa"
       << endl;
    return 0;
  }

  string machine_file = argv[1];
  string human_file = argv[2];
  ifstream infile (machine_file.c_str());  
  ofstream outfile (human_file.c_str());  

  string s;
  string mode = "clone";
  int lines_processed = 0;

  clone_info cur_clone;
  exon tmp_exon;
  vector< exon > exons;

  while ( getline(infile, s) ) {
    if ( s[0] == '#' ) {
      continue;
    }
    if ( s.substr(0,2) == "//" ) {
      mode = "clone";

      cur_clone.clone_beg = exons[0].clone_beg;
      cur_clone.clone_end = exons[exons.size()-1].clone_end;

      print_alignment ( outfile, cur_clone, exons );
      
      exons.resize(0);
      continue;
    }
    
    if ( lines_processed == 0 || mode == "clone" ) {
      mode = "exon";
      cur_clone = split_clone_line ( s );
    }
    else {
      exons.push_back( split_exon_line( s ) );
    }
    lines_processed++;
  }
  return 0;
}

/*
 * Parses one tab-separated clone line into a clone_info record.
 *
 * Field order on the line: clone id, clone sequence, clone length,
 * chromosome id, chromosome begin, chromosome end, chromosome length,
 * chromosome sequence, orientation, score, number of exons, misorientation.
 * Fields beyond the twelfth are ignored.
 *
 * clone_beg and clone_end do not appear on the line; they are set to 0 so
 * the returned record holds no indeterminate values.
 *
 * Numeric fields are converted with atoi, so non-numeric text yields 0.
 *
 * Throws std::runtime_error when the line has fewer than 12 fields.
 */
clone_info split_clone_line ( string s ) {

  const vector<string>::size_type CLONE_FIELDS = 12;

  const vector<string> tokens = tokenize_string(s);
  if ( tokens.size() < CLONE_FIELDS ) {
    throw std::runtime_error(
      "malformed clone line: expected at least 12 tab-separated fields");
  }

  clone_info clone_line = clone_info();
  clone_line.clone_beg = 0;
  clone_line.clone_end = 0;

  clone_line.clone_id = tokens[0];
  clone_line.clone_seq = tokens[1];
  clone_line.clone_len = atoi(tokens[2].c_str());
  clone_line.chr_id = tokens[3];
  clone_line.chr_beg = atoi(tokens[4].c_str());
  clone_line.chr_end = atoi(tokens[5].c_str());
  clone_line.chr_len = atoi(tokens[6].c_str());
  clone_line.chr_seq = tokens[7];
  /* tokens is const, so [0] on an empty field reads the terminating '\0'. */
  clone_line.orientation = tokens[8][0];
  clone_line.score = atoi(tokens[9].c_str());
  clone_line.num_exons = atoi(tokens[10].c_str());
  /* NOTE(review): the field is parsed as a number and narrowed to char;
     confirm the file really stores a small integer here and not a symbol. */
  clone_line.misorientation = static_cast<char>(atoi(tokens[11].c_str()));

  return clone_line;
}

/*
 * Parses one tab-separated exon line into an exon record.
 *
 * Field order on the line: exon id, clone id, clone begin, clone end,
 * clone sequence, chromosome begin, chromosome end, chromosome sequence,
 * orientation, score. Fields beyond the tenth are ignored.
 *
 * Numeric fields are converted with atoi, so non-numeric text yields 0.
 *
 * Throws std::runtime_error when the line has fewer than 10 fields.
 */
exon split_exon_line ( string s ) {

  const vector<string>::size_type EXON_FIELDS = 10;

  const vector<string> tokens = tokenize_string(s);
  if ( tokens.size() < EXON_FIELDS ) {
    throw std::runtime_error(
      "malformed exon line: expected at least 10 tab-separated fields");
  }

  exon exon_line = exon();

  exon_line.exon_id = tokens[0];
  exon_line.clone_id = tokens[1];
  exon_line.clone_beg = atoi(tokens[2].c_str());
  exon_line.clone_end = atoi(tokens[3].c_str());
  exon_line.clone_seq = tokens[4];
  /* The chromosome coordinates are stored unsigned; make the conversion explicit. */
  exon_line.chr_beg = static_cast<unsigned int>(atoi(tokens[5].c_str()));
  exon_line.chr_end = static_cast<unsigned int>(atoi(tokens[6].c_str()));
  exon_line.chr_seq = tokens[7];
  /* tokens is const, so [0] on an empty field reads the terminating '\0'. */
  exon_line.orientation = tokens[8][0];
  exon_line.score = atoi(tokens[9].c_str());

  return exon_line;
}

/*
 * Splits s at every tab character and returns the pieces in order.
 * Empty pieces are kept, so the result always holds one more element
 * than there are tabs in s (an empty s yields a single empty piece).
 */
vector<string> tokenize_string ( string s ) {
  const char separator = '\t';
  vector<string> fields;

  string::size_type field_start = 0;
  string::size_type sep_pos = s.find(separator, field_start);
  while ( sep_pos != string::npos ) {
    fields.push_back(s.substr(field_start, sep_pos - field_start));
    field_start = sep_pos + 1;
    sep_pos = s.find(separator, field_start);
  }

  /* Whatever follows the last tab (possibly nothing) is the final piece. */
  fields.push_back(s.substr(field_start));
  return fields;
}
 
int print_alignment ( ofstream& outfile, clone_info cur_clone, vector< exon > exons ) {

  vector<unsigned int> chr_coords;
  vector<int> clone_coords;
  string alignment = "";

  outfile << "seq1 = " << cur_clone.clone_id << ", " << cur_clone.clone_len << " bp" << endl;
  outfile << "seq2 = " << cur_clone.chr_id << ", " << cur_clone.chr_len << " bp" << endl;

  outfile << endl << endl;

  for ( unsigned int i = 0; i < exons.size(); i++ ) {
    outfile << exons[i].clone_beg << "-" << exons[i].clone_end << "\t"
	 << "(" << exons[i].chr_beg << "-" << exons[i].chr_end << ")" << "\t"
	 << exons[i].score << "\t";
    if ( cur_clone.orientation == '+' ) {
      outfile << "->" << endl;
    }
    else {
      outfile << "<-" << endl;
    }

    if ( cur_clone.orientation == '+' ) {
      unsigned int j = exons[i].chr_beg; 
      int chr_base = 0;
      while ( j <= exons[i].chr_end ) {
	chr_coords.push_back(j);
	while ( exons[i].chr_seq[chr_base] == '-' || exons[i].chr_seq[chr_base] == '*') {
	  chr_coords.push_back(j);
	  chr_base++;
	}
	j++;
	chr_base++;
      }
      while ( exons[i].chr_seq[chr_base] == '-' || exons[i].chr_seq[chr_base] == '*') {
	chr_coords.push_back(j);
	chr_base++;
      }
    }
    else {
      unsigned int j = exons[i].chr_beg; 
      int chr_base = 0;
      while ( j >= exons[i].chr_end ) {
	chr_coords.push_back(j);
	while ( exons[i].chr_seq[chr_base] == '-' || exons[i].chr_seq[chr_base] == '*') {
	  chr_coords.push_back(j);
	  chr_base++;
	}
	j--;
	chr_base++;
      }
      while ( exons[i].chr_seq[chr_base] == '-' || exons[i].chr_seq[chr_base] == '*') {
	chr_coords.push_back(j);
	chr_base++;
      }
    }
    for ( int j = 0; j < 9; j++ ) {
      chr_coords.push_back(0);
    }

    int j = exons[i].clone_beg; 
    int clone_base = 0;
    while ( j <= exons[i].clone_end ) {
      clone_coords.push_back(j);
      while ( exons[i].clone_seq[clone_base] == '-' ) {
	clone_coords.push_back(j);
	clone_base++;
      }
      j++;
      clone_base++;
    }
    while ( exons[i].clone_seq[clone_base] == '-' ) {
      clone_coords.push_back(j);
	clone_base++;
    }

    for ( int j = 0; j < 9; j++ ) {
      clone_coords.push_back(0);
    }     
 
  }
  
  outfile << endl;

  /* now we print the actual alignment... */
  for ( unsigned int i = 0; i < cur_clone.chr_seq.size() && i < cur_clone.clone_seq.size(); i++ ) {
  
    if ( chr_coords[i] == 0 ) {
      if ( chr_coords[i-3] != 0 || chr_coords[i+3] != 0 ) {
	if ( cur_clone.orientation == '+' ) {
	  alignment += '>';
	} 
	else {
	  alignment += '<';
	}
      }
      else {
	alignment += '.';
      }
    }
    else {
      if ( cur_clone.chr_seq[i] == cur_clone.clone_seq[i] ) {
	alignment += "|";
      }
      else {
	alignment += "-";
      }
    }

  } 

  for ( unsigned int i = 0; i < cur_clone.chr_seq.size() &&  i < cur_clone.clone_seq.size(); i += 60 ) {
    int idx = i;
    outfile << i << endl;
    while ( clone_coords[idx] == 0 )
      idx++;
    outfile << clone_coords[idx] << endl;
    outfile << cur_clone.clone_seq.substr(i, 60) << endl << alignment.substr(i,60) << endl << cur_clone.chr_seq.substr(i,60) << endl;
    idx = i;
    while ( chr_coords[idx] == 0 )
      idx++;
    outfile << chr_coords[idx] << endl << endl;
  }

  outfile << endl << endl;
  return 0;   
}
