######################################################################
# SPA PARAMETER FILE
######################################################################

######## GENERAL REMARKS ###############################
# SPA uses a probabilistic Bayesian model to score alignments.
# Dynamic programming is used to find the optimal alignment under this model
# To keep running times feasible we use the BLAT gfserver to identify genomic loci
# and only try to align those loci.
# In addition, for each locus, we first find regions of homology between clone and genome
# and only do the dynamic programming for a subset of positions in the dynamic programming matrix
# that are in or near such regions of homology. We call these 'defined positions' of the dynamic
# programming matrix.
# The parameters in this file control:
# 1. The specifics of the Bayesian model (i.e. likelihoods of various gene structures and sequencing errors).
# 2. The heuristics that determine defined positions in the dynamic programming matrix (which trade off running
#    time against alignment accuracy).

######################################################################
# Scoring Parameters

# score_match / score_mismatch - per-base probabilities.
# score_mismatch is the probability per base for a mismatch to be
# introduced by sequencing error (or SNP); score_match is its complement.
# (If both are defined, they must sum to 1.0; if only one is defined, the
# other will be set so that both sum to 1.0.)
score_match 0.997876860803614
score_mismatch 0.00212313919638583


# score_splice_NNNN - relative probability of various splice junctions
# (NNNN = first two and last two bases of the intron; GT-AG is the canonical boundary)
# (if all are defined, they must sum to 1.0; otherwise the sum must not exceed
# 1.0, and undefined terms will be set equal to each other so as to
# yield a total sum of 1.0)
# Entries that are commented out below are the undefined terms.
score_splice_tttt  0.0000617784
score_splice_tttc  0.0000014912
# score_splice_ttta
# score_splice_tttg
# score_splice_ttct
# score_splice_ttcc
# score_splice_ttca
# score_splice_ttcg
score_splice_ttat  0.0000021303
score_splice_ttac  0.0000035150
score_splice_ttaa  0.0000063909
score_splice_ttag  0.0000106514
# score_splice_ttgt
# score_splice_ttgc
score_splice_ttga  0.0000085212
# score_splice_ttgg
score_splice_tctt  0.0000063909
score_splice_tctc  0.0000007456
score_splice_tcta  0.0000034085
score_splice_tctg  0.0000002130
score_splice_tcct  0.0000390908
score_splice_tccc  0.0001661625
score_splice_tcca  0.0000045801
# score_splice_tccg
score_splice_tcat  0.0000144859
# score_splice_tcac
# score_splice_tcaa
# score_splice_tcag
score_splice_tcgt  0.0000031955
# score_splice_tcgc
# score_splice_tcga
# score_splice_tcgg
score_splice_tatt  0.0000085212
score_splice_tatc  0.0000086277
score_splice_tata  0.0000042606
score_splice_tatg  0.0000195987
score_splice_tact  0.0000021303
score_splice_tacc  0.0000339781
score_splice_taca  0.0000042606
# score_splice_tacg
# score_splice_taat
score_splice_taac  0.0000063909
# score_splice_taaa
# score_splice_taag
# score_splice_tagt
score_splice_tagc  0.0000192791
# score_splice_taga
score_splice_tagg  0.0000021303
score_splice_tgtt  0.0000166162
score_splice_tgtc  0.0000132078
# score_splice_tgta
score_splice_tgtg  0.0002497763
# score_splice_tgct
score_splice_tgcc  0.0002083422
# score_splice_tgca
# score_splice_tgcg
# score_splice_tgat
# score_splice_tgac
score_splice_tgaa  0.0000089472
score_splice_tgag  0.0000115035
# score_splice_tggt
score_splice_tggc  0.0000606067
score_splice_tgga  0.0000080951
# score_splice_tggg
# score_splice_cttt
# score_splice_cttc
score_splice_ctta  0.0000021303
score_splice_cttg  0.0000052192
score_splice_ctct  0.0000197052
score_splice_ctcc  0.0000002130
# score_splice_ctca
score_splice_ctcg  0.0000004261
score_splice_ctat  0.0000008521
# score_splice_ctac
# score_splice_ctaa
score_splice_ctag  0.0000126752
# score_splice_ctgt
# score_splice_ctgc
score_splice_ctga  0.0000031954
score_splice_ctgg  0.0000100124
# score_splice_cctt
# score_splice_cctc
# score_splice_ccta
score_splice_cctg  0.0000015977
score_splice_ccct  0.0000157641
score_splice_cccc  0.0000631631
# score_splice_ccca
# score_splice_cccg
# score_splice_ccat
# score_splice_ccac
# score_splice_ccaa
# score_splice_ccag
# score_splice_ccgt
score_splice_ccgc  0.0000080951
# score_splice_ccga
score_splice_ccgg  0.0000118231
score_splice_catt  0.0000033020
# score_splice_catc
score_splice_cata  0.0000021303
score_splice_catg  0.0000346172
score_splice_cact  0.0000013847
score_splice_cacc  0.0000006391
score_splice_caca  0.0000815901
# score_splice_cacg
# score_splice_caat
# score_splice_caac
score_splice_caaa  0.0000042606
score_splice_caag  0.0000361084
# score_splice_cagt
score_splice_cagc  0.0000185335
# score_splice_caga
# score_splice_cagg
# score_splice_cgtt
# score_splice_cgtc
# score_splice_cgta
# score_splice_cgtg
# score_splice_cgct
# score_splice_cgcc
# score_splice_cgca
# score_splice_cgcg
# score_splice_cgat
# score_splice_cgac
# score_splice_cgaa
score_splice_cgag  0.0000013847
# score_splice_cggt
score_splice_cggc  0.0000008521
# score_splice_cgga
# score_splice_cggg
# score_splice_attt
score_splice_attc  0.0000242853
# score_splice_atta
# score_splice_attg
# score_splice_atct
# score_splice_atcc
# score_splice_atca
# score_splice_atcg
score_splice_atat  0.0000714712
score_splice_atac  0.0012163947
score_splice_ataa  0.0000762643
score_splice_atag  0.0000711516
# score_splice_atgt
# score_splice_atgc
# score_splice_atga
# score_splice_atgg
score_splice_actt  0.0000127817
score_splice_actc  0.0000069234
# score_splice_acta
score_splice_actg  0.0000042606
# score_splice_acct
score_splice_accc  0.0000183205
# score_splice_acca
# score_splice_accg
# score_splice_acat
# score_splice_acac
score_splice_acaa  0.0000150185
# score_splice_acag
score_splice_acgt  0.0000149120
# score_splice_acgc
# score_splice_acga
# score_splice_acgg
score_splice_aatt  0.0000166163
score_splice_aatc  0.0000167228
# score_splice_aata
# score_splice_aatg
score_splice_aact  0.0000021303
score_splice_aacc  0.0000095863
score_splice_aaca  0.0000005326
# score_splice_aacg
score_splice_aaat  0.0000097993
score_splice_aaac  0.0000218354
score_splice_aaaa  0.0001606237
score_splice_aaag  0.0000339781
# score_splice_aagt
score_splice_aagc  0.0000103319
# score_splice_aaga
score_splice_aagg  0.0000171488
# score_splice_agtt
score_splice_agtc  0.0000023433
# score_splice_agta
score_splice_agtg  0.0000021303
# score_splice_agct
score_splice_agcc  0.0001363385
# score_splice_agca
# score_splice_agcg
# score_splice_agat
score_splice_agac  0.0000066039
# score_splice_agaa
score_splice_agag  0.0000947978
score_splice_aggt  0.0000042606
score_splice_aggc  0.0000005326
# score_splice_agga
score_splice_aggg  0.0000051127
# score_splice_gttt
score_splice_gttc  0.0000021303
score_splice_gtta  0.0000935197
score_splice_gttg  0.0003063354
score_splice_gtct  0.0000006391
score_splice_gtcc  0.0000106514
score_splice_gtca  0.0004580120
score_splice_gtcg  0.0000255635
score_splice_gtat  0.0000826552
score_splice_gtac  0.0000319543
score_splice_gtaa  0.0001429423
score_splice_gtag  0.9862979847
# score_splice_gtgt
# score_splice_gtgc
# score_splice_gtga
score_splice_gtgg  0.0001469899
# score_splice_gctt
# score_splice_gctc
# score_splice_gcta
# score_splice_gctg
score_splice_gcct  0.0000011717
score_splice_gccc  0.0000399429
score_splice_gcca  0.0000202377
# score_splice_gccg
# score_splice_gcat
# score_splice_gcac
# score_splice_gcaa
score_splice_gcag  0.0081845682
score_splice_gcgt  0.0000035150
score_splice_gcgc  0.0000004261
# score_splice_gcga
# score_splice_gcgg
score_splice_gatt  0.0000042606
# score_splice_gatc
score_splice_gata  0.0000006391
# score_splice_gatg
# score_splice_gact
# score_splice_gacc
# score_splice_gaca
# score_splice_gacg
# score_splice_gaat
# score_splice_gaac
# score_splice_gaaa
score_splice_gaag  0.0002788548
score_splice_gagt  0.0000148055
# score_splice_gagc
score_splice_gaga  0.0000156576
score_splice_gagg  0.0001878915
# score_splice_ggtt
# score_splice_ggtc
# score_splice_ggta
# score_splice_ggtg
# score_splice_ggct
score_splice_ggcc  0.0000470793
score_splice_ggca  0.0000021303
# score_splice_ggcg
# score_splice_ggat
score_splice_ggac  0.0000042606
# score_splice_ggaa
score_splice_ggag  0.0000681692
# score_splice_gggt
score_splice_gggc  0.0000764774
# score_splice_ggga
score_splice_gggg  0.0000076691



# min_diff_misorientation - this parameter defines misorientation.
# NOTE(review): presumably a log-scale score-difference threshold used when
# deciding whether a clone is misoriented; the exact rule is not stated in
# this file - confirm against the SPA source.
min_diff_misorientation -4.71072028280753

# Genome non-intron jump scoring curve parameters
# (specified as log(probability)).
# These parameters control the probability of sequencing errors leading
# to deletions (from the clone) of various lengths.
# NOTE(review): the functional form that combines beta0..beta3 and alpha is
# not given in this file - confirm against the SPA source before editing.

score_genome_jump_beta0 -0.000213829835382684
score_genome_jump_beta1 -8.72920300383271
score_genome_jump_beta2 -1.85589000287158
score_genome_jump_beta3 -1.22460971717752
score_genome_jump_alpha -0.203458174017843


# Clone non-intron jump scoring curve parameters
# (specified as log(probability)).
# These parameters control the probability of sequencing errors leading
# to insertions (into the clone) of various lengths.
# NOTE(review): the functional form that combines beta0..beta3 and alpha is
# not given in this file - confirm against the SPA source before editing.
score_clone_jump_beta0 -0.000316234154897822
score_clone_jump_beta1 -8.23122005705243
score_clone_jump_beta2 -2.45617899431579
score_clone_jump_beta3 -1.20377198094942
score_clone_jump_alpha -0.182546705956225


# Intron parameters

# score_genome_extension - added to the non-intron genome jump
# = log of the probability to NOT start an intron (controls the exon-length distribution)
score_genome_extension -0.00222592652898059


# shortest_intron_length / longest_intron_length
# NOTE(review): presumably the minimum and maximum intron lengths (in bases)
# that the aligner will consider - confirm against the SPA source.
shortest_intron_length 30
longest_intron_length 982271

# score_intron - this key is repeated 100 times; each line supplies one value.
# The order of the lines is presumably significant, so do not reorder,
# sort, or deduplicate them.
# NOTE(review): the values look like log(probability) scores (as for the
# jump scoring curve parameters in this file), presumably one per
# intron-length bin between shortest_intron_length and longest_intron_length;
# the bin boundaries are not stated in this file - confirm against the SPA source.
score_intron -16.8981318435216
score_intron -16.9671247150086
score_intron -17.0804534003156
score_intron -17.0034923591794
score_intron -16.5094738537298
score_intron -16.4926667354134
score_intron -16.2049846629617
score_intron -16.3011285235146
score_intron -15.095322538107
score_intron -13.4164522903247
score_intron -12.2853655809119
score_intron -12.0945662759085
score_intron -12.4605522497275
score_intron -12.7226263179881
score_intron -12.9728636113434
score_intron -13.2411003482315
score_intron -13.3721635526818
score_intron -13.4845119418609
score_intron -13.5158152919191
score_intron -13.5971231891149
score_intron -13.5536651596963
score_intron -13.6098792208756
score_intron -13.8086056906813
score_intron -13.8159161100795
score_intron -13.8825969426714
score_intron -13.9366273631115
score_intron -13.9631080367624
score_intron -13.9684464745205
score_intron -14.0290082015816
score_intron -14.0687441775016
score_intron -14.0893987163127
score_intron -14.1402942432278
score_intron -14.1931165461145
score_intron -14.2624856408048
score_intron -14.2800447398739
score_intron -14.3786870522714
score_intron -14.4254794586571
score_intron -14.5168777670463
score_intron -14.6358625074669
score_intron -14.7141895306583
score_intron -14.7949573164414
score_intron -14.8937065348362
score_intron -15.0551209377074
score_intron -15.1637486767933
score_intron -15.3152134903395
score_intron -15.484212555256
score_intron -15.6000269901357
score_intron -15.8477663883929
score_intron -16.0636255312995
score_intron -16.221342897868
score_intron -16.4236650280919
score_intron -16.6503481331301
score_intron -16.8156812009152
score_intron -17.0265608667914
score_intron -17.1624665519764
score_intron -17.3691300500356
score_intron -17.4932588155184
score_intron -17.7604311950113
score_intron -17.9637867490446
score_intron -18.1362484964997
score_intron -18.3582529223988
score_intron -18.5565975909522
score_intron -18.6887491081709
score_intron -18.9441525532411
score_intron -19.1153697432025
score_intron -19.2686349243448
score_intron -19.4537020288698
score_intron -19.7112029440447
score_intron -19.8246671431285
score_intron -20.1348255117066
score_intron -20.2550612970759
score_intron -20.5617831395684
score_intron -20.6912501081842
score_intron -20.7890786837756
score_intron -21.1909073362642
score_intron -21.3930731705345
score_intron -21.4657641101906
score_intron -21.7899836016279
score_intron -22.1071309438329
score_intron -22.1239175461943
score_intron -22.7225152493233
score_intron -22.706060376364
score_intron -23.0305732697274
score_intron -23.256856378741
score_intron -23.6622256269455
score_intron -23.9295698802873
score_intron -24.3431347830786
score_intron -24.5311872507988
score_intron -24.7863974471122
score_intron -25.1445863288302
score_intron -25.5017514951343
score_intron -25.8331110905979
score_intron -26.1763076606685
score_intron -27.0008117265494
score_intron -27.5972616271124
score_intron -29.000509252713
score_intron -28.8167890477174
score_intron -29.6138944682175
score_intron -30.4110091145809
score_intron -30.1774519600837

######################################################################
# end SPA PARAMETER FILE
