#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <glib.h>
#include "interspecies.h"
#include "tree_routines.h"
#include "commonroutines.h"



/* Recursively clones the tree rooted at oldtree and returns the root of
 * the copy.
 *
 * Every cloned node gets its own GPtrArray.  The elements are copied as
 * pointers (i.e. shared with the original tree), except that when copyq is
 * non-zero element 0 (the q value) is replaced by a freshly malloc'd copy
 * of the double it points to, so the clone's q can be changed without
 * touching the original.  A node whose data is NULL is cloned with NULL
 * data.
 *
 * If oldnotednode is a node of the old tree and newnotednode is not NULL,
 * the corresponding node of the copy is stored in *newnotednode.
 *
 * Terminates the program with exit(1) if the q copy cannot be allocated. */
GNode* clone_gtree(GNode *oldtree, GNode *oldnotednode, GNode **newnotednode,
                   int copyq) 
{
    GPtrArray *olddata, *newdata;
    GNode *new_node, *child;
    double *q;
    guint n;

    olddata = (GPtrArray *)(oldtree->data);
    newdata = NULL;
    if (olddata != NULL) {
        newdata = g_ptr_array_new();
        for (n = 0; n < olddata->len; n++) {
            if ((n == 0) && copyq) {
                /* We don't want the original q here, since it may be
                   fiddled with.  Doing the copy inside the loop means an
                   empty data array is never indexed. */
                q = malloc(sizeof *q);
                if (q == NULL) {
                    fprintf(stderr, "Error: out of memory\n");
                    exit(1);
                }
                *q = *(double *)g_ptr_array_index(olddata, 0);
                g_ptr_array_add(newdata, q);
            }
            else
                g_ptr_array_add(newdata, g_ptr_array_index(olddata, n));
        }
    }

    new_node = g_node_new(newdata);
    if ((oldtree == oldnotednode) && (newnotednode != NULL))
        *newnotednode = new_node;

    /* a leaf has no children, so the recursion stops there */
    for (child = oldtree->children; child != NULL; child = child->next)
        g_node_append(new_node, clone_gtree(child, oldnotednode,
                                            newnotednode, copyq));
    return new_node;
}


/* Collects every leaf (node without children) of the tree rooted at
 * oldtree into *leaves, in depth-first, left-to-right order.
 *
 * If *leaves is NULL a new GPtrArray is created and stored there first;
 * otherwise the leaves are appended to the existing array.  The array
 * holds GNode pointers into the tree, not copies. */
void get_gtree_leaves(GNode *oldtree, GPtrArray **leaves) 
{
    GNode *child;

    if (*leaves == NULL)
        *leaves = g_ptr_array_new();

    if (oldtree->children == NULL) {
        /* leaf: record it; the node's data is deliberately not touched */
        g_ptr_array_add(*leaves, oldtree);
        return;
    }

    for (child = oldtree->children; child != NULL; child = child->next)
        get_gtree_leaves(child, leaves);
}


/* Walks the tree rooted at tree in pre-order and appends element 0 of
 * each node's data array (the q pointer) to *qvalues.  Nodes whose data
 * is NULL contribute nothing.  *qvalues must already be a valid array. */
void get_qvalues(GNode *tree,GPtrArray **qvalues)
{
    GNode *kid;

    if (tree->data != NULL)
        g_ptr_array_add(*qvalues,
                        g_ptr_array_index((GPtrArray *)(tree->data), 0));

    for (kid = tree->children; kid != NULL; kid = kid->next)
        get_qvalues(kid, qvalues);
}



/* Moves every child of node2 over to node1.  Each child is detached from
 * node2 and prepended to node1, so the moved children end up in front of
 * node1's existing children and in reverse of their original order.
 * node2 itself is left in place, childless. */
void merge_node(GNode *node1, GNode *node2) 
{
    GNode *moving, *upcoming;

    for (moving = node2->children; moving != NULL; moving = upcoming) {
        /* remember the successor before unlinking clears the sibling links */
        upcoming = moving->next;
        g_node_unlink(moving);
        g_node_prepend(node1, moving);
    }
}


/* Returns the index of the ')' that closes the '(' found at str1[nl].
 *
 * Nested bracket pairs between the two are skipped.  Returns -1 if
 * str1[nl] is not '(' or if the string ends before the matching ')' is
 * found.  nl must be a valid index into the NUL-terminated string str1. */
int getclosingbracket(char* str1, int nl) 
{
    int n, depth;

    if (str1[nl] != '(')
        return -1;

    /* depth counts brackets opened after nl that are still unclosed */
    depth = 0;
    for (n = nl + 1; str1[n] != '\0'; n++) {
        if (str1[n] == '(')
            depth++;
        else if (str1[n] == ')') {
            if (depth == 0)
                return n;
            depth--;
        }
    }

    /* hit the terminator without finding a match: unbalanced input */
    return -1;
}


/* Depth-first search for a node that has a parent and exactly one child.
 * Returns the first such node encountered (the node itself is tested
 * before its descendants), or NULL if the tree contains none.  The root
 * of the searched tree never qualifies when it has no parent. */
GNode* find_single_childed_node(GNode* tree) 
{
    GNode *first = tree->children;
    GNode *kid, *hit;

    if (tree->parent != NULL && first != NULL
        && first->prev == NULL && first->next == NULL)
        return tree;

    for (kid = first; kid != NULL; kid = kid->next) {
        hit = find_single_childed_node(kid);
        if (hit != NULL)
            return hit;
    }
    return NULL;
}


/* Repeatedly removes non-root nodes of *tree that have exactly one child.
 *
 * For each such node the only child is detached, the node is destroyed,
 * and the child is prepended to the destroyed node's parent.  The child's
 * q (element 0 of its data array) is multiplied by the removed node's q.
 * Only the GNode is destroyed; the removed node's data array is not
 * freed here. */
void strip_single_nodes(GNode** tree) 
{
    GNode *single, *above, *below;
    double *q_single, *q_below;

    while ((single = find_single_childed_node(*tree)) != NULL) {
        above = single->parent;
        below = single->children;
        q_single = (double *)g_ptr_array_index((GPtrArray *)(single->data), 0);
        q_below = (double *)g_ptr_array_index((GPtrArray *)(below->data), 0);

        /* detach the child first so destroying the node does not take it along */
        g_node_unlink(below);
        g_node_destroy(single);
        g_node_prepend(above, below);

        *q_below = (*q_single) * (*q_below);
    }
}



/* Parses the bracketed group of treestr that opens at index startpos and
 * appends one new child node to *currnode for each comma-separated entry.
 *
 * Each entry is either  label:q  (a leaf) or  (subgroup):q  (an internal
 * node, whose contents are parsed by a recursive call).  Every new node's
 * data is a GPtrArray whose element 0 is a malloc'd double holding q and
 * whose element 1 is a malloc'd copy of the label (NULL for an internal
 * node).  q must be a decimal number in [0,1].  Once the group has been
 * parsed, strip_single_nodes() is run on *currnode.
 *
 * When startpos is 0, characters after the closing bracket only produce a
 * warning.  Any syntax error or allocation failure prints a message to
 * stderr and terminates the program with exit(1). */
void stringtotree(char* treestr, GNode **currnode, int startpos) 
{
    char templabel[64],*newlabel,*numend;
    int n,n0,n_end,nsub_end;
    double muval,*tempmu;
    GPtrArray *nodedata;
    GNode *newnode;
    
    if (treestr[startpos] !='(') {
        fprintf(stderr, "Error: tree string must begin with bracket\n");
        exit(1);
    }
    

    n_end = getclosingbracket(treestr,startpos);
    if (n_end <= startpos+1) {
        fprintf(stderr, "Error: mismatched or null brackets in tree string\n");
        exit(1);
    }
    /* n_end >= 2 here, so the string is long enough for strlen()-1 not to wrap */
    if ((startpos==0)&&((size_t)n_end < strlen(treestr)-1))
        fprintf(stderr,"Warning: extra characters in tree string after last closing bracket ignored\n");

    n=startpos+1;
    while (n < n_end) {        
        /* slot 0 is reserved for q and filled in once q has been parsed */
        nodedata=g_ptr_array_new();
        g_ptr_array_add(nodedata,NULL);
        newnode=g_node_new(nodedata);
        if (treestr[n]=='(') {
            /* internal node: no label, children come from the sub-group */
            newlabel=NULL;
            nsub_end=getclosingbracket(treestr,n);
            stringtotree(treestr,&newnode,n);
            n=nsub_end+1;
        }
        else {
            /* leaf: collect the label up to the ':' */
            n0 = n;
            while ((treestr[n]!=':')&&(n<n_end)) {
                if ((treestr[n]=='(')||(treestr[n]==')')) {
                    fprintf(stderr,"Error: invalid tree string\n");
                    printf("n=%d\n",n);
                    exit(1);
                }
                templabel[n-n0]=treestr[n];
                n++;
                if ((n-n0)>63) {
                    fprintf(stderr,"Error: label too long (max length 64!)\n");
                    exit(1);
                }
            }
            templabel[n-n0]='\0';
            if (strlen(templabel)==0) {
                fprintf(stderr,"Error: proximity parameter with no label or subtree in tree string\n");
                exit(1);
            }
            newlabel=malloc(sizeof(char)*(n-n0+2));
            if (newlabel==NULL) {
                fprintf(stderr,"Error: out of memory\n");
                exit(1);
            }
            /* n-n0+1 bytes: the label plus its terminating NUL */
            memcpy(newlabel,templabel,n-n0+1);
        }
        g_ptr_array_add(nodedata,newlabel);
        g_node_append(*currnode,newnode);
        
        if (treestr[n]!=':') {
            fprintf(stderr,"Error: missing colon in tree string?\n");
            exit(1);
        }
        n++;
        
        /* collect the run of digits and dots that should form q */
        n0=n;
        while (((treestr[n]>='0')&&(treestr[n]<='9'))||(treestr[n]=='.')) {
            templabel[n-n0]=treestr[n];
            n++;
            if ((n-n0) >= 64) {
                fprintf(stderr,"Error parsing float in tree string: too long\n");
                exit(1);
            }
        }
        templabel[n-n0] = '\0';
        if (n==n0) {
            fprintf(stderr,"Error: missing phylogenetic proximity parameter (float) in tree string\n");
            exit(1);
        }
        /* strtod with an end-pointer check rejects runs such as "." or
           "0.5.5", which atof would silently accept as a number */
        muval = strtod(templabel,&numend);
        if ((numend==templabel)||(*numend!='\0')) {
            fprintf(stderr,"Error: malformed phylogenetic proximity parameter (float) in tree string\n");
            exit(1);
        }
        if ((muval<0.0)||(muval>1.0)) {
            fprintf(stderr,"Error: phylogenetic proximity parameter must be between 0 and 1\n");
            exit(1);
        }
        tempmu = malloc(sizeof *tempmu);
        if (tempmu==NULL) {
            fprintf(stderr,"Error: out of memory\n");
            exit(1);
        }
        *tempmu = muval;
        g_ptr_array_index(nodedata,0)=tempmu;

        /* an entry is followed either by a comma or by the group's ')' */
        if ((treestr[n]!=',')&&(n<n_end)) {
            fprintf(stderr,"Syntax error in tree string (missing comma?)\n");
            exit(1);
        }
        n++;
    }
    strip_single_nodes(currnode);
}


/* Writes the tree rooted at treenode to stream in bracket notation,
 * "(entry:q,entry:q,...)", where each entry is a leaf's label or a nested
 * bracketed subtree and q is printed with %f.  Used in binprint, and for
 * debugging to check that the tree was read correctly.  A NULL treenode
 * prints nothing.  Internal nodes are asserted to carry a NULL label. */
void treeprint(FILE *stream, GNode *treenode) 
{
    GNode *kid;
    GPtrArray *info;
    double *q;
    char *species;

    if (treenode == NULL)
        return;

    fprintf(stream,"(");
    for (kid = treenode->children; kid != NULL; kid = kid->next) {
        info = (GPtrArray *) kid->data;
        q = g_ptr_array_index(info,0);
        species = g_ptr_array_index(info,1);

        if (kid->children == NULL)
            fprintf(stream,"%s", species);
        else {
            assert(species==NULL);
            treeprint(stream,kid);
        }
        fprintf(stream,":%f",*q);

        /* separator between siblings, none after the last one */
        if (kid->next != NULL)
            fprintf(stream,",");
    }
    fprintf(stream,")");
}




void remove_node_from_gtree(GNode *thisnode) 
{
    /* Unlinks thisnode from its tree (the node itself is not destroyed).
     * If that leaves exactly one sibling behind and the parent has a
     * parent of its own, the now single-childed parent is destroyed and
     * the sibling is appended to the grandparent, with the sibling's
     * proximity multiplied by the parent's proximity.
     */
    GNode *up, *upup, *other;
    double *q_up, *q_other;

    up = thisnode->parent;
    upup = (up != NULL) ? up->parent : NULL;

    /* pick a neighbouring sibling (next one preferred) before unlinking */
    other = (thisnode->next != NULL) ? thisnode->next : thisnode->prev;

    g_node_unlink(thisnode);

    /* nothing to collapse unless a lone sibling remains below a non-root parent */
    if (other == NULL || other->prev != NULL || other->next != NULL
        || upup == NULL)
        return;

    q_other = (double *)(g_ptr_array_index(((GPtrArray*)(other->data)),0));
    g_node_unlink(other);
    q_up = (double *)(g_ptr_array_index(((GPtrArray*)(up->data)),0));
    *q_other = (*q_up) * (*q_other);
    g_node_destroy(up);
    g_node_append(upup,other);
}



/* Given a window, this makes a copy of the phylogenetic tree
 * (v->labeltree, cloned with private q values), retaining only the
 * leaves whose species id (element 2 of the leaf's data array) occurs in
 * win->species, and initialising c to 1.0 and F to an empty array.
 *
 * Returns a newly malloc'd base_tree.  Terminates the program with
 * exit(1) if that allocation fails. */
base_tree* make_base_tree(params *v, window *win) 
{
    base_tree *newtree;
    GPtrArray *leaves;
    GNode *leaf;
    guint n,m;
    int match,leafspecies;
    
    newtree = malloc(sizeof *newtree);
    if (newtree == NULL) {
        fprintf(stderr,"Error: out of memory\n");
        exit(1);
    }
    newtree->c = 1.0;
    newtree->F = g_ptr_array_new();
    fflush(NULL); /* flushes all open output streams */
    newtree->tree=clone_gtree(v->labeltree,NULL,NULL,1);
    strip_single_nodes(&(newtree->tree));

    leaves=NULL;
    get_gtree_leaves(newtree->tree,&leaves);

    /* remove leaves that aren't actually in current window */
    for (m=0; m<leaves->len; m++) {
        leaf=g_ptr_array_index(leaves,m);
        leafspecies= *(int *)(g_ptr_array_index((GPtrArray *)(leaf->data),2));
        match=0;
        for (n=0; n<win->species->len; n++) {
            if (leafspecies == g_array_index(win->species,int,n)) {
                match=1;
                break;
            }
        }
        if (!match) {
            remove_node_from_gtree(leaf);
            /* remove_node_from_gtree only unlinks the leaf, so release the
               GNode here.  The root is kept because newtree->tree still
               points at it.  The node's data array is left untouched. */
            if (leaf != newtree->tree)
                g_node_destroy(leaf);
        }
    }                
    
    /* frees the pointer array only, not the nodes it referred to */
    g_ptr_array_free(leaves,TRUE);
    return newtree;    
}




/* Given a tree, returns a node all of whose children are leaves.
 * Descends into the first child that itself has children and repeats
 * from there; a node with no such child (including a childless node)
 * is returned as is.  Order is not important to the callers. */
GNode* find_star_node(GNode *tree) 
{
    GNode *kid;

    for (kid = tree->children; kid != NULL; kid = kid->next) {
        if (kid->children != NULL)
            return find_star_node(kid);
    }
    return tree;
}

/* This is the main function for non-star phylogenies.  Given a list
 * of trees (initially, just one tree) with non-star phylogenies, it
 * takes a tree, takes a node all of whose children are leaves, and
 * makes two "reduced" trees (with appropriate multiplicative factors
 * c and F).  It does this until all trees in the list have no
 * remaining branches but just the F factors (which indicate subtrees
 * with star topologies).
 *
 * This pruned tree list can then be used both for the wm integral and
 * for the background calculation.  The background calculation is
 * exact, the wm integral uses the monomial approximation for all
 * subtrees (F-factors).
 */

void prune_trees(GPtrArray **treelist) 
{
    int ntrees,n,m;
    base_tree *onetree,*newtree;
    GNode *starnode,*parent,*child,*newstarnode;
    GArray *Flist;
    double *q;
    int species,pruned;

    do {
        ntrees=(*treelist)->len;
        pruned=0;
        for (n=0; n<ntrees; n++) {
            onetree=g_ptr_array_index(*treelist,n);
            if (onetree->tree != NULL) {
                starnode=find_star_node(onetree->tree);
                if (starnode == onetree->tree) { /* No parent, no split node */
                    Flist=g_array_new(TRUE,TRUE,sizeof(int));
                    child=starnode->children;
                    while (child != NULL) {
                        species= *(int *)
                            g_ptr_array_index((GPtrArray *)(child->data),2);
                        g_array_append_val(Flist,species);
                        child=child->next;
                    }
                    if (Flist->len > 0)
                        g_ptr_array_add(onetree->F,Flist);
                    else
                        g_array_free(Flist,TRUE);
                    g_node_destroy(onetree->tree);
                    onetree->tree=NULL;
                }
                else {
                    /* copy tree, eliminate starnode in copy,
                       starnode merged into parent in original */
                    Flist=g_array_new(TRUE,TRUE,sizeof(int));
                    newtree=(base_tree *)malloc(sizeof(base_tree));
                    q = g_ptr_array_index((GPtrArray *)(starnode->data),0);
                    newtree->c = onetree->c * (1.0 - *q);
                    onetree->c *= (*q);
                    /* new factor F(children) in new tree */
                    newtree->F = g_ptr_array_new();
                    for (m=0; m<onetree->F->len; m++) {
                        Flist = g_ptr_array_index(onetree->F,m);
                        g_ptr_array_add(newtree->F,Flist);
                    }
                    Flist=g_array_new(TRUE,TRUE,sizeof(int));
                    child=starnode->children;
                    while (child != NULL) {
                        species= *(int *)
                            g_ptr_array_index((GPtrArray *)(child->data),2);
                        g_array_append_val(Flist,species);
                        child=child->next;
                    }
                    g_ptr_array_add(newtree->F,Flist);
                    newtree->tree=clone_gtree(onetree->tree,starnode,&newstarnode,0);
                    parent=starnode->parent;
                    g_node_unlink(starnode);
                    merge_node(parent,starnode);
                    remove_node_from_gtree(newstarnode);
                    g_node_destroy(newstarnode);
                    g_node_destroy(starnode);
                    pruned=1;
                    g_ptr_array_add((*treelist),newtree);
                }
            }
        }   
    } while (pruned);
}

