/****************************************************************************************************************************
Hung Huy Huynh - LanThuy Tran
Math 6790 - Case Studies Spring 2005
Find the longest exactly matching sequence between two genomes
Input: 2 FASTA files (paths given on the command line)
****************************************************************************************************************************/

#include <iostream>
#include <algorithm>
#include <string.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <time.h>
#include "FastaFile.h"

using namespace std;

/* Ordering predicate for sorting C strings: true when s1 sorts strictly     */
/* before s2 according to strcmp. Both arguments must be NUL-terminated.     */
inline bool lessStrCompare( char * s1, char * s2 )
{
    const int order = strcmp( s1, s2 );
    return 0 > order;
}

/************************************************************************/
/* Return the longest common prefix of two NUL-terminated strings.      */
/* The result is empty when the first characters differ or when either  */
/* string is empty.                                                     */
/************************************************************************/
string getMatchSequence( char * s1, char * s2 )
{
    // Count how many leading characters agree. Stopping at the NUL of s1 is
    // enough: once s1[shared] is non-NUL and equal to s2[shared], s2 cannot
    // have ended either.
    string::size_type shared = 0;
    while ( s1[shared] != '\0' && s1[shared] == s2[shared] )
    {
        ++shared;
    }

    return string( s1, shared );
}

// Which strand of a genome a sequence was taken from (FORWARD == 0, REVERSE == 1).
enum strand_t {FORWARD, REVERSE};
// Expands to the printable name of a strand value: any non-zero value yields
// "REVERSE", zero yields "FORWARD". Being a macro, the argument is evaluated
// exactly once but is not type-checked.
#define verbose(strid)   ((strid) ? "REVERSE" : "FORWARD")

struct Match
{
    string sequence;
    int loc1;
    strand_t strand1;
    int loc2;
    strand_t strand2;

    Match()
    {
        strand1 = FORWARD;
        strand2 = FORWARD;
        loc1 = 0;
        loc2 = 0;
    }
};

/************************************************************************/
/* Returns longest matching sequence between 2 genomes                  */
/*                                                                      */
/* gn1, gn1_len : first genome and its length in bytes                  */
/* gn2, gn2_len : second genome and its length in bytes                 */
/*                                                                      */
/* Only the given lengths are used; the inputs need not be              */
/* NUL-terminated.                                                      */
/*                                                                      */
/* Returns a Match whose sequence is the longest string occurring in    */
/* both genomes. loc1 / loc2 are the 1-based positions of the first     */
/* occurrence of that string in gn1 / gn2. When several strings tie     */
/* for longest, the smallest one in byte-wise order is returned. When   */
/* nothing is shared (or an input is NULL / empty) the default Match    */
/* is returned: empty sequence, both locations 0. The strand fields     */
/* are left at their defaults; the caller sets them.                    */
/*                                                                      */
/* Method: both genomes are copied into one buffer, every suffix is     */
/* sorted, and neighbouring suffixes that come from different genomes   */
/* are compared. Each suffix is treated as ending where its own genome  */
/* ends, so a genome-1 suffix never runs on into genome 2.              */
/*                                                                      */
/* Memory: one byte plus one pointer per input byte. std::bad_alloc     */
/* propagates if that cannot be allocated.                              */
/************************************************************************/
namespace
{
    // Orders suffix pointers into the joined buffer lexicographically,
    // clipping each suffix at the end of the genome it starts in.
    struct ClippedSuffixLess
    {
        const char * boundary;  // first byte of genome 2 == one past genome 1
        const char * end;       // one past the last byte of genome 2

        ClippedSuffixLess( const char * boundaryPtr, const char * endPtr )
            : boundary( boundaryPtr ), end( endPtr )
        {
        }

        // True when p points into the genome-1 part of the buffer.
        bool inFirst( const char * p ) const
        {
            return p < boundary;
        }

        // Number of bytes from p to the end of the genome p lies in.
        size_t remaining( const char * p ) const
        {
            return static_cast<size_t>( ( inFirst( p ) ? boundary : end ) - p );
        }

        bool operator()( const char * a, const char * b ) const
        {
            const size_t lenA = remaining( a );
            const size_t lenB = remaining( b );
            const int cmp = memcmp( a, b, lenA < lenB ? lenA : lenB );
            if ( cmp != 0 )
            {
                return cmp < 0;
            }
            return lenA < lenB;     // a proper prefix sorts first
        }
    };
}

Match findLongestMatch(char* gn1, int gn1_len, char* gn2, int gn2_len)
{
    Match max_match;

    // An absent or empty genome shares nothing with the other one.
    if ( gn1 == NULL || gn2 == NULL || gn1_len <= 0 || gn2_len <= 0 )
    {
        return max_match;
    }

    const size_t len1 = static_cast<size_t>( gn1_len );
    const size_t len2 = static_cast<size_t>( gn2_len );
    const size_t buffSize = len1 + len2;    // size_t: the sum may not fit an int

    // Joined copy of both genomes: [ gn1 | gn2 ].
    std::vector<char> buff( buffSize );
    memcpy( &buff[0], gn1, len1 );
    memcpy( &buff[0] + len1, gn2, len2 );

    const char * const begin = &buff[0];
    const ClippedSuffixLess bySuffix( begin + len1, begin + buffSize );

    // One pointer per suffix of the joined buffer.
    std::vector<const char *> suffixes( buffSize );
    for ( size_t i = 0; i < buffSize; i++ )
    {
        suffixes[i] = begin + i;
    }

    std::sort( suffixes.begin(), suffixes.end(), bySuffix );

    // In sorted order the longest string shared by a genome-1 suffix and a
    // genome-2 suffix always shows up between two neighbours of different
    // origin, so only those pairs need to be examined.
    const char * best = NULL;
    size_t bestLen = 0;
    for ( size_t i = 0; i + 1 < buffSize; i++ )
    {
        const char * a = suffixes[i];
        const char * b = suffixes[i + 1];
        if ( bySuffix.inFirst( a ) == bySuffix.inFirst( b ) )
        {
            continue;   // both from the same genome: not a cross-genome match
        }

        const size_t remA = bySuffix.remaining( a );
        const size_t remB = bySuffix.remaining( b );
        const size_t limit = remA < remB ? remA : remB;
        if ( limit <= bestLen )
        {
            continue;   // too short to beat the current best
        }

        size_t shared = 0;
        while ( shared < limit && a[shared] == b[shared] )
        {
            shared++;
        }

        if ( shared > bestLen )
        {
            bestLen = shared;
            best = a;
        }
    }

    if ( bestLen > 0 )
    {
        max_match.sequence.assign( best, bestLen );

        // Report the first occurrence in each genome, 1-based. The search
        // cannot fail because the sequence was taken from both genomes.
        const char * const seqBegin = best;
        const char * const seqEnd = best + bestLen;
        max_match.loc1 = static_cast<int>(
            std::search( gn1, gn1 + len1, seqBegin, seqEnd ) - gn1 ) + 1;
        max_match.loc2 = static_cast<int>(
            std::search( gn2, gn2 + len2, seqBegin, seqEnd ) - gn2 ) + 1;
    }

    return max_match;
}


int main( int argc, char * argv[] )
{
    if (argc < 3)
    {
        cerr << "Syntax: Kratos [file1] [file2]" << endl;
        exit(1);
    }

    char * filename1 = argv[1]; //"../../data/NC_000912.fna";
    char * filename2 = argv[2]; //"../../data/NC_004829.fna";

    FastaFile f1( filename1 );
    FastaFile f2( filename2 );

    cout << f1.fileName << endl
        << f1.firstLine << endl
        << "A=" << f1.nuHist.A << ","
        << " T=" << f1.nuHist.T << ","
        << " G=" << f1.nuHist.G << ","
        << " C=" << f1.nuHist.C << endl << endl;

    cout << f2.fileName << endl
        << f2.firstLine << endl
        << "A=" << f2.nuHist.A << ","
        << " T=" << f2.nuHist.T << ","
        << " G=" << f2.nuHist.G << ","
        << " C=" << f2.nuHist.C << endl << endl;

    cout << "Searching..." << endl;

    //time_t old = time(NULL);
    // longest match in both forward strands
    Match max_match = findLongestMatch(f1.forward, f1.sequenceLength, f2.forward, f2.sequenceLength);

    cout << "Longest match: " << endl << max_match.sequence << endl << endl;
    cout << "File: " << f1.fileName << ", location: " << max_match.loc1 << ", strand: "
        << verbose(max_match.strand1) << endl;
    cout << "File: " << f2.fileName << ", location: " << max_match.loc2 << ", strand: "
        << verbose(max_match.strand2) << endl << endl;

    cout << "Searching..." << endl;
    // longest match in gn1 forward and gn2 reverse
    max_match = findLongestMatch(f1.forward, f1.sequenceLength, f2.reverse, f2.sequenceLength);
    max_match.strand1 = FORWARD;
    max_match.strand2 = REVERSE;
    cout << "Longest match: " << endl << max_match.sequence << endl << endl;
    cout << "File: " << f1.fileName << ", location: " << max_match.loc1
        << ", strand: " << verbose(max_match.strand1) << endl;
    cout << "File: " << f2.fileName << ", location: " << max_match.loc2
        << ", strand: " << verbose(max_match.strand2) << endl << endl;

    cout << "Searching..." << endl;
    // longest match in gn1 reverse and gn2 forward
    max_match = findLongestMatch(f1.reverse, f1.sequenceLength, f2.forward, f2.sequenceLength);
    max_match.strand1 = REVERSE;
    max_match.strand2 = FORWARD;
    cout << "Longest match: " << endl << max_match.sequence << endl << endl;
    cout << "File: " << f1.fileName << ", location: " << max_match.loc1 << ", strand: "
        << verbose(max_match.strand1) << endl;
    cout << "File: " << f2.fileName << ", location: " << max_match.loc2 << ", strand: "
        << verbose(max_match.strand2) << endl << endl;

    cout << "Searching..." << endl;
    // longest match in gn1 reverse and gn2 reverse
    max_match = findLongestMatch(f1.reverse, f1.sequenceLength, f2.reverse, f2.sequenceLength);
    max_match.strand1 = REVERSE;
    max_match.strand2 = REVERSE;
    cout << "Longest match: " << endl << max_match.sequence << endl << endl;
    cout << "File: " << f1.fileName << ", location: " << max_match.loc1 << ", strand: "
        << verbose(max_match.strand1) << endl;
    cout << "File: " << f2.fileName << ", location: " << max_match.loc2 << ", strand: "
        << verbose(max_match.strand2) << endl << endl;


    //time_t curr = time(NULL);
    //cout << "Time elapsed: " << (curr-old) << " seconds" << endl;
    return 0;
}
