#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>

#include "DataReader.h"
#include "Config.h"
#include "MemoryControl.h"

/**********************************************************************
 * Open a file for reading (text mode).
 *
 * fp   : receives the stream returned by fopen(); NULL on failure
 * file : path of the file to open
 *
 * Returns 0 on success, 1 when the file could not be opened.  On
 * failure a message naming the file is printed to stdout.
 **********************************************************************/
int InputFileOpen(FILE **fp, char *file)
{
    FILE *stream = fopen(file, "r");

    /* Hand the stream (or NULL) back to the caller in every case. */
    *fp = stream;
    if (stream != NULL){
        return 0;
    }

    printf("input file open error! FileName:%s\n", file);
    return 1;
}

/**********************************************************************
 * Open a file for writing (text mode, "w": created or truncated).
 *
 * fp   : receives the stream returned by fopen(); NULL on failure
 * file : path of the file to open
 *
 * Returns 0 on success, 1 when the file could not be opened.  On
 * failure a message naming the file is printed to stdout.
 **********************************************************************/
int OutputFileOpen(FILE **fp, char *file)
{
    FILE *stream = fopen(file, "w");

    /* Hand the stream (or NULL) back to the caller in every case. */
    *fp = stream;
    if (stream != NULL){
        return 0;
    }

    printf("output file open error! FileName:%s\n", file);
    return 1;
}

/**********************************************************************
 * Close a stream.  A NULL stream is accepted and ignored.
 *
 * fp : stream to close, or NULL
 **********************************************************************/
void FileClose(FILE *fp)
{
    if (NULL == fp){
        return;     /* nothing was opened */
    }

    fclose(fp);
}


/**********************************************************************
 * Count the newline-terminated lines of a stream.
 *
 * Reads from the current position of fp to end-of-file in chunks of
 * at most MAX_LEN-1 characters.  Only chunks that end in '\n' are
 * counted, so a physical line longer than one chunk is counted once
 * and a final line without a trailing newline is not counted.
 * The stream is left at end-of-file; it is not rewound here.
 *
 * fp : stream to read
 *
 * Returns the number of '\n'-terminated lines read.
 **********************************************************************/
long DataReaderCountFileLine(FILE *fp)
{
    long count = 0;
    size_t length = 0;
    char buf[MAX_LEN];

    while ( fgets(buf, MAX_LEN, fp) != NULL ){
        length = strlen(buf);
        /* fgets() can succeed with an empty string (input beginning   */
        /* with a NUL byte); never look at buf[length-1] in that case. */
        if ( (length > 0) && ('\n' == buf[length - 1]) ){
            count++;
        }
    }

    return count;
}

/**********************************************************************
 * Read a whole HapMap file into an array of SnpData records.
 *
 * The first line of the stream is consumed by
 * DataReaderGetHapmapSampleNum() to obtain the per-row buffer size.
 * Then, for every index i in 1 .. line-1, a SNPdata buffer of that
 * size is obtained from malloc1Dim() and one line of the stream is
 * parsed into snpData[i].  snpData[0] is not touched.
 *
 * fp          : stream positioned at the start of the file
 * snpData     : array with room for at least `line` elements
 * line        : one past the last record index to fill
 * phasingType : forwarded to DataReaderSetHapmapData()
 *
 * Returns the status of the last DataReaderSetHapmapData() call, or
 * 0 when no row was read.
 * NOTE(review): the result of malloc1Dim() is not checked here; its
 * failure behaviour is defined elsewhere - confirm.
 **********************************************************************/
int DataReaderSetAllHapmapData(FILE *fp, SnpData *snpData, long line, int phasingType)
{
    int status = 0;
    long row = 1;
    const long bufferSize = DataReaderGetHapmapSampleNum(fp);

    while (row < line){
        SnpData *entry = &snpData[row];

        /* Storage for the genotype characters of this row. */
        entry->SNPdata = (char*)malloc1Dim(sizeof(char), bufferSize);
        status = DataReaderSetHapmapData(fp, entry, row, phasingType);
        row++;
    }

    return status;
}

/**********************************************************************
 * Parse one line of a HapMap file into a SnpData record.
 *
 * Characters are read up to and including the next '\n'.  Columns are
 * numbered from 1 and the column number advances on every whitespace
 * character.  When line > 0:
 *   - column HAPMAP_RS_NUM : alphanumeric characters are copied to
 *                            snpData->rsNumber (NUL-terminated at the
 *                            following whitespace)
 *   - column HAPMAP_POS    : the text is converted with atol() into
 *                            snpData->pos
 *   - columns >= HAPMAP_DATA : every non-whitespace character is
 *                            appended to snpData->SNPdata; 'N' is
 *                            stored as '?' when phasingType == 1
 * When line <= 0 the line is consumed without being parsed.
 * On reaching '\n', snpData->sampleNum is set to (stored genotype
 * characters) / 2.
 *
 * A line longer than the internal buffer is read in several chunks;
 * parsing state is carried from one chunk to the next.
 *
 * fp          : stream positioned at the start of a line
 * snpData     : record to fill; SNPdata must already be allocated
 * line        : row number; values <= 0 mean "skip this line"
 * phasingType : 1 selects the 'N' -> '?' substitution
 *
 * Returns 0 when a complete line was read, -1 when end-of-file was
 * reached before a newline.
 * NOTE(review): the capacities of snpData->rsNumber and
 * snpData->SNPdata are not visible here and are not checked; the
 * caller must size them for the input.
 **********************************************************************/
int DataReaderSetHapmapData(FILE *fp, SnpData *snpData, long line, int phasingType)
{
    int j = 0;
    int retval = -1;    /* stays -1 when EOF arrives before a newline */
    int column = 1;     /* current 1-based column                     */
    int index = 0;      /* write position inside the rs# / pos field  */
    int chunkLen = 0;
    int dataIndex = 0;  /* genotype characters stored so far          */
    int ch = 0;
    char buf[1024];
    char tmp[20];       /* text of the position column                */

    while ( fgets(buf, (int)sizeof(buf), fp) != NULL ){
        chunkLen = (int)strlen(buf);
        /* Stop before the terminating NUL: treating it as data would */
        /* insert a stray '\0' wherever a long line is split between  */
        /* two chunks (or at the end of an unterminated last line).   */
        for (j = 0; j < chunkLen; j++){
            /* <ctype.h> functions need an unsigned char value. */
            ch = (unsigned char)buf[j];

            /* End of line: publish the sample count and finish. */
            if ('\n' == ch){
                snpData->sampleNum = dataIndex / 2;
                retval = 0;
                return retval;
            }
            /* Lines with line <= 0 are consumed but not parsed. */
            else if (line > 0){
                /* Whitespace closes the current column. */
                if (isspace(ch) != 0){
                    if (HAPMAP_RS_NUM == column){
                        snpData->rsNumber[index] = '\0';
                    }
                    else if (HAPMAP_POS == column){
                        tmp[index] = '\0';
                        snpData->pos = atol(tmp);
                    }
                    column++;
                    index = 0;
                }
                /* rs# column */
                else if (HAPMAP_RS_NUM == column){
                    if (isalnum(ch) != 0){
                        snpData->rsNumber[index] = buf[j];
                        index++;
                    }
                }
                /* position column; excess characters are dropped so */
                /* that tmp[] can never overflow                      */
                else if (HAPMAP_POS == column){
                    if (index < (int)sizeof(tmp) - 1){
                        tmp[index] = buf[j];
                        index++;
                    }
                }
                /* genotype columns */
                else if (column >= HAPMAP_DATA){
                    if ( ('N' == ch) && (phasingType == 1) ) {
                        snpData->SNPdata[dataIndex] = '?';
                    }
                    else {
                        snpData->SNPdata[dataIndex] = buf[j];
                    }
                    dataIndex++;
                }
            }
        }
    }

    return retval;
}

/**********************************************************************
 * Allocate the SNPdata buffer of every element of a SnpData array.
 *
 * Each of the first dataNum elements gets a buffer of sampleNum chars
 * obtained from malloc1Dim().
 *
 * snpData   : array of at least dataNum elements
 * dataNum   : number of elements to set up
 * sampleNum : size, in chars, of each SNPdata buffer
 *
 * Always returns 0.
 * NOTE(review): the result of malloc1Dim() is not checked here; its
 * failure behaviour is defined elsewhere - confirm.
 **********************************************************************/
int DataReaderSnpDataMemoryAllocate(SnpData *snpData, long dataNum, long sampleNum)
{
    long remaining = dataNum;
    SnpData *entry = snpData;

    while (remaining > 0){
        entry->SNPdata = (char*)malloc1Dim(sizeof(char), sampleNum);
        entry++;
        remaining--;
    }

    return 0;
}

/**********************************************************************
 * Release a SnpData array together with its SNPdata buffers.
 *
 * For each of the first dataNum elements the SNPdata buffer is passed
 * to free1Dim(), then the array itself is passed to free1Dim().
 * A NULL array is accepted and ignored.
 *
 * snpData : array to release, or NULL
 * dataNum : number of elements whose SNPdata must be released
 *
 * Always returns 0.
 **********************************************************************/
int DataReaderSnpDataMemoryFree(SnpData *snpData, long dataNum)
{
    long idx;

    if (NULL == snpData){
        return 0;
    }

    /* Per-element buffers first, the array itself last. */
    for (idx = 0; idx < dataNum; idx++){
        free1Dim(snpData[idx].SNPdata);
    }
    free1Dim(snpData);

    return 0;
}

/**********************************************************************
 * Read one integer per line into an array.
 *
 * Every chunk returned by fgets() (normally one line) is converted
 * with atol() and stored in the next element of ldBlock, so a blank
 * or non-numeric line is stored as 0.
 *
 * atol() stops at the first character that cannot be part of the
 * number, so the trailing newline does not have to be removed.  The
 * text is therefore converted in place; nothing scans for '\n', and
 * a last line without a newline or an over-long line cannot run past
 * the end of the buffer.
 *
 * fp      : stream to read until end-of-file
 * ldBlock : destination array
 *
 * Always returns 0.
 * NOTE(review): the capacity of ldBlock is not passed in; the caller
 * must provide one element per line of the file.
 **********************************************************************/
int DataReaderSetLDBlock(FILE *fp, long *ldBlock)
{
    long index = 0;
    char string[MAX_LEN];

    while ( fgets(string, MAX_LEN, fp) != NULL ){
        ldBlock[index] = atol(string);
        index++;
    }

    return 0;
}



/**********************************************************************
 * Derive the per-row genotype buffer size from one line of a HapMap
 * file.
 *
 * Characters are read from the current position up to and including
 * the first '\n' (or to end-of-file).  The column number starts at 1
 * and advances on every whitespace character; each advance that
 * reaches HAPMAP_DATA or beyond counts one sample column.  The count
 * is doubled before it is returned, which matches
 * DataReaderSetHapmapData(), where the sample count is the number of
 * stored genotype characters divided by 2.
 *
 * The loop is driven by the return value of fgetc() rather than by
 * feof(), so a read error ends the scan instead of looping forever.
 *
 * fp : stream positioned at the start of the line to inspect
 *
 * Returns (number of sample columns) * 2.
 **********************************************************************/
long DataReaderGetHapmapSampleNum(FILE *fp)
{
    int character = 0;
    long column = 1;     /* current 1-based column      */
    long dataNum = 0;    /* sample columns seen so far  */

    while ( (character = fgetc(fp)) != EOF ){
        /* Only the first line is inspected. */
        if ('\n' == character){
            break;
        }
        /* Every whitespace character starts a new column. */
        if (isspace(character) != 0){
            column++;
            if (column >= HAPMAP_DATA){
                dataNum++;
            }
        }
    }

    return dataNum * 2;
}

/**********************************************************************
 * Helper: mark one haplotype row as missing by writing 'N' into its
 * first `length` positions.
 **********************************************************************/
static void SnphapFillMissingRow(char *row, int length)
{
    int k;

    for (k = 0; k < length; k++){
        row[k] = 'N';
    }
}

/**********************************************************************
 * Read snphap output into an array of haplotype rows.
 *
 * Layout expected by the parser (as implemented below):
 *   - the first three lines are skipped;
 *   - on every later line, the non-blank characters before '(' form
 *     the sample number;
 *   - the second character after '(' must be '1' for the line to be
 *     used; otherwise the line is ignored and does not advance the
 *     row counter;
 *   - after ')' one character is skipped, then the characters up to
 *     the next whitespace are stored in the current row: letters as
 *     they are, anything else as 'N'.
 * Row r of `haplotype` corresponds to file line r+3.  When the sample
 * number jumps by more than one, two rows of 'N' are inserted for
 * every missing sample; after the file ends, samples up to maxSample
 * are padded the same way.
 *
 * fp        : stream positioned at the start of the file
 * haplotype : destination rows
 * maxIndex  : number of positions written per padded row
 * maxSample : total number of samples expected
 *
 * Always returns 0.
 * NOTE(review): the number of rows and the row capacity of
 * `haplotype` are not passed in; presumably 2 * maxSample rows of at
 * least maxIndex characters - confirm with the caller.
 **********************************************************************/
int DataReaderSetSnphapData(FILE *fp, char** haplotype, int maxIndex, int maxSample)
{
    int i = 0;
    int flag_read = 0;      /* 1 while haplotype characters are stored  */
    int flag_noline = 0;    /* 1 when the current line must be ignored  */
    int flag_tmp = 0;       /* 1 once the sample number has been parsed */
    int retval = 0;
    int dataIndex = 0;
    int character = 0;
    int sampleNumber = 0;   /* last sample number seen                  */
    int sampleId = 0;       /* sample number of the current line        */
    int tmpIndex = 0;
    long line = 0;
    char tmp[10];           /* text of the sample number                */

    tmp[0] = '\0';

    /* Drive the loop with fgetc() itself: testing feof() beforehand  */
    /* feeds the EOF value through the parser once and never ends on  */
    /* a read error.                                                  */
    while ( (character = fgetc(fp)) != EOF ){
        /* End of line: reset the per-line state. */
        if ('\n' == character){
            dataIndex = 0;
            if (flag_noline != 1){
                line++;
            }
            flag_read = 0;
            flag_noline = 0;
            flag_tmp = 0;
        }
        /* Data starts on the fourth line of the file. */
        else if (line > 2){
            if (flag_read == 1){
                /* Whitespace ends the haplotype; ignore what follows. */
                if (isspace(character) != 0){
                    flag_read = 0;
                }
                /* A letter is stored as it is. */
                else if (isalpha(character) != 0){
                    haplotype[line-3][dataIndex] = (char)character;
                    dataIndex++;
                }
                /* Anything else is recorded as missing. */
                else {
                    haplotype[line-3][dataIndex] = 'N';
                    dataIndex++;
                }
            }
            /* ')' switches to haplotype reading unless the line is ignored. */
            else if (')' == character){
                if (1 != flag_noline){
                    flag_read = 1;
                    /* Skip the separator in front of the haplotype. */
                    (void)fgetc(fp);
                }
            }
            /* '(' ends the sample number. */
            else if ('(' == character){
                tmp[tmpIndex] = '\0';
                tmpIndex = 0;
                flag_tmp = 1;
                sampleId = atoi(tmp);
                /* Insert two 'N' rows for every sample that was skipped. */
                for (i = sampleNumber; i < sampleId - 1; i++){
                    SnphapFillMissingRow(haplotype[line-3], maxIndex);
                    line++;
                    SnphapFillMissingRow(haplotype[line-3], maxIndex);
                    line++;
                }
                sampleNumber = sampleId;
                /* Only the entry whose second character after '(' is */
                /* '1' is used; other entries are ignored.            */
                (void)fgetc(fp);
                character = fgetc(fp);
                if ('1' != character){
                    flag_noline = 1;
                }
            }
            /* Collect the sample number, never writing past tmp[]. */
            else if (flag_tmp == 0){
                if ( (isspace(character) == 0)
                  && (tmpIndex < (int)sizeof(tmp) - 1) ){
                    tmp[tmpIndex] = (char)character;
                    tmpIndex++;
                }
            }
        }
    }

    /* With fewer than three lines no data row was reached; start the */
    /* padding at row 0 instead of indexing in front of the array.    */
    if (line < 3){
        line = 3;
    }

    /* Pad the samples that are missing at the end of the file. */
    for (i = sampleNumber; i < maxSample; i++){
        SnphapFillMissingRow(haplotype[line-3], maxIndex);
        line++;
        SnphapFillMissingRow(haplotype[line-3], maxIndex);
        line++;
    }

    return retval;
}

/**********************************************************************
 * Read the best-pairs section of PHASE output into haplotype rows.
 *
 * Lines are ignored until one starting with "BEGIN BESTPAIRS1" is
 * seen, and reading stops at a line starting with "END BESTPAIRS1".
 * Inside the section the lines are handled in groups of three: the
 * first line of each group is skipped, and the letters of the second
 * and third lines are stored in two consecutive rows of `haplotype`
 * (non-letters are dropped).
 *
 * The skipped line is no longer copied into the next row: doing so
 * could write past the end of that row, or leave residue behind,
 * whenever that line held more letters than a haplotype.
 *
 * A line longer than the internal buffer is read in several chunks
 * and its letters keep accumulating in the same row.
 *
 * fp        : stream positioned at the start of the file
 * haplotype : destination rows
 *
 * Always returns 0.
 * NOTE(review): the number of rows and the row capacity of
 * `haplotype` are not passed in and are not checked here.
 **********************************************************************/
int DataReaderSetPHASEData(FILE *fp, char** haplotype)
{
    int j = 0;
    int inSection = 0;      /* 1 after "BEGIN BESTPAIRS1"               */
    int skipLine = 0;       /* 1 on the first line of each group of 3   */
    int retval = 0;
    int index = 0;          /* row currently being filled               */
    int chunkLen = 0;
    int dataIndex = 0;      /* write position inside the current row    */
    long line = 0;          /* complete lines read inside the section   */
    char buf[1024];

    while ( fgets(buf, (int)sizeof(buf), fp) != NULL ){
        /* Everything before the section marker is skipped. */
        if (strncmp(buf, "BEGIN BESTPAIRS1", 16) == 0){
            inSection = 1;
        }
        /* The end marker finishes the read. */
        else if (strncmp(buf, "END BESTPAIRS1", 14) == 0){
            return retval;
        }
        else if (inSection == 1) {
            chunkLen = (int)strlen(buf);
            /* `line` only changes at a newline, which also leaves the */
            /* loop, so this holds for the whole chunk.                */
            skipLine = ((line % 3) == 0);

            for (j = 0; j < chunkLen; j++){
                if ('\n' == buf[j]){
                    dataIndex = 0;
                    /* Only the two haplotype lines consume a row. */
                    if (skipLine == 0){
                        index++;
                    }
                    line++;
                    break;
                }
                /* <ctype.h> functions need an unsigned char value. */
                if ( (skipLine == 0)
                  && (isalpha((unsigned char)buf[j]) != 0) ){
                    haplotype[index][dataIndex] = buf[j];
                    dataIndex++;
                }
            }
        }
    }

    return retval;
}

