# Copyright (C) 2017 Jiaan Dai

"""Script for simulating ideal case scenario.

Usage:
    python scripts/idealcase.py <FASTA>

Example:
    python scripts/idealcase.py data/uniprot-all.fasta
"""

import argparse
import bisect
import sys
sys.path.append('.')
from PowerSimTool.loaders import load_peptides
from PowerSimTool.search import cosine_similarity as score

import logging
# Layout of every log line: timestamp, level, process id, then the message.
_logfmt = '%(asctime)s %(levelname)s [pid=%(process)d] \t %(message)s'
# Use the 24-hour clock (%H). The 12-hour %I without an AM/PM marker makes
# morning and afternoon timestamps indistinguishable in the log.
_datefmt = '%Y-%m-%d %H:%M:%S'
logging.basicConfig(level=logging.INFO, format=_logfmt, datefmt=_datefmt)


def main(database, peptide_lower_mass=700, peptide_upper_mass=5000,
         tag_length=3, ms1tol=250):
    """Run the ideal-case simulation on a database and print summary counts.

    Every peptide whose mass lies in the accepted range is used as a query.
    Its theoretical tags are scored against the tags of every other peptide
    whose mass is within ``ms1tol`` of the query mass.

    Args:
        database: Path handed to ``load_peptides``.
        peptide_lower_mass: Smallest peptide mass kept (inclusive).
        peptide_upper_mass: Largest peptide mass kept (inclusive).
        tag_length: Argument passed to ``peptide.get_theo_tags``.
        ms1tol: Half-width of the precursor mass window. It is added to and
            subtracted from the query mass directly, so it is in the same
            units as ``peptide.mass``.

    Prints:
        Total: number of peptides kept after mass filtering.
        Shared: number of peptides for which at least one *other* peptide in
            the mass window scores within 1e-4 of 1.
        Invalid: number of peptides whose self-score differs from 1 by more
            than 1e-4.
    """
    # Largest deviation from a score of 1 that still counts as a match.
    score_tol = 0.0001

    logging.info('Loading target database... [path=%s]', database)
    peptides = [peptide for peptide in load_peptides(database)
                if peptide_lower_mass <= peptide.mass <= peptide_upper_mass]
    # The bisect calls below are only correct on an ascending list, so the
    # ordering is enforced here rather than assumed from the loader. The
    # sort is stable and leaves already-ordered input untouched.
    peptides.sort(key=lambda peptide: peptide.mass)
    peptide_mass = [peptide.mass for peptide in peptides]
    peptide_tags_array = [peptide.get_theo_tags(tag_length)
                          for peptide in peptides]

    logging.info('Searching...')
    global_shared = 0
    invalid = 0
    total = len(peptides)
    for i, query_tags in enumerate(peptide_tags_array):
        if i % 10000 == 0:
            logging.info('Finished %s peptides. Total %s peptides.', i, total)

        # A peptide that does not match itself is reported as invalid.
        if 1 - score(query_tags, query_tags) > score_tol:
            invalid += 1

        # Index range of candidates whose mass is within the window.
        precursor_mass = peptide_mass[i]
        start_point = bisect.bisect_left(peptide_mass, precursor_mass - ms1tol)
        end_point = bisect.bisect_right(peptide_mass, precursor_mass + ms1tol)

        # Only the existence of a matching candidate matters, so stop at the
        # first one instead of scoring the rest of the window.
        # NOTE(review): assumes score() has no side effects - confirm.
        if any(1 - score(query_tags, peptide_tags_array[j]) <= score_tol
               for j in range(start_point, end_point) if j != i):
            global_shared += 1

    logging.info('Finish simulation.')
    print('Total: {}'.format(total))
    print('Shared: {}'.format(global_shared))
    print('Invalid: {}'.format(invalid))


if __name__ == '__main__':
    # Command-line entry point: one positional argument, the FASTA database
    # path, which is forwarded unchanged to main().
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('fasta', help='database file')
    main(cli.parse_args().fasta)
