# Copyright (C) 2017 Jiaan Dai

"""Fasta file loaders"""

from .types import *


def load_proteins(path):
    """Load protein sequences from a FASTA file.

    Each record starts with a header line beginning with '>'. The protein
    name is the first whitespace-delimited token of the header (after the
    leading '>' characters); a header without any token yields the name ''.
    All following non-blank lines, stripped of surrounding whitespace, are
    concatenated to form the sequence of that record.

    Args:
        path: Path of the FASTA file to read.

    Returns:
        A dict mapping protein name to sequence string. When a name occurs
        more than once, the last record wins. A file without any header
        line yields an empty dict.
    """
    proteins = dict()
    protein_name = None  # None until the first header line has been seen
    seq_buffer = []
    with open(path, 'r') as fasta:
        for line in fasta:
            line = line.strip()
            if line.startswith('>'):
                # A new header closes the record collected so far.
                if protein_name is not None:
                    proteins[protein_name] = ''.join(seq_buffer)
                # Positional form of split (rather than maxsplit=1),
                # presumably kept for Python 2 compatibility.
                fields = line.lstrip('>').split(None, 1)
                protein_name = fields[0] if fields else ''
                seq_buffer = []
            elif line and protein_name is not None:
                # Sequence lines before the first header belong to no
                # record and are ignored.
                seq_buffer.append(line)
    # Flush the last record, if the file contained any.
    if protein_name is not None:
        proteins[protein_name] = ''.join(seq_buffer)
    return proteins


def load_peptides(path):
    """Return peptides :: [Peptide], sorted by ascending mass.

    Loads every protein from the FASTA file at ``path``, digests each
    sequence with ``_digest`` and builds one ``Peptide`` per accepted
    fragment. Empty fragments and fragments containing any of the residue
    codes 'B', 'J', 'X', 'Z', 'U' or 'O' are skipped, and every 'I' is
    replaced by 'L' before the ``Peptide`` is created.

    Args:
        path: Path of the FASTA file to read.

    Returns:
        A list of the distinct ``Peptide`` objects (de-duplicated through a
        set), ordered by their ``mass`` attribute.
    """
    # Built once: residue codes this loader does not handle.
    unsupported = frozenset('BJXZUO')

    proteins = load_proteins(path)
    peptides = set()
    for name, protein_seq in proteins.items():
        for sequence in _digest(protein_seq):
            if not sequence or not unsupported.isdisjoint(sequence):
                continue

            # convert all 'I' into 'L'
            # NOTE(review): presumably because the two residues cannot be
            # told apart by mass -- confirm.
            converted = sequence.replace('I', 'L')
            peptides.add(Peptide(converted, name, '', ''))
    return sorted(peptides, key=lambda x: x.mass)


def _digest(seq):
    """Digest using Trypsin rule."""
    peptides = []
    start = 0
    for index in range(0, len(seq)):
        if seq[index] == 'K' or seq[index] == 'R':  # trypsin specificity
            if index != len(seq) - 1 and seq[index + 1] != 'P':
                # end should be index+1, there is a bug before
                peptide = seq[start:index+1]

                if all(map(lambda x: x not in peptide, list('BJXZ'))):
                    peptides.append(peptide)
                start = index + 1
    if start != len(seq) - 1:
        # end should be len(seq), there is a bug before
        peptide = seq[start:len(seq)]

        if all(map(lambda x: x not in peptide, list('BJXZ'))):
            peptides.append(peptide)
    return peptides

