/*
 * Decompiled with CFR 0.152.
 */
package org.jcvi.jillion.experimental.align.blast;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.math.BigDecimal;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.jcvi.jillion.core.DirectedRange;
import org.jcvi.jillion.core.Range;
import org.jcvi.jillion.core.Sequence;
import org.jcvi.jillion.core.residue.aa.ProteinSequence;
import org.jcvi.jillion.core.residue.aa.ProteinSequenceBuilder;
import org.jcvi.jillion.core.residue.nt.NucleotideSequence;
import org.jcvi.jillion.core.residue.nt.NucleotideSequenceBuilder;
import org.jcvi.jillion.experimental.align.blast.BlastHitImpl;
import org.jcvi.jillion.experimental.align.blast.BlastParser;
import org.jcvi.jillion.experimental.align.blast.BlastVisitor;
import org.jcvi.jillion.experimental.align.blast.Hsp;
import org.jcvi.jillion.experimental.align.blast.HspBuilder;
import org.jcvi.jillion.internal.core.io.OpenAwareInputStream;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * {@link BlastParser} implementation that reads BLAST XML output with a SAX
 * parser and reports what it finds to a {@link BlastVisitor}.
 * <p>
 * An instance reads either from a {@link File} or from an {@link InputStream};
 * exactly one of the two is non-null.
 */
public final class XmlFileBlastParser implements BlastParser {
    /** SAX parser used by every call to {@link #parse(BlastVisitor)}. */
    private final SAXParser parser;
    /** Stream to read from, or {@code null} when this instance reads from {@link #file}. */
    private final OpenAwareInputStream inputStream;
    /** File to read from, or {@code null} when this instance reads from {@link #inputStream}. */
    private final File file;

    /**
     * Creates a parser that reads BLAST XML from the given file.
     *
     * @param xml the file containing the BLAST XML output.
     * @return a new {@link BlastParser}.
     * @throws IOException if the underlying SAX parser can not be created.
     */
    public static BlastParser create(File xml) throws IOException {
        return new XmlFileBlastParser(XmlFileBlastParser.createSaxParser(), xml);
    }

    /**
     * Creates a parser that reads BLAST XML from the given stream.
     *
     * @param xml the stream containing the BLAST XML output.
     * @return a new {@link BlastParser}.
     * @throws IOException if the underlying SAX parser can not be created.
     */
    public static BlastParser create(InputStream xml) throws IOException {
        return new XmlFileBlastParser(XmlFileBlastParser.createSaxParser(), xml);
    }

    /**
     * Builds a non-validating SAX parser.
     *
     * @return a new {@link SAXParser}.
     * @throws IOException wrapping any configuration or SAX failure raised
     *         while creating the parser.
     */
    private static SAXParser createSaxParser() throws IOException {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setValidating(false);
        try {
            return factory.newSAXParser();
        } catch (ParserConfigurationException e) {
            throw new IOException("error creating SAX parser", e);
        } catch (SAXException e) {
            throw new IOException("error creating SAX parser", e);
        }
    }

    private XmlFileBlastParser(SAXParser parser, InputStream inputStream) {
        this.parser = parser;
        this.inputStream = new OpenAwareInputStream(inputStream);
        this.file = null;
    }

    private XmlFileBlastParser(SAXParser parser, File file) {
        this.parser = parser;
        this.inputStream = null;
        this.file = file;
    }

    /**
     * A file-backed parser can always parse; a stream-backed parser can parse
     * only while its stream reports itself as open.
     */
    @Override
    public boolean canParse() {
        return this.inputStream == null || this.inputStream.isOpen();
    }

    /**
     * Parses the BLAST XML and makes the corresponding calls on the visitor.
     * If {@link #canParse()} is {@code false} this method returns without
     * doing anything.
     *
     * @param visitor the visitor to call back.
     * @throws IOException if reading fails, or wrapping the
     *         {@link SAXException} raised when the XML can not be parsed.
     */
    @Override
    public void parse(BlastVisitor visitor) throws IOException {
        if (!this.canParse()) {
            return;
        }
        DefaultHandler handler = new SaxBlastParser(visitor);
        try {
            if (this.inputStream == null) {
                this.parser.parse(this.file, handler);
            } else {
                this.parser.parse((InputStream) this.inputStream, handler);
            }
        } catch (SAXException e) {
            throw new IOException("error parsing xml blast output", e);
        }
    }

    /**
     * SAX handler that accumulates element text and translates the BLAST XML
     * elements it recognizes into {@link BlastVisitor} callbacks.
     */
    private static class SaxBlastParser extends DefaultHandler {
        private static final String HIT = "Hit";
        private static final String HSP = "Hsp";
        private static final String BIT_SCORE = "Hsp_bit-score";
        private static final String E_VALUE = "Hsp_evalue";
        private static final String HSP_SCORE = "Hsp_score";
        private static final String ALIGN_LENGTH = "Hsp_align-len";
        private static final String QUERY_FROM = "Hsp_query-from";
        private static final String QUERY_TO = "Hsp_query-to";
        private static final String HIT_FROM = "Hsp_hit-from";
        private static final String HIT_TO = "Hsp_hit-to";
        private static final String IDENTICAL_MATCHES = "Hsp_identity";
        private static final String POSITIVE_MATCHES = "Hsp_positive";
        private static final String NUM_GAPS = "Hsp_gaps";
        private static final String HIT_FRAME = "Hsp_hit-frame";
        private static final String SUBJECT_LENGTH = "Hit_len";
        private static final String MIDLINE = "Hsp_midline";
        private static final String QUERY_SEQUENCE = "Hsp_qseq";
        private static final String SUBJECT_SEQUENCE = "Hsp_hseq";
        private static final String SUBJECT_DEF = "Hit_def";
        /** Splits a defline into its first whitespace-delimited token and the remainder. */
        private static final Pattern DEFLINE_PATTERN = Pattern.compile("^\\s*(\\S+)\\s*(.*)$");
        private static final String LEGACY_QUERY_ID = "BlastOutput_query-def";
        private static final String PROGRAM_NAME = "BlastOutput_program";
        private static final String PROGRAM_VERSION = "BlastOutput_version";
        private static final String QUERY_LENGTH = "BlastOutput_query-len";
        private static final String BLAST_DB = "BlastOutput_db";
        private static final String BLAST_ITERATIONS = "BlastOutput_iterations";
        private static final String ITERATION_QUERY_ID = "Iteration_query-def";
        private static final String ITERATION_QUERY_LENGTH = "Iteration_query-len";
        private static final String HIT_ACCESSION = "Hit_accession";
        /** Defline text that triggers the fall back to the hit accession as subject id. */
        private static final String NO_DEFINITION_LINE = "No definition line found";

        private HspBuilder<?, ?, ?> hspBuilder;
        private final BlastVisitor visitor;
        /** Text seen since the most recent start tag; replaced on every start tag. */
        private StringBuilder tempBuilder = new StringBuilder();
        private Integer queryStart;
        private Integer queryEnd;
        private Integer subjectStart;
        private Integer subjectEnd;
        private Integer queryLength;
        private Integer subjectLength;
        private Integer numMatches;
        /** Length of the current HSP's midline, or {@code null} if none has been seen. */
        private Integer midlineLength;
        private String queryId;
        private String subjectId;
        private String programName;
        private String programVersion;
        private String blastDb;
        private String subjectDeflineComment;
        private int misMatches = 0;
        private int numberOfGapOpenings = 0;
        private Sequence<?> querySequence;
        private Sequence<?> subjectSequence;
        private boolean isNucleotide = false;
        /**
         * Set when the first {@code Hsp} of a hit starts and cleared when the
         * enclosing {@code Hit} ends.
         */
        private boolean inHspBlock = false;
        private BlastHitImpl.Builder hitBuilder;

        SaxBlastParser(BlastVisitor visitor) {
            this.visitor = visitor;
        }

        /**
         * Answers every entity lookup with an empty source, so nothing named
         * by a public or system id is ever fetched.
         */
        @Override
        public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
            return new InputSource(new StringReader(""));
        }

        /**
         * Reports the header when the iterations element starts, creates the
         * hit builder when the first {@code Hsp} of a hit starts, and always
         * starts a fresh text buffer.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (BLAST_ITERATIONS.equals(qName)) {
                this.reportBlastHeaderInfo();
            } else if (HSP.equals(qName) && !this.inHspBlock) {
                this.inHspBlock = true;
                this.hitBuilder = new BlastHitImpl.Builder(this.queryId, this.subjectId);
                this.hitBuilder.setQueryLength(this.queryLength);
                this.hitBuilder.setSubjectDefline(this.subjectDeflineComment);
                this.hitBuilder.setBlastDbName(this.blastDb);
                this.hitBuilder.setBlastProgramName(this.programName);
            }
            this.tempBuilder = new StringBuilder();
        }

        private void reportBlastHeaderInfo() {
            this.visitor.visitInfo(this.programName, this.programVersion, this.blastDb, this.queryId);
        }

        /** Appends the reported characters to the current text buffer. */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            // Append straight from the array instead of building a temporary String.
            this.tempBuilder.append(ch, start, length);
        }

        @Override
        public void endDocument() throws SAXException {
            this.visitor.visitEnd();
        }

        /**
         * Dispatches on the closing element name. Elements are interpreted
         * differently depending on whether an hsp block is currently open.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            // The buffer is only replaced in startElement, so for a container
            // element this also holds any text that followed its last child.
            String text = this.tempBuilder.toString();
            if (this.inHspBlock) {
                this.endElementInsideHspBlock(qName, text);
            } else {
                this.endElementOutsideHspBlock(qName, text);
            }
        }

        /** Handles closing tags seen while an hsp block is open. */
        private void endElementInsideHspBlock(String qName, String text) {
            if (HSP.equals(qName)) {
                this.finishHsp(text);
            } else if (BIT_SCORE.equals(qName)) {
                this.hspBuilder.bitScore(new BigDecimal(text));
            } else if (E_VALUE.equals(qName)) {
                this.hspBuilder.eValue(new BigDecimal(text));
            } else if (ALIGN_LENGTH.equals(qName)) {
                this.hspBuilder.alignmentLength(Integer.parseInt(text));
            } else if (QUERY_FROM.equals(qName)) {
                this.queryStart = Integer.parseInt(text);
            } else if (QUERY_TO.equals(qName)) {
                this.queryEnd = Integer.parseInt(text);
            } else if (HIT_FROM.equals(qName)) {
                this.subjectStart = Integer.parseInt(text);
            } else if (IDENTICAL_MATCHES.equals(qName)) {
                this.hspBuilder.numIdenticalMatches(Integer.parseInt(text));
            } else if (POSITIVE_MATCHES.equals(qName)) {
                // NOTE(review): percent identity in finishHsp is derived from this
                // Hsp_positive count rather than from Hsp_identity - confirm intended.
                this.numMatches = Integer.parseInt(text);
                this.hspBuilder.numPositiveMatches(this.numMatches);
            } else if (HIT_FRAME.equals(qName)) {
                this.hspBuilder.hitFrame(Integer.valueOf(text));
            } else if (HIT_TO.equals(qName)) {
                this.subjectEnd = Integer.parseInt(text);
            } else if (HSP_SCORE.equals(qName)) {
                this.hspBuilder.hspScore(Float.parseFloat(text));
            } else if (QUERY_SEQUENCE.equals(qName)) {
                this.querySequence = this.isNucleotide ? new NucleotideSequenceBuilder(text).build() : new ProteinSequenceBuilder(text).build();
            } else if (SUBJECT_SEQUENCE.equals(qName)) {
                // Was endsWith(qName), which also matched any suffix of "Hsp_hseq".
                this.subjectSequence = this.isNucleotide ? new NucleotideSequenceBuilder(text).build() : new ProteinSequenceBuilder(text).build();
            } else if (NUM_GAPS.equals(qName)) {
                this.numberOfGapOpenings = Integer.parseInt(text);
            } else if (MIDLINE.equals(qName)) {
                // Remember the exact midline length here; by the time </Hsp> is
                // reached the buffer may also contain trailing whitespace.
                this.midlineLength = text.length();
                int totalMisMatches = SaxBlastParser.parseNumberOfMismatches(text);
                this.misMatches = totalMisMatches - this.numberOfGapOpenings;
            } else if (HIT.equals(qName)) {
                this.inHspBlock = false;
                this.visitor.visitHit(this.hitBuilder.build());
                // Hit_len is read once per hit, so it is cleared here rather
                // than after each Hsp; every Hsp of the hit gets the length.
                this.subjectLength = null;
            }
        }

        /**
         * Completes the current hsp, adds it to the hit builder and clears the
         * per-hsp state.
         *
         * @param text the buffered text at the time {@code </Hsp>} was seen;
         *        only used as a fall back length when no midline was recorded.
         */
        private void finishHsp(String text) {
            DirectedRange queryRange = DirectedRange.parse(this.queryStart.intValue(), this.queryEnd.intValue(), Range.CoordinateSystem.RESIDUE_BASED);
            this.hspBuilder.queryRange(queryRange);
            this.hspBuilder.subjectRange(DirectedRange.parse(this.subjectStart.intValue(), this.subjectEnd.intValue(), Range.CoordinateSystem.RESIDUE_BASED));
            if (this.subjectLength != null) {
                this.hspBuilder.subjectLength(this.subjectLength);
            }
            if (this.numMatches == null) {
                // No match count was reported for this hsp: estimate from the midline.
                int columns = this.midlineLength != null ? this.midlineLength.intValue() : text.length();
                double percentIdentity = (double) (columns - this.misMatches) / (double) columns;
                this.hspBuilder.percentIdentity(percentIdentity);
                this.hspBuilder.numMismatches(this.misMatches);
            } else {
                long length = queryRange.asRange().getLength();
                int numMismatches = (int) (length - (long) this.numMatches.intValue());
                this.hspBuilder.percentIdentity((double) this.numMatches.intValue() / (double) length);
                this.hspBuilder.numMismatches(numMismatches);
            }
            this.hspBuilder.numGapOpenings(this.numberOfGapOpenings);
            if (this.isNucleotide) {
                this.hspBuilder.gappedAlignments((NucleotideSequence)this.querySequence, (NucleotideSequence)this.subjectSequence);
            } else {
                this.hspBuilder.gappedAlignments((ProteinSequence)this.querySequence, (ProteinSequence)this.subjectSequence);
            }
            if (this.queryLength != null) {
                this.hspBuilder.queryLength(this.queryLength);
            }
            this.hitBuilder.addHsp((Hsp<?, ?, ?>)this.hspBuilder.build());

            // Clear everything that belongs to a single hsp so that nothing
            // leaks into the next one.
            this.queryStart = null;
            this.queryEnd = null;
            this.subjectStart = null;
            this.subjectEnd = null;
            this.numMatches = null;
            this.midlineLength = null;
            this.numberOfGapOpenings = 0;
            this.misMatches = 0;
            this.querySequence = null;
            this.subjectSequence = null;
        }

        /** Handles closing tags seen while no hsp block is open. */
        private void endElementOutsideHspBlock(String qName, String text) {
            if (LEGACY_QUERY_ID.equals(qName)) {
                this.queryId = text;
            } else if (ITERATION_QUERY_ID.equals(qName)) {
                this.queryId = text;
            } else if (ITERATION_QUERY_LENGTH.equals(qName)) {
                this.queryLength = Integer.parseInt(text);
            } else if (SUBJECT_LENGTH.equals(qName)) {
                this.subjectLength = Integer.parseInt(text);
            } else if (SUBJECT_DEF.equals(qName)) {
                this.handleSubjectDefline(text);
            } else if (HIT_ACCESSION.equals(qName) && NO_DEFINITION_LINE.equals(this.subjectId)) {
                // The defline carried no usable id, so use the accession instead.
                this.subjectId = text.trim();
                this.hspBuilder.subject(this.subjectId);
            } else if (PROGRAM_NAME.equals(qName)) {
                this.isNucleotide = "blastn".equalsIgnoreCase(text);
                this.programName = text;
            } else if (PROGRAM_VERSION.equals(qName)) {
                this.programVersion = text;
            } else if (QUERY_LENGTH.equals(qName)) {
                this.queryLength = Integer.parseInt(text);
            } else if (BLAST_DB.equals(qName)) {
                this.blastDb = text;
            }
        }

        /**
         * Starts a new hsp builder for the hit and derives the subject id and
         * defline from the hit's definition text.
         */
        private void handleSubjectDefline(String text) {
            this.hspBuilder = this.isNucleotide ? HspBuilder.forBlastN().query(this.queryId) : HspBuilder.forBlastP().query(this.queryId);
            Matcher matcher = DEFLINE_PATTERN.matcher(text);
            if (matcher.find()) {
                // The placeholder text is kept whole so that the accession
                // branch can recognize it; otherwise the id is the first token.
                this.subjectId = NO_DEFINITION_LINE.equals(text) ? text : matcher.group(1);
                this.hspBuilder.subject(this.subjectId);
                this.subjectDeflineComment = text.trim();
                this.hspBuilder.subjectDef(this.subjectDeflineComment);
            } else {
                this.subjectId = text.trim();
                this.hspBuilder.subject(this.subjectId);
            }
        }

        /**
         * Counts the characters of the midline that are not {@code '|'}.
         *
         * @param midline the midline text.
         * @return the number of characters other than {@code '|'}.
         */
        private static int parseNumberOfMismatches(String midline) {
            int count = 0;
            for (int i = 0; i < midline.length(); ++i) {
                if (midline.charAt(i) != '|') {
                    ++count;
                }
            }
            return count;
        }
    }
}

