/*
 * Decompiled with CFR 0.152.
 */
package picard.annotation;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.OverlapDetector;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import picard.annotation.AnnotationException;
import picard.annotation.Gene;
import picard.util.TabbedTextFileWithHeaderParser;

/**
 * Reads a refFlat annotation file and builds an {@link OverlapDetector} of {@link Gene}s.
 *
 * <p>Rows are grouped by gene name; each group becomes one {@link Gene} whose span is the
 * union of the transcript spans of its rows, and each row becomes one transcript of that gene.
 * Rows naming a sequence that is absent from the supplied sequence dictionary are skipped, as
 * are genes whose rows fail validation (strand/chromosome disagreement, malformed exons).
 *
 * <p>Every start coordinate read from the file is incremented by one while end coordinates are
 * used unchanged. This presumably converts refFlat's 0-based, half-open coordinates into
 * 1-based, closed intervals -- confirm against the refFlat format specification.
 */
public class RefFlatReader {
    private static final Log LOG = Log.getInstance(RefFlatReader.class);

    /** Genes strictly longer than this are counted in the debug summary logged by {@link #load()}. */
    private static final int LARGE_GENE_THRESHOLD = 1000000;

    /** Column labels handed to the parser, one per {@link RefFlatColumns} constant, in declaration order. */
    private static final String[] RefFlatColumnLabels = buildColumnLabels();

    private final File refFlatFile;
    private final SAMSequenceDictionary sequenceDictionary;

    /**
     * @param refFlatFile        the refFlat file to read
     * @param sequenceDictionary dictionary used to decide which chromosomes are recognized
     */
    RefFlatReader(File refFlatFile, SAMSequenceDictionary sequenceDictionary) {
        this.refFlatFile = refFlatFile;
        this.sequenceDictionary = sequenceDictionary;
    }

    /**
     * Convenience entry point: constructs a reader and loads the file in one call.
     *
     * @param refFlatFile        the refFlat file to read
     * @param sequenceDictionary dictionary used to decide which chromosomes are recognized
     * @return an overlap detector holding every gene that was loaded successfully
     * @throws AnnotationException if a row does not have the expected number of fields
     */
    static OverlapDetector<Gene> load(File refFlatFile, SAMSequenceDictionary sequenceDictionary) {
        return new RefFlatReader(refFlatFile, sequenceDictionary).load();
    }

    /**
     * Parses the refFlat file and registers one {@link Gene} per distinct gene name.
     *
     * <p>A gene whose rows fail validation is logged at debug level and skipped; it does not
     * abort the load. A row with the wrong number of fields does abort the load.
     *
     * @return an overlap detector holding every gene that was loaded successfully
     * @throws AnnotationException if a row does not have the expected number of fields
     */
    OverlapDetector<Gene> load() {
        final OverlapDetector<Gene> overlapDetector = new OverlapDetector<Gene>(0, 0);
        int longestInterval = 0;
        int numIntervalsOver1MB = 0;

        for (final List<TabbedTextFileWithHeaderParser.Row> transcriptLines : this.readRowsByGene().values()) {
            try {
                final Gene gene = this.makeGeneFromRefFlatLines(transcriptLines);
                // The gene serves as both the stored object and the interval it is indexed under.
                overlapDetector.addLhs(gene, gene);

                final int geneLength = gene.length();
                longestInterval = Math.max(longestInterval, geneLength);
                if (geneLength > LARGE_GENE_THRESHOLD) {
                    ++numIntervalsOver1MB;
                }
            } catch (AnnotationException e) {
                // Best effort: one malformed gene must not prevent the rest from loading.
                LOG.debug(e.getMessage() + " -- skipping");
            }
        }

        LOG.debug("Longest gene: " + longestInterval + "; number of genes > 1MB: " + numIntervalsOver1MB);
        return overlapDetector;
    }

    /**
     * Reads every row of the refFlat file and groups the rows by gene name, dropping rows whose
     * chromosome is not in the sequence dictionary. The parser is always closed before returning,
     * including when a malformed row causes an exception.
     *
     * @return rows keyed by gene name, each list in file order
     * @throws AnnotationException if a row does not have the expected number of fields
     */
    private HashMap<String, List<TabbedTextFileWithHeaderParser.Row>> readRowsByGene() {
        final int expectedColumns = RefFlatColumns.values().length;
        final HashMap<String, List<TabbedTextFileWithHeaderParser.Row>> refFlatLinesByGene =
                new HashMap<String, List<TabbedTextFileWithHeaderParser.Row>>();

        final TabbedTextFileWithHeaderParser parser =
                new TabbedTextFileWithHeaderParser(this.refFlatFile, RefFlatColumnLabels);
        try {
            for (final TabbedTextFileWithHeaderParser.Row row : parser) {
                final int lineNumber = parser.getCurrentLineNumber();
                if (row.getFields().length != expectedColumns) {
                    throw new AnnotationException("Wrong number of fields in refFlat file " + this.refFlatFile + " at line " + lineNumber);
                }

                final String geneName = row.getField(RefFlatColumns.GENE_NAME.name());
                final String transcriptName = row.getField(RefFlatColumns.TRANSCRIPT_NAME.name());
                final String transcriptDescription = geneName + ":" + transcriptName;
                final String chromosome = row.getField(RefFlatColumns.CHROMOSOME.name());
                if (!this.isSequenceRecognized(chromosome)) {
                    LOG.debug("Skipping " + transcriptDescription + " due to unrecognized sequence " + chromosome);
                    continue;
                }

                List<TabbedTextFileWithHeaderParser.Row> transcriptLines = refFlatLinesByGene.get(geneName);
                if (transcriptLines == null) {
                    transcriptLines = new ArrayList<TabbedTextFileWithHeaderParser.Row>();
                    refFlatLinesByGene.put(geneName, transcriptLines);
                }
                transcriptLines.add(row);
            }
        } finally {
            // Release the underlying file handle on both the normal and the exceptional path.
            parser.close();
        }
        return refFlatLinesByGene;
    }

    /**
     * @param sequence a sequence (chromosome) name from the refFlat file
     * @return true if the sequence dictionary contains a sequence with that name
     */
    private boolean isSequenceRecognized(String sequence) {
        return this.sequenceDictionary.getSequence(sequence) != null;
    }

    /**
     * Builds a {@link Gene} from all refFlat rows that share a gene name.
     *
     * <p>Name, strand and chromosome are taken from the first row; the gene spans the smallest
     * transcript start to the largest transcript end over all rows. Each row is then checked
     * against the first row's strand and chromosome and added to the gene as a transcript.
     *
     * @param transcriptLines the rows for one gene; must contain at least one row
     * @return the gene with one transcript per row
     * @throws AnnotationException if rows disagree on strand or chromosome, or a row's exon
     *                             data is inconsistent
     */
    private Gene makeGeneFromRefFlatLines(List<TabbedTextFileWithHeaderParser.Row> transcriptLines) {
        final TabbedTextFileWithHeaderParser.Row firstRow = transcriptLines.get(0);
        final String geneName = firstRow.getField(RefFlatColumns.GENE_NAME.name());
        final String strandStr = firstRow.getField(RefFlatColumns.STRAND.name());
        final boolean negative = strandStr.equals("-");
        final String chromosome = firstRow.getField(RefFlatColumns.CHROMOSOME.name());

        // The gene extent is the union of all transcript extents.
        int start = Integer.MAX_VALUE;
        int end = Integer.MIN_VALUE;
        for (final TabbedTextFileWithHeaderParser.Row row : transcriptLines) {
            start = Math.min(start, row.getIntegerField(RefFlatColumns.TX_START.name()) + 1);
            end = Math.max(end, row.getIntegerField(RefFlatColumns.TX_END.name()));
        }

        final Gene gene = new Gene(chromosome, start, end, negative, geneName);
        for (final TabbedTextFileWithHeaderParser.Row row : transcriptLines) {
            if (!strandStr.equals(row.getField(RefFlatColumns.STRAND.name()))) {
                throw new AnnotationException("Strand disagreement in refFlat file for gene " + geneName);
            }
            final String rowChromosome = row.getField(RefFlatColumns.CHROMOSOME.name());
            if (!chromosome.equals(rowChromosome)) {
                throw new AnnotationException("Chromosome disagreement(" + chromosome + " != " + rowChromosome + ") in refFlat file for gene " + geneName);
            }
            // Called for its side effect of attaching the transcript to the gene.
            this.makeTranscriptFromRefFlatLine(gene, row);
        }
        return gene;
    }

    /**
     * Converts one refFlat row into a transcript, attaches it to {@code gene}, and adds its exons.
     *
     * @param gene the gene the transcript belongs to
     * @param row  the refFlat row describing the transcript
     * @return the transcript that was added to {@code gene}
     * @throws AnnotationException   if the exon count disagrees with the number of exon starts or
     *                               ends, an exon is empty or inverted, or an exon does not begin
     *                               after the end of the preceding exon
     * @throws NumberFormatException if the exon count or an exon coordinate is not an integer
     */
    private Gene.Transcript makeTranscriptFromRefFlatLine(Gene gene, TabbedTextFileWithHeaderParser.Row row) {
        final String geneName = row.getField(RefFlatColumns.GENE_NAME.name());
        final String transcriptName = row.getField(RefFlatColumns.TRANSCRIPT_NAME.name());
        final String transcriptDescription = geneName + ":" + transcriptName;
        final int exonCount = Integer.parseInt(row.getField(RefFlatColumns.EXON_COUNT.name()));
        final String[] exonStarts = row.getField(RefFlatColumns.EXON_STARTS.name()).split(",");
        final String[] exonEnds = row.getField(RefFlatColumns.EXON_ENDS.name()).split(",");
        if (exonCount != exonStarts.length) {
            throw new AnnotationException("Number of exon starts does not agree with number of exons for " + transcriptDescription);
        }
        if (exonCount != exonEnds.length) {
            throw new AnnotationException("Number of exon ends does not agree with number of exons for " + transcriptDescription);
        }

        final int transcriptionStart = row.getIntegerField(RefFlatColumns.TX_START.name()) + 1;
        final int transcriptionEnd = row.getIntegerField(RefFlatColumns.TX_END.name());
        final int codingStart = row.getIntegerField(RefFlatColumns.CDS_START.name()) + 1;
        final int codingEnd = row.getIntegerField(RefFlatColumns.CDS_END.name());
        final Gene.Transcript tx = gene.addTranscript(transcriptName, transcriptionStart, transcriptionEnd, codingStart, codingEnd, exonCount);

        for (int i = 0; i < exonCount; ++i) {
            final Gene.Transcript.Exon e = tx.addExon(Integer.parseInt(exonStarts[i]) + 1, Integer.parseInt(exonEnds[i]));
            // Because start has been shifted by +1 and end has not, a single-base exon has
            // start == end. Only start > end means the exon is empty or inverted.
            if (e.start > e.end) {
                throw new AnnotationException("Exon has 0 or negative extent for " + transcriptDescription);
            }
            // Each exon must begin strictly after the previous one ends.
            if (i > 0 && tx.exons[i - 1].end >= tx.exons[i].start) {
                throw new AnnotationException("Exons overlap for " + transcriptDescription);
            }
        }
        return tx;
    }

    /**
     * @return the names of all {@link RefFlatColumns} constants, in declaration order
     */
    private static String[] buildColumnLabels() {
        final RefFlatColumns[] columns = RefFlatColumns.values();
        final String[] labels = new String[columns.length];
        for (int i = 0; i < columns.length; ++i) {
            labels[i] = columns[i].name();
        }
        return labels;
    }

    /**
     * Columns of a refFlat row. The declaration order is the order in which the column labels
     * are passed to the parser, and the constant names are used as the field lookup keys.
     */
    public static enum RefFlatColumns {
        GENE_NAME,
        TRANSCRIPT_NAME,
        CHROMOSOME,
        STRAND,
        TX_START,
        TX_END,
        CDS_START,
        CDS_END,
        EXON_COUNT,
        EXON_STARTS,
        EXON_ENDS;

    }
}

