/*
 * Decompiled with CFR 0.152.
 */
package ma;

import ev.io.PreprocessGutellData;
import fig.basic.IOUtils;
import fig.basic.Option;
import fig.basic.Pair;
import goblin.Taxon;
import java.io.File;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import ma.GreedyDecoder;
import ma.MultiAlignment;
import nuts.io.IO;
import nuts.lang.StringUtils;
import nuts.math.TopoSort;
import nuts.tui.Table;
import nuts.util.CollUtils;
import nuts.util.Counter;
import nuts.util.MathUtils;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;

public class MSAPoset
implements Serializable {
    /** Serialization version identifier. */
    private static final long serialVersionUID = 1L;
    /** Partial order over the columns; the primary constructor sets it to an {@code ImplicitPoset}. */
    private final TopoSort.PartialOrder<Column> poset;
    /** Sorted location keys currently used by columns; set to {@code null} by {@link #disableLinearization()}. */
    private TreeSet<Double> linearizedLocations = new TreeSet();
    /** Location key of every live column; its key set is also used as "the set of all columns". */
    private Map<Column, Double> linearizedColumns = CollUtils.map();
    /** For each taxon, an array indexed by sequence position giving the column that holds that position. */
    private final Map<Taxon, Column[]> columnMaps = CollUtils.map();
    /** Sequence string of each taxon. The map handed to the constructor is stored as-is, not copied. */
    private final Map<Taxon, String> sequences;
    /** The taxa of {@link #sequences}, sorted with {@code Collections.sort}. */
    private final List<Taxon> taxa;
    /** Whether column locations are currently being maintained. */
    private boolean linearizationEnabled = true;
    /** When true, parseFASTA adds alignment links pair by pair instead of column by column. */
    public static boolean _useParseDebug = false;

    /**
     * Creates the empty alignment over the given sequences: every character of
     * every sequence starts in its own single-point column.
     *
     * <p>The {@code sequences} map is stored by reference, not copied.
     *
     * @param sequences the sequence string of each taxon
     */
    public MSAPoset(Map<Taxon, String> sequences) {
        this.taxa = CollUtils.list(sequences.keySet());
        Collections.sort(this.taxa);
        this.sequences = sequences;
        this.poset = new ImplicitPoset();
        // Score each fresh column by minus its relative position in its sequence.
        // NOTE(review): presumably Counter iterates by decreasing count, so that the
        // columns of one sequence come out left to right below - confirm against Counter.
        Counter<Column> fraction = new Counter<Column>();
        for (Taxon lang : this.taxa) {
            int length = sequences.get(lang).length();
            Column[] currentMap = new Column[length];
            this.columnMaps.put(lang, currentMap);
            for (int i = 0; i < length; ++i) {
                Column currentCol = new Column(lang, i);
                currentMap[i] = currentCol;
                fraction.setCount(currentCol, -((double)i) / (1.0 + (double)length));
            }
        }
        // Hand out consecutive locations 0.0, 1.0, ... in the counter's iteration order.
        int curIdx = 0;
        for (Column c : fraction) {
            // An int cannot be boxed straight to Double; widen to double explicitly first.
            Double key = Double.valueOf((double)curIdx);
            this.linearizedColumns.put(c, key);
            this.linearizedLocations.add(key);
            ++curIdx;
        }
    }

    /**
     * Stops maintaining column locations and drops the set of used locations
     * (the field becomes {@code null}).
     */
    public void disableLinearization() {
        this.linearizedLocations = null;
        this.linearizationEnabled = false;
    }

    /**
     * Re-enables linearization. Does nothing when it is already enabled; otherwise
     * the columns are topologically sorted and given the locations 0.0, 1.0, ...
     * in that order.
     *
     * @throws RuntimeException if the topological sort returns {@code null}
     */
    public void enableLinearization() {
        if (!this.linearizationEnabled) {
            List<Column> sorted = TopoSort.topologicalSort(this.poset);
            if (sorted == null) {
                throw new RuntimeException();
            }
            this.linearizedLocations = new TreeSet<Double>();
            this.linearizedColumns = CollUtils.map();
            int position = 0;
            for (Column column : sorted) {
                Double location = Double.valueOf((double)position);
                this.linearizedLocations.add(location);
                this.linearizedColumns.put(column, location);
                ++position;
            }
            this.linearizationEnabled = true;
        }
    }

    /**
     * @return a read-only view of the sorted taxa of this alignment
     */
    public final List<Taxon> taxa() {
        List<Taxon> readOnly = Collections.unmodifiableList(this.taxa);
        return readOnly;
    }

    /**
     * Builds a new alignment over the same sequence map as {@code base} (the map
     * object itself is shared) and re-adds every column of {@code base}.
     *
     * @param base the alignment to replicate
     * @throws RuntimeException if one of the columns cannot be added
     */
    public MSAPoset(MSAPoset base) {
        this(base.sequences);
        for (Column column : base.linearizedColumns.keySet()) {
            boolean added = this.tryAdding(column);
            if (!added) {
                throw new RuntimeException();
            }
        }
    }

    /**
     * Starts from the empty alignment and tries to add every link of the counter,
     * in the counter's iteration order. Links that are refused are skipped.
     *
     * @param sequences   the sequence string of each taxon
     * @param edgeCounter the candidate links
     * @return the resulting alignment
     */
    public static MSAPoset maxRecallMSA(Map<Taxon, String> sequences, Counter<GreedyDecoder.Edge> edgeCounter) {
        MSAPoset msa = new MSAPoset(sequences);
        for (GreedyDecoder.Edge candidate : edgeCounter) {
            // The outcome is deliberately ignored: a refused link is just not part of the result.
            msa.tryAdding(candidate);
        }
        return msa;
    }

    /**
     * Computes points of a precision/recall curve by adding the links of
     * {@code edgeCounter} one at a time (in the counter's iteration order) and
     * measuring the growing alignment against {@code ref} at regular intervals.
     *
     * <p>A point is recorded after every {@code interval}-th accepted link, where
     * {@code interval = max(1, accepted / nPoints)}, and after the last accepted link.
     *
     * @param sequences   the sequence string of each taxon
     * @param edgeCounter the candidate links and their counts
     * @param ref         the reference alignment
     * @param nPoints     the approximate number of points wanted; must be positive
     * @return the recorded points, in the order they were produced
     * @throws IllegalArgumentException if {@code nPoints} is not positive
     */
    public static List<ROCPoint> ROC(Map<Taxon, String> sequences, Counter<GreedyDecoder.Edge> edgeCounter, MSAPoset ref, int nPoints) {
        if (nPoints <= 0) {
            throw new IllegalArgumentException("nPoints must be positive, got " + nPoints);
        }
        // First pass: count how many links are actually accepted.
        MSAPoset currentGuess = new MSAPoset(sequences);
        int totalNPoints = 0;
        for (GreedyDecoder.Edge e : edgeCounter) {
            if (currentGuess.tryAdding(e)) {
                ++totalNPoints;
            }
        }
        // With fewer accepted links than requested points the integer division yields 0,
        // and "current % 0" would throw ArithmeticException; sample every link instead.
        int interval = Math.max(1, totalNPoints / nPoints);
        // Second pass: replay the additions, measuring at the sampling positions.
        currentGuess = new MSAPoset(sequences);
        List<ROCPoint> result = CollUtils.list();
        int current = 0;
        for (GreedyDecoder.Edge e : edgeCounter) {
            if (!currentGuess.tryAdding(e)) {
                continue;
            }
            ++current;
            if (current == totalNPoints || current % interval == 0) {
                result.add(new ROCPoint(MSAPoset.edgePrecision(ref, currentGuess), MSAPoset.edgeRecall(ref, currentGuess), edgeCounter.getCount(e)));
            }
        }
        return result;
    }

    /**
     * @return the number of entries of the sequence map
     */
    public int nTaxa() {
        int count = this.sequences.size();
        return count;
    }

    /**
     * Sums {@code MathUtils.nChoose2(size)} over the point counts of all columns.
     *
     * @return the accumulated total
     */
    public int nEdges() {
        int total = 0;
        for (Column column : this.linearizedColumns.keySet()) {
            int columnSize = column.points.size();
            total += MathUtils.nChoose2(columnSize);
        }
        return total;
    }

    /**
     * Enumerates, for every column, one link per pair of taxa present in it.
     * Within a pair the taxon that comes first in the sorted taxa list is the
     * link's first end.
     *
     * @return a newly built list of links
     */
    public Collection<GreedyDecoder.Edge> edges() {
        ArrayList<GreedyDecoder.Edge> links = CollUtils.list();
        int taxaCount = this.taxa.size();
        for (Column column : this.linearizedColumns.keySet()) {
            for (int i = 0; i < taxaCount; ++i) {
                Taxon first = this.taxa.get(i);
                if (column.points.containsKey(first)) {
                    for (int j = i + 1; j < taxaCount; ++j) {
                        Taxon second = this.taxa.get(j);
                        if (!first.equals(second) && column.points.containsKey(second)) {
                            links.add(new GreedyDecoder.Edge((Integer)column.points.get(first), (Integer)column.points.get(second), first, second));
                        }
                    }
                }
            }
        }
        return links;
    }

    /**
     * Parses the file with {@code MultiAlignment.parse} (given its absolute path)
     * and converts the result with {@link #fromMultiAlignmentObject}.
     *
     * @param f the alignment file
     * @return the parsed alignment
     */
    public static MSAPoset parseAlnOrMsfFormats(File f) {
        MultiAlignment parsed = MultiAlignment.parse(f.getAbsolutePath());
        return MSAPoset.fromMultiAlignmentObject(parsed);
    }

    /**
     * Converts a {@code MultiAlignment} into an {@code MSAPoset}, keeping all of
     * its equivalence classes.
     *
     * @param ma the alignment to convert
     * @return the converted alignment
     */
    public static MSAPoset fromMultiAlignmentObject(MultiAlignment ma) {
        final boolean coreBlocksOnly = false;
        return MSAPoset._fromMultiAlignmentObject(ma, coreBlocksOnly);
    }

    /**
     * Converts a {@code MultiAlignment} into an {@code MSAPoset}, keeping only the
     * equivalence classes whose representative is flagged as core block.
     *
     * @param ma the alignment to convert
     * @return the converted alignment
     */
    public static MSAPoset coreBlocksFromMultiAlignmentObject(MultiAlignment ma) {
        final boolean coreBlocksOnly = true;
        return MSAPoset._fromMultiAlignmentObject(ma, coreBlocksOnly);
    }

    /**
     * Shared conversion from {@code MultiAlignment}: links every member of each
     * equivalence class to the class representative.
     *
     * @param ma          the alignment to convert
     * @param keepOnlyRef when true, classes whose representative is not a core
     *                    block are skipped, and a class mixing core and non-core
     *                    members is rejected
     * @return the converted alignment
     * @throws RuntimeException on a mixed class (when {@code keepOnlyRef}) or when a
     *                          link cannot be added
     */
    private static MSAPoset _fromMultiAlignmentObject(MultiAlignment ma, boolean keepOnlyRef) {
        MSAPoset msa = new MSAPoset(ma.getSequences());
        for (MultiAlignment.SequenceCoordinate representative : ma.eqClasses().representatives()) {
            if (!keepOnlyRef || representative.isCoreBlock()) {
                for (MultiAlignment.SequenceCoordinate member : ma.eqClasses().eqClass(representative)) {
                    if (member.equals(representative)) {
                        continue;
                    }
                    if (keepOnlyRef && representative.isCoreBlock() != member.isCoreBlock()) {
                        throw new RuntimeException("The old format, based on annotation files, assumes that columns are either all or all not core block links.");
                    }
                    GreedyDecoder.Edge link = new GreedyDecoder.Edge(representative.indexInSequence(), member.indexInSequence(), representative.getNodeIdentifier(), member.getNodeIdentifier());
                    boolean added = msa.tryAdding(link);
                    if (!added) {
                        throw new RuntimeException();
                    }
                }
            }
        }
        return msa;
    }

    /**
     * @return a read-only view of the taxon-to-sequence map
     */
    public Map<Taxon, String> sequences() {
        Map<Taxon, String> readOnly = Collections.unmodifiableMap(this.sequences);
        return readOnly;
    }

    /**
     * Looks up the character that taxon {@code lang} contributes to column {@code c}.
     *
     * @param c    a column that has a point for {@code lang}
     * @param lang the taxon
     * @return the character of {@code lang}'s sequence at the column's position
     */
    public char charAt(Column c, Taxon lang) {
        String sequence = this.sequences.get(lang);
        int position = (Integer)c.points.get(lang);
        return sequence.charAt(position);
    }

    /**
     * Looks up the character at one end of a link.
     *
     * @param e     the link
     * @param first {@code true} for the (lang1, index1) end, {@code false} for the
     *              (lang2, index2) end
     * @return the character at the chosen end
     */
    public char charAt(GreedyDecoder.Edge e, boolean first) {
        if (first) {
            return this.sequences.get(e.lang1()).charAt(e.index1());
        }
        return this.sequences.get(e.lang2()).charAt(e.index2());
    }

    /**
     * Serializes the alignment to a file with Java object serialization.
     *
     * <p>The stream is closed whether or not writing succeeds. If both writing and
     * closing fail, the write failure is the one reported.
     *
     * @param msa  the alignment to write
     * @param file the destination file
     * @throws RuntimeException wrapping any exception raised while writing or closing
     */
    public static void save(MSAPoset msa, File file) {
        ObjectOutputStream out = IOUtils.openBinOutHard(file);
        Exception failure = null;
        try {
            out.writeObject(msa);
        }
        catch (Exception e) {
            failure = e;
        }
        // Always release the file handle, even when writeObject failed.
        try {
            out.close();
        }
        catch (Exception e) {
            if (failure == null) {
                failure = e;
            }
        }
        if (failure != null) {
            throw new RuntimeException(failure);
        }
    }

    /**
     * Reads back an alignment written by {@link #save}.
     *
     * <p>NOTE(review): this uses Java-native deserialization; only call it on files
     * from a trusted source.
     *
     * @param filePath the file to read
     * @return the deserialized alignment
     * @throws RuntimeException wrapping any exception raised while opening or reading
     */
    public static MSAPoset restore(File filePath) {
        ObjectInputStream ois = null;
        try {
            ois = IOUtils.openBinIn(filePath);
            return (MSAPoset)ois.readObject();
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
        finally {
            // Release the file handle; a failure to close an input stream does not
            // change the outcome of the read, so it is not reported.
            if (ois != null) {
                try {
                    ois.close();
                }
                catch (Exception ignored) {
                    // nothing useful to do
                }
            }
        }
    }

    /**
     * Tells whether {@code keepInCurrent} is a legal set of taxa to keep when
     * splitting {@code c}: it must be non-empty, must not have as many elements as
     * the column has points, and every element must be a taxon of the column.
     *
     * @param c             the column to split
     * @param keepInCurrent the taxa that would stay in {@code c}
     * @return {@code true} when the split is legal
     */
    public static boolean isValidSplit(Column c, Set<Taxon> keepInCurrent) {
        int kept = keepInCurrent.size();
        if (kept == 0) {
            return false;
        }
        if (kept == c.points.size()) {
            return false;
        }
        return c.points.keySet().containsAll(keepInCurrent);
    }

    /**
     * Replaces the sequence of a taxon by another string of the same length.
     *
     * @param t the taxon whose sequence is replaced
     * @param s the replacement
     * @throws RuntimeException if the lengths differ
     */
    public void setString(Taxon t, String s) {
        int expectedLength = this.sequences.get(t).length();
        if (expectedLength != s.length()) {
            throw new RuntimeException();
        }
        this.sequences.put(t, s);
    }

    /**
     * Rewrites every sequence so that it only contains allowed characters: each
     * character outside {@code allowed} is replaced by an allowed character drawn
     * with {@code rand}; allowed characters are kept.
     *
     * @param allowed the permitted characters
     * @param rand    the source of randomness for the replacements
     */
    public void fixMSAUsingRandomCharacters(Set<Character> allowed, Random rand) {
        ArrayList<Character> replacements = CollUtils.list(allowed);
        for (Taxon taxon : this.taxa()) {
            String original = this.sequences().get(taxon);
            StringBuilder fixed = new StringBuilder();
            for (int i = 0; i < original.length(); ++i) {
                char symbol = original.charAt(i);
                if (!allowed.contains(Character.valueOf(symbol))) {
                    fixed.append(replacements.get(rand.nextInt(replacements.size())));
                } else {
                    fixed.append(symbol);
                }
            }
            this.setString(taxon, fixed.toString());
        }
    }

    /**
     * Picks a free location immediately after column {@code c}: the midpoint
     * between {@code c}'s location and the next used location, or
     * {@code c}'s location plus one when {@code c} is last.
     *
     * @param c a column with a registered location
     * @return the new location, or {@code null} when the midpoint is not distinct
     *         from both of its neighbours
     * @throws RuntimeException if the column's location is not the first element of
     *                          the tail of the used-location set starting at it
     */
    private Double findNextLocation(Column c) {
        Double here = this.linearizedColumns.get(c);
        Iterator<Double> fromHere = this.linearizedLocations.tailSet(here).iterator();
        Double first = fromHere.next();
        if (!here.equals(first)) {
            throw new RuntimeException();
        }
        if (!fromHere.hasNext()) {
            return here + 1.0;
        }
        Double successor = fromHere.next();
        Double midpoint = here + (successor - here) / 2.0;
        if (midpoint.equals(successor) || midpoint.equals(here)) {
            // No representable value is left between the two neighbours.
            return null;
        }
        return midpoint;
    }

    /**
     * Renumbers all columns: following the current linear order, the columns get
     * the locations 0.0, 1.0, 2.0, ... and both location structures are replaced.
     */
    private void recreateLocations() {
        List<Column> ordered = this.linearizedColumns();
        TreeSet<Double> freshLocations = new TreeSet<Double>();
        Map<Column, Double> freshColumns = CollUtils.map();
        for (int rank = 0; rank < ordered.size(); ++rank) {
            Double location = Double.valueOf((double)rank);
            freshLocations.add(location);
            freshColumns.put(ordered.get(rank), location);
        }
        this.linearizedColumns = freshColumns;
        this.linearizedLocations = freshLocations;
    }

    /**
     * Splits column {@code c} in two: the taxa of {@code keepInCurrent} stay in
     * {@code c}, all its other points move to a newly created column that is
     * registered right after {@code c}.
     *
     * @param c             the column to split
     * @param keepInCurrent the taxa whose points remain in {@code c}
     * @throws RuntimeException if {@link #isValidSplit} rejects the request
     */
    public void split(Column c, Set<Taxon> keepInCurrent) {
        // Reserve a location for the new column. When no free location is left between
        // c and its successor (null result), renumber all columns and look again.
        // NOTE(review): this bookkeeping runs before the request is validated below.
        Double insertLoc = null;
        if (this.linearizationEnabled && (insertLoc = this.findNextLocation(c)) == null) {
            this.recreateLocations();
            insertLoc = this.findNextLocation(c);
        }
        if (!MSAPoset.isValidSplit(c, keepInCurrent)) {
            throw new RuntimeException();
        }
        // Everything that is not explicitly kept moves to the new column.
        HashSet<Taxon> putInNew = CollUtils.set(c.points.keySet());
        putInNew.removeAll(keepInCurrent);
        Column resurrected = new Column();
        for (Taxon taxon : putInNew) {
            resurrected.points.put(taxon, c.points.get(taxon));
        }
        c.points.keySet().removeAll(putInNew);
        if (this.linearizationEnabled) {
            this.linearizedLocations.add(insertLoc);
        }
        // With linearization disabled insertLoc is still null here, so the new column
        // is registered with a null location.
        this.linearizedColumns.put(resurrected, insertLoc);
        // Point the per-taxon position maps of the moved points at the new column.
        for (Taxon t : resurrected.points.keySet()) {
            int strIdx = (Integer)resurrected.points.get(t);
            this.columnMaps.get((Object)t)[strIdx] = resurrected;
        }
    }

    /**
     * Sanity check: for every node of the poset and each of its successors, the
     * two columns are expected to have at least one taxon in common. Every pair
     * that does not is printed to standard output.
     *
     * @param msa the alignment to check
     * @return the number of offending pairs
     */
    public static int _testArcs(MSAPoset msa) {
        int violations = 0;
        for (Column from : msa.poset.nodes()) {
            for (Column to : msa.poset.next(from)) {
                boolean shareTaxon = CollUtils.intersects(from.getPoints().keySet(), to.getPoints().keySet());
                if (!shareTaxon) {
                    System.out.println("Problem:" + from.getPoints() + " and " + to.getPoints());
                    ++violations;
                }
            }
        }
        return violations;
    }

    /**
     * @param e a link
     * @return {@code true} when both ends of the link are held by the very same
     *         column object
     */
    public boolean containsEdge(GreedyDecoder.Edge e) {
        Column firstEnd = this.getColumn(e, true);
        Column secondEnd = this.getColumn(e, false);
        return firstEnd == secondEnd;
    }

    /**
     * Projects an alignment onto a subset of its taxa. Only taxa that are both in
     * {@code taxa} and in {@code msa} are kept; every column is reduced to its
     * points on those taxa and re-added to a fresh alignment.
     *
     * @param msa  the alignment to project
     * @param taxa the taxa of interest
     * @return the projected alignment
     * @throws RuntimeException if a projected column cannot be added
     */
    public static MSAPoset restrict(MSAPoset msa, Set<Taxon> taxa) {
        Map<Taxon, String> allSequences = msa.sequences();
        Set<Taxon> kept = CollUtils.inter(taxa, allSequences.keySet());
        HashMap<Taxon, String> keptSequences = new HashMap<Taxon, String>();
        for (Taxon taxon : kept) {
            keptSequences.put(taxon, allSequences.get(taxon));
        }
        MSAPoset restricted = new MSAPoset(keptSequences);
        for (Column column : msa.columns()) {
            Column projected = new Column();
            for (Taxon taxon : kept) {
                if (column.points.containsKey(taxon)) {
                    projected.points.put(taxon, column.points.get(taxon));
                }
            }
            if (!restricted.tryAdding(projected)) {
                throw new RuntimeException();
            }
        }
        return restricted;
    }

    /**
     * Merges several alignments into one over the union of their taxa.
     *
     * @param collection the alignments to merge
     * @return the merged alignment, or {@code null} as soon as one column cannot be
     *         added to the merge
     * @throws RuntimeException if two alignments give different sequences for the
     *                          same taxon
     */
    public static MSAPoset union(Collection<MSAPoset> collection) {
        // Gather the sequences, insisting that shared taxa agree.
        HashMap<Taxon, String> merged = new HashMap<Taxon, String>();
        for (MSAPoset msa : collection) {
            for (Map.Entry<Taxon, String> entry : msa.sequences().entrySet()) {
                String known = merged.get(entry.getKey());
                if (known == null) {
                    merged.put(entry.getKey(), entry.getValue());
                } else if (!known.equals(entry.getValue())) {
                    throw new RuntimeException();
                }
            }
        }
        // Replay every column of every input on the merged alignment.
        MSAPoset combined = new MSAPoset(merged);
        for (MSAPoset msa : collection) {
            for (Column column : msa.columns()) {
                if (!combined.tryAdding(column)) {
                    return null;
                }
            }
        }
        return combined;
    }

    /**
     * Tries to add all the points of a column as aligned together.
     *
     * @param c the column whose points should be aligned
     * @return the result of {@link #tryAdding(Map)} on the column's points
     */
    public boolean tryAdding(Column c) {
        boolean added = this.tryAdding(c.points);
        return added;
    }

    /**
     * Tries to align the given points together by adding, one after the other,
     * the links returned by {@link #spanningEdges}. Stops at the first refused
     * link; links added before it stay in place.
     *
     * @param points the (taxon, position) pairs to align together
     * @return {@code true} when every link was accepted
     */
    public boolean tryAdding(Map<Taxon, Integer> points) {
        for (GreedyDecoder.Edge link : MSAPoset.spanningEdges(points)) {
            boolean accepted = this.tryAdding(link);
            if (!accepted) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tries to add a link, committing the change when it is accepted.
     *
     * @param alignmentLink the link to add
     * @return {@code true} when the link was accepted
     */
    public boolean tryAdding(GreedyDecoder.Edge alignmentLink) {
        final boolean commit = true;
        return this._tryAdding(alignmentLink, commit);
    }

    /**
     * Runs the same acceptance test as {@link #tryAdding(GreedyDecoder.Edge)} but
     * with the commit flag off.
     *
     * @param alignmentLink the link to test
     * @return {@code true} when the link would be accepted
     */
    public boolean isValidAddition(GreedyDecoder.Edge alignmentLink) {
        final boolean commit = false;
        return this._tryAdding(alignmentLink, commit);
    }

    /**
     * Core of link insertion: merges the column holding the second end of the link
     * ({@code mergeFrom}) into the column holding its first end ({@code mergeTo}).
     *
     * @param alignmentLink the link to add
     * @param commitChanges when false, the columns, position maps and location
     *                      registrations are left untouched
     * @return {@code false} when {@code mergeTo.disjoint(mergeFrom)} is false or when
     *         the online topological sort refuses one of the redirected arcs;
     *         {@code true} otherwise
     * @throws RuntimeException if either end has no column, or if a position map
     *                          does not point at {@code mergeFrom} where expected
     */
    private boolean _tryAdding(GreedyDecoder.Edge alignmentLink, boolean commitChanges) {
        Column mergeTo = this.getColumn(alignmentLink, true);
        Column mergeFrom = this.getColumn(alignmentLink, false);
        if (mergeTo == null || mergeFrom == null) {
            throw new RuntimeException();
        }
        // NOTE(review): disjoint presumably means "no taxon in common"; it is defined on
        // Column, outside this view - confirm.
        if (!mergeTo.disjoint(mergeFrom)) {
            return false;
        }
        boolean success = true;
        if (this.linearizationEnabled) {
            Pair<Column, Column> currentArc;
            Pair<Column, Column> copyArc;
            Iterator<Pair<Column, Column>> iterator = this.arcs(mergeFrom).iterator();
            // For each arc touching mergeFrom, redirect it to mergeTo (copyArc) and submit
            // it to the online topological sort; the loop ends at the first refusal.
            // NOTE(review): the sort receives this.linearizedColumns even when
            // commitChanges is false - confirm whether it can reorder it on a dry run.
            while (iterator.hasNext() && (success = TopoSort.onlineTopologicalSort(this.poset, this.linearizedColumns, (copyArc = MSAPoset.copyArc(currentArc = iterator.next(), mergeFrom, mergeTo)).getFirst(), copyArc.getSecond()))) {
                // all the work happens in the loop condition
            }
        }
        if (success && commitChanges) {
            // Move the points, then repoint the per-taxon position maps.
            mergeTo.points.putAll(mergeFrom.points);
            for (Taxon t : mergeFrom.points.keySet()) {
                int strIdx = (Integer)mergeFrom.points.get(t);
                Column[] currentMap = this.columnMaps.get(t);
                if (currentMap[strIdx] != mergeFrom) {
                    throw new RuntimeException();
                }
                currentMap[strIdx] = mergeTo;
            }
            // mergeFrom is no longer a column of the alignment: unregister it.
            Double currentLoc = this.linearizedColumns.get(mergeFrom);
            this.linearizedColumns.remove(mergeFrom);
            if (this.linearizationEnabled) {
                this.linearizedLocations.remove(currentLoc);
            }
        }
        return success;
    }

    /**
     * Returns a copy of an arc in which the end equal to {@code mergeFrom} is
     * replaced by {@code mergeTo}.
     *
     * @param currentArc an arc with exactly one end equal to {@code mergeFrom}
     * @param mergeFrom  the column being replaced
     * @param mergeTo    the replacement column
     * @return the redirected arc
     * @throws RuntimeException if both ends or neither end is {@code mergeFrom}
     */
    private static Pair<Column, Column> copyArc(Pair<Column, Column> currentArc, Column mergeFrom, Column mergeTo) {
        boolean startsAtFrom = currentArc.getFirst() == mergeFrom;
        boolean endsAtFrom = currentArc.getSecond() == mergeFrom;
        if (startsAtFrom == endsAtFrom) {
            // Either a self-loop on mergeFrom or an arc that does not touch it.
            throw new RuntimeException();
        }
        if (startsAtFrom) {
            return Pair.makePair(mergeTo, currentArc.getSecond());
        }
        return Pair.makePair(currentArc.getFirst(), mergeTo);
    }

    /**
     * Lists the arcs between a column and its sequence neighbours: for each taxon
     * of the column, an arc from the column of the previous position (if any) and
     * an arc to the column of the next position (if any).
     *
     * @param mergeFrom the column of interest
     * @return the arcs, as (earlier column, later column) pairs
     */
    private List<Pair<Column, Column>> arcs(Column mergeFrom) {
        ArrayList<Pair<Column, Column>> incident = CollUtils.list();
        for (Taxon taxon : mergeFrom.points.keySet()) {
            int position = (Integer)mergeFrom.points.get(taxon);
            if (position > 0) {
                Column before = this.columnMaps.get(taxon)[position - 1];
                incident.add(Pair.makePair(before, mergeFrom));
            }
            if (position + 1 < this.sequences().get(taxon).length()) {
                Column after = this.columnMaps.get(taxon)[position + 1];
                incident.add(Pair.makePair(mergeFrom, after));
            }
        }
        return incident;
    }

    /**
     * Finds the column holding one end of a link.
     *
     * @param alignmentLink the link
     * @param b             {@code true} for the (lang1, index1) end, {@code false}
     *                      for the (lang2, index2) end
     * @return the column currently holding that position
     */
    private Column getColumn(GreedyDecoder.Edge alignmentLink, boolean b) {
        return b
                ? this.columnMaps.get(alignmentLink.lang1())[alignmentLink.index1()]
                : this.columnMaps.get(alignmentLink.lang2())[alignmentLink.index2()];
    }

    /**
     * Converts this alignment to a {@code MultiAlignment} built on the same
     * sequence map, registering the spanning links of every column.
     *
     * @return the converted alignment
     */
    public MultiAlignment toMultiAlignmentObject() {
        MultiAlignment converted = new MultiAlignment(this.sequences);
        for (Column column : this.linearizedColumns.keySet()) {
            for (GreedyDecoder.Edge link : column.spanningEdges()) {
                converted.addAlign(link.lang1(), link.index1(), link.lang2(), link.index2());
            }
        }
        return converted;
    }

    /**
     * Returns the taxa of the column at the other end of an arc.
     *
     * @param reference one end of the arc
     * @param arc       the arc
     * @return the key set of the points of the opposite column
     * @throws RuntimeException if {@code reference} is neither end of the arc
     */
    private Set<Taxon> languagesAtOtherEndOfArc(Column reference, Pair<Column, Column> arc) {
        Column head = arc.getFirst();
        Column tail = arc.getSecond();
        if (head == reference) {
            return tail.points.keySet();
        }
        if (tail == reference) {
            return head.points.keySet();
        }
        throw new RuntimeException();
    }

    /**
     * @param lang  a taxon
     * @param index a position in that taxon's sequence
     * @return the column currently holding that position
     */
    public Column column(Taxon lang, int index) {
        Column[] byPosition = this.columnMaps.get(lang);
        return byPosition[index];
    }

    /**
     * Collects the point maps of all columns into a new set. The maps are the
     * columns' own point maps, not copies.
     *
     * @return the set of point maps
     */
    public Set<Map<Taxon, Integer>> points() {
        HashSet<Map<Taxon, Integer>> collected = CollUtils.set();
        for (Column column : this.linearizedColumns.keySet()) {
            collected.add(column.points);
        }
        return collected;
    }

    /**
     * Selects the columns that contain at least one of the given taxa.
     *
     * @param langs the taxa of interest
     * @return a new set with the matching columns
     */
    public Set<Column> relevantColumns(Set<Taxon> langs) {
        HashSet<Column> selected = CollUtils.set();
        for (Column column : this.linearizedColumns.keySet()) {
            if (CollUtils.intersects(column.points.keySet(), langs)) {
                selected.add(column);
            }
        }
        return selected;
    }

    /**
     * Builds a star of links connecting a set of points: the first taxon of the
     * key set (in its iteration order) is linked to each of the others.
     *
     * @param points the (taxon, position) pairs to connect
     * @return the links; empty when there are fewer than two points
     */
    public static List<GreedyDecoder.Edge> spanningEdges(Map<Taxon, Integer> points) {
        ArrayList<GreedyDecoder.Edge> star = CollUtils.list();
        if (points.size() >= 2) {
            ArrayList<Taxon> members = CollUtils.list(points.keySet());
            Taxon hub = (Taxon)members.get(0);
            int hubPosition = points.get(hub);
            int idx = 1;
            while (idx < members.size()) {
                Taxon spoke = (Taxon)members.get(idx);
                star.add(new GreedyDecoder.Edge(hubPosition, points.get(spoke), hub, spoke));
                ++idx;
            }
        }
        return star;
    }

    /**
     * @param c a column
     * @return {@code true} when the column's taxa are exactly the taxa of the
     *         sequence map
     */
    public boolean isFull(Column c) {
        Set<Taxon> present = c.points.keySet();
        Set<Taxon> all = this.sequences.keySet();
        return present.equals(all);
    }

    /**
     * @return a read-only view of all columns, in no particular documented order
     */
    public Collection<Column> columns() {
        Collection<Column> readOnly = Collections.unmodifiableCollection(this.linearizedColumns.keySet());
        return readOnly;
    }

    /**
     * Returns the columns sorted by increasing location.
     *
     * @return a fixed-size list of the columns in linear order
     * @throws RuntimeException if linearization is disabled
     */
    public List<Column> linearizedColumns() {
        if (!this.linearizationEnabled) {
            throw new RuntimeException();
        }
        // Rank of each used location in increasing order.
        Map<Double, Integer> rankOf = CollUtils.map();
        int rank = 0;
        for (Double location : this.linearizedLocations) {
            rankOf.put(location, rank);
            ++rank;
        }
        // Drop every column into the slot given by the rank of its location.
        Column[] ordered = new Column[this.linearizedLocations.size()];
        for (Map.Entry<Column, Double> entry : this.linearizedColumns.entrySet()) {
            int slot = rankOf.get(entry.getValue());
            ordered[slot] = entry.getKey();
        }
        return Arrays.asList(ordered);
    }

    /**
     * @return the pairwise statistic computed in identity mode, see
     *         {@code basicStat(true)}
     */
    public double getIdentityStatistic() {
        final boolean identity = true;
        return this.basicStat(identity);
    }

    /**
     * @return the pairwise statistic computed in aligned-only mode, see
     *         {@code basicStat(false)}
     */
    public double getAlignedStatistics() {
        final boolean identity = false;
        return this.basicStat(identity);
    }

    /**
     * @return the mean, over all sequences, of the sequence length
     */
    public double getMeanSequenceLength() {
        SummaryStatistics lengths = new SummaryStatistics();
        for (String sequence : this.sequences().values()) {
            double length = (double)sequence.length();
            lengths.addValue(length);
        }
        return lengths.getMean();
    }

    /**
     * Averages the pairwise statistic over all ordered pairs of distinct taxa.
     *
     * @param isIdentity passed through to the pairwise computation
     * @return the mean of the pairwise values
     */
    private double basicStat(boolean isIdentity) {
        SummaryStatistics pairwise = new SummaryStatistics();
        for (Taxon first : this.taxa()) {
            for (Taxon second : this.taxa()) {
                if (!first.equals(second)) {
                    pairwise.addValue(this.basicStat(first, second, isIdentity));
                }
            }
        }
        return pairwise.getMean();
    }

    /**
     * Averages a 0/1 score over the positions of {@code t1}'s sequence. A position
     * scores 0 when its column has no point for {@code t2}. Otherwise it scores 1
     * in aligned-only mode, and in identity mode it scores 1 only when the two
     * taxa have the same character in the column.
     *
     * @param t1         the taxon whose positions are scanned
     * @param t2         the taxon compared against
     * @param isIdentity {@code true} for identity mode
     * @return the mean score
     */
    private double basicStat(Taxon t1, Taxon t2, boolean isIdentity) {
        SummaryStatistics perPosition = new SummaryStatistics();
        for (int pos = 0; pos < this.sequences().get(t1).length(); ++pos) {
            Column column = this.column(t1, pos);
            double score;
            if (!column.points.containsKey(t2)) {
                score = 0.0;
            } else if (!isIdentity) {
                score = 1.0;
            } else if (this.charAt(column, t1) == this.charAt(column, t2)) {
                score = 1.0;
            } else {
                score = 0.0;
            }
            perPosition.addValue(score);
        }
        return perPosition.getMean();
    }

    /**
     * @return the textual rendering of the whole alignment (no taxon restriction)
     */
    public String toString() {
        Set<Taxon> noRestriction = null;
        return this.toString(noRestriction);
    }

    /**
     * Renders the alignment as a borderless table with one row per taxon: the
     * taxon name, a {@code |} separator, and the gap-padded sequence.
     *
     * @param restriction the taxa to show, or {@code null} for all
     * @return the rendered table
     */
    public String toString(Set<Taxon> restriction) {
        Pair<Map<Taxon, Integer>, StringBuilder[]> padded = this.createPaddedStrings(restriction);
        Map<Taxon, Integer> rowOf = padded.getFirst();
        StringBuilder[] rows = padded.getSecond();
        Table table = new Table();
        // First the label and separator columns ...
        for (Map.Entry<Taxon, Integer> entry : rowOf.entrySet()) {
            int row = entry.getValue();
            table.set(row, 0, entry.getKey().toString());
            table.set(row, 1, "|");
        }
        // ... then the padded sequences.
        for (Map.Entry<Taxon, Integer> entry : rowOf.entrySet()) {
            int row = entry.getValue();
            table.set(row, 2, rows[row].toString());
        }
        table.setBorder(false);
        return table.toString();
    }

    /**
     * @return a new map giving, for every taxon, its sequence padded with
     *         {@code -} so that aligned characters share the same index
     */
    public Map<Taxon, String> gapPaddedSequences() {
        Pair<Map<Taxon, Integer>, StringBuilder[]> padded = this.createPaddedStrings(null);
        Map<Taxon, Integer> rowOf = padded.getFirst();
        StringBuilder[] rows = padded.getSecond();
        HashMap<Taxon, String> byTaxon = new HashMap<Taxon, String>();
        for (Map.Entry<Taxon, Integer> entry : rowOf.entrySet()) {
            int row = entry.getValue();
            byTaxon.put(entry.getKey(), rows[row].toString());
        }
        return byTaxon;
    }

    /**
     * Builds the gap-padded rows of the alignment. Taxa are sorted; walking the
     * columns in linear order, each row receives the taxon's character when the
     * column has a point for it and {@code -} otherwise. With a restriction,
     * only the restricted taxa that exist in this alignment get a row, and
     * columns containing none of the restriction's taxa are left out.
     *
     * @param restriction the taxa to show, or {@code null} for all
     * @return the taxon-to-row-index map together with the row builders
     */
    private Pair<Map<Taxon, Integer>, StringBuilder[]> createPaddedStrings(Set<Taxon> restriction) {
        Collection<Taxon> shown;
        if (restriction == null) {
            shown = this.sequences.keySet();
        } else {
            shown = CollUtils.inter(restriction, this.sequences.keySet());
        }
        ArrayList<Taxon> printOrder = new ArrayList<Taxon>(shown);
        Collections.sort(printOrder);
        Map<Taxon, Integer> rowOf = CollUtils.invert(printOrder);
        StringBuilder[] rows = new StringBuilder[printOrder.size()];
        for (Integer row : rowOf.values()) {
            rows[row] = new StringBuilder();
        }
        for (Column column : this.linearizedColumns()) {
            if (restriction == null || CollUtils.intersects(restriction, column.points.keySet())) {
                for (Map.Entry<Taxon, Integer> entry : rowOf.entrySet()) {
                    Taxon taxon = entry.getKey();
                    String symbol;
                    if (column.points.keySet().contains(taxon)) {
                        symbol = String.valueOf(this.sequences.get(taxon).charAt((Integer)column.points.get(taxon)));
                    } else {
                        symbol = "-";
                    }
                    rows[entry.getValue()].append(symbol);
                }
            }
        }
        return Pair.makePair(rowOf, rows);
    }

    /**
     * Fraction of the gold columns with more than one point that appear, with
     * exactly the same points, among the guess columns.
     *
     * @param gold  the reference alignment
     * @param guess the alignment being evaluated
     * @return the fraction; {@code NaN} when gold has no column with more than one
     *         point
     */
    public static double columnRecall(MSAPoset gold, MSAPoset guess) {
        Set<Map<Taxon, Integer>> goldColumns = gold.points();
        Set<Map<Taxon, Integer>> guessColumns = guess.points();
        int relevant = 0;
        int recovered = 0;
        for (Map<Taxon, Integer> goldColumn : goldColumns) {
            if (goldColumn.size() > 1) {
                ++relevant;
                if (guessColumns.contains(goldColumn)) {
                    ++recovered;
                }
            }
        }
        return (double)recovered / (double)relevant;
    }

    /**
     * Fraction of the links of {@code gold} that are also present in {@code guess}.
     *
     * @param gold  the reference alignment
     * @param guess the alignment being evaluated
     * @return the fraction; {@code NaN} when gold has no link
     */
    public static double edgeRecall(MSAPoset gold, MSAPoset guess) {
        int total = 0;
        int found = 0;
        for (GreedyDecoder.Edge link : gold.edges()) {
            ++total;
            if (guess.containsEdge(link)) {
                ++found;
            }
        }
        return (double)found / (double)total;
    }

    /**
     * Edge precision of {@code guess} against {@code gold}, computed as the edge
     * recall with the two roles swapped.
     *
     * @param gold  the reference alignment
     * @param guess the alignment being evaluated
     * @return the fraction of the links of {@code guess} present in {@code gold}
     */
    public static double edgePrecision(MSAPoset gold, MSAPoset guess) {
        double swappedRecall = MSAPoset.edgeRecall(guess, gold);
        return swappedRecall;
    }

    /**
     * Combines edge precision and edge recall with {@code MathUtils.f1Score}.
     *
     * @param gold  the reference alignment
     * @param guess the alignment being evaluated
     * @return the combined score
     */
    public static double edgeF1(MSAPoset gold, MSAPoset guess) {
        double precision = MSAPoset.edgePrecision(gold, guess);
        double recall = MSAPoset.edgeRecall(gold, guess);
        return MathUtils.f1Score(precision, recall);
    }

    /**
     * Extracts the upper-case part of a benchmark reference alignment: each column
     * is reduced to the points whose character is upper case, the reduced columns
     * are added to a fresh alignment, and the sequences of the result are then
     * converted to upper case.
     *
     * <p>The input alignment is left untouched: the result works on its own copy
     * of the sequence map.
     *
     * @param msa the reference alignment, with case marking which characters count
     * @return the processed alignment
     */
    public static MSAPoset processBenchmarkReference(MSAPoset msa) {
        // The constructor keeps the map it is given. Without this copy, the upper-casing
        // at the end would rewrite msa's own sequences and erase its case marking.
        MSAPoset result = new MSAPoset(new HashMap<Taxon, String>(msa.sequences));
        // Columns are added with linearization off; the order is rebuilt once at the end.
        result.disableLinearization();
        for (Column c : msa.linearizedColumns.keySet()) {
            HashMap<Taxon, Integer> processed = CollUtils.map();
            for (Taxon item : c.points.keySet()) {
                if (Character.isUpperCase(msa.charAt(c, item))) {
                    processed.put(item, (Integer)c.points.get(item));
                }
            }
            result.tryAdding(processed);
        }
        for (Taxon lang : result.taxa()) {
            result.sequences.put(lang, result.sequences.get(lang).toUpperCase());
        }
        result.enableLinearization();
        return result;
    }

    /**
     * Legacy variant of {@link #processBenchmarkReference}: works link by link,
     * keeping the links whose two ends are both upper case, then converts the
     * sequences of the result to upper case. Prints a warning when called.
     *
     * <p>The input alignment is left untouched: the result works on its own copy
     * of the sequence map.
     *
     * @param msa the reference alignment, with case marking which characters count
     * @return the processed alignment
     */
    public static MSAPoset _processBenchmarkReference(MSAPoset msa) {
        System.out.println("Warning! Using legacy implementation!");
        // The constructor keeps the map it is given. Without this copy, the upper-casing
        // below would rewrite msa's own sequences and erase its case marking.
        MSAPoset result = new MSAPoset(new HashMap<Taxon, String>(msa.sequences));
        for (GreedyDecoder.Edge e : msa.edges()) {
            if (Character.isUpperCase(msa.charAt(e, true)) && Character.isUpperCase(msa.charAt(e, false))) {
                result.tryAdding(e);
            }
        }
        for (Taxon lang : result.taxa()) {
            result.sequences.put(lang, result.sequences.get(lang).toUpperCase());
        }
        return result;
    }

    /**
     * Randomized stress test: on 10 identical sequences "012345678", repeatedly
     * tries to add a random link and, every other iteration, splits a random
     * multi-point column, checking invariants after each operation. The random
     * generator is seeded, so runs are reproducible.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int maxL = 9;
        Random rand = new Random(1L);
        int nLangs = 10;
        HashMap<Taxon, String> seqs = CollUtils.map();
        for (int i = 0; i < nLangs; ++i) {
            Taxon l = new Taxon("l" + i);
            String word = "";
            for (int w = 0; w < maxL; ++w) {
                word = word + w;
            }
            seqs.put(l, word);
        }
        MSAPoset msa = new MSAPoset(seqs);
        System.out.println(msa);
        ArrayList list = new ArrayList(seqs.keySet());
        int id = 0;
        for (int i = 0; i < 1000000; ++i) {
            // Draw a random link between two random positions.
            Taxon l1 = (Taxon)list.get(rand.nextInt(list.size()));
            Taxon l2 = (Taxon)list.get(rand.nextInt(list.size()));
            int i1 = rand.nextInt(((String)seqs.get(l1)).length());
            int i2 = rand.nextInt(((String)seqs.get(l2)).length());
            GreedyDecoder.Edge e = new GreedyDecoder.Edge(i1, i2, l1, l2);
            if (i % 10000 == 0) {
                System.out.println(i + " Trying to add edge: " + e);
            }
            // The dry run must leave the alignment unchanged ...
            MultiAlignment bu = msa.toMultiAlignmentObject();
            boolean test = msa.isValidAddition(e);
            if (!bu.equals(msa.toMultiAlignmentObject())) {
                throw new RuntimeException();
            }
            boolean success = msa.tryAdding(e);
            if (MSAPoset._testArcs(msa) > 0) {
                throw new RuntimeException("Sanity failed after insert. ID:" + id);
            }
            ++id;
            // ... and must predict the outcome of the real addition.
            if (success != test) {
                throw new RuntimeException();
            }
            if (i % 10000 == 0) {
                System.out.println("Success: " + success);
                System.out.println("New align:\n" + msa);
                System.out.println();
            }
            // Every other iteration, split a random column that has several points.
            if (i % 2 != 0) continue;
            ArrayList cols = CollUtils.list();
            for (Column c : msa.linearizedColumns.keySet()) {
                if (c.points.size() <= 1) continue;
                cols.add(c);
            }
            if (cols.size() <= 1) continue;
            Column c = (Column)cols.get(rand.nextInt(cols.size()));
            HashSet<Taxon> toKeep = CollUtils.set();
            LinkedList available = new LinkedList(c.points.keySet());
            // The first taxon is always kept and the second is never kept, so the split
            // is neither empty nor the whole column; the others are kept at random.
            toKeep.add((Taxon)available.poll());
            available.poll();
            for (Taxon lang : available) {
                if (!rand.nextBoolean()) continue;
                toKeep.add(lang);
            }
            if (i % 10000 == 0) {
                System.out.println("Splitting(" + c.points + "," + toKeep + ")");
            }
            msa.split(c, toKeep);
            if (MSAPoset._testArcs(msa) > 0) {
                throw new RuntimeException("Sanity failed after split");
            }
            if (i % 10000 != 0) continue;
            System.out.println("New align:\n" + msa);
            System.out.println();
        }
    }

    /**
     * Writes the alignment in FASTA format: for each taxon, a {@code >name} line
     * followed by one line with the gap-padded sequence ({@code -} for columns
     * where the taxon has no point).
     *
     * <p>The writer is closed even when writing fails part-way, for instance when
     * linearization is disabled and the column order cannot be produced.
     *
     * @param f the destination file
     */
    public void toFASTA(File f) {
        PrintWriter out = IOUtils.openOutHard(f);
        try {
            for (Taxon t : this.taxa) {
                String curSeq = this.sequences.get(t);
                out.append(">" + t + "\n");
                for (Column c : this.linearizedColumns()) {
                    if (c.points.containsKey(t)) {
                        out.append("" + curSeq.charAt((Integer)c.points.get(t)));
                        continue;
                    }
                    out.append("-");
                }
                out.append("\n");
            }
        }
        finally {
            // Release the file handle on every path, including exceptions.
            out.close();
        }
    }

    /**
     * Builds an {@link MSAPoset} from a gap-padded alignment stored in a FASTA-like file.
     *
     * <p>Line handling:
     * <ul>
     *   <li>blank lines and lines starting with {@code ;} are skipped;</li>
     *   <li>a line starting with {@code >} opens a new taxon whose name is the rest of the line;</li>
     *   <li>any other line must consist only of ASCII letters and the gap characters
     *       {@code .}, {@code -} and {@code ?}; letters are appended to the current taxon's
     *       ungapped sequence, gap characters only consume an alignment position.</li>
     * </ul>
     *
     * <p>Once every taxon has been read, each alignment position is offered to
     * {@code tryAdding} as a map from taxon to the index of its residue in the ungapped
     * sequence (or, when {@link #_useParseDebug} is set, as one pairwise edge per pair of
     * taxa that both have a residue at that position). Linearization is disabled while the
     * positions are added and re-enabled before returning.
     *
     * @param f the alignment file to read
     * @return the alignment described by {@code f}
     * @throws RuntimeException if a taxon name is repeated, if sequence data appears before
     *         the first {@code >} line, if a line contains unsupported characters, or if the
     *         gap-padded rows do not all have the same length
     */
    public static MSAPoset parseFASTA(File f) {
        // Per taxon: one entry per alignment position, holding the residue's index in the
        // ungapped sequence, or null where the row has a gap.
        HashMap<Taxon, List<Integer>> alignData = CollUtils.map();
        // Per taxon: the ungapped sequence accumulated so far.
        HashMap<Taxon, StringBuilder> stringData = CollUtils.map();
        Taxon currentTaxon = null;
        for (String line : IO.i(f)) {
            if (line.matches("^\\s*$") || line.matches("^[;].*")) {
                continue;
            }
            if (line.matches("[>].*")) {
                currentTaxon = new Taxon(StringUtils.selectFirstRegex("[>](.*)", line));
                if (alignData.containsKey(currentTaxon)) {
                    throw new RuntimeException("Duplicated taxon name:" + currentTaxon);
                }
                alignData.put(currentTaxon, new ArrayList<Integer>());
                stringData.put(currentTaxon, new StringBuilder());
                continue;
            }
            // '?' is accepted here because the per-character loop below treats it as a gap;
            // the previous pattern rejected it, which made that case unreachable.
            if (!line.matches("[a-zA-Z.?-]*")) {
                throw new RuntimeException("Invalid line:" + line);
            }
            if (currentTaxon == null) {
                throw new RuntimeException("Sequences should be preceded by the taxon name using a line of the form \">[name]\"");
            }
            List<Integer> positions = alignData.get(currentTaxon);
            StringBuilder residues = stringData.get(currentTaxon);
            for (char c : line.toCharArray()) {
                if (c == '.' || c == '-' || c == '?') {
                    positions.add(null);
                } else {
                    // The index of this residue is the number of residues seen so far.
                    positions.add(residues.length());
                    residues.append(c);
                }
            }
        }

        HashMap<Taxon, String> strings = CollUtils.map();
        for (Map.Entry<Taxon, StringBuilder> entry : stringData.entrySet()) {
            strings.put(entry.getKey(), entry.getValue().toString());
        }
        MSAPoset mSAPoset = new MSAPoset(strings);
        mSAPoset.disableLinearization();

        // All gap-padded rows must span the same number of alignment positions.
        int len = -1;
        for (List<Integer> positions : alignData.values()) {
            if (len == -1) {
                len = positions.size();
            } else if (len != positions.size()) {
                throw new RuntimeException("Invalid alignment spec: all gap-padded seqns should have the same len");
            }
        }

        List<Taxon> langs = CollUtils.list(alignData.keySet());
        for (int p = 0; p < len; ++p) {
            if (_useParseDebug) {
                IO.warnOnce("Warning: Using parse debug!");
                // Debug path: add the position as individual pairwise edges.
                for (int l1i = 0; l1i < langs.size(); ++l1i) {
                    for (int l2i = l1i + 1; l2i < langs.size(); ++l2i) {
                        Taxon l1 = langs.get(l1i);
                        Taxon l2 = langs.get(l2i);
                        Integer i1 = alignData.get(l1).get(p);
                        Integer i2 = alignData.get(l2).get(p);
                        if (i1 == null || i2 == null) {
                            continue;
                        }
                        mSAPoset.tryAdding(new GreedyDecoder.Edge(i1, i2, l1, l2));
                    }
                }
                continue;
            }
            // Normal path: add the whole position at once, skipping taxa that have a gap.
            HashMap<Taxon, Integer> points = CollUtils.map();
            for (Taxon lang : langs) {
                Integer index = alignData.get(lang).get(p);
                if (index != null) {
                    points.put(lang, index);
                }
            }
            mSAPoset.tryAdding(points);
        }
        mSAPoset.enableLinearization();
        return mSAPoset;
    }

    /**
     * Returns the partial order over the columns of this alignment.
     *
     * @return the internal partial-order object itself (not a copy)
     */
    public TopoSort.PartialOrder<Column> getPoset() {
        return poset;
    }

    /**
     * Compares two alignments by content rather than by identity.
     *
     * <p>They are considered equal when their {@code sequences()} are equal and the sets
     * built from their {@code edges()} are equal. The edges are only examined when the
     * sequences already match.
     *
     * @param msa1 the first alignment
     * @param msa2 the second alignment
     * @return {@code true} if both alignments have equal sequences and equal edge sets
     */
    public static boolean deepEquals(MSAPoset msa1, MSAPoset msa2) {
        boolean sameSequences = msa1.sequences().equals(msa2.sequences());
        return sameSequences
                && CollUtils.set(msa1.edges()).equals(CollUtils.set(msa2.edges()));
    }

    /**
     * Command-line utility that loads one data set through
     * {@code PreprocessGutellData.randomDataSet}, prints the JVM's total memory in MiB,
     * and saves the resulting alignment to {@code <path>.bin}.
     *
     * <p>NOTE(review): the input file is a hard-coded absolute path, so this only works on
     * a machine where that file exists.
     */
    public static class SaveMSAPoset
    implements Runnable {
        /** Output location; the alignment is written to this path with {@code .bin} appended. */
        @Option
        public File path = null;

        public static void main(String[] args) {
            IO.runLight(args, new SaveMSAPoset());
        }

        /**
         * Loads the data set, reports memory usage and saves the alignment.
         *
         * @throws NullPointerException if {@link #path} has not been set
         */
        @Override
        public void run() {
            // Fail fast with a clear message: previously an unset option only surfaced as a
            // bare NullPointerException after the (expensive) data set had been loaded.
            if (this.path == null) {
                throw new NullPointerException("SaveMSAPoset: option 'path' must be set (output is written to <path>.bin)");
            }
            int n = Integer.MAX_VALUE;
            MSAPoset msa = PreprocessGutellData.randomDataSet(new File("/Users/bouchard/Documents/data/gutell/16S.3.alnfasta"), 1, n, new Random(1L)).get(0);
            File outFile = new File(this.path.getAbsolutePath() + ".bin");
            // Request garbage collection several times before sampling memory. System.gc() is
            // only a hint to the JVM; presumably repeated to get a steadier reading.
            for (int i = 0; i < 5; ++i) {
                System.gc();
            }
            double totalMiB = (double)Runtime.getRuntime().totalMemory() / 1024.0 / 1024.0;
            System.out.println(n + "\t" + totalMiB);
            MSAPoset.save(msa, outFile);
            System.out.println("Done");
        }
    }

    /**
     * One column of the alignment: for each taxon present in the column, the position of
     * the aligned character within that taxon's sequence.
     *
     * <p>Equality and hashing are identity-based: two distinct {@code Column} objects are
     * never equal, even when they hold the same points.
     */
    public static class Column
    implements Serializable {
        private static final long serialVersionUID = 1L;

        /** Taxon to sequence position; created with a small initial capacity. */
        private Map<Taxon, Integer> points = new HashMap<Taxon, Integer>(2, 0.75f);

        /** Creates a column with no points. */
        public Column() {
        }

        /**
         * Creates a column containing a single point.
         *
         * @param lang the taxon the point belongs to
         * @param i the position within that taxon's sequence
         */
        public Column(Taxon lang, int i) {
            this.points.put(lang, i);
        }

        /**
         * Returns a read-only view of the points in this column.
         *
         * @return an unmodifiable map from taxon to sequence position
         */
        public Map<Taxon, Integer> getPoints() {
            Map<Taxon, Integer> readOnly = Collections.unmodifiableMap(this.points);
            return readOnly;
        }

        /**
         * Returns the result of {@code MSAPoset.spanningEdges} applied to this column's points.
         *
         * @return the edges computed for this column
         */
        public List<GreedyDecoder.Edge> spanningEdges() {
            return MSAPoset.spanningEdges(this.points);
        }

        /** Tells whether this column and {@code c2} have no taxon in common. */
        private boolean disjoint(Column c2) {
            return Collections.disjoint(this.points.keySet(), c2.points.keySet());
        }

        /** Identity comparison, same as {@link Object#equals(Object)}. */
        @Override
        public boolean equals(Object obj) {
            return this == obj;
        }

        /** Identity hash, same as {@link Object#hashCode()}. */
        @Override
        public int hashCode() {
            return System.identityHashCode(this);
        }

        /** Returns the string form of the underlying taxon-to-position map. */
        @Override
        public String toString() {
            return this.points.toString();
        }
    }

    /**
     * Partial order over columns that is not stored explicitly: successors and predecessors
     * are derived on demand from the enclosing alignment's per-taxon column arrays.
     */
    public class ImplicitPoset
    implements TopoSort.PartialOrder<Column>,
    Serializable {
        private static final long serialVersionUID = 1L;

        /**
         * Collects, for every taxon present in {@code n}, the column holding the next
         * position of that taxon's sequence (if there is one).
         *
         * @param n the column whose successors are wanted
         * @return the set of immediate successor columns
         */
        @Override
        public Set<Column> next(Column n) {
            Set<Column> successors = CollUtils.set();
            for (Map.Entry<Taxon, Integer> point : n.points.entrySet()) {
                Taxon taxon = point.getKey();
                int following = point.getValue() + 1;
                // Only positions that still lie inside the taxon's sequence have a successor.
                if (following < MSAPoset.this.sequences().get(taxon).length()) {
                    Column[] columnsOfTaxon = MSAPoset.this.columnMaps.get(taxon);
                    successors.add(columnsOfTaxon[following]);
                }
            }
            return successors;
        }

        /**
         * Collects, for every taxon present in {@code n}, the column holding the previous
         * position of that taxon's sequence (if there is one).
         *
         * @param n the column whose predecessors are wanted
         * @return the set of immediate predecessor columns
         */
        @Override
        public Set<Column> prev(Column n) {
            Set<Column> predecessors = CollUtils.set();
            for (Map.Entry<Taxon, Integer> point : n.points.entrySet()) {
                int position = point.getValue();
                // The first position of a sequence has no predecessor.
                if (position > 0) {
                    Column[] columnsOfTaxon = MSAPoset.this.columnMaps.get(point.getKey());
                    predecessors.add(columnsOfTaxon[position - 1]);
                }
            }
            return predecessors;
        }

        /**
         * Returns the columns of the enclosing alignment.
         *
         * @return the key set of the enclosing alignment's linearized-column map (a view,
         *         not a copy)
         */
        @Override
        public Set<Column> nodes() {
            Map<Column, Double> linearized = MSAPoset.this.linearizedColumns;
            return linearized.keySet();
        }
    }

    /**
     * Immutable holder for a precision value, a recall value and a posterior value.
     *
     * <p>NOTE(review): presumably one point of a precision/recall curve obtained at the
     * given posterior threshold; confirm against the code that creates these.
     */
    public static class ROCPoint {
        /** Precision component of this point. */
        public final double precision;
        /** Recall component of this point. */
        public final double recall;
        /** Posterior component of this point. */
        public final double posterior;

        /**
         * Stores the three values unchanged.
         *
         * @param precision the precision value
         * @param recall the recall value
         * @param posterior the posterior value
         */
        public ROCPoint(double precision, double recall, double posterior) {
            this.posterior = posterior;
            this.recall = recall;
            this.precision = precision;
        }
    }
}

