/*
 * Decompiled with CFR 0.152.
 */
package goblin;

import fig.basic.UnorderedPair;
import goblin.DerivationTree;
import goblin.EditsTracker;
import goblin.ObservationsTracker;
import goblin.ParamsTracker;
import goblin.Taxon;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import ma.AffineGapAlignmentSampler;
import ma.MultiAlignment;
import nuts.io.IO;
import nuts.lispparser.LispParser;
import nuts.lispparser.ParseException;
import nuts.math.MeasureZeroException;
import nuts.util.All2OneMap;
import nuts.util.Arbre;
import nuts.util.CollUtils;
import nuts.util.Tree;
import pepper.Corpus;
import pepper.Edit;
import pepper.editmodel.EditParam;
import pepper.editmodel.ObservedWordSampler;
import pepper.editmodel.SinkWordSampler;

public class DataPrepUtils {
    /**
     * Minimum number of known entries a corpus row must contain for
     * {@link #isValidForHeldout(Corpus, int, String)} to accept it.
     * This field is public and non-final, so callers can reconfigure it globally.
     */
    public static int nMinEntriesForHeldout = 2;
    /**
     * Name suggests the default minimum length of the word generated at the top
     * (root) of a topology; its value matches the {@code minTopLength} used by the
     * public {@code generate} overload. NOTE(review): confirm against callers.
     */
    public static final int DEFAULT_MIN_TOP_LENGTH = 2;

    /**
     * Builds a tree of languages from a tree of derivation nodes by mapping,
     * in post-order, every node to the language stored in its contents.
     *
     * @param a the derivation tree to convert
     * @return the tree produced by {@code postOrderMap} with each node's language as contents
     */
    public static Arbre<Taxon> arbreDeriv2Lang(Arbre<DerivationTree.DerivationNode> a) {
        Arbre.ArbreMap<DerivationTree.DerivationNode, Taxon> languageOf = new Arbre.ArbreMap<DerivationTree.DerivationNode, Taxon>(){

            @Override
            public Taxon map(Arbre<DerivationTree.DerivationNode> node) {
                DerivationTree.DerivationNode contents = node.getContents();
                return contents.getLanguage();
            }
        };
        return a.postOrderMap(languageOf);
    }

    /**
     * Serializes a topology without branch lengths, keeping internal node names.
     *
     * @param topology the tree to serialize
     * @return the serialized tree, terminated by {@code ;}
     */
    public static <S> String newick(Tree<S> topology) {
        final Map<Taxon, Double> noBranchLengths = null;
        final boolean ignoreInternalNames = false;
        return DataPrepUtils.newick(topology, noBranchLengths, ignoreInternalNames);
    }

    /**
     * Serializes a topology, optionally annotating each non-root node with the
     * branch length registered for its taxon.
     *
     * @param topology the tree to serialize
     * @param lengths branch lengths keyed by taxon, or {@code null} for none
     * @param ignoreInternalNames when {@code true}, only leaf labels are written
     * @return the serialized tree, terminated by {@code ;}
     */
    public static <S> String newick(Tree<S> topology, Map<Taxon, Double> lengths, boolean ignoreInternalNames) {
        String body = DataPrepUtils.newickRec(topology, lengths, true, ignoreInternalNames, null);
        return body + ";";
    }

    /**
     * Serializes a topology, optionally annotating each non-root node with the
     * branch length registered for the (node, parent) pair.
     *
     * @param topology the tree to serialize
     * @param lengths branch lengths keyed by unordered (child, parent) taxon pairs, or {@code null} for none
     * @param ignoreInternalNames when {@code true}, only leaf labels are written
     * @return the serialized tree, terminated by {@code ;}
     */
    public static <S> String newick2(Tree<S> topology, Map<UnorderedPair<Taxon, Taxon>, Double> lengths, boolean ignoreInternalNames) {
        String body = DataPrepUtils.newickRec(topology, lengths, true, ignoreInternalNames, null);
        return body + ";";
    }

    /**
     * Recursively serializes a subtree: the children (if any) in parentheses
     * separated by commas, then the node label, then {@code :length} for
     * non-root nodes when branch lengths are supplied.
     *
     * <p>The kind of key used by {@code _lengths} is detected from its first key:
     * either a {@link Taxon} (length looked up by the current node) or an
     * {@link UnorderedPair} (length looked up by the (current, parent) pair).
     * An empty map is accepted; it only fails when a length is actually needed.
     *
     * @param topology the subtree to serialize
     * @param _lengths branch lengths, or {@code null} to omit them
     * @param isRoot whether {@code topology} is the root (the root gets no length)
     * @param ignoreInternalNames when {@code true}, only leaf labels are written
     * @param parent taxon of the parent node; only read for non-root nodes with pair-keyed lengths
     * @return the serialized subtree, without the trailing {@code ;}
     * @throws IllegalArgumentException if a label contains one of {@code ( ) , : ;},
     *         if the map keys are neither taxa nor pairs, or if a required length is missing
     */
    private static <S> String newickRec(Tree<S> topology, Map<?, ?> _lengths, boolean isRoot, boolean ignoreInternalNames, Taxon parent) {
        boolean keyedByTaxon = false;
        if (_lengths != null && !_lengths.isEmpty()) {
            Object sampleKey = _lengths.keySet().iterator().next();
            if (sampleKey instanceof Taxon) {
                keyedByTaxon = true;
            } else if (!(sampleKey instanceof UnorderedPair)) {
                throw new IllegalArgumentException("Branch lengths must be keyed by Taxon or UnorderedPair, found: " + (sampleKey == null ? "null" : sampleKey.getClass().getName()));
            }
        }
        S rawLabel = topology.getLabel();
        String label = rawLabel == null ? "" : rawLabel.toString();
        // These characters are structural in the output and are not escaped, so reject them.
        if (label.contains("(") || label.contains(")") || label.contains(",") || label.contains(":") || label.contains(";")) {
            throw new IllegalArgumentException("Label contains a reserved character among ( ) , : ; : " + label);
        }
        Taxon current = new Taxon(label);
        int nChildren = topology.getChildren().size();
        StringBuilder result = new StringBuilder();
        if (nChildren > 0) {
            result.append('(');
            for (int i = 0; i < nChildren; ++i) {
                if (i > 0) {
                    result.append(',');
                }
                Tree<S> child = topology.getChildren().get(i);
                result.append(DataPrepUtils.newickRec(child, _lengths, false, ignoreInternalNames, current));
            }
            result.append(')');
        }
        if (!ignoreInternalNames || nChildren == 0) {
            result.append(label);
        }
        if (!isRoot && _lengths != null) {
            Object key = keyedByTaxon ? current : new UnorderedPair<Taxon, Taxon>(current, parent);
            Object length = _lengths.get(key);
            if (length == null) {
                throw new IllegalArgumentException("No branch length found for node '" + label + "'");
            }
            result.append(':').append(((Double)length).doubleValue());
        }
        return result.toString();
    }

    /**
     * Converts a tree of language names into an arbre of derivation nodes,
     * attaching to each node the word registered for its language (or
     * {@code null} when the map has no entry for it).
     *
     * @param langs tree whose labels are language names
     * @param words words keyed by language name
     * @return an arbre with the same shape as {@code langs}
     */
    public static Arbre<DerivationTree.DerivationNode> tree2arbre(Tree<String> langs, Map<String, String> words) {
        String langName = langs.getLabel();
        String word = words.get(langName);
        Taxon taxon = new Taxon(langName);
        Arbre<DerivationTree.DerivationNode> subtree = Arbre.arbre(new DerivationTree.DerivationNode(taxon, word));
        for (Tree<String> childLangs : langs.getChildren()) {
            Arbre<DerivationTree.DerivationNode> childSubtree = DataPrepUtils.tree2arbre(childLangs, words);
            subtree.addLeaves(childSubtree);
        }
        return subtree;
    }

    /**
     * Variant of {@link #tree2arbre(Tree, Map)} whose words are keyed by
     * {@link Taxon}; keys are converted with {@code toString()} before delegating.
     *
     * @param langs tree whose labels are language names
     * @param words words keyed by taxon
     * @return an arbre with the same shape as {@code langs}
     */
    public static Arbre<DerivationTree.DerivationNode> tree2arbre2(Tree<String> langs, Map<Taxon, String> words) {
        HashMap<String, String> wordsByName = new HashMap<String, String>();
        for (Map.Entry<Taxon, String> entry : words.entrySet()) {
            String name = entry.getKey().toString();
            wordsByName.put(name, entry.getValue());
        }
        return DataPrepUtils.tree2arbre(langs, wordsByName);
    }

    /**
     * Parses a string with {@link LispParser} into a tree of strings.
     *
     * @param string the text to parse
     * @return the parsed tree
     * @throws RuntimeException wrapping the {@link ParseException} if parsing fails
     */
    public static Tree<String> lisp2tree(String string) {
        Tree<String> parsed;
        try {
            parsed = new LispParser(string).parse();
        }
        catch (ParseException parseFailure) {
            throw new RuntimeException(parseFailure);
        }
        return parsed;
    }

    /**
     * Removes the node for {@code lang} from the tree and relabels its parent
     * with {@code lang} (the parent's contents become a node for {@code lang}
     * with no word).
     *
     * <p>All preconditions are checked before the tree is modified, so a failure
     * leaves the tree untouched.
     *
     * @param a the tree to search
     * @param lang the language whose node is removed
     * @throws IllegalArgumentException if no node is found for {@code lang}, if that node
     *         has no parent, or if it still carries a word or a derivation
     * @throws IllegalStateException if the parent does not have exactly one child
     *         whose language equals {@code lang}
     */
    public static void stem(Arbre<DerivationTree.DerivationNode> a, Taxon lang) {
        Arbre<DerivationTree.DerivationNode> toStem = DerivationTree.findNodeByLangName(a, lang);
        if (toStem == null) {
            throw new IllegalArgumentException("No node found for language " + lang);
        }
        Arbre<DerivationTree.DerivationNode> parent = toStem.getParent();
        if (parent == null) {
            throw new IllegalArgumentException("Cannot stem " + lang + ": its node has no parent");
        }
        DerivationTree.DerivationNode toStemC = toStem.getContents();
        if (toStemC.getWord() != null || toStemC.getDerivation() != null) {
            throw new IllegalArgumentException("Cannot stem " + lang + ": its node still carries a word or a derivation");
        }
        // First pass: count matching children without touching the tree.
        int nMatches = 0;
        Iterator<Arbre<DerivationTree.DerivationNode>> scan = parent.forceGetChildren().iterator();
        while (scan.hasNext()) {
            if (scan.next().getContents().getLanguage().equals(lang)) {
                ++nMatches;
            }
        }
        if (nMatches != 1) {
            throw new IllegalStateException("Expected exactly one child with language " + lang + " under its parent, found " + nMatches);
        }
        // Second pass: relabel the parent and drop the single matching child.
        parent.setContents(new DerivationTree.DerivationNode(lang, null));
        Iterator<Arbre<DerivationTree.DerivationNode>> iter = parent.forceGetChildren().iterator();
        while (iter.hasNext()) {
            Arbre<DerivationTree.DerivationNode> current = iter.next();
            if (current.getContents().getLanguage().equals(lang)) {
                iter.remove();
                break;
            }
        }
    }

    /**
     * Initializes derivations on every non-root node from a multiple alignment,
     * by pre-order mapping the tree through an {@code InitLongGapMap}.
     *
     * @param tree the tree whose nodes carry words
     * @param ma the multiple alignment providing the derivations
     * @return the mapped tree
     */
    public static Arbre<DerivationTree.DerivationNode> initDerivationUsingMultAlign(Arbre<DerivationTree.DerivationNode> tree, MultiAlignment ma) {
        InitLongGapMap alignmentInitializer = new InitLongGapMap(ma);
        return tree.preOrderMap(alignmentInitializer);
    }

    /**
     * Initializes derivations on every non-root node with an affine-gap
     * alignment sampler, using the same parameters for every language
     * (the parameters are wrapped in an {@code All2OneMap}).
     *
     * @param tree the tree whose nodes carry words
     * @param params the gap alignment parameters shared by all languages
     * @return the mapped tree
     */
    public static Arbre<DerivationTree.DerivationNode> initDerivationUsingSampler(Arbre<DerivationTree.DerivationNode> tree, AffineGapAlignmentSampler.GapAlignmentParams params) {
        InitLongGapMap samplingInitializer = new InitLongGapMap(new All2OneMap(params));
        return tree.preOrderMap(samplingInitializer);
    }

    /**
     * Initializes derivations on every non-root node with an affine-gap
     * alignment sampler, using per-language parameters.
     *
     * @param tree the tree whose nodes carry words
     * @param params gap alignment parameters keyed by language
     * @return the mapped tree
     */
    public static Arbre<DerivationTree.DerivationNode> initDerivationUsingSampler(Arbre<DerivationTree.DerivationNode> tree, Map<Taxon, AffineGapAlignmentSampler.GapAlignmentParams> params) {
        InitLongGapMap samplingInitializer = new InitLongGapMap(params);
        return tree.preOrderMap(samplingInitializer);
    }

    /**
     * Builds a node with the same language and word as {@code contents} and a
     * derivation whose ancestor array is the identity ({@code ancestors[i] == i}),
     * with the node's word passed as both word arguments of the derivation.
     *
     * @param contents the node to copy; its word must be non-null
     * @return a new node carrying the identity derivation
     */
    public static DerivationTree.DerivationNode monotonicDerivation(DerivationTree.DerivationNode contents) {
        String word = contents.getWord();
        int[] identity = new int[word.length()];
        int position = 0;
        while (position < identity.length) {
            identity[position] = position;
            ++position;
        }
        DerivationTree.Derivation derivation = new DerivationTree.Derivation(identity, word, word);
        return new DerivationTree.DerivationNode(contents.getLanguage(), word, derivation);
    }

    /**
     * Samples derivations for every non-root node, recording the sampled edits
     * in a fresh {@link EditsTracker} that is not returned to the caller.
     *
     * @param tree the tree whose nodes carry words
     * @param tracker source of the per-language edit parameters
     * @param rand the random source
     * @return the mapped tree
     * @throws MeasureZeroException if sampling fails for some node
     */
    public static Arbre<DerivationTree.DerivationNode> sampleDerivationsUsingObservedSampler(Arbre<DerivationTree.DerivationNode> tree, ParamsTracker tracker, Random rand) throws MeasureZeroException {
        EditsTracker throwawayTracker = new EditsTracker();
        return DataPrepUtils.sampleDerivationsUsingObservedSampler(tree, tracker, rand, throwawayTracker);
    }

    /**
     * Samples derivations for every non-root node by pre-order mapping the tree
     * through a {@code DerivationSamplerMap}, recording the sampled edits in
     * {@code editsTracker}.
     *
     * <p>The map reports sampling failures as a {@link RuntimeException} wrapping a
     * {@link MeasureZeroException}; that wrapper is unwrapped here so callers
     * see the checked exception. Any other runtime exception is rethrown as is.
     *
     * @param tree the tree whose nodes carry words
     * @param tracker source of the per-language edit parameters
     * @param rand the random source
     * @param editsTracker receives the sampled edits
     * @return the mapped tree
     * @throws MeasureZeroException if sampling fails for some node
     */
    public static Arbre<DerivationTree.DerivationNode> sampleDerivationsUsingObservedSampler(Arbre<DerivationTree.DerivationNode> tree, ParamsTracker tracker, Random rand, EditsTracker editsTracker) throws MeasureZeroException {
        try {
            DerivationSamplerMap samplerMap = new DerivationSamplerMap(rand, tracker, editsTracker);
            return tree.preOrderMap(samplerMap);
        }
        catch (RuntimeException wrapper) {
            Throwable cause = wrapper.getCause();
            if (!(cause instanceof MeasureZeroException)) {
                throw wrapper;
            }
            throw (MeasureZeroException)cause;
        }
    }

    /**
     * Collects the characters that occur in the known words of one corpus row
     * but are absent from {@code chars}.
     *
     * @param corpus the corpus to inspect
     * @param row the row index
     * @param chars the set of accepted characters
     * @return the characters of the row that are not in {@code chars}
     */
    public static Set<Character> unknownCharacters(Corpus corpus, int row, Set<Character> chars) {
        HashSet<Character> missing = CollUtils.set();
        int nLangs = corpus.getNLangs();
        int column = 0;
        while (column < nLangs) {
            if (corpus.isKnown(row, column)) {
                String word = corpus.getWord(row, column);
                missing.addAll(DataPrepUtils.unknownCharacters(word, chars));
            }
            ++column;
        }
        return missing;
    }

    /**
     * Collects the characters of {@code word} that are absent from {@code knownChars}.
     *
     * @param word the word to inspect
     * @param knownChars the set of accepted characters
     * @return the characters of {@code word} that are not in {@code knownChars}
     */
    public static Set<Character> unknownCharacters(String word, Set<Character> knownChars) {
        HashSet<Character> missing = CollUtils.set();
        char[] letters = word.toCharArray();
        for (int i = 0; i < letters.length; ++i) {
            Character letter = Character.valueOf(letters[i]);
            if (!knownChars.contains(letter)) {
                missing.add(letter);
            }
        }
        return missing;
    }

    /**
     * Counts the corpus rows whose known words only use characters from {@code chars}.
     *
     * @param corpus the corpus to inspect
     * @param chars the set of accepted characters
     * @return the number of rows without any unknown character
     */
    public static int nValidRows(Corpus corpus, Set<Character> chars) {
        int nValid = 0;
        int nRows = corpus.getNWords();
        for (int row = 0; row < nRows; ++row) {
            Set<Character> missing = DataPrepUtils.unknownCharacters(corpus, row, chars);
            if (missing.isEmpty()) {
                ++nValid;
            }
        }
        return nValid;
    }

    /**
     * Collects, over all rows of the corpus, the characters of known words that
     * are absent from {@code chars}.
     *
     * @param corpus the corpus to inspect
     * @param chars the set of accepted characters
     * @return the union of the per-row unknown characters
     */
    public static Set<Character> unknownCharacters(Corpus corpus, Set<Character> chars) {
        HashSet<Character> missing = CollUtils.set();
        int nRows = corpus.getNWords();
        int row = 0;
        while (row < nRows) {
            Set<Character> missingInRow = DataPrepUtils.unknownCharacters(corpus, row, chars);
            missing.addAll(missingInRow);
            ++row;
        }
        return missing;
    }

    /**
     * Counts the corpus rows that contain no unknown character and that
     * {@link #isValidForHeldout(Corpus, int, String)} accepts for {@code heldoutLang}.
     *
     * @param corpus the corpus to inspect
     * @param heldoutLang the held-out language
     * @param chars the set of accepted characters
     * @return the number of rows satisfying both conditions
     */
    public static int nValidHeldoutRows(Corpus corpus, String heldoutLang, Set<Character> chars) {
        int nValid = 0;
        int nRows = corpus.getNWords();
        for (int row = 0; row < nRows; ++row) {
            boolean cleanRow = DataPrepUtils.unknownCharacters(corpus, row, chars).isEmpty();
            // The held-out check is only evaluated for rows that are clean.
            if (cleanRow && DataPrepUtils.isValidForHeldout(corpus, row, heldoutLang)) {
                ++nValid;
            }
        }
        return nValid;
    }

    /**
     * Tells whether a row can be used for held-out evaluation of a language:
     * the held-out language's entry must be known, and the row must contain at
     * least {@code nMinEntriesForHeldout} known entries in total.
     *
     * @param corpus the corpus to inspect
     * @param row the row index
     * @param heldoutLang the held-out language
     * @return {@code true} if both conditions hold
     */
    public static boolean isValidForHeldout(Corpus corpus, int row, String heldoutLang) {
        if (corpus.isKnown(row, heldoutLang)) {
            int knownEntries = 0;
            int nLangs = corpus.getNLangs();
            for (int column = 0; column < nLangs; ++column) {
                if (corpus.isKnown(row, column)) {
                    ++knownEntries;
                }
            }
            return knownEntries >= nMinEntriesForHeldout;
        }
        return false;
    }

    /**
     * Fills in missing words by copying from a randomly chosen neighbor
     * (see the three-argument overload, called with {@code putLongerWords = false}).
     *
     * @param root the root of the tree to fill in
     * @param rand the random source
     */
    public static void fillInWords(Arbre<DerivationTree.DerivationNode> root, Random rand) {
        final boolean putLongerWords = false;
        DataPrepUtils.fillInWords(root, rand, putLongerWords);
    }

    /**
     * Assigns a word to every node that lacks one by copying the word of a
     * neighbor (parent or child). Runs as many passes as there are nodes; each
     * pass shuffles the node list and tries to fill every node still without a word.
     *
     * @param root the root of the tree to fill in
     * @param rand the random source (used for shuffling and neighbor choice)
     * @param putLongerWords when {@code true}, the longest neighboring word is
     *        copied instead of a random one
     * @throws RuntimeException if the tree contains no observed word at all
     */
    public static void fillInWords(Arbre<DerivationTree.DerivationNode> root, Random rand, boolean putLongerWords) {
        ObservationsTracker observed = DataPrepUtils.observations(root);
        if (observed.observedLanguages().size() == 0) {
            throw new RuntimeException("There should be observations");
        }
        List<Arbre<DerivationTree.DerivationNode>> nodes = root.nodes();
        int pass = 0;
        while (pass < nodes.size()) {
            Collections.shuffle(nodes, rand);
            for (Arbre<DerivationTree.DerivationNode> candidate : nodes) {
                boolean hasWord = candidate.getContents().getWord() != null;
                if (!hasWord) {
                    DataPrepUtils.fillInWord(candidate, rand, putLongerWords);
                }
            }
            ++pass;
        }
    }

    /**
     * Tries to give {@code node} a word copied from one of its neighbors: the
     * parent (if the node is not the root) and the children, restricted to those
     * that currently have a word. The node's language and derivation are kept.
     *
     * @param node the node to fill in
     * @param rand the random source used when {@code putLongerWords} is {@code false}
     * @param putLongerWords when {@code true}, the longest neighboring word is
     *        chosen; otherwise a neighbor is picked uniformly at random
     * @return {@code true} if a word was assigned, {@code false} if no neighbor has a word
     */
    private static boolean fillInWord(Arbre<DerivationTree.DerivationNode> node, Random rand, boolean putLongerWords) {
        List<String> nbhrs = CollUtils.list();
        if (!node.isRoot()) {
            String parentWord = node.getParent().getContents().getWord();
            if (parentWord != null) {
                nbhrs.add(parentWord);
            }
        }
        for (Arbre<DerivationTree.DerivationNode> child : node.getChildren()) {
            String childWord = child.getContents().getWord();
            if (childWord != null) {
                nbhrs.add(childWord);
            }
        }
        if (nbhrs.isEmpty()) {
            return false;
        }
        int index = putLongerWords ? DataPrepUtils.findLongest(nbhrs) : rand.nextInt(nbhrs.size());
        String chosen = nbhrs.get(index);
        DerivationTree.DerivationNode original = node.getContents();
        node.setContents(new DerivationTree.DerivationNode(original.getLanguage(), chosen, original.getDerivation()));
        return true;
    }

    /**
     * Finds the index of the longest string in the list; on ties the earliest
     * index wins.
     *
     * @param nbhrs the candidate strings
     * @return the index of the longest string, or {@code -1} for an empty list
     */
    private static int findLongest(List<String> nbhrs) {
        int bestIndex = -1;
        int bestLength = -1;
        int index = 0;
        for (String candidate : nbhrs) {
            int length = candidate.length();
            if (length > bestLength) {
                bestLength = length;
                bestIndex = index;
            }
            ++index;
        }
        return bestIndex;
    }

    /**
     * Generates one word per node of the topology, starting from a root word
     * drawn from the root phoneme model and of length at least
     * {@link #DEFAULT_MIN_TOP_LENGTH}.
     *
     * @param rand the random source
     * @param topology tree whose labels are language names
     * @param params source of the root phoneme model and per-language edit parameters
     * @return the generated words, each node's word followed by those of its subtrees
     */
    public static List<String> generate(Random rand, Tree<String> topology, ParamsTracker params) {
        // Use the named constant rather than repeating its value as a magic number.
        return DataPrepUtils.generate(rand, topology, params, null, DEFAULT_MIN_TOP_LENGTH);
    }

    /**
     * Recursive worker for word generation. With no parent word, the word is
     * redrawn from the root phoneme model until its length reaches
     * {@code minTopLength}; otherwise it is sampled from the parent word with the
     * edit parameters of the node's language. The node's word is then used as the
     * parent word for each child.
     *
     * @param rand the random source
     * @param topology subtree whose labels are language names
     * @param params source of the root phoneme model and per-language edit parameters
     * @param parentWord the word of the parent node, or {@code null} at the top
     * @param minTopLength minimum accepted length for the top word
     * @return this node's word followed by the words of its subtrees, in child order
     */
    private static List<String> generate(Random rand, Tree<String> topology, ParamsTracker params, String parentWord, int minTopLength) {
        Taxon lang = new Taxon(topology.getLabel());
        String word;
        if (parentWord != null) {
            word = SinkWordSampler.sample(rand, params.getEditParam(lang), parentWord);
        } else {
            // Rejection loop: keep drawing until the word is long enough.
            do {
                word = params.getRootPhonemeModel().generateFromBiGramPhonemeModel(rand);
            } while (word.length() < minTopLength);
        }
        ArrayList<String> generated = new ArrayList<String>();
        generated.add(word);
        for (Tree<String> subtree : topology.getChildren()) {
            List<String> fromSubtree = DataPrepUtils.generate(rand, subtree, params, word, minTopLength);
            generated.addAll(fromSubtree);
        }
        return generated;
    }

    /**
     * Builds an {@link ObservationsTracker} from the languages of all nodes
     * under {@code root} that currently have a word.
     *
     * @param root the tree to inspect
     * @return a tracker constructed from the set of languages with a non-null word
     */
    public static ObservationsTracker observations(Arbre<DerivationTree.DerivationNode> root) {
        HashSet<Taxon> languagesWithWord = CollUtils.set();
        for (Arbre<DerivationTree.DerivationNode> node : root.nodes()) {
            DerivationTree.DerivationNode contents = node.getContents();
            if (contents.getWord() != null) {
                languagesWithWord.add(node.getContents().getLanguage());
            }
        }
        return new ObservationsTracker(languagesWithWord);
    }

    /**
     * Trims a tree down to the part relevant to its known nodes (nodes whose
     * contents have a non-null word).
     *
     * <p>The tree is first replaced by the result of
     * {@code Arbre.lowestCommonAncestor} over the known contents; then every node
     * whose entry in {@code Arbre.descMap} does not intersect the known set is
     * passed to {@code Arbre.removeNode}.
     *
     * @param as the tree to trim
     * @return the subtree returned by {@code Arbre.lowestCommonAncestor}, after removals
     */
    public static Arbre<DerivationTree.DerivationNode> trim(Arbre<DerivationTree.DerivationNode> as) {
        as = Arbre.lowestCommonAncestor(as, DataPrepUtils.known(as));
        // Recomputed on the (possibly smaller) subtree returned above.
        Set<DerivationTree.DerivationNode> known = DataPrepUtils.known(as);
        // NOTE(review): presumably maps each node to the contents of its descendants
        // (whether the node itself is included is not visible here) -- confirm in Arbre.
        Map<Arbre<DerivationTree.DerivationNode>, Set<DerivationTree.DerivationNode>> descMap = Arbre.descMap(as);
        // NOTE(review): nodes are removed while iterating as.nodes(); this relies on
        // nodes() returning a list that removeNode does not modify -- confirm.
        for (Arbre<DerivationTree.DerivationNode> node : as.nodes()) {
            if (CollUtils.intersects(known, descMap.get(node))) continue;
            Arbre.removeNode(node);
        }
        return as;
    }

    /**
     * Collects the contents of all nodes under {@code root} that have a non-null word.
     *
     * @param root the tree to inspect
     * @return the set of node contents carrying a word
     */
    private static Set<DerivationTree.DerivationNode> known(Arbre<DerivationTree.DerivationNode> root) {
        HashSet<DerivationTree.DerivationNode> withWord = new HashSet<DerivationTree.DerivationNode>();
        for (Arbre<DerivationTree.DerivationNode> node : root.nodes()) {
            DerivationTree.DerivationNode contents = node.getContents();
            if (contents.getWord() != null) {
                withWord.add(node.getContents());
            }
        }
        return withWord;
    }

    /**
     * Resolves a parameter that is either a literal value or, when prefixed with
     * {@code !}, a reference to a file whose contents should be used instead.
     *
     * <p>A literal containing {@code .topo} triggers a warning on standard error,
     * since the caller probably meant to load that file. An empty string is
     * returned unchanged.
     *
     * @param str the raw parameter value; must not be {@code null}
     * @return {@code str} itself, or the result of {@code IO.f2s} on the text after the
     *         leading {@code !}
     */
    public static String optionallyLoad(String str) {
        // startsWith (rather than charAt(0)) keeps the empty string from throwing.
        if (str.startsWith("!")) {
            return IO.f2s(str.substring(1));
        }
        if (str.contains(".topo")) {
            System.err.println("WARNING: FOR PARAM TOPO, DID YOU MEAN: !" + str);
        }
        return str;
    }

    /**
     * Drops all derivations in the tree, then also drops the word of every node
     * whose language is not observed according to {@code obs}.
     *
     * @param tree any node of the tree; the whole tree (from its root) is processed
     * @param obs tells which languages are observed
     */
    public static void forgetUnobserved(Arbre<DerivationTree.DerivationNode> tree, ObservationsTracker obs) {
        DataPrepUtils.forgetDerivation(tree);
        for (Arbre<DerivationTree.DerivationNode> node : tree.root().nodes()) {
            Taxon language = node.getContents().getLanguage();
            if (!obs.isObserved(language)) {
                DerivationTree.DerivationNode wordless = DataPrepUtils.forgetWord(node.getContents());
                node.setContents(wordless);
            }
        }
    }

    /**
     * Replaces the contents of every node of the tree by a copy that keeps only
     * the language and the word.
     *
     * @param tree any node of the tree; the whole tree (from its root) is processed
     */
    public static void forgetDerivation(Arbre<DerivationTree.DerivationNode> tree) {
        Arbre<DerivationTree.DerivationNode> top = tree.root();
        for (Arbre<DerivationTree.DerivationNode> node : top.nodes()) {
            DerivationTree.DerivationNode stripped = DataPrepUtils.forgetDerivation(node.getContents());
            node.setContents(stripped);
        }
    }

    /**
     * Copies a node, keeping only its language and word.
     *
     * @param contents the node to copy
     * @return a new node built from the language and word of {@code contents}
     */
    private static DerivationTree.DerivationNode forgetDerivation(DerivationTree.DerivationNode contents) {
        Taxon language = contents.getLanguage();
        String word = contents.getWord();
        return new DerivationTree.DerivationNode(language, word);
    }

    /**
     * Copies a node, keeping only its language; the word and the derivation of
     * the copy are both {@code null}.
     *
     * @param contents the node to copy
     * @return a new node with the same language and no word or derivation
     */
    public static DerivationTree.DerivationNode forgetWord(DerivationTree.DerivationNode contents) {
        Taxon language = contents.getLanguage();
        return new DerivationTree.DerivationNode(language, null, null);
    }

    /**
     * Tree map that, for every non-root node, samples a list of edits from the
     * parent's word to the node's word with an {@link ObservedWordSampler},
     * records the edits in the {@link EditsTracker}, and stores the
     * corresponding derivation on the resulting node. The root keeps a
     * {@code null} derivation.
     */
    private static class DerivationSamplerMap
    extends Arbre.ArbreMap<DerivationTree.DerivationNode, DerivationTree.DerivationNode> {
        private final Random rand;
        private final ParamsTracker tracker;
        private final EditsTracker editsTracker;

        /**
         * @param rand the random source handed to each sampler
         * @param tracker source of the per-language edit parameters
         * @param editsTracker receives the edits sampled for each language
         */
        public DerivationSamplerMap(Random rand, ParamsTracker tracker, EditsTracker editsTracker) {
            this.editsTracker = editsTracker;
            this.rand = rand;
            this.tracker = tracker;
        }

        /**
         * Maps one node of the domain tree.
         *
         * @param dom the node being mapped
         * @return a node with the same language and word, plus the sampled derivation
         *         (or {@code null} for the root)
         * @throws RuntimeException wrapping a {@link MeasureZeroException} when the
         *         sampler reports a failure
         */
        @Override
        public DerivationTree.DerivationNode map(Arbre<DerivationTree.DerivationNode> dom) {
            DerivationTree.Derivation d = null;
            Taxon currentLang = dom.getContents().getLanguage();
            String currentWord = dom.getContents().getWord();
            if (!dom.isRoot()) {
                String parentWord = dom.getParent().getContents().getWord();
                EditParam currentParam = this.tracker.getEditParam(currentLang);
                ObservedWordSampler sampler = new ObservedWordSampler(this.rand, currentParam, parentWord, currentWord);
                List<Edit> sample = sampler.sample(false);
                if (sampler.isFailure()) {
                    // Wrapped because map() cannot declare the checked exception.
                    throw new RuntimeException(new MeasureZeroException("Impossible to sample a derivation from " + parentWord + " to " + currentWord));
                }
                this.editsTracker.update(currentLang, sample);
                d = DerivationTree.Derivation.editList2Derivation(sample);
            }
            return new DerivationTree.DerivationNode(currentLang, currentWord, d);
        }
    }

    /**
     * Tree map that attaches an initial derivation to every non-root node.
     * Exactly one of the two sources is set: a multiple alignment, or
     * per-language parameters for an affine-gap alignment sampler.
     */
    private static class InitLongGapMap
    extends Arbre.ArbreMap<DerivationTree.DerivationNode, DerivationTree.DerivationNode> {
        private final MultiAlignment ma;
        private final Map<Taxon, AffineGapAlignmentSampler.GapAlignmentParams> params;

        /** Creates a map that reads derivations from a multiple alignment. */
        private InitLongGapMap(MultiAlignment ma) {
            this.ma = ma;
            this.params = null;
        }

        /** Creates a map that computes derivations with an alignment sampler. */
        private InitLongGapMap(Map<Taxon, AffineGapAlignmentSampler.GapAlignmentParams> params) {
            this.params = params;
            this.ma = null;
        }

        /** @return {@code true} when this map was built with sampler parameters */
        private boolean useSampling() {
            return this.params != null;
        }

        /**
         * Maps one node: the root is copied with language and word only; a node
         * whose word equals its parent's gets the identity derivation; any other
         * node gets a derivation from the configured source.
         */
        @Override
        public DerivationTree.DerivationNode map(Arbre<DerivationTree.DerivationNode> currentDomainNode) {
            if (currentDomainNode.isRoot()) {
                DerivationTree.DerivationNode rootContents = currentDomainNode.getContents();
                return new DerivationTree.DerivationNode(rootContents.getLanguage(), currentDomainNode.getContents().getWord());
            }
            String parentWord = currentDomainNode.getParent().getContents().getWord();
            String ownWord = currentDomainNode.getContents().getWord();
            if (parentWord.equals(ownWord)) {
                return DataPrepUtils.monotonicDerivation(currentDomainNode.getContents());
            }
            return this.derivationNode(parentWord, currentDomainNode.getContents());
        }

        /**
         * Builds the node for {@code currentNode} with a derivation from
         * {@code topWord}: the sampler's mode when sampling, otherwise the
         * derivation the multiple alignment gives between the languages whose
         * sequences equal the two words.
         *
         * @throws RuntimeException wrapping a {@link MeasureZeroException} raised by the sampler
         */
        private DerivationTree.DerivationNode derivationNode(String topWord, DerivationTree.DerivationNode currentNode) {
            String ownWord = currentNode.getWord();
            DerivationTree.Derivation derivation;
            if (!this.useSampling()) {
                Taxon fromLang = this.findLang(topWord);
                Taxon toLang = this.findLang(ownWord);
                derivation = this.ma.getDerivation(fromLang, toLang);
            } else {
                AffineGapAlignmentSampler.GapAlignmentParams langParams = this.params.get(currentNode.getLanguage());
                AffineGapAlignmentSampler sampler = AffineGapAlignmentSampler.createAffineGapAlignmentSampler(topWord, ownWord, langParams);
                try {
                    derivation = sampler.mode();
                }
                catch (MeasureZeroException measureZero) {
                    throw new RuntimeException(measureZero);
                }
            }
            return new DerivationTree.DerivationNode(currentNode.getLanguage(), currentNode.getWord(), derivation);
        }

        /**
         * Looks up the first language (in key iteration order) whose sequence in
         * the multiple alignment equals {@code sequence}.
         *
         * @throws RuntimeException if no sequence matches
         */
        private Taxon findLang(String sequence) {
            for (Taxon candidate : this.ma.getSequences().keySet()) {
                if (this.ma.getSequences().get(candidate).equals(sequence)) {
                    return candidate;
                }
            }
            throw new RuntimeException("Lang not found for seqn:" + sequence);
        }
    }
}

