/*
 * Decompiled with CFR 0.152.
 */
package pepper;

import fig.basic.IOUtils;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import nuts.io.IO;
import nuts.lispparser.LispParser;
import nuts.lispparser.ParseException;
import nuts.util.Arbre;
import nuts.util.CollUtils;
import nuts.util.Counter;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;
import pepper.Corpus;
import pepper.editmodel.BayesRiskMinimizer;

/**
 * Command-line exploration of a cognate corpus.
 *
 * <p>For every language in the corpus whose name contains {@code "Proto"}, the
 * program prints:
 * <ol>
 *   <li>every other language sharing more than {@link #MIN_SHARED_ROWS} known
 *       rows with it, together with the mean of
 *       {@code BayesRiskMinimizer.computeDist} over those shared rows, listed
 *       once in the iteration order of the distance counter and once in the
 *       iteration order of the overlap counter;</li>
 *   <li>for each character of the proto-language's words, the fraction of known
 *       proto rows in which that character appears in the proto word but in none
 *       of the listed languages' words for the same row.</li>
 * </ol>
 */
public class CorpusExploration {
    /** Cognate file read when no first command-line argument is supplied. */
    private static final String DEFAULT_COGNATES_PATH = "/Users/bouchard/w/evolvere/data/austro/20100807/processed/All.cog";

    /** Tree topology file read when no second command-line argument is supplied. */
    private static final String DEFAULT_TOPOLOGY_PATH = "/Users/bouchard/w/evolvere/data/austro/20100807/processed/All.topo";

    /** A language pair is reported only when it shares strictly more rows than this. */
    private static final long MIN_SHARED_ROWS = 20L;

    /**
     * Runs the exploration and writes the report to standard output.
     *
     * @param args optional overrides: {@code args[0]} is the cognate file,
     *             {@code args[1]} the topology file; missing entries fall back
     *             to the built-in default paths
     * @throws IOException    declared for the corpus loading call
     * @throws ParseException if the topology file cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String cognatesPath = args != null && args.length > 0 ? args[0] : DEFAULT_COGNATES_PATH;
        String topologyPath = args != null && args.length > 1 ? args[1] : DEFAULT_TOPOLOGY_PATH;

        Corpus c = Corpus.parse(cognatesPath);
        System.out.println("Number of languages:" + c.getNLangs());
        LispParser p = new LispParser(IOUtils.openInHard(topologyPath));
        Arbre<String> a = Arbre.tree2Arbre(p.parse());
        Map<Arbre<String>, Set<String>> descMap = Arbre.descMap(a);
        List<String> langs = c.getFieldNames();

        for (String targetLang : langs) {
            if (!targetLang.contains("Proto")) {
                continue;
            }
            Set<String> desct = descMap.get(Arbre.findFirstNodeWithContents(a, targetLang));
            if (desct == null) {
                // Keep going with an empty set so the report is still produced,
                // just without any "[desc]" markers.
                desct = CollUtils.set();
                System.err.println("Proto not found in tree");
            }

            // editD stores the NEGATED mean distance; presumably Counter iterates
            // keys by decreasing count, so negation lists the closest language
            // first -- TODO confirm against nuts.util.Counter.
            Counter<String> editD = new Counter<String>();
            Counter<String> count = new Counter<String>();
            for (String otherLang : langs) {
                if (otherLang.equals(targetLang)) {
                    continue;
                }
                SummaryStatistics stat = new SummaryStatistics();
                for (int row = 0; row < c.getNWords(); ++row) {
                    if (!c.isKnown(row, targetLang) || !c.isKnown(row, otherLang)) {
                        continue;
                    }
                    stat.addValue(BayesRiskMinimizer.computeDist(c.getWord(row, targetLang), c.getWord(row, otherLang)));
                }
                if (stat.getN() <= MIN_SHARED_ROWS) {
                    continue;
                }
                editD.setCount(otherLang, -stat.getMean());
                count.setCount(otherLang, stat.getN());
            }

            System.out.println("\n\nCurrent proto-language: " + targetLang + "\n");
            Set<String> closestLangs = new HashSet<String>();
            System.out.println("\tClosest languages:");
            printRanking(editD, editD, count, desct, closestLangs);
            System.out.println("\n\tMost overlapping languages:");
            printRanking(count, editD, count, desct, closestLangs);

            Counter<Character> noreflexes = new Counter<Character>();
            double norm = 0.0;
            for (int row = 0; row < c.getNWords(); ++row) {
                if (!c.isKnown(row, targetLang)) {
                    continue;
                }
                norm += 1.0;
                // Start from every character of the proto word, then strike out
                // those that occur in any listed language's word for this row.
                Set<Character> charsInProto = new HashSet<Character>();
                for (char aChar : c.getWord(row, targetLang).toCharArray()) {
                    charsInProto.add(aChar);
                }
                for (String closeLang : closestLangs) {
                    if (!c.isKnown(row, closeLang)) {
                        continue;
                    }
                    for (char aChar : c.getWord(row, closeLang).toCharArray()) {
                        charsInProto.remove(aChar);
                    }
                }
                for (Character orphan : charsInProto) {
                    noreflexes.incrementCount(orphan, 1.0);
                }
            }

            System.out.println("\n\tChars with no reflexes:");
            // norm can only be zero when noreflexes is empty, so no division by
            // zero is ever printed here.
            for (Character aChar : noreflexes) {
                IO.so("\t\t" + aChar + "\t" + noreflexes.getCount(aChar) / norm);
            }
        }
    }

    /**
     * Prints one report line per language, in the iteration order of
     * {@code order}, and records every printed language in {@code seen}.
     *
     * @param order       counter whose iteration order drives the listing
     * @param negMeanDist negated mean distance per language
     * @param overlap     number of shared known rows per language
     * @param descendants languages tagged with {@code [desc]} in the output
     * @param seen        output set that accumulates every language printed
     */
    private static void printRanking(Counter<String> order, Counter<String> negMeanDist, Counter<String> overlap, Set<String> descendants, Set<String> seen) {
        for (String otherLang : order) {
            seen.add(otherLang);
            System.out.println("\t\t" + otherLang + "\t" + -negMeanDist.getCount(otherLang) + "\t(" + overlap.getCount(otherLang) + ")\t" + (descendants.contains(otherLang) ? "[desc]" : ""));
        }
    }
}

