/*
 * Decompiled with CFR 0.152.
 */
package confusionmtx;

import ev.hmm.HetPairHMM;
import ev.par.ExponentialFamily;
import ev.par.FeatureExtractor;
import fig.basic.IOUtils;
import fig.basic.LogInfo;
import fig.basic.Option;
import fig.basic.Pair;
import fig.exec.Execution;
import goblin.DerivationTree;
import goblin.Taxon;
import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
import ma.SequenceType;
import nuts.io.IO;
import nuts.lang.StringUtils;
import nuts.maxent.MaxentClassifier;
import nuts.util.CollUtils;
import nuts.util.Counter;
import pepper.Encodings;

/**
 * Command-line tool that reads truth/guess word pairs from an evaluation file and,
 * over a fixed number of iterations, aligns each pair with a pair HMM obtained from
 * an {@link ExponentialFamily}, samples derivations from that HMM, and tallies
 * character substitutions, insertions and deletions observed in the samples.
 *
 * <p>Each iteration writes four files (paths resolved through
 * {@link Execution#getFile}): the Viterbi output of every pair
 * ({@code alignments-iteration-N.txt}), the normalized substitution counts
 * ({@code iteration-N.txt}), the normalized deletion counts
 * ({@code del-iteration-N.txt}) and the normalized insertion counts
 * ({@code ins-iteration-N.txt}).
 */
public class CreateConfusionMtx
implements Runnable {
    // NOTE(review): machine-specific absolute paths; run() unconditionally assigns them
    // to the static Encodings fields, overriding whatever was configured before.
    private static final String IPA_CONSONANTS_FILE = "/Users/bouchard/Documents/experiments/old-austro/encodings/converted-cons.ipa";
    private static final String IPA_VOWELS_FILE = "/Users/bouchard/Documents/experiments/old-austro/encodings/converted-vow.ipa";
    /** Number of derivations sampled from each pair's HMM in every iteration. */
    private static final int SAMPLES_PER_PAIR = 100;
    /** Fixed seed so that the sampled derivations are reproducible across runs. */
    private static final long RANDOM_SEED = 1L;

    public MaxentClassifier.MaxentOptions<Object> learningOptions = new MaxentClassifier.MaxentOptions<Object>();
    public ExponentialFamily.ExponentialFamilyOptions expFamOptions = new ExponentialFamily.ExponentialFamilyOptions();
    public FeatureExtractor.FeatureOptions featureOptions = new FeatureExtractor.FeatureOptions();
    /** Number of accumulate-statistics / update-parameters iterations performed by {@link #run()}. */
    private int nEMIters = 20;
    /** Path of the evaluation file containing {@code TRUTH ...} and {@code GOBLIN-... ...} lines. */
    @Option(required=true)
    public String evalFile = "";

    /**
     * Program entry point; hands the arguments and a new instance to {@code IO.run}.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        IO.run(args, new CreateConfusionMtx());
    }

    /**
     * Configures the encodings and feature options, reads the truth/guess pairs from
     * {@link #evalFile}, then runs {@code nEMIters} iterations. In each iteration the
     * sufficient statistics of every pair are added to the exponential family, the
     * confusion counters are filled from sampled derivations, the parameters are
     * updated, and the normalized counters are written out.
     *
     * @throws IllegalStateException if a guess line appears before any truth line
     * @throws RuntimeException if a pair's log sum-product is NaN or infinite
     */
    @Override
    public void run() {
        Encodings.ipaConsonantsFile = IPA_CONSONANTS_FILE;
        Encodings.ipaVowelsFile = IPA_VOWELS_FILE;
        this.expFamOptions.encodingType = SequenceType.PHONEMES;
        this.featureOptions.hydrophobicModeling = false;
        this.featureOptions.useLongGaps = true;
        Taxon top = new Taxon("top");
        Taxon bot = new Taxon("bot");
        Random rand = new Random(RANDOM_SEED);
        ArrayList<Pair<String, String>> truthGuessPairs = CreateConfusionMtx.readTruthGuessPairs(new File(this.evalFile));
        ExponentialFamily expFam = ExponentialFamily.createExpfam(this.learningOptions, this.expFamOptions, this.featureOptions, null);
        for (int curEMIter = 0; curEMIter < this.nEMIters; ++curEMIter) {
            Counter<Pair<Character, Character>> substitutionCounter = new Counter<Pair<Character, Character>>();
            Counter<Character> insertCounter = new Counter<Character>();
            Counter<Character> deleteCounter = new Counter<Character>();
            File alignFile = new File(Execution.getFile("alignments-iteration-" + curEMIter + ".txt"));
            PrintWriter alignOut = IOUtils.openOutHard(alignFile);
            // try/finally so the alignment file is closed even when a pair fails below.
            try {
                for (Pair<String, String> pair : truthGuessPairs) {
                    String topWord = pair.getFirst();
                    String botWord = pair.getSecond();
                    HetPairHMM hmm = expFam.getHMM(topWord, botWord, top, bot);
                    // Evaluate once and reuse for both finiteness checks and the error message.
                    double logSumProduct = hmm.logSumProduct();
                    if (Double.isNaN(logSumProduct) || Double.isInfinite(logSumProduct)) {
                        throw new RuntimeException("Non-finite log sum-product (" + logSumProduct + ") for pair truth=\"" + topWord + "\", guess=\"" + botWord + "\" in iteration " + curEMIter);
                    }
                    expFam.addSufficientStatistics(hmm, top, bot);
                    alignOut.println(hmm.viterbi());
                    for (int rep = 0; rep < SAMPLES_PER_PAIR; ++rep) {
                        CreateConfusionMtx.tallyDerivation(hmm.sample(rand), topWord, botWord, substitutionCounter, insertCounter, deleteCounter);
                    }
                }
            } finally {
                alignOut.close();
            }
            expFam.updateParameters();
            // Each counter is normalized on its own, independently of the other two.
            substitutionCounter.normalize();
            insertCounter.normalize();
            deleteCounter.normalize();
            CreateConfusionMtx.writeCounter("iteration-" + curEMIter + ".txt", substitutionCounter);
            CreateConfusionMtx.writeCounter("del-iteration-" + curEMIter + ".txt", deleteCounter);
            CreateConfusionMtx.writeCounter("ins-iteration-" + curEMIter + ".txt", insertCounter);
            LogInfo.logs("Iteration " + curEMIter + " completed");
        }
    }

    /**
     * Parses the evaluation file into (truth, guess) pairs. A line matching
     * {@code TRUTH\s+.*} sets the current truth; every following line matching
     * {@code GOBLIN-\S+\s+.*} contributes one pair with that truth. All other lines
     * are ignored.
     *
     * @param in the evaluation file
     * @return the pairs in file order
     * @throws IllegalStateException if a guess line is seen before any truth line
     */
    private static ArrayList<Pair<String, String>> readTruthGuessPairs(File in) {
        ArrayList<Pair<String, String>> truthGuessPairs = new ArrayList<Pair<String, String>>();
        String truth = null;
        for (String line : IO.i(in)) {
            if (line.matches("TRUTH\\s+.*")) {
                truth = StringUtils.selectFirstRegex("TRUTH\\s+(.*)", line);
                continue;
            }
            if (!line.matches("GOBLIN-\\S+\\s+.*")) {
                continue;
            }
            if (truth == null) {
                // Fail here with context instead of with an NPE deep inside the sampling loop.
                throw new IllegalStateException("Guess line found before any TRUTH line in " + in + ": " + line);
            }
            String guess = StringUtils.selectFirstRegex("GOBLIN-\\S+\\s+(.*)", line);
            truthGuessPairs.add(Pair.makePair(truth, guess));
        }
        return truthGuessPairs;
    }

    /**
     * Updates the three counters from one sampled derivation. For every position of
     * {@code botWord} that the derivation links to an ancestor position in
     * {@code topWord}, a (top char, bottom char) substitution is counted when the two
     * characters differ; identical characters are not counted. Bottom positions with
     * no ancestor count as insertions, and top positions that no bottom position was
     * linked to count as deletions.
     */
    private static void tallyDerivation(DerivationTree.Derivation d, String topWord, String botWord, Counter<Pair<Character, Character>> substitutionCounter, Counter<Character> insertCounter, Counter<Character> deleteCounter) {
        // Starts from the collection built by CollUtils.ints(topWord.length()); entries are
        // removed as top positions get aligned. Must be a fresh set for every derivation.
        HashSet<Integer> unaligned = CollUtils.set(CollUtils.ints(topWord.length()));
        for (int i = 0; i < botWord.length(); ++i) {
            char botChar = botWord.charAt(i);
            if (!d.hasAncestor(i)) {
                insertCounter.incrementCount(Character.valueOf(botChar), 1.0);
                continue;
            }
            int aIdx = d.ancestor(i);
            unaligned.remove(Integer.valueOf(aIdx));
            char topChar = topWord.charAt(aIdx);
            if (botChar != topChar) {
                substitutionCounter.incrementCount(Pair.makePair(Character.valueOf(topChar), Character.valueOf(botChar)), 1.0);
            }
        }
        for (int topIdx : unaligned) {
            deleteCounter.incrementCount(Character.valueOf(topWord.charAt(topIdx)), 1.0);
        }
    }

    /**
     * Writes one {@code key<TAB>count} line per key of {@code counter} to the file
     * that {@link Execution#getFile} resolves for {@code fileName}. The writer is
     * closed even if iteration or formatting fails.
     */
    private static <T> void writeCounter(String fileName, Counter<T> counter) {
        PrintWriter out = IOUtils.openOutHard(new File(Execution.getFile(fileName)));
        try {
            for (T key : counter) {
                out.println(key + "\t" + counter.getCount(key));
            }
        } finally {
            out.close();
        }
    }
}

