/*
 * Decompiled with CFR 0.152.
 */
package cognates;

import ev.hmm.HetPairHMM;
import ev.par.ExponentialFamily;
import ev.par.FeatureExtractor;
import fig.basic.IOUtils;
import fig.basic.Indexer;
import fig.basic.LogInfo;
import fig.basic.Option;
import fig.basic.Pair;
import goblin.Taxon;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import ma.MSAPoset;
import ma.SequenceType;
import nuts.io.IO;
import nuts.maxent.MaxentClassifier;
import nuts.util.CollUtils;
import pepper.Encodings;

/**
 * Loads whitespace-separated cognate tables, trains a pairwise alignment model on the
 * words they contain, and produces a multiple sequence alignment per gloss.
 *
 * <p>Input tables start with a header line whose whitespace-separated tokens name the
 * columns. Column 0 is never used as a language (every loop over languages starts at
 * index 1). Each following line holds a gloss in column 0 and one word per language in
 * the remaining columns; a word equal to {@code "?"}, or containing a character the
 * {@link #encoding} does not know (negative phone id), is treated as missing.
 */
public class LanguageDataHandler
implements Runnable {
    /** Phoneme encoding used to validate characters; remains {@code null} if it could not be loaded. */
    public static Encodings encoding = null;
    /** Character that marks a site without an observation in {@link #convertPhonemeToLik}. */
    public static char NO_OBS_CHAR = '_';
    public static Indexer<Character> phonemes;
    /** When positive, {@link #alignByGloss} only uses the first {@code numLanguages} language columns. */
    @Option(required=false)
    public static int numLanguages;
    @Option(required=false)
    public static int numGloss;

    /**
     * Reads a cognate table, trains the pairwise model for {@code numIter} iterations on all
     * word pairs sharing a gloss, aligns the words of every gloss and writes one line per
     * gloss (gloss followed by one gapped string per language, {@code '-'} marking a gap).
     *
     * <p>Lines sharing the same gloss (after stripping anything from the first {@code '('})
     * are merged; when a language already has a word for the gloss, later words are counted
     * as synonyms and dropped.
     *
     * @param numIter    number of training iterations
     * @param inputFile  path of the cognate table to read
     * @param outputFile path of the alignment file to write
     * @throws IOException if the input is empty or unreadable, or the output cannot be written
     */
    public static void alignByGloss(int numIter, String inputFile, String outputFile) throws IOException {
        ExponentialFamily fam = createPhonemeFamily();

        String[] languages;
        HashMap<String, String[]> glossMap = CollUtils.map();
        int numSynonyms = 0;
        BufferedReader reader = IOUtils.openIn(inputFile);
        try {
            languages = readHeader(reader, inputFile);
            int numColumns = languages.length;
            int numLines = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                LogInfo.logs("Processing line " + ++numLines);
                String[] w = line.split("\\s+");
                // Strip a parenthesized suffix from the gloss. indexOf is used instead of
                // split("\\(")[0] because the latter yields an empty array for the token "(".
                int paren = w[0].indexOf('(');
                String gloss = (paren >= 0 ? w[0].substring(0, paren) : w[0]).trim();
                String[] wordList = glossMap.get(gloss);
                if (wordList == null) {
                    wordList = new String[numColumns];
                    glossMap.put(gloss, wordList);
                }
                if (w.length > numColumns) {
                    LogInfo.logs("Line " + numLines + " has " + w.length + " columns but the header has "
                            + numColumns + "; ignoring the extra columns");
                }
                int lastColumn = Math.min(w.length, numColumns);
                for (int i = 1; i < lastColumn; ++i) {
                    String word = w[i].trim();
                    if (!isUsableWord(word)) {
                        continue;
                    }
                    if (wordList[i] == null) {
                        wordList[i] = word;
                        continue;
                    }
                    // The first word seen for a (gloss, language) cell wins.
                    ++numSynonyms;
                    LogInfo.logs("Synonym for '" + gloss + "' for language=" + languages[i]);
                }
            }
        }
        finally {
            reader.close();
        }
        LogInfo.logs("num glosses=" + glossMap.size());
        LogInfo.logs("num synonyms=" + numSynonyms);

        // L is one past the last language column in use (column 0 is not a language).
        int L = languages.length;
        if (numLanguages > 0) {
            // Never exceed the columns actually present in the header.
            L = Math.min(languages.length, numLanguages + 1);
            LogInfo.logs("numLanguages=" + L);
        }
        int numTaxa = L - 1;
        Indexer<Taxon> languageIndexer = new Indexer<Taxon>();
        for (int i = 1; i < L; ++i) {
            languageIndexer.add(new Taxon(languages[i]));
        }

        // Collect, per gloss, the observed words and every unordered pair of them.
        ArrayList<Pair<String, Map<Taxon, String>>> sequenceList = CollUtils.list();
        HashMap<String, List<Pair<TaxonWord, TaxonWord>>> glossPairMap = CollUtils.map();
        for (Map.Entry<String, String[]> entry : glossMap.entrySet()) {
            String gloss = entry.getKey();
            String[] words = entry.getValue();
            HashMap<Taxon, String> sequences = CollUtils.map();
            ArrayList<Pair<TaxonWord, TaxonWord>> pairs = CollUtils.list();
            int numWords = 0;
            for (int i = 1; i < L; ++i) {
                String word1 = words[i];
                if (word1 == null) {
                    continue;
                }
                Taxon taxon1 = new Taxon(languages[i]);
                TaxonWord tw1 = new TaxonWord(gloss, taxon1, word1);
                sequences.put(taxon1, word1);
                ++numWords;
                for (int j = i + 1; j < L; ++j) {
                    String word2 = words[j];
                    if (word2 == null) {
                        continue;
                    }
                    Taxon taxon2 = new Taxon(languages[j]);
                    pairs.add(new Pair<TaxonWord, TaxonWord>(tw1, new TaxonWord(gloss, taxon2, word2)));
                }
            }
            // Glosses without any observed word take no part in training.
            if (numWords > 0) {
                glossPairMap.put(gloss, pairs);
            }
            sequenceList.add(new Pair<String, Map<Taxon, String>>(gloss, sequences));
            LogInfo.logs("'" + gloss + "' contains " + numWords + " observations");
        }

        for (int iter = 0; iter < numIter; ++iter) {
            LogInfo.logs("Training iteration " + iter);
            int glossCount = 0;
            for (Map.Entry<String, List<Pair<TaxonWord, TaxonWord>>> entry : glossPairMap.entrySet()) {
                List<Pair<TaxonWord, TaxonWord>> pairs = entry.getValue();
                LogInfo.logs("Processing " + entry.getKey() + " with " + pairs.size() + " pairs. " + ++glossCount + "/" + glossMap.size());
                for (Pair<TaxonWord, TaxonWord> pair : pairs) {
                    TaxonWord first = pair.getFirst();
                    TaxonWord second = pair.getSecond();
                    HetPairHMM hh = fam.getHMM(first.word, second.word, first.taxon, second.taxon);
                    fam.addSufficientStatistics(hh, first.taxon, second.taxon);
                }
            }
            fam.updateParameters();
        }

        // Align every gloss and render one gapped string per language.
        ArrayList<String> data = CollUtils.list();
        for (Pair<String, Map<Taxon, String>> glossSequences : sequenceList) {
            String gloss = glossSequences.getFirst();
            StringBuilder[] builders = new StringBuilder[numTaxa];
            for (int l = 0; l < numTaxa; ++l) {
                builders[l] = new StringBuilder();
            }
            MSAPoset poset = fam.maxRecallAlignFromAllPairs(glossSequences.getSecond());
            LogInfo.logs(poset.toString());
            for (MSAPoset.Column column : poset.linearizedColumns()) {
                Map<Taxon, Integer> points = column.getPoints();
                for (int i = 0; i < numTaxa; ++i) {
                    Taxon taxon = languageIndexer.getObject(i);
                    if (points.containsKey(taxon)) {
                        builders[i].append(poset.charAt(column, taxon));
                    } else {
                        builders[i].append('-');
                    }
                }
            }
            StringBuilder lineBuilder = new StringBuilder();
            lineBuilder.append(gloss).append(" ");
            for (int i = 0; i < numTaxa; ++i) {
                LogInfo.logs(languageIndexer.getObject(i).toString() + ": " + builders[i].toString());
                lineBuilder.append(builders[i]).append(" ");
            }
            data.add(lineBuilder.toString());
        }

        PrintWriter writer = new PrintWriter(new File(outputFile));
        try {
            writer.write("gloss ");
            for (int l = 0; l < numTaxa; ++l) {
                writer.print(languageIndexer.getObject(l).toString() + " ");
            }
            writer.print("\n");
            for (String row : data) {
                if (row.trim().equals("")) {
                    continue;
                }
                writer.print(row + "\n");
            }
            // PrintWriter swallows I/O errors; surface them instead of leaving a truncated file unnoticed.
            if (writer.checkError()) {
                throw new IOException("Failed to write alignments to " + outputFile);
            }
        }
        finally {
            writer.close();
        }
    }

    /**
     * Reads a cognate table in which every line is one cognate group, trains the pairwise
     * model for {@code numIter} iterations on all word pairs within each line, then aligns
     * each line and logs the resulting alignment. Nothing is written to disk.
     *
     * @param numIter  number of training iterations
     * @param filepath path of the cognate table to read
     * @throws IOException if the input is empty or unreadable
     */
    public static void writeMSA(int numIter, String filepath) throws IOException {
        ExponentialFamily fam = createPhonemeFamily();

        String[] languages;
        int numLines = 0;
        ArrayList<HashMap<Taxon, String>> sequenceList = CollUtils.list();
        ArrayList<HashMap<Integer, String>> wordList = CollUtils.list();
        BufferedReader reader = IOUtils.openIn(filepath);
        try {
            languages = readHeader(reader, filepath);
            String line;
            while ((line = reader.readLine()) != null) {
                LogInfo.logs("Processing line " + ++numLines);
                String[] data = line.split("\\s+");
                HashMap<Taxon, String> sequences = new HashMap<Taxon, String>();
                HashMap<Integer, String> words = new HashMap<Integer, String>();
                if (data.length > languages.length) {
                    LogInfo.logs("Line " + numLines + " has " + data.length + " columns but the header has "
                            + languages.length + "; ignoring the extra columns");
                }
                int lastColumn = Math.min(data.length, languages.length);
                for (int i = 1; i < lastColumn; ++i) {
                    String word = data[i].trim();
                    if (!isUsableWord(word)) {
                        continue;
                    }
                    sequences.put(new Taxon(languages[i]), word);
                    words.put(i, word);
                }
                sequenceList.add(sequences);
                wordList.add(words);
            }
        }
        finally {
            reader.close();
        }

        for (int iter = 0; iter < numIter; ++iter) {
            LogInfo.logs("Training iteration " + iter);
            int numGroups = 0;
            for (HashMap<Integer, String> words : wordList) {
                Integer[] columns = words.keySet().toArray(new Integer[0]);
                LogInfo.logs("Processing group " + ++numGroups + "/" + numLines + " with " + words.size() + " words");
                for (int i = 0; i < columns.length; ++i) {
                    Taxon t1 = new Taxon(languages[columns[i]]);
                    String w1 = words.get(columns[i]);
                    for (int j = i + 1; j < columns.length; ++j) {
                        Taxon t2 = new Taxon(languages[columns[j]]);
                        String w2 = words.get(columns[j]);
                        HetPairHMM hh = fam.getHMM(w1, w2, t1, t2);
                        fam.addSufficientStatistics(hh, t1, t2);
                    }
                }
            }
            fam.updateParameters();
        }

        for (HashMap<Taxon, String> sequences : sequenceList) {
            MSAPoset poset = fam.maxRecallAlignFromAllPairs(sequences);
            LogInfo.logs(poset.toString());
        }
    }

    /**
     * Reads an alignment file in which each line holds a taxon name followed by its
     * sequence, separated by whitespace. Further tokens on a line are ignored; when a
     * taxon occurs more than once the last line wins.
     *
     * <p>Blank lines are skipped silently; lines with a name but no sequence are skipped
     * with a log message.
     *
     * @param filepath path of the alignment file
     * @return map from taxon to the characters of its sequence
     * @throws IOException if the file cannot be read
     */
    public static Map<Taxon, char[]> readMSA(String filepath) throws IOException {
        HashMap<Taxon, char[]> map = CollUtils.map();
        int numLines = 0;
        BufferedReader reader = IOUtils.openIn(filepath);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                ++numLines;
                // Trim first so leading whitespace does not produce an empty taxon name.
                String trimmed = line.trim();
                String[] data = trimmed.split("\\s+");
                if (data.length < 2) {
                    if (trimmed.length() > 0) {
                        LogInfo.logs("Skipping line " + numLines + ": no sequence after taxon '" + trimmed + "'");
                    }
                    continue;
                }
                map.put(new Taxon(data[0]), data[1].toCharArray());
            }
        }
        finally {
            reader.close();
        }
        LogInfo.logs("numLines = " + numLines);
        return map;
    }

    /**
     * Converts a sequence of phoneme characters into a per-site table of values indexed by
     * phoneme id.
     *
     * <p>With {@code B = phonemes.size()}: a site holding {@link #NO_OBS_CHAR} gets
     * {@code 1/B} in every column; any other site gets {@code 1/B} in the single column
     * given by the static {@link #encoding} for that character and {@code 0} elsewhere.
     * Note that {@code phonemes} only supplies the table width; the column index always
     * comes from {@link #encoding}.
     *
     * @param data     phoneme characters, one per site
     * @param phonemes indexer whose size determines the number of columns
     * @return a {@code data.length} by {@code phonemes.size()} table
     * @throws ArrayIndexOutOfBoundsException if the encoding maps a character to an id
     *         outside {@code [0, phonemes.size())}, e.g. a negative id
     */
    public static double[][] convertPhonemeToLik(char[] data, Indexer<Character> phonemes) {
        int numSites = data.length;
        int numPhonemes = phonemes.size();
        double[][] lik = new double[numSites][numPhonemes];
        double constantLik = 1.0 / (double)numPhonemes;
        for (int s = 0; s < numSites; ++s) {
            char phoneme = data[s];
            if (phoneme != NO_OBS_CHAR) {
                lik[s][LanguageDataHandler.encoding.char2PhoneId(phoneme)] = constantLik;
                continue;
            }
            for (int b = 0; b < numPhonemes; ++b) {
                lik[s][b] = constantLik;
            }
        }
        return lik;
    }

    /**
     * Runs {@link #alignByGloss} with five training iterations on the fixed project paths.
     *
     * @throws RuntimeException wrapping the {@link IOException} if reading or writing fails
     */
    @Override
    public void run() {
        try {
            LanguageDataHandler.alignByGloss(5, "data/processed/All.cog", "data/processed/final-data.txt");
        }
        catch (IOException e) {
            // Failing silently would leave no output and no hint why; propagate with the cause.
            throw new RuntimeException("Aligning data/processed/All.cog failed", e);
        }
    }

    public static void main(String[] args) {
        IO.run(args, new LanguageDataHandler());
    }

    /** Builds the exponential family model used by both alignment entry points (phoneme encoding, hydrophobic modeling off). */
    private static ExponentialFamily createPhonemeFamily() {
        ExponentialFamily.ExponentialFamilyOptions opt = new ExponentialFamily.ExponentialFamilyOptions();
        opt.encodingType = SequenceType.PHONEMES;
        FeatureExtractor.FeatureOptions fopt = new FeatureExtractor.FeatureOptions();
        fopt.hydrophobicModeling = false;
        return ExponentialFamily.createExpfam(new MaxentClassifier.MaxentOptions<Object>(), opt, fopt, null);
    }

    /** Reads the header line and splits it into column names; fails with an IOException on an empty file. */
    private static String[] readHeader(BufferedReader reader, String path) throws IOException {
        String header = reader.readLine();
        if (header == null) {
            throw new IOException("Missing header line in " + path);
        }
        return header.trim().split("\\s+");
    }

    /** Returns false for the missing-value marker "?" and for words containing a character with a negative phone id. */
    private static boolean isUsableWord(String word) {
        if (word.equals("?")) {
            return false;
        }
        for (int s = 0; s < word.length(); ++s) {
            if (encoding.char2PhoneId(word.charAt(s)) < 0) {
                return false;
            }
        }
        return true;
    }

    static {
        numLanguages = 0;
        numGloss = 0;
        // Created outside the try so a failed encoding load cannot leave it null.
        phonemes = new Indexer<Character>();
        try {
            encoding = Encodings.realEncoding();
        }
        catch (Exception e) {
            // Keep the class loadable, but report the failure: with encoding == null every
            // method that validates characters will fail later with a NullPointerException.
            System.err.println("LanguageDataHandler: could not load the phoneme encoding: " + e);
        }
    }

    /** A word together with the gloss it expresses and the language (taxon) it belongs to. */
    public static class TaxonWord {
        public String gloss;
        public Taxon taxon;
        public String word;

        public TaxonWord(String gloss, Taxon taxon, String word) {
            this.gloss = gloss;
            this.taxon = taxon;
            this.word = word;
        }
    }
}

