/*
 * Decompiled with CFR 0.152.
 */
package pepper;

import fig.basic.Pair;
import fig.basic.StrUtils;
import goblin.CognateId;
import goblin.DataPrepUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import nuts.io.IO;
import nuts.util.CollUtils;
import nuts.util.Counter;
import nuts.util.CounterMap;

/**
 * An immutable table of words: one row per cognate set and one column per
 * language (field).
 *
 * <p>An unknown (missing) word is stored as {@code null}. In the textual format
 * read by {@link #parse(String)} and written by {@link #toString()}, an unknown
 * word is spelled {@link #unk} and a known-but-empty word is spelled
 * {@link #emptyCode}.
 */
public class Corpus {
    /** Textual spelling of an unknown word (held in memory as {@code null}). */
    public static final String unk = "?";
    /** Textual spelling of a known word that is the empty string. */
    public static final String emptyCode = "?emptystring";
    /** Column (language) names, in column order. */
    private final List<String> fieldNames;
    /** Column name to column index. */
    private final Map<String, Integer> indices;
    /** Identifier of each retained row, parallel to {@link #words}. */
    private final List<CognateId> rowIds;
    /** Retained rows; each row holds one entry per column, {@code null} when unknown. */
    private final List<List<String>> words;
    private final int nLangs;
    private final int nWords;

    /**
     * Builds a corpus whose rows get synthetic ids {@code row0}, {@code row1}, ...
     *
     * @param fieldNames column (language) names
     * @param words      rows of words, {@code null} marking an unknown entry
     */
    public Corpus(List<String> fieldNames, List<List<String>> words) {
        this(fieldNames, words, Corpus.fakeIds(words.size()));
    }

    /** Creates {@code size} placeholder ids named {@code "row" + index}. */
    private static List<CognateId> fakeIds(int size) {
        List<CognateId> result = new ArrayList<CognateId>(size);
        for (int i = 0; i < size; ++i) {
            result.add(new CognateId("row" + i));
        }
        return result;
    }

    /**
     * Builds a corpus from rows and their ids. Rows are copied, and rows in
     * which every entry is unknown are dropped together with their id.
     *
     * <p>NOTE(review): only the first row's width is checked against
     * {@code fieldNames}; later rows of a different width are accepted as before.
     *
     * @param fieldNames column (language) names
     * @param words      rows of words, {@code null} marking an unknown entry; may be empty
     * @param rowIds     one id per row of {@code words}
     * @throws IllegalArgumentException if {@code words} and {@code rowIds} differ in
     *         size, or the first row's width differs from {@code fieldNames.size()}
     */
    public Corpus(List<String> fieldNames, List<List<String>> words, List<CognateId> rowIds) {
        if (words.size() != rowIds.size()) {
            throw new IllegalArgumentException(
                    "Got " + words.size() + " rows but " + rowIds.size() + " row ids");
        }
        // An empty corpus is legal; only inspect the first row when there is one.
        if (!words.isEmpty() && words.get(0).size() != fieldNames.size()) {
            throw new IllegalArgumentException(
                    "First row has " + words.get(0).size() + " entries but there are "
                    + fieldNames.size() + " field names");
        }
        this.fieldNames = CollUtils.archive(fieldNames);
        Map<String, Integer> nameToIndex = new HashMap<String, Integer>();
        for (int i = 0; i < fieldNames.size(); ++i) {
            nameToIndex.put(fieldNames.get(i), i);
        }
        this.indices = Collections.unmodifiableMap(nameToIndex);
        this.nLangs = fieldNames.size();
        List<CognateId> keptIds = new ArrayList<CognateId>();
        List<List<String>> keptRows = new ArrayList<List<String>>();
        for (int i = 0; i < words.size(); ++i) {
            List<String> row = words.get(i);
            // Skip rows that carry no information at all.
            if (Corpus.nUnk(row) == row.size()) {
                continue;
            }
            keptRows.add(new ArrayList<String>(row));
            keptIds.add(rowIds.get(i));
        }
        this.rowIds = Collections.unmodifiableList(keptIds);
        this.words = Collections.unmodifiableList(keptRows);
        this.nWords = this.words.size();
    }

    /** Copy constructor: builds a new corpus from the fields, rows and ids of {@code model}. */
    public Corpus(Corpus model) {
        this(model.fieldNames, model.words, model.rowIds);
    }

    /** Returns the number of unknown ({@code null}) entries in {@code row}. */
    public static int nUnk(List<String> row) {
        int result = 0;
        for (String word : row) {
            if (word == null) {
                ++result;
            }
        }
        return result;
    }

    /** Returns the number of known (non-{@code null}) entries in {@code row}. */
    public static int nKnown(List<String> row) {
        return row.size() - Corpus.nUnk(row);
    }

    /** Returns a mutable list of mutable copies of {@code rows}. */
    private static List<List<String>> copyRows(List<List<String>> rows) {
        List<List<String>> copy = new ArrayList<List<String>>(rows.size());
        for (List<String> row : rows) {
            copy.add(new ArrayList<String>(row));
        }
        return copy;
    }

    /**
     * Returns a corpus in which every known word for which
     * {@code DataPrepUtils.unknownCharacters(word, knownChars)} is non-empty has
     * been replaced by an unknown entry. Rows left with no known entry are
     * dropped by the constructor.
     *
     * @param corpus     source corpus (not modified)
     * @param knownChars the accepted characters, as passed to {@code DataPrepUtils}
     */
    public static Corpus restrictToKnownCharacters(Corpus corpus, Set<Character> knownChars) {
        List<List<String>> newWords = Corpus.copyRows(corpus.words);
        for (int row = 0; row < corpus.getNWords(); ++row) {
            for (int col = 0; col < corpus.getNLangs(); ++col) {
                if (!corpus.isKnown(row, col)) {
                    continue;
                }
                if (DataPrepUtils.unknownCharacters(corpus.getWord(row, col), knownChars).size() > 0) {
                    newWords.get(row).set(col, null);
                }
            }
        }
        return new Corpus(corpus.fieldNames, newWords, corpus.rowIds);
    }

    /**
     * Returns a corpus containing only the rows of {@code corpus} that have at
     * least {@code n} known entries.
     */
    public static Corpus restrictToEntriesWithAtLeastNKnownEntries(Corpus corpus, int n) {
        List<List<String>> newWords = new ArrayList<List<String>>();
        List<CognateId> ids = new ArrayList<CognateId>();
        for (int i = 0; i < corpus.words.size(); ++i) {
            List<String> row = corpus.words.get(i);
            if (Corpus.nKnown(row) >= n) {
                newWords.add(new ArrayList<String>(row));
                ids.add(corpus.rowIds.get(i));
            }
        }
        return new Corpus(corpus.fieldNames, newWords, ids);
    }

    /**
     * Returns a corpus with the same columns as {@code initial} in which every
     * entry of the languages named in {@code langsToForget} is unknown. Rows left
     * with no known entry are dropped by the constructor.
     */
    public static Corpus forget(Corpus initial, Set<String> langsToForget) {
        List<List<String>> newWords = Corpus.copyRows(initial.words);
        for (int col = 0; col < initial.getNLangs(); ++col) {
            if (!langsToForget.contains(initial.fieldNames.get(col))) {
                continue;
            }
            for (int row = 0; row < initial.getNWords(); ++row) {
                newWords.get(row).set(col, null);
            }
        }
        return new Corpus(initial.fieldNames, newWords, initial.rowIds);
    }

    /**
     * Returns a corpus keeping only the columns of {@code initial} whose name is
     * in {@code langsToRestrictTo}. Rows left with no known entry are dropped by
     * the constructor.
     */
    public static Corpus restrict(Corpus initial, Set<String> langsToRestrictTo) {
        List<String> newFieldNames = new ArrayList<String>(initial.fieldNames);
        List<List<String>> newWords = Corpus.copyRows(initial.words);
        // Walk columns right-to-left so that removals do not shift pending indices.
        for (int col = initial.fieldNames.size() - 1; col >= 0; --col) {
            if (langsToRestrictTo.contains(initial.fieldNames.get(col))) {
                continue;
            }
            newFieldNames.remove(col);
            for (List<String> row : newWords) {
                row.remove(col);
            }
        }
        return new Corpus(newFieldNames, newWords, initial.rowIds);
    }

    /** Returns an unmodifiable view of the column (language) names, in column order. */
    public List<String> getFieldNames() {
        return Collections.unmodifiableList(this.fieldNames);
    }

    /**
     * Returns {@code true} when, in the given row, every column whose name is in
     * {@code langs} holds a known word.
     */
    public boolean isFullEntry(int row, Set<String> langs) {
        for (int column = 0; column < this.nLangs; ++column) {
            if (langs.contains(this.fieldNames.get(column)) && !this.isKnown(row, column)) {
                return false;
            }
        }
        return true;
    }

    /** Returns the number of columns (languages). */
    public int getNLangs() {
        return this.nLangs;
    }

    /** Returns the number of retained rows. */
    public int getNWords() {
        return this.nWords;
    }

    /** Returns the id of the given row. */
    public CognateId getCognateId(int row) {
        return this.rowIds.get(row);
    }

    /** Returns whether the entry at ({@code row}, {@code column}) is known (non-{@code null}). */
    public boolean isKnown(int row, int column) {
        return this.getWord(row, column) != null;
    }

    /**
     * Returns whether the entry of {@code row} for language {@code langName} is known.
     *
     * @throws RuntimeException if {@code langName} is not a column of this corpus
     */
    public boolean isKnown(int row, String langName) {
        return this.getWord(row, langName) != null;
    }

    /** Returns the word at ({@code row}, {@code column}), or {@code null} when unknown. */
    public String getWord(int row, int column) {
        return this.words.get(row).get(column);
    }

    /**
     * Returns the word of {@code row} for language {@code langName}, or
     * {@code null} when unknown.
     *
     * @throws RuntimeException if {@code langName} is not a column of this corpus
     */
    public String getWord(int row, String langName) {
        Integer column = this.indices.get(langName);
        if (column == null) {
            throw new RuntimeException("Lang. not available in the corpus: " + langName);
        }
        return this.getWord(row, column);
    }

    /**
     * Renders the corpus as text: a {@code #id <lang> <lang> ...} header line
     * followed by one line per row made of the row id and the space-separated
     * words, with unknown and empty words spelled {@link #unk} and
     * {@link #emptyCode}.
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append("#id ").append(StrUtils.join(this.fieldNames, " ")).append("\n");
        int lastCol = this.getNLangs() - 1;
        for (int row = 0; row < this.getNWords(); ++row) {
            builder.append(this.rowIds.get(row)).append(" ");
            for (int col = 0; col < this.getNLangs(); ++col) {
                builder.append(this.toString(row, col));
                if (col != lastCol) {
                    builder.append(" ");
                }
            }
            builder.append("\n");
        }
        return builder.toString();
    }

    /** Returns the textual spelling of one entry, encoding unknown and empty words. */
    private String toString(int row, int col) {
        String cWord = this.words.get(row).get(col);
        if (cWord == null) {
            return unk;
        }
        if (cWord.equals("")) {
            return emptyCode;
        }
        return cWord;
    }

    /**
     * Returns every entry of the table in row-major order, including
     * {@code null} for unknown entries.
     */
    public List<String> allWords() {
        List<String> result = new ArrayList<String>();
        for (int r = 0; r < this.getNWords(); ++r) {
            for (int c = 0; c < this.getNLangs(); ++c) {
                result.add(this.getWord(r, c));
            }
        }
        return result;
    }

    /** Returns the known words of column {@code language}, in row order. */
    public List<String> wordsOfLanguage(int language) {
        List<String> result = new ArrayList<String>();
        for (int w = 0; w < this.getNWords(); ++w) {
            if (this.isKnown(w, language)) {
                result.add(this.getWord(w, language));
            }
        }
        return result;
    }

    /**
     * Returns a new map from language name to the word of {@code row} in that
     * language; unknown entries map to {@code null}.
     */
    public Map<String, String> getWords(int row) {
        Map<String, String> result = new HashMap<String, String>();
        for (String lang : this.getFieldNames()) {
            result.put(lang, this.getWord(row, lang));
        }
        return result;
    }

    /**
     * Prints summary statistics for a corpus file.
     *
     * @param args {@code args[0]} is the corpus path; an optional {@code args[1]}
     *             names a language whose known words are counted separately and
     *             excluded from the "N observed" figure
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            // Previously this fell through to an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: Corpus <corpus-file> [language]");
            return;
        }
        String aLang = null;
        if (args.length == 2) {
            aLang = args[1];
        }
        Corpus c = Corpus.parse(args[0]);
        double num = 0.0;
        double denom = 0.0;
        double nInLang = 0.0;
        double nObserved = 0.0;
        for (int l = 0; l < c.nLangs; ++l) {
            for (int w = 0; w < c.nWords; ++w) {
                if (!c.isKnown(w, l)) {
                    continue;
                }
                denom += 1.0;
                num += (double) c.getWord(w, l).length();
                if (c.fieldNames.get(l).equals(aLang)) {
                    nInLang += 1.0;
                } else {
                    nObserved += 1.0;
                }
            }
        }
        System.out.println("Mean word length: " + num / denom);
        if (aLang != null) {
            System.out.println("Number in " + aLang + ": " + nInLang);
        }
        System.out.println("N langs: " + c.nLangs);
        System.out.println("N cognate sets: " + c.nWords);
        System.out.println("N observed: " + nObserved);
    }

    /**
     * Renders the rows of {@code c1} padded with one {@code "? "} per column of
     * {@code c2}; the padding goes after {@code c1}'s columns when {@code first}
     * is {@code true} and before them otherwise. Only the column count of
     * {@code c2} is used.
     */
    private static String _print(Corpus c1, Corpus c2, boolean first) {
        StringBuilder result = new StringBuilder();
        for (int row = 0; row < c1.getNWords(); ++row) {
            result.append(c1.rowIds.get(row)).append(" ");
            if (first) {
                Corpus.appendRow(result, c1, row);
                Corpus.appendUnknowns(result, c2.getNLangs());
            } else {
                Corpus.appendUnknowns(result, c2.getNLangs());
                Corpus.appendRow(result, c1, row);
            }
            result.append("\n");
        }
        return result.toString();
    }

    /** Appends every entry of {@code row} in {@code c}, each followed by a space. */
    private static void appendRow(StringBuilder out, Corpus c, int row) {
        for (int col = 0; col < c.getNLangs(); ++col) {
            out.append(c.toString(row, col)).append(" ");
        }
    }

    /** Appends {@code count} copies of {@code "? "}. */
    private static void appendUnknowns(StringBuilder out, int count) {
        for (int col = 0; col < count; ++col) {
            out.append("? ");
        }
    }

    /**
     * Returns the zero-based position of {@code l1} in the iteration order of
     * {@code scores}, or {@code -1} when absent.
     * NOTE(review): the meaning of that order depends on {@code Counter}'s iterator
     * (presumably sorted by count) - confirm.
     */
    private static int rank(Counter<Integer> scores, int l1) {
        int i = 0;
        for (Integer key : scores) {
            if (key == l1) {
                return i;
            }
            ++i;
        }
        return -1;
    }

    /** Splits {@code words} into two sub-list views: the first half and the remainder. */
    private static Pair<List<String>, List<String>> split(List<String> words) {
        int middle = words.size() / 2;
        return Pair.makePair(words.subList(0, middle), words.subList(middle, words.size()));
    }

    /**
     * Counts, for every character of every word, the character given the
     * {@code order} characters preceding it (words are left-padded with
     * {@code order} copies of {@code '#'}), then calls {@code normalize()} on the
     * resulting counter map.
     */
    private static CounterMap<String, Character> lm(List<String> words, int order) {
        CounterMap<String, Character> result = new CounterMap<String, Character>();
        // Build the padding once instead of re-concatenating it for every word.
        StringBuilder pad = new StringBuilder();
        for (int i = 0; i < order; ++i) {
            pad.append('#');
        }
        String prefix = pad.toString();
        for (String word : words) {
            String padded = prefix + word;
            for (int i = order; i < padded.length(); ++i) {
                // For order == 0 the context substring is the empty string.
                result.incrementCount(padded.substring(i - order, i), Character.valueOf(padded.charAt(i)), 1.0);
            }
        }
        result.normalize();
        return result;
    }

    /**
     * Returns the mean, over the keys present in both maps, of the
     * {@link #l1(Counter, Counter)} distance between the corresponding counters.
     * The result is NaN when the maps share no key (0.0 / 0).
     */
    private static <S, T> double wm(CounterMap<S, T> cm1, CounterMap<S, T> cm2) {
        double result = 0.0;
        Set<S> interKey = new HashSet<S>(cm1.keySet());
        interKey.retainAll(cm2.keySet());
        for (S key : interKey) {
            result += Corpus.l1(cm1.getCounter(key), cm2.getCounter(key));
        }
        return result / (double) interKey.size();
    }

    /**
     * Returns the sum, over the union of both key sets, of the absolute
     * difference between the two counters' counts.
     */
    private static <T> double l1(Counter<T> c1, Counter<T> c2) {
        double result = 0.0;
        Set<T> allKeys = new HashSet<T>(c1.keySet());
        allKeys.addAll(c2.keySet());
        for (T key : allKeys) {
            result += Math.abs(c1.getCount(key) - c2.getCount(key));
        }
        return result;
    }

    /**
     * Reads a corpus from a whitespace-separated text file.
     *
     * <p>Blank lines are skipped. The first non-blank line must start with
     * {@code #} and list the language names; if its first token is {@code id},
     * every data line starts with a row id, otherwise ids are synthesised as
     * {@code path + ":" + n}, where {@code n} is the zero-based index of the line
     * among non-blank lines. In data lines {@link #unk} denotes an unknown word
     * and {@link #emptyCode} an empty one. A file without any non-blank line
     * yields an empty corpus.
     *
     * @param path location handed to {@code IO.i}
     * @return the parsed corpus
     * @throws RuntimeException if the header does not start with {@code #} or a
     *         data line has the wrong number of fields
     */
    public static Corpus parse(String path) throws IOException {
        List<String> fieldNames = new ArrayList<String>();
        List<List<String>> words = new ArrayList<List<String>>();
        List<CognateId> ids = new ArrayList<CognateId>();
        int nLangs = -1;
        // Counts non-blank lines only; kept that way because it feeds the synthetic ids.
        int lineNumber = 0;
        boolean hasIds = false;
        for (String line : IO.i(path)) {
            if (line.equals("")) {
                continue;
            }
            if (nLangs == -1) {
                if (line.charAt(0) != '#') {
                    throw new RuntimeException("First line should start with # and describe the languages\nRead: " + line);
                }
                String[] header = line.replaceFirst("[#](\\s|\\t)*", "").split("(\\t|\\s)+");
                hasIds = header[0].equals("id");
                for (int i = hasIds ? 1 : 0; i < header.length; ++i) {
                    if (!header[i].equals("")) {
                        fieldNames.add(header[i]);
                    }
                }
                nLangs = fieldNames.size();
            } else {
                String[] fields = line.split("(\\t|\\s)+");
                // Computed as an int first: concatenating the two terms directly
                // used to print e.g. "31" instead of 4.
                int expected = nLangs + (hasIds ? 1 : 0);
                if (fields.length != expected) {
                    throw new RuntimeException("Different # of fields for one of the lines, there should be " + expected + ", found " + fields.length + "\nLine " + lineNumber + ":" + line);
                }
                ids.add(new CognateId(hasIds ? fields[0] : path + ":" + lineNumber));
                List<String> currentList = new ArrayList<String>();
                for (int i = hasIds ? 1 : 0; i < fields.length; ++i) {
                    if (fields[i].equals(unk)) {
                        currentList.add(null);
                    } else if (fields[i].equals(emptyCode)) {
                        currentList.add("");
                    } else {
                        currentList.add(fields[i]);
                    }
                }
                words.add(currentList);
            }
            ++lineNumber;
        }
        return new Corpus(fieldNames, words, ids);
    }

    /** Returns the set of all characters occurring in the known words of this corpus. */
    public Set<Character> allChars() {
        Set<Character> result = new HashSet<Character>();
        for (String word : this.allWords()) {
            if (word == null) {
                continue;
            }
            for (int i = 0; i < word.length(); ++i) {
                result.add(Character.valueOf(word.charAt(i)));
            }
        }
        return result;
    }
}

