/*
 * Decompiled with CFR 0.152.
 */
package ev.io;

import ev.io.PreprocessGutellData;
import goblin.Taxon;
import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import nuts.io.IO;
import nuts.lang.StringUtils;

/**
 * Static helpers that load per-taxon classification lists from an alignment
 * file and query them.
 *
 * <p>A classification is a {@code List<String>} of entries parsed from a
 * {@code #}-prefixed line that matches {@link #classficationPattern}.
 */
public class SpeciesInfo {
    /**
     * Cache for {@link #getEukClassifications()}; {@code null} until the first call.
     * Initialization is not synchronized.
     */
    private static Map<Taxon, List<String>> _eukClass = null;

    /**
     * Pattern for a classification line. Capture group 1 is the colon-free text
     * immediately before {@code :rem:}; capture group 2 is the run of
     * {@code ;}-terminated entries that follows it.
     *
     * <p>The (misspelled) field name and its non-final modifier are kept as-is
     * because the field is public and may be referenced elsewhere.
     */
    public static Pattern classficationPattern = Pattern.compile(".*[:]([^:]*)[:]rem[:]\\s+(([^;]+;\\s*)+)");

    /**
     * Returns the classifications loaded from {@code data/gutell/raw/16S.E.ALL.aln.gz}
     * with top level {@code "Eukaryota"}, loading them on the first call and
     * returning the same cached map afterwards.
     *
     * @return the cached taxon-to-classification map (the internal instance, not a copy)
     */
    public static Map<Taxon, List<String>> getEukClassifications() {
        if (_eukClass == null) {
            _eukClass = SpeciesInfo.loadClassification(new File("data/gutell/raw/16S.E.ALL.aln.gz"), "Eukaryota");
        }
        return _eukClass;
    }

    /**
     * Collects every taxon whose classification list contains {@code classif}
     * as an exact element.
     *
     * @param classif the classification entry to look for
     * @param map     taxon-to-classification map to search
     * @return a new set holding the matching taxa; empty if none match
     */
    public static Set<Taxon> speciesInClassif(String classif, Map<Taxon, List<String>> map) {
        Set<Taxon> result = new HashSet<Taxon>();
        // Iterate entries so each taxon needs no second lookup in the map.
        for (Map.Entry<Taxon, List<String>> entry : map.entrySet()) {
            if (entry.getValue().contains(classif)) {
                result.add(entry.getKey());
            }
        }
        return result;
    }

    /**
     * Parses classification lines from {@code f} and keeps those whose first
     * entry contains {@code topLevel} (substring match).
     *
     * <p>Only lines that start with {@code '#'} and fully match
     * {@link #classficationPattern} are considered. If the same taxon occurs
     * more than once, the later line replaces the earlier one.
     *
     * @param f        file to read, line by line, through {@code IO.i}
     * @param topLevel text that the first classification entry must contain
     * @return a new map from taxon to its list of classification entries
     */
    public static Map<Taxon, List<String>> loadClassification(File f, String topLevel) {
        Map<Taxon, List<String>> result = new HashMap<Taxon, List<String>>();
        for (String line : IO.i(f)) {
            if (line.isEmpty() || line.charAt(0) != '#') {
                continue;
            }
            if (!classficationPattern.matcher(line).matches()) {
                continue;
            }
            // The code relies on element 0 being the taxon name (group 1) and
            // element 1 being the entry list (group 2).
            List<String> matches = StringUtils.multiSelectFirstRegex(classficationPattern, line);
            Taxon taxon = new Taxon(PreprocessGutellData.clean(matches.get(0)));
            // Split on the same separator the pattern accepts (';' plus optional
            // whitespace). Splitting on ";\\s+" left entries separated by a bare
            // ';' glued together, and the old clean-up of the last element then
            // deleted the inner ';' and fused two names into one. String.split
            // drops trailing empty strings, so the final ';' yields no extra entry.
            List<String> classes = Arrays.asList(matches.get(1).split(";\\s*"));
            if (!classes.get(0).contains(topLevel)) {
                continue;
            }
            result.put(taxon, classes);
        }
        return result;
    }

    /**
     * Prints every eukaryote taxon whose classification contains
     * {@code "Vertebrata"} (tab-separated from its classification list),
     * followed by the total number of loaded taxa.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Map<Taxon, List<String>> classes = SpeciesInfo.getEukClassifications();
        for (Map.Entry<Taxon, List<String>> entry : classes.entrySet()) {
            if (entry.getValue().contains("Vertebrata")) {
                System.out.println(entry.getKey() + "\t" + entry.getValue());
            }
        }
        System.out.println(classes.size());
    }
}

