/*
 * Decompiled with CFR 0.152.
 */
package wikt;

import java.util.HashSet;
import java.util.regex.Pattern;
import nuts.io.IO;
import nuts.lang.StringUtils;
import nuts.util.Counter;

/**
 * Command-line tool that counts, per language, how many input lines contain an
 * <code>{{IPA|...}}</code> template.
 *
 * <p>Two inputs are read through {@code IO.i}:
 * <ol>
 *   <li>{@code args[0]}: a list of accepted languages, one entry per line. Lines
 *       starting with {@code #} are skipped; the language name is whatever follows
 *       the last tab on the line.</li>
 *   <li>{@code args[1]}: the text to scan. A {@code <title>} line starts a new page
 *       and clears the current language; a {@code ==...==} heading whose text is an
 *       accepted language becomes the current language; every IPA line seen while a
 *       language is current adds one to that language's count.</li>
 * </ol>
 *
 * <p>Output is a {@code ---} separator followed by one {@code language<TAB>count}
 * line per counted language.
 */
public class Scrape {
    /** Matches a line containing a {@code ==...==} heading; group 1 is the text between the markers. */
    public static final Pattern flexLang = Pattern.compile("^.*==(.*)==.*");
    /** Matches a line containing a {@code <title>...</title>} element; group 1 is the title text. */
    public static final Pattern page = Pattern.compile(".*[<]title[>]([^<]*)[<][/]title[>].*");
    /** Matches a line containing an <code>{{IPA|...}}</code> template; group 1 is the template argument. */
    public static final Pattern ipa = Pattern.compile(".*[{][{]IPA[|]([^}]+)[}][}].*");

    /**
     * Entry point.
     *
     * @param args {@code args[0]} names the language list, {@code args[1]} names the
     *             text to scan; both are handed to {@code IO.i} unchanged
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            System.err.println("usage: Scrape <language-list> <dump>");
            System.exit(1);
        }
        HashSet<String> langs = loadLanguages(args[0]);
        Counter<String> langUsed = countIpaLines(args[1], langs);
        System.out.println("---");
        for (String key : langUsed) {
            IO.so(key + "\t" + langUsed.getCount(key));
        }
    }

    /** Reads the accepted-language list, skipping blank lines and {@code #} comment lines. */
    private static HashSet<String> loadLanguages(String source) {
        HashSet<String> langs = new HashSet<String>();
        for (String line : IO.i(source)) {
            // The emptiness check prevents charAt(0) from throwing on blank lines.
            if (line.isEmpty() || line.charAt(0) == '#') continue;
            // NOTE(review): presumably selectFirstRegex returns capture group 1, i.e. the
            // text after the last tab; its result for a line without a tab is not visible here.
            langs.add(StringUtils.selectFirstRegex("^.*\\t(.*)$", line));
        }
        return langs;
    }

    /** Scans the text line by line and counts IPA lines under each accepted language heading. */
    private static Counter<String> countIpaLines(String source, HashSet<String> langs) {
        Counter<String> langUsed = new Counter<String>();
        String currentLanguage = null;
        for (String line : IO.i(source)) {
            if (page.matcher(line).matches()) {
                // A new page begins: forget the language of the previous page.
                currentLanguage = null;
                continue;
            }
            if (flexLang.matcher(line).matches()) {
                // Deeper headings (===...===) leave stray '=' in the capture, so strip them.
                String candidate = StringUtils.selectFirstRegex(flexLang, line).replaceAll("[=]", "");
                // Headings that are not accepted languages leave the current language unchanged.
                if (langs.contains(candidate)) {
                    currentLanguage = candidate;
                }
                continue;
            }
            if (currentLanguage != null && ipa.matcher(line).matches()) {
                langUsed.incrementCount(currentLanguage, 1.0);
            }
        }
        return langUsed;
    }
}

