/*
 * Decompiled with CFR 0.152.
 */
package tools;

import java.util.List;
import java.util.regex.Pattern;
import nuts.io.CSV;
import nuts.io.IO;
import nuts.lang.StringUtils;
import nuts.util.CollUtils;

/**
 * Converts an HTML publication list into CSV rows printed through {@code IO.so}.
 *
 * <p>The input is read line by line; consecutive non-blank lines form one entry and blank
 * lines separate entries. Each entry is expected to have the shape
 * {@code <li>AUTHORS (20YY) TITLE <em>VENUE</em> LOCATION <br/> LINKS}. The CSV header is
 * printed first, followed by one row per entry in the reverse of the input order.
 */
public class ScrapeMyPubs {
    /** Input path used when none is supplied on the command line. */
    private static final String DEFAULT_INPUT = "/Users/bouchard/temp/pubs2";

    /** Matches a string made only of whitespace (or nothing at all). */
    private static final Pattern BLANK = Pattern.compile("^\\s*$");
    /** The parenthesised year, e.g. {@code (2010)}, separating authors from the rest. */
    private static final Pattern YEAR_SPLIT = Pattern.compile("[(]20..[)]");
    /** An opening or closing {@code <em>} tag; the venue sits between the two. */
    private static final Pattern EM_TAG = Pattern.compile("[<][/]?em[>]");
    /** Any spelling of a line break tag: {@code <br>}, {@code <br/>}, {@code </br>}. */
    private static final Pattern BR_TAG = Pattern.compile("[<][/]?br[/]?[>]");
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
    private static final Pattern LEADING_WHITESPACE = Pattern.compile("^\\s+");
    private static final Pattern TRAILING_PERIOD = Pattern.compile("[.]\\s*$");

    /** CSV rows accumulated by {@link #process(String)}, in input order. */
    private static final List<String> entries = CollUtils.list();

    /**
     * Reads the publication list, prints the CSV header and then every parsed row.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws IllegalArgumentException if an entry does not have the expected structure
     */
    public static void main(String[] args) {
        String input = args != null && args.length > 0 ? args[0] : DEFAULT_INPUT;
        IO.so(CSV.header("authors", "title", "booktitle", "year", "place", "paper-link", "slide-link", "supp-link", "code-link"));
        StringBuilder current = new StringBuilder();
        for (String line : IO.i(input)) {
            if (BLANK.matcher(line).matches()) {
                // A blank line closes the entry collected so far.
                ScrapeMyPubs.process(current.toString());
                current.setLength(0);
            } else {
                current.append(line).append(' ');
            }
        }
        // The last entry is not necessarily followed by a blank line.
        ScrapeMyPubs.process(current.toString());
        // Rows are emitted last-to-first relative to their order in the input.
        for (int i = entries.size() - 1; i >= 0; --i) {
            IO.so(entries.get(i));
        }
    }

    /** Returns {@code str}, or the empty string when {@code str} is {@code null}. */
    private static String noNull(String str) {
        return str == null ? "" : str;
    }

    /**
     * Normalises one extracted field: collapses whitespace runs to a single space, drops
     * leading whitespace, and removes a trailing period together with any whitespace after it.
     */
    private static String clean(String raw) {
        String collapsed = WHITESPACE_RUN.matcher(raw).replaceAll(" ");
        String leftTrimmed = LEADING_WHITESPACE.matcher(collapsed).replaceAll("");
        return TRAILING_PERIOD.matcher(leftTrimmed).replaceAll("");
    }

    /**
     * Returns {@code parts[index]}, failing with a message that names the missing piece and
     * quotes the offending entry instead of a bare index error.
     *
     * @throws IllegalArgumentException if {@code parts} has no element at {@code index}
     */
    private static String part(String[] parts, int index, String what, String entry) {
        if (index >= parts.length) {
            throw new IllegalArgumentException("Malformed publication entry (missing " + what + "): " + entry);
        }
        return parts[index];
    }

    /**
     * Parses one publication entry and appends its CSV row to {@link #entries}.
     * Whitespace-only input is ignored.
     *
     * @param current the concatenated lines of a single entry
     * @throws IllegalArgumentException if the year, the {@code <em>} venue, or the
     *     {@code <br>}-separated link section is missing
     */
    private static void process(String current) {
        if (BLANK.matcher(current).matches()) {
            return;
        }
        // AUTHORS (20YY) REST
        String[] fields = YEAR_SPLIT.split(current);
        String authors = clean(part(fields, 0, "authors", current).replace("<li>", ""));
        String date = StringUtils.selectFirstRegex("[(](20..)[)]", current);

        // TITLE <em>VENUE</em> TAIL
        String[] sub = EM_TAG.split(part(fields, 1, "text after the year", current));
        String title = clean(part(sub, 0, "title", current));
        String venue = clean(part(sub, 1, "<em> venue", current));

        // LOCATION <br> LINKS
        String[] subsub = BR_TAG.split(part(sub, 2, "text after </em>", current));
        String location = clean(part(subsub, 0, "location", current));
        String links = part(subsub, 1, "links after <br>", current);

        // Each link is recognised by its anchor text: any single character followed by the keyword.
        // Poster links are not extracted because the CSV has no column for them.
        String paperLink = StringUtils.selectFirstRegex("href[=]\"([^\"]+)\"[>].paper", links);
        String slideLink = StringUtils.selectFirstRegex("href[=]\"([^\"]+)\"[>].slide", links);
        String suppLink = StringUtils.selectFirstRegex("href[=]\"([^\"]+)\"[>].(supplementary material|appendix)", links);

        // noNull guards the regex lookups (presumably null when nothing matches — confirm
        // against StringUtils). The final "code-link" column is always left empty.
        entries.add(CSV.body(authors, title, venue, ScrapeMyPubs.noNull(date), location, ScrapeMyPubs.noNull(paperLink), ScrapeMyPubs.noNull(slideLink), ScrapeMyPubs.noNull(suppLink), ""));
    }
}

