/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.wiki.disambiguation;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.util.Collections;
import it.uniroma1.lcl.jlt.util.Files;
import it.uniroma1.lcl.jlt.util.Pair;
import it.uniroma1.lcl.jlt.util.Sets;
import it.uniroma1.lcl.jlt.util.Strings;
import it.uniroma1.lcl.jlt.wiki.disambiguation.WikiDisambiguationItem;
import it.uniroma1.lcl.jlt.wiki.disambiguation.WikiDisambiguationPage;
import it.uniroma1.lcl.jlt.wiki.iterator.WikiDumpIterator;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from the disambiguation pages of a Wikipedia XML dump.
 *
 * <p>Each page is parsed into a {@link WikiDisambiguationPage}; one Lucene document is
 * written for the page itself and one for each of its items.
 */
public class WikiDisambiguationIndexer {
    /** Lucene writer opened by the constructor and closed by {@code createIndex}. */
    protected IndexWriter writer = null;
    /** Wiki markup prefix of a list item; the parsing code in this class uses the literal directly. */
    private final String itemInit = "*";
    /** Wiki markup prefix of a section heading; the parsing code in this class uses the literal directly. */
    private final String catInit = "=";
    /** Wiki markup prefix of a nested list item; the parsing code in this class uses the literal directly. */
    private final String subItemInit = "**";
    /** Running counter; incremented once per created item and used as that item's id. */
    private long itemCounter = 0L;

    /**
     * Opens the Lucene index writer on the directory returned by
     * {@code Configuration.getWikipediaDisambiguationIndexDirectory()} and applies the
     * configured maximum number of merged documents.
     *
     * <p>Failing to open the index used to be logged and ignored, which left
     * {@link #writer} {@code null} and deferred the failure to the first write. The
     * failure is now reported immediately with the original cause attached.
     *
     * @throws IllegalStateException if the index directory or writer cannot be opened
     */
    public WikiDisambiguationIndexer() {
        Configuration config = Configuration.getInstance();
        try {
            SimpleFSDirectory dir = new SimpleFSDirectory(new File(config.getWikipediaDisambiguationIndexDirectory()));
            this.writer = new IndexWriter((Directory)dir, (Analyzer)new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
            this.writer.setMaxMergeDocs(config.getMaxMergeDocs());
        }
        catch (IOException e) {
            throw new IllegalStateException("Cannot open the Wikipedia disambiguation index writer", e);
        }
    }

    /*
     * Could not resolve type clashes
     * Unable to fully structure code
     */
    private WikiDisambiguationPage parseDisambiguationPage(String pageContent) {
        config = Configuration.getInstance();
        result = null;
        lines = pageContent.split("\n");
        metainfo = "";
        metainfoSB = new StringBuffer();
        labelStack /* !! */  = new ArrayList<Pair<String, Integer>>();
        title = "";
        parentRawLine = "";
        parentId = -1L;
        isMetaInfoPart = false;
        isComment = false;
        i = 0;
        while (i < lines.length) {
            block17: {
                block20: {
                    block21: {
                        block19: {
                            block18: {
                                trimLine = lines[i].trim();
                                if (trimLine.isEmpty()) break block17;
                                if (Strings.containsOneOf(trimLine, config.getShortDisambiguationPrefix())) break;
                                if (trimLine.startsWith("&lt;!--")) {
                                    isComment = true;
                                }
                                if (!isComment) break block18;
                                if (trimLine.contains("-->") || trimLine.contains("--&gt;")) {
                                    isComment = false;
                                }
                                break block17;
                            }
                            if (!trimLine.startsWith("<title>")) break block19;
                            title = trimLine.replace("<title>", "").replace("</title>", "").replace(" (" + Collections.getFirst(config.getDisambiguationPrefixes()) + ")", "").toLowerCase();
                            result = new WikiDisambiguationPage(title);
                            System.out.println("**** TITLE ****");
                            System.out.println(title);
                            break block17;
                        }
                        if (!trimLine.startsWith("<text xml:space=\"preserve\">") && !isMetaInfoPart) break block20;
                        headline = trimLine.replace("<text xml:space=\"preserve\">", "");
                        if (headline.isEmpty() || headline.startsWith("{{")) break block21;
                        metainfo = metainfoSB.toString();
                        System.out.println("**** METAINFO ****");
                        System.out.println(metainfo);
                        isMetaInfoPart = false;
                        break block20;
                    }
                    isMetaInfoPart = true;
                    metainfoSB.append(headline);
                    if (headline.isEmpty() || !headline.contains("{{") || headline.contains("}}")) break block17;
                    if (++i <= lines.length) ** GOTO lbl50
                    break;
lbl-1000:
                    // 1 sources

                    {
                        metainfoSB.append(lines[i]);
                        if (lines[i].contains("}}")) break block17;
                        ++i;
lbl50:
                        // 2 sources

                        ** while (i < lines.length)
                    }
lbl51:
                    // 1 sources

                    break block17;
                }
                if (!isMetaInfoPart) {
                    lineInit = String.valueOf(trimLine.charAt(0));
                    if (lineInit.equals("=")) {
                        categoryLineLevel = this.getNumberOf(trimLine, "=".charAt(0));
                        if (categoryLineLevel > this.getLastLevel(labelStack /* !! */ )) {
                            labelStack /* !! */ .add(new Pair<String, Integer>(trimLine.replaceAll("=", ""), categoryLineLevel));
                        } else if (categoryLineLevel == this.getLastLevel(labelStack /* !! */ )) {
                            labelStack /* !! */ .remove(labelStack /* !! */ .size() - 1);
                            labelStack /* !! */ .add(new Pair<String, Integer>(trimLine.replaceAll("=", ""), categoryLineLevel));
                        } else if (categoryLineLevel < this.getLastLevel(labelStack /* !! */ )) {
                            tempLabelStack = new ArrayList<Pair<String, Integer>>();
                            for (Pair cl : labelStack /* !! */ ) {
                                if ((Integer)cl.getSecond() >= categoryLineLevel) continue;
                                tempLabelStack.add(cl);
                            }
                            tempLabelStack.add(new Pair<String, Integer>(trimLine.replaceAll("=", ""), categoryLineLevel));
                            labelStack /* !! */  = tempLabelStack;
                        }
                        System.out.println("*** STACK ***");
                        System.out.println(labelStack /* !! */ );
                    } else if (!(lineInit.equals("<") || lineInit.equals("{") || labelStack /* !! */ .size() > 0 && !lineInit.equals("*"))) {
                        if (!trimLine.startsWith("**")) {
                            parentRawLine = trimLine;
                            parentId = this.itemCounter;
                        }
                        if ((wdi = this.createItem(title, this.stack2list(labelStack /* !! */ ), trimLine, parentRawLine, parentId)) != null) {
                            result.addItem(wdi);
                        }
                    }
                }
            }
            ++i;
        }
        result.parseMetaInfos(metainfoSB.toString());
        return result;
    }

    /**
     * Projects the label stack onto its label texts, dropping the heading levels.
     *
     * @param labelStack pairs of (label, heading level), outermost first
     * @return a new list holding the first component of each pair, in the same order
     */
    private List<String> stack2list(List<Pair<String, Integer>> labelStack) {
        List<String> labels = new ArrayList<String>();
        java.util.Iterator<Pair<String, Integer>> cursor = labelStack.iterator();
        while (cursor.hasNext()) {
            labels.add(cursor.next().getFirst());
        }
        return labels;
    }

    /**
     * Returns the heading level stored in the last entry of the label stack.
     *
     * @param labelStack pairs of (label, heading level)
     * @return the level of the last entry, or {@code 0} when the stack is empty
     */
    private int getLastLevel(List<Pair<String, Integer>> labelStack) {
        int depth = labelStack.size();
        if (depth <= 0) {
            return 0;
        }
        Pair<String, Integer> top = labelStack.get(depth - 1);
        return top.getSecond();
    }

    /**
     * Counts how many times {@code cue} is repeated at the very beginning of {@code test}.
     *
     * @param test the string to inspect
     * @param cue the character to count
     * @return the length of the leading run of {@code cue}; {@code 0} if the string is
     *         empty or starts with another character
     */
    private int getNumberOf(String test, char cue) {
        final int length = test.length();
        int run = 0;
        for (int pos = 0; pos < length; ++pos) {
            if (test.charAt(pos) != cue) {
                break;
            }
            ++run;
        }
        return run;
    }

    /**
     * Extracts the target of the first bold wiki link ({@code '''[[target|label]]'''})
     * found in a line.
     *
     * @param rawItem the raw markup line
     * @return the text between the opening marker and the first {@code |} (or the closing
     *         marker when there is no {@code |}); {@code null} when the markers are
     *         missing, out of order, or enclose only {@code |} characters
     */
    private String getLinkFromOutOfCategoryContent(String rawItem) {
        final String opening = "'''[[";
        final String closing = "]]'''";
        int from = rawItem.indexOf(opening);
        int to = rawItem.indexOf(closing);
        if (from < 0 || to < 0 || from >= to) {
            return null;
        }
        String inner = rawItem.substring(from + opening.length(), to);
        String[] pieces = inner.split("\\|");
        if (pieces.length == 0) {
            return null;
        }
        return pieces[0];
    }

    /**
     * Extracts the target of the first wiki link ({@code [[target|label]]}) in a line.
     *
     * @param rawItem the raw markup line
     * @return the text between {@code [[} and the first {@code |} (or {@code ]]} when
     *         there is no {@code |}); {@code null} when the first {@code ]]} does not
     *         follow the first {@code [[}, or the link holds only {@code |} characters
     */
    private String getLink(String rawItem) {
        final String opening = "[[";
        final String closing = "]]";
        int from = rawItem.indexOf(opening);
        int to = rawItem.indexOf(closing);
        if (from < 0 || to < 0 || from >= to) {
            return null;
        }
        String inner = rawItem.substring(from + opening.length(), to);
        String[] pieces = inner.split("\\|");
        if (pieces.length == 0) {
            return null;
        }
        return pieces[0];
    }

    /**
     * Extracts the parenthesised qualifier of a title, e.g. {@code planet} from
     * {@code Mercury (planet)}.
     *
     * @param title the title to inspect
     * @return the trimmed text between the first {@code (} and the first {@code )};
     *         {@code null} when either is missing or the first {@code )} precedes the
     *         first {@code (}
     */
    private String getTag(String title) {
        int open = title.indexOf('(');
        int close = title.indexOf(')');
        boolean wellFormed = open >= 0 && close > open;
        if (!wellFormed) {
            return null;
        }
        String inside = title.substring(open + 1, close);
        return inside.trim();
    }

    /**
     * Strips from the left of {@code text} every leading character whose one-character
     * string is contained in {@code garbage}.
     *
     * @param text the text to clean
     * @param garbage the one-character strings to strip
     * @return the remaining text, or {@code null} when nothing remains (including when
     *         {@code text} is empty)
     */
    public static String leftClean(String text, Collection<String> garbage) {
        final int length = text.length();
        int keepFrom = 0;
        for (; keepFrom < length; ++keepFrom) {
            String current = String.valueOf(text.charAt(keepFrom));
            if (!garbage.contains(current)) {
                break;
            }
        }
        return keepFrom < length ? text.substring(keepFrom) : null;
    }

    /**
     * Classifies the layout of an item line.
     *
     * <p>Leading quotes, spaces and asterisks are ignored for the first two checks:
     * <ol>
     *   <li>starts with {@code [[} &rarr; {@code TERM_GLOSS};</li>
     *   <li>starts with {@code lemma} &rarr; {@code LEMMA_GLOSS};</li>
     *   <li>contains a comma and the text before the first comma contains the literal
     *       {@code ''.''} or {@code "."} &rarr; {@code FORMATTED_TERM_GLOSS};</li>
     *   <li>contains a comma &rarr; {@code ONLY_GLOSS};</li>
     *   <li>anything else, including a line made only of stripped characters
     *       &rarr; {@code UNSUPPORTED_PATTERN}.</li>
     * </ol>
     *
     * @param trimLine the item line
     * @param lemma the lemma of the page being parsed
     * @return the detected pattern, never {@code null}
     */
    private DisambiguationGlossPattern isTermGloss(String trimLine, String lemma) {
        Set<String> garbage = Sets.varargsToHashSet("'", "\"", " ", "*");
        String testLine = WikiDisambiguationIndexer.leftClean(trimLine, garbage);
        if (testLine == null) {
            return DisambiguationGlossPattern.UNSUPPORTED_PATTERN;
        }
        String candidate = testLine.trim();
        if (candidate.startsWith("[[")) {
            return DisambiguationGlossPattern.TERM_GLOSS;
        }
        if (candidate.startsWith(lemma)) {
            return DisambiguationGlossPattern.LEMMA_GLOSS;
        }
        String line = trimLine.trim();
        int firstComma = line.indexOf(',');
        if (firstComma < 0) {
            return DisambiguationGlossPattern.UNSUPPORTED_PATTERN;
        }
        // substring instead of split(",")[0]: split drops trailing empty strings, so a line
        // made only of commas yields an empty array and indexing it would throw.
        String head = line.substring(0, firstComma);
        if (head.contains("''.''") || head.contains("\".\"")) {
            return DisambiguationGlossPattern.FORMATTED_TERM_GLOSS;
        }
        return DisambiguationGlossPattern.ONLY_GLOSS;
    }

    /**
     * Derives a synonym from an item title: the title without its parenthesised
     * qualifier, provided it differs from the lemma.
     *
     * <p>The text is cut exactly at the opening parenthesis and then trimmed. The previous
     * cut was one character too early, which truncated titles such as {@code Foo(bar)}
     * to {@code Fo}.
     *
     * @param title the item title; a title that starts with {@code (} is used as is
     * @param lemma the lemma of the page
     * @return the title without qualifier, or {@code null} when it equals the lemma
     *         (ignoring case)
     */
    private String getSynonym(String title, String lemma) {
        String baseTitle = title;
        int firstIndexOfBracket = title.indexOf('(');
        if (firstIndexOfBracket > 0) {
            baseTitle = title.substring(0, firstIndexOfBracket).trim();
        }
        if (baseTitle.equalsIgnoreCase(lemma)) {
            return null;
        }
        return baseTitle;
    }

    /**
     * Splits an item line into its title and its gloss according to the pattern detected
     * by {@code isTermGloss}.
     *
     * <ul>
     *   <li>{@code TERM_GLOSS}: title is the first wiki link target, gloss is the text
     *       after the first {@code ]]};</li>
     *   <li>{@code LEMMA_GLOSS}: title is the lemma, gloss is the line without it;</li>
     *   <li>{@code FORMATTED_TERM_GLOSS}: title comes from {@code getTitle}, gloss is the
     *       text after the first comma;</li>
     *   <li>{@code ONLY_GLOSS}: no title, gloss is the text after the first comma;</li>
     *   <li>{@code UNSUPPORTED_PATTERN}: neither title nor gloss.</li>
     * </ul>
     * Glosses taken from the line have leading quotes, commas and spaces stripped.
     *
     * @param trimLine the item line
     * @param lemma the lemma of the page
     * @return a pair (title, gloss); either component may be {@code null}
     */
    private Pair<String, String> getTermGloss(String trimLine, String lemma) {
        String title = null;
        String gloss = null;
        Set<String> garbage = Sets.varargsToHashSet("'", "\"", ",", " ");
        switch (this.isTermGloss(trimLine, lemma)) {
            case TERM_GLOSS: {
                title = this.getLink(trimLine);
                int linkEndIndex = trimLine.indexOf("]]") + 2;
                gloss = WikiDisambiguationIndexer.leftClean(trimLine.substring(linkEndIndex), garbage);
                break;
            }
            case LEMMA_GLOSS: {
                title = lemma;
                gloss = this.deleteLemma(trimLine, lemma);
                break;
            }
            case FORMATTED_TERM_GLOSS: {
                title = this.getTitle(trimLine);
                int glossStartIndex = trimLine.indexOf(",") + 1;
                gloss = WikiDisambiguationIndexer.leftClean(trimLine.substring(glossStartIndex), garbage);
                break;
            }
            case ONLY_GLOSS: {
                int glossStartIndex = trimLine.indexOf(",") + 1;
                gloss = WikiDisambiguationIndexer.leftClean(trimLine.substring(glossStartIndex), garbage);
                break;
            }
            default: {
                // UNSUPPORTED_PATTERN: nothing can be extracted from the line.
                break;
            }
        }
        return new Pair<String, String>(title, gloss);
    }

    /**
     * Extracts the title of a line whose term is wrapped in wiki quotes
     * ({@code ''title''}, {@code '''title'''}) or double quotes ({@code "title"}).
     *
     * <p>The opening delimiter is looked up in the text before the first comma; when both
     * kinds occur there, the leftmost one wins. The closing delimiter is searched
     * <em>after</em> the opening one. The previous implementation searched it from the
     * opening delimiter itself, so it always matched the opening delimiter and either
     * threw {@link StringIndexOutOfBoundsException} or returned an empty string.
     *
     * @param trimLine the item line
     * @return the text between the delimiters, or {@code null} when the line has no
     *         opening delimiter before the first comma or no matching closing delimiter
     */
    private String getTitle(String trimLine) {
        String line = trimLine.trim();
        int firstComma = line.indexOf(',');
        String head = firstComma >= 0 ? line.substring(0, firstComma) : line;
        int apostropheStart = head.indexOf("''");
        int quoteStart = head.indexOf('"');

        boolean quoteFirst = quoteStart >= 0 && (apostropheStart < 0 || quoteStart < apostropheStart);
        if (quoteFirst) {
            int quoteEnd = line.indexOf('"', quoteStart + 1);
            return quoteEnd < 0 ? null : line.substring(quoteStart + 1, quoteEnd);
        }
        if (apostropheStart < 0) {
            return null;
        }
        // Skip the whole opening run so that bold (''') and bold-italic (''''') markers do
        // not leak apostrophes into the title.
        int contentStart = apostropheStart;
        while (contentStart < line.length() && line.charAt(contentStart) == '\'') {
            ++contentStart;
        }
        int apostropheEnd = line.indexOf("''", contentStart);
        return apostropheEnd < 0 ? null : line.substring(contentStart, apostropheEnd);
    }

    /**
     * Removes the lemma from the beginning of a line.
     *
     * <p>Leading quotes, asterisks, spaces and {@code [} are skipped before looking for the
     * lemma; after removing it, leading quotes, commas, spaces and {@code ]} are skipped
     * as well.
     *
     * @param trimLine the line to process
     * @param lemma the lemma to remove
     * @return {@code trimLine} unchanged when the cleaned line does not start with the
     *         lemma (including when nothing is left after cleaning); otherwise the text
     *         following the lemma, or {@code null} when nothing follows it
     */
    private String deleteLemma(String trimLine, String lemma) {
        Set<String> garbage = Sets.varargsToHashSet("'", "\"", "*", " ", "[");
        Set<String> sgarbage = Sets.varargsToHashSet("'", "\"", ",", " ", "]");
        String testLine = WikiDisambiguationIndexer.leftClean(trimLine, garbage);
        // leftClean returns null when the whole line is garbage; the null check must come
        // before startsWith, otherwise such a line raises a NullPointerException.
        if (testLine == null || !testLine.startsWith(lemma)) {
            return trimLine;
        }
        String afterLemma = testLine.substring(lemma.length()).trim();
        return WikiDisambiguationIndexer.leftClean(afterLemma, sgarbage);
    }

    /**
     * Collects every piece of text enclosed between two consecutive occurrences of
     * {@code cue}, pairing the occurrences from left to right (1st with 2nd, 3rd with
     * 4th, ...).
     *
     * <p>An unmatched trailing cue is ignored. Previously it made the method compute a
     * substring from {@code cue.length() - 1}, which threw
     * {@link StringIndexOutOfBoundsException} for multi-character cues, and an empty cue
     * recursed without end.
     *
     * @param text the text to scan
     * @param cue the delimiter; an empty cue matches nothing
     * @param result the set the enclosed pieces are added to
     * @return {@code result}, for chaining
     */
    public static Set<String> getContentBetween(String text, String cue, Set<String> result) {
        if (cue.isEmpty()) {
            return result;
        }
        final int cueLength = cue.length();
        int open = text.indexOf(cue);
        while (open != -1) {
            int contentStart = open + cueLength;
            int close = text.indexOf(cue, contentStart);
            if (close == -1) {
                break;
            }
            result.add(text.substring(contentStart, close));
            open = text.indexOf(cue, close + cueLength);
        }
        return result;
    }

    /**
     * Separates a leading parenthesised group from a gloss and extracts the
     * single-quote-delimited synonyms it contains.
     *
     * <p>The split only happens when the gloss starts with {@code (} and some text other
     * than {@code )} follows the first {@code )}. In that case the gloss becomes
     * <em>everything</em> after the first {@code )}, with leading commas and spaces
     * removed. The previous implementation kept only the text up to the second {@code )},
     * silently truncating glosses that contain further parentheses.
     *
     * @param gloss the gloss to split
     * @return a pair (synonyms, gloss); the set may be empty, and the gloss is
     *         {@code null} when only commas and spaces follow the parenthesised group
     */
    private Pair<Set<String>, String> splitSynonymsGloss(String gloss) {
        List<String> garbage = Arrays.asList(",", " ");
        List<String> sgarbage = Arrays.asList("'");
        Set<String> resultingSynonyms = new HashSet<String>();
        String resultingGloss = gloss;
        int firstClose = gloss.indexOf(')');
        if (gloss.startsWith("(") && firstClose > 0) {
            String remainder = gloss.substring(firstClose + 1);
            // Same gate as before: a group followed by nothing but ')' is left untouched.
            boolean hasTextAfterGroup = !remainder.replace(")", "").isEmpty();
            if (hasTextAfterGroup) {
                String intraBrackets = gloss.substring(0, firstClose);
                resultingGloss = WikiDisambiguationIndexer.leftClean(remainder, garbage);
                Set<String> quoted = WikiDisambiguationIndexer.getContentBetween(intraBrackets, "'", new HashSet<String>());
                for (String candidate : quoted) {
                    String synonym = WikiDisambiguationIndexer.leftClean(candidate, sgarbage);
                    if (synonym != null) {
                        resultingSynonyms.add(synonym);
                    }
                }
            }
        }
        return new Pair<Set<String>, String>(resultingSynonyms, resultingGloss);
    }

    /**
     * Builds a disambiguation item from one content line of a page.
     *
     * <p>Three kinds of line are recognised:
     * <ul>
     *   <li>a plain sentence (no {@code *} / {@code #} prefix, contains a dot): the gloss is
     *       the line, with the lemma removed from the part up to the last dot when that
     *       part contains it; the title is the first bold link, if any;</li>
     *   <li>a sub-item ({@code **}): title and gloss come from {@code getTermGloss}; the
     *       father id is set, and when the sub-item has no gloss of its own the father
     *       gloss is taken from the parent line;</li>
     *   <li>a top-level item ({@code *} or {@code #}): title and gloss come from
     *       {@code getTermGloss}.</li>
     * </ul>
     * Items whose title starts with {@code List of} or {@code Index of} are discarded.
     *
     * <p>A missing gloss is stored as an empty string. Previously a {@code null} gloss
     * returned by {@code splitSynonymsGloss} raised a {@link NullPointerException} that was
     * caught and answered with {@code System.exit(0)}, terminating the whole run.
     *
     * @param lemma the lemma of the page
     * @param labelStack the section labels enclosing the line
     * @param trimLine the trimmed content line
     * @param parentRawItem the raw text of the closest preceding line that is not a sub-item
     * @param parentId the id recorded for that parent line
     * @return the new item, or {@code null} when the line is discarded
     */
    private WikiDisambiguationItem createItem(String lemma, List<String> labelStack, String trimLine, String parentRawItem, long parentId) {
        HashSet<String> synonyms = new HashSet<String>();
        String tag = "";
        String title = "";
        String fatherGloss = "";
        String gloss = "";
        String fatherId = "";
        String tempTitle = "";
        String tempGloss = "";
        String tempFatherGloss = "";

        boolean isBullet = trimLine.startsWith("*");
        boolean isSubItem = trimLine.startsWith("**");
        boolean isNumbered = trimLine.startsWith("#");

        if (!isBullet && !isNumbered && trimLine.contains(".")) {
            tempGloss = trimLine;
            int lastIndexOfDot = trimLine.lastIndexOf('.');
            if (lastIndexOfDot > 0) {
                String upToLastDot = trimLine.substring(0, lastIndexOfDot + 1);
                if (upToLastDot.contains(lemma)) {
                    tempGloss = this.deleteLemma(upToLastDot, lemma);
                }
            }
            tempTitle = this.getLinkFromOutOfCategoryContent(trimLine);
        }
        if (isSubItem) {
            tempFatherGloss = null;
            fatherId = String.valueOf(parentId);
            Pair<String, String> linkGloss = this.getTermGloss(trimLine, lemma);
            tempTitle = linkGloss.getFirst();
            tempGloss = linkGloss.getSecond();
            if (tempGloss == null) {
                // A sub-item without its own gloss inherits the gloss of its parent line.
                tempFatherGloss = this.getTermGloss(parentRawItem, lemma).getSecond();
            }
        }
        if (tempFatherGloss != null) {
            fatherGloss = tempFatherGloss;
        }
        if ((isBullet && !isSubItem) || isNumbered) {
            Pair<String, String> linkGloss = this.getTermGloss(trimLine, lemma);
            tempTitle = linkGloss.getFirst();
            tempGloss = linkGloss.getSecond();
        }

        if (tempTitle != null) {
            title = tempTitle;
            String tempSynonym = this.getSynonym(title, lemma);
            if (tempSynonym != null && !tempSynonym.trim().isEmpty()) {
                synonyms.add(tempSynonym);
            }
        }
        if (title.startsWith("List of") || title.startsWith("Index of")) {
            return null;
        }
        if (tempGloss != null) {
            Pair<Set<String>, String> synglos = this.splitSynonymsGloss(tempGloss);
            synonyms.addAll(synglos.getFirst());
            String remainingGloss = synglos.getSecond();
            // The remaining gloss is null when only separators follow the synonym group.
            gloss = remainingGloss == null ? "" : remainingGloss;
        }
        String tempTag = this.getTag(title);
        if (tempTag != null && !tempTag.trim().isEmpty()) {
            tag = tempTag.trim();
        }
        if (synonyms.isEmpty() && title.trim().isEmpty() && gloss.trim().isEmpty() && labelStack.isEmpty() && trimLine.isEmpty()) {
            return null;
        }
        ++this.itemCounter;
        WikiDisambiguationItem wdi = new WikiDisambiguationItem(this.itemCounter, lemma, labelStack, synonyms, tag, title, gloss, trimLine, fatherId, fatherGloss);
        System.out.println(wdi);
        return wdi;
    }

    /**
     * Reads every disambiguation page of a Wikipedia dump, appends its CSV form to a
     * report file and writes it to the Lucene index: one document for the page (lemma,
     * meta information and a {@code meta=1} marker) followed by one document per item.
     *
     * <p>The index writer, the dump iterator and the CSV writer are now closed even when
     * indexing fails; previously an exception left all three open. The item title is
     * also null-checked before being trimmed.
     *
     * <p>NOTE(review): {@code target} is not used -- the index location is fixed by the
     * writer opened in the constructor -- and the CSV report path is hard-coded.
     *
     * @param target unused
     * @param source location of the Wikipedia XML dump handed to {@link WikiDumpIterator}
     * @throws IOException if reading the dump or writing the report or the index fails
     */
    public void createIndex(String target, String source) throws IOException {
        BufferedWriter bw = Files.getBufferedWriter("/home/dimarco/wikidisambigpages.csv");
        try {
            System.out.println(source);
            WikiDumpIterator iterator = new WikiDumpIterator(source);
            iterator.open();
            try {
                while (iterator.hasNext()) {
                    String currPage = iterator.nextDisambiguationPage();
                    if (currPage == null) {
                        continue;
                    }
                    WikiDisambiguationPage wdp = this.parseDisambiguationPage(currPage);
                    bw.write(wdp.toCSV("\t"));
                    bw.flush();

                    Document pageDoc = new Document();
                    pageDoc.add(storedField("lemma", wdp.getLemma()));
                    for (Map.Entry<String, String> metaInfo : wdp.getMetaInfos().entrySet()) {
                        pageDoc.add(storedField(metaInfo.getKey(), metaInfo.getValue()));
                    }
                    pageDoc.add(storedField("meta", "1"));
                    this.writer.addDocument(pageDoc);

                    for (WikiDisambiguationItem wdi : wdp.getItems()) {
                        this.writer.addDocument(toDocument(wdi));
                    }
                }
                this.writer.optimize();
            } finally {
                try {
                    this.writer.close();
                } finally {
                    iterator.close();
                }
            }
        } finally {
            bw.close();
        }
    }

    /** Converts one item into its Lucene document; optional fields are added only when not blank. */
    private Document toDocument(WikiDisambiguationItem wdi) {
        Document itemDoc = new Document();
        itemDoc.add(storedField("id", String.valueOf(wdi.getId())));
        addIfNotBlank(itemDoc, "gloss", wdi.getGloss());
        addIfNotBlank(itemDoc, "fatherId", wdi.getFathrerId());
        addIfNotBlank(itemDoc, "fatherGloss", wdi.getFatherGloss());
        itemDoc.add(storedField("lemma", wdi.getLemma()));
        for (String label : wdi.getLabels()) {
            itemDoc.add(storedField("label", label));
        }
        String tag = wdi.getTag();
        addIfNotBlank(itemDoc, "tag", tag == null ? null : tag.trim());
        for (String synonym : wdi.getSynonyms()) {
            itemDoc.add(storedField("synonym", synonym));
        }
        String title = wdi.getTitle();
        addIfNotBlank(itemDoc, "title", title == null ? null : title.trim());
        addIfNotBlank(itemDoc, "rawItemText", wdi.getRawItemText());
        return itemDoc;
    }

    /** Adds a stored, non-analyzed field unless the value is {@code null} or only whitespace. */
    private static void addIfNotBlank(Document doc, String name, String value) {
        if (value != null && !value.trim().isEmpty()) {
            doc.add(storedField(name, value));
        }
    }

    /** Creates a stored, non-analyzed field. */
    private static Field storedField(String name, String value) {
        return new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
    }

    /**
     * Command-line entry point: builds the disambiguation index from the configured
     * Wikipedia XML dump and prints {@code Finito} when done. Any failure is reported
     * on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            WikiDisambiguationIndexer indexer = new WikiDisambiguationIndexer();
            String indexDirectory = Configuration.getInstance().getWikipediaDisambiguationIndexDirectory();
            String xmlDump = Configuration.getInstance().getWikipediaXMLDump();
            indexer.createIndex(indexDirectory, xmlDump);
            System.out.println("Finito");
        }
        catch (Exception failure) {
            failure.printStackTrace();
        }
    }

    /**
     * Layouts of an item line, as classified by {@code isTermGloss}.
     */
    public static enum DisambiguationGlossPattern {
        /** The line (ignoring leading quotes, spaces and asterisks) starts with a wiki link {@code [[...]]}. */
        TERM_GLOSS,
        /** The line (ignoring leading quotes, spaces and asterisks) starts with the page lemma. */
        LEMMA_GLOSS,
        /** The line has a comma and the text before the first comma contains the literal {@code ''.''} or {@code "."}. */
        FORMATTED_TERM_GLOSS,
        /** The line has a comma but matches none of the patterns above. */
        ONLY_GLOSS,
        /** The line matches none of the other patterns. */
        UNSUPPORTED_PATTERN;

    }
}

