/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.wiki.data;

import it.uniroma1.lcl.jlt.util.Stopwords;
import it.uniroma1.lcl.jlt.wordnet.WordNet;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;

/**
 * Singleton wrapper around a Lucene index of Wikipedia sentences that looks up
 * disambiguation pages and collects the lemmas mentioned on them.
 *
 * <p>The index is opened read-only from {@link #INDEX_DIR}. Lazy creation in
 * {@link #getInstance()} performs no synchronization, and the instance cannot be
 * used for searching once {@link #closeIndex()} has been called.
 */
public class WikiDisambiguationPage {
    /** Location of the Lucene index, resolved against the working directory. */
    private static final String INDEX_DIR = "./resources/jlt/wiki_sentences";
    /** Bare lemma that is discarded from the results. */
    private static final String DISAMBIGUATION = "disambiguation";

    private static WikiDisambiguationPage instance = null;

    private final SimpleFSDirectory dir;
    private final IndexSearcher is;

    /**
     * Opens the index directory and a read-only searcher on top of it.
     *
     * <p>Both resources are created here, in a fixed order, so that the directory
     * path is guaranteed to be available before the directory is opened.
     *
     * @throws IOException if the directory or the index cannot be opened
     */
    private WikiDisambiguationPage() throws IOException {
        SimpleFSDirectory directory = new SimpleFSDirectory(new File(INDEX_DIR));
        IndexSearcher searcher;
        try {
            searcher = new IndexSearcher((Directory) directory, true);
        } catch (IOException e) {
            // Do not leave the directory open when the searcher cannot be created.
            directory.close();
            throw e;
        }
        this.dir = directory;
        this.is = searcher;
    }

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return the shared instance, or {@code null} if opening the index failed
     *         with an {@link IOException} (the stack trace is printed)
     */
    public static WikiDisambiguationPage getInstance() {
        if (instance == null) {
            try {
                instance = new WikiDisambiguationPage();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return instance;
    }

    /**
     * Collects the lemmas found on the disambiguation page of {@code lemma}.
     *
     * <p>The page title is the lemma with its first character upper-cased. The
     * title followed by {@code " (disambiguation)"} is searched first; if that
     * yields no hits, the plain title is searched instead. Only documents whose
     * {@code disambig} field equals 1 contribute, and only lemmas whose POS tag
     * starts with {@code 'N'} are kept. The result is then passed through
     * {@link #removeStopWords(Set)}.
     *
     * @param lemma the word to look up; {@code null} or empty yields an empty set
     * @return the cleaned set of close words, never {@code null}; empty if nothing
     *         was found or the search failed with an {@link IOException}
     */
    public Set<String> getCloseWords(String lemma) {
        Set<String> closeWords = new HashSet<String>();
        if (lemma == null || lemma.length() == 0) {
            return closeWords;
        }
        // Fixed locale: the default locale must not influence the page title
        // (e.g. "i" upper-cases to a dotted capital under Turkish rules).
        String title = lemma.substring(0, 1).toUpperCase(java.util.Locale.ENGLISH) + lemma.substring(1);
        try {
            Hits hits = this.searchByTitle(title + " (disambiguation)");
            if (hits.length() == 0) {
                hits = this.searchByTitle(title);
            }
            for (int i = 0; i < hits.length(); ++i) {
                collectNouns(hits.doc(i), closeWords);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return this.removeStopWords(closeWords);
    }

    /** Runs an exact term query for {@code title} against the {@code wikipage} field. */
    private Hits searchByTitle(String title) throws IOException {
        TermQuery query = new TermQuery(new Term("wikipage", title));
        return this.is.search((Query) query);
    }

    /** Adds to {@code sink} the 'N'-tagged lemmas of {@code doc} if it is flagged as a disambiguation page. */
    private static void collectNouns(Document doc, Set<String> sink) {
        String flag = doc.get("disambig");
        if (flag == null || Integer.parseInt(flag) != 1) {
            return;
        }
        String[] lemmas = doc.getValues("lemma");
        String[] pos = doc.getValues("pos");
        if (lemmas == null || pos == null) {
            return;
        }
        // The two arrays are read in parallel; never index past the shorter one.
        int count = Math.min(lemmas.length, pos.length);
        for (int j = 0; j < count; ++j) {
            String tag = pos[j];
            if (tag == null || tag.length() == 0 || tag.charAt(0) != 'N') {
                continue;
            }
            String word = lemmas[j].replace("_(disambiguation)", "");
            if (!DISAMBIGUATION.equals(word)) {
                sink.add(word);
            }
        }
    }

    /**
     * Drops stopwords and replaces each remaining word by a WordNet stem.
     *
     * <p>For every non-stopword the first stem returned by
     * {@code WordNet.getWordNetStems} is used. If there is no stem, or that stem
     * is empty, the word itself is kept unless it is empty too.
     *
     * @param closeWords the words to clean; not modified
     * @return a new set with the cleaned words
     */
    public Set<String> removeStopWords(Set<String> closeWords) {
        Set<String> cleanWords = new HashSet<String>();
        Stopwords sw = Stopwords.getInstance();
        for (String w : closeWords) {
            if (sw.isStopword(w)) {
                continue;
            }
            Set<String> stems = WordNet.getInstance().getWordNetStems(w);
            String stem = (stems == null || stems.isEmpty()) ? null : stems.iterator().next();
            // Compare by content: reference equality against "" only matches the interned literal.
            if (stem != null && stem.length() > 0) {
                cleanWords.add(stem);
            } else if (w.length() > 0) {
                cleanWords.add(w);
            }
        }
        return cleanWords;
    }

    /**
     * Closes the searcher and the underlying directory.
     *
     * <p>The directory is closed even when closing the searcher fails.
     *
     * @throws IOException if closing either resource fails
     */
    public void closeIndex() throws IOException {
        try {
            this.is.close();
        } finally {
            this.dir.close();
        }
    }
}

