/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.gigaword.index;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.index.Record;
import it.uniroma1.lcl.jlt.util.Strings;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Retrieves part-of-speech tagged sentences from the Gigaword sentence index.
 *
 * <p>The index location is read from
 * {@code Configuration.getInstance().getGigawordSentenceCompoundIndexDirectory()}.
 * Each indexed document is read through three multi-valued fields:
 * {@code "token"}, {@code "tag"} and {@code "sID"}.
 */
public class GigawordSentenceExtractor {
    /**
     * Returns sentences that contain the given word tagged with the given tag prefix.
     *
     * <p>The query string is parsed by a Lucene {@code QueryParser} against the
     * {@code "token"} field and is also compared literally against the stored
     * tokens, so it is expected to be a single plain token. A hit is kept when at
     * least one stored token equal to {@code queryString} has a tag (at the same
     * position in the {@code "tag"} values) starting with {@code tag}. Hits with
     * no such occurrence are skipped.
     *
     * <p>This method is best-effort: any failure while opening or searching the
     * index is printed to standard error and the sentences collected so far are
     * returned.
     *
     * @param queryString the word to look for; {@code null} yields an empty list
     * @param tag         the required tag prefix (e.g. {@code "NN"}); {@code null}
     *                    yields an empty list
     * @param HowMany     the maximum number of index hits to inspect, and therefore
     *                    the maximum number of sentences returned; values
     *                    {@code <= 0} yield an empty list
     * @return the matching sentences as records holding the {@code "token"},
     *         {@code "tag"} and {@code "sID"} values; never {@code null}
     */
    public List<Record> getSentencesContainingWord(String queryString, String tag, int HowMany) {
        List<Record> sentences = new ArrayList<Record>();
        // Nothing can match for these inputs; avoid touching the index at all.
        if (queryString == null || tag == null || HowMany <= 0) {
            return sentences;
        }
        Directory index = null;
        IndexSearcher searcher = null;
        try {
            index = FSDirectory.open(new File(Configuration.getInstance().getGigawordSentenceCompoundIndexDirectory()));
            Analyzer analyzer = new WhitespaceAnalyzer();
            Query q = new QueryParser(Version.LUCENE_CURRENT, "token", analyzer).parse(queryString);
            searcher = new IndexSearcher(index, true);
            TopScoreDocCollector collector = TopScoreDocCollector.create(HowMany, true);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            for (int i = 0; i < hits.length && sentences.size() < HowMany; ++i) {
                Document d = searcher.doc(hits[i].doc);
                String[] sID = d.getValues("sID");
                String[] tokens = d.getValues("token");
                String[] tags = d.getValues("tag");
                // Skip (rather than fail on) hits where the word is absent as a
                // literal token or never carries the requested tag.
                if (!hasTaggedOccurrence(tokens, tags, queryString, tag)) {
                    continue;
                }
                Record record = new Record();
                record.add("token", tokens);
                record.add("tag", tags);
                record.add("sID", sID);
                sentences.add(record);
            }
        }
        catch (Exception e) {
            // Deliberate best-effort: report the failure and return what was collected.
            e.printStackTrace();
        }
        finally {
            // Release the index resources on every path, including failures.
            if (searcher != null) {
                try {
                    searcher.close();
                }
                catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (index != null) {
                try {
                    index.close();
                }
                catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return sentences;
    }

    /**
     * Tells whether {@code word} occurs in {@code tokens} at a position whose
     * entry in {@code tags} starts with {@code tagPrefix}.
     *
     * <p>Only positions present in both arrays are examined, so a tag array
     * shorter than the token array cannot cause an out-of-range access.
     */
    private static boolean hasTaggedOccurrence(String[] tokens, String[] tags, String word, String tagPrefix) {
        if (tokens == null || tags == null) {
            return false;
        }
        int limit = Math.min(tokens.length, tags.length);
        for (int i = 0; i < limit; ++i) {
            if (word.equals(tokens[i]) && tags[i] != null && tags[i].startsWith(tagPrefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Demo entry point: prints up to 10 sentences in which "computer" carries a
     * tag starting with "NN", each followed by its tag sequence.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        GigawordSentenceExtractor gse = new GigawordSentenceExtractor();
        List<Record> sentences = gse.getSentencesContainingWord("computer", "NN", 10);
        for (Record sentence : sentences) {
            String sent = Strings.join(sentence.getAll("token"), " ");
            String tags = Strings.join(sentence.getAll("tag"), " ");
            int sID = Integer.parseInt(sentence.get("sID"));
            System.out.println(sID + ": " + sent + "\n" + tags + "\n\n");
        }
    }
}

