/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.wiki;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.wiki.WikiParsingUtility;
import it.uniroma1.lcl.jlt.wiki.WikiTokenizer;
import it.uniroma1.lcl.jlt.wiki.data.WikiPageInfo;
import it.uniroma1.lcl.jlt.wiki.iterator.WikiDumpIterator;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.annolab.tt4j.DefaultExecutableResolver;
import org.annolab.tt4j.DefaultModelResolver;
import org.annolab.tt4j.ExecutableResolver;
import org.annolab.tt4j.ModelResolver;
import org.annolab.tt4j.PlatformDetector;
import org.annolab.tt4j.TokenHandler;
import org.annolab.tt4j.TreeTaggerWrapper;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

public class WikiSentenceIndexer {
    private static Configuration config = Configuration.getInstance();
    private WikiDumpIterator wikiIterator = null;
    private IndexWriter writer = null;
    private Document currentDoc = null;
    private List<Document> docs = null;
    private String currentWikiTitle = null;
    private boolean bDisambiguationPage = false;

    public WikiSentenceIndexer() {
        try {
            SimpleFSDirectory dir = new SimpleFSDirectory(new File(config.getWikipediaSentenceIndexDirectory()));
            this.writer = new IndexWriter((Directory)dir, (Analyzer)new StandardAnalyzer(Version.LUCENE_29), IndexWriter.MaxFieldLength.UNLIMITED);
            this.writer.setMaxMergeDocs(config.getMaxMergeDocs());
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    public void index(String source) throws IOException {
        this.wikiIterator = new WikiDumpIterator(source);
        int totalPages = 0;
        int analyzedPages = 0;
        int LOG_NUM_PAGES = config.getLogNumPages();
        try {
            TreeTaggerWrapper tt = new TreeTaggerWrapper();
            tt.setExecutableProvider((ExecutableResolver)new DefaultExecutableResolver(){
                {
                    this._additionalPaths.add(Configuration.getInstance().getTreeTaggerDirectory());
                }
            });
            tt.setPlatformDetector(new PlatformDetector(){

                public String getExecutableSuffix() {
                    return "";
                }
            });
            tt.setModel(config.getTreeTaggerModel());
            tt.setModelProvider((ModelResolver)new DefaultModelResolver(){
                {
                    this._additionalPaths.add("");
                }
            });
            tt.setHandler((TokenHandler)new TokenHandler<String>(){

                public void token(String token, String pos, String lemma) {
                    if (WikiSentenceIndexer.this.currentDoc == null) {
                        WikiSentenceIndexer.this.currentDoc = new Document();
                        WikiSentenceIndexer.this.currentDoc.add((Fieldable)new Field("wikipage", WikiSentenceIndexer.this.currentWikiTitle, Field.Store.YES, Field.Index.NOT_ANALYZED));
                        WikiSentenceIndexer.this.currentDoc.add((Fieldable)new Field("disambig", WikiSentenceIndexer.this.bDisambiguationPage ? "1" : "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
                        WikiSentenceIndexer.this.docs.add(WikiSentenceIndexer.this.currentDoc);
                    }
                    if (token == null || pos == null || lemma == null) {
                        System.out.println(String.valueOf(WikiSentenceIndexer.this.currentWikiTitle) + ":" + token + "," + pos + "," + lemma);
                    }
                    WikiSentenceIndexer.this.currentDoc.add((Fieldable)new Field("token", token, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    WikiSentenceIndexer.this.currentDoc.add((Fieldable)new Field("pos", pos, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    WikiSentenceIndexer.this.currentDoc.add((Fieldable)new Field("lemma", lemma, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    if (pos.equals("SENT")) {
                        WikiSentenceIndexer.this.currentDoc = null;
                    }
                }
            });
            this.wikiIterator.open();
            while (this.wikiIterator.hasNext()) {
                WikiPageInfo w = this.wikiIterator.next();
                this.currentWikiTitle = w.getTitle();
                this.bDisambiguationPage = w.isDisambiguationPage();
                if (++totalPages % LOG_NUM_PAGES == 0) {
                    System.out.println("Document id: " + w.getId());
                }
                System.out.println(this.currentWikiTitle);
                if (w.getText() == null || w.isRedirectionPage()) continue;
                ++analyzedPages;
                WikiTokenizer wt = new WikiTokenizer(w.getText(), w.getId());
                String cleanTxt = wt.getText(false);
                this.currentDoc = null;
                this.docs = new ArrayList<Document>();
                try {
                    try {
                        tt.setModel(config.getTreeTaggerModel());
                        ArrayList<String> tokens = new ArrayList<String>();
                        String[] stringArray = cleanTxt.split("\\s+");
                        int n = stringArray.length;
                        int n2 = 0;
                        while (n2 < n) {
                            String token = stringArray[n2];
                            tokens.add(WikiParsingUtility.extractText(token));
                            ++n2;
                        }
                        tt.process(tokens);
                        for (Document doc : this.docs) {
                            try {
                                this.writer.addDocument(doc);
                            }
                            catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                    catch (Exception e) {
                        e.printStackTrace();
                        tt.destroy();
                        continue;
                    }
                }
                catch (Throwable throwable) {
                    tt.destroy();
                    throw throwable;
                }
                tt.destroy();
            }
        }
        finally {
            System.out.println("Total pages: " + totalPages + ". Analyzed: " + analyzedPages);
            this.wikiIterator.close();
            this.writer.optimize();
            this.writer.close();
        }
    }

    public static void main(String[] args) {
        System.out.println("::Part-of-speech tagging and indexing source sentences: " + config.getWikipediaXMLDump());
        long start = System.currentTimeMillis();
        WikiSentenceIndexer xtd = new WikiSentenceIndexer();
        try {
            xtd.index(config.getWikipediaXMLDump());
        }
        catch (IOException e) {
            e.printStackTrace();
            System.out.println("::Interrupted: see exception");
        }
        long end = System.currentTimeMillis();
        System.out.println("::Terminated in " + (double)(end - start) / 1000.0 + " sec");
    }
}

