/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.ukwac.index;

import it.uniroma1.lcl.jlt.Configuration;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index with one document per sentence from a directory of
 * {@code .xml} corpus files.
 *
 * <p>The input is read line by line. A line starting with {@code <s>} opens a
 * sentence, a line starting with {@code </s>} closes it and stores it in the
 * index, lines starting with {@code <text} are counted for progress logging,
 * and lines starting with {@code </text} are ignored. Every other line inside
 * a sentence is split on tabs and its first three columns are stored as the
 * fields {@code token}, {@code pos} and {@code lemma}. Each document also gets
 * an {@code sID} field holding a sentence counter that starts at 1 and keeps
 * running across files.
 */
public class UKWacSentenceIndexer {
    /** Number of tab-separated columns read from a token line: token, pos, lemma. */
    private static final int TOKEN_COLUMNS = 3;

    /** Writer for the sentence index; stays {@code null} if it could not be opened. */
    protected IndexWriter writer = null;
    /** Document of the sentence most recently opened by a {@code <s>} line. */
    protected Document currentDoc = null;

    /**
     * Opens (and overwrites, since the writer is created with {@code create = true})
     * the index in the directory given by the configuration.
     *
     * <p>An {@link IOException} while opening is only printed; {@link #writer}
     * then remains {@code null} and {@link #index(String)} reports the problem.
     */
    public UKWacSentenceIndexer() {
        try {
            SimpleFSDirectory dir = new SimpleFSDirectory(new File(Configuration.getInstance().getUKWacSentenceIndexDirectory()));
            this.writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29), true, IndexWriter.MaxFieldLength.UNLIMITED);
            this.writer.setMaxMergeDocs(Configuration.getInstance().getMaxMergeDocs());
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Indexes every file in {@code source} whose name ends with {@code .xml}
     * (case-insensitive), then optimizes and closes the writer.
     *
     * <p>Token lines outside a sentence, and token lines with fewer than three
     * tab-separated columns, are skipped. A failure to add a single sentence is
     * printed and indexing continues with the next sentence.
     *
     * @param source path of the directory holding the corpus files
     * @throws IOException if the writer was never opened, {@code source} cannot
     *         be listed as a directory, a file cannot be read, or the final
     *         optimize/close fails
     */
    public void index(String source) throws IOException {
        if (this.writer == null) {
            // The constructor swallows the opening failure; fail with the declared
            // exception type here instead of a NullPointerException further down.
            throw new IOException("Index writer is not available: the index could not be opened");
        }
        String[] files = new File(source).list();
        if (files == null) {
            // File.list() returns null when the path is not a readable directory.
            throw new IOException("Cannot list source directory: " + source);
        }
        int logNumPages = Configuration.getInstance().getLogNumPages();
        int sentenceId = 0;
        int totalDoc = 0;
        for (String file : files) {
            if (!file.toLowerCase().endsWith(".xml")) {
                continue;
            }
            System.out.println("Loading " + file);
            BufferedReader br = new BufferedReader(new FileReader(new File(source, file)));
            try {
                // null while no sentence is open, so stray lines are not collected.
                ArrayList<String[]> tokens = null;
                String line;
                // readLine() returning null is the only reliable end-of-file signal;
                // ready() merely says whether a read would block.
                while ((line = br.readLine()) != null) {
                    if (line.startsWith("<s>")) {
                        this.currentDoc = new Document();
                        tokens = new ArrayList<String[]>();
                        ++sentenceId;
                    } else if (line.startsWith("</s>")) {
                        if (tokens != null) {
                            this.addSentence(tokens, sentenceId);
                            // Close the sentence so a repeated </s> cannot re-add it.
                            tokens = null;
                        }
                    } else if (line.startsWith("<text")) {
                        ++totalDoc;
                        // Guard against a configured interval of 0 (division by zero).
                        if (logNumPages > 0 && totalDoc % logNumPages == 0) {
                            System.out.println("Document id: " + totalDoc);
                        }
                    } else if (!line.startsWith("</text") && tokens != null) {
                        String[] triple = line.split("\t");
                        // Skip lines that cannot supply token, pos and lemma.
                        if (triple.length >= TOKEN_COLUMNS) {
                            tokens.add(triple);
                        }
                    }
                }
            }
            finally {
                // Release the file handle even when reading fails part-way.
                br.close();
            }
        }
        this.writer.optimize();
        this.writer.close();
    }

    /**
     * Fills {@link #currentDoc} with the collected tokens and the sentence id and
     * adds it to the index. An {@link IOException} from the writer is printed and
     * otherwise ignored so that one bad sentence does not abort the whole run.
     */
    private void addSentence(ArrayList<String[]> tokens, int sentenceId) {
        try {
            for (String[] triple : tokens) {
                this.currentDoc.add(new Field("token", triple[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
                this.currentDoc.add(new Field("pos", triple[1], Field.Store.YES, Field.Index.NOT_ANALYZED));
                this.currentDoc.add(new Field("lemma", triple[2], Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
            this.currentDoc.add(new Field("sID", Integer.toString(sentenceId), Field.Store.YES, Field.Index.NOT_ANALYZED));
            this.writer.addDocument(this.currentDoc);
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Indexes the configured corpus directory and prints the elapsed time.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("::Indexing source sentences: " + Configuration.getInstance().getUKWacDirectory());
        long start = System.currentTimeMillis();
        UKWacSentenceIndexer xtd = new UKWacSentenceIndexer();
        try {
            xtd.index(Configuration.getInstance().getUKWacDirectory());
        }
        catch (IOException e) {
            e.printStackTrace();
            System.out.println("::Interrupted: see exception");
        }
        long end = System.currentTimeMillis();
        System.out.println("::Terminated in " + (double)(end - start) / 1000.0 + " sec");
    }
}

