/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.semeval10;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsDB;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsField;
import it.uniroma1.lcl.jlt.collocs.enumeration.LexiconField;
import it.uniroma1.lcl.jlt.semeval10.Semeval10;
import it.uniroma1.lcl.jlt.semeval10.data.SemevalInstance;
import it.uniroma1.lcl.jlt.util.IntegerCounter;
import it.uniroma1.lcl.jlt.util.Pair;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

public class SemEvalIndexer {
    private static final Log log = LogFactory.getLog(SemEvalIndexer.class);
    private static final String LEXICON_DIR = "/lexicon";
    private static final String COLLOCS_DIR = "/collocs";
    private final IntegerCounter<String> lexicon;
    private final IntegerCounter<Pair<String, String>> collocs;
    private final String indexDirectory;

    public SemEvalIndexer(CollocsDB collocsDB) {
        this.indexDirectory = Configuration.getInstance().getCollocsLocation(collocsDB);
        this.lexicon = new IntegerCounter("SemEval_counts");
        this.collocs = new IntegerCounter("SemEval_collocs");
    }

    public void loadCountInstructions() {
        for (String targetWord : Semeval10.getInstance().getTargetWords()) {
            int instanceCount = 0;
            for (SemevalInstance instance : Semeval10.getInstance().getInstances(targetWord, CollocsDB.SEMEVAL10_TRAIN)) {
                List<String> words = instance.getWords(Arrays.asList("n"), true, false, true);
                for (String string : words) {
                    this.lexicon.count(string);
                }
                for (Pair pair : instance.getPairsWords()) {
                    this.collocs.count(pair);
                }
                if (++instanceCount % 1000 != 0) continue;
                log.info((Object)("Analized " + instanceCount + " instances"));
            }
        }
    }

    public void saveCountInstructions() throws IOException {
        SimpleFSDirectory luceneDir = new SimpleFSDirectory(new File(String.valueOf(this.indexDirectory) + LEXICON_DIR));
        IndexWriter luceneWriter = new IndexWriter((Directory)luceneDir, (Analyzer)new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
        luceneWriter.setMaxMergeDocs(Configuration.getInstance().getMaxMergeDocs());
        SimpleFSDirectory collocsDir = new SimpleFSDirectory(new File(String.valueOf(this.indexDirectory) + COLLOCS_DIR));
        IndexWriter collocsWriter = new IndexWriter((Directory)collocsDir, (Analyzer)new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
        log.info((Object)("Saving SemEval Lexicon (" + this.lexicon.keySet().size() + " entry)"));
        for (String entry : this.lexicon.keySet()) {
            Document doc = new Document();
            doc.add((Fieldable)new Field(LexiconField.WORD.toString(), entry, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add((Fieldable)new Field(LexiconField.COUNT.toString(), Long.toString(((Integer)this.lexicon.get(entry)).intValue()), Field.Store.YES, Field.Index.NOT_ANALYZED));
            luceneWriter.addDocument(doc);
        }
        log.info((Object)("Saving SemEval Collocs (" + this.collocs.keySet().size() + " entry)"));
        int count = 0;
        for (Pair entry : this.collocs.keySet()) {
            if (++count % 500000 == 0) {
                log.info((Object)(String.valueOf(count) + " pairs saved in Collocs"));
            }
            Document doc = new Document();
            String word1 = (String)entry.getFirst();
            String word2 = (String)entry.getSecond();
            double dice = this.calculateDice(entry);
            doc.add((Fieldable)new Field(CollocsField.WORD1.toString(), word1, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add((Fieldable)new Field(CollocsField.WORD2.toString(), word2, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add((Fieldable)new Field(CollocsField.CO_OCCURR.toString(), Long.toString(((Integer)this.collocs.get(entry)).intValue()), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add((Fieldable)new Field(CollocsField.DICE.toString(), Double.toString(dice), Field.Store.YES, Field.Index.NOT_ANALYZED));
            collocsWriter.addDocument(doc);
        }
        luceneWriter.optimize();
        luceneWriter.close();
        collocsWriter.optimize();
        collocsWriter.close();
    }

    private double calculateDice(Pair<String, String> entry) {
        double dice = 2.0 * (double)((Integer)this.collocs.get(entry)).intValue() / (double)((Integer)this.lexicon.get(entry.getFirst()) + (Integer)this.lexicon.get(entry.getSecond()));
        return dice;
    }

    public static void test(CollocsDB collocsDb) throws IOException {
        SimpleFSDirectory dir = new SimpleFSDirectory(new File(Configuration.getInstance().getCollocsLocation(collocsDb)));
        IndexWriter writer = new IndexWriter((Directory)dir, (Analyzer)new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setMaxMergeDocs(Configuration.getInstance().getMaxMergeDocs());
        Document entry = new Document();
        entry.add((Fieldable)new Field("word", "ciccio", Field.Store.YES, Field.Index.NOT_ANALYZED));
        entry.add((Fieldable)new Field("count", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(entry);
        System.out.println("Adding ciccio");
        writer.commit();
        SimpleFSDirectory simpleDir = new SimpleFSDirectory(new File(Configuration.getInstance().getCollocsLocation(collocsDb)));
        IndexSearcher is = new IndexSearcher((Directory)simpleDir, true);
        Document entry2 = new Document();
        entry2.add((Fieldable)new Field("word", "franco", Field.Store.YES, Field.Index.NOT_ANALYZED));
        entry2.add((Fieldable)new Field("count", "3", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(entry2);
        TermQuery tq = new TermQuery(new Term("word", "ciccio"));
        Hits hits = is.search((Query)tq);
        System.out.println("Found " + hits.length() + " documents");
        int prevValue = Integer.parseInt(hits.doc(0).get("count"));
        Document entry3 = new Document();
        entry3.add((Fieldable)new Field("word", "ciccio", Field.Store.YES, Field.Index.NOT_ANALYZED));
        entry3.add((Fieldable)new Field("count", Integer.toString(++prevValue), Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.updateDocument(new Term("word", "franco"), entry3);
        writer.optimize();
        writer.close();
    }

    public static void main(String[] args) throws IOException {
        try {
            SemEvalIndexer factory = new SemEvalIndexer(CollocsDB.SEMEVAL10_TRAIN);
            factory.loadCountInstructions();
            factory.saveCountInstructions();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}

