/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.semeval07;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsDB;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsField;
import it.uniroma1.lcl.jlt.collocs.enumeration.LexiconField;
import it.uniroma1.lcl.jlt.semeval07.wsi.Semeval07WSI;
import it.uniroma1.lcl.jlt.semeval07.wsi.Semeval07WSIInstance;
import it.uniroma1.lcl.jlt.util.IntegerCounter;
import it.uniroma1.lcl.jlt.util.Pair;
import java.io.File;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds co-occurrence statistics from the SemEval-2007 WSI instances and
 * stores them in two Lucene indexes ("/lexicon" and "/collocs") located under
 * the directory configured for {@link CollocsDB#SEMEVAL07_TEST}.
 *
 * <p>Typical usage (see {@link #main(String[])}): call
 * {@link #loadCountInstructions(int)}, then {@link #saveCountInstructions()},
 * and optionally {@link #exportCountInstructions()}.
 *
 * <p>All counters are held in static fields and the lazy singleton is not
 * synchronized, so this class is intended for single-threaded use.
 */
public class Semeval07WSIIndexer {
    private static final Log log = LogFactory.getLog(Semeval07WSIIndexer.class);

    /** Root directory of the SemEval-07 test collocation indexes. */
    private static final String indexDirectory = Configuration.getInstance().getCollocsLocation(CollocsDB.SEMEVAL07_TEST);
    private static final String LEXICON_DIR = "/lexicon";
    private static final String COLLOCS_DIR = "/collocs";

    /** Counter keyed by word pair. */
    private static IntegerCounter<Pair<String, String>> collocs = new IntegerCounter<Pair<String, String>>("SemEval07WSI_collocs");

    /** Counter keyed by single word; filled from the key set of {@link #collocs}. */
    private static IntegerCounter<String> lexicon = new IntegerCounter<String>("SemEval07WSI_lexicon");

    protected static final boolean removeStopwords = true;

    /** Set once both indexes have been written and closed successfully. */
    protected static boolean alreadyStored = false;

    private static Semeval07WSIIndexer instance;

    /**
     * Returns the shared indexer, creating it on first use.
     *
     * @return the singleton instance
     */
    public static Semeval07WSIIndexer getInstance() {
        if (instance == null) {
            instance = new Semeval07WSIIndexer();
        }
        return instance;
    }

    private Semeval07WSIIndexer() {
    }

    /**
     * Fills the pair and word counters from every SemEval-07 WSI instance.
     * Does nothing when the lexicon counter already holds entries.
     *
     * <p>Pairs in which either word has three characters or fewer are skipped.
     *
     * @param k value forwarded unchanged to
     *          {@code Semeval07WSIInstance.getAnyPairsWords(int)}
     *          (presumably a context-window size -- TODO confirm)
     */
    public void loadCountInstructions(int k) {
        if (lexicon.size() != 0) {
            return;
        }
        int instanceCount = 0;
        // The loop variable is deliberately not called "instance" so that it
        // does not shadow the static singleton field.
        for (Semeval07WSIInstance wsiInstance : Semeval07WSI.getInstance().getInstances()) {
            log.info("Target word: " + wsiInstance.getId());
            for (Pair<String, String> pair : wsiInstance.getAnyPairsWords(k)) {
                if (pair.getFirst().length() <= 3 || pair.getSecond().length() <= 3) {
                    continue;
                }
                log.debug("Pair: " + pair);
                collocs.count(pair);
            }
            instanceCount++;
            if (instanceCount % 1000 == 0) {
                log.info("Analized " + instanceCount + " instances");
            }
        }
        // NOTE(review): the lexicon is counted over the distinct pairs, i.e.
        // each word is counted once per distinct pair it belongs to, not once
        // per occurrence -- confirm this is the intended denominator for Dice.
        for (Pair<String, String> pair : collocs.keySet()) {
            lexicon.count(pair.getFirst());
            lexicon.count(pair.getSecond());
        }
    }

    /**
     * Writes the word counter to the "/lexicon" index and the pair counter
     * (with co-occurrence count and Dice score) to the "/collocs" index.
     * Both indexes are created from scratch.
     *
     * <p>The method is a no-op once a previous call has completed
     * successfully. An {@link IOException} is logged rather than propagated;
     * in that case the writers are still closed and a later call may retry.
     */
    public void saveCountInstructions() {
        if (alreadyStored) {
            return;
        }
        IndexWriter lexiconWriter = null;
        IndexWriter collocsWriter = null;
        try {
            Directory lexiconDir = new SimpleFSDirectory(new File(indexDirectory + LEXICON_DIR));
            lexiconWriter = new IndexWriter(lexiconDir, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
            // NOTE(review): only the lexicon writer gets the configured
            // max-merge-docs limit; kept as in the original -- confirm.
            lexiconWriter.setMaxMergeDocs(Configuration.getInstance().getMaxMergeDocs());
            Directory collocsDir = new SimpleFSDirectory(new File(indexDirectory + COLLOCS_DIR));
            collocsWriter = new IndexWriter(collocsDir, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);

            log.info("Saving SemEval07WSI Lexicon (" + lexicon.size() + " entry)");
            for (String word : lexicon.keySet()) {
                Document doc = new Document();
                doc.add(new Field(LexiconField.WORD.toString(), word, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field(LexiconField.COUNT.toString(), Long.toString(((Integer)lexicon.get(word)).intValue()), Field.Store.YES, Field.Index.NOT_ANALYZED));
                lexiconWriter.addDocument(doc);
            }

            log.info("Saving SemEval Collocs (" + collocs.size() + " entry)");
            int saved = 0;
            for (Pair<String, String> pair : collocs.keySet()) {
                saved++;
                if (saved % 500000 == 0) {
                    log.info(saved + " pairs saved in Collocs");
                }
                double dice = this.calculateDice(pair);
                Document doc = new Document();
                doc.add(new Field(CollocsField.WORD1.toString(), pair.getFirst(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field(CollocsField.WORD2.toString(), pair.getSecond(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field(CollocsField.CO_OCCURR.toString(), Long.toString(((Integer)collocs.get(pair)).intValue()), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field(CollocsField.DICE.toString(), Double.toString(dice), Field.Store.YES, Field.Index.NOT_ANALYZED));
                collocsWriter.addDocument(doc);
            }

            lexiconWriter.optimize();
            lexiconWriter.close();
            lexiconWriter = null;
            collocsWriter.optimize();
            collocsWriter.close();
            collocsWriter = null;

            // Mark as stored only after both indexes were closed cleanly, so
            // that a failed attempt does not block a retry.
            alreadyStored = true;
        }
        catch (IOException e) {
            log.error("Unable to save the SemEval07WSI indexes under " + indexDirectory, e);
        }
        finally {
            // Anything still non-null here was not closed on the happy path.
            closeQuietly(lexiconWriter, LEXICON_DIR);
            closeQuietly(collocsWriter, COLLOCS_DIR);
        }
    }

    /**
     * Best-effort close used on the failure path; a secondary failure is only
     * logged so that it cannot mask the original problem.
     *
     * @param writer the writer to close, or {@code null} for none
     * @param name   label used in the log message
     */
    private static void closeQuietly(IndexWriter writer, String name) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        }
        catch (IOException e) {
            log.warn("Unable to close index writer for " + name, e);
        }
    }

    /**
     * Dumps both counters to text files ("lexicon.txt" and "collocs.txt")
     * inside their respective index directories.
     */
    public void exportCountInstructions() {
        lexicon.saveToFile(indexDirectory + LEXICON_DIR + "/lexicon.txt");
        collocs.saveToFile(indexDirectory + COLLOCS_DIR + "/collocs.txt");
    }

    /**
     * Computes the Dice coefficient of a pair:
     * {@code 2 * count(pair) / (count(first) + count(second))}.
     *
     * @param entry a pair present in the pair counter, whose two words are
     *              present in the word counter
     * @return the Dice score of {@code entry}
     */
    private double calculateDice(Pair<String, String> entry) {
        int pairCount = ((Integer)collocs.get(entry)).intValue();
        int firstCount = ((Integer)lexicon.get(entry.getFirst())).intValue();
        int secondCount = ((Integer)lexicon.get(entry.getSecond())).intValue();
        return 2.0 * (double)pairCount / (double)(firstCount + secondCount);
    }

    /**
     * Command-line entry point: loads the counts with {@code k = 8}, writes
     * the Lucene indexes and exports the text dumps.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Semeval07WSIIndexer factory = Semeval07WSIIndexer.getInstance();
        factory.loadCountInstructions(8);
        factory.saveCountInstructions();
        factory.exportCountInstructions();
    }
}

