/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.ukwac.ukwacco;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.collocs.Lexicon;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsDB;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsField;
import it.uniroma1.lcl.jlt.collocs.enumeration.LexiconField;
import it.uniroma1.lcl.jlt.pipeline.stanford.StanfordSentence;
import it.uniroma1.lcl.jlt.ukwac.data.UKWacCompound;
import it.uniroma1.lcl.jlt.ukwac.data.UKWacText;
import it.uniroma1.lcl.jlt.ukwac.iterator.UKWacDumpIterator;
import it.uniroma1.lcl.jlt.util.IntegerCounter;
import it.uniroma1.lcl.jlt.util.Pair;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line utility that walks the UKWaC dump, counts the terms found in
 * the WEBCO lexicon together with their in-sentence co-occurrences, and stores
 * the counts in two Lucene indexes (a lexicon index and a collocations index).
 *
 * <p>Counts are accumulated in two static in-memory counters and periodically
 * flushed to disk by {@link #saveCountInstructions()}, which merges them with
 * whatever is already stored in the indexes and then empties the counters.
 *
 * <p>All state is static and unsynchronized; the class is written for
 * single-threaded use from {@link #main(String[])}.
 */
public class UKWaccoLuceneFactory {
    private static final Log log = LogFactory.getLog(UKWaccoLuceneFactory.class);
    private static final Configuration config = Configuration.getInstance();
    private static final String LEXICON_DIR = "/lexicon";
    private static final String COLLOCS_DIR = "/collocs";

    /** Occurrence counts of single terms gathered since the last flush. */
    private static final IntegerCounter<String> lexicon = new IntegerCounter<String>("Ukwac lexicon counts");

    /** Co-occurrence counts of (lexicographically ordered) term pairs gathered since the last flush. */
    private static final IntegerCounter<Pair<String, String>> collocs = new IntegerCounter<Pair<String, String>>("Ukwac collocs counts");

    /**
     * Iterates over every text of the UKWaC dump and collects term and
     * co-occurrence counts, flushing them to the Lucene indexes every 100000
     * texts and once more when the dump is exhausted.
     *
     * @throws ParseException declared by the original signature; propagated from the lexicon/sentence helpers
     * @throws IOException if reading the dump or writing the indexes fails
     */
    public static void loadCountInstructions() throws ParseException, IOException {
        Lexicon webcoLexicon = Lexicon.getInstance(CollocsDB.WEBCO);
        UKWacDumpIterator ukwacIterator = new UKWacDumpIterator();
        // The POS filter never changes, so build it once instead of once per sentence.
        List<String> validPOS = Arrays.asList("N", "J", "V");
        log.warn("COLLECTING COUNTS, please wait...");
        int textCounter = 0;
        ukwacIterator.open();
        while (ukwacIterator.hasNext()) {
            UKWacText text = ukwacIterator.next();
            for (StanfordSentence sentence : text) {
                countSentence(sentence, validPOS, webcoLexicon);
            }
            ++textCounter;
            System.out.println("Analizzate " + textCounter + " pagine");
            if (textCounter % 500 == 0) {
                log.warn("Ora: " + new SimpleDateFormat("HH:mm:ss").format(Calendar.getInstance().getTime()));
                log.warn("Analizzate " + textCounter + " pagine.");
            }
            if (textCounter % 100000 == 0) {
                // Periodic flush keeps the in-memory counters from growing without bound.
                log.info("Cleaning the memory");
                UKWaccoLuceneFactory.saveCountInstructions();
                log.info("Memory cleaned!");
            }
        }
        // Flush whatever was counted after the last periodic save; without this
        // the tail of the corpus (or a whole corpus smaller than 100000 texts)
        // would never reach the indexes.
        if (lexicon.keySet().size() > 0 || collocs.keySet().size() > 0) {
            UKWaccoLuceneFactory.saveCountInstructions();
        }
    }

    /**
     * Counts the lexicon terms of one sentence and every pair of them whose
     * distance falls inside the configured correlation window.
     */
    private static void countSentence(StanfordSentence sentence, List<String> validPOS, Lexicon webcoLexicon) throws ParseException, IOException {
        List<String> sentenceTerms = sentence.getTerms(validPOS, true, false, false, StanfordSentence.MultiwordBelongingTo.WORDNET);
        List<UKWacCompound> sentenceCompounds = new ArrayList<UKWacCompound>();
        int position = 0;
        for (String term : sentenceTerms) {
            if (webcoLexicon.contains(term)) {
                lexicon.count(term);
                sentenceCompounds.add(new UKWacCompound(position, term));
            }
            ++position;
        }
        for (int i = 0; i < sentenceCompounds.size(); ++i) {
            UKWacCompound first = sentenceCompounds.get(i);
            for (int j = i + 1; j < sentenceCompounds.size(); ++j) {
                UKWacCompound second = sentenceCompounds.get(j);
                int distance = second.getOffset() - first.getEndOffset();
                if (distance <= config.getUKWacCorrelationWindow() && distance > 0) {
                    collocs.count(orderedPair(first.toString(), second.toString()));
                }
            }
        }
    }

    /** Builds the pair key with the lexicographically smaller word first, so (a, b) and (b, a) share one counter. */
    private static Pair<String, String> orderedPair(String word1, String word2) {
        if (word1.compareTo(word2) < 0) {
            return new Pair<String, String>(word1, word2);
        }
        return new Pair<String, String>(word2, word1);
    }

    /**
     * Merges the in-memory counters into the lexicon and collocations indexes
     * and then clears the counters.
     *
     * <p>Entries already present in an index are replaced by a document whose
     * count is the stored count plus the in-memory count. The two indexes are
     * committed one after the other, so the operation is not atomic across
     * them. On failure the writers that are still open are rolled back and the
     * counters are left untouched.
     *
     * @throws IOException if an index cannot be opened, read or written
     * @throws RuntimeException if an index holds more than one document for the same key
     */
    public static void saveCountInstructions() throws IOException {
        String indexDirectory = "/home/dimarco/resources/ukwac";
        Directory lexiconDir = new SimpleFSDirectory(new File(indexDirectory + LEXICON_DIR));
        Directory collocsDir = new SimpleFSDirectory(new File(indexDirectory + COLLOCS_DIR));
        IndexWriter lexiconWriter = null;
        IndexWriter collocsWriter = null;
        IndexSearcher lexiconSearcher = null;
        IndexSearcher collocsSearcher = null;
        try {
            lexiconWriter = openWriter(lexiconDir);
            lexiconWriter.setMaxMergeDocs(Configuration.getInstance().getMaxMergeDocs());
            lexiconSearcher = new IndexSearcher(lexiconDir, true);
            collocsWriter = openWriter(collocsDir);
            collocsSearcher = new IndexSearcher(collocsDir, true);

            log.info("Saving Ukwacco Lexicon (" + lexicon.keySet().size() + " entry)");
            writeLexicon(lexiconWriter, lexiconSearcher);
            log.info("Saving SemEval Collocs (" + collocs.keySet().size() + " entry)");
            writeCollocs(collocsWriter, collocsSearcher);

            lexiconWriter.optimize();
            lexiconWriter.close();
            lexiconWriter = null;
            collocsWriter.optimize();
            collocsWriter.close();
            collocsWriter = null;
        } finally {
            closeQuietly(lexiconSearcher);
            closeQuietly(collocsSearcher);
            // Only non-null when something failed before the regular close():
            // discard the partial batch and release the index write lock.
            rollbackQuietly(lexiconWriter);
            rollbackQuietly(collocsWriter);
        }
        lexicon.clear();
        collocs.clear();
    }

    /**
     * Opens a writer in create-or-append mode. Opening with {@code create=true}
     * on every flush would replace the existing index, dropping every entry of
     * earlier batches that does not reappear in the current one.
     */
    private static IndexWriter openWriter(Directory directory) throws IOException {
        return new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /** Writes one document per in-memory lexicon entry, adding the count already stored for that word, if any. */
    private static void writeLexicon(IndexWriter writer, IndexSearcher searcher) throws IOException {
        String wordField = LexiconField.WORD.toString();
        String countField = LexiconField.COUNT.toString();
        for (Object key : lexicon.keySet()) {
            String word = (String) key;
            long count = ((Integer) lexicon.get(word)).intValue();
            Query query = new TermQuery(new Term(wordField, word));
            Hits hits = searcher.search(query);
            switch (hits.length()) {
                case 0: {
                    break;
                }
                case 1: {
                    // Replace the stored document with one carrying the summed count.
                    count += Long.parseLong(hits.doc(0).get(countField));
                    writer.deleteDocuments(query);
                    break;
                }
                default: {
                    throw new RuntimeException("Malformed Lucene Lexicon Index");
                }
            }
            Document doc = new Document();
            doc.add(new Field(wordField, word, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field(countField, Long.toString(count), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
    }

    /** Writes one document per in-memory word pair, adding the co-occurrence count already stored for it, if any. */
    private static void writeCollocs(IndexWriter writer, IndexSearcher searcher) throws IOException {
        String word1Field = CollocsField.WORD1.toString();
        String word2Field = CollocsField.WORD2.toString();
        String coOccurrField = CollocsField.CO_OCCURR.toString();
        String diceField = CollocsField.DICE.toString();
        for (Object key : collocs.keySet()) {
            // The counter is declared with Pair<String, String> keys, so the cast is safe.
            @SuppressWarnings("unchecked")
            Pair<String, String> pair = (Pair<String, String>) key;
            String word1 = pair.getFirst();
            String word2 = pair.getSecond();

            BooleanQuery query = new BooleanQuery();
            query.add(new BooleanClause(new TermQuery(new Term(word1Field, word1)), BooleanClause.Occur.MUST));
            query.add(new BooleanClause(new TermQuery(new Term(word2Field, word2)), BooleanClause.Occur.MUST));

            long coOccurrences = ((Integer) collocs.get(pair)).intValue();
            Hits hits = searcher.search(query);
            switch (hits.length()) {
                case 0: {
                    break;
                }
                case 1: {
                    coOccurrences += Long.parseLong(hits.doc(0).get(coOccurrField));
                    writer.deleteDocuments(query);
                    break;
                }
                default: {
                    throw new RuntimeException("Malformed Index");
                }
            }
            // NOTE(review): the Dice score is derived from the in-memory counts of
            // the current batch only, even when the co-occurrence count above was
            // merged with a stored one - confirm whether that is intended.
            double dice = UKWaccoLuceneFactory.calculateDice(pair);

            Document doc = new Document();
            doc.add(new Field(word1Field, word1, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field(word2Field, word2, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field(coOccurrField, Long.toString(coOccurrences), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field(diceField, Double.toString(dice), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
    }

    /** Closes a searcher during cleanup, logging instead of throwing so an earlier failure is not masked. */
    private static void closeQuietly(IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (IOException e) {
            log.warn("Could not close index searcher", e);
        }
    }

    /** Rolls back a writer during cleanup, logging instead of throwing so an earlier failure is not masked. */
    private static void rollbackQuietly(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.rollback();
        } catch (IOException e) {
            log.warn("Could not roll back index writer", e);
        }
    }

    /**
     * Computes the Dice coefficient of a word pair from the in-memory counters:
     * {@code 2 * cooccurrences(w1, w2) / (count(w1) + count(w2))}.
     *
     * @param entry a pair that is present in the collocations counter and whose
     *              two words are present in the lexicon counter
     * @return the Dice coefficient of the pair
     */
    private static double calculateDice(Pair<String, String> entry) {
        double coOccurrences = ((Integer) collocs.get(entry)).intValue();
        int occurrences = (Integer) lexicon.get(entry.getFirst()) + (Integer) lexicon.get(entry.getSecond());
        return 2.0 * coOccurrences / (double) occurrences;
    }

    /**
     * Runs the whole count collection and logs the wall-clock start and end times.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws ParseException, IOException {
        String startTime = new SimpleDateFormat("HH:mm").format(Calendar.getInstance().getTime());
        UKWaccoLuceneFactory.loadCountInstructions();
        String endTime = new SimpleDateFormat("HH:mm").format(Calendar.getInstance().getTime());
        log.warn("Simulation started at: " + startTime + " and completed at " + endTime);
    }
}

