/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.ukwac.ukwacco;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.collocs.Collocs;
import it.uniroma1.lcl.jlt.collocs.Lexicon;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsDB;
import it.uniroma1.lcl.jlt.mysql.CooccurrenceDB;
import it.uniroma1.lcl.jlt.mysql.NumericTableCache;
import it.uniroma1.lcl.jlt.pipeline.stanford.StanfordSentence;
import it.uniroma1.lcl.jlt.ukwac.data.UKWacCompound;
import it.uniroma1.lcl.jlt.ukwac.data.UKWacText;
import it.uniroma1.lcl.jlt.ukwac.iterator.UKWacDumpIterator;
import it.uniroma1.lcl.jlt.util.Pair;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.queryParser.ParseException;

/**
 * Static utilities that populate the UKWacco lexicon and collocation tables
 * from the UKWac dump, and that derive counts for "pseudowords" (custom
 * lexicon entries made of several words joined by {@code *}).
 */
public class UKWaccoFactory {
    private static final Log log = LogFactory.getLog(UKWaccoFactory.class);
    private static final Configuration config = Configuration.getInstance();

    /**
     * Iterates over every text of the UKWac dump and accumulates, in two
     * {@link NumericTableCache} instances bound to the {@code lexicon} and
     * {@code collocs} tables, the occurrence count of each term found in the
     * WEBCO lexicon and the co-occurrence count of each pair of such terms
     * that falls within the configured correlation window.
     *
     * <p>Every lexicon increment is mirrored on the key {@code (-1, "TOTAL_ID")}
     * and every collocation increment on the key {@code (-1, -1)}, so those
     * two keys hold the running totals.
     *
     * @param initializeLexicon when {@code true}, the WEBCO lexicon entries are
     *        first copied into the UKWacco database with a count of zero
     * @throws IOException if the custom lexicon file cannot be read (other
     *         sources are possible in the called project code)
     * @throws ParseException declared for the called project code
     */
    public static void saveCountInstructions(boolean initializeLexicon) throws IOException, ParseException {
        Lexicon webcoLexicon = Lexicon.getInstance(CollocsDB.WEBCO);
        if (initializeLexicon) {
            UKWaccoFactory.initLexicon(webcoLexicon);
            log.info("Webco lexicon cloned!\n");
        }
        StanfordSentence.MultiwordBelongingTo lexiconSettings = StanfordSentence.MultiwordBelongingTo.valueOf(config.getUkwaccoDefaultLexicon());
        // Kept as a concrete HashSet because the parameter type expected by
        // StanfordSentence.getTerms is not visible from this file.
        HashSet<String> customLexicon = new HashSet<String>();
        if (lexiconSettings.isCustomLexicon()) {
            customLexicon.addAll(UKWaccoFactory.loadCustomLexicon(config.getUkwaccoCustomLexiconLocation()));
        }
        NumericTableCache<Pair<Integer, String>, Integer> lexicon = new NumericTableCache<Pair<Integer, String>, Integer>(config.getUKWaccoDBName(), config.getCollocsUser(CollocsDB.UKWACCO), config.getCollocsPassword(CollocsDB.UKWACCO), "lexicon", "id,word", "count");
        NumericTableCache<Pair<Integer, Integer>, Integer> collocs = new NumericTableCache<Pair<Integer, Integer>, Integer>(config.getUKWaccoDBName(), config.getCollocsUser(CollocsDB.UKWACCO), config.getCollocsPassword(CollocsDB.UKWACCO), "collocs", "w1,w2", "count");
        UKWacDumpIterator ukwacIterator = new UKWacDumpIterator();
        ukwacIterator.open();
        // Only terms tagged with this part of speech are requested.
        List<String> validPOS = Arrays.asList("N");
        log.warn("COLLECTING COUNTS, please wait...");
        int textCounter = 0;
        while (ukwacIterator.hasNext()) {
            UKWacText text = ukwacIterator.next();
            for (StanfordSentence sentence : text) {
                List<String> sentenceTerms = new ArrayList<String>();
                if (lexiconSettings.isCustomLexicon()) {
                    sentenceTerms.addAll(sentence.getTerms(validPOS, true, false, false, StanfordSentence.MultiwordBelongingTo.CUSTOM_LEXICON, false, customLexicon));
                } else {
                    sentenceTerms.addAll(sentence.getTerms(validPOS, true, false, false, lexiconSettings));
                }
                List<UKWacCompound> sentenceCompounds = new ArrayList<UKWacCompound>();
                // The position advances only for terms known to the WEBCO
                // lexicon, so it indexes the retained terms, not all terms.
                int position = 0;
                for (String term : sentenceTerms) {
                    if (!webcoLexicon.contains(term)) {
                        continue;
                    }
                    lexicon.add(new Pair<Integer, String>(webcoLexicon.getInteger(term), term), 1);
                    lexicon.add(new Pair<Integer, String>(-1, "TOTAL_ID"), 1);
                    sentenceCompounds.add(new UKWacCompound(position, term));
                    ++position;
                }
                UKWaccoFactory.countCollocations(sentenceCompounds, webcoLexicon, collocs);
            }
            if (++textCounter % 1000 == 0) {
                log.warn("Ora: " + new SimpleDateFormat("HH:mm:ss").format(Calendar.getInstance().getTime()));
                log.warn("Analizzate " + textCounter + " pagine");
            }
        }
        // NOTE(review): close() is reached only on normal completion. It is
        // deliberately not moved into a finally block because it presumably
        // flushes the cached counts to the database, and flushing partial
        // counts after a failure may be undesirable - confirm.
        lexicon.close();
        collocs.close();
    }

    /**
     * Adds one co-occurrence for every ordered pair (i &lt; j) of compounds of
     * a sentence whose distance is positive, does not exceed the configured
     * correlation window, and whose spans do not intersect. The key is the
     * pair of lexicon ids with the smaller id first; the total is kept on the
     * key {@code (-1, -1)}.
     */
    private static void countCollocations(List<UKWacCompound> sentenceCompounds, Lexicon webcoLexicon, NumericTableCache<Pair<Integer, Integer>, Integer> collocs) {
        for (int i = 0; i < sentenceCompounds.size(); ++i) {
            UKWacCompound iCompound = sentenceCompounds.get(i);
            for (int j = i + 1; j < sentenceCompounds.size(); ++j) {
                UKWacCompound jCompound = sentenceCompounds.get(j);
                int distance = jCompound.getOffset() - iCompound.getEndOffset();
                if (distance <= config.getUKWacCorrelationWindow() && distance > 0 && iCompound.hasEmptyIntersection(jCompound)) {
                    int iIndex = webcoLexicon.getInteger(iCompound.toString());
                    int jIndex = webcoLexicon.getInteger(jCompound.toString());
                    // Normalise the key so that (a, b) and (b, a) share one row.
                    Pair<Integer, Integer> keyPair = iIndex <= jIndex ? new Pair<Integer, Integer>(iIndex, jIndex) : new Pair<Integer, Integer>(jIndex, iIndex);
                    collocs.add(keyPair, 1);
                    collocs.add(new Pair<Integer, Integer>(-1, -1), 1);
                }
            }
        }
    }

    /**
     * Reads a custom lexicon file, one entry per line, skipping lines that
     * start with {@code #}. Lines are stored verbatim (no trimming), using the
     * platform default charset of {@link FileReader}.
     *
     * @param fileName path of the lexicon file
     * @return the set of entries read
     * @throws IOException if the file cannot be opened or read
     */
    private static Set<String> loadCustomLexicon(String fileName) throws IOException {
        Set<String> customLexicon = new HashSet<String>();
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try {
            String line;
            // readLine() returning null is the end-of-stream signal; ready()
            // only says whether a read would block.
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                customLexicon.add(line);
            }
        } finally {
            // Release the file handle even when reading fails.
            reader.close();
        }
        return customLexicon;
    }

    /**
     * Copies every word of the reference lexicon, with its id, into the
     * UKWacco database with an initial count of zero.
     *
     * @param referenceLexicon lexicon whose words and ids are copied
     */
    private static void initLexicon(Lexicon referenceLexicon) {
        Set<String> lexiconWords = referenceLexicon.getLexiconWords();
        CooccurrenceDB ukwacCo = new CooccurrenceDB(config.getCollocsLocation(CollocsDB.UKWACCO), config.getCollocsUser(CollocsDB.UKWACCO), config.getCollocsPassword(CollocsDB.UKWACCO));
        for (String word : lexiconWords) {
            int id = referenceLexicon.getId(word);
            ukwacCo.updateLexicon(id, word, 0);
        }
    }

    /**
     * Derives counts for the pseudowords listed in the custom lexicon file.
     * A pseudoword is split on {@code *}; its occurrence count is the sum of
     * the lexicon counts of its parts, and its co-occurrence count with a
     * lexicon word is the sum of the co-occurrence counts of that word with
     * each part. Results are written to the UKWacco database; zero
     * co-occurrences and self-pairs are not written, and collocation rows are
     * stored with the smaller id first.
     *
     * <p>NOTE(review): the sums are accumulated as {@code long} and narrowed
     * with an {@code (int)} cast before being stored, so values above
     * {@code Integer.MAX_VALUE} would be truncated.
     *
     * @throws IOException if the custom lexicon file cannot be read
     */
    public static void pseudowordCount() throws IOException {
        CooccurrenceDB ukwacCo = new CooccurrenceDB(config.getCollocsLocation(CollocsDB.UKWACCO), config.getCollocsUser(CollocsDB.UKWACCO), config.getCollocsPassword(CollocsDB.UKWACCO));
        Lexicon ukwaccoLexicon = Lexicon.getInstance(CollocsDB.UKWACCO);
        Collocs ukwaccoCollocs = Collocs.getInstance(CollocsDB.UKWACCO);
        Set<String> customLexicon = UKWaccoFactory.loadCustomLexicon(config.getUkwaccoCustomLexiconLocation());
        for (String query : customLexicon) {
            long occurrences = 0L;
            for (String word : query.split("\\*")) {
                occurrences += ukwaccoLexicon.getCount(word);
            }
            ukwacCo.updateLexicon(ukwaccoLexicon.getId(query), query, (int)occurrences);
        }
        Set<String> lexiconWords = ukwaccoLexicon.getLexiconWords();
        for (String lexiconWord : lexiconWords) {
            for (String pseudoword : customLexicon) {
                long cooccurrences = 0L;
                for (String word : pseudoword.split("\\*")) {
                    cooccurrences += ukwaccoCollocs.getCount(lexiconWord, word);
                }
                int pseudowordID = ukwaccoLexicon.getId(pseudoword);
                int lexiconWordID = ukwaccoLexicon.getId(lexiconWord);
                // Compare string contents, not references.
                if (pseudoword.equals(lexiconWord) || cooccurrences == 0L) {
                    continue;
                }
                if (pseudowordID < lexiconWordID) {
                    ukwacCo.updateCollocs(pseudowordID, lexiconWordID, (int)cooccurrences);
                } else if (pseudowordID > lexiconWordID) {
                    ukwacCo.updateCollocs(lexiconWordID, pseudowordID, (int)cooccurrences);
                }
                // Equal ids: nothing is written.
            }
        }
    }

    /**
     * Entry point. Logs a start and an end message and prints the start and
     * end wall-clock times.
     *
     * <p>NOTE(review): no generation step is invoked between the two log
     * messages in this version of the file - confirm whether a call to
     * {@link #saveCountInstructions(boolean)} or {@link #pseudowordCount()}
     * is meant to be here.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) throws IOException, SQLException, ParseException {
        String startTime = new SimpleDateFormat("HH:mm").format(Calendar.getInstance().getTime());
        log.info("Generating ukwacco sql instructions...");
        log.info("Done!");
        String endTime = new SimpleDateFormat("HH:mm").format(Calendar.getInstance().getTime());
        System.out.println("Started at: " + startTime + " ended at: " + endTime);
    }
}

