/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.ukwac;

import edu.mit.jwi.item.POS;
import edu.stanford.nlp.ling.WordLemmaTag;
import it.uniroma1.lcl.jlt.pipeline.stanford.StanfordSentence;
import it.uniroma1.lcl.jlt.ukwac.data.UKWacText;
import it.uniroma1.lcl.jlt.ukwac.iterator.UKWacDumpIterator;
import it.uniroma1.lcl.jlt.util.IntegerCounter;
import it.uniroma1.lcl.jlt.util.Strings;
import it.uniroma1.lcl.jlt.util.Timer;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Scans the ukWaC dump (as exposed by {@link UKWacDumpIterator}) sentence by
 * sentence and either counts or prints the sentences that contain given words
 * or word sequences.
 *
 * <p>Every method walks the whole dump, so a single call may run for a long
 * time; progress is reported through a {@link Timer} at fixed intervals.
 */
public class UKWacSentenceExtractor {
    private static final Log log = LogFactory.getLog(UKWacSentenceExtractor.class);

    /** Number of processed sentences between two progress reports of the counting scan. */
    private static final int SENTENCE_PROGRESS_INTERVAL = 200000;

    /** Number of processed documents between two progress reports of the sequence scan. */
    private static final int DOCUMENT_PROGRESS_INTERVAL = 1000;

    /**
     * Counts, for each given word, the number of sentences of the dump for which
     * {@code StanfordSentence.contains(word)} is true.
     *
     * <p>Every {@value #SENTENCE_PROGRESS_INTERVAL} sentences a timer tick is issued
     * and the running counts are printed to standard output.
     *
     * @param words the words to look for; the result follows the iteration order
     *              of this collection (duplicates get one slot each)
     * @return a list with one count per element of {@code words}, in the same order
     * @throws IOException declared by the original signature; presumably raised
     *                     while reading the dump (not visible in this file)
     */
    public List<Integer> getNumSentencesContainingWords(Collection<String> words) throws IOException {
        ArrayList<Integer> countWord = new ArrayList<Integer>(words.size());
        for (int slot = 0; slot < words.size(); ++slot) {
            countWord.add(0);
        }
        UKWacDumpIterator ukWacIterator = new UKWacDumpIterator();
        int processedSentences = 0;
        Timer timer = new Timer();
        while (ukWacIterator.hasNext()) {
            UKWacText text = ukWacIterator.next();
            log.debug("Extracting from: " + text.getId());
            for (StanfordSentence sentence : text) {
                int index = 0;
                for (String word : words) {
                    if (sentence.contains(word)) {
                        countWord.set(index, countWord.get(index) + 1);
                    }
                    ++index;
                }
                if (++processedSentences % SENTENCE_PROGRESS_INTERVAL == 0) {
                    // NOTE(review): the counter advances once per sentence although the
                    // message says DOCUMENTS; the text is kept unchanged for compatibility.
                    timer.tick("TO PROCESS " + processedSentences + " DOCUMENTS");
                    System.out.println(formatCounts(words, countWord));
                }
            }
        }
        return countWord;
    }

    /**
     * Prints every sentence of the dump that contains at least one of the given
     * token sequences, one line per (sequence, sentence) match, formatted as
     * {@code "<sequence>"<TAB><sentence>}.
     *
     * @param sequences  the token sequences to look for
     * @param outputFile path of the file to write to, or {@code null} to print
     *                   to standard output
     * @throws IOException if the output file cannot be opened or written, or if
     *                     the underlying scan fails
     */
    public static void printSentencesContainingSequences(Collection<List<String>> sequences, String outputFile) throws IOException {
        FileWriter writer = outputFile != null ? new FileWriter(outputFile) : null;
        try {
            // Built inside the try so the writer is released even if the dump cannot be opened.
            UKWacDumpIterator ukWacIterator = new UKWacDumpIterator();
            int processedDocuments = 0;
            Timer timer = new Timer();
            while (ukWacIterator.hasNext()) {
                UKWacText text = ukWacIterator.next();
                log.debug("Extracting from: " + text.getId());
                for (StanfordSentence sentence : text) {
                    for (List<String> sequence : sequences) {
                        if (sentence.containsSequence(sequence)) {
                            emit(writer, "\"" + Strings.join(sequence, " ") + "\"\t" + Strings.join(sentence, " "));
                        }
                    }
                }
                if (++processedDocuments % DOCUMENT_PROGRESS_INTERVAL == 0) {
                    timer.tick("TO PROCESS " + processedDocuments + " DOCUMENTS");
                }
            }
        } finally {
            // close() flushes first; doing it here prevents a leaked handle on failure.
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Prints, for each sentence of the dump, one line per term reported by
     * {@code StanfordSentence.containsWhichOf(words, POS.NOUN)}, until {@code limit}
     * lines have been produced for that term. Each line is formatted as
     * {@code <term><TAB><bag of words><TAB><sentence><TAB><features>}, where the
     * features are space-separated {@code name:value} pairs taken from
     * {@code getLocalFeaturesAroundFirstOccurrence(term)}.
     *
     * @param words      the candidate words
     * @param outputFile path of the file to write to, or {@code null} to print
     *                   to standard output
     * @param limit      number of lines after which a term is no longer printed;
     *                   a term is skipped only when its count equals this value
     * @throws IOException if the output file cannot be opened or written, or if
     *                     the underlying scan fails
     */
    public static void printSentencesContainingWords(Collection<String> words, String outputFile, int limit) throws IOException {
        FileWriter writer = outputFile != null ? new FileWriter(outputFile) : null;
        try {
            IntegerCounter<String> counter = new IntegerCounter<String>();
            // Built inside the try so the writer is released even if the dump cannot be opened.
            UKWacDumpIterator ukWacIterator = new UKWacDumpIterator();
            while (ukWacIterator.hasNext()) {
                UKWacText text = ukWacIterator.next();
                log.debug("Extracting from: " + text.getId());
                for (StanfordSentence sentence : text) {
                    Set<String> terms = sentence.containsWhichOf(words, POS.NOUN);
                    if (terms.isEmpty()) {
                        continue;
                    }
                    Set<String> bow = sentence.getBOW();
                    for (String term : terms) {
                        // Null-safe: the counter's behaviour for unseen keys is not visible here.
                        Integer seen = (Integer) counter.get(term);
                        if (seen != null && seen.intValue() == limit) {
                            continue;
                        }
                        Map<String, String> features = sentence.getLocalFeaturesAroundFirstOccurrence(term);
                        StringBuilder arffString = new StringBuilder();
                        for (Map.Entry<String, String> feature : features.entrySet()) {
                            if (arffString.length() > 0) {
                                arffString.append(" ");
                            }
                            arffString.append(feature.getKey()).append(":").append(feature.getValue());
                        }
                        counter.count(term);
                        emit(writer, term + "\t" + Strings.join(bow, " ") + "\t" + sentence.toString() + "\t" + arffString.toString());
                    }
                }
            }
        } finally {
            // close() flushes first; doing it here prevents a leaked handle on failure.
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Demo entry point: counts the sentences containing a fixed list of words
     * and prints one {@code word count} line per list entry.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            List<String> words = Arrays.asList("computer", "medicine", "anthropology", "computers", "convex", "stochastic", "computer", "medicine", "anthropology", "computers", "convex", "stochastic", "computer", "medicine", "anthropology", "computers", "convex", "stochastic", "computer", "medicine", "anthropology", "computers", "convex", "stochastic", "computer", "medicine", "anthropology", "computers", "convex", "stochastic", "computer", "medicine", "anthropology", "computers", "convex", "stochastic");
            UKWacSentenceExtractor walk = new UKWacSentenceExtractor();
            List<Integer> counts = walk.getNumSentencesContainingWords(words);
            for (int i = 0; i < words.size(); ++i) {
                System.out.println(words.get(i) + " " + counts.get(i));
            }
        }
        catch (Exception e) {
            // Top-level boundary: report the failure through the class logger.
            log.error("Sentence extraction failed", e);
        }
    }

    /** Writes one line to {@code writer}, or to standard output when {@code writer} is null. */
    private static void emit(FileWriter writer, String line) throws IOException {
        if (writer != null) {
            writer.write(line + "\n");
        } else {
            System.out.println(line);
        }
    }

    /** Renders the running counts as {@code word:count, word:count, } in word order. */
    private static String formatCounts(Collection<String> words, List<Integer> counts) {
        StringBuilder line = new StringBuilder();
        int index = 0;
        for (String word : words) {
            line.append(word).append(":").append(counts.get(index)).append(", ");
            ++index;
        }
        return line.toString();
    }
}

