/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.pseudowords;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.pseudowords.PseudoWord;
import it.uniroma1.lcl.jlt.ukwac.index.UKWacSentenceIndexExtractor;
import it.uniroma1.lcl.jlt.util.Stopwords;
import it.uniroma1.lcl.jlt.wordnet.WordNet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Command-line tool that prints sentences for randomly composed pseudowords.
 *
 * <p>It takes the words returned by {@code WordNet.getWordsWithPolysemy(1)}
 * (reported in the output as "monosemous" words), keeps up to
 * {@value #WORDS_TO_SELECT} of them for which the UKWac sentence index yields
 * at least {@value #SENTENCES_PER_WORD} sentences, and then, for every
 * pseudoword size from {@value #MIN_PSEUDOWORD_SIZE} to
 * {@value #MAX_PSEUDOWORD_SIZE}, builds {@value #PSEUDOWORDS_PER_SIZE} distinct
 * pseudowords and prints one tab-separated line
 * ({@code pseudoword <TAB> word <TAB> sentence}) per stored sentence.
 *
 * <p>Progress messages and data lines are both written to standard output;
 * problems are reported on standard error.
 */
public class PseudoWordSentenceFactory {
    /** Number of sentences a word must have, and number of sentences kept per word. */
    private static final int SENTENCES_PER_WORD = 1000;

    /** Maximum number of words selected as pseudoword components. */
    private static final int WORDS_TO_SELECT = 100;

    /** Smallest number of component words in a pseudoword. */
    private static final int MIN_PSEUDOWORD_SIZE = 2;

    /** Largest number of component words in a pseudoword. */
    private static final int MAX_PSEUDOWORD_SIZE = 10;

    /** Number of distinct pseudowords generated for each size. */
    private static final int PSEUDOWORDS_PER_SIZE = 100;

    /**
     * Upper bound on random draws made while looking for one not-yet-used
     * pseudoword. This is a heuristic safety net: it only triggers when the
     * selected words can no longer yield a new pseudoword of the current size,
     * a situation in which an unbounded retry loop would never terminate.
     */
    private static final int MAX_ATTEMPTS_PER_PSEUDOWORD = 10000;

    /**
     * Runs the selection and prints the pseudoword sentences.
     *
     * @param args optional words that are moved to the front of the candidate
     *             list so that they are examined before the shuffled WordNet words
     */
    public static void main(String[] args) {
        UKWacSentenceIndexExtractor extractor = new UKWacSentenceIndexExtractor(Configuration.getInstance().getUKWacSentenceIndexDirectory());
        Stopwords stopwords = Stopwords.getInstance();

        List<String> candidates = collectCandidateWords(args);
        Map<String, List<String>> word2sentences = selectWords(candidates, extractor, stopwords);

        List<String> words = new ArrayList<String>(word2sentences.keySet());
        System.out.println("WORDS SELECTED FOR DETERMINING PSEUDOWORDS: " + words);

        printPseudoWordSentences(words, word2sentences);
    }

    /**
     * Builds the shuffled list of candidate words, with the command-line words in front.
     *
     * @param args words given on the command line; each one is removed from the
     *             list (if present) and re-inserted at index 0, so the last
     *             argument ends up first. These words are not subject to the
     *             character filter applied to the WordNet words.
     * @return the candidate words in the order in which they should be examined
     */
    private static List<String> collectCandidateWords(String[] args) {
        Set<String> monoWords = WordNet.getInstance().getWordsWithPolysemy(1);
        List<String> candidates = new ArrayList<String>();
        for (String monoWord : monoWords) {
            // Skip entries containing an underscore, a hyphen or an apostrophe.
            if (monoWord.contains("_") || monoWord.contains("-") || monoWord.contains("'")) {
                continue;
            }
            candidates.add(monoWord);
        }
        Collections.shuffle(candidates);
        System.out.println(candidates.size() + " MONOSEMOUS WORDS FOUND");

        if (args.length > 0) {
            System.out.println("ADDING " + args.length + " MONOSEMOUS WORDS FROM INPUT");
            for (String arg : args) {
                candidates.remove(arg);
                candidates.add(0, arg);
            }
        }
        return candidates;
    }

    /**
     * Selects up to {@value #WORDS_TO_SELECT} non-stopword candidates that have
     * at least {@value #SENTENCES_PER_WORD} sentences in the index.
     *
     * <p>A word whose extraction fails with an {@link IOException} is reported
     * on standard error and skipped; the remaining candidates are still examined.
     *
     * @param candidates words to examine, in order
     * @param extractor  sentence extractor queried for each word
     * @param stopwords  stopword list used to skip candidates
     * @return map from each selected word to its first {@value #SENTENCES_PER_WORD} sentences
     */
    private static Map<String, List<String>> selectWords(List<String> candidates, UKWacSentenceIndexExtractor extractor, Stopwords stopwords) {
        Map<String, List<String>> word2sentences = new HashMap<String, List<String>>();
        for (String word : candidates) {
            if (stopwords.isStopword(word)) {
                continue;
            }
            try {
                List<String> sentences = extractor.extract(word);
                System.out.println(word + " HAS " + sentences.size() + " SENTENCES");
                if (sentences.size() < SENTENCES_PER_WORD) {
                    continue;
                }
                System.out.println("SELECTED: " + word);
                // Copy instead of keeping the subList view, which would keep the
                // complete extracted list reachable for as long as the map lives.
                word2sentences.put(word, new ArrayList<String>(sentences.subList(0, SENTENCES_PER_WORD)));
                if (word2sentences.size() >= WORDS_TO_SELECT) {
                    System.out.println("FOUND " + WORDS_TO_SELECT + " MONOSEMOUS WORDS");
                    break;
                }
            } catch (IOException e) {
                System.err.println("COULD NOT EXTRACT SENTENCES FOR " + word);
                e.printStackTrace();
            }
        }
        return word2sentences;
    }

    /**
     * Generates the pseudowords and prints their sentences to standard output.
     *
     * <p>For each size, pseudowords are drawn by shuffling {@code words} and
     * taking the leading {@code size} entries; a draw is rejected when its
     * {@code getPseudoword()} value was already produced for that size.
     * Generation stops (with a message on standard error) when fewer words than
     * the current size are available, and moves on to the next size when no new
     * pseudoword is found within {@value #MAX_ATTEMPTS_PER_PSEUDOWORD} draws.
     *
     * @param words          selected words; this list is shuffled in place
     * @param word2sentences sentences stored for each selected word
     */
    private static void printPseudoWordSentences(List<String> words, Map<String, List<String>> word2sentences) {
        for (int size = MIN_PSEUDOWORD_SIZE; size <= MAX_PSEUDOWORD_SIZE; size++) {
            if (words.size() < size) {
                // subList(0, size) would throw IndexOutOfBoundsException here,
                // and every larger size would fail in the same way.
                System.err.println("ONLY " + words.size() + " WORDS SELECTED: CANNOT BUILD PSEUDOWORDS OF SIZE " + size + " OR LARGER");
                return;
            }
            Set<String> done = new HashSet<String>();
            for (int produced = 0; produced < PSEUDOWORDS_PER_SIZE; produced++) {
                List<String> components = null;
                String pseudoword = null;
                int attempts = 0;
                while (components == null && attempts < MAX_ATTEMPTS_PER_PSEUDOWORD) {
                    attempts++;
                    Collections.shuffle(words);
                    // Copy the leading entries so the drawn components are not
                    // affected by later shuffles of the backing list.
                    List<String> drawn = new ArrayList<String>(words.subList(0, size));
                    String name = new PseudoWord(drawn).getPseudoword();
                    // add() returns false when this pseudoword was already produced.
                    if (done.add(name)) {
                        components = drawn;
                        pseudoword = name;
                    }
                }
                if (components == null) {
                    System.err.println("NO NEW PSEUDOWORD OF SIZE " + size + " FOUND AFTER " + MAX_ATTEMPTS_PER_PSEUDOWORD + " ATTEMPTS; " + produced + " GENERATED");
                    break;
                }
                for (String word : components) {
                    for (String sentence : word2sentences.get(word)) {
                        System.out.println(pseudoword + "\t" + word + "\t" + sentence);
                    }
                }
            }
        }
    }
}

