/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.semeval07.wsi;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.bnc.Bnc;
import it.uniroma1.lcl.jlt.matrix.JLTMatrix;
import it.uniroma1.lcl.jlt.matrix.SentenceToVector;
import it.uniroma1.lcl.jlt.matrix.VectorComposition;
import it.uniroma1.lcl.jlt.pipeline.stanford.DataProcessor;
import it.uniroma1.lcl.jlt.pipeline.stanford.StanfordSentence;
import it.uniroma1.lcl.jlt.util.Measures;
import it.uniroma1.lcl.jlt.util.Pair;
import it.uniroma1.lcl.jlt.util.Strings;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.jblas.DoubleMatrix;
import weka.core.Stopwords;

/**
 * Holds a target word, an instance number, an id, a corpus label and a raw
 * text, and derives from the text: a list of terms, lists of word pairs
 * ("collocations") and a vector used for similarity computations.
 *
 * <p>Derived data (terms, pair lists, vector) is computed lazily and cached in
 * instance fields. No synchronization is performed.
 */
public class Semeval07WSIInstance {
    /** Regex alternation of the punctuation symbols stripped from the text before processing. */
    protected static String UNWANTED_SYMBOLS = "\"|'|\\.|~|\\,|;|!|\\?|:|\\(|\\)|\\[|\\]";
    protected String targetWord;
    protected int number;
    protected String id;
    protected String text;
    protected String corpus;
    /** Cached terms extracted from {@link #text}; {@code null} until first computed. */
    protected List<String> terms;
    /** Cached result of {@link #getPairsWords()}; {@code null} until first computed. */
    protected List<Pair<String, String>> collocs;
    /** When true, {@link #getWords(List, boolean)} drops words whose lemma is a stopword. */
    protected static boolean removeStopwords = true;
    protected DoubleMatrix vector;
    /** Euclidean norm of {@link #vector}; only meaningful after {@link #getVector()} has run. */
    protected double norm;
    /** Cached result of {@link #getPairsWords(int)} for window size {@link #windowCollocsSize}. */
    private List<Pair<String, String>> windowCollocs;
    /** Window size the {@link #windowCollocs} cache was computed for. */
    private int windowCollocsSize;

    /**
     * Creates an instance; no processing of the text happens here.
     *
     * @param tw     the target word
     * @param number the instance number
     * @param id     the instance id
     * @param corpus the corpus label
     * @param text   the raw text of the instance
     */
    public Semeval07WSIInstance(String tw, int number, String id, String corpus, String text) {
        this.targetWord = tw;
        this.number = number;
        this.id = id;
        this.corpus = corpus;
        this.text = text;
    }

    /**
     * Returns the words of this instance, keeping the target word.
     *
     * @return a new list of words
     */
    public List<String> getWords() {
        return this.getWords(false);
    }

    /**
     * Returns the words of this instance using the parts of speech configured
     * in {@link Configuration}.
     *
     * @param removeTargetWord whether occurrences of the target word are dropped
     * @return a new list of words
     */
    public List<String> getWords(boolean removeTargetWord) {
        return this.getWords(Configuration.getInstance().getSemeval07WsiPoses(), removeTargetWord);
    }

    /**
     * Returns the words of this instance, optionally filtered.
     *
     * <p>Stopwords are dropped when the static {@code removeStopwords} flag is
     * set, and occurrences of the target word (case-insensitive) are dropped
     * when {@code removeTargetWord} is true. Both filters are applied together.
     *
     * <p>NOTE: the extracted terms are cached on first use, so {@code poses}
     * only has an effect on the first call for this instance.
     *
     * @param poses            parts of speech passed to the term extraction
     * @param removeTargetWord whether occurrences of the target word are dropped
     * @return a new list of words, in extraction order; empty if the text
     *         could not be processed; never {@code null}
     */
    public List<String> getWords(List<String> poses, boolean removeTargetWord) {
        List<String> allTerms = this.loadTerms(poses);
        // Always build a fresh list so callers can never mutate the cached terms.
        List<String> result = new ArrayList<String>(allTerms.size());
        for (String word : allTerms) {
            if (removeStopwords && Stopwords.isStopword((String)Strings.getLemma(word))) {
                continue;
            }
            if (removeTargetWord && word.equalsIgnoreCase(this.targetWord)) {
                continue;
            }
            result.add(word);
        }
        return result;
    }

    /**
     * Extracts the terms from the text on first use and caches them.
     * Falls back to an empty list if the processing fails.
     */
    private List<String> loadTerms(List<String> poses) {
        if (this.terms == null) {
            try {
                String cleanedText = this.text.replaceAll(UNWANTED_SYMBOLS, "");
                StanfordSentence stanford = new StanfordSentence(DataProcessor.getInstance().processSentence(cleanedText));
                this.terms = stanford.getTerms(poses, true, false, true, StanfordSentence.MultiwordBelongingTo.WORDNET);
            }
            catch (Exception e) {
                // Best effort: report the failure and continue with no terms.
                e.printStackTrace();
            }
            if (this.terms == null) {
                this.terms = new ArrayList<String>();
            }
        }
        return this.terms;
    }

    public int getNumber() {
        return this.number;
    }

    /**
     * Returns the target word with every {@code '#'} replaced by {@code '.'}.
     *
     * @return the target word in dotted form
     */
    public String getTargetWord() {
        return this.targetWord.replace('#', '.');
    }

    public String getId() {
        return this.id;
    }

    public String getText() {
        return this.text;
    }

    public String getCorpus() {
        return this.corpus;
    }

    /**
     * Returns all pairs of distinct words of this instance.
     *
     * <p>The words are sorted first, so in every pair the first element
     * precedes the second in natural string order. Words that are equal
     * ignoring case are not paired. The result is cached.
     *
     * @return the cached list of word pairs
     */
    public List<Pair<String, String>> getPairsWords() {
        if (this.collocs != null) {
            return this.collocs;
        }
        // Sort a private copy so the token order of the word list is never disturbed.
        List<String> sorted = new ArrayList<String>(this.getWords());
        Collections.sort(sorted);
        List<Pair<String, String>> pairs = new ArrayList<Pair<String, String>>();
        for (int i = 0; i < sorted.size(); ++i) {
            String left = sorted.get(i);
            for (int j = i + 1; j < sorted.size(); ++j) {
                String right = sorted.get(j);
                if (!left.equalsIgnoreCase(right)) {
                    pairs.add(new Pair<String, String>(left, right));
                }
            }
        }
        this.collocs = pairs;
        return this.collocs;
    }

    /**
     * Returns the pairs of distinct words found within {@code k} positions
     * left and right of each occurrence of the target word.
     *
     * <p>The target word itself never takes part in a pair, and words equal
     * ignoring case are not paired. Each pair is ordered by natural string
     * order. The result is cached per window size, independently of
     * {@link #getPairsWords()}.
     *
     * <p>NOTE(review): occurrences are matched against the raw target word
     * field, not against {@link #getTargetWord()} — confirm this is intended.
     *
     * @param k number of positions considered on each side of the target word
     * @return the cached list of word pairs for this window size
     */
    public List<Pair<String, String>> getPairsWords(int k) {
        if (this.windowCollocs != null && this.windowCollocsSize == k) {
            return this.windowCollocs;
        }
        List<Pair<String, String>> pairs = new ArrayList<Pair<String, String>>();
        String[] words = this.getWords().toArray(new String[0]);
        for (int i = 0; i < words.length; ++i) {
            if (!words[i].equalsIgnoreCase(this.targetWord)) {
                continue;
            }
            int windowLeft = Math.max(0, i - k);
            int windowRight = Math.min(words.length - 1, i + k);
            // 'first' must reach windowRight - 1 so the last two words of the
            // window are paired as well.
            for (int first = windowLeft; first < windowRight; ++first) {
                if (words[first].equalsIgnoreCase(this.targetWord)) {
                    continue;
                }
                for (int second = first + 1; second <= windowRight; ++second) {
                    if (words[second].equalsIgnoreCase(this.targetWord)
                            || words[first].equalsIgnoreCase(words[second])) {
                        continue;
                    }
                    pairs.add(orderedPair(words[first], words[second]));
                }
            }
        }
        this.windowCollocs = pairs;
        this.windowCollocsSize = k;
        return this.windowCollocs;
    }

    /**
     * Returns the pairs formed by each word and the words that follow it, up
     * to position {@code first + k + 1} (clamped to the last word).
     *
     * <p>Each pair is ordered by natural string order. Unlike the other pair
     * methods, identical words are paired too and nothing is cached.
     *
     * @param k window parameter; each word is paired with up to {@code k + 1} following words
     * @return a new list of word pairs
     */
    public List<Pair<String, String>> getAnyPairsWords(int k) {
        List<Pair<String, String>> pairs = new ArrayList<Pair<String, String>>();
        String[] words = this.getWords().toArray(new String[0]);
        for (int first = 0; first < words.length; ++first) {
            int windowRight = Math.min(words.length - 1, first + k + 1);
            for (int second = first + 1; second <= windowRight; ++second) {
                pairs.add(orderedPair(words[first], words[second]));
            }
        }
        return pairs;
    }

    /** Builds a pair whose first element is the smaller of the two strings per {@code compareTo}. */
    private static Pair<String, String> orderedPair(String a, String b) {
        if (a.compareTo(b) < 0) {
            return new Pair<String, String>(a, b);
        }
        return new Pair<String, String>(b, a);
    }

    /**
     * Jaccard measure between the word lists of this instance and {@code c}.
     *
     * @param c the other instance
     * @return the value computed by {@code Measures.Jaccard}
     */
    public double bow_similarity(Semeval07WSIInstance c) {
        return Measures.Jaccard(this.getWords(), c.getWords());
    }

    /** Placeholder: always returns 0. */
    private double wordnet_similarity(Semeval07WSIInstance c) {
        return 0.0;
    }

    /**
     * Jaccard measure between the full pair lists of this instance and {@code c}.
     *
     * @param c the other instance
     * @return the value computed by {@code Measures.Jaccard}
     */
    public double collocs_similarity(Semeval07WSIInstance c) {
        return Measures.Jaccard(this.getPairsWords(), c.getPairsWords());
    }

    /**
     * Jaccard measure between the windowed pair lists of this instance and {@code c}.
     *
     * @param c    the other instance
     * @param size window size passed to {@link #getPairsWords(int)}
     * @return the value computed by {@code Measures.Jaccard}
     */
    public double collocs_similarity(Semeval07WSIInstance c, int size) {
        return Measures.Jaccard(this.getPairsWords(size), c.getPairsWords(size));
    }

    /**
     * Cosine similarity between the vectors of this instance and {@code c}.
     *
     * @param c the other instance
     * @return the cosine similarity, or 0 when the computation yields NaN
     */
    public double bnc_similarity(Semeval07WSIInstance c) {
        // Fetch both vectors first: getVector() also fills in the norm fields.
        DoubleMatrix own = this.getVector();
        DoubleMatrix other = c.getVector();
        double sim = JLTMatrix.cos_sim(own, this.norm, other, c.norm);
        return Double.isNaN(sim) ? 0.0 : sim;
    }

    /**
     * Returns the vector of this instance, computing and caching it (together
     * with its norm) on first use.
     *
     * <p>The vector is obtained by converting the words of this instance with
     * a {@link SentenceToVector} built from the lexicon and context matrix
     * named in {@link Configuration}, using the configured composition method.
     *
     * @return the cached vector
     */
    public DoubleMatrix getVector() {
        if (this.vector != null) {
            return this.vector;
        }
        Configuration config = Configuration.getInstance();
        List<String> lexicon = Bnc.getInstance().getLexiconFromFile(config.getBncLexiconFile());
        DoubleMatrix contextMatrix = Bnc.getInstance().getContextMatrixFromFile(config.getBncMatrixOfContextsPath());
        SentenceToVector converter = new SentenceToVector(contextMatrix, lexicon);
        VectorComposition composition = VectorComposition.valueOf(config.getBncVectorMethod());
        this.vector = converter.convertSentence(this.getWords(false), composition);
        this.norm = this.vector.norm2();
        return this.vector;
    }

    /** Returns {@code "<id> (<corpus>) = <text>"}. */
    @Override
    public String toString() {
        return String.valueOf(this.id) + " (" + this.corpus + ") = " + this.text;
    }
}

