/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.semeval10.data;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.bnc.Bnc;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsDB;
import it.uniroma1.lcl.jlt.matrix.JLTMatrix;
import it.uniroma1.lcl.jlt.matrix.SentenceToVector;
import it.uniroma1.lcl.jlt.matrix.VectorComposition;
import it.uniroma1.lcl.jlt.pipeline.stanford.DataProcessor;
import it.uniroma1.lcl.jlt.pipeline.stanford.StanfordSentence;
import it.uniroma1.lcl.jlt.util.IntegerCounter;
import it.uniroma1.lcl.jlt.util.Measures;
import it.uniroma1.lcl.jlt.util.Pair;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.BasicConfigurator;
import org.apache.lucene.queryParser.ParseException;
import org.jblas.DoubleMatrix;

/**
 * One instance of a SemEval data set: a piece of text ({@code content}) that
 * is associated with a target word, a numeric id and the {@link CollocsDB}
 * it was taken from.
 *
 * <p>The class lazily derives several views of the text (content words, word
 * pairs, a composed vector) and caches them in instance fields. The caches are
 * populated without any synchronization.
 */
public class SemevalInstance
implements Serializable,
Comparable<SemevalInstance> {
    private static final long serialVersionUID = -8847362834182256016L;
    /**
     * POS tags used by {@link #getWords(boolean)}. Only results computed with
     * exactly these tags (and compounds + POS-in-string enabled) are cached in
     * {@link #contentWords_All} / {@link #contentWords_noTW}.
     */
    private static final List<String> DEFAULT_POS = Collections.unmodifiableList(Arrays.asList("n", "v", "r", "j"));
    protected CollocsDB type;
    protected String targetWord;
    protected int id;
    protected String content;
    /** Cached default-configuration words, target word included. */
    protected List<String> contentWords_All;
    /** Cached default-configuration words, target word removed. */
    protected List<String> contentWords_noTW;
    /** Cache for {@link #getPairsWords()}. */
    protected List<Pair<String, String>> collocs;
    /**
     * Cache for {@link #getPairsWords(int)}; kept apart from {@link #collocs}
     * so the two variants cannot return each other's results. Transient so the
     * serialized form of the class is unchanged.
     */
    private transient List<Pair<String, String>> windowCollocs;
    /** Window size {@link #windowCollocs} was computed with. */
    private transient int windowCollocsSize;
    protected String node_string;
    protected String extended_string;
    /** Regular expression of the symbols stripped from the content before tagging. */
    protected static String UNWANTED_SYMBOLS = "\"|'|\\.|~|\\,|;|!|\\?|:|\\(|\\)|\\[|\\]";
    protected DoubleMatrix vector;
    protected double norm;

    /**
     * Creates an instance.
     *
     * @param collocsDB the data set this instance belongs to
     * @param target the target word
     * @param id the instance id
     * @param content the raw text of the instance
     */
    public SemevalInstance(CollocsDB collocsDB, String target, int id, String content) {
        this.type = collocsDB;
        this.targetWord = target;
        this.id = id;
        this.content = content;
    }

    /** @return the instance id */
    public int getId() {
        return this.id;
    }

    /** @return the target word */
    public String getTargetWord() {
        return this.targetWord;
    }

    /**
     * Counts how many times the target word occurs in the raw content.
     *
     * <p>The match is a plain, case-sensitive substring search, so occurrences
     * inside longer words are counted as well.
     *
     * @return the number of occurrences; {@code 0} when the content or the
     *         target word is {@code null} or the target word is empty
     */
    public int getTargetWordOccurrences() {
        String target = this.getTargetWord();
        // An empty needle makes indexOf() succeed forever (it never returns -1),
        // so it must be rejected up front.
        if (this.content == null || target == null || target.length() == 0) {
            return 0;
        }
        int occurrences = 0;
        int position = this.content.indexOf(target);
        while (position != -1) {
            ++occurrences;
            position = this.content.indexOf(target, position + 1);
        }
        return occurrences;
    }

    /** @return the raw text of the instance */
    public String getContent() {
        return this.content;
    }

    /**
     * @return the content words of the instance without the target word;
     *         equivalent to {@code getWords(true)}
     */
    public List<String> getWords() {
        return this.getWords(true);
    }

    /**
     * @return the content words of the instance including the target word;
     *         equivalent to {@code getWords(false)}
     */
    @Deprecated
    public List<String> getAllWords() {
        return this.getWords(false);
    }

    /**
     * Returns the content words for the default configuration (POS tags
     * {@code n, v, r, j}, compounds allowed, POS included in the strings).
     *
     * @param removeTargetWord whether entries equal to the target word are
     *        dropped from the result
     * @return the (cached) word list; the list itself is the cache, so callers
     *         should copy it before modifying it
     */
    public List<String> getWords(boolean removeTargetWord) {
        // A fresh list is handed over so the shared constant is never exposed.
        return this.getWords(new ArrayList<String>(DEFAULT_POS), true, true, removeTargetWord);
    }

    /**
     * Tags the content (after stripping {@link #UNWANTED_SYMBOLS}) and returns
     * the terms selected by the given options.
     *
     * <p>Only the default configuration used by {@link #getWords(boolean)} is
     * cached, one slot per {@code removeTargetWord} value. Any other
     * configuration is recomputed on every call, because the two cache fields
     * cannot tell different POS/compound/format settings apart.
     *
     * <p>If tagging fails the stack trace is printed, nothing is cached and
     * whatever was collected so far (normally an empty list) is returned.
     *
     * @param validPOS POS tags to keep
     * @param allowCompounds passed through to {@code StanfordSentence.getTerms}
     * @param includePOSInString passed through to {@code StanfordSentence.getTerms}
     * @param removeTargetWord whether entries equal to the target word are
     *        dropped from the result
     * @return the selected terms
     */
    public List<String> getWords(List<String> validPOS, boolean allowCompounds, boolean includePOSInString, boolean removeTargetWord) {
        boolean defaultConfiguration = allowCompounds && includePOSInString && DEFAULT_POS.equals(validPOS);
        if (defaultConfiguration) {
            List<String> cached = removeTargetWord ? this.contentWords_noTW : this.contentWords_All;
            if (cached != null) {
                return cached;
            }
        }
        List<String> contentWords = new ArrayList<String>();
        try {
            StanfordSentence taggedSentence = new StanfordSentence(DataProcessor.getInstance().processSentence(this.content.replaceAll(UNWANTED_SYMBOLS, "")));
            contentWords = taggedSentence.getTerms(validPOS, allowCompounds, false, includePOSInString, StanfordSentence.MultiwordBelongingTo.WORDNET);
            if (removeTargetWord) {
                contentWords.removeAll(Collections.singleton(this.targetWord));
            }
            if (defaultConfiguration) {
                if (removeTargetWord) {
                    this.contentWords_noTW = contentWords;
                } else {
                    this.contentWords_All = contentWords;
                }
            }
        }
        catch (Exception e) {
            // Deliberately best-effort: a tagging failure yields an empty/partial
            // result instead of aborting the caller.
            e.printStackTrace();
        }
        return contentWords;
    }

    /**
     * Builds every unordered pair of distinct (case-insensitively) nouns of the
     * instance, target word excluded. The nouns are sorted first, so within a
     * pair the first element never sorts after the second.
     *
     * @return the cached list of pairs
     */
    public List<Pair<String, String>> getPairsWords() {
        if (this.collocs != null) {
            return this.collocs;
        }
        // Sort a private copy: the list returned by getWords() may be a cache.
        List<String> nouns = new ArrayList<String>(this.getWords(Arrays.asList("n"), true, false, true));
        Collections.sort(nouns);
        List<Pair<String, String>> pairs = new ArrayList<Pair<String, String>>();
        for (int i = 0; i < nouns.size(); ++i) {
            String left = nouns.get(i);
            for (int j = i + 1; j < nouns.size(); ++j) {
                String right = nouns.get(j);
                if (!left.equalsIgnoreCase(right)) {
                    pairs.add(new Pair<String, String>(left, right));
                }
            }
        }
        this.collocs = pairs;
        return this.collocs;
    }

    /**
     * Builds the pairs of distinct words that co-occur inside a window of
     * {@code k} positions on each side of every occurrence of the target word
     * in {@link #getAllWords()}. The target word itself never takes part in a
     * pair, and each pair is stored with the lexicographically smaller word
     * first.
     *
     * <p>The result is cached per window size; asking for a different
     * {@code k} recomputes it.
     *
     * @param k number of positions taken on each side of the target word
     * @return the list of pairs for that window size
     */
    public List<Pair<String, String>> getPairsWords(int k) {
        if (this.windowCollocs != null && this.windowCollocsSize == k) {
            return this.windowCollocs;
        }
        List<Pair<String, String>> pairs = new ArrayList<Pair<String, String>>();
        String[] words = this.getAllWords().toArray(new String[0]);
        for (int i = 0; i < words.length; ++i) {
            if (!words[i].equalsIgnoreCase(this.targetWord)) {
                continue;
            }
            int windowLeft = Math.max(0, i - k);
            int windowRight = Math.min(words.length - 1, i + k);
            // 'first' must reach windowRight - 1, otherwise the pair formed by the
            // last two words of the window is never produced.
            for (int first = windowLeft; first < windowRight; ++first) {
                if (words[first].equalsIgnoreCase(this.targetWord)) {
                    continue;
                }
                for (int second = first + 1; second <= windowRight; ++second) {
                    if (words[second].equalsIgnoreCase(this.targetWord) || words[first].equalsIgnoreCase(words[second])) {
                        continue;
                    }
                    if (words[first].compareTo(words[second]) < 0) {
                        pairs.add(new Pair<String, String>(words[first], words[second]));
                    } else {
                        pairs.add(new Pair<String, String>(words[second], words[first]));
                    }
                }
            }
        }
        this.windowCollocs = pairs;
        this.windowCollocsSize = k;
        return this.windowCollocs;
    }

    /**
     * Empties and drops the cached word pairs of both {@code getPairsWords}
     * variants. Safe to call when nothing has been computed yet; the next
     * {@code getPairsWords} call recomputes the pairs.
     */
    public void clearCollocations() {
        if (this.collocs != null) {
            this.collocs.clear();
            this.collocs = null;
        }
        if (this.windowCollocs != null) {
            this.windowCollocs.clear();
            this.windowCollocs = null;
        }
    }

    /**
     * @return the words of {@link #getWords()} joined by single spaces; the
     *         string is computed once and cached
     */
    @Override
    public String toString() {
        if (this.node_string == null) {
            this.node_string = StringUtils.join(this.getWords(), (String)" ").trim();
        }
        return this.node_string;
    }

    /**
     * @return a multi-line description (target word, id, content and content
     *         words); the string is computed once and cached
     */
    public String print() throws Exception, ParseException {
        if (this.extended_string == null) {
            this.extended_string = "\t" + this.getTargetWord() + " Id=" + this.getId() + "\n" + "\tContent := " + this.getContent() + "\n" + "\tContentWords := " + this.getWords() + "\n";
        }
        return this.extended_string;
    }

    /**
     * @param c the instance to compare with
     * @return {@code Measures.Jaccard} of the two instances' {@link #getWords()}
     */
    public double bow_similarity(SemevalInstance c) {
        return Measures.Jaccard(this.getWords(), c.getWords());
    }

    /**
     * @param c the instance to compare with
     * @return {@code Measures.Jaccard} of the two instances' {@link #getPairsWords()}
     */
    public double collocs_similarity(SemevalInstance c) {
        return Measures.Jaccard(this.getPairsWords(), c.getPairsWords());
    }

    /**
     * @param c the instance to compare with
     * @param size window size handed to {@link #getPairsWords(int)}
     * @return {@code Measures.Jaccard} of the two instances' windowed pairs
     */
    public double collocs_similarity(SemevalInstance c, int size) {
        return Measures.Jaccard(this.getPairsWords(size), c.getPairsWords(size));
    }

    /**
     * @param c the instance to compare with
     * @return {@code JLTMatrix.cos_sim} of the two instances' vectors, or
     *         {@code 0.0} when that value is NaN
     */
    public double bnc_similarity(SemevalInstance c) {
        // getVector() must run before the norms are read: it is what sets them.
        DoubleMatrix mine = this.getVector();
        DoubleMatrix theirs = c.getVector();
        double sim = JLTMatrix.cos_sim(mine, this.norm, theirs, c.norm);
        return Double.isNaN(sim) ? 0.0 : sim;
    }

    /**
     * Converts {@link #getAllWords()} into a vector using the lexicon, context
     * matrix and composition method named by the {@link Configuration}, and
     * stores its 2-norm in {@link #norm}.
     *
     * @return the cached vector of this instance
     */
    public DoubleMatrix getVector() {
        if (this.vector != null) {
            return this.vector;
        }
        List<String> words = Bnc.getInstance().getLexiconFromFile(Configuration.getInstance().getBncLexiconFile());
        DoubleMatrix contextMatrix = Bnc.getInstance().getContextMatrixFromFile(Configuration.getInstance().getBncMatrixOfContextsPath());
        SentenceToVector converter = new SentenceToVector(contextMatrix, words);
        VectorComposition composition = VectorComposition.valueOf(Configuration.getInstance().getBncVectorMethod());
        this.vector = converter.convertSentence(this.getAllWords(), composition);
        this.norm = this.vector.norm2();
        return this.vector;
    }

    /**
     * Two instances are equal when they share the same {@code type} reference,
     * target word and id.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof SemevalInstance)) {
            return false;
        }
        SemevalInstance other = (SemevalInstance)o;
        if (other.type != this.type || other.getId() != this.getId()) {
            return false;
        }
        if (this.targetWord == null) {
            return other.targetWord == null;
        }
        return this.targetWord.equals(other.targetWord);
    }

    /** Hash code over the same three properties {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        int result = this.type == null ? 0 : this.type.hashCode();
        result = 31 * result + (this.targetWord == null ? 0 : this.targetWord.hashCode());
        result = 31 * result + this.getId();
        return result;
    }

    /**
     * @return a counter holding, for every noun of the instance (target word
     *         excluded), the number of times it was returned by the tagger
     */
    public IntegerCounter<String> getInstanceWordCount() {
        IntegerCounter<String> instanceWordCount = new IntegerCounter<String>();
        for (String word : this.getWords(Arrays.asList("n"), true, false, true)) {
            instanceWordCount.count(word);
        }
        return instanceWordCount;
    }

    /** Small manual demo that prints the derived views of a few sample instances. */
    public static void main(String[] args) throws IOException, ParseException {
        BasicConfigurator.configure();
        String sentence = "Britain\u2019s top 50 companies are to be given unprecedented ~access to government ministers in an attempt to spark life into the economy. Bosses of companies, including BP and GlaxoSmithKline, will be able to telephone directly to the top of /. Whitehall ~~ departments in new individually tailored relationships with senior ministers who will act as their \"buddies\". The Government will announce soon the details of the new \"strategic relations\" initiative led by Lord Green of Hurstpierpoint, the Trade and Investment Minister, who as Stephen Green was chairman and chief executive of HSBC.";
        SemevalInstance example = new SemevalInstance(CollocsDB.SEMEVAL10_TEST, "economy", 0, sentence);
        System.out.println("getContent() := " + example.getContent());
        System.out.println("getWords() := " + example.getWords());
        System.out.println("getAllWords() := " + example.getAllWords());
        System.out.println("=====================================");
        SemevalInstance c1 = new SemevalInstance(CollocsDB.SEMEVAL10_TRAIN, "life", 1, "I love you so much, Mary! My dog is your dog and my life is your life. Here is my ring for you.");
        SemevalInstance c2 = new SemevalInstance(CollocsDB.SEMEVAL10_TRAIN, "life", 2, "Mary loves Paul very much! Her cat is his cat and her life is his life... Here is my ring for you. Her life was so sad before she met him!");
        System.out.println("getContent() := " + c1.getContent());
        System.out.println("getWords() := " + c1.getWords());
        System.out.println("getAllWords() := " + c1.getAllWords());
        System.out.println("getTargetWord() := " + c1.getTargetWord());
        System.out.println("getTargetWordOccurrences() := " + c1.getTargetWordOccurrences());
        System.out.println("-------------------------------------");
        System.out.println("getContent() := " + c2.getContent());
        System.out.println("getWords() := " + c2.getWords());
        System.out.println("getAllWords() := " + c2.getAllWords());
        System.out.println("getTargetWord() := " + c2.getTargetWord());
        System.out.println("getTargetWordOccurrences() := " + c2.getTargetWordOccurrences());
        System.out.println("=====================================");
        List<String> intersection = new ArrayList<String>();
        intersection.addAll(c1.getWords());
        intersection.retainAll(c2.getWords());
        System.out.println("Intersection(" + intersection.size() + ") := " + intersection);
        List<String> union = new ArrayList<String>();
        union.addAll(c1.getWords());
        union.addAll(c2.getWords());
        System.out.println("Union(" + union.size() + ") := " + union);
        System.out.println("Bow Similarity: " + c1.bow_similarity(c2));
        System.out.println("Coll Similarity: " + c1.collocs_similarity(c2));
        System.out.println("C1 equals C1? " + c1.equals(c1));
        System.out.println("C1 equals C2? " + c1.equals(c2));
    }

    /**
     * Orders instances by id only. Note that this is coarser than
     * {@link #equals(Object)}, which also compares type and target word.
     */
    @Override
    public int compareTo(SemevalInstance o) {
        int mine = this.getId();
        int theirs = o.getId();
        if (mine < theirs) {
            return -1;
        }
        return mine == theirs ? 0 : 1;
    }
}

