/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.aol.data;

import edu.stanford.nlp.ling.WordLemmaTag;
import it.uniroma1.lcl.jlt.pipeline.stanford.DataProcessor;
import it.uniroma1.lcl.jlt.pipeline.stanford.StanfordSentence;
import it.uniroma1.lcl.jlt.util.Pair;
import it.uniroma1.lcl.jlt.util.Stopwords;
import it.uniroma1.lcl.jlt.wordnet.WordNet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.lucene.queryParser.ParseException;

public class AolQuery {
    private String sessionID;
    private String query;
    private String time;
    private String rank;
    private String url;
    private List<String> queryWords;
    protected List<Pair<String, String>> collocs;
    protected static String UNWANTED_SYMBOLS = "\"|'|~|\\,|;|!|\\?|:|\\(|\\)|\\[|\\]";

    public AolQuery(String sessionID, String query, String time, String rank, String url) throws IOException {
        this.sessionID = sessionID;
        this.query = query;
        this.time = time;
        this.rank = rank;
        this.url = url;
        List<String> validPOS = Arrays.asList("N");
        StanfordSentence taggedQuery = new StanfordSentence(DataProcessor.getInstance().processSentence(this.query.replaceAll(UNWANTED_SYMBOLS, "").trim()));
        try {
            this.queryWords = this.clean(taggedQuery.getTerms(validPOS, true, false, false, StanfordSentence.MultiwordBelongingTo.WORDNET_OR_WIKIPEDIA));
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        Stopwords stopwords = Stopwords.getInstance();
        for (WordLemmaTag queryWord : taggedQuery.getWords()) {
            String wordLemma = WordNet.getInstance().getSingularOf(queryWord.lemma());
            if (this.queryWords.contains(wordLemma = AolQuery.clean(wordLemma)) || stopwords.isStopword(wordLemma) || wordLemma.isEmpty()) continue;
            this.queryWords.add(wordLemma);
        }
    }

    public String getSessionID() {
        return this.sessionID;
    }

    public String getQueryText() {
        return this.query;
    }

    public String getTime() {
        return this.time;
    }

    public String getRank() {
        return this.rank;
    }

    public String getUrl() {
        return this.url;
    }

    public List<String> getQueryWords() {
        return this.queryWords;
    }

    public List<Pair<String, String>> getWordPairs() {
        if (this.collocs == null) {
            this.collocs = new ArrayList<Pair<String, String>>();
            ArrayList<String> wordList = new ArrayList<String>(this.queryWords);
            Collections.sort(wordList);
            String[] words = wordList.toArray(new String[0]);
            int i = 0;
            while (i < words.length) {
                int j = i + 1;
                while (j < words.length) {
                    if (words[i].compareTo(words[j]) < 0) {
                        this.collocs.add(new Pair<String, String>(words[i], words[j]));
                    } else if (words[i].compareTo(words[j]) > 0) {
                        this.collocs.add(new Pair<String, String>(words[j], words[i]));
                    }
                    ++j;
                }
                ++i;
            }
        }
        return this.collocs;
    }

    private List<String> clean(List<String> strings) {
        ArrayList<String> resultList = new ArrayList<String>();
        for (String string : strings) {
            resultList.add(AolQuery.clean(string));
        }
        return resultList;
    }

    private static String clean(String stringa) {
        String result = stringa;
        String innerRegex = "[^a-z^A-Z^0-9^.^\\-^_^ ]";
        result = result.replaceAll(innerRegex, "");
        result = result.replaceAll(" ", "_");
        String outerRegex = "^[-\\.]*|[-\\.]*$";
        result = result.replaceAll(outerRegex, "").trim();
        return result;
    }

    public static void main(String[] args) throws IOException, ParseException {
        String test = "www.google.com.";
        String st = "blablabla$trut ittatta";
        System.out.println(AolQuery.clean(st));
        System.exit(0);
        AolQuery query = new AolQuery("1", test, "oraedata", "1", "www.goo.foo");
        System.out.println("Testo:\n" + query.getQueryText());
        System.out.println("Parole diverse:");
        for (String word : query.getQueryWords()) {
            System.out.println(word);
        }
    }
}

