/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.wiki;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.Constants;
import it.uniroma1.lcl.jlt.util.Language;
import it.uniroma1.lcl.jlt.wiki.SearchWiki;
import it.uniroma1.lcl.jlt.wiki.SearchWikiCentral;
import it.uniroma1.lcl.jlt.wiki.data.WikiPage;
import it.uniroma1.lcl.jlt.wiki.data.WikiText;
import it.uniroma1.lcl.jlt.wiki.data.WikiWord;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;

/**
 * Command-line style diagnostics for the wiki index exposed through
 * {@link SearchWiki}. Every method prints its findings to standard output.
 */
public class WikiIndexUtilities {
    /** Opening tag expected immediately before a page's start offset in the XML dump. */
    private static final String TEXT_OPEN_TAG = "<text xml:space=\"preserve\">";
    /** Closing tag expected immediately at a page's end offset in the XML dump. */
    private static final String TEXT_CLOSE_TAG = "</text>";

    private final SearchWiki sw;
    private final Configuration config = Configuration.getInstance();

    /** Creates the utilities on top of the English index. */
    public WikiIndexUtilities() {
        this(Language.EN);
    }

    /**
     * Creates the utilities on top of the index of the given language.
     *
     * @param language language passed to {@link SearchWikiCentral#getInstance}
     */
    public WikiIndexUtilities(Language language) {
        this.sw = SearchWikiCentral.getInstance(language);
    }

    /**
     * Prints how many WORD and SENSE field values the index holds for a lemma,
     * or "No Hits!" when either lookup returns {@code null}.
     *
     * @param lemma the lemma to look up
     * @throws IOException if the index lookup fails
     */
    public void printWordSenseCount(String lemma) throws IOException {
        String[] words = this.sw.getValuesFromField(Constants.WikiIndexFields.WORD.toString(), lemma);
        String[] senses = this.sw.getValuesFromField(Constants.WikiIndexFields.SENSE.toString(), lemma);
        System.out.println("LEMMA: " + lemma);
        if (words != null && senses != null) {
            System.out.println("W: " + words.length);
            System.out.println("S: " + senses.length);
        } else {
            System.out.println("No Hits!");
        }
    }

    /**
     * Prints the WORD and SENSE value counts for every value of the LEMMA field,
     * then closes the index.
     *
     * @throws IOException if an index lookup or the close fails
     */
    public void printAllWordSenseCount() throws IOException {
        for (String lemma : this.sw.getValuesFromField(Constants.WikiIndexFields.LEMMA.toString())) {
            System.out.println("LEMMA: " + lemma);
            String[] words = this.sw.getValuesFromField(Constants.WikiIndexFields.WORD.toString(), lemma);
            String[] senses = this.sw.getValuesFromField(Constants.WikiIndexFields.SENSE.toString(), lemma);
            if (words != null && senses != null) {
                System.out.println("WORD COUNT: " + words.length);
                System.out.println("SENSE COUNT: " + senses.length);
            } else {
                System.out.println("No Hits!");
            }
        }
        this.sw.closeIndex();
    }

    /**
     * Prints every lemma whose WORD and SENSE value arrays differ in length,
     * followed by the pairs that can still be aligned index by index.
     * Lemmas for which either lookup returns {@code null} are skipped.
     *
     * @throws IOException if an index lookup fails
     */
    public void printWordSenseDiff() throws IOException {
        for (String lemma : this.sw.getValuesFromField(Constants.WikiIndexFields.LEMMA.toString())) {
            String[] words = this.sw.getValuesFromField(Constants.WikiIndexFields.WORD.toString(), lemma);
            String[] senses = this.sw.getValuesFromField(Constants.WikiIndexFields.SENSE.toString(), lemma);
            if (words == null || senses == null || words.length == senses.length) {
                continue;
            }
            System.out.println("LEMMA: " + lemma);
            System.out.println("WORD SIZE: " + words.length);
            System.out.println("SENSE SIZE: " + senses.length);
            // Only the common prefix of the two arrays can be shown side by side.
            int common = Math.min(words.length, senses.length);
            for (int k = 0; k < common; ++k) {
                System.out.println(k + ":\t" + words[k] + " -- " + senses[k]);
            }
        }
    }

    /**
     * Prints id, title, lemma, categories, translations and the word/sense
     * pairs of the text for every page returned by {@code getSenses(lemma)}.
     *
     * @param lemma the lemma whose sense pages are printed
     * @throws IOException if the index lookup fails
     * @throws ParseException if the underlying query cannot be parsed
     */
    public void printWikiPageInfos(String lemma) throws IOException, ParseException {
        for (WikiPage wp : this.sw.getSenses(lemma)) {
            System.out.println("ID:" + wp.getId());
            System.out.println("TITOLO:" + wp.getTitle());
            System.out.println("LEMMA:" + wp.getLemma());
            System.out.println("LEMMI CATEGORIE:" + wp.getCategoryLemmas());
            System.out.println("CATEGORIE:" + wp.getCategories());
            System.out.println("TRANSLATIONS:" + wp.getTranslations());
            System.out.println("WORD-SENSE OF THE TEXT");
            WikiText wt = wp.getText();
            for (WikiWord ww : wt.getWikiWords()) {
                System.out.println("\t" + ww.getWikiText() + "/" + ww.getWikiSense());
            }
        }
    }

    /**
     * Prints the number of distinct values the index reports for a field.
     *
     * @param field name of the index field
     * @throws IOException if the index lookup fails
     */
    public void printUniqueFieldItemsCount(String field) throws IOException {
        System.out.println("Number of unique items on the index for field " + field + ": " + this.sw.getValuesFromField(field).size());
    }

    /**
     * Prints the mean number of sense titles per LEMMA value together with the
     * number of lemmas. An index without lemmas reports an average of 0
     * instead of NaN.
     *
     * @throws IOException if an index lookup fails
     * @throws ParseException if the underlying query cannot be parsed
     */
    public void printAvarageSensePerLemma() throws IOException, ParseException {
        int count = 0;
        int totCount = 0;
        for (String l : this.sw.getValuesFromField(Constants.WikiIndexFields.LEMMA.toString())) {
            ++count;
            totCount += this.sw.getSenseTitles(l).size();
        }
        // Guard the division: 0f / 0f would otherwise print "NaN".
        float avg = count == 0 ? 0.0f : (float)totCount / (float)count;
        System.out.println("Avarage sense count: " + avg + " (tot lemma: " + count + ")");
    }

    /**
     * Loads the XML dump and, for each document returned by
     * {@code getDocuments(c)}, prints the dump text between the document's
     * stored start and end offsets.
     *
     * @param c argument forwarded to {@code SearchWiki.getDocuments}
     * @throws IOException if the dump or the index cannot be read
     */
    public void printTextUsingOffset(int c) throws IOException {
        String doc = this.readDump();
        for (Document d : this.sw.getDocuments(c)) {
            String lemma = d.get(Constants.WikiIndexFields.LEMMA.toString());
            String title = d.get(Constants.WikiIndexFields.TITLE.toString());
            int sOffset = Integer.parseInt(d.get(Constants.WikiIndexFields.OFFSET_START.toString()));
            int eOffset = Integer.parseInt(d.get(Constants.WikiIndexFields.OFFSET_END.toString()));
            String temp = doc.substring(sOffset, eOffset);
            System.out.println("-:::- LEMMA: " + lemma + " - TITLE: " + title + " - " + sOffset + "/" + eOffset + "\n" + temp + "\n");
        }
    }

    /**
     * Loads the XML dump and reports every document returned by
     * {@code getDocuments(c)} whose stored offsets are not enclosed by the
     * expected {@code <text xml:space="preserve">} / {@code </text>} tags.
     * Offsets that fall outside the dump are clamped so that such documents
     * are reported as anomalies rather than aborting the scan.
     *
     * @param c argument forwarded to {@code SearchWiki.getDocuments}
     * @throws IOException if the dump or the index cannot be read
     */
    public void printOffsetAnomalies(int c) throws IOException {
        String doc = this.readDump();
        for (Document d : this.sw.getDocuments(c)) {
            String lemma = d.get(Constants.WikiIndexFields.LEMMA.toString());
            String title = d.get(Constants.WikiIndexFields.TITLE.toString());
            int sOffset = Integer.parseInt(d.get(Constants.WikiIndexFields.OFFSET_START.toString()));
            int eOffset = Integer.parseInt(d.get(Constants.WikiIndexFields.OFFSET_END.toString()));
            String foundInitTextTag = WikiIndexUtilities.clampedSubstring(doc, sOffset - TEXT_OPEN_TAG.length(), sOffset);
            String foundEndTextTag = WikiIndexUtilities.clampedSubstring(doc, eOffset, eOffset + TEXT_CLOSE_TAG.length());
            if (TEXT_OPEN_TAG.equals(foundInitTextTag) && TEXT_CLOSE_TAG.equals(foundEndTextTag)) {
                continue;
            }
            System.out.println(foundInitTextTag + " -- " + foundEndTextTag);
            String temp = WikiIndexUtilities.clampedSubstring(doc, sOffset, eOffset);
            System.out.println("-:::- LEMMA: " + lemma + " - TITLE: " + title + " - " + sOffset + "/" + eOffset + "\n" + temp + "\n");
        }
    }

    /**
     * Reads the whole XML dump configured in {@link Configuration} as UTF-8,
     * re-joining the lines with the platform line separator, and prints the
     * source path and the resulting length. The reader is always closed, even
     * when reading fails.
     */
    private String readDump() throws IOException {
        String source = this.config.getWikipediaXMLDump();
        BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(source), "UTF-8"));
        try {
            System.out.println("Reading xml: " + source);
            String eol = System.getProperty("line.separator");
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append(eol);
            }
            String doc = sb.toString();
            // Note: the figure printed is the character count of the decoded text.
            System.out.println("Bytes read from the xml dump: " + doc.length() + "\n");
            return doc;
        } finally {
            // Closing the BufferedReader also closes the wrapped stream.
            reader.close();
        }
    }

    /**
     * Returns {@code text.substring(from, to)} after clamping both bounds to
     * {@code [0, text.length()]}; an inverted or fully out-of-range span
     * yields the empty string instead of throwing.
     */
    private static String clampedSubstring(String text, int from, int to) {
        int start = Math.max(0, Math.min(from, text.length()));
        int end = Math.max(start, Math.min(to, text.length()));
        return text.substring(start, end);
    }

    /** Demo entry point: prints the page information for the lemma "drink". */
    public static void main(String[] args) throws IOException, ParseException {
        WikiIndexUtilities wiu = new WikiIndexUtilities();
        wiu.printWikiPageInfos("drink");
    }
}

