/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.wiki;

import edu.mit.jwi.item.POS;
import edu.mit.jwi.morph.IStemmer;
import edu.mit.jwi.morph.WordnetStemmer;
import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.Constants;
import it.uniroma1.lcl.jlt.util.Language;
import it.uniroma1.lcl.jlt.util.Strings;
import it.uniroma1.lcl.jlt.wiki.IndexWiki;
import it.uniroma1.lcl.jlt.wiki.SearchWiki;
import it.uniroma1.lcl.jlt.wiki.SearchWikiCentral;
import it.uniroma1.lcl.jlt.wordnet.WordNet;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;

public class IndexWikiThread
extends Thread {
    private static final Log log = LogFactory.getLog(IndexWikiThread.class);
    private static final String SEPARATOR = Configuration.getInstance().getPagesSeparator();
    private final IndexWiki iw;
    private final IndexWriter writer;
    private final int startDir;
    private final int endDir;
    private final IStemmer stemmer;
    private final Set<String> lexicon;
    private final String docDir;
    private final long tId;
    private CountDownLatch doneSignal;

    public IndexWikiThread(IndexWiki iw, IndexWriter writer, int startDir, int endDir, Set<String> lexicon, String docDir, CountDownLatch doneSignal) {
        this.iw = iw;
        this.writer = writer;
        this.startDir = startDir;
        this.endDir = endDir;
        this.lexicon = lexicon;
        this.docDir = docDir;
        this.doneSignal = doneSignal;
        this.tId = this.getId();
        this.stemmer = Configuration.getInstance().getIndexFactoryLanguage() == Language.EN ? new WordnetStemmer(WordNet.getInstance().getDictionary()) : null;
    }

    @Override
    public void run() {
        try {
            try {
                this.runThread();
            }
            catch (Exception e) {
                log.warn((Object)("EXCEPTION: " + e + " STACKTRACE FOLLOWS:"));
                e.printStackTrace();
                this.doneSignal.countDown();
            }
        }
        finally {
            this.doneSignal.countDown();
        }
    }

    public void runThread() {
        Configuration config = Configuration.getInstance();
        int nPagesPerFile = config.getMaxPagesPerFile();
        String lexiconFileName = config.getLexiconFileName();
        Language language = config.getIndexFactoryLanguage();
        try {
            File c;
            BufferedReader infobr = null;
            BufferedReader txtbr = null;
            SearchWiki search = null;
            if (config.getIndexResume()) {
                search = SearchWikiCentral.getInstance(language);
            }
            String id = null;
            String title = null;
            String lemma = null;
            String categoryLemmas = null;
            String categories = null;
            String translations = null;
            String offsets = null;
            int maxId = 0;
            int docNo = 0;
            if (search != null) {
                maxId = search.getMaxId();
            }
            while ((c = this.iw.getJob()) != null) {
                log.info((Object)("[THREAD " + this.tId + "] Next job: " + c.getName()));
                String nomeDir = c.getName();
                if (nomeDir.equals(lexiconFileName)) continue;
                int dirnum = Integer.parseInt(nomeDir);
                if (dirnum < this.startDir || dirnum > this.endDir) {
                    log.info((Object)("[THREAD " + this.tId + "] Skipping directory not in range: " + nomeDir));
                    continue;
                }
                log.info((Object)("[THREAD " + this.tId + "] Reading directory: " + nomeDir));
                File[] fileArray = new File(String.valueOf(this.docDir) + "/" + nomeDir).listFiles(new FilenameFilter(){

                    @Override
                    public boolean accept(File dir, String name) {
                        return name.endsWith(".info");
                    }
                });
                if (fileArray == null) continue;
                log.info((Object)("[THREAD " + this.tId + "] " + fileArray.length + " files in directory: " + nomeDir));
                int fileCount = 0;
                File[] fileArray2 = fileArray;
                int n = fileArray.length;
                int n2 = 0;
                while (n2 < n) {
                    File file = fileArray2[n2];
                    String nomeFile = file.getName();
                    String fullFileName = file.getAbsolutePath();
                    log.info((Object)("[THREAD " + this.tId + "] Reading file #" + ++fileCount + ": " + nomeFile));
                    infobr = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(file), "UTF-8"));
                    txtbr = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(fullFileName.replaceAll(".info", ".txt")), "UTF-8"));
                    int curPage = 0;
                    while (curPage < nPagesPerFile) {
                        id = infobr.readLine();
                        if (id == null) {
                            log.info((Object)("NULL ID in file: " + nomeFile));
                            break;
                        }
                        if (search != null && Integer.parseInt(id) < maxId) {
                            log.info((Object)("[THREAD " + this.tId + "] Skipping file already on index: " + nomeFile));
                        } else {
                            String tmpLine;
                            title = infobr.readLine();
                            if (title == null) {
                                log.info((Object)("[THREAD " + this.tId + "] Misalignment in file " + nomeFile + ", id=" + id));
                            }
                            lemma = infobr.readLine();
                            categoryLemmas = infobr.readLine();
                            categories = infobr.readLine();
                            translations = infobr.readLine();
                            offsets = infobr.readLine();
                            boolean test = title != null;
                            switch (language) {
                                case AR: {
                                    break;
                                }
                                default: {
                                    boolean bl = test = test && !Strings.isUpperCase(title) || title.length() == 1;
                                }
                            }
                            if (test) {
                                int n3;
                                int n4;
                                String[] stringArray;
                                Document d = new Document();
                                d.add((Fieldable)new Field(Constants.WikiIndexFields.ID.toString(), id, Field.Store.YES, Field.Index.NOT_ANALYZED));
                                d.add((Fieldable)new Field(Constants.WikiIndexFields.TITLE.toString(), title, Field.Store.YES, Field.Index.NOT_ANALYZED));
                                d.add((Fieldable)new Field(Constants.WikiIndexFields.TITLE_TOLOWERCASE.toString(), title.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                                d.add((Fieldable)new Field(Constants.WikiIndexFields.LEMMA.toString(), lemma, Field.Store.YES, Field.Index.NOT_ANALYZED));
                                if (!categoryLemmas.isEmpty()) {
                                    stringArray = categoryLemmas.split("\t");
                                    n4 = stringArray.length;
                                    n3 = 0;
                                    while (n3 < n4) {
                                        String categoryLemma = stringArray[n3];
                                        d.add((Fieldable)new Field(Constants.WikiIndexFields.CATEGORY_LEMMA.toString(), categoryLemma, Field.Store.YES, Field.Index.NOT_ANALYZED));
                                        ++n3;
                                    }
                                }
                                if (!categories.isEmpty()) {
                                    stringArray = categories.split("\t");
                                    n4 = stringArray.length;
                                    n3 = 0;
                                    while (n3 < n4) {
                                        String category = stringArray[n3];
                                        d.add((Fieldable)new Field(Constants.WikiIndexFields.CATEGORY.toString(), category, Field.Store.YES, Field.Index.NOT_ANALYZED));
                                        ++n3;
                                    }
                                }
                                if (!translations.isEmpty()) {
                                    stringArray = translations.split("\t");
                                    n4 = stringArray.length;
                                    n3 = 0;
                                    while (n3 < n4) {
                                        String translation = stringArray[n3];
                                        d.add((Fieldable)new Field(Constants.WikiIndexFields.TRANSLATION.toString(), translation, Field.Store.YES, Field.Index.NOT_ANALYZED));
                                        ++n3;
                                    }
                                }
                                if (offsets != null && !offsets.isEmpty()) {
                                    String[] offsetArray = offsets.split("\t");
                                    d.add((Fieldable)new Field(Constants.WikiIndexFields.OFFSET_START.toString(), offsetArray[0], Field.Store.YES, Field.Index.NO));
                                    d.add((Fieldable)new Field(Constants.WikiIndexFields.OFFSET_END.toString(), offsetArray[1], Field.Store.YES, Field.Index.NO));
                                }
                                this.indexText(d, txtbr);
                                this.writer.addDocument(d);
                                if (docNo++ % 1000 == 0) {
                                    log.info((Object)("[THREAD " + this.tId + "] Document parsed: COUNT: " + (docNo - 1) + " -- TITLE: " + title + " -- ID: " + id));
                                }
                            } else {
                                String line;
                                log.info((Object)("[THREAD " + this.tId + "] Document has the title uppercase, discarding: " + ++docNo + " - " + title + " - " + id));
                                while ((line = txtbr.readLine()) != null && !line.equals(config.getPagesSeparator())) {
                                }
                            }
                            if ((tmpLine = infobr.readLine()) != null && !tmpLine.equals(config.getPagesSeparator())) {
                                log.info((Object)("[THREAD " + this.tId + "] Error on pages separator: File=" + nomeFile + " - DocNo=" + docNo + " - Title=" + title + " - Id=" + id));
                            }
                        }
                        ++curPage;
                    }
                    infobr.close();
                    txtbr.close();
                    ++n2;
                }
            }
            log.info((Object)("[QUIT THREAD " + this.tId + "] There are no more jobs to run....quitting."));
        }
        catch (Exception e) {
            e.printStackTrace();
            log.info((Object)("[ERROR THREAD " + this.tId + "] " + e.getMessage()));
        }
    }

    private void indexText(Document d, BufferedReader txtBr) throws IOException, InterruptedException {
        boolean nonVuoto = false;
        ArrayList<String> wordList = new ArrayList<String>();
        String redirection = null;
        try {
            String linea = txtBr.readLine();
            redirection = this.isRedirectionPage(linea);
            if (redirection != null) {
                d.add((Fieldable)new Field(Constants.WikiIndexFields.REDIRECTION.toString(), redirection, Field.Store.YES, Field.Index.NOT_ANALYZED));
                nonVuoto = true;
                txtBr.readLine();
            } else if (!Configuration.getInstance().indexOnlyWikiPageInformation()) {
                while (linea != null && !linea.equals(SEPARATOR)) {
                    if (!linea.isEmpty()) {
                        boolean bLink = linea.contains("IsLiNk");
                        if (bLink) {
                            String[] pair = (linea = IndexWikiThread.getLink(linea)).split("IsLiNk");
                            if (pair.length < 2) {
                                log.error((Object)("Errore indexText: link formattato male " + linea));
                                linea = txtBr.readLine();
                                continue;
                            }
                            String sense = pair[0];
                            String text = pair[1];
                            wordList.add("LiNk" + sense);
                            wordList.add("TeXt" + text);
                            nonVuoto = true;
                        } else {
                            wordList.add(linea);
                            nonVuoto = true;
                        }
                    }
                    linea = txtBr.readLine();
                }
            }
        }
        catch (FileNotFoundException fnf) {
            log.error((Object)("FileNotFoundException: " + fnf.getMessage()));
        }
        if (nonVuoto && redirection == null) {
            int k = 0;
            while (k < wordList.size()) {
                String parola = (String)wordList.get(k);
                String parteLink = null;
                String parteText = null;
                if (parola.startsWith("TeXt")) {
                    parteText = parola.substring(parola.indexOf("TeXt") + 4).replaceAll("IsSpAcE", " ");
                }
                if (parola.startsWith("LiNk")) {
                    parteLink = parola.substring(parola.indexOf("LiNk") + 4).replaceAll("IsSpAcE", " ");
                }
                if (parteText != null) {
                    String parola2;
                    int idx;
                    List<String> lemmas = this.stem(parteText = parteText.toLowerCase());
                    if (!lemmas.contains(parteText)) {
                        if (lemmas.isEmpty()) {
                            lemmas = new ArrayList<String>();
                        }
                        lemmas.add(parteText);
                    }
                    if ((idx = (parteLink = (parola2 = (String)wordList.get(k - 1)).substring(parola2.indexOf("LiNk") + 4).replaceAll("IsSpAcE", " ")).indexOf("(")) != -1) {
                        parteLink = parteLink.substring(0, idx).trim().toLowerCase();
                    }
                    if (lemmas.contains(parteLink)) {
                        d.add((Fieldable)new Field(Constants.WikiIndexFields.WORD.toString(), parteLink, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    } else {
                        d.add((Fieldable)new Field(Constants.WikiIndexFields.WORD.toString(), lemmas.get(0), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    }
                } else if (parteLink != null) {
                    d.add((Fieldable)new Field(Constants.WikiIndexFields.SENSE.toString(), parteLink, Field.Store.YES, Field.Index.NOT_ANALYZED));
                } else {
                    int maxListSize;
                    int j = maxListSize = Math.min(5, wordList.size() - k);
                    block4: while (j > 0) {
                        String compound = this.join(wordList.subList(k, k + maxListSize), j);
                        if (compound.indexOf(35) == -1) {
                            List<String> lemmatizedCompounds = this.stem(compound.toLowerCase());
                            if (!lemmatizedCompounds.contains(compound)) {
                                if (lemmatizedCompounds.isEmpty()) {
                                    lemmatizedCompounds = new ArrayList<String>();
                                }
                                lemmatizedCompounds.add(compound);
                            }
                            for (String lemmatizedCompound : lemmatizedCompounds) {
                                if (!this.lexicon.contains(lemmatizedCompound = lemmatizedCompound.toLowerCase())) continue;
                                d.add((Fieldable)new Field(Constants.WikiIndexFields.WORD.toString(), lemmatizedCompound, Field.Store.YES, Field.Index.NOT_ANALYZED));
                                d.add((Fieldable)new Field(Constants.WikiIndexFields.SENSE.toString(), "", Field.Store.YES, Field.Index.NOT_ANALYZED));
                                if (j > 1) {
                                    String[] stringArray = lemmatizedCompound.split(" ");
                                    int n = stringArray.length;
                                    int n2 = 0;
                                    while (n2 < n) {
                                        String singleWordLemma = stringArray[n2];
                                        if (this.lexicon.contains(singleWordLemma)) {
                                            d.add((Fieldable)new Field(Constants.WikiIndexFields.WORD.toString(), singleWordLemma, Field.Store.YES, Field.Index.NOT_ANALYZED));
                                            d.add((Fieldable)new Field(Constants.WikiIndexFields.SENSE.toString(), "", Field.Store.YES, Field.Index.NOT_ANALYZED));
                                        }
                                        ++n2;
                                    }
                                }
                                k += j - 1;
                                break block4;
                            }
                        }
                        --j;
                    }
                }
                ++k;
            }
        }
    }

    private String join(List<String> list, int b) {
        StringBuffer sb = new StringBuffer();
        int k = 0;
        while (k < b) {
            if (k == b - 1 && b > 1) {
                List<String> lemmas = this.stem(list.get(k));
                sb.append(lemmas.size() > 0 ? lemmas.get(0) : list.get(k));
            } else {
                sb.append(list.get(k));
                if (k < b - 1) {
                    sb.append(" ");
                }
            }
            ++k;
        }
        return sb.toString();
    }

    private List<String> stem(String token) {
        if (this.stemmer != null) {
            return this.stemmer.findStems(token, POS.NOUN);
        }
        return new ArrayList<String>();
    }

    private String isRedirectionPage(String testo) {
        if (testo == null || testo.equals("")) {
            return null;
        }
        if (!testo.startsWith("ReDiReCtIoNpAgE:")) {
            return null;
        }
        String redirection = null;
        int duePunti = testo.indexOf(":");
        if (duePunti > 0) {
            redirection = testo.substring(testo.indexOf(":") + 1);
        }
        return redirection;
    }

    public static String getLink(String s) {
        if (s == null) {
            return null;
        }
        if (s.equals("")) {
            return "";
        }
        String temp = new String(s);
        temp = s.replaceAll("IsOpEnRoUnDeD", "(");
        temp = temp.replaceAll("IsClOsEdRoUnDeD", ")");
        temp = temp.replaceAll("IsOpEnSqUaRe", "[");
        temp = temp.replaceAll("IsClOsEdSqUaRe", "]");
        temp = temp.replaceAll("IsOpEnCuRlY", "{");
        temp = temp.replaceAll("IsClOsEdCuRlY", "}");
        temp = temp.replaceAll("IsOpEnAnGlE", "<");
        temp = temp.replaceAll("IsClOsEdAnGlE", ">");
        return temp;
    }
}

