/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.wiki;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.Constants;
import it.uniroma1.lcl.jlt.util.Language;
import it.uniroma1.lcl.jlt.wiki.WikiTokenizer;
import it.uniroma1.lcl.jlt.wiki.data.WikiPageInfo;
import it.uniroma1.lcl.jlt.wiki.iterator.WikiDumpIterator;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Date;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Converts a Wikipedia dump, read through a {@link WikiDumpIterator}, into a
 * tree of numbered folders that hold paired {@code N.info} / {@code N.txt}
 * files, plus a single lexicon file listing the lemma of every written page.
 *
 * <p>All sizing and location parameters (pages per file, files per folder,
 * folder range to emit, output directory, page separator, ...) are read from
 * {@link Configuration}.
 */
public class WikiDumpConverter {
    private static final Log log = LogFactory.getLog(WikiDumpConverter.class);
    private WikiDumpIterator wikiIterator;

    /**
     * Iterates over the dump at {@code source} and writes its pages to disk.
     *
     * <p>Disambiguation and namespace pages are ignored entirely. Every other
     * page is counted; file and folder numbers advance from that count, and only
     * pages whose folder number lies within the configured start/end folder
     * range are actually written. I/O errors are logged and end the conversion;
     * they are not rethrown. Writers and the dump iterator are released in all
     * cases.
     *
     * @param source location of the dump, handed to {@link WikiDumpIterator}
     */
    public void convert(String source) {
        this.wikiIterator = new WikiDumpIterator(source);

        Configuration config = Configuration.getInstance();
        int startFolder = config.getDumpStartDir();
        int endFolder = config.getDumpEndDir();
        int maxFiles = config.getMaxFilesPerDir();
        int maxPages = config.getMaxPagesPerFile();
        String pageSeparator = config.getPagesSeparator();
        int bufferSizeWriter = config.getBufferSizeWriter() * Constants.KBYTE;
        int logNumPages = config.getLogNumPages();
        Language factoryLanguage = config.getIndexFactoryLanguage();

        int totalPages = 0;
        int analyzedPages = 0;
        int fileNumber = 0;
        int folderNumber = 0;
        // True when the previous page was written, i.e. the next written page
        // must be preceded by a separator.
        boolean separatePages = false;
        boolean iteratorOpened = false;
        PrintWriter infopw = null;
        PrintWriter txtpw = null;
        PrintWriter lexiconpw = null;

        try {
            this.wikiIterator.open();
            iteratorOpened = true;

            File docsDir = new File(String.valueOf(config.getDocDir()) + File.separator + factoryLanguage);
            docsDir.mkdirs();
            File lexicon = new File(docsDir, config.getLexiconFileName());
            log.info("CREATING LEXICON @ " + lexicon.getAbsolutePath());
            lexiconpw = openWriter(lexicon, bufferSizeWriter);
            boolean indexFullPage = !config.indexOnlyWikiPageInformation();

            while (this.wikiIterator.hasNext()) {
                WikiPageInfo page = this.wikiIterator.next();
                if (page.isDisambiguationPage() || page.isNamespace()) {
                    continue;
                }
                ++totalPages;

                // The separator is emitted before a possible file switch, so the
                // last page of a file is terminated inside that same file.
                if (separatePages) {
                    infopw.println("\n" + pageSeparator);
                    txtpw.println("\n" + pageSeparator);
                } else {
                    separatePages = true;
                }

                // Both flags are recomputed for every page. Keeping them alive
                // across skipped pages would bump folderNumber once per page
                // instead of once per maxFiles files.
                boolean startNewFile = totalPages % maxPages == 1 || maxPages == 1;
                boolean startNewDir = false;
                if (startNewFile) {
                    ++fileNumber;
                    closeWriter(infopw);
                    closeWriter(txtpw);
                    if (fileNumber % maxFiles == 1 || maxFiles == 1) {
                        startNewDir = true;
                        ++folderNumber;
                    }
                }

                if (folderNumber < startFolder || folderNumber > endFolder) {
                    // Outside the requested folder range: count the page, write nothing.
                    separatePages = false;
                    continue;
                }

                // A zero interval disables progress logging instead of dividing by zero.
                if (logNumPages != 0 && totalPages % logNumPages == 0) {
                    log.info("Folder #: " + folderNumber + ", Document id: " + page.getId());
                }

                File folder = new File(docsDir, Integer.toString(folderNumber));
                if (startNewDir) {
                    folder.mkdir();
                }
                if (startNewFile) {
                    // FileOutputStream creates (or truncates) the target file itself.
                    infopw = openWriter(new File(folder, fileNumber + ".info"), bufferSizeWriter);
                    txtpw = openWriter(new File(folder, fileNumber + ".txt"), bufferSizeWriter);
                }

                ++analyzedPages;
                lexiconpw.println(page.getLemma());
                infopw.println(page.getId());
                infopw.println(page.getTitle());
                infopw.println(page.getLemma());

                WikiTokenizer tokenizer = new WikiTokenizer(page.getText(), page.getId(), factoryLanguage);
                Set<String> categoryLemmas = tokenizer.getCategoryLemmas();
                Set<String> categories = tokenizer.getFullCategoriesName();
                Set<String> translations = tokenizer.getTranslationLinks();
                infopw.println(joinWithTabs(categoryLemmas).toLowerCase());
                infopw.println(joinWithTabs(categories));
                infopw.println(joinWithTabs(translations));
                // No line break here: the next separator (or end of file) closes the record.
                infopw.print(page.getOffset());

                if (indexFullPage) {
                    String cleanTxt = tokenizer.getText();
                    if (cleanTxt != null) {
                        txtpw.append(cleanTxt);
                    }
                }
            }
        } catch (IOException ioe) {
            log.error("Conversion interrupted by an I/O error", ioe);
        } finally {
            log.info("Total pages: " + totalPages + ". Analyzed: " + analyzedPages);
            if (iteratorOpened) {
                try {
                    this.wikiIterator.close();
                } catch (Exception e) {
                    // Broad on purpose: a failure while releasing the iterator must
                    // neither hide an earlier error nor keep the writers from closing.
                    log.error("Could not close the dump iterator", e);
                }
            }
            // Any of these may still be null if set-up failed early.
            closeWriter(infopw);
            closeWriter(txtpw);
            closeWriter(lexiconpw);
        }
    }

    /** Opens a buffered UTF-8 writer on {@code file}, creating or truncating it. */
    private static PrintWriter openWriter(File file, int bufferSize) throws IOException {
        return new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"), bufferSize));
    }

    /** Flushes and closes {@code writer}; does nothing when it is {@code null}. */
    private static void closeWriter(PrintWriter writer) {
        if (writer != null) {
            writer.flush();
            writer.close();
        }
    }

    /**
     * Concatenates {@code values} in iteration order, inserting a tab before an
     * element whenever the text built so far is non-empty.
     */
    private static String joinWithTabs(Set<String> values) {
        StringBuilder joined = new StringBuilder();
        for (String value : values) {
            if (joined.length() > 0) {
                joined.append("\t");
            }
            joined.append(value);
        }
        return joined.toString();
    }

    /**
     * Command-line entry point: converts the dump named in the configuration and
     * logs the start time, end time and elapsed minutes.
     *
     * @param args ignored
     * @throws Exception if reading the configuration or the conversion fails unexpectedly
     */
    public static void main(String[] args) throws Exception {
        Configuration config = Configuration.getInstance();
        log.info("Conversion started at: " + new Date());
        log.info("::Converting source: " + config.getWikipediaXMLDump());
        log.info("MAX_FILES_PER_DIR: " + config.getMaxFilesPerDir());
        log.info("MAX_PAGES_PER_FILE: " + config.getMaxPagesPerFile());
        long start = System.currentTimeMillis();
        WikiDumpConverter converter = new WikiDumpConverter();
        converter.convert(config.getWikipediaXMLDump());
        long end = System.currentTimeMillis();
        log.info("Conversion ended at: " + new Date());
        log.info("::Done converting source in " + (double)(end - start) / 60000.0 + " min");
    }
}

