/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.wiki;

import it.uniroma1.lcl.jlt.util.Files;
import it.uniroma1.lcl.jlt.util.Timer;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.List;
import org.python.core.PyObject;
import org.python.core.PyString;
import org.python.core.PySystemState;
import org.python.core.PyUnicode;
import org.python.util.PythonInterpreter;

/**
 * Strips wiki markup from text by delegating to the Python {@code WikiExtractor}
 * module, which is run through an embedded Jython {@link PythonInterpreter}.
 *
 * <p>The class is a lazily created singleton; obtain it with {@link #getInstance()}.
 * All calls share one interpreter and communicate with it through the interpreter
 * variables {@code raw} and {@code clean}. {@link #extractText(String)} is not
 * synchronized, so concurrent callers would overwrite each other's variables.
 */
public class WikiTextExtractor {
    /** Interpreter shared by every extraction call; configured in {@link #initInterpreter()}. */
    private final PythonInterpreter interpreter = WikiTextExtractor.initInterpreter();

    /** The lazily created singleton, or {@code null} until {@link #getInstance()} is first called. */
    private static WikiTextExtractor instance;

    /**
     * Loads the {@code WikiExtractor} Python module into the interpreter and defines
     * the interpreter-level variable {@code file_encoding}.
     */
    private WikiTextExtractor() {
        this.interpreter.exec("import WikiExtractor");
        // NOTE(review): presumably consulted by the Python side; this class never reads it back.
        this.interpreter.exec("file_encoding = \"utf-8\"");
    }

    /**
     * Returns the shared extractor, creating it on first use.
     *
     * @return the singleton instance
     */
    public static synchronized WikiTextExtractor getInstance() {
        if (instance == null) {
            instance = new WikiTextExtractor();
        }
        return instance;
    }

    /**
     * Cleans a wiki page given as a string.
     *
     * <p>The text is passed to {@code WikiExtractor.clean_text} and every remaining
     * {@code <...>} span (matched reluctantly, within a single line) is then removed
     * from the result.
     *
     * @param raw the wiki markup to clean
     * @return the output of the Python module with {@code <...>} spans removed
     */
    public String extractText(String raw) {
        this.interpreter.set("raw", new PyUnicode(raw));
        this.interpreter.exec("clean = WikiExtractor.clean_text(raw)");
        String moduleOutput = this.interpreter.get("clean").asString();
        return moduleOutput.replaceAll("\\<.*?>", "");
    }

    /**
     * Cleans a wiki page stored in a file.
     *
     * <p>The file is read line by line and each line is re-terminated with
     * {@code "\n"}, so the text handed to {@link #extractText(String)} always uses
     * {@code \n} line endings and ends with one whenever the file is non-empty.
     *
     * @param input the file holding the wiki markup
     * @return the cleaned text
     * @throws IOException if the file cannot be opened or read
     */
    public String extractText(File input) throws IOException {
        StringBuilder text = new StringBuilder();
        // NOTE(review): FileReader decodes with the platform default charset; confirm
        // that this matches the encoding of the input files.
        try (BufferedReader reader = new BufferedReader(new FileReader(input))) {
            String line;
            // readLine() returning null is the reliable end-of-stream signal; ready()
            // only tells whether the next read would block.
            while ((line = reader.readLine()) != null) {
                text.append(line).append("\n");
            }
        }
        return this.extractText(text.toString());
    }

    /**
     * Manual smoke test: cleans {@code lib/Lib/sample-wikipage.txt}, reports the
     * elapsed time through {@link Timer} and prints the cleaned text to standard output.
     *
     * @throws IOException if the sample page cannot be read
     */
    public static void test() throws IOException {
        Timer timer = new Timer();
        String output = WikiTextExtractor.getInstance().extractText(new File("lib/Lib/sample-wikipage.txt"));
        timer.tick("to clean the page");
        System.out.println("Output follows:\n---------------\n\n\n" + output);
    }

    /**
     * Builds the Jython interpreter with two extra module search path entries:
     * the fixed location {@code /usr/local/share/jython/Lib} and the
     * {@code lib/Lib} directory under the current working directory.
     *
     * @return a new interpreter using the extended search path
     */
    private static PythonInterpreter initInterpreter() {
        PySystemState props = new PySystemState();
        PyString rootLibPath = new PyString("/usr/local/share/jython/Lib");
        PyString userLibPath = new PyString(System.getProperty("user.dir") + File.separator + "lib/Lib");
        props.path.append(rootLibPath);
        props.path.append(userLibPath);
        return new PythonInterpreter(null, props);
    }

    /**
     * Cleans every {@code *.wiki} file found under a directory and writes the result
     * to a {@code .txt} file placed in the same directory as its source file.
     *
     * <p>Any exception aborts the run and has its stack trace printed.
     *
     * @param args optional; {@code args[0]} is the directory to process, defaulting
     *             to {@code tmp/wikiText}
     */
    public static void main(String[] args) {
        try {
            String dir = "tmp/wikiText";
            if (args.length > 0) {
                dir = args[0];
            }
            WikiTextExtractor extractor = WikiTextExtractor.getInstance();
            // NOTE(review): the trailing 'true' presumably requests a recursive listing;
            // confirm against Files.listFiles.
            List<File> files = Files.listFiles(new File(dir), new FilenameFilter() {

                @Override
                public boolean accept(File parent, String name) {
                    return name.endsWith(".wiki");
                }
            }, true);
            for (File inFile : files) {
                String baseInFile = Files.getFileNameWithoutExtension(inFile.getAbsolutePath());
                File outFile = new File(inFile.getParent(), baseInFile + ".txt");
                String clean = extractor.extractText(inFile);
                // try-with-resources flushes and closes the writer even when write() fails.
                try (FileWriter writer = new FileWriter(outFile)) {
                    writer.write(clean);
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}

