/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.ukwac.iterator;

import edu.stanford.nlp.ling.WordLemmaTag;
import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.Constants;
import it.uniroma1.lcl.jlt.pipeline.stanford.StanfordSentence;
import it.uniroma1.lcl.jlt.ukwac.data.UKWacText;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Iterates over the texts stored in the {@code .xml} dump files found in the
 * directory returned by {@code Configuration.getInstance().getUKWacDirectory()}.
 *
 * <p>The parser is line-oriented. As implemented here it recognises:
 * <ul>
 *   <li>blank lines and lines containing {@code <corpus>} / {@code </corpus>},
 *       which are skipped between texts;</li>
 *   <li>a text header line, whose id is the substring between the first and
 *       the last double quote on the line;</li>
 *   <li>{@code <s>} ... {@code </s>} sentence blocks whose lines are
 *       tab-separated {@code word, tag, lemma} triples;</li>
 *   <li>{@code </text>}, which terminates the current text.</li>
 * </ul>
 *
 * <p>Files are read as ISO-8859-1 and are visited in the order returned by
 * {@link File#list()}. Instances are not designed for concurrent use.
 */
public class UKWacDumpIterator
implements Iterator<UKWacText> {
    private static final Log log = LogFactory.getLog(UKWacDumpIterator.class);
    private static final String DUMP_FILE_SUFFIX = ".xml";
    private static final String DUMP_FILE_ENCODING = "iso-8859-1";
    private static final String CORPUS_OPEN = "<corpus>";
    private static final String CORPUS_CLOSE = "</corpus>";
    private static final String TEXT_CLOSE = "</text>";
    private static final String SENTENCE_OPEN = "<s>";
    private static final String SENTENCE_CLOSE = "</s>";

    private final String dumpFileDir = Configuration.getInstance().getUKWacDirectory();
    private InputStreamReader isr;
    private BufferedReader brXMLDump;
    private final List<String> dumpFiles = new ArrayList<String>();
    private final Iterator<String> dumpFileIterator;
    /** Text pre-fetched by {@link #hasNext()} and not yet handed out by {@link #next()}. */
    private UKWacText nextText;

    /**
     * Collects the names of all {@code .xml} files in the configured dump directory.
     * No file is opened here; files are opened by {@link #open()} or lazily by
     * {@link #next()}.
     *
     * @throws IllegalStateException if the configured directory cannot be listed
     *         (not configured, missing, not a directory, or an I/O error)
     */
    public UKWacDumpIterator() {
        // File.list() returns null (rather than throwing) when the path is not a
        // listable directory; fail with a descriptive message instead of an NPE.
        String[] fileNames = this.dumpFileDir == null ? null : new File(this.dumpFileDir).list();
        if (fileNames == null) {
            throw new IllegalStateException("Cannot list UKWac dump directory: " + this.dumpFileDir);
        }
        log.debug("Files to be parsed: ");
        for (String fileName : fileNames) {
            if (fileName.endsWith(DUMP_FILE_SUFFIX)) {
                this.dumpFiles.add(fileName);
                log.debug(fileName + "\t");
            }
        }
        log.debug("\n");
        this.dumpFileIterator = this.dumpFiles.iterator();
        this.nextText = null;
    }

    /**
     * Opens the first dump file. Calling this is optional: {@link #next()} opens
     * files on demand when no reader is currently open.
     *
     * @throws IOException if the file cannot be opened
     */
    public void open() throws IOException {
        this.openNext();
    }

    /**
     * Closes the current dump file (if any) and opens the next one from the
     * file list. Does nothing beyond closing when no files are left.
     *
     * @throws IOException if closing the current file or opening the next one fails
     */
    protected void openNext() throws IOException {
        if (this.brXMLDump != null) {
            this.close();
        }
        if (!this.dumpFileIterator.hasNext()) {
            return;
        }
        File nextDumpFile = new File(this.dumpFileDir, this.dumpFileIterator.next());
        int bufferSize = Configuration.getInstance().getBufferSizeReader() * Constants.KBYTE;
        FileInputStream in = new FileInputStream(nextDumpFile);
        boolean opened = false;
        try {
            this.isr = new InputStreamReader(in, DUMP_FILE_ENCODING);
            this.brXMLDump = new BufferedReader(this.isr, bufferSize);
            opened = true;
        } finally {
            // Do not leak the file handle if wrapping the stream failed
            // (e.g. a non-positive buffer size).
            if (!opened) {
                this.isr = null;
                this.brXMLDump = null;
                in.close();
            }
        }
    }

    /**
     * Closes the currently open dump file, if any. Safe to call repeatedly and
     * when no file has been opened.
     *
     * @throws IOException if closing the underlying reader fails
     */
    public void close() throws IOException {
        BufferedReader reader = this.brXMLDump;
        InputStreamReader streamReader = this.isr;
        // Clear the fields first so a failed close never leaves stale readers behind.
        this.brXMLDump = null;
        this.isr = null;
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (streamReader != null) {
                streamReader.close();
            }
        }
    }

    /**
     * Reports whether another text is available, pre-fetching and caching it
     * if necessary.
     */
    @Override
    public boolean hasNext() {
        if (this.nextText == null) {
            this.nextText = this.next();
        }
        return this.nextText != null;
    }

    /**
     * Returns the next text, moving on to the next dump file when the current
     * one is exhausted.
     *
     * <p>Note: for compatibility with existing callers this method returns
     * {@code null} (instead of throwing {@code NoSuchElementException}) when no
     * text is left; use {@link #hasNext()} to guard calls. If an
     * {@link IOException} occurs it is logged and whatever was parsed so far is
     * returned (possibly {@code null}).
     *
     * @return the next text, or {@code null} if there is none
     */
    @Override
    public UKWacText next() {
        if (this.nextText != null) {
            UKWacText cached = this.nextText;
            this.nextText = null;
            return cached;
        }
        UKWacText text = null;
        try {
            String id = this.readNextTextId();
            if (id == null) {
                return null;
            }
            text = new UKWacText(id);
            this.readSentences(text);
        }
        catch (IOException e) {
            log.error("I/O error while reading UKWac dump", e);
        }
        return text;
    }

    /**
     * Advances to the next text header line, opening further dump files as
     * needed, and returns the id quoted on it. Blank lines, corpus wrapper tags
     * and lines without a quoted id are skipped.
     *
     * @return the text id, or {@code null} when all files are exhausted
     */
    private String readNextTextId() throws IOException {
        while (true) {
            String line = this.brXMLDump == null ? null : this.brXMLDump.readLine();
            if (line == null) {
                // End of the current file (or nothing opened yet): move on.
                if (!this.dumpFileIterator.hasNext()) {
                    return null;
                }
                this.openNext();
                continue;
            }
            if (line.length() == 0 || line.contains(CORPUS_OPEN) || line.contains(CORPUS_CLOSE)) {
                continue;
            }
            int startIdx = line.indexOf('"') + 1;
            int endIdx = line.lastIndexOf('"');
            if (startIdx > 0 && endIdx >= startIdx) {
                return line.substring(startIdx, endIdx);
            }
            log.warn("Skipping line without a quoted text id: " + line);
        }
    }

    /**
     * Reads sentence blocks into {@code text} until {@code </text>} or the end
     * of the current file. Lines outside sentence blocks are ignored.
     */
    private void readSentences(UKWacText text) throws IOException {
        String line;
        while ((line = this.brXMLDump.readLine()) != null) {
            if (line.equals(TEXT_CLOSE)) {
                return;
            }
            if (line.equals(SENTENCE_OPEN)) {
                text.addSentence(this.readSentence());
            }
            // Any other line is skipped; the loop always reads a fresh line, so
            // unexpected content can no longer cause an endless loop.
        }
        log.warn("Dump file ended before " + TEXT_CLOSE + " was found");
    }

    /**
     * Reads token lines up to {@code </s>} (or end of file) and returns the
     * sentence built from them. Only tokens whose word starts with a letter are
     * kept; lines with fewer than three tab-separated fields are skipped.
     */
    private StanfordSentence readSentence() throws IOException {
        StanfordSentence sentence = new StanfordSentence();
        String line;
        while ((line = this.brXMLDump.readLine()) != null && !line.equals(SENTENCE_CLOSE)) {
            String[] triple = line.split("\t");
            if (triple.length < 3) {
                log.warn("Skipping malformed token line: " + line);
                continue;
            }
            String word = triple[0];
            String tag = triple[1];
            String lemma = triple[2];
            if (word.length() > 0 && Character.isLetter(word.charAt(0))) {
                sentence.addWord(new WordLemmaTag(word, lemma, tag));
            }
        }
        return sentence;
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("Unsupported operation 'remove'");
    }

    /**
     * Prints every sentence of every text to standard output, one sentence per
     * line with words separated by a space.
     */
    public static void main(String[] args) throws IOException {
        UKWacDumpIterator ukwdi = new UKWacDumpIterator();
        try {
            ukwdi.open();
            while (ukwdi.hasNext()) {
                UKWacText ukwt = ukwdi.next();
                for (StanfordSentence ss : ukwt) {
                    for (WordLemmaTag wlt : ss.getWords()) {
                        System.out.print(wlt.word() + " ");
                    }
                    System.out.println();
                }
            }
        } finally {
            ukwdi.close();
        }
    }
}

