/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.semeval10.util;

import it.uniroma1.lcl.jlt.semeval10.util.PageType;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Extracts the text of the individual instances contained in a SemEval page file.
 *
 * <p>The whole file is read into a single string (lines are joined without any
 * separator), the scan starts at the first occurrence of
 * {@code pageType.getFirstTagDelimiter()}, and every piece of text that follows an
 * occurrence of {@code pageType.tagToSearch(pageName)} and precedes the next
 * {@code "</"} is collected into the instance list.
 *
 * <p>The page name is derived from the file name: everything after the last
 * {@code '/'} with the last four characters removed (e.g. a {@code ".xml"} suffix).
 * File names whose base name is shorter than four characters are therefore rejected
 * with a {@link StringIndexOutOfBoundsException}.
 */
public class SemEvalPageParser {
    /** Number of trailing characters stripped from the file name to obtain the page name. */
    private static final int EXTENSION_LENGTH = 4;

    /** Marker that terminates the text of an instance. */
    private static final String CLOSING_TAG_START = "</";

    private final List<String> instanceList = new ArrayList<String>();
    private final String pageName;
    private final PageType pageType;

    /**
     * Parses {@code fileName} as a {@link PageType#TRAIN} page.
     *
     * @param fileName path of the page file; {@code '/'} is assumed as path separator
     */
    public SemEvalPageParser(String fileName) {
        this(fileName, PageType.TRAIN);
    }

    /**
     * Parses {@code fileName} using the tag conventions of {@code pageType}.
     *
     * <p>I/O failures are reported on standard error and leave the instance list
     * with whatever was collected so far (normally empty).
     *
     * @param fileName path of the page file; {@code '/'} is assumed as path separator
     * @param pageType page flavour that provides the delimiter and the tag to look for
     */
    public SemEvalPageParser(String fileName, PageType pageType) {
        this.pageType = pageType;
        this.pageName = extractPageName(fileName);
        this.parsePage(fileName);
    }

    /** Returns the part of {@code fileName} after the last '/' without its last four characters. */
    private static String extractPageName(String fileName) {
        return fileName.substring(fileName.lastIndexOf("/") + 1, fileName.length() - EXTENSION_LENGTH);
    }

    /**
     * Reads the file and fills the instance list.
     *
     * <p>The scan walks forward through the file text by offset. After each instance
     * the search resumes at the end of the text just extracted, so an instance can be
     * neither reported twice nor, when its text is empty, stall the loop.
     */
    private void parsePage(String fileName) {
        try {
            String fileText = readFileText(fileName);
            int offset = fileText.indexOf(this.pageType.getFirstTagDelimiter());
            if (offset == -1) {
                // No delimiter at all: the page holds no instances.
                return;
            }
            int[] bounds = this.findNextTextBounds(fileText, offset);
            while (bounds != null) {
                this.instanceList.add(fileText.substring(bounds[0], bounds[1]));
                // Always move forward by at least one character so the loop terminates
                // even when the located text does not lie beyond the current offset.
                offset = Math.max(bounds[1], offset + 1);
                bounds = this.findNextTextBounds(fileText, offset);
            }
        }
        catch (IOException e) {
            // Best effort: report the failure and keep the parser usable.
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole file into one string, joining the lines without a separator.
     * The platform default charset is used for decoding.
     */
    private static String readFileText(String fileName) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName)));
        try {
            StringBuilder text = new StringBuilder();
            String line;
            // readLine() returning null is the end-of-stream signal.
            while ((line = reader.readLine()) != null) {
                text.append(line);
            }
            return text.toString();
        }
        finally {
            // Release the file handle even when reading fails.
            reader.close();
        }
    }

    /**
     * Locates the next instance text in {@code string}, searching from {@code fromIndex}.
     *
     * @return a two-element array {@code {start, end}} (end exclusive) delimiting the
     *         text, or {@code null} when no further tag is found or the markup after
     *         it is incomplete
     */
    private int[] findNextTextBounds(String string, int fromIndex) {
        String tagToSearch = this.pageType.tagToSearch(this.pageName);
        int nextTagIndex = string.indexOf(tagToSearch, fromIndex);
        if (nextTagIndex == -1) {
            return null;
        }
        // For page types other than TRAIN and TEST the text starts where the search started.
        int textStart = fromIndex;
        if (this.pageType.equals(PageType.TRAIN)) {
            // The text starts right after the '>' that closes the opening tag.
            int tagCloseIndex = string.indexOf(">", nextTagIndex);
            if (tagCloseIndex == -1) {
                // Opening tag is never closed: nothing reliable to extract.
                return null;
            }
            textStart = tagCloseIndex + 1;
        } else if (this.pageType.equals(PageType.TEST)) {
            // The text starts immediately after the searched tag.
            textStart = nextTagIndex + tagToSearch.length();
        }
        int textEnd = string.indexOf(CLOSING_TAG_START, textStart);
        if (textEnd == -1) {
            // No closing tag follows: the instance is truncated.
            return null;
        }
        return new int[]{textStart, textEnd};
    }

    /**
     * Returns the text of the first instance found in {@code string}.
     *
     * @param string markup to scan from its beginning
     * @return the text between the first matching tag and the following {@code "</"},
     *         or {@code null} if there is no matching tag or the markup after it is
     *         incomplete (unterminated opening tag or missing {@code "</"})
     */
    public String readNextText(String string) {
        int[] bounds = this.findNextTextBounds(string, 0);
        if (bounds == null) {
            return null;
        }
        return string.substring(bounds[0], bounds[1]);
    }

    /**
     * Returns the instance texts in the order they were found.
     * The returned list is the parser's own list, not a copy.
     */
    public List<String> getInstanceList() {
        return this.instanceList;
    }

    /**
     * Manual smoke test: builds a parser for a hard-coded file and prints the
     * results of four successive {@link #readNextText(String)} calls on a sample
     * training string.
     */
    public static void main(String[] args) {
        String fileName = "/home/dimarco/resources/semeval/data/test_data/nouns/access.n.xml";
        String trainText = "<access.n.train><access.n.1>Train 1</access.n.1><access.n.222>Train 2</access.n.222><access.n.60>Train 3</access.n.6></access.n.train>";
        SemEvalPageParser parser = new SemEvalPageParser(fileName);
        trainText = trainText.substring(trainText.indexOf(">"));
        String a = parser.readNextText(trainText);
        trainText = trainText.substring(trainText.indexOf(a));
        String b = parser.readNextText(trainText);
        trainText = trainText.substring(trainText.indexOf(b));
        String c = parser.readNextText(trainText);
        trainText = trainText.substring(trainText.indexOf(c));
        String d = parser.readNextText(trainText);
        System.out.println(a);
        System.out.println(b);
        System.out.println(c);
        System.out.println(d);
    }
}

