/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.semeval10;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsDB;
import it.uniroma1.lcl.jlt.semeval10.data.SemevalDataset;
import it.uniroma1.lcl.jlt.semeval10.data.SemevalInstance;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Singleton reader for the SemEval 2010 data files.
 *
 * <p>Every target word is stored in a file named {@code <targetWord>.n.xml}
 * inside the directory that {@link Configuration} reports for a given
 * {@link CollocsDB}. Instances are pulled out of the file content with a
 * regular expression; no XML parser is involved.
 */
public class Semeval10 {
    protected static final Log log = LogFactory.getLog(Semeval10.class);
    private static final String semevalXmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
    /** Suffix shared by all data files handled by this class. */
    private static final String FILE_SUFFIX = ".n.xml";
    /** Encoding announced by {@link #semevalXmlHeader}; used to decode the data files. */
    private static final String FILE_ENCODING = "UTF-8";
    /** Instance pattern for the test set; group 2 is the instance text. */
    private static final Pattern TEST_INSTANCE_PATTERN = Pattern.compile("<(TargetSentence)>([^<]+)</\\1>");
    private static Semeval10 instance = null;

    /**
     * Returns the shared instance, creating it on first use.
     * The lazy initialisation is not synchronised.
     *
     * @return the singleton
     */
    public static Semeval10 getInstance() {
        if (instance == null) {
            instance = new Semeval10();
        }
        return instance;
    }

    private Semeval10() {
    }

    /**
     * Lists the entries of the data directory configured for {@code collocsDB},
     * sorted by their natural {@link File} ordering.
     *
     * @param collocsDB dataset whose directory is listed
     * @return the sorted directory entries; an empty array when the directory
     *         cannot be listed (missing, not a directory, or I/O error)
     */
    public File[] list(CollocsDB collocsDB) {
        File directory = new File(Configuration.getInstance().getSemEvalDataDirectory(collocsDB));
        File[] files = directory.listFiles();
        if (files == null) {
            // File.listFiles() signals "not a directory" or an I/O error with null.
            log.warn("Cannot list SemEval data directory " + directory);
            return new File[0];
        }
        Arrays.sort(files);
        return files;
    }

    /**
     * Returns the target words found in the {@link CollocsDB#SEMEVAL10_TRAIN}
     * directory, i.e. the names of its {@code *.n.xml} files with that suffix
     * removed. Entries that do not end in {@code .n.xml} are skipped, because
     * {@link #getInstances(String, CollocsDB)} could not map them back to a file.
     *
     * @return target words in directory-listing order
     */
    public List<String> getTargetWords() {
        List<String> targetWords = new ArrayList<String>();
        for (File file : this.list(CollocsDB.SEMEVAL10_TRAIN)) {
            String name = file.getName();
            if (name.endsWith(FILE_SUFFIX)) {
                targetWords.add(name.substring(0, name.length() - FILE_SUFFIX.length()));
            }
        }
        return targetWords;
    }

    /**
     * Loads the instances of every target word for the given dataset.
     * The set of target words is always the one returned by
     * {@link #getTargetWords()}.
     *
     * @param collocsDB dataset to read the instances from
     * @return a dataset holding the instances of each target word
     */
    public SemevalDataset load(CollocsDB collocsDB) {
        log.info("Started loading SemEval 2010 " + collocsDB + " dataset.");
        SemevalDataset dataset = new SemevalDataset();
        // List the directory once and reuse the result for the final report.
        List<String> targetWords = this.getTargetWords();
        for (String targetWord : targetWords) {
            dataset.addInstances(targetWord, this.getInstances(targetWord, collocsDB));
        }
        log.info("Finished loading SemEval 2010 " + collocsDB + " dataset. [" + targetWords.size() + " files read].");
        return dataset;
    }

    /**
     * Reads the instances of {@code targetWord} from its data file.
     *
     * <p>Only one line of the file is scanned: the first line, or the second
     * one when the first line is the XML header. Instances are numbered from 1
     * in order of appearance.
     *
     * @param targetWord word whose {@code <targetWord>.n.xml} file is read
     * @param collocsDB  {@link CollocsDB#SEMEVAL10_TRAIN} or {@link CollocsDB#SEMEVAL10_TEST}
     * @return the instances found; empty when the file is unreadable, empty,
     *         or contains no matching element
     * @throws IllegalArgumentException if {@code collocsDB} is neither of the two SemEval 2010 datasets
     */
    public List<SemevalInstance> getInstances(String targetWord, CollocsDB collocsDB) {
        Pattern instancePattern = Semeval10.instancePattern(targetWord, collocsDB);
        File fileXML = new File(Configuration.getInstance().getSemEvalDataDirectory(collocsDB) + "/" + targetWord + FILE_SUFFIX);
        List<SemevalInstance> instances = new ArrayList<SemevalInstance>();
        String content = Semeval10.readContent(fileXML);
        if (content == null) {
            // Unreadable or empty file: nothing to match against.
            return instances;
        }
        Matcher matcher = instancePattern.matcher(content);
        int matchNumber = 0;
        while (matcher.find()) {
            instances.add(new SemevalInstance(collocsDB, targetWord, ++matchNumber, matcher.group(2)));
        }
        return instances;
    }

    /**
     * Builds the pattern that matches one instance element; group 2 is its text.
     */
    private static Pattern instancePattern(String targetWord, CollocsDB collocsDB) {
        if (collocsDB == CollocsDB.SEMEVAL10_TRAIN) {
            // Quote the word so that regex metacharacters in it are matched literally.
            return Pattern.compile("<(" + Pattern.quote(targetWord) + "\\.n\\.\\d+)>([^<]+)</\\1>");
        }
        if (collocsDB == CollocsDB.SEMEVAL10_TEST) {
            return TEST_INSTANCE_PATTERN;
        }
        throw new IllegalArgumentException("Not a SemEval 2010 dataset: " + collocsDB);
    }

    /**
     * Returns the line of {@code fileXML} that holds the instances (the line
     * after the XML header if present, otherwise the first line), or
     * {@code null} when the file cannot be read or has no such line.
     */
    private static String readContent(File fileXML) {
        FileInputStream in = null;
        try {
            in = new FileInputStream(fileXML);
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, FILE_ENCODING));
            String content = reader.readLine();
            // Constant-first equals also covers an empty file (null first line).
            if (semevalXmlHeader.equals(content)) {
                content = reader.readLine();
            }
            if (content == null) {
                log.warn("SemEval file " + fileXML + " has no content line.");
            }
            return content;
        }
        catch (IOException e) {
            log.error("Unable to read SemEval file " + fileXML, e);
            return null;
        }
        finally {
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException e) {
                    log.warn("Unable to close SemEval file " + fileXML, e);
                }
            }
        }
    }

    /**
     * Indexes the instances of {@code targetWord} by the value of
     * {@code SemevalInstance.getId()}.
     *
     * @param targetWord word whose instances are read
     * @param collocsDB  dataset to read from
     * @return a map from instance id to instance; a later instance replaces an
     *         earlier one with the same id
     */
    public HashMap<Integer, SemevalInstance> getInstancesByIDs(String targetWord, CollocsDB collocsDB) {
        HashMap<Integer, SemevalInstance> byId = new HashMap<Integer, SemevalInstance>();
        for (SemevalInstance semevalInstance : this.getInstances(targetWord, collocsDB)) {
            byId.put(semevalInstance.getId(), semevalInstance);
        }
        return byId;
    }

    /**
     * Loads the test dataset, logs its target words and prints the total
     * number of instances retrieved.
     *
     * @param args ignored
     * @throws IOException declared for compatibility; not thrown by the current body
     */
    public static void main(String[] args) throws IOException {
        Semeval10 semeval = Semeval10.getInstance();
        CollocsDB collocsDB = CollocsDB.SEMEVAL10_TEST;
        SemevalDataset dataset = semeval.load(collocsDB);
        log.info("Dataset targetWords (" + dataset.getTargetWords().size() + "): " + dataset.getTargetWords());
        int instCounter = 0;
        for (String targetWord : dataset.getTargetWords()) {
            instCounter += dataset.getInstances(targetWord).size();
        }
        System.out.println("Istanze recuperate: " + instCounter);
    }
}

