/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.semeval;

import it.uniroma1.lcl.jlt.Configuration;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsDB;
import it.uniroma1.lcl.jlt.semeval.data.SemevalInstance;
import it.uniroma1.lcl.jlt.util.Maths;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Access point to the SemEval data files stored in the directory that
 * {@link Configuration#getSemEvalDataDirectory} reports for a given
 * {@link CollocsDB}.
 *
 * <p>Each target word is stored in a file named {@code <targetWord>.n.xml}.
 * The class is a singleton; obtain it through {@link #getInstance()}.
 */
public class Semeval {
    protected static final Log log = LogFactory.getLog(Semeval.class);

    /** Suffix appended to a target word to obtain the name of its data file. */
    private static final String FILE_SUFFIX = ".n.xml";

    /**
     * Eagerly created singleton: the constructor does nothing, so eager
     * initialization costs nothing and avoids the check-then-act race of a
     * lazily initialized, unsynchronized field.
     */
    private static final Semeval INSTANCE = new Semeval();

    /**
     * Returns the shared instance of this class.
     *
     * @return the singleton, never {@code null}
     */
    public static Semeval getInstance() {
        return INSTANCE;
    }

    private Semeval() {
    }

    /**
     * Lists the entries of the SemEval data directory configured for the
     * given data set, sorted by the natural ordering of {@link File}.
     *
     * @param collocsDB the data set whose directory is listed
     * @return the sorted directory entries; an empty array if the directory
     *         cannot be listed (it does not exist, is not a directory, or an
     *         I/O error occurred)
     */
    public File[] list(CollocsDB collocsDB) {
        File directory = new File(Configuration.getInstance().getSemEvalDataDirectory(collocsDB));
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() signals "not a directory" or an I/O error with null.
            log.warn("Cannot list SemEval data directory: " + directory);
            return new File[0];
        }
        Arrays.sort(files);
        return files;
    }

    /**
     * Returns the target words of the {@link CollocsDB#SEMEVAL10_TRAIN} data
     * set, derived from the names of the files in its data directory.
     *
     * <p>Only files whose name ends with {@code .n.xml} are considered; the
     * suffix is removed literally. Other entries are skipped because
     * {@link #getInstances} could not map them back to a data file.
     *
     * @return the target words, in the order returned by {@link #list}
     */
    public List<String> getTargetWords() {
        List<String> targetWords = new ArrayList<String>();
        for (File file : this.list(CollocsDB.SEMEVAL10_TRAIN)) {
            String name = file.getName();
            if (name.endsWith(FILE_SUFFIX)) {
                targetWords.add(name.substring(0, name.length() - FILE_SUFFIX.length()));
            }
        }
        return targetWords;
    }

    /**
     * Reads the instances of a target word from its data file.
     *
     * <p>Only the second line of the file is parsed; the first line is
     * skipped. Instances are numbered from 1 in order of appearance. For the
     * training set an instance is an element named
     * {@code <targetWord>.n.<digits>}; for the test set it is a
     * {@code TargetSentence} element.
     *
     * @param targetWord the target word; must not be {@code null}
     * @param collocsDB  the data set, either {@link CollocsDB#SEMEVAL10_TRAIN}
     *                   or {@link CollocsDB#SEMEVAL10_TEST}
     * @return the instances found; an empty list if the file cannot be read
     *         or has fewer than two lines
     * @throws IllegalArgumentException if {@code targetWord} is {@code null}
     *         or {@code collocsDB} is not one of the supported data sets
     */
    public List<SemevalInstance> getInstances(String targetWord, CollocsDB collocsDB) {
        if (targetWord == null) {
            throw new IllegalArgumentException("targetWord must not be null");
        }
        String elementRegex;
        if (collocsDB == CollocsDB.SEMEVAL10_TRAIN) {
            // Quote the word so that regex metacharacters in it are matched literally.
            elementRegex = "<(" + Pattern.quote(targetWord) + "\\.n\\.\\d+)>([^<]+)</\\1>";
        } else if (collocsDB == CollocsDB.SEMEVAL10_TEST) {
            elementRegex = "<(TargetSentence)>([^<]+)</\\1>";
        } else {
            throw new IllegalArgumentException("Unsupported SemEval data set: " + collocsDB);
        }

        List<SemevalInstance> instances = new ArrayList<SemevalInstance>();
        File fileXML = new File(String.valueOf(Configuration.getInstance().getSemEvalDataDirectory(collocsDB)) + "/" + targetWord + FILE_SUFFIX);
        String content;
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileXML))) {
            reader.readLine(); // the first line is not part of the parsed content
            content = reader.readLine();
        }
        catch (IOException e) {
            log.error("Unable to read SemEval file " + fileXML, e);
            return instances;
        }
        if (content == null) {
            log.warn("SemEval file has no content line: " + fileXML);
            return instances;
        }

        Matcher matcher = Pattern.compile(elementRegex).matcher(content);
        int matchNumber = 0;
        while (matcher.find()) {
            list_add:
            instances.add(new SemevalInstance(collocsDB, targetWord, ++matchNumber, matcher.group(2)));
        }
        return instances;
    }

    /**
     * Returns the instances of a target word indexed by their identifier.
     *
     * @param targetWord the target word
     * @param collocsDB  the data set to read from
     * @return a map from {@link SemevalInstance#getId()} to the instance
     * @see #getInstances(String, CollocsDB)
     */
    public HashMap<Integer, SemevalInstance> getInstancesByIDs(String targetWord, CollocsDB collocsDB) {
        HashMap<Integer, SemevalInstance> byId = new HashMap<Integer, SemevalInstance>();
        for (SemevalInstance semevalInstance : this.getInstances(targetWord, collocsDB)) {
            byId.put(semevalInstance.getId(), semevalInstance);
        }
        return byId;
    }

    /**
     * Logs the minimum, maximum, mean and total number of instances per
     * target word of the training set, then prints the total to standard
     * output.
     *
     * @param args ignored
     * @throws IOException declared for compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        Semeval semeval = Semeval.getInstance();
        CollocsDB collocsDB = CollocsDB.SEMEVAL10_TRAIN;
        ArrayList<Integer> sizes = new ArrayList<Integer>();
        int instCounter = 0;
        for (String targetWord : semeval.getTargetWords()) {
            // Parse each file once and reuse the count.
            int size = semeval.getInstances(targetWord, collocsDB).size();
            instCounter += size;
            sizes.add(size);
        }
        log.info("MIN: " + Maths.min(sizes));
        log.info("MAX: " + Maths.max(sizes));
        log.info("MEAN: " + Maths.mean(sizes));
        log.info("TOT: " + Maths.sum(sizes));
        System.out.println("Istanze recuperate: " + instCounter);
    }
}

