/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.pipeline.stanford;

import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WordTokenFactory;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class StanfordTokenizer {
    private static final String[] OLDSTRINGS = new String[]{"\\(", "\\)", "\\[", "\\]", "\\{", "\\}", "\\*", "/", "_", "-"};
    private static final String[] NEWSTRING = new String[]{"-LRB-", "-RRB-", "-LCB-", "-RCB-", "-LCB-", "-RCB-", "\\*", "\\/", "-", " "};
    private static final char[] DEFAULTOLDCHARS = new char[]{'*', '/'};
    private final PennTokenizer pennTokenizer = new PennTokenizer();
    private PTBTokenizer<Word> tokenizer;
    private static StanfordTokenizer singleton;

    private StanfordTokenizer() {
    }

    public static synchronized StanfordTokenizer getInstance() {
        if (singleton == null) {
            singleton = new StanfordTokenizer();
        }
        return singleton;
    }

    public List<String> tokenizeString(String string) {
        ArrayList<String> tokens = new ArrayList<String>();
        for (Word w : this.tokenize(string)) {
            tokens.add(w.word());
        }
        return tokens;
    }

    /*
     * Unable to fully structure code
     */
    public List<Word> tokenize(String string) {
        this.tokenizer = new PTBTokenizer((Reader)new StringReader(string), (LexedTokenFactory)new WordTokenFactory(), "untokenizable=noneDelete,ptb3Escaping=true");
        try {
            return this.tokenizer.tokenize();
        }
        catch (Exception e) {
            System.err.println(e.getMessage());
            tokens = new ArrayList<Word>();
            var7_4 = this.pennTokenizer.tokenize(string).split("\\s+");
            var6_5 = var7_4.length;
            var5_6 = 0;
            ** while (var5_6 < var6_5)
        }
lbl-1000:
        // 1 sources

        {
            token = var7_4[var5_6];
            tokens.add(new Word(token));
            ++var5_6;
            continue;
        }
lbl16:
        // 1 sources

        return tokens;
    }

    public String unTokenize(String tokenized) {
        return PTBTokenizer.ptb2Text((String)tokenized);
    }

    public static void main(String[] args) {
        System.out.println(StanfordTokenizer.getInstance().tokenize("\"Weird Al\" F.C. Yankovic"));
    }

    private static class PennTokenizer {
        private Map<String, String> stringSubs;
        private char[] oldChars;

        private PennTokenizer() {
            this(PennTokenizer.makeStringMap(), DEFAULTOLDCHARS);
        }

        private PennTokenizer(Map<String, String> stringSubs, char[] oldChars) {
            this.stringSubs = stringSubs;
            this.oldChars = oldChars;
        }

        private static Map<String, String> makeStringMap() {
            HashMap<String, String> map = new HashMap<String, String>();
            int i = 0;
            while (i < OLDSTRINGS.length) {
                map.put(OLDSTRINGS[i], NEWSTRING[i]);
                ++i;
            }
            return map;
        }

        public String tokenize(String str) {
            str = str.replaceAll("``", "`` ");
            str = str.replaceAll("''", "  ''");
            str = str.replaceAll("([?!\".,;:@#$%&])", " $1 ");
            str = str.replaceAll("\\.\\.\\.", " ... ");
            str = str.replaceAll("\\s+", " ");
            str = str.replaceAll(",([^0-9])", " , $1");
            str = str.replaceAll("([^.])([.])([\\])}>\"']*)\\s*$", "$1 $2$3 ");
            str = str.replaceAll("([\\[\\](){}<>])", " $1 ");
            str = str.replaceAll("--", " -- ");
            str = str.replaceAll("$", " ");
            str = str.replaceAll("^", " ");
            str = str.replaceAll("([^'])' ", "$1 ' ");
            str = str.replaceAll("'([sSmMdD]) ", " '$1 ");
            str = str.replaceAll("'ll ", " 'll ");
            str = str.replaceAll("'re ", " 're ");
            str = str.replaceAll("'ve ", " 've ");
            str = str.replaceAll("n't ", " n't ");
            str = str.replaceAll("'LL ", " 'LL ");
            str = str.replaceAll("'RE ", " 'RE ");
            str = str.replaceAll("'VE ", " 'VE ");
            str = str.replaceAll("N'T ", " N'T ");
            str = str.replaceAll(" ([Cc])annot ", " $1an not ");
            str = str.replaceAll(" ([Dd])'ye ", " $1' ye ");
            str = str.replaceAll(" ([Gg])imme ", " $1im me ");
            str = str.replaceAll(" ([Gg])onna ", " $1on na ");
            str = str.replaceAll(" ([Gg])otta ", " $1ot ta ");
            str = str.replaceAll(" ([Ll])emme ", " $1em me ");
            str = str.replaceAll(" ([Mm])ore'n ", " $1ore 'n ");
            str = str.replaceAll(" '([Tt])is ", " $1 is ");
            str = str.replaceAll(" '([Tt])was ", " $1 was ");
            str = str.replaceAll(" ([Ww])anna ", " $1an na ");
            str = str.replaceAll(" ([A-Z])\\ +\\.", " $1. ");
            str = str.replaceAll("\\s+", " ");
            str = str.replaceAll("^\\s+", "");
            str = str.trim();
            return this.process(str);
        }

        private String process(String s) {
            for (String string : this.stringSubs.keySet()) {
                s = s.replaceAll(string, this.stringSubs.get(string));
            }
            return this.escapeString(s);
        }

        private String escapeString(String s) {
            StringBuffer buff = new StringBuffer();
            int i = 0;
            while (i < s.length()) {
                char curChar = s.charAt(i);
                if (curChar == '\\') {
                    buff.append(curChar);
                    if (++i < s.length()) {
                        curChar = s.charAt(i);
                        buff.append(curChar);
                    }
                } else {
                    int j = 0;
                    while (j < this.oldChars.length) {
                        if (curChar == this.oldChars[j]) {
                            buff.append('\\');
                            break;
                        }
                        ++j;
                    }
                    buff.append(curChar);
                }
                ++i;
            }
            return buff.toString();
        }
    }
}

