/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma1.lcl.jlt.ml.mallet;

import cc.mallet.pipe.CharSequence2TokenSequence;
import cc.mallet.pipe.FeatureSequence2FeatureVector;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.Target2Label;
import cc.mallet.pipe.TokenSequence2FeatureSequence;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Instance;
import cc.mallet.types.LabelAlphabet;
import java.util.ArrayList;
import java.util.regex.Pattern;

/**
 * Static factory for the MALLET pipe chain used by this package, plus two
 * custom {@link Pipe} implementations that pre-process raw text instances.
 *
 * <p>This class is not instantiable; use {@link #getPipe()}.
 */
public class MalletPipe {
    /**
     * Label string {@code "XXXXX"}. It is not referenced inside this class;
     * presumably callers use it for instances whose label is unknown
     * (NOTE(review): confirm against callers). Left non-final so existing
     * code that assigns it keeps compiling.
     */
    public static String UNKNOWN_LABEL = "XXXXX";

    /** Marker that starts a trailing comment inside an instance line. */
    private static final String COMMENT_MARKER = "#";

    /** Tokens are maximal runs of non-whitespace characters; compiled once. */
    private static final Pattern TOKEN_PATTERN = Pattern.compile("\\S+");

    private MalletPipe() {
    }

    /**
     * Builds the serial pipe chain: comment removal, tokenisation on
     * non-whitespace runs, token sequence to feature sequence, target to
     * label, and finally feature sequence to feature vector.
     *
     * @return a new {@link SerialPipes} wrapping the five pipes in that order
     */
    public static Pipe getPipe() {
        // The list must be typed on the common supertype Pipe: the individual
        // pipes are siblings of CommentRemoverPipe, not subclasses of it.
        ArrayList<Pipe> pipeList = new ArrayList<Pipe>();
        pipeList.add(new CommentRemoverPipe());
        pipeList.add(new CharSequence2TokenSequence(TOKEN_PATTERN));
        pipeList.add(new TokenSequence2FeatureSequence());
        pipeList.add(new Target2Label());
        pipeList.add(new FeatureSequence2FeatureVector());
        return new SerialPipes(pipeList);
    }

    /**
     * Converts instance data to a trimmed string with any trailing
     * {@code #}-comment removed.
     *
     * <p>The marker is only honoured when it occurs after the first character
     * of the trimmed line; a line that starts with {@code #} is returned
     * unchanged. NOTE(review): this mirrors the original condition
     * ({@code idx > 0}) - confirm whether whole-line comments are meant to
     * pass through.
     *
     * @param data the instance data; must be a {@link CharSequence}
     * @return the trimmed text preceding the first {@code #}, or the whole
     *         trimmed text when no marker is found after position 0
     */
    private static String stripComment(Object data) {
        String line = ((CharSequence) data).toString().trim();
        int idx = line.indexOf(COMMENT_MARKER);
        if (idx > 0) {
            line = line.substring(0, idx).trim();
        }
        return line;
    }

    /**
     * Pipe that replaces the instance data with its comment-free, trimmed
     * string form (see {@code stripComment}).
     */
    public static class CommentRemoverPipe
    extends Pipe {
        private static final long serialVersionUID = 4925040502618166253L;

        /** Creates the pipe with a fresh data alphabet and label alphabet. */
        public CommentRemoverPipe() {
            super(new Alphabet(), new LabelAlphabet());
        }

        /**
         * Strips the trailing comment from the carrier's data in place.
         *
         * @param carrier instance whose data is a {@link CharSequence}
         * @return the same instance, with its data replaced by a {@link String}
         */
        public Instance pipe(Instance carrier) {
            carrier.setData(stripComment(carrier.getData()));
            return carrier;
        }
    }

    /**
     * Pipe that turns a whitespace-separated line directly into a
     * {@link FeatureVector}, giving every field the value {@code 1.0}.
     */
    public static class DataPipe
    extends Pipe {
        private static final long serialVersionUID = 4945043210665425367L;

        /** Creates the pipe with a fresh data alphabet and label alphabet. */
        public DataPipe() {
            super(new Alphabet(), new LabelAlphabet());
        }

        /**
         * Replaces the carrier's textual data with a feature vector built
         * from its whitespace-separated fields.
         *
         * <p>Each field is looked up in this pipe's data alphabet with
         * {@code addIfNotPresent = true}. Note that an empty line still
         * splits into a single empty-string field, which is looked up like
         * any other.
         *
         * @param carrier instance whose data is a {@link CharSequence}
         * @return the same instance, with its data replaced by a
         *         {@link FeatureVector}
         */
        public Instance pipe(Instance carrier) {
            Alphabet dataAlphabet = this.getDataAlphabet();
            String[] fields = stripComment(carrier.getData()).split("\\s+");
            int numFields = fields.length;
            int[] featureNames = new int[numFields];
            double[] featureValues = new double[numFields];
            for (int i = 0; i < numFields; i++) {
                featureNames[i] = dataAlphabet.lookupIndex(fields[i], true);
                featureValues[i] = 1.0;
            }
            carrier.setData(new FeatureVector(dataAlphabet, featureNames, featureValues));
            return carrier;
        }
    }
}

