biz.k11i.xgboost.util.FVec Java Examples

The following examples show how to use biz.k11i.xgboost.util.FVec. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Example.java    From xgboost-predictor-java with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the {@code data/agaricus.txt.0.test} resource and turns every line into a labelled
 * feature vector.
 *
 * <p>Each line is split on single spaces. The first token is parsed as the integer label; every
 * following token is split on {@code ':'} into an integer feature index and a float value.
 *
 * @return one entry per line: the key is the label, the value is the vector built from the line
 * @throws IOException if the resource cannot be read
 */
static List<SimpleEntry<Integer, FVec>> loadData() throws IOException {
    File source = new File(TestHelper.getResourcePath("data/agaricus.txt.0.test"));
    List<String> rows = Files.readAllLines(source.toPath(), StandardCharsets.UTF_8);

    List<SimpleEntry<Integer, FVec>> samples = new ArrayList<>();
    for (String row : rows) {
        String[] tokens = row.split(" ");

        // Token 0 is the label, so feature tokens start at position 1.
        Map<Integer, Float> features = new HashMap<>();
        int t = 1;
        while (t < tokens.length) {
            String[] indexAndValue = tokens[t].split(":");
            int featureIndex = Integer.parseInt(indexAndValue[0]);
            float featureValue = Float.parseFloat(indexAndValue[1]);
            features.put(featureIndex, featureValue);
            t++;
        }

        int label = Integer.parseInt(tokens[0]);
        samples.add(new SimpleEntry<>(label, FVec.Transformer.fromMap(features)));
    }

    return samples;
}
 
Example #2
Source File: XGBoostOnlinePredictUDTF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a list of {@code "index:value"} features into a sparse feature vector.
 *
 * <p>{@code null} elements are skipped. Every other element is converted with
 * {@code toString()} and must contain a {@code ':'} preceded by at least one character.
 *
 * @param featureList features to parse; elements may be {@code null}
 * @return the vector built from the parsed index/value pairs
 * @throws UDFArgumentException if an element has no usable {@code ':'} separator, or if its
 *         index or value is not a valid number (the {@link NumberFormatException} is attached
 *         as the cause)
 */
@Nonnull
private static FVec parseSparseFeatures(@Nonnull final List<?> featureList)
        throws UDFArgumentException {
    final Map<Integer, Double> map = new HashMap<>((int) (featureList.size() * 1.5));
    for (Object f : featureList) {
        if (f == null) {
            continue;
        }
        String str = f.toString();
        final int pos = str.indexOf(':');
        if (pos < 1) {
            throw new UDFArgumentException("Invalid feature format: " + str);
        }
        final int index;
        final double value;
        try {
            index = Integer.parseInt(str.substring(0, pos));
            value = Double.parseDouble(str.substring(pos + 1));
        } catch (NumberFormatException e) {
            // Attach the cause via initCause so the original parse failure stays visible
            // in the stack trace instead of being discarded.
            final UDFArgumentException failure =
                    new UDFArgumentException("Failed to parse a feature value: " + str);
            failure.initCause(e);
            throw failure;
        }
        map.put(index, value);
    }

    return FVec.Transformer.fromMap(map);
}
 
Example #3
Source File: XGBoostOnlinePredictUDTF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a dense feature vector from a list-typed argument.
 *
 * <p>The list is read through {@code featureListOI}. A {@code null} element is stored as
 * {@link Double#NaN}; every other element is converted with {@code featureElemOI}.
 *
 * @param argObj the list object holding the feature values
 * @return the vector created from the converted values
 * @throws UDFArgumentException declared by the signature; not thrown directly in this method
 */
@Nonnull
private FVec parseDenseFeatures(@Nonnull Object argObj) throws UDFArgumentException {
    final int size = featureListOI.getListLength(argObj);
    final double[] dense = new double[size];

    int idx = 0;
    while (idx < size) {
        final Object element = featureListOI.getListElement(argObj, idx);
        dense[idx] = (element == null) ? Double.NaN
                : PrimitiveObjectInspectorUtils.getDouble(element, featureElemOI);
        idx++;
    }

    // NOTE(review): the second argument presumably controls whether zeros count as missing
    // values - confirm against FVec.Transformer.fromArray.
    return FVec.Transformer.fromArray(dense, false);
}
 
Example #4
Source File: TestBase.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the dataset and returns every row as a feature vector.
 *
 * <p>Column 0 of each row is skipped; columns 1 to the end are passed to
 * {@code XGBoostUtils.parseRowAsFVec}.
 *
 * @return the vectors in the order the rows were handled
 * @throws Exception if parsing fails
 */
@Override
public List<FVec> loadDatasetAsListOfFVec() throws Exception {
    final List<FVec> vectors = new ArrayList<>();

    parse(new RowProcessor() {
        @Override
        public void handleRow(String[] columns) throws Exception {
            vectors.add(XGBoostUtils.parseRowAsFVec(columns, 1, columns.length));
        }
    });

    return vectors;
}
 
Example #5
Source File: TestBase.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the dataset into dense 34-element feature vectors and returns a slice of them.
 *
 * <p>Columns 0 to 32 are parsed as floats. Column 33 is parsed the same way, except that the
 * literal {@code "?"} is stored as {@code 0}.
 *
 * @return the result of {@code slice(...)} applied to the parsed vectors with {@code sliceIndex}
 * @throws Exception if parsing fails
 */
@Override
public List<FVec> loadDatasetAsListOfFVec() throws Exception {
    final List<FVec> vectors = new ArrayList<>();

    parse(new RowProcessor() {
        @Override
        public void handleRow(String[] columns) throws Exception {
            final float[] dense = new float[34];
            for (int col = 0; col < 33; col++) {
                dense[col] = Float.parseFloat(columns[col]);
            }

            // The last column may contain "?" as a placeholder; it is stored as zero.
            if (columns[33].equals("?")) {
                dense[33] = 0.f;
            } else {
                dense[33] = Float.parseFloat(columns[33]);
            }

            vectors.add(FVec.Transformer.fromArray(dense, false));
        }
    });

    return slice(vectors, sliceIndex, FVec.class);
}
 
Example #6
Source File: XGBoostOnlinePredictUDTF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Predicts for one input row and forwards the result.
 *
 * <p>Argument layout as used here: {@code args[0]} row id, {@code args[1]} features,
 * {@code args[2]} model id, {@code args[3]} serialized model. Rows whose features are
 * {@code null} are ignored. A predictor is loaded once per model id and then reused from
 * {@code mapToModel}.
 *
 * @param args the row arguments
 * @throws HiveException if an argument is invalid or prediction/forwarding fails
 */
@Override
public void process(Object[] args) throws HiveException {
    if (mapToModel == null) {
        this.mapToModel = new HashMap<String, Predictor>();
    }

    final Object rawFeatures = args[1];
    if (rawFeatures == null) {
        // No features, so there is nothing to predict for this row.
        return;
    }

    final String modelKey =
            PrimitiveObjectInspectorUtils.getString(nonNullArgument(args, 2), modelIdOI);
    Predictor predictor = mapToModel.get(modelKey);
    if (predictor == null) {
        // First time this model id is seen: load it and cache it for later rows.
        final Text serializedModel = modelOI.getPrimitiveWritableObject(nonNullArgument(args, 3));
        predictor = XGBoostUtils.loadPredictor(serializedModel);
        mapToModel.put(modelKey, predictor);
    }

    final Writable rowId = HiveUtils.copyToWritable(nonNullArgument(args, 0), rowIdOI);

    final FVec features;
    if (denseFeatures) {
        features = parseDenseFeatures(rawFeatures);
    } else {
        features = parseSparseFeatures(featureListOI.getList(rawFeatures));
    }

    predictAndForward(predictor, rowId, features);
}
 
Example #7
Source File: XGBoostUtils.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the {@code "index:value"} entries of {@code row[start..end)} into a feature vector.
 *
 * <p>{@code null} entries are skipped. Every other entry must contain a {@code ':'} preceded by
 * at least one character, with an integer before it and a float after it.
 *
 * @param row   tokens of one input row
 * @param start index of the first token to parse (inclusive)
 * @param end   index after the last token to parse (exclusive)
 * @return the vector built from the parsed index/value pairs
 * @throws IllegalArgumentException if an entry has no usable {@code ':'} separator, or if its
 *         index or value is not a valid number (the {@link NumberFormatException} is kept as
 *         the cause)
 */
@Nonnull
public static FVec parseRowAsFVec(@Nonnull final String[] row, final int start, final int end) {
    final Map<Integer, Float> map = new HashMap<>((int) (row.length * 1.5));
    for (int i = start; i < end; i++) {
        final String str = row[i];
        if (str == null) {
            continue;
        }
        final int pos = str.indexOf(':');
        if (pos < 1) {
            throw new IllegalArgumentException("Invalid feature format: " + str);
        }
        final int index;
        final float value;
        try {
            index = Integer.parseInt(str.substring(0, pos));
            value = Float.parseFloat(str.substring(pos + 1));
        } catch (NumberFormatException e) {
            // Chain the original exception so the failing number is traceable.
            throw new IllegalArgumentException("Failed to parse a feature value: " + str, e);
        }
        map.put(index, value);
    }

    return FVec.Transformer.fromMap(map);
}
 
Example #8
Source File: Example.java    From xgboost-predictor-java with Apache License 2.0 6 votes vote down vote up
/**
 * Predicts a probability for every sample using {@link Predictor#predict(FVec)} and prints the
 * mean logarithmic loss.
 *
 * <p>The first predicted value of each sample is clamped to {@code [1e-15, 1 - 1e-15]} before
 * the logarithm is taken, so neither {@code log(0)} nor {@code log(1 - 1)} is ever evaluated.
 * The per-sample terms are accumulated over the whole data set and divided by its size.
 *
 * @param predictor Predictor
 * @param data      test data; the key of each entry is used as the actual label in the loss
 *                  formula
 */
static void predictAndLogLoss(Predictor predictor, List<SimpleEntry<Integer, FVec>> data) {
    double sum = 0;

    for (SimpleEntry<Integer, FVec> pair : data) {

        double[] predicted = predictor.predict(pair.getValue());

        double predValue = Math.min(Math.max(predicted[0], 1e-15), 1 - 1e-15);
        int actual = pair.getKey();
        // Accumulate: plain assignment here would keep only the last sample's term.
        sum += actual * Math.log(predValue) + (1 - actual) * Math.log(1 - predValue);
    }

    double logLoss = -sum / data.size();

    System.out.println("Logloss: " + logLoss);
}
 
Example #9
Source File: XGBoostEvidenceFilterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Checks that predicting through a Spark RDD yields exactly the same probabilities as
 * {@code predictedProbabilitySerial}.
 */
@Test(groups = "sv")
protected void testLocalXGBoostClassifierSpark() {
    final Predictor localPredictor = XGBoostEvidenceFilter.loadPredictor(localClassifierModelFile);

    // Distribute the feature vectors of the accuracy data set over the test Spark context.
    final JavaSparkContext sparkContext = SparkContextFactory.getTestSparkContext();
    final JavaRDD<FVec> featuresRdd =
            sparkContext.parallelize(Arrays.asList(classifierAccuracyData.features));

    // Score every vector inside the RDD.
    final JavaDoubleRDD probabilitiesRdd =
            featuresRdd.mapToDouble(f -> localPredictor.predictSingle(f, false, 0));

    // Bring the results back to the driver as a primitive array.
    final double[] predictedProbabilitySpark = probabilitiesRdd.collect()
            .stream()
            .mapToDouble(Double::doubleValue)
            .toArray();

    // Tolerance 0.0: the two code paths must agree exactly.
    assertArrayEquals(
            predictedProbabilitySpark,
            predictedProbabilitySerial,
            0.0,
            "Probabilities predicted in spark context differ from serial");
}
 
Example #10
Source File: Example.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Entry point of the example: loads the test data, builds a predictor from the bundled
 * binary-logistic model and runs {@code predictAndLogLoss} followed by {@code predictLeafIndex}.
 *
 * @param args unused
 * @throws IOException if the test data or the model cannot be read
 */
public static void main(String[] args) throws IOException {
    final List<SimpleEntry<Integer, FVec>> samples = loadData();
    final Predictor model =
            new Predictor(TestHelper.getResourceAsStream("model/gbtree/v47/binary-logistic.model"));

    predictAndLogLoss(model, samples);
    predictLeafIndex(model, samples);
}
 
Example #11
Source File: Example.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Prints, for every sample, the array returned by {@code predictor.predictLeaf}.
 *
 * <p>One line is written to standard output per sample, prefixed with the zero-based position
 * of the sample in {@code data}.
 *
 * @param predictor Predictor
 * @param data      test data; only the feature vector of each entry is used
 */
static void predictLeafIndex(Predictor predictor, List<SimpleEntry<Integer, FVec>> data) {
    int position = 0;
    for (SimpleEntry<Integer, FVec> sample : data) {
        int[] leaves = predictor.predictLeaf(sample.getValue());
        String rendered = Arrays.toString(leaves);

        System.out.printf("leafIndexes[%d]: %s%s", position, rendered, System.lineSeparator());
        position++;
    }
}
 
Example #12
Source File: GBTree.java    From Myna with Apache License 2.0 5 votes vote down vote up
/**
 * Sums the leaf values of the trees belonging to one output group.
 *
 * @param feat        feature vector to evaluate
 * @param bst_group   index into {@code _groupTrees} selecting the group's trees
 * @param root_index  root index passed to every tree
 * @param ntree_limit number of leading trees to use; {@code 0} means all trees of the group.
 *                    Values larger than the number of available trees are clamped to it
 * @return the sum of the leaf values of the trees used
 */
double pred(FVec feat, int bst_group, int root_index, int ntree_limit) {
    RegTree[] trees = _groupTrees[bst_group];
    // Clamp the limit so an oversized ntree_limit cannot index past the end of the array.
    int treeleft = ntree_limit == 0 ? trees.length : Math.min(ntree_limit, trees.length);

    double psum = 0;
    for (int i = 0; i < treeleft; i++) {
        psum += trees[i].getLeafValue(feat, root_index);
    }

    return psum;
}
 
Example #13
Source File: GBTree.java    From Myna with Apache License 2.0 5 votes vote down vote up
/**
 * Computes one prediction per output group.
 *
 * @param feat        feature vector to evaluate
 * @param ntree_limit tree limit handed to {@code pred} for every group
 * @return an array of length {@code mparam.num_output_group} holding each group's prediction
 */
@Override
public double[] predict(FVec feat, int ntree_limit) {
    final int groupCount = mparam.num_output_group;
    final double[] perGroup = new double[groupCount];

    int group = 0;
    while (group < groupCount) {
        // Root index is always 0 here.
        perGroup[group] = pred(feat, group, 0, ntree_limit);
        group++;
    }

    return perGroup;
}
 
Example #14
Source File: GBTree.java    From Myna with Apache License 2.0 5 votes vote down vote up
/**
 * Predicts a single value; only valid for models with exactly one output group.
 *
 * @param feat        feature vector to evaluate
 * @param ntree_limit tree limit handed to {@code pred}
 * @return the prediction of output group 0
 * @throws IllegalStateException if the model has a number of output groups other than one
 */
@Override
public double predictSingle(FVec feat, int ntree_limit) {
    if (mparam.num_output_group == 1) {
        return pred(feat, 0, 0, ntree_limit);
    }

    final String message =
            "Can't invoke predictSingle() because this model outputs multiple values: "
                    + mparam.num_output_group;
    throw new IllegalStateException(message);
}
 
Example #15
Source File: PredictionTestBase.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@code "leaf_ntree"} task, which predicts leaf indexes with a tree limit and
 * converts them with {@code toDoubleArray}.
 *
 * @param ntree_limit tree limit passed to {@code Predictor.predictLeaf}
 * @return the prediction task
 */
public static PredictionTask predictLeafWithNTree(final int ntree_limit) {
    final PredictionTask limitedLeafTask = new PredictionTask("leaf_ntree") {
        @Override
        double[] predict(Predictor predictor, FVec feat) {
            final int[] leafIndexes = predictor.predictLeaf(feat, ntree_limit);
            return toDoubleArray(leafIndexes);
        }
    };
    return limitedLeafTask;
}
 
Example #16
Source File: PredictionTestBase.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@code "leaf"} task, which predicts leaf indexes and converts them with
 * {@code toDoubleArray}.
 *
 * @return the prediction task
 */
public static PredictionTask predictLeaf() {
    final PredictionTask leafTask = new PredictionTask("leaf") {
        @Override
        double[] predict(Predictor predictor, FVec feat) {
            final int[] leafIndexes = predictor.predictLeaf(feat);
            return toDoubleArray(leafIndexes);
        }
    };
    return leafTask;
}
 
Example #17
Source File: GBTree.java    From Myna with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the leaf index reached in each of the leading trees.
 *
 * @param feat        feature vector to evaluate
 * @param root_index  root index passed to every tree
 * @param ntree_limit number of leading trees to use; {@code 0} means all trees. Values larger
 *                    than the number of available trees are clamped to it
 * @return one leaf index per tree used, in tree order
 */
int[] predPath(FVec feat, int root_index, int ntree_limit) {
    // Clamp the limit so an oversized ntree_limit cannot index past the end of the array.
    int treeleft = ntree_limit == 0 ? trees.length : Math.min(ntree_limit, trees.length);

    int[] leafIndex = new int[treeleft];
    for (int i = 0; i < treeleft; i++) {
        leafIndex[i] = trees[i].getLeafIndex(feat, root_index);
    }
    return leafIndex;
}
 
Example #18
Source File: XGBoostOnlinePredictUDTF.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the model on one feature vector and forwards the prediction together with the row id.
 *
 * @param model    predictor to use
 * @param rowId    identifier of the row being predicted
 * @param features feature vector of the row
 * @throws HiveException if forwarding fails
 */
private void predictAndForward(@Nonnull final Predictor model, @Nonnull final Writable rowId,
        @Nonnull final FVec features) throws HiveException {
    // Element 0 of the prediction holds
    //    - the probability for "binary:logistic"
    //    - the class label for "multi:softmax"
    final double[] scores = model.predict(features);
    forwardPredicted(rowId, scores);
}
 
Example #19
Source File: GBTree.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the leaf index reached in each of the leading trees.
 *
 * @param feat        feature vector to evaluate
 * @param root_index  root index passed to every tree
 * @param ntree_limit number of leading trees to use; {@code 0} means all trees, larger values
 *                    are capped at the number of trees
 * @return one leaf index per tree used, in tree order
 */
int[] predPath(FVec feat, int root_index, int ntree_limit) {
    final int used;
    if (ntree_limit == 0) {
        used = trees.length;
    } else {
        used = Math.min(ntree_limit, trees.length);
    }

    final int[] leaves = new int[used];
    int t = 0;
    while (t < used) {
        leaves[t] = trees[t].getLeafIndex(feat, root_index);
        t++;
    }
    return leaves;
}
 
Example #20
Source File: Predictor.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the booster's predictions with {@code mparam.base_score} added to every element.
 *
 * @param feat        feature vector to evaluate
 * @param ntree_limit tree limit handed to the booster
 * @return the array returned by {@code gbm.predict}, shifted in place by the base score
 */
double[] predictRaw(FVec feat, int ntree_limit) {
    final double[] scores = gbm.predict(feat, ntree_limit);

    int k = 0;
    while (k < scores.length) {
        scores[k] = scores[k] + mparam.base_score;
        k++;
    }

    return scores;
}
 
Example #21
Source File: RegTree.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Picks the child node to visit for the given feature vector.
 *
 * @param feat feature vector to evaluate
 * @return {@code _defaultNext} when the split feature's value is NaN, {@code cleft_} when it is
 *         below {@code split_cond}, otherwise {@code cright_}
 */
int next(FVec feat) {
    final double splitValue = feat.fvalue(_splitIndex);

    // A NaN value takes the default branch.
    if (Double.isNaN(splitValue)) {
        return _defaultNext;
    }
    if (splitValue < split_cond) {
        return cleft_;
    }
    return cright_;
}
 
Example #22
Source File: GBLinear.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Computes one linear prediction per output group.
 *
 * @param feat        feature vector to evaluate
 * @param ntree_limit not used by this method
 * @return an array of length {@code mparam.num_output_group} holding each group's prediction
 */
@Override
public double[] predict(FVec feat, int ntree_limit) {
    final int groupCount = mparam.num_output_group;
    final double[] perGroup = new double[groupCount];

    int group = 0;
    while (group < groupCount) {
        perGroup[group] = pred(feat, group);
        group++;
    }

    return perGroup;
}
 
Example #23
Source File: GBLinear.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Predicts a single value; only valid for models with exactly one output group.
 *
 * @param feat        feature vector to evaluate
 * @param ntree_limit not used by this method
 * @return the prediction of output group 0
 * @throws IllegalStateException if the model has a number of output groups other than one
 */
@Override
public double predictSingle(FVec feat, int ntree_limit) {
    if (mparam.num_output_group == 1) {
        return pred(feat, 0);
    }

    final String message =
            "Can't invoke predictSingle() because this model outputs multiple values: "
                    + mparam.num_output_group;
    throw new IllegalStateException(message);
}
 
Example #24
Source File: GBLinear.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the linear prediction for one output group.
 *
 * <p>Starts from {@code bias(gid)} and adds {@code value * weight(fid, gid)} for every feature
 * id below {@code mparam.num_feature}. Features whose value is NaN contribute nothing.
 *
 * @param feat feature vector to evaluate
 * @param gid  output group id
 * @return bias plus the weighted sum of the non-NaN feature values
 */
double pred(FVec feat, int gid) {
    double total = bias(gid);

    for (int fid = 0; fid < mparam.num_feature; ++fid) {
        final double value = feat.fvalue(fid);
        if (Double.isNaN(value)) {
            // NaN values are skipped.
            continue;
        }
        total += value * weight(fid, gid);
    }

    return total;
}
 
Example #25
Source File: RegTree.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Walks the tree from the given root until a leaf is reached and returns that leaf's value.
 *
 * @param feat    feature vector deciding which child is taken at every inner node
 * @param root_id index of the node to start from
 * @return {@code leaf_value} of the leaf reached
 */
public double getLeafValue(FVec feat, int root_id) {
    int position = root_id;
    while (true) {
        final Node current = nodes[position];
        if (current._isLeaf) {
            return current.leaf_value;
        }
        position = current.next(feat);
    }
}
 
Example #26
Source File: Dart.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Sums the leaf values of one output group's trees, each scaled by its {@code weightDrop} entry.
 *
 * @param feat        feature vector to evaluate
 * @param bst_group   index into {@code _groupTrees} selecting the group's trees
 * @param root_index  root index passed to every tree
 * @param ntree_limit number of leading trees to use; {@code 0} means all trees of the group,
 *                    larger values are capped at the number of trees
 * @return the weighted sum of the leaf values of the trees used
 */
double pred(FVec feat, int bst_group, int root_index, int ntree_limit) {
    final RegTree[] groupTrees = _groupTrees[bst_group];

    final int used;
    if (ntree_limit == 0) {
        used = groupTrees.length;
    } else {
        used = Math.min(ntree_limit, groupTrees.length);
    }

    double weightedSum = 0;
    int t = 0;
    while (t < used) {
        weightedSum += weightDrop[t] * groupTrees[t].getLeafValue(feat, root_index);
        t++;
    }

    return weightedSum;
}
 
Example #27
Source File: GBTree.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Computes one prediction per output group.
 *
 * @param feat        feature vector to evaluate
 * @param ntree_limit tree limit handed to {@code pred} for every group
 * @return an array of length {@code mparam.num_output_group} holding each group's prediction
 */
@Override
public double[] predict(FVec feat, int ntree_limit) {
    final int groupCount = mparam.num_output_group;
    final double[] perGroup = new double[groupCount];

    int group = 0;
    while (group < groupCount) {
        // Root index is always 0 here.
        perGroup[group] = pred(feat, group, 0, ntree_limit);
        group++;
    }

    return perGroup;
}
 
Example #28
Source File: GBTree.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Predicts a single value; only valid for models with exactly one output group.
 *
 * @param feat        feature vector to evaluate
 * @param ntree_limit tree limit handed to {@code pred}
 * @return the prediction of output group 0
 * @throws IllegalStateException if the model has a number of output groups other than one
 */
@Override
public double predictSingle(FVec feat, int ntree_limit) {
    if (mparam.num_output_group == 1) {
        return pred(feat, 0, 0, ntree_limit);
    }

    final String message =
            "Can't invoke predictSingle() because this model outputs multiple values: "
                    + mparam.num_output_group;
    throw new IllegalStateException(message);
}
 
Example #29
Source File: GBTree.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Sums the leaf values of the trees belonging to one output group.
 *
 * @param feat        feature vector to evaluate
 * @param bst_group   index into {@code _groupTrees} selecting the group's trees
 * @param root_index  root index passed to every tree
 * @param ntree_limit number of leading trees to use; {@code 0} means all trees of the group,
 *                    larger values are capped at the number of trees
 * @return the sum of the leaf values of the trees used
 */
double pred(FVec feat, int bst_group, int root_index, int ntree_limit) {
    final RegTree[] groupTrees = _groupTrees[bst_group];

    final int used;
    if (ntree_limit == 0) {
        used = groupTrees.length;
    } else {
        used = Math.min(ntree_limit, groupTrees.length);
    }

    double total = 0;
    int t = 0;
    while (t < used) {
        total += groupTrees[t].getLeafValue(feat, root_index);
        t++;
    }

    return total;
}
 
Example #30
Source File: RegTree.java    From xgboost-predictor-java with Apache License 2.0 5 votes vote down vote up
/**
 * Walks the tree from the given root until a leaf is reached and returns that leaf's position
 * in {@code nodes}.
 *
 * @param feat    feature vector deciding which child is taken at every inner node
 * @param root_id index of the node to start from
 * @return index of the leaf reached
 */
public int getLeafIndex(FVec feat, int root_id) {
    int position = root_id;
    Node current = nodes[position];

    while (!current._isLeaf) {
        position = current.next(feat);
        current = nodes[position];
    }

    return position;
}