Java Code Examples for org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils#getConstantObjectInspector()

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils#getConstantObjectInspector(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.

Example 1

Source File: PassiveAggressiveUDTFTest.java From incubator-hivemall with Apache License 2.0

6 votes

/**
 * Initializes a PA1 learner with the constant option string "-c 3.0" and checks
 * {@code eta()} at a fixed loss of 0.1: 2.0 is expected for a squared norm of 0.05
 * and 3.0 for a squared norm of 0.01.
 */
@Test
public void testPA1Eta() throws UDFArgumentException {
    final float tolerance = 1e-5f;
    final float loss = 0.1f;

    ObjectInspector intInspector = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    ListObjectInspector intListInspector =
            ObjectInspectorFactory.getStandardListObjectInspector(intInspector);
    // The option string is handed over as a constant inspector, not as a row value.
    ObjectInspector optionInspector = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-c 3.0");

    PassiveAggressiveUDTF udtf = new PassiveAggressiveUDTF.PA1();
    udtf.initialize(new ObjectInspector[] {intListInspector, intInspector, optionInspector});

    PredictionResult firstMargin = new PredictionResult(0.5f).squaredNorm(0.05f);
    assertEquals(2.0f, udtf.eta(loss, firstMargin), tolerance);

    PredictionResult secondMargin = new PredictionResult(0.5f).squaredNorm(0.01f);
    assertEquals(3.0f, udtf.eta(loss, secondMargin), tolerance);
}

Example 2

Source File: LDAUDTFTest.java From incubator-hivemall with Apache License 2.0

6 votes

/**
 * Feeds a single three-word document to an LDAUDTF configured with "-topics 2" and
 * asserts that closing it forwards {@code words * topics} rows to the collector.
 */
@Test
public void testSingleRow() throws HiveException {
    final int numTopics = 2;
    final String[] words = {"1", "2", "3"};

    ObjectInspector documentOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector optionOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-topics " + numTopics);

    LDAUDTF udtf = new LDAUDTF();
    udtf.initialize(new ObjectInspector[] {documentOI, optionOI});
    udtf.process(new Object[] {Arrays.asList(words)});

    // Count every row forwarded during close().
    final MutableInt forwarded = new MutableInt(0);
    udtf.setCollector(new Collector() {
        @Override
        public void collect(Object row) throws HiveException {
            forwarded.addValue(1);
        }
    });
    udtf.close();

    Assert.assertEquals(words.length * numTopics, forwarded.getValue());
}

Example 3

Source File: UDAFToOrderedListTest.java From incubator-hivemall with Apache License 2.0

6 votes

/**
 * Aggregates three strings with the "-reverse_order" option and asserts that the
 * terminated list is "candy", "banana", "apple".
 */
@Test
public void testReverseOrder() throws Exception {
    final String[] values = {"banana", "apple", "candy"};
    final String[] expected = {"candy", "banana", "apple"};

    ObjectInspector optionOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-reverse_order");
    ObjectInspector[] inputOIs =
            {PrimitiveObjectInspectorFactory.javaStringObjectInspector, optionOI};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);
    for (String value : values) {
        evaluator.iterate(agg, new Object[] {value});
    }

    @SuppressWarnings("unchecked")
    List<Object> ordered = (List<Object>) evaluator.terminate(agg);

    Assert.assertEquals(3, ordered.size());
    for (int pos = 0; pos < expected.length; pos++) {
        Assert.assertEquals(expected[pos], ordered.get(pos));
    }
}

Example 4

Source File: UDAFToOrderedListTest.java From incubator-hivemall with Apache License 2.0

5 votes

/**
 * Expects {@code init} to fail with a UDFArgumentException when the option string
 * "-k 2 -kv_map -vk_map" requests both map output forms at once.
 */
@Test(expected = UDFArgumentException.class)
public void testKVandVKFail() throws Exception {
    ObjectInspector conflictingOptionsOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-k 2 -kv_map -vk_map");
    ObjectInspector[] inputOIs = {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaDoubleObjectInspector,
            conflictingOptionsOI};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
}

Example 5

Source File: UDAFToOrderedListTest.java From incubator-hivemall with Apache License 2.0

5 votes

/**
 * Aggregates (value, key) pairs with the "-vk_map" option and asserts that the result
 * is a two-entry LinkedHashMap mapping "apple" to 0.6 and "banana" to 0.7.
 */
@Test
public void testVKMapOptionNaturalOrder() throws Exception {
    final String[] values = {"banana", "apple", "banana"};
    final double[] keys = {0.7, 0.6, 0.8};

    ObjectInspector optionOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-vk_map");
    ObjectInspector[] inputOIs = {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, optionOI};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);
    for (int row = 0; row < values.length; row++) {
        evaluator.iterate(agg, new Object[] {values[row], keys[row]});
    }

    Object terminated = evaluator.terminate(agg);
    Assert.assertEquals(LinkedHashMap.class, terminated.getClass());

    Map<?, ?> valueToKey = (Map<?, ?>) terminated;
    Assert.assertEquals(2, valueToKey.size());
    Assert.assertEquals(0.6d, valueToKey.get("apple"));
    Assert.assertEquals(0.7d, valueToKey.get("banana"));
}

Example 6

Source File: UDAFToOrderedListTest.java From incubator-hivemall with Apache License 2.0

5 votes

/**
 * Aggregates (value, key) pairs, with "banana" appearing twice, under the options
 * "-k 2 -vk_map" and asserts that the result is a two-entry LinkedHashMap mapping
 * "candy" to 0.81 and "banana" to 0.8.
 */
@Test
public void testVKMapOptionBananaOverlap() throws Exception {
    final String[] values = {"banana", "banana", "candy"};
    final double[] keys = {0.7, 0.8, 0.81};

    ObjectInspector optionOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-k 2 -vk_map");
    ObjectInspector[] inputOIs = {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, optionOI};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);
    for (int row = 0; row < values.length; row++) {
        evaluator.iterate(agg, new Object[] {values[row], keys[row]});
    }

    Object terminated = evaluator.terminate(agg);
    Assert.assertEquals(LinkedHashMap.class, terminated.getClass());

    Map<?, ?> valueToKey = (Map<?, ?>) terminated;
    Assert.assertEquals(2, valueToKey.size());
    Assert.assertEquals(0.81d, valueToKey.get("candy"));
    Assert.assertEquals(0.8d, valueToKey.get("banana"));
}

Example 7

Source File: LDAPredictUDAFTest.java From incubator-hivemall with Apache License 2.0

5 votes

/**
 * Iterates 18 fixture rows through LDAPredictUDAF (configured with "-topics 2"), always
 * passing a word value of 0, and asserts that the terminated result has two entries
 * whose second elements are equal.
 */
@Test
public void testTerminateWithSameTopicProbability() throws Exception {
    udaf = new LDAPredictUDAF();

    inputOIs = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.STRING),
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.FLOAT),
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.INT),
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.FLOAT),
            ObjectInspectorUtils.getConstantObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-topics 2")};

    evaluator = udaf.getEvaluator(new SimpleGenericUDAFParameterInfo(inputOIs, false, false));

    agg = (LDAPredictUDAF.OnlineLDAPredictAggregationBuffer) evaluator.getNewAggregationBuffer();

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    // Assume that none of the words in the document are in the vocabulary of the LDA model.
    // Hence, the document should be assigned to topic #1 (#2) with probability 0.5 (0.5).
    // NOTE(review): 18 is presumably the length of the words/labels/lambdas fixtures - confirm.
    for (int i = 0; i < 18; i++) {
        evaluator.iterate(agg, new Object[] {words[i], 0.f, labels[i], lambdas[i]});
    }

    // The suppression is limited to the single unchecked cast of the terminate() result.
    @SuppressWarnings("unchecked")
    List<Object[]> result = (List<Object[]>) evaluator.terminate(agg);

    // Expected value goes first so a failure reports "expected:<2> but was:<n>".
    Assert.assertEquals(2, result.size());
    // The probability for each of the two topics should be the same.
    Assert.assertEquals(result.get(0)[1], result.get(1)[1]);
}

Example 8

Source File: UDAFToOrderedListTest.java From incubator-hivemall with Apache License 2.0

5 votes

/**
 * Aggregates five (value, int key) pairs with the options "-k 2 -kv_map" and asserts that
 * the result is a two-entry LinkedHashMap mapping key 5 to "apple" and key 4 to "candy".
 */
@Test
public void testKVMapTop2() throws Exception {
    final String[] values = {"apple", "banana", "candy", "donut", "egg"};
    final int[] keys = {5, 3, 4, 2, 3};

    ObjectInspector optionOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-k 2 -kv_map");
    ObjectInspector[] inputOIs = {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector, optionOI};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);
    for (int row = 0; row < values.length; row++) {
        evaluator.iterate(agg, new Object[] {values[row], keys[row]});
    }

    Object terminated = evaluator.terminate(agg);
    Assert.assertEquals(LinkedHashMap.class, terminated.getClass());

    Map<?, ?> keyToValue = (Map<?, ?>) terminated;
    Assert.assertEquals(2, keyToValue.size());
    Assert.assertEquals("apple", keyToValue.get(5));
    Assert.assertEquals("candy", keyToValue.get(4));
}

Example 9

Source File: PLSAPredictUDAFTest.java From incubator-hivemall with Apache License 2.0

5 votes

/**
 * Iterates every fixture word through PLSAPredictUDAF (configured with "-topics 2"),
 * always passing a word value of 0, and asserts that the terminated result has two
 * entries whose second elements are equal.
 */
@Test
public void testTerminateWithSameTopicProbability() throws Exception {
    udaf = new PLSAPredictUDAF();

    inputOIs = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.STRING),
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.FLOAT),
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.INT),
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.FLOAT),
            ObjectInspectorUtils.getConstantObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-topics 2")};

    evaluator = udaf.getEvaluator(new SimpleGenericUDAFParameterInfo(inputOIs, false, false));
    agg = (PLSAPredictUDAF.PLSAPredictAggregationBuffer) evaluator.getNewAggregationBuffer();

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    // Assume that none of the words in the document are in the vocabulary of the pLSA model.
    // Hence, the document should be assigned to topic #1 (#2) with probability 0.5 (0.5).
    for (int i = 0; i < words.length; i++) {
        String word = words[i];
        evaluator.iterate(agg, new Object[] {word, 0.f, labels[i], probs[i]});
    }

    // The suppression is limited to the single unchecked cast of the terminate() result.
    @SuppressWarnings("unchecked")
    List<Object[]> result = (List<Object[]>) evaluator.terminate(agg);

    // Expected value goes first so a failure reports "expected:<2> but was:<n>".
    Assert.assertEquals(2, result.size());
    // The probability for each of the two topics should be the same.
    Assert.assertEquals(result.get(0)[1], result.get(1)[1]);
}

Example 10

Source File: MatrixFactorizationSGDUDTFTest.java From incubator-hivemall with Apache License 2.0

5 votes

/**
 * Initializes MatrixFactorizationSGDUDTF with "-factor 3 -rankinit random", checks that the
 * random rank-init scheme was selected, feeds a 5x4 rating matrix 100 times, and asserts
 * that every prediction is within 0.2 of the corresponding rating.
 */
@Test
public void testRandInit() throws HiveException {
    println("--------------------------\n testRandInit()");
    OnlineMatrixFactorizationUDTF mf = new MatrixFactorizationSGDUDTF();

    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
    // A plain literal is enough; wrapping it in new String(...) only created a needless copy.
    ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        "-factor 3 -rankinit random");
    ObjectInspector[] argOIs = new ObjectInspector[] {intOI, intOI, floatOI, param};
    mf.initialize(argOIs);
    // assertEquals reports the actual scheme on failure, unlike assertTrue(a == b).
    Assert.assertEquals(RankInitScheme.random, mf.rankInit);

    float[][] rating = {{5, 3, 0, 1}, {4, 0, 0, 1}, {1, 1, 0, 5}, {1, 0, 0, 4}, {0, 1, 5, 4}};
    // One reusable (row, col, rating) argument array for process().
    Object[] args = new Object[3];
    final int num_iters = 100;
    for (int iter = 0; iter < num_iters; iter++) {
        for (int row = 0; row < rating.length; row++) {
            for (int col = 0, size = rating[row].length; col < size; col++) {
                args[0] = row;
                args[1] = col;
                args[2] = rating[row][col];
                mf.process(args);
            }
        }
    }
    for (int row = 0; row < rating.length; row++) {
        for (int col = 0, size = rating[row].length; col < size; col++) {
            double predicted = mf.predict(row, col);
            print(rating[row][col] + "[" + predicted + "]\t");
            Assert.assertEquals(rating[row][col], predicted, 0.2d);
        }
        println();
    }
}

Example 11

Source File: GeneralClassifierUDTFTest.java From incubator-hivemall with Apache License 2.0

5 votes

/**
 * Trains GeneralClassifierUDTF with the Nesterov optimizer options on the rows obtained
 * from {@code readFile("adam_test_10000.tsv.gz")} and asserts that the cumulative loss
 * is below 1100.
 *
 * <p>Each line is split on a single space: the first token is a comma-separated feature
 * list and the second is the integer label.
 */
@Test
public void testNesterov() throws IOException, HiveException {
    String filePath = "adam_test_10000.tsv.gz";
    String options =
            "-loss logloss -opt nesterov -reg l1 -lambda 0.0001 -iter 10 -mini_batch 1 -cv_rate 0.00005";

    GeneralClassifierUDTF udtf = new GeneralClassifierUDTF();

    ListObjectInspector stringListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector params = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, options);

    udtf.initialize(new ObjectInspector[] {stringListOI,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector, params});

    // try-with-resources closes the reader even if parsing or process() throws.
    try (BufferedReader reader = readFile(filePath)) {
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            StringTokenizer tokenizer = new StringTokenizer(line, " ");

            String featureLine = tokenizer.nextToken();
            List<String> X = Arrays.asList(featureLine.split(","));

            String labelLine = tokenizer.nextToken();
            Integer y = Integer.valueOf(labelLine);

            udtf.process(new Object[] {X, y});
        }
    }

    udtf.finalizeTraining();

    Assert.assertTrue(
        "CumulativeLoss is expected to be less than 1100: " + udtf.getCumulativeLoss(),
        udtf.getCumulativeLoss() < 1100);
}

Example 12

Source File: MatrixFactorizationSGDUDTFTest.java From incubator-hivemall with Apache License 2.0

4 votes

/**
 * Runs MatrixFactorizationSGDUDTF with "-disable_cv -factor 3 -iterations 5", feeds a 5x4
 * rating matrix 500 times, then closes the UDTF and asserts that {@code mf.count} equals
 * the number of processed examples times the iteration count, that 5 rows were
 * collected, and that the file obtained from {@code mf.fileIO} no longer exists.
 */
@Test
public void testFileBackedIterationsCloseNoConverge() throws HiveException {
    println("--------------------------\n testFileBackedIterationsCloseNoConverge()");
    OnlineMatrixFactorizationUDTF mf = new MatrixFactorizationSGDUDTF();

    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
    int iters = 5;
    // String concatenation already yields a fresh String; new String(...) was redundant.
    ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        "-disable_cv -factor 3 -iterations " + iters);
    ObjectInspector[] argOIs = new ObjectInspector[] {intOI, intOI, floatOI, param};
    MapredContext mrContext = MapredContextAccessor.create(true, null);
    mf.configure(mrContext);
    mf.initialize(argOIs);
    final MutableInt numCollected = new MutableInt(0);
    mf.setCollector(new Collector() {
        @Override
        public void collect(Object input) throws HiveException {
            numCollected.addValue(1);
        }
    });
    // assertEquals reports the actual scheme on failure, unlike assertTrue(a == b).
    Assert.assertEquals(RankInitScheme.random, mf.rankInit);

    float[][] rating = {{5, 3, 0, 1}, {4, 0, 0, 1}, {1, 1, 0, 5}, {1, 0, 0, 4}, {0, 1, 5, 4}};
    // One reusable (row, col, rating) argument array for process().
    Object[] args = new Object[3];

    final int num_iters = 500;
    int trainingExamples = 0;
    for (int iter = 0; iter < num_iters; iter++) {
        for (int row = 0; row < rating.length; row++) {
            for (int col = 0, size = rating[row].length; col < size; col++) {
                args[0] = row;
                args[1] = col;
                args[2] = rating[row][col];
                mf.process(args);
                trainingExamples++;
            }
        }
    }

    // Grab the file handle before close() so its removal can be checked afterwards.
    File tmpFile = mf.fileIO.getFile();
    mf.close();
    Assert.assertEquals(trainingExamples * iters, mf.count);
    Assert.assertEquals(5, numCollected.intValue());
    Assert.assertFalse(tmpFile.exists());
}

Example 13

Source File: RandomForestClassifierUDTFTest.java From incubator-hivemall with Apache License 2.0

4 votes

/**
 * Downloads an ARFF dataset, trains a single-tree RandomForestClassifierUDTF on it using
 * sparse "index:value" string features, and returns the deserialized decision tree.
 *
 * @param urlString location of the ARFF file; the response attribute is taken at index 4
 * @return the root node decoded from the third column of the row forwarded on close()
 * @throws IOException if the dataset cannot be read
 * @throws ParseException if the ARFF content cannot be parsed
 * @throws HiveException if the UDTF fails
 */
private static DecisionTree.Node getDecisionTreeFromSparseInput(String urlString)
        throws IOException, ParseException, HiveException {
    URL url = new URL(urlString);

    final AttributeDataset iris;
    // Close the network stream as soon as parsing is done, even if parsing fails.
    try (InputStream is = new BufferedInputStream(url.openStream())) {
        ArffParser arffParser = new ArffParser();
        arffParser.setResponseIndex(4);
        iris = arffParser.parse(is);
    }

    int size = iris.size();
    double[][] x = iris.toArray(new double[size][]);
    int[] y = iris.toArray(new int[size]);

    RandomForestClassifierUDTF udtf = new RandomForestClassifierUDTF();
    ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-trees 1 -seed 71");
    udtf.initialize(new ObjectInspector[] {
            ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector),
            PrimitiveObjectInspectorFactory.javaIntObjectInspector, param});

    // One list is reused for every row and cleared after each process() call.
    final List<String> xi = new ArrayList<String>(x[0].length);
    for (int i = 0; i < size; i++) {
        final double[] row = x[i];
        for (int j = 0; j < row.length; j++) {
            xi.add(j + ":" + row[j]);
        }
        udtf.process(new Object[] {xi, y[i]});
        xi.clear();
    }

    // Single-element array lets the anonymous collector hand the model text back out.
    final Text[] placeholder = new Text[1];
    Collector collector = new Collector() {
        @Override
        public void collect(Object input) throws HiveException {
            Object[] forward = (Object[]) input;
            placeholder[0] = (Text) forward[2];
        }
    };

    udtf.setCollector(collector);
    udtf.close();

    Text modelTxt = placeholder[0];
    Assert.assertNotNull(modelTxt);

    byte[] b = Base91.decode(modelTxt.getBytes(), 0, modelTxt.getLength());
    return DecisionTree.deserialize(b, b.length, true);
}

Example 14

Source File: RandomForestClassifierUDTFTest.java From incubator-hivemall with Apache License 2.0

4 votes

/**
 * Trains a 10-tree RandomForestClassifierUDTF on the rows obtained from
 * {@code readFile("news20-small.binary.gz")} and asserts that one row per tree is
 * forwarded and that the accumulated out-of-bag error rate is below 0.3.
 *
 * <p>Each line is split on spaces: the first token is the label (-1 is remapped to 0)
 * and the remaining tokens are passed through as features. Lines without features are
 * skipped.
 */
@Test
public void testNews20BinarySparse() throws IOException, ParseException, HiveException {
    final int numTrees = 10;
    RandomForestClassifierUDTF udtf = new RandomForestClassifierUDTF();
    ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        "-seed 71 -trees " + numTrees);
    udtf.initialize(new ObjectInspector[] {
            ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector),
            PrimitiveObjectInspectorFactory.javaIntObjectInspector, param});

    // try-with-resources closes the reader even if parsing or process() throws.
    try (BufferedReader news20 = readFile("news20-small.binary.gz")) {
        // One list is reused for every row and cleared after each process() call.
        List<String> features = new ArrayList<String>();
        String line = news20.readLine();
        while (line != null) {
            StringTokenizer tokens = new StringTokenizer(line, " ");
            int label = Integer.parseInt(tokens.nextToken());
            if (label == -1) {
                label = 0;
            }
            while (tokens.hasMoreTokens()) {
                features.add(tokens.nextToken());
            }
            if (!features.isEmpty()) {
                udtf.process(new Object[] {features, label});
                features.clear();
            }
            line = news20.readLine();
        }
    }

    final MutableInt count = new MutableInt(0);
    final MutableInt oobErrors = new MutableInt(0);
    final MutableInt oobTests = new MutableInt(0);
    Collector collector = new Collector() {
        @Override
        public synchronized void collect(Object input) throws HiveException {
            Object[] forward = (Object[]) input;
            // Columns 4 and 5 of each forwarded row are accumulated as error/test counts.
            oobErrors.addValue(((IntWritable) forward[4]).get());
            oobTests.addValue(((IntWritable) forward[5]).get());
            count.addValue(1);
        }
    };
    udtf.setCollector(collector);
    udtf.close();

    Assert.assertEquals(numTrees, count.getValue());
    float oobErrorRate = ((float) oobErrors.getValue()) / oobTests.getValue();
    Assert.assertTrue("oob error rate is too high: " + oobErrorRate, oobErrorRate < 0.3);
}

Example 15

Source File: GradientTreeBoostingClassifierUDTFTest.java From incubator-hivemall with Apache License 2.0

4 votes

/**
 * Downloads the iris ARFF dataset, trains GradientTreeBoostingClassifierUDTF on dense
 * double features with "-trees 490", and asserts that closing the UDTF forwards
 * exactly 490 rows.
 */
@Test
public void testIrisDense() throws IOException, ParseException, HiveException {
    // Shared by the option string and the final assertion so the two cannot drift apart.
    final int numTrees = 490;

    URL url = new URL(
        "https://gist.githubusercontent.com/myui/143fa9d05bd6e7db0114/raw/500f178316b802f1cade6e3bf8dc814a96e84b1e/iris.arff");

    final AttributeDataset iris;
    // Close the network stream as soon as parsing is done, even if parsing fails.
    try (InputStream is = new BufferedInputStream(url.openStream())) {
        ArffParser arffParser = new ArffParser();
        arffParser.setResponseIndex(4);
        iris = arffParser.parse(is);
    }

    int size = iris.size();
    double[][] x = iris.toArray(new double[size][]);
    int[] y = iris.toArray(new int[size]);

    GradientTreeBoostingClassifierUDTF udtf = new GradientTreeBoostingClassifierUDTF();
    ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-trees " + numTrees);
    udtf.initialize(new ObjectInspector[] {
            ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaDoubleObjectInspector),
            PrimitiveObjectInspectorFactory.javaIntObjectInspector, param});

    // One list is reused for every row and cleared after each process() call.
    final List<Double> xi = new ArrayList<Double>(x[0].length);
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < x[i].length; j++) {
            xi.add(j, x[i][j]);
        }
        udtf.process(new Object[] {xi, y[i]});
        xi.clear();
    }

    final MutableInt count = new MutableInt(0);
    Collector collector = new Collector() {
        @Override
        public void collect(Object input) throws HiveException {
            count.addValue(1);
        }
    };

    udtf.setCollector(collector);
    udtf.close();

    Assert.assertEquals(numTrees, count.getValue());
}

Example 16

Source File: SlimUDTFTest.java From incubator-hivemall with Apache License 2.0

4 votes

/**
 * Drives SlimUDTF (options "-l2 0.01 -l1 0.01") with every ordered item pair (i, j),
 * i != j, built from a 4-user x 5-item rating matrix, then finalizes training.
 *
 * <p>The test has no assertions; it passes as long as nothing throws.
 */
@Test
public void testAllSamples() throws HiveException {
    final int numUser = 4;
    final int numItem = 5;
    final float[][] data = {{1.f, 4.f, 0.f, 0.f, 0.f}, {0.f, 3.f, 0.f, 1.f, 2.f},
            {2.f, 2.f, 0.f, 0.f, 3.f}, {0.f, 1.f, 1.f, 0.f, 0.f}};

    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    ObjectInspector ratesOfIOI = ObjectInspectorFactory.getStandardMapObjectInspector(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        PrimitiveObjectInspectorFactory.javaFloatObjectInspector);
    ObjectInspector ratesOfJOI = ObjectInspectorFactory.getStandardMapObjectInspector(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        PrimitiveObjectInspectorFactory.javaFloatObjectInspector);
    ObjectInspector nestedRatesOI = ObjectInspectorFactory.getStandardMapObjectInspector(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        ObjectInspectorFactory.getStandardMapObjectInspector(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaFloatObjectInspector));
    ObjectInspector optionsOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-l2 0.01 -l1 0.01");

    SlimUDTF slim = new SlimUDTF();
    slim.initialize(new ObjectInspector[] {intOI, ratesOfIOI, nestedRatesOI, intOI,
            ratesOfJOI, optionsOI});

    for (int i = 0; i < numItem; i++) {
        // Non-zero ratings of item i, keyed by user.
        Map<Integer, Float> ratesOfI = new HashMap<>();
        for (int user = 0; user < numUser; user++) {
            float rate = data[user][i];
            if (rate != 0.) {
                ratesOfI.put(user, rate);
            }
        }

        // For every user, the ratings (zeros included) of all items other than i.
        Map<Integer, Map<Integer, Float>> otherRatesByUser = new HashMap<>();
        for (int user = 0; user < numUser; user++) {
            Map<Integer, Float> otherRates = new HashMap<>();
            for (int item = 0; item < numItem; item++) {
                if (item != i) {
                    otherRates.put(item, data[user][item]);
                }
            }
            otherRatesByUser.put(user, otherRates);
        }

        for (int j = 0; j < numItem; j++) {
            if (j != i) {
                // Non-zero ratings of item j, keyed by user.
                Map<Integer, Float> ratesOfJ = new HashMap<>();
                for (int user = 0; user < numUser; user++) {
                    float rate = data[user][j];
                    if (rate != 0.) {
                        ratesOfJ.put(user, rate);
                    }
                }

                slim.process(new Object[] {i, ratesOfI, otherRatesByUser, j, ratesOfJ});
            }
        }
    }
    slim.finalizeTraining();
}

Example 17

Source File: FactorizationMachineUDTFTest.java From incubator-hivemall with Apache License 2.0

4 votes

/**
 * Trains FactorizationMachineUDTF twice on the rows obtained from
 * {@code readFile("5107786.txt.gz")}: first with the base options, then with
 * "-adaptive_regularization -validation_threshold 1" appended. Asserts that the second
 * run reports a lower average loss.
 *
 * <p>Each line is split on spaces: the first token is the target value and the
 * remaining tokens are the features. Rows are cached during the first pass so the
 * second pass replays exactly the same data.
 */
@Test
public void testAdaptiveRegularization() throws HiveException, IOException {
    println("Adaptive regularization test");

    final String options = "-factors 5 -min 1 -max 5 -init_v gaussian -eta0 0.01 -seed 31 ";

    FactorizationMachineUDTF udtf = new FactorizationMachineUDTF();
    ObjectInspector[] argOIs = new ObjectInspector[] {
            ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector),
            PrimitiveObjectInspectorFactory.javaDoubleObjectInspector,
            ObjectInspectorUtils.getConstantObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, options)};

    udtf.initialize(argOIs);

    List<List<String>> featureVectors = new ArrayList<>();
    List<Double> ys = new ArrayList<>();
    // try-with-resources closes the reader even if parsing or process() throws.
    try (BufferedReader data = readFile("5107786.txt.gz")) {
        String line = data.readLine();
        while (line != null) {
            StringTokenizer tokenizer = new StringTokenizer(line, " ");
            double y = Double.parseDouble(tokenizer.nextToken());
            List<String> features = new ArrayList<String>();
            while (tokenizer.hasMoreTokens()) {
                String f = tokenizer.nextToken();
                features.add(f);
            }
            udtf.process(new Object[] {features, y});
            featureVectors.add(features);
            ys.add(y);
            line = data.readLine();
        }
    }
    udtf.finalizeTraining();

    double loss = udtf._cvState.getAverageLoss(featureVectors.size());
    println("Average loss without adaptive regularization: " + loss);

    // train with adaptive regularization
    udtf = new FactorizationMachineUDTF();
    argOIs[2] = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        options + "-adaptive_regularization -validation_threshold 1");
    udtf.initialize(argOIs);
    udtf.initModel(udtf._params);
    for (int i = 0, n = featureVectors.size(); i < n; i++) {
        udtf.process(new Object[] {featureVectors.get(i), ys.get(i)});
    }
    udtf.finalizeTraining();

    double loss_adareg = udtf._cvState.getAverageLoss(featureVectors.size());
    println("Average loss with adaptive regularization: " + loss_adareg);
    Assert.assertTrue("Adaptive regularization should achieve lower loss", loss > loss_adareg);
}

Example 18

Source File: PLSAPredictUDAFTest.java From incubator-hivemall with Apache License 2.0

4 votes

/**
 * Splits the 18 fixture rows into three bins of six, produces one partial aggregation per
 * bin, and then merges the partials in four different orders. For every order the
 * merged distribution must satisfy {@code distr[0] < distr[1]}.
 */
@Test
public void testMerge() throws Exception {
    udaf = new PLSAPredictUDAF();

    inputOIs = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.STRING),
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.FLOAT),
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.INT),
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveObjectInspector.PrimitiveCategory.FLOAT),
            ObjectInspectorUtils.getConstantObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-topics 2")};

    evaluator = udaf.getEvaluator(new SimpleGenericUDAFParameterInfo(inputOIs, false, false));

    agg = (PLSAPredictUDAF.PLSAPredictAggregationBuffer) evaluator.getNewAggregationBuffer();

    // Word values of the test document, looked up by word during iteration.
    final Map<String, Float> doc = new HashMap<String, Float>();
    doc.put("apples", 1.f);
    doc.put("avocados", 1.f);
    doc.put("colds", 1.f);
    doc.put("flu", 1.f);
    doc.put("like", 2.f);
    doc.put("oranges", 1.f);

    // Bins #1..#3 cover rows [0, 6), [6, 12) and [12, 18) respectively.
    final int binSize = 6;
    final Object[] partials = new Object[3];
    for (int bin = 0; bin < partials.length; bin++) {
        evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
        evaluator.reset(agg);

        final int end = (bin + 1) * binSize;
        for (int row = bin * binSize; row < end; row++) {
            evaluator.iterate(agg,
                new Object[] {words[row], doc.get(words[row]), labels[row], probs[row]});
        }
        partials[bin] = evaluator.terminatePartial(agg);
    }

    // Merge in different orders.
    final int[][] orders = {{0, 1, 2}, {1, 0, 2}, {1, 2, 0}, {2, 1, 0}};
    for (int[] order : orders) {
        evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL2, partialOI);
        evaluator.reset(agg);

        for (int binIndex : order) {
            evaluator.merge(agg, partials[binIndex]);
        }

        float[] distr = agg.get();
        Assert.assertTrue(distr[0] < distr[1]);
    }
}

Example 19

Source File: IntFeatureMapModelTest.java From incubator-hivemall with Apache License 2.0

4 votes

/**
 * Feeds synthetic two-class data to a {@link FactorizationMachineUDTF} configured with
 * {@code -int_feature -classification} and checks the accuracy of the model on that data.
 *
 * <p>Each of the 10,000 rounds generates {@code ROW} rows. Rows in the first half are labeled
 * {@code -1}, always carry feature 1 with value 1, and carry each of the features 2..19 with
 * probability 0.2 and a random value. Rows in the second half are labeled {@code +1} and carry
 * each of the features 21..40 with probability 0.2 and a random value. Every row is passed to
 * {@code udtf.process}; at the end of the round the rows of that round are scored with
 * {@code model.predict} and the fraction of correct predictions is recorded.
 *
 * <p>The test passes when the accuracy of the last round exceeds 0.95.
 *
 * @throws HiveException propagated from the UDTF calls
 */
@Test
public void testClassification() throws HiveException {
    final int ROW = 10, COL = 40;

    FactorizationMachineUDTF udtf = new FactorizationMachineUDTF();
    ListObjectInspector xOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    DoubleObjectInspector yOI = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    ObjectInspector paramOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        "-adareg -int_feature -factors 20 -classification -seed 31 -iters 10");
    udtf.initialize(new ObjectInspector[] {xOI, yOI, paramOI});
    FactorizationMachineModel model = udtf.initModel(udtf._params);
    Assert.assertTrue("Actual class: " + model.getClass().getName(),
        model instanceof FMIntFeatureMapModel);

    float accuracy = 0.f;
    // Fixed seed keeps the generated data identical from run to run.
    final Random rnd = new Random(201L);
    for (int numberOfIteration = 0; numberOfIteration < 10000; numberOfIteration++) {
        ArrayList<IntFeature[]> fArrayList = new ArrayList<IntFeature[]>();
        ArrayList<Double> ans = new ArrayList<Double>();
        for (int i = 0; i < ROW; i++) {
            ArrayList<IntFeature> feature = new ArrayList<IntFeature>();
            for (int j = 1; j <= COL; j++) {
                if (i < (0.5f * ROW)) {
                    // First half of the rows: feature 1 is always present,
                    // features below COL/2 are sampled with probability 0.2.
                    if (j == 1) {
                        feature.add(new IntFeature(j, 1.d));
                    } else if (j < 0.5 * COL) {
                        if (rnd.nextFloat() < 0.2f) {
                            feature.add(new IntFeature(j, rnd.nextDouble()));
                        }
                    }
                } else {
                    // Second half of the rows: only features above COL/2 are sampled.
                    if (j > 0.5f * COL) {
                        if (rnd.nextFloat() < 0.2f) {
                            feature.add(new IntFeature(j, rnd.nextDouble()));
                        }
                    }
                }
            }
            IntFeature[] x = new IntFeature[feature.size()];
            feature.toArray(x);
            fArrayList.add(x);

            // Label is determined solely by which half the row belongs to.
            final double y = (i < ROW * 0.5f) ? -1.0d : 1.0d;
            ans.add(y);

            udtf.process(new Object[] {toStringArray(x), y});
        }

        // Score the rows of this round against the model.
        int bingo = 0;
        int total = fArrayList.size();
        for (int i = 0; i < total; i++) {
            // Map the -1/+1 label onto the 0/1 scale used for the thresholded prediction.
            int expected = ans.get(i) < 0 ? 0 : 1;
            double p = model.predict(fArrayList.get(i));
            int predicted = p > 0.5 ? 1 : 0;
            if (predicted == expected) {
                bingo++;
            }
        }
        accuracy = bingo / (float) total;
        println("Accuracy = " + accuracy);
    }
    // NOTE(review): accuracy is last assigned inside the loop above, so this call cannot
    // influence the assertion below; it only exercises runTrainingIteration. Confirm intent.
    udtf.runTrainingIteration(10);
    // Report the value reached so a failure is diagnosable from the test output.
    Assert.assertTrue("accuracy = " + accuracy, accuracy > 0.95f);
}

Example 20

Source File: StringFeatureMapModelTest.java From incubator-hivemall with Apache License 2.0

4 votes

/**
 * Feeds synthetic regression data to a {@link FactorizationMachineUDTF} and checks that the
 * summed squared prediction error of the last round stays below 5.
 *
 * <p>Each of the 100 rounds generates {@code ROW} rows. Rows in the first half have target 0.1,
 * always carry feature 1 with value 1, and carry each feature below {@code COL / 2} with
 * probability 0.2 and a random value. Rows in the second half have target 0.4 and carry each
 * feature above {@code COL / 2} with probability 0.2 and a random value.
 *
 * @throws HiveException propagated from the UDTF calls
 */
@Test
public void testRegression() throws HiveException {
    final int ROW = 1000, COL = 80;

    final FactorizationMachineUDTF udtf = new FactorizationMachineUDTF();
    final ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    final ListObjectInspector xOI =
            ObjectInspectorFactory.getStandardListObjectInspector(stringOI);
    final DoubleObjectInspector yOI = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    final ObjectInspector paramOI = ObjectInspectorUtils.getConstantObjectInspector(stringOI,
        "-factors 20 -seed 31 -eta 0.001 -lambda0 0.1 -sigma 0.1");
    udtf.initialize(new ObjectInspector[] {xOI, yOI, paramOI});

    final FactorizationMachineModel model = udtf.initModel(udtf._params);
    final boolean isStringModel = model instanceof FMStringFeatureMapModel;
    Assert.assertTrue("Actual class: " + model.getClass().getName(), isStringModel);

    double squaredError = 0.d;
    // Fixed seed keeps the generated data identical from run to run.
    final Random rnd = new Random(201L);
    for (int round = 0; round < 100; round++) {
        final ArrayList<StringFeature[]> rows = new ArrayList<StringFeature[]>();
        final ArrayList<Double> targets = new ArrayList<Double>();

        for (int i = 0; i < ROW; i++) {
            final boolean firstHalf = i < (0.5f * ROW);

            final ArrayList<StringFeature> active = new ArrayList<StringFeature>();
            for (int j = 1; j <= COL; j++) {
                if (firstHalf && j == 1) {
                    // Feature 1 is unconditionally present in the first half; no random draw.
                    active.add(new StringFeature(j, 1.d));
                    continue;
                }
                final boolean eligible = firstHalf ? (j < 0.5 * COL) : (j > (0.5f * COL));
                // Short-circuit keeps the random draws limited to eligible features.
                if (eligible && rnd.nextFloat() < 0.2f) {
                    active.add(new StringFeature(j, rnd.nextDouble()));
                }
            }
            final StringFeature[] x = active.toArray(new StringFeature[active.size()]);
            rows.add(x);

            final double y = firstHalf ? 0.1d : 0.4d;
            targets.add(y);

            udtf.process(new Object[] {toStringArray(x), y});
        }

        // Sum of squared residuals over the rows generated in this round.
        squaredError = 0.d;
        final int numRows = rows.size();
        for (int k = 0; k < numRows; k++) {
            final double residual = model.predict(rows.get(k)) - targets.get(k);
            squaredError += residual * residual;
        }
        println("diff = " + squaredError);
    }
    Assert.assertTrue("diff = " + squaredError, squaredError < 5.d);
}