Java Code Examples for org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory#javaStringObjectInspector()

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory#javaStringObjectInspector(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KuromojiUDFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes and closes a KuromojiUDF with five argument inspectors: a Java
 * string inspector, a constant writable string inspector, and three constant
 * list-of-string inspectors. Every constant value passed here is {@code null}.
 */
@Test
public void testFiveArgumentArray() throws UDFArgumentException, IOException {
    GenericUDF udf = new KuromojiUDF();

    PrimitiveTypeInfo modeTypeInfo = new PrimitiveTypeInfo();
    modeTypeInfo.setTypeName("string");

    ObjectInspector[] argOIs = new ObjectInspector[] {
            // line
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            // mode
            PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
                modeTypeInfo, null),
            // stopWords
            ObjectInspectorFactory.getStandardConstantListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, null),
            // stopTags
            ObjectInspectorFactory.getStandardConstantListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, null),
            // userDictUrl
            ObjectInspectorFactory.getStandardConstantListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, null)};

    udf.initialize(argOIs);
    udf.close();
}
 
Example 2
Source File: UDFStringSplitToMultimap.java    From hive-third-functions with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the arguments of split_to_multimap and builds its result inspector.
 *
 * @param arguments inspectors of the call arguments; exactly {@code ARG_COUNT}
 *                  are required and each must compare equal in type to the
 *                  Java string inspector
 * @return a standard map inspector whose keys are strings and whose values
 *         are lists of strings
 * @throws UDFArgumentLengthException if the number of arguments differs from
 *                                    {@code ARG_COUNT}
 * @throws UDFArgumentTypeException   if any argument is not of string type
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    // Check that exactly ARG_COUNT arguments were passed
    if (arguments.length != ARG_COUNT) {
        throw new UDFArgumentLengthException(
                "The function split_to_multimap(string, string, string) takes exactly " + ARG_COUNT + " arguments.");
    }

    // Check that every argument is a string. The bound is the length validated
    // above rather than a separate literal, so the two checks cannot drift apart.
    for (int i = 0; i < arguments.length; i++) {
        if (!ObjectInspectorUtils.compareTypes(PrimitiveObjectInspectorFactory.javaStringObjectInspector, arguments[i])) {
            throw new UDFArgumentTypeException(i,
                    "\"" + PrimitiveObjectInspectorFactory.javaStringObjectInspector.getTypeName() + "\" "
                            + "expected at function split_to_multimap, but "
                            + "\"" + arguments[i].getTypeName() + "\" "
                            + "is found");
        }
    }

    ObjectInspector mapKeyOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    ObjectInspector mapValueOI = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

    return ObjectInspectorFactory.getStandardMapObjectInspector(mapKeyOI, mapValueOI);
}
 
Example 3
Source File: GeneralRegressorUDTFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a GeneralRegressorUDTF on a single row without passing any option
 * string, then checks that the prediction for that same row is within 1E-5
 * of the training target.
 */
@Test
public void testNoOptions() throws Exception {
    final List<String> features = Arrays.asList("1:-2", "2:-1");
    final float target = 0.f;

    GeneralRegressorUDTF udtf = new GeneralRegressorUDTF();
    ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
    ListObjectInspector stringListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);

    // arguments: feature list, target
    udtf.initialize(new ObjectInspector[] {stringListOI, floatOI});
    udtf.process(new Object[] {features, target});
    udtf.finalizeTraining();

    float predicted = udtf.predict(udtf.parseFeatures(features));
    Assert.assertEquals(target, predicted, 1E-5);
}
 
Example 4
Source File: TestColumnStatistics.java    From hive-dwrf with Apache License 2.0 6 votes vote down vote up
/**
 * Merges two string column statistics objects and checks that the merged
 * minimum and maximum span the values fed to both of them.
 */
@Test
public void testStringStatisticsMerge() throws Exception {
  ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;

  ColumnStatisticsImpl merged = ColumnStatisticsImpl.create(stringOI);
  ColumnStatisticsImpl other = ColumnStatisticsImpl.create(stringOI);

  for (String value : new String[] {"bob", "david", "charles"}) {
    merged.updateString(value);
  }
  for (String value : new String[] {"anne", "erin"}) {
    other.updateString(value);
  }

  merged.merge(other);

  // both extremes come from the statistics that were merged in
  StringColumnStatistics strStats = (StringColumnStatistics) merged;
  assertEquals("anne", strStats.getMinimum());
  assertEquals("erin", strStats.getMaximum());
}
 
Example 5
Source File: UDAFToOrderedListTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the evaluator with the option string "-k -2 -reverse" over three
 * values and expects the two results "candy", "banana" in that order.
 */
@Test
public void testReverseTailK() throws Exception {
    // = top-k
    ObjectInspector optionOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-k -2 -reverse");
    ObjectInspector[] inputOIs = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, optionOI};

    final String[] values = new String[] {"banana", "apple", "candy"};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    for (String value : values) {
        evaluator.iterate(agg, new Object[] {value});
    }

    @SuppressWarnings("unchecked")
    List<Object> res = (List<Object>) evaluator.terminate(agg);

    Assert.assertEquals(2, res.size());
    Assert.assertEquals("candy", res.get(0));
    Assert.assertEquals("banana", res.get(1));
}
 
Example 6
Source File: UDFRe2JRegexpExtractAllTest.java    From hive-third-functions with Apache License 2.0 6 votes vote down vote up
/**
 * Evaluates UDFRe2JRegexpExtractAll on "1a 2b 3c 6f" with the pattern
 * {@code \d+} and expects the matches "1", "2", "3", "6" in order.
 */
@Test
public void testUDFRe2JRegexpExtractAll() throws HiveException {
    UDFRe2JRegexpExtractAll udf = new UDFRe2JRegexpExtractAll();

    // both the source and the pattern are described by the Java string inspector
    ObjectInspector[] arguments = {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector};
    udf.initialize(arguments);

    GenericUDF.DeferredObject[] args = {
            new GenericUDF.DeferredJavaObject("1a 2b 3c 6f"),
            new GenericUDF.DeferredJavaObject("\\d+")};

    ArrayList<Object> output = (ArrayList<Object>) udf.evaluate(args);
    assertTrue(Iterables.elementsEqual(ImmutableList.of("1", "2", "3", "6"), output));
}
 
Example 7
Source File: KuromojiUDFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes and closes a KuromojiUDF with three argument inspectors: a Java
 * string inspector, a constant writable string inspector, and a constant
 * list-of-string inspector. Both constant values passed here are {@code null}.
 */
@Test
public void testThreeArgument() throws UDFArgumentException, IOException {
    GenericUDF udf = new KuromojiUDF();

    PrimitiveTypeInfo modeTypeInfo = new PrimitiveTypeInfo();
    modeTypeInfo.setTypeName("string");

    ObjectInspector[] argOIs = new ObjectInspector[] {
            // line
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            // mode
            PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
                modeTypeInfo, null),
            // stopWords
            ObjectInspectorFactory.getStandardConstantListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, null)};

    udf.initialize(argOIs);
    udf.close();
}
 
Example 8
Source File: UDAFToOrderedListTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the evaluator with the option string "-k 2" over three (value, key)
 * pairs and expects the two results "candy", "banana" in that order.
 */
@Test
public void testTopKWithKey() throws Exception {
    ObjectInspector optionOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-k 2");
    ObjectInspector[] inputOIs = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaDoubleObjectInspector,
            optionOI};

    final String[] values = new String[] {"banana", "apple", "candy"};
    final double[] keys = new double[] {0.7, 0.5, 0.8};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    // feed each value together with the key at the same index
    int row = 0;
    while (row < values.length) {
        evaluator.iterate(agg, new Object[] {values[row], keys[row]});
        row++;
    }

    @SuppressWarnings("unchecked")
    List<Object> res = (List<Object>) evaluator.terminate(agg);

    Assert.assertEquals(2, res.size());
    Assert.assertEquals("candy", res.get(0));
    Assert.assertEquals("banana", res.get(1));
}
 
Example 9
Source File: UDAFToOrderedListTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the evaluator with the option string "-k 2 -reverse" over three
 * values and expects the two results "apple", "banana" in that order.
 */
@Test
public void testReverseTopK() throws Exception {
    // = tail-k
    ObjectInspector optionOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-k 2 -reverse");
    ObjectInspector[] inputOIs = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, optionOI};

    final String[] values = new String[] {"banana", "apple", "candy"};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    for (String value : values) {
        evaluator.iterate(agg, new Object[] {value});
    }

    @SuppressWarnings("unchecked")
    List<Object> res = (List<Object>) evaluator.terminate(agg);

    Assert.assertEquals(2, res.size());
    Assert.assertEquals("apple", res.get(0));
    Assert.assertEquals("banana", res.get(1));
}
 
Example 10
Source File: KuromojiUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes a KuromojiUDF with the constant option string "-mode normal",
 * checks that its private {@code _mode} field equals {@code Mode.NORMAL}, and
 * then compares the tokens produced for a sample sentence with the expected
 * list.
 */
@Test
public void testNormalModeWithOption()
        throws IOException, HiveException, IllegalAccessException, NoSuchFieldException {
    GenericUDF udf = new KuromojiUDF();
    ObjectInspector[] argOIs = new ObjectInspector[2];

    argOIs[0] = PrimitiveObjectInspectorFactory.javaStringObjectInspector; // line
    argOIs[1] = HiveUtils.getConstStringObjectInspector("-mode normal"); // mode
    udf.initialize(argOIs);

    Object mode = PrivilegedAccessor.getValue(udf, "_mode");
    Assert.assertEquals(Mode.NORMAL, mode);

    DeferredObject[] args = new DeferredObject[1];
    args[0] = new DeferredObject() {
        @Override
        public Text get() throws HiveException {
            return new Text("クロモジのJapaneseAnalyzerを使ってみる。テスト。");
        }

        @Override
        public void prepare(int arg) throws HiveException {}
    };
    Object result = udf.evaluate(args);

    // Expected value first, actual value second, so that a failure message
    // reports the UDF output as the actual value rather than as the expectation.
    Assert.assertEquals(Arrays.asList(new Text("クロモジ"), new Text("japaneseanalyzer"),
        new Text("使う"), new Text("みる"), new Text("テスト")), result);

    udf.close();
}
 
Example 11
Source File: GeneralRegressorUDTFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Expects a UDFArgumentException when GeneralRegressorUDTF is set up with the
 * option string "-opt UnsupportedOpt".
 */
@Test(expected = UDFArgumentException.class)
public void testUnsupportedOptimizer() throws Exception {
    GeneralRegressorUDTF udtf = new GeneralRegressorUDTF();

    ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
    ListObjectInspector stringListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector optionOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-opt UnsupportedOpt");

    ObjectInspector[] argOIs = {stringListOI, floatOI, optionOI};
    udtf.initialize(argOIs);
}
 
Example 12
Source File: UDAFToOrderedListTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the evaluator without an option string over three (value, key) pairs,
 * two of which share the key 0.7. Expects all three values back with "apple"
 * first; the two values with the duplicated key may appear in either order.
 */
@Test
public void testNaturalOrderWithKey() throws Exception {
    ObjectInspector[] inputOIs =
            new ObjectInspector[] {PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                    PrimitiveObjectInspectorFactory.javaDoubleObjectInspector};

    final String[] values = new String[] {"banana", "apple", "candy"};
    final double[] keys = new double[] {0.7, 0.5, 0.7};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    for (int i = 0; i < values.length; i++) {
        evaluator.iterate(agg, new Object[] {values[i], keys[i]});
    }

    @SuppressWarnings("unchecked")
    List<Object> res = (List<Object>) evaluator.terminate(agg);

    Assert.assertEquals(3, res.size());
    Assert.assertEquals("apple", res.get(0));
    // Compare by value: reference equality on strings only holds while the
    // evaluator hands back the very same interned literal it was given.
    if ("banana".equals(res.get(1))) { // duplicated key (0.7)
        Assert.assertEquals("candy", res.get(2));
    } else {
        Assert.assertEquals("banana", res.get(2));
    }
}
 
Example 13
Source File: KuromojiUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes and closes a KuromojiUDF with a single Java string inspector
 * as its only argument.
 */
@Test
public void testOneArgument() throws UDFArgumentException, IOException {
    GenericUDF udf = new KuromojiUDF();
    ObjectInspector[] argOIs = new ObjectInspector[] {
            // line
            PrimitiveObjectInspectorFactory.javaStringObjectInspector};
    udf.initialize(argOIs);
    udf.close();
}
 
Example 14
Source File: FeatureUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates the UDF on the string "f1" and the float 2.5f and expects the
 * text "f1:2.5".
 */
@Test
public void testStringFloat() throws Exception {
    // arguments: feature (string), weight (float)
    udf.initialize(new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaFloatObjectInspector});

    GenericUDF.DeferredObject[] args = new GenericUDF.DeferredObject[] {
            new DeferredJavaObject("f1"), new DeferredJavaObject(2.5f)};
    Text ret = udf.evaluate(args);

    Assert.assertEquals("f1:2.5", ret.toString());
}
 
Example 15
Source File: FeatureUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates the UDF on the string "f1" and the int 2 and expects the text
 * "f1:2".
 */
@Test
public void testStringInt() throws Exception {
    // arguments: feature (string), weight (int)
    udf.initialize(new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector});

    GenericUDF.DeferredObject[] args = new GenericUDF.DeferredObject[] {
            new DeferredJavaObject("f1"), new DeferredJavaObject(2)};
    Text ret = udf.evaluate(args);

    Assert.assertEquals("f1:2", ret.toString());
}
 
Example 16
Source File: PerceptronUDTFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Applies two updates to a PerceptronUDTF and checks the model weights after
 * each one. The feature "opinion" is part of both updates.
 */
@Test
public void testUpdate() throws UDFArgumentException {
    PerceptronUDTF udtf = new PerceptronUDTF();
    ListObjectInspector stringListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector[] argOIs = {stringListOI,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector};
    udtf.initialize(argOIs);

    /* first update: {"good", "opinion"} with 1 */
    FeatureValue good = FeatureValue.parse("good");
    FeatureValue opinionFirst = FeatureValue.parse("opinion");
    udtf.update(new FeatureValue[] {good, opinionFirst}, 1, 0.f);

    /* both features now weigh 1 */
    assertEquals(1.f, udtf.model.get(good.getFeature()).get(), 1e-5f);
    assertEquals(1.f, udtf.model.get(opinionFirst.getFeature()).get(), 1e-5f);

    /* second update: {"bad", "opinion"} with -1 */
    FeatureValue bad = FeatureValue.parse("bad");
    FeatureValue opinionSecond = FeatureValue.parse("opinion");
    udtf.update(new FeatureValue[] {bad, opinionSecond}, -1, 0.f);

    /* "good" is unchanged, "bad" weighs -1, "opinion" is back at 0 */
    assertEquals(1.f, udtf.model.get(good.getFeature()).get(), 1e-5f);
    assertEquals(-1.f, udtf.model.get(bad.getFeature()).get(), 1e-5f);
    assertEquals(0.f, udtf.model.get(opinionSecond.getFeature()).get(), 1e-5f);
}
 
Example 17
Source File: MergeTest.java    From hive-funnel-udf with Apache License 2.0 5 votes vote down vote up
/**
 * Expects a UDFArgumentTypeException when an evaluator is requested from
 * Merge for a single Java string parameter.
 */
@Test(expected = UDFArgumentTypeException.class)
public void testPrimitiveParam() throws HiveException {
    Merge udaf = new Merge();
    ObjectInspector[] stringOnlyOIs = {PrimitiveObjectInspectorFactory.javaStringObjectInspector};

    GenericUDAFParameterInfo paramInfo =
            new SimpleGenericUDAFParameterInfo(stringOnlyOIs, false, false);

    // the returned evaluator is not used
    udaf.getEvaluator(paramInfo);
}
 
Example 18
Source File: FeatureUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates the UDF on the string "f1" and the long 2L and expects the text
 * "f1:2".
 */
@Test
public void testStringLong() throws Exception {
    // arguments: feature (string), weight (long)
    udf.initialize(new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaLongObjectInspector});

    GenericUDF.DeferredObject[] args = new GenericUDF.DeferredObject[] {
            new DeferredJavaObject("f1"), new DeferredJavaObject(2L)};
    Text ret = udf.evaluate(args);

    Assert.assertEquals("f1:2", ret.toString());
}
 
Example 19
Source File: UDAFToOrderedListTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the evaluator with the option string "-k 4 -kv_map" over six
 * (value, key) pairs in which the pair 4:candy occurs twice. Expects a
 * LinkedHashMap with three entries, for the keys 5, 4 and 3.
 */
@Test
public void testTop4NoDedup() throws Exception {
    ObjectInspector optionOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-k 4 -kv_map");
    ObjectInspector[] inputOIs = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            optionOI};

    final int[] keys = new int[] {5, 3, 4, 1, 2, 4};
    // 4:candy is duplicating
    final String[] values = new String[] {"apple", "banana", "candy", "donut", "egg", "candy"};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    int row = 0;
    while (row < values.length) {
        evaluator.iterate(agg, new Object[] {values[row], keys[row]});
        row++;
    }

    Object result = evaluator.terminate(agg);
    Assert.assertEquals(LinkedHashMap.class, result.getClass());

    Map<?, ?> map = (Map<?, ?>) result;
    Assert.assertEquals(3, map.size());

    // keys 5, 4 and 3 are present
    Assert.assertEquals("apple", map.get(5));
    Assert.assertEquals("candy", map.get(4));
    Assert.assertEquals("banana", map.get(3));
    // keys 2 and 1 are absent
    Assert.assertNull(map.get(2));
    Assert.assertNull(map.get(1));
}
 
Example 20
Source File: FunnelTest.java    From hive-funnel-udf with Apache License 2.0 4 votes vote down vote up
/**
 * Feeds four rows to a Funnel evaluator initialized in PARTIAL1 mode and
 * compares the value returned by {@code terminatePartial} with the expected
 * three-element list.
 */
@Test
public void testPartial1() throws HiveException {
    Funnel udaf = new Funnel();

    ObjectInspector[] inputObjectInspectorList = new ObjectInspector[]{
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, // action_column
        PrimitiveObjectInspectorFactory.javaLongObjectInspector,   // timestamp_column
        ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), // funnel_step_1
        ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector) // funnel_step_2
    };

    GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(inputObjectInspectorList, false, false);
    GenericUDAFEvaluator udafEvaluator = udaf.getEvaluator(paramInfo);

    // the returned output inspector is not used by this test
    udafEvaluator.init(Mode.PARTIAL1, inputObjectInspectorList);

    // Order will be "alpha, beta, gamma, delta" when ordered on timestamp_column
    // Funnel is "beta" -> "gamma" -> "epsilon"
    // Should return [1, 1, 0] as we don't have an epsilon
    Object[][] rows = new Object[][] {
        {"beta", 200L, Arrays.asList("beta"), "gamma", Arrays.asList("epsilon")},
        {"alpha", 100L, Arrays.asList("beta"), "gamma", Arrays.asList("epsilon")},
        {"delta", 400L, Arrays.asList("beta"), "gamma", Arrays.asList("epsilon")},
        {"gamma", 300L, Arrays.asList("beta"), "gamma", Arrays.asList("epsilon")}
    };

    // Process the data
    AggregationBuffer agg = udafEvaluator.getNewAggregationBuffer();
    udafEvaluator.reset(agg);
    for (Object[] row : rows) {
        udafEvaluator.iterate(agg, row);
    }
    Object result = udafEvaluator.terminatePartial(agg);

    // Expected partial output
    List<Object> expected = Arrays.<Object>asList(
        Arrays.asList("beta", "gamma"),
        Arrays.asList(200L, 300L),
        Arrays.asList("beta", null, "gamma", null, "epsilon", null));

    Assert.assertEquals(expected, result);
}