org.apache.hadoop.hive.serde2.lazy.LazyString Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.lazy.LazyString. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveDynamoDBTypeTest.java    From emr-dynamodb-connector with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a string value converts to a DynamoDB string attribute and back to the same
 * Hive value, whether it is supplied as a {@code String}, a {@code Text} or a
 * {@code LazyString}.
 */
@Test
public void testString() {
  String val = STRING_LIST.get(0);
  HiveDynamoDBType ddType = HiveDynamoDBTypeFactory.getTypeObjectFromHiveType(STRING_OBJECT_INSPECTOR);
  AttributeValue expectedAV = new AttributeValue().withS(val);
  LazyString ls = new LazyString(LazyPrimitiveObjectInspectorFactory
      .getLazyStringObjectInspector(false, (byte) 0));
  // Encode once with an explicit charset and pass the *byte* count: val.length() counts
  // UTF-16 chars, which differs from the encoded length for non-ASCII input, and the
  // no-arg getBytes() depends on the platform default charset.
  byte[] valBytes = val.getBytes(java.nio.charset.StandardCharsets.UTF_8);
  initLazyObject(ls, valBytes, 0, valBytes.length);

  // The same expectations must hold for every supported representation of the value.
  for (Object o : new Object[]{val, new Text(val), ls}) {
    AttributeValue actualAV = ddType.getDynamoDBData(o, STRING_OBJECT_INSPECTOR, false);
    assertEquals(expectedAV, actualAV);
    Object actualStr = ddType.getHiveData(actualAV, STRING_OBJECT_INSPECTOR);
    assertEquals(val, actualStr);
  }
}
 
Example #2
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link LazyString} over the UTF-8 encoding of the given string.
 *
 * @param str the text to wrap
 * @param oi the object inspector handed to the {@code LazyString} constructor
 * @return a {@code LazyString} initialised over the whole encoded byte array
 */
@Nonnull
public static LazyString lazyString(@Nonnull final String str,
        @Nonnull final LazyStringObjectInspector oi) {
    final LazyString result = new LazyString(oi);

    final byte[] utf8 = str.getBytes(StandardCharsets.UTF_8);
    final ByteArrayRef bytesRef = new ByteArrayRef();
    bytesRef.setData(utf8);

    // Start at offset 0 and cover every encoded byte.
    result.init(bytesRef, 0, utf8.length);
    return result;
}
 
Example #3
Source File: GeneralClassifierUDTFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code testFeature} against a list of {@code LazyString} feature values, two of which
 * contain non-ASCII feature names.
 */
@Test
public void testLazyStringFeature() throws Exception {
    final LazyStringObjectInspector oi =
            LazyPrimitiveObjectInspectorFactory.getLazyStringObjectInspector(false, (byte) 0);

    final LazyString first = lazyString("テスト:-2", oi);
    final LazyString second = lazyString("漢字:-333.0", oi);
    // The last feature goes through the single-argument overload rather than reusing oi.
    final LazyString third = lazyString("test:-1");

    final List<LazyString> x = Arrays.asList(first, second, third);
    testFeature(x, oi, LazyString.class, String.class);
}
 
Example #4
Source File: GeneralRegressorUDTFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds three {@code LazyString} features (two with non-ASCII names) to {@code testFeature}.
 */
@Test
public void testLazyStringFeature() throws Exception {
    final LazyStringObjectInspector oi =
            LazyPrimitiveObjectInspectorFactory.getLazyStringObjectInspector(false, (byte) 0);

    // Elements are created in declaration order; the last one uses the single-argument
    // lazyString overload instead of the inspector above.
    final LazyString[] features = {
        lazyString("テスト:-2", oi),
        lazyString("漢字:-333.0", oi),
        lazyString("test:-1")
    };

    final List<LazyString> x = Arrays.asList(features);
    testFeature(x, oi, LazyString.class, String.class);
}
 
Example #5
Source File: HiveRCSchemaUtil.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Unwraps a Hive lazy object into the plain value handed on to Pig.
 * <p>
 * Lazy arrays and maps are delegated to {@code parseLazyArrayToPigArray} and
 * {@code parseLazyMapToPigMap}. Lazy primitives are unwrapped through their writable
 * object: strings via {@code toString()}, booleans as the ints 1 or 0, bytes widened to
 * int, and the remaining numeric types as whatever their writable's {@code get()} returns.
 * Any other object is returned unchanged.
 * <p>
 * NOTE(review): the LazyShort branch does not widen to int the way the LazyByte branch
 * does -- confirm that callers expect the un-widened value.
 *
 * @param value
 *            Object hive type
 * @return Object pig type
 */
public static Object extractPigTypeFromHiveType(Object value) {
    // Complex types first.
    if (value instanceof org.apache.hadoop.hive.serde2.lazy.LazyArray) {
        return parseLazyArrayToPigArray((org.apache.hadoop.hive.serde2.lazy.LazyArray) value);
    }
    if (value instanceof org.apache.hadoop.hive.serde2.lazy.LazyMap) {
        return parseLazyMapToPigMap((org.apache.hadoop.hive.serde2.lazy.LazyMap) value);
    }

    // Primitive wrappers.
    if (value instanceof LazyString) {
        return ((LazyString) value).getWritableObject().toString();
    }
    if (value instanceof LazyInteger) {
        return ((LazyInteger) value).getWritableObject().get();
    }
    if (value instanceof LazyLong) {
        return ((LazyLong) value).getWritableObject().get();
    }
    if (value instanceof LazyFloat) {
        return ((LazyFloat) value).getWritableObject().get();
    }
    if (value instanceof LazyDouble) {
        return ((LazyDouble) value).getWritableObject().get();
    }
    if (value instanceof LazyBoolean) {
        // Booleans are represented as the ints 1 / 0.
        final boolean flag = ((LazyBoolean) value).getWritableObject().get();
        return flag ? 1 : 0;
    }
    if (value instanceof LazyByte) {
        return (int) ((LazyByte) value).getWritableObject().get();
    }
    if (value instanceof LazyShort) {
        return ((LazyShort) value).getWritableObject().get();
    }

    // Not a recognised lazy type: pass through untouched.
    return value;
}
 
Example #6
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the sample data file with HiveColumnarLoader, stores relation {@code a} with
 * HiveColumnarStorage, then reads a row back through {@code readRow} and checks that it
 * has three fields, each a {@code LazyString}.
 */
@Test
public void testShouldStoreRowInHiveFormat() throws IOException, InterruptedException, SerDeException {
    final String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    final String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()";

    final String inputPath = simpleDataFile.getAbsolutePath();
    final File outputFile = new File("testhiveColumnarStore");

    final PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    final String loadQuery = "a = LOAD '" + Util.encodeEscape(inputPath) + "' using " + loadString + ";";
    server.registerQuery(loadQuery);

    // when
    server.store("a", outputFile.getAbsolutePath(), storeString);

    // then
    final Path outputPath = new Path(outputFile.getAbsolutePath() + "/part-m-00000.rc");
    final ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 string,f3 string");

    assertEquals(3, struct.getFieldsAsList().size());
    // Every one of the three columns must come back as a LazyString.
    for (int fieldIndex = 0; fieldIndex < 3; fieldIndex++) {
        final Object field = struct.getField(fieldIndex);
        assertEquals(LazyString.class, field.getClass());
    }
}
 
Example #7
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores a relation whose second column is built with {@code TOTUPLE(f2,f3)} and checks
 * that, read back with schema {@code f1 string,f2 array<string>}, the row holds a
 * {@code LazyString} followed by a {@code LazyArray} of {@code LazyString} elements whose
 * text is "Sample value".
 */
@Test
public void testShouldStoreTupleAsHiveArray() throws IOException, InterruptedException, SerDeException {
    final String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    final String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()";

    final String inputPath = simpleDataFile.getAbsolutePath();
    final File outputFile = new File("testhiveColumnarStore");

    final PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    final String loadQuery = "a = LOAD '" + Util.encodeEscape(inputPath) + "' using " + loadString + ";";
    server.registerQuery(loadQuery);
    server.registerQuery("b = FOREACH a GENERATE f1, TOTUPLE(f2,f3);");

    // when
    server.store("b", outputFile.getAbsolutePath(), storeString);

    // then
    final Path outputPath = new Path(outputFile.getAbsolutePath() + "/part-m-00000.rc");
    final ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 array<string>");

    assertEquals(2, struct.getFieldsAsList().size());

    final Object firstField = struct.getField(0);
    assertEquals(LazyString.class, firstField.getClass());

    final Object secondField = struct.getField(1);
    assertEquals(LazyArray.class, secondField.getClass());

    // Each array element must be a LazyString carrying the sample text.
    final List<Object> elements = ((LazyArray) secondField).getList();
    for (Object element : elements) {
        assertEquals(LazyString.class, element.getClass());
        final LazyString lazyElement = (LazyString) element;
        assertEquals("Sample value", lazyElement.getWritableObject().toString());
    }
}
 
Example #8
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores a relation whose second column is built with {@code TOBAG(f2,f3)} and checks
 * that, read back with schema {@code f1 string,f2 array<string>}, the row holds a
 * {@code LazyString} followed by a {@code LazyArray} of {@code LazyString} elements whose
 * text is "Sample value".
 */
@Test
public void testShouldStoreBagAsHiveArray() throws IOException, InterruptedException, SerDeException {
    final String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    final String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()";

    final String sourceFile = simpleDataFile.getAbsolutePath();
    final File outputFile = new File("testhiveColumnarStore");

    final PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerQuery(
            "a = LOAD '" + Util.encodeEscape(sourceFile) + "' using " + loadString + ";");
    pig.registerQuery("b = FOREACH a GENERATE f1, TOBAG(f2,f3);");

    // when
    pig.store("b", outputFile.getAbsolutePath(), storeString);

    // then
    final Path outputPath = new Path(outputFile.getAbsolutePath() + "/part-m-00000.rc");
    final ColumnarStruct row = readRow(outputFile, outputPath, "f1 string,f2 array<string>");

    assertEquals(2, row.getFieldsAsList().size());
    Object field = row.getField(0);
    assertEquals(LazyString.class, field.getClass());
    field = row.getField(1);
    assertEquals(LazyArray.class, field.getClass());

    // The bag is expected to surface as an array of LazyString values.
    final LazyArray bagAsArray = (LazyArray) field;
    for (Object item : bagAsArray.getList()) {
        assertEquals(LazyString.class, item.getClass());
        final String itemText = ((LazyString) item).getWritableObject().toString();
        assertEquals("Sample value", itemText);
    }
}
 
Example #9
Source File: CassandraLazyFactory.java    From Hive-Cassandra with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the lazy primitive object matching the primitive category of the given inspector.
 * BOOLEAN, INT, LONG, FLOAT, DOUBLE, BINARY and TIMESTAMP map to the Cassandra-specific
 * lazy classes; BYTE, SHORT and STRING use {@code LazyByte}, {@code LazyShort} and
 * {@code LazyString}.
 *
 * @param oi inspector whose primitive category selects the lazy class; it is cast to the
 *           matching lazy inspector type
 * @return a new lazy object for that category
 * @throws RuntimeException if the category is not one of those listed above
 */
public static LazyObject createLazyPrimitiveClass(
    PrimitiveObjectInspector oi) {
  final PrimitiveCategory category = oi.getPrimitiveCategory();
  final LazyObject lazy;

  switch (category) {
    case BOOLEAN:
      lazy = new CassandraLazyBoolean((LazyBooleanObjectInspector) oi);
      break;
    case BYTE:
      lazy = new LazyByte((LazyByteObjectInspector) oi);
      break;
    case SHORT:
      lazy = new LazyShort((LazyShortObjectInspector) oi);
      break;
    case INT:
      lazy = new CassandraLazyInteger((LazyIntObjectInspector) oi);
      break;
    case LONG:
      lazy = new CassandraLazyLong((LazyLongObjectInspector) oi);
      break;
    case FLOAT:
      lazy = new CassandraLazyFloat((LazyFloatObjectInspector) oi);
      break;
    case DOUBLE:
      lazy = new CassandraLazyDouble((LazyDoubleObjectInspector) oi);
      break;
    case STRING:
      lazy = new LazyString((LazyStringObjectInspector) oi);
      break;
    case BINARY:
      lazy = new CassandraLazyBinary((LazyBinaryObjectInspector) oi);
      break;
    case TIMESTAMP:
      lazy = new CassandraLazyTimestamp((LazyTimestampObjectInspector) oi);
      break;
    default:
      throw new RuntimeException("Internal error: no LazyObject for " + category);
  }

  return lazy;
}
 
Example #10
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a {@link LazyString} for the given text, using a backslash as the escape
 * character passed to the two-argument overload.
 *
 * @param str the text to wrap
 * @return the {@code LazyString} produced by the two-argument overload
 */
@Nonnull
public static LazyString lazyString(@Nonnull final String str) {
    final byte backslash = (byte) '\\';
    return lazyString(str, backslash);
}
 
Example #11
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a {@link LazyString} for the given text, using a string object inspector obtained
 * from {@code LazyPrimitiveObjectInspectorFactory} with {@code escaped = false} and the
 * supplied escape character.
 *
 * @param str the text to wrap
 * @param escapeChar escape character passed to the inspector factory
 * @return the {@code LazyString} produced by the inspector-based overload
 */
@Nonnull
public static LazyString lazyString(@Nonnull final String str, final byte escapeChar) {
    return lazyString(str,
        LazyPrimitiveObjectInspectorFactory.getLazyStringObjectInspector(false, escapeChar));
}
 
Example #12
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Stores a relation whose second column is built with {@code TOMAP(f2,f3)} and checks that,
 * read back with schema {@code f1 string,f2 map<string,string>}, the row holds a
 * {@code LazyString} followed by a {@code LazyMap} whose keys and values are
 * {@code LazyString} instances with the text "Sample value".
 */
@Test
public void testShouldStoreMapAsHiveMap() throws IOException, InterruptedException, SerDeException {
    final String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    final String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()";

    final String inputPath = simpleDataFile.getAbsolutePath();
    final File outputFile = new File("testhiveColumnarStore");

    final PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    final String loadQuery = "a = LOAD '" + Util.encodeEscape(inputPath) + "' using " + loadString + ";";
    server.registerQuery(loadQuery);
    server.registerQuery("b = FOREACH a GENERATE f1, TOMAP(f2,f3);");

    // when
    server.store("b", outputFile.getAbsolutePath(), storeString);

    // then
    final Path outputPath = new Path(outputFile.getAbsolutePath() + "/part-m-00000.rc");
    final ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 map<string,string>");

    assertEquals(2, struct.getFieldsAsList().size());

    final Object firstField = struct.getField(0);
    assertEquals(LazyString.class, firstField.getClass());

    final Object secondField = struct.getField(1);
    assertEquals(LazyMap.class, secondField.getClass());

    final Map<Object, Object> entries = ((LazyMap) secondField).getMap();
    for (Entry<Object, Object> entry : entries.entrySet()) {
        final Object key = entry.getKey();
        final Object mapped = entry.getValue();

        // Check both runtime types before casting either side.
        assertEquals(LazyString.class, key.getClass());
        assertEquals(LazyString.class, mapped.getClass());

        assertEquals("Sample value", ((LazyString) key).getWritableObject().toString());
        assertEquals("Sample value", ((LazyString) mapped).getWritableObject().toString());
    }
}
 
Example #13
Source File: TestEsriJsonSerDe.java    From spatial-framework-for-hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a four-column row (boolean, tinyint, smallint, string) through the SerDe twice and
 * checks each field of the result: first with values added through {@code addWritable},
 * then with replaced values where the string column is supplied as a {@code LazyString}.
 */
@Test
public void TestColumnTypes() throws Exception {
    ArrayList<Object> stuff = new ArrayList<Object>();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "flag,num1,num2,text");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "boolean,tinyint,smallint,string");
    AbstractSerDe jserde = mkSerDe(proptab);
    StructObjectInspector rowOI = (StructObjectInspector) jserde.getObjectInspector();

    // First pass: flag=false, num1=2, num2=5, text="Point(15.0 5.0)".
    addWritable(stuff, false);
    addWritable(stuff, (byte) 2);
    addWritable(stuff, (short) 5);
    addWritable(stuff, "Point(15.0 5.0)");
    Object row = runSerDe(stuff, jserde, rowOI);
    Object fieldData = getField("flag", row, rowOI);
    Assert.assertEquals(false, ((BooleanWritable) fieldData).get());
    fieldData = getField("num1", row, rowOI);
    Assert.assertEquals((byte) 2, ((ByteWritable) fieldData).get());
    fieldData = getField("num2", row, rowOI);
    Assert.assertEquals((short) 5, ((ShortWritable) fieldData).get());
    fieldData = getField("text", row, rowOI);
    Assert.assertEquals("Point(15.0 5.0)", ((Text) fieldData).toString());

    // Second pass: replace every column with a different value.
    stuff.set(0, new BooleanWritable(true));
    stuff.set(1, new ByteWritable((byte) 4));
    stuff.set(2, new ShortWritable((short) 4));
    // The string column is supplied as a LazyString instead of a Text this time.
    LazyStringObjectInspector loi = LazyPrimitiveObjectInspectorFactory.
        getLazyStringObjectInspector(false, (byte) '\0');
    LazyString lstr = new LazyString(loi);
    ByteArrayRef bar = new ByteArrayRef();
    // Explicit charset and a computed length, rather than the platform default charset and
    // a hard-coded byte count that would silently go stale if the literal changed.
    byte[] otherBytes = "other".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    bar.setData(otherBytes);
    lstr.init(bar, 0, otherBytes.length);
    stuff.set(3, lstr);
    row = runSerDe(stuff, jserde, rowOI);
    fieldData = getField("flag", row, rowOI);
    Assert.assertEquals(true, ((BooleanWritable) fieldData).get());
    fieldData = getField("num1", row, rowOI);
    Assert.assertEquals((byte) 4, ((ByteWritable) fieldData).get());
    fieldData = getField("num2", row, rowOI);
    Assert.assertEquals((short) 4, ((ShortWritable) fieldData).get());
    fieldData = getField("text", row, rowOI);
    Assert.assertEquals("other", ((Text) fieldData).toString());
}