Java Code Examples for org.apache.hadoop.hive.serde2.SerDeException

The following examples show how to use org.apache.hadoop.hive.serde2.SerDeException. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: presto   Author: prestosql   File: TestOrcReaderPositions.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the BIGINT values 0, 3, 6, ... 297 (one value per row) to {@code file} through an ORC
 * record writer, flushing the writer before every row whose value is a positive multiple of 60.
 *
 * @param file destination handed to {@code createOrcRecordWriter}
 */
private static void createMultiStripeFile(File file)
        throws IOException, ReflectiveOperationException, SerDeException
{
    FileSinkOperator.RecordWriter orcWriter = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);

    Serializer serializer = new OrcSerde();
    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", BIGINT);
    // A single row object is reused; only its one field is overwritten per iteration.
    Object reusableRow = inspector.create();
    StructField valueField = inspector.getAllStructFieldRefs().get(0);

    int value = 0;
    while (value < 300) {
        boolean flushDue = (value > 0) && (value % 60 == 0);
        if (flushDue) {
            flushWriter(orcWriter);
        }

        inspector.setStructFieldData(reusableRow, valueField, (long) value);
        orcWriter.write(serializer.serialize(reusableRow, inspector));
        value += 3;
    }

    orcWriter.close(false);
}
 
Example #2
Source Project: incubator-hivemall   Author: apache   File: JsonSerdeUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads values from {@code p} until the END_ARRAY token is reached and returns them as a list.
 *
 * <p>{@code columnTypes} must hold exactly one type. Its HCatalog schema is looked up, and the
 * array element schema of that schema's first field is used to extract every element via
 * {@code extractCurrentField}.
 *
 * @param p parser to read tokens from
 * @param columnTypes single-element list describing the array column; checked for null through
 *        {@code Preconditions.checkNotNull} (presumably raising {@code SerDeException} — confirm
 *        against that helper)
 * @return the extracted elements, in parse order
 * @throws IOException if {@code columnTypes} does not contain exactly one entry, or on read errors
 */
@Nonnull
private static List<Object> parseArray(@Nonnull final JsonParser p,
        @CheckForNull final List<TypeInfo> columnTypes)
        throws HCatException, IOException, SerDeException {
    Preconditions.checkNotNull(columnTypes, "columnTypes MUST NOT be null",
        SerDeException.class);
    if (columnTypes.size() != 1) {
        // Message previously read "but go", which was a typo for "but got".
        throw new IOException("Expected a single array but got " + columnTypes);
    }

    TypeInfo elemType = columnTypes.get(0);
    HCatSchema schema = HCatSchemaUtils.getHCatSchema(elemType);

    HCatFieldSchema listSchema = schema.get(0);
    HCatFieldSchema elemSchema = listSchema.getArrayElementSchema().get(0);

    final List<Object> arr = new ArrayList<Object>();
    while (p.nextToken() != JsonToken.END_ARRAY) {
        arr.add(extractCurrentField(p, elemSchema, true));
    }
    return arr;
}
 
Example #3
Source Project: presto   Author: prestosql   File: GenericHiveRecordCursor.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Moves the cursor to the next record and deserializes it into {@code rowData}.
 *
 * @return {@code true} when a record was read; {@code false} (after calling {@code close()})
 *         when the cursor is already closed or the record reader has no further record
 * @throws PrestoException wrapping any I/O, SerDe or runtime failure; the cursor is closed first
 */
@Override
public boolean advanceNextPosition()
{
    try {
        // Short-circuits: the reader is not touched once the cursor is closed.
        boolean hasRecord = !closed && recordReader.next(key, value);
        if (!hasRecord) {
            close();
            return false;
        }

        // No column of the new row has been loaded yet.
        Arrays.fill(loaded, false);

        rowData = deserializer.deserialize(value);
        return true;
    }
    catch (IOException | SerDeException | RuntimeException e) {
        closeWithSuppression(this, e);
        boolean lineTooLong = e instanceof TextLineLengthLimitExceededException;
        if (!lineTooLong) {
            throw new PrestoException(HIVE_CURSOR_ERROR, e);
        }
        throw new PrestoException(HIVE_BAD_DATA, "Line too long in text file: " + path, e);
    }
}
 
Example #4
Source Project: incubator-hivemall   Author: apache   File: JsonSerdeUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Appends the JSON text for a list value to {@code sb}.
 *
 * <p>Writes the literal {@code null} when the inspector yields no list for {@code obj};
 * otherwise writes the elements, each rendered by {@code buildJSONString}, separated by
 * {@code SerDeUtils.COMMA} and enclosed in {@code SerDeUtils.LBRACKET}/{@code RBRACKET}.
 *
 * @param sb buffer receiving the output
 * @param obj list object to serialize, may be null
 * @param loi inspector used to read {@code obj}; dereferenced unconditionally, so it must not
 *        be null (the annotation previously claimed {@code @Nullable})
 */
private static void serializeList(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nonnull final ListObjectInspector loi) throws SerDeException {
    ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
    List<?> olist = loi.getList(obj);

    if (olist == null) {
        sb.append("null");
    } else {
        sb.append(SerDeUtils.LBRACKET);
        for (int i = 0; i < olist.size(); i++) {
            if (i > 0) {
                sb.append(SerDeUtils.COMMA);
            }
            buildJSONString(sb, olist.get(i), listElementObjectInspector);
        }
        sb.append(SerDeUtils.RBRACKET);
    }
}
 
Example #5
Source Project: incubator-hivemall   Author: apache   File: HiveUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds and initializes a {@code LazySimpleSerDe} with one column per given inspector.
 *
 * <p>Columns are named {@code c1}, {@code c2}, ... and typed with each inspector's
 * {@code getTypeName()}; both lists are passed as comma-separated table properties
 * ("columns" and "columns.types").
 *
 * @param OIs inspectors describing the columns, at least one
 * @return the initialized serde
 * @throws IllegalArgumentException if no inspector is given
 */
public static LazySimpleSerDe getLineSerde(@Nonnull final PrimitiveObjectInspector... OIs)
        throws SerDeException {
    final int numColumns = OIs.length;
    if (numColumns == 0) {
        throw new IllegalArgumentException("OIs must be specified");
    }

    final StringBuilder names = new StringBuilder();
    final StringBuilder types = new StringBuilder();
    for (int idx = 0; idx < numColumns; idx++) {
        if (idx > 0) {
            names.append(',');
            types.append(',');
        }
        names.append('c').append(idx + 1);
        types.append(OIs[idx].getTypeName());
    }

    final Properties tableProps = new Properties();
    tableProps.setProperty("columns", names.toString());
    tableProps.setProperty("columns.types", types.toString());

    final LazySimpleSerDe lineSerde = new LazySimpleSerDe();
    lineSerde.initialize(new Configuration(), tableProps);
    return lineSerde;
}
 
Example #6
Source Project: incubator-iotdb   Author: apache   File: TsFileSerDeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the fixture: a {@code TsFileSerDe} initialized with two bigint columns
 * ("time_stamp", "sensor_1"), a "," column-name delimiter and device id "device_1".
 *
 * <p>A failure to initialize the serde aborts the test instead of being printed and ignored,
 * so that tests never run against an uninitialized serde.
 */
@Before
public void setUp() {
  tsFileSerDer = new TsFileSerDe();
  columnNames = Arrays.asList("time_stamp", "sensor_1");
  columnTypes = new ArrayList<>();
  PrimitiveTypeInfo typeInfo1 = new PrimitiveTypeInfo();
  typeInfo1.setTypeName("bigint");
  columnTypes.add(typeInfo1);
  PrimitiveTypeInfo typeInfo2 = new PrimitiveTypeInfo();
  typeInfo2.setTypeName("bigint");
  columnTypes.add(typeInfo2);
  tbl = new Properties();
  String delimiter = ",";
  tbl.setProperty(serdeConstants.COLUMN_NAME_DELIMITER, delimiter);
  tbl.setProperty(serdeConstants.LIST_COLUMNS, String.join(delimiter, columnNames));
  tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "bigint,bigint");
  tbl.setProperty(TsFileSerDe.DEVICE_ID, "device_1");
  job = new JobConf();
  try {
    tsFileSerDer.initialize(job, tbl);
  } catch (SerDeException e) {
    // Surface the root cause here rather than as confusing failures in later test methods.
    throw new AssertionError("Failed to initialize TsFileSerDe in test setup", e);
  }
}
 
Example #7
Source Project: multiple-dimension-spread   Author: yahoojapan   File: MDSSerde.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Logs the table (and, when present, partition) properties and builds the object inspector.
 *
 * <p>The root type covers all declared columns unless the configuration names projection
 * columns under {@code ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR}, in which case the
 * projected type info is used.
 *
 * @param conf configuration consulted for the projected column names
 * @param table table properties providing the column names and types
 * @param part partition properties; only logged, may be null
 */
@Override
public void initialize( final Configuration conf, final Properties table , final Properties part ) throws SerDeException{
  LOG.info( table.toString() );
  if( part != null ){
    LOG.info( part.toString() );
  }

  final String allColumnNames = table.getProperty( serdeConstants.LIST_COLUMNS );
  final String allColumnTypes = table.getProperty( serdeConstants.LIST_COLUMN_TYPES );
  final String projectedNames = conf.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , "" );

  final StructTypeInfo rootType = projectedNames.isEmpty()
      ? getAllReadTypeInfo( allColumnNames , allColumnTypes )
      : getColumnProjectionTypeInfo( allColumnNames , allColumnTypes , projectedNames );

  inspector = MDSObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( rootType );
}
 
Example #8
Source Project: indexr   Author: shunfei   File: IndexRSerde.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts a primitive value into a {@code Writable} according to the inspector's category.
 *
 * @param obj value to convert; {@code null} yields {@code null}
 * @param inspector inspector used to read {@code obj}
 * @return the writable for DOUBLE, FLOAT, INT, LONG, STRING, DATE or TIMESTAMP values
 * @throws SerDeException for any other primitive category
 */
private static Writable createPrimitive(Object obj, PrimitiveObjectInspector inspector)
        throws SerDeException {
    if (obj == null) {
        return null;
    }

    final Writable converted;
    switch (inspector.getPrimitiveCategory()) {
        case DOUBLE: {
            DoubleObjectInspector doubleOI = (DoubleObjectInspector) inspector;
            converted = new DoubleWritable(doubleOI.get(obj));
            break;
        }
        case FLOAT: {
            FloatObjectInspector floatOI = (FloatObjectInspector) inspector;
            converted = new FloatWritable(floatOI.get(obj));
            break;
        }
        case INT: {
            IntObjectInspector intOI = (IntObjectInspector) inspector;
            converted = new IntWritable(intOI.get(obj));
            break;
        }
        case LONG: {
            LongObjectInspector longOI = (LongObjectInspector) inspector;
            converted = new LongWritable(longOI.get(obj));
            break;
        }
        case STRING: {
            StringObjectInspector stringOI = (StringObjectInspector) inspector;
            converted = new Text(stringOI.getPrimitiveJavaObject(obj));
            break;
        }
        case DATE: {
            DateObjectInspector dateOI = (DateObjectInspector) inspector;
            converted = dateOI.getPrimitiveWritableObject(obj);
            break;
        }
        case TIMESTAMP: {
            TimestampObjectInspector timestampOI = (TimestampObjectInspector) inspector;
            converted = timestampOI.getPrimitiveWritableObject(obj);
            break;
        }
        default:
            throw new SerDeException("Can't serialize primitive : " + inspector.getPrimitiveCategory());
    }
    return converted;
}
 
Example #9
Source Project: incubator-hivemall   Author: apache   File: HiveJsonStructReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts the parser's current scalar token to the primitive type described by {@code oi}.
 *
 * <p>When a current token exists the parser is always advanced by one token afterwards,
 * including when the token type is rejected.
 *
 * @return the converted value, or {@code null} when there is no current token or it is a
 *         JSON null
 * @throws SerDeException if the current token is not a boolean, number, string or null value
 */
private Object parsePrimitive(JsonParser parser, PrimitiveObjectInspector oi)
        throws SerDeException, IOException {
    final JsonToken token = parser.getCurrentToken();
    if (token == null) {
        return null;
    }
    try {
        switch (token) {
            case VALUE_NULL:
                return null;
            case VALUE_TRUE:
            case VALUE_FALSE:
            case VALUE_NUMBER_INT:
            case VALUE_NUMBER_FLOAT:
            case VALUE_STRING:
                return getObjectOfCorrespondingPrimitiveType(parser.getText(), oi);
            default:
                throw new SerDeException("unexpected token type: " + token);
        }
    } finally {
        // Step past the consumed token on every exit path.
        parser.nextToken();
    }
}
 
Example #10
Source Project: hadoopoffice   Author: ZuInnoTe   File: ExcelSpreadSheetCellDAOSerde.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Turns one spreadsheet cell into a five-element String array holding, in order, the cell's
 * formatted value, comment, formula, address and sheet name.
 *
 * @param arg0 the cell to convert
 * @return the five-element array, or {@code null} if {@code arg0} is null or a NullWritable
 * @throws SerDeException if {@code arg0} is any other type than SpreadSheetCellDAO
 */

@Override
public Object deserialize(Writable arg0) throws SerDeException {
	// nothing to convert
	boolean noInput = (arg0 == null) || (arg0 instanceof NullWritable);
	if (noInput) {
		return null;
	}
	if (arg0 instanceof SpreadSheetCellDAO) {
		SpreadSheetCellDAO cell = (SpreadSheetCellDAO) arg0;
		return new String[] {
			cell.getFormattedValue(),
			cell.getComment(),
			cell.getFormula(),
			cell.getAddress(),
			cell.getSheetName()
		};
	}
	throw new SerDeException("Table does not contain objects of type SpreadSheetCellDAO. Did you use the ExcelCellInputFormat of HadoopOffice?");
}
 
Example #11
Source Project: incubator-hivemall   Author: apache   File: HiveJsonStructReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts the parser's current token, expected to be a field name, into a map key of the
 * primitive type described by {@code oi}.
 *
 * <p>When a current token exists the parser is always advanced by one token afterwards,
 * including when the token type is rejected.
 *
 * @return the converted key, or {@code null} when there is no current token or it is a
 *         JSON null
 * @throws SerDeException if the current token is neither a field name nor a null value
 */
private Object parseMapKey(JsonParser parser, PrimitiveObjectInspector oi)
        throws SerDeException, IOException {
    final JsonToken token = parser.getCurrentToken();
    if (token == null) {
        return null;
    }
    try {
        switch (token) {
            case VALUE_NULL:
                return null;
            case FIELD_NAME:
                return getObjectOfCorrespondingPrimitiveType(parser.getText(), oi);
            default:
                throw new SerDeException("unexpected token type: " + token);
        }
    } finally {
        // Step past the consumed token on every exit path.
        parser.nextToken();
    }
}
 
Example #12
Source Project: hadoopoffice   Author: ZuInnoTe   File: ExcelSpreadSheetCellDAOSerdeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a five-element String row and checks that each element ends up in the matching
 * property of the resulting SpreadSheetCellDAO.
 */
@Test
public void serialize() throws SerDeException {
	// serde configured with a German write locale
	Properties tableProps = new Properties();
	tableProps.setProperty("hadoopoffice.write.locale.bcp47", "de");
	ExcelSpreadSheetCellDAOSerde serde = new ExcelSpreadSheetCellDAOSerde();
	serde.initialize(new Configuration(), tableProps);
	ObjectInspector inspector = serde.getObjectInspector();

	// row layout: formatted value, comment, formula, address, sheet name
	String[] row = {"test1", "no comment", "A1*A2", "A3", "Sheet1"};
	SpreadSheetCellDAO cell = (SpreadSheetCellDAO) serde.serialize(row, inspector);

	assertEquals(row[0], cell.getFormattedValue(), "formatted value correct");
	assertEquals(row[1], cell.getComment(), "comment correct");
	assertEquals(row[2], cell.getFormula(), "formula correct");
	assertEquals(row[3], cell.getAddress(), "address correct");
	assertEquals(row[4], cell.getSheetName(), "sheetname correct");
}
 
Example #13
Source Project: hadoopoffice   Author: ZuInnoTe   File: ExcelTextSerdeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Initializes an ExcelSerde from table properties and checks that the two
 * {@code hadoopoffice.read.*} options are visible in the Hadoop configuration afterwards.
 */
@Test
public void initializePositive() throws SerDeException {
	Properties tableProps = new Properties();
	tableProps.setProperty(ExcelSerde.CONF_DEFAULTSHEETNAME, "Sheet2");
	tableProps.setProperty("hadoopoffice.write.header.write", "true");
	tableProps.setProperty("hadoopoffice.read.locale.bcp47", "de");
	tableProps.setProperty("hadoopoffice.read.linkedworkbooks", "true");
	tableProps.setProperty(serdeConstants.LIST_COLUMNS, "column1,column2");
	tableProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");

	Configuration hadoopConf = new Configuration();
	ExcelSerde serde = new ExcelSerde();
	serde.initialize(hadoopConf, tableProps);

	String readLocale = hadoopConf.get("hadoopoffice.read.locale.bcp47", "us");
	assertEquals("de", readLocale,
			"HadoopOffice Hadoop configuration option set");
	boolean linkedWorkbooks = hadoopConf.getBoolean("hadoopoffice.read.linkedworkbooks", false);
	assertTrue(linkedWorkbooks,
			"HaodoopOffice Hadoop configuration option set boolean");
}
 
Example #14
Source Project: hadoopcryptoledger   Author: ZuInnoTe   File: EthereumHiveSerdeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the Ethereum block stored in {@code testdata/eth1346406.bin}, deserializes it with
 * {@code EthereumBlockSerde} and checks the transaction count, the uncle header count and
 * the parent hash of the result.
 *
 * <p>The input stream is opened in a try-with-resources block so the file handle is released
 * even when reading or an assertion fails.
 */
@Test
public void deserialize() throws IOException, EthereumBlockReadException, SerDeException {
	EthereumBlockSerde testSerde = new EthereumBlockSerde();
	// locate the serialized Ethereum block in the test resources
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "eth1346406.bin";
	String fullFileNameString = classLoader.getResource("testdata/" + fileName).getFile();
	File file = new File(fullFileNameString);
	boolean direct = false;
	try (FileInputStream fin = new FileInputStream(file)) {
		EthereumBlockReader ebr = new EthereumBlockReader(fin, EthereumHiveSerdeTest.DEFAULT_MAXSIZE_ETHEREUMBLOCK, EthereumHiveSerdeTest.DEFAULT_BUFFERSIZE, direct);
		EthereumBlock block = ebr.readBlock();
		Object deserializedObject = testSerde.deserialize(block);
		assertTrue(deserializedObject instanceof HiveEthereumBlock, "Deserialized Object is of type HiveEthereumBlock");
		HiveEthereumBlock hiveBlock = (HiveEthereumBlock) deserializedObject;

		assertEquals(6, hiveBlock.getEthereumTransactions().size(), "Block contains 6 transactions");
		assertEquals(0, hiveBlock.getUncleHeaders().size(), "Block contains 0 uncleHeaders");
		byte[] expectedParentHash = new byte[] {(byte)0xBA,(byte)0x6D,(byte)0xD2,(byte)0x60,(byte)0x12,(byte)0xB3,(byte)0x71,(byte)0x90,(byte)0x48,(byte)0xF3,(byte)0x16,(byte)0xC6,(byte)0xED,(byte)0xB3,(byte)0x34,(byte)0x9B,(byte)0xDF,(byte)0xBD,(byte)0x61,(byte)0x31,(byte)0x9F,(byte)0xA9,(byte)0x7C,(byte)0x61,(byte)0x6A,(byte)0x61,(byte)0x31,(byte)0x18,(byte)0xA1,(byte)0xAF,(byte)0x30,(byte)0x67};

		assertArrayEquals(expectedParentHash, hiveBlock.getEthereumBlockHeader().getParentHash(), "Block contains a correct 32 byte parent hash");
	}
}
 
Example #15
Source Project: emodb   Author: bazaarvoice   File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes a row into the shared {@code _values} list, one slot per configured column.
 *
 * <p>Values are stored in the iteration order of {@code _columns}; the same list instance is
 * overwritten and returned on every call.
 *
 * @param writable the row to read; cast to {@code Row}
 * @return {@code _values} holding the deserialized column values
 */
@Override
public Object deserialize(Writable writable)
        throws SerDeException {
    final Row row = (Row) writable;

    int position = 0;
    for (Map.Entry<String, TypeInfo> column : _columns) {
        // Look the raw value up by column name, then convert it to the column's declared type.
        Object rawValue = getRawValue(column.getKey(), row);
        Object converted = deserialize(column.getValue(), rawValue);

        _values.set(position, converted);
        position++;
    }

    return _values;
}
 
Example #16
Source Project: emodb   Author: bazaarvoice   File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Dispatches a primitive value to the deserializer matching its Hive primitive category.
 *
 * @param type primitive type the value should be converted to
 * @param value raw value to convert
 * @return {@code null} for VOID, otherwise the result of the string, boolean, number or date
 *         helper for that category
 * @throws SerDeException if the category is not one of the handled ones
 */
private Object deserializePrimitive(PrimitiveTypeInfo type, Object value)
        throws SerDeException {
    final Object converted;
    switch (type.getPrimitiveCategory()) {
        case VOID:
            converted = null;
            break;
        case STRING:
            converted = deserializeString(value);
            break;
        case BOOLEAN:
            converted = deserializeBoolean(value);
            break;
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
            converted = deserializeNumber(value, type);
            break;
        case DATE:
        case TIMESTAMP:
            converted = deserializeDate(value, type);
            break;
        default:
            throw new SerDeException("Unsupported type: " + type.getPrimitiveCategory());
    }
    return converted;
}
 
Example #17
Source Project: emodb   Author: bazaarvoice   File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts a value to the numeric Java type matching {@code type}'s primitive category.
 *
 * <p>Only {@code Number} and {@code Boolean} inputs are accepted (a Boolean becomes the byte
 * 1 or 0); Strings containing numbers are not parsed.
 *
 * @param value raw value to convert
 * @param type target numeric type (BYTE, SHORT, INT, LONG, FLOAT or DOUBLE)
 * @return the value narrowed or widened to the target type
 * @throws SerDeException if the value is neither a Number nor a Boolean, or the category is
 *         not numeric
 */
private Object deserializeNumber(Object value, PrimitiveTypeInfo type)
        throws SerDeException {
    final Number numeric;
    if (value instanceof Boolean) {
        boolean flag = (Boolean) value;
        numeric = flag ? (byte) 1 : (byte) 0;
    } else if (value instanceof Number) {
        numeric = (Number) value;
    } else {
        throw new SerDeException("Value is not a " + type + ": " + value);
    }

    switch (type.getPrimitiveCategory()) {
        case BYTE:
            return numeric.byteValue();
        case SHORT:
            return numeric.shortValue();
        case INT:
            return numeric.intValue();
        case LONG:
            return numeric.longValue();
        case FLOAT:
            return numeric.floatValue();
        case DOUBLE:
            return numeric.doubleValue();
        default:
            // Not expected: callers dispatch here for numeric categories only.
            throw new SerDeException("Primitive number did not match any expected categories");
    }
}
 
Example #18
Source Project: emodb   Author: bazaarvoice   File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes a map into a struct, represented as a list of field values in the same order
 * as the struct's field names.
 *
 * @param type struct type supplying the field names and field types
 * @param data raw value; must be a {@code Map}
 * @return list with one deserialized value per struct field
 * @throws SerDeException if {@code data} is not a Map
 */
private Object deserializeStruct(StructTypeInfo type, Object data)
        throws SerDeException {
    if (!(data instanceof Map)) {
        throw new SerDeException("Value not of type map");
    }
    //noinspection unchecked
    final Map<String, Object> source = (Map<String, Object>) data;

    final List<String> names = type.getAllStructFieldNames();
    final List<TypeInfo> types = type.getAllStructFieldTypeInfos();
    final int fieldCount = names.size();

    final List<Object> fieldValues = Lists.newArrayListWithCapacity(fieldCount);
    for (int field = 0; field < fieldCount; field++) {
        Object raw = getRawValueOrNullIfAbsent(names.get(field), source);
        fieldValues.add(deserialize(types.get(field), raw));
    }

    return fieldValues;
}
 
Example #19
Source Project: emodb   Author: bazaarvoice   File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes every element of a list to the list type's element type.
 *
 * @param type list type supplying the element type
 * @param data raw value; must be a {@code List}
 * @return a new list holding the deserialized elements in their original order
 * @throws SerDeException if {@code data} is not a List
 */
private Object deserializeList(ListTypeInfo type, Object data)
        throws SerDeException {
    if (data instanceof List) {
        //noinspection unchecked
        final List<Object> rawElements = (List<Object>) data;

        final List<Object> converted = Lists.newArrayListWithCapacity(rawElements.size());
        for (Object rawElement : rawElements) {
            converted.add(deserialize(type.getListElementTypeInfo(), rawElement));
        }
        return converted;
    }

    throw new SerDeException("Value not of type list");
}
 
Example #20
Source Project: emr-dynamodb-connector   Author: awslabs   File: DynamoDBSerDeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a row with a string, a double, a long and a boolean column and checks that the
 * resulting item maps each attribute name to the expected {@code AttributeValue}.
 */
@Test
public void testPrimitives() throws SerDeException {
  final List<String> names = PRIMITIVE_FIELDS;
  final List<ObjectInspector> inspectors = PRIMITIVE_OIS;
  final List<String> raw = PRIMITIVE_STRING_DATA;

  // Row as handed to the serde: typed Java values parsed from the raw strings.
  List<Object> row = Lists.newArrayList();
  row.add(raw.get(0));
  row.add(Double.parseDouble(raw.get(1)));
  row.add(Long.parseLong(raw.get(2)));
  row.add(Boolean.valueOf(raw.get(3)));

  // Expected item: string, two numeric attributes, one boolean attribute.
  Map<String, AttributeValue> expected = Maps.newHashMap();
  expected.put(names.get(0), new AttributeValue(raw.get(0)));
  expected.put(names.get(1), new AttributeValue().withN(raw.get(1)));
  expected.put(names.get(2), new AttributeValue().withN(raw.get(2)));
  expected.put(names.get(3), new AttributeValue().withBOOL(Boolean.valueOf(raw.get(3))));

  Map<String, AttributeValue> actual = getSerializedItem(names, inspectors, row);

  assertEquals(expected, actual);
}
 
Example #21
Source Project: emr-dynamodb-connector   Author: awslabs   File: DynamoDBSerDeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a two-column row whose second value is null, once with null serialization
 * disabled (attribute expected to be absent) and once enabled (attribute expected as NULL).
 */
@Test
public void testNull() throws SerDeException {
  final List<String> names = PRIMITIVE_FIELDS.subList(0, 2);
  final List<ObjectInspector> inspectors = PRIMITIVE_OIS.subList(0, 2);

  // Copy the first two raw values so the second can be nulled out.
  final List<String> raw = Lists.newArrayList(PRIMITIVE_STRING_DATA.subList(0, 2));
  raw.set(1, null);

  final List<Object> row = Lists.newArrayList();
  row.addAll(raw);

  final Map<String, AttributeValue> expected = Maps.newHashMap();
  expected.put(names.get(0), new AttributeValue(raw.get(0)));

  // null serialization disabled
  Map<String, AttributeValue> actual = getSerializedItem(names, inspectors, row, false);
  assertEquals(expected, actual);

  // null serialization enabled
  expected.put(names.get(1), new AttributeValue().withNULL(true));
  actual = getSerializedItem(names, inspectors, row, true);
  assertEquals(expected, actual);
}
 
Example #22
Source Project: HiveKudu-Handler   Author: BimalTandel   File: HiveKuduBridgeUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Maps a Kudu column type to the Java primitive object inspector used for it.
 *
 * @param kuduType Kudu type to look up
 * @param hiveType Hive type name; only used in the error message
 * @return the matching inspector from {@code PrimitiveObjectInspectorFactory}
 * @throws SerDeException if {@code kuduType} has no mapping
 */
public static ObjectInspector getObjectInspector(Type kuduType,
                                                 String hiveType) throws SerDeException {
    final ObjectInspector mapped;
    switch (kuduType) {
        case STRING:
            mapped = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
            break;
        case FLOAT:
            mapped = PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
            break;
        case DOUBLE:
            mapped = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
            break;
        case BOOL:
            mapped = PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
            break;
        case INT8:
            mapped = PrimitiveObjectInspectorFactory.javaByteObjectInspector;
            break;
        case INT16:
            mapped = PrimitiveObjectInspectorFactory.javaShortObjectInspector;
            break;
        case INT32:
            mapped = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
            break;
        case INT64:
            mapped = PrimitiveObjectInspectorFactory.javaLongObjectInspector;
            break;
        case TIMESTAMP:
            mapped = PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
            break;
        case BINARY:
            mapped = PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
            break;
        default:
            throw new SerDeException("Cannot find getObjectInspector for: "
                    + hiveType);
    }
    return mapped;
}
 
Example #23
Source Project: presto   Author: prestosql   File: TestOrcReaderPositions.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code count} VARCHAR rows of '0' characters to {@code file} through an ORC record
 * writer, lengthening the cell by {@code initialLength} characters every {@code step} rows.
 *
 * @param file destination handed to {@code createOrcRecordWriter}
 * @param count number of rows to write
 * @param step number of consecutive rows that share one cell length
 * @param initialLength length of the first cell and of each growth increment
 */
private static void createGrowingSequentialFile(File file, int count, int step, int initialLength)
        throws IOException, SerDeException
{
    FileSinkOperator.RecordWriter orcWriter = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, VARCHAR);

    Serializer serializer = new OrcSerde();
    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", VARCHAR);
    Object reusableRow = inspector.create();
    StructField valueField = inspector.getAllStructFieldRefs().get(0);

    // The seed is initialLength zeros; the cell grows by one seed at a time.
    StringBuilder cell = new StringBuilder();
    int appended = 0;
    while (appended < initialLength) {
        cell.append("0");
        appended++;
    }
    String seed = cell.toString();

    int currentLength = initialLength;
    for (int rowIndex = 0; rowIndex < count; rowIndex++) {
        int targetLength = (rowIndex / step + 1) * initialLength;
        if (targetLength > currentLength) {
            currentLength = targetLength;
            cell.append(seed);
        }
        inspector.setStructFieldData(reusableRow, valueField, cell.toString());
        orcWriter.write(serializer.serialize(reusableRow, inspector));
    }

    orcWriter.close(false);
}
 
Example #24
Source Project: incubator-hivemall   Author: apache   File: JsonSerdeUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses the JSON held in {@code t} according to {@code columnType}, ignoring unknown fields.
 *
 * @param t text whose first {@code t.getLength()} bytes are parsed
 * @param columnType type the JSON is expected to describe
 * @return the parsed value, cast unchecked to the caller's expected type
 * @throws SerDeException wrapping any {@code IOException} raised while parsing
 */
@SuppressWarnings("unchecked")
@Nonnull
public static <T> T deserialize(@Nonnull final Text t, @Nonnull TypeInfo columnType)
        throws SerDeException {
    final HiveJsonStructReader structReader = new HiveJsonStructReader(columnType);
    structReader.setIgnoreUnknownFields(true);
    try {
        // Text's backing array may be longer than its content, hence the explicit length.
        return (T) structReader.parseStruct(
            new FastByteArrayInputStream(t.getBytes(), t.getLength()));
    } catch (IOException e) {
        throw new SerDeException(e);
    }
}
 
Example #25
Source Project: incubator-hivemall   Author: apache   File: JsonSerdeUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Appends the JSON text for a map value to {@code sb}.
 *
 * <p>Writes the literal {@code null} when the inspector yields no map for {@code obj}.
 * Otherwise writes comma-separated {@code key:value} pairs between braces; a rendered key
 * that is non-empty and does not already start with {@code SerDeUtils.QUOTE} is passed
 * through {@code appendWithQuotes}.
 *
 * @param sb buffer receiving the output
 * @param obj map object to serialize, may be null
 * @param moi inspector used to read {@code obj}
 */
private static void serializeMap(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nonnull final MapObjectInspector moi) throws SerDeException {
    final ObjectInspector keyInspector = moi.getMapKeyObjectInspector();
    final ObjectInspector valueInspector = moi.getMapValueObjectInspector();
    final Map<?, ?> entries = moi.getMap(obj);
    if (entries == null) {
        sb.append("null");
        return;
    }

    sb.append(SerDeUtils.LBRACE);
    boolean needsComma = false;
    for (Map.Entry<?, ?> entry : entries.entrySet()) {
        if (needsComma) {
            sb.append(SerDeUtils.COMMA);
        }
        needsComma = true;

        // Render the key separately so its quoting can be inspected before it is emitted.
        final StringBuilder keyJson = new StringBuilder();
        buildJSONString(keyJson, entry.getKey(), keyInspector);
        final String key = keyJson.toString().trim();
        final boolean emitAsIs = key.isEmpty() || (key.charAt(0) == SerDeUtils.QUOTE);
        if (emitAsIs) {
            sb.append(key);
        } else {
            appendWithQuotes(sb, key);
        }

        sb.append(SerDeUtils.COLON);
        buildJSONString(sb, entry.getValue(), valueInspector);
    }
    sb.append(SerDeUtils.RBRACE);
}
 
Example #26
Source Project: indexr   Author: shunfei   File: IndexRSerde.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the row carried by {@code writable}, re-ordered to this serde's column layout when
 * a mapping is needed.
 *
 * <p>The column mapping is recomputed only when the incoming {@code columns} reference
 * differs from the one seen last. {@code serdeSize} is updated on every call.
 *
 * @param writable the row; cast to {@code SchemaWritable}
 * @return the reader itself when no mapping is needed, otherwise an {@code ArrayWritable}
 *         sized to {@code columnNames} with the mapped columns filled in
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    final SchemaWritable reader = (SchemaWritable) writable;

    // Identity comparison: remap only when a different columns object shows up.
    if (this.projectCols != reader.columns) {
        mapColIndex(reader.columns);
        projectCols = reader.columns;
    }

    if (isMapNeeded) {
        final Writable[] source = reader.get();
        final Writable[] remapped = new Writable[columnNames.size()];
        final int mappedCount = validColIndexes.length;
        for (int k = 0; k < mappedCount; k++) {
            remapped[validColIndexes[k]] = source[validColMapIds[k]];
        }

        serdeSize = mappedCount;
        return new ArrayWritable(Writable.class, remapped);
    }

    serdeSize = columnNames.size();
    return reader;
}
 
Example #27
Source Project: multiple-dimension-spread   Author: yahoojapan   File: MDSSerde.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code obj} in a {@code ParserWritable} backed by this serde's struct parser.
 *
 * <p>The parser is created from {@code objInspector} on first use and reused afterwards;
 * each call points it at the new object.
 *
 * @param obj row object to expose through the parser
 * @param objInspector inspector used to create the parser when none exists yet
 * @return a new {@code ParserWritable} holding the parser
 * @throws SerDeException wrapping any {@code IOException} raised while preparing the parser
 */
@Override
public Writable serialize( final Object obj, final ObjectInspector objInspector ) throws SerDeException{
  final ParserWritable wrapped = new ParserWritable();
  try{
    if( parser == null ){
      parser = (HiveStructParser)( messageReader.create( objInspector ) );
      parser.setFieldIndexMap( filedIndexMap );
    }
    parser.setObject( obj );
    wrapped.set( parser );
    return wrapped;
  }catch( IOException e ){
    throw new SerDeException( e );
  }
}
 
Example #28
Source Project: circus-train   Author: HotelsDotCom   File: CircusTrainParquetSchemaEvolutionIntegrationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that each column's type and name equal the type and name generated from the Avro
 * schema at the same position.
 *
 * @param schema Avro schema the expected columns are generated from
 * @param cols actual columns to verify
 */
private void assertColumnSchema(Schema schema, List<FieldSchema> cols) throws SerDeException {
  AvroObjectInspectorGenerator generated = new AvroObjectInspectorGenerator(schema);
  int position = 0;
  for (FieldSchema column : cols) {
    String expectedType = generated.getColumnTypes().get(position).toString();
    assertThat(column.getType(), is(expectedType));
    assertThat(column.getName(), is(generated.getColumnNames().get(position)));
    position++;
  }
}
 
Example #29
Source Project: incubator-hivemall   Author: apache   File: HiveJsonStructReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses a JSON array at the parser's current position into a list.
 *
 * <p>Elements are read through {@code parseDispatcher} until the current token is END_ARRAY
 * or there are no more tokens; the parser is then advanced once more.
 *
 * @param parser parser positioned on the array start or on a JSON null
 * @param oi inspector supplying the element inspector
 * @return the parsed elements, or {@code null} if the current token is a JSON null (the
 *         parser is advanced past it)
 * @throws SerDeException if the current token is neither null nor START_ARRAY, or if parsing
 *         an element fails (the cause is attached)
 */
private Object parseList(JsonParser parser, ListObjectInspector oi)
        throws JsonParseException, IOException, SerDeException {
    final List<Object> elements = new ArrayList<>();

    final JsonToken startToken = parser.getCurrentToken();
    if (startToken == JsonToken.VALUE_NULL) {
        parser.nextToken();
        return null;
    }
    if (startToken != JsonToken.START_ARRAY) {
        throw new SerDeException("array expected");
    }

    final ObjectInspector elementInspector = oi.getListElementObjectInspector();
    JsonToken token = parser.nextToken();
    try {
        // parseDispatcher is relied on to move the parser forward; the loop only re-reads
        // the current token after each element.
        while (!(token == null || token == JsonToken.END_ARRAY)) {
            elements.add(parseDispatcher(parser, elementInspector));
            token = parser.getCurrentToken();
        }
    } catch (Exception e) {
        throw new SerDeException("array: " + e.getMessage(), e);
    }

    // Step past the token the loop stopped on.
    parser.nextToken();

    return elements;
}
 
Example #30
Source Project: dremio-oss   Author: dremio   File: HiveTextReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds {@code value} to the footer buffer and releases the buffer's head once the buffer
 * holds more than {@code footerCount} entries.
 *
 * @param value value to buffer
 * @return the polled head of the buffer, or {@code null} while the buffer size does not
 *         exceed {@code footerCount}
 */
protected Object bufferAdd(Object value) throws SerDeException {
  footerBuffer.add(value);
  boolean overflowed = footerBuffer.size() > footerCount;
  return overflowed ? footerBuffer.poll() : null;
}