Java Code Examples for org.apache.hadoop.hive.serde2.SerDeException

The following examples show how to use org.apache.hadoop.hive.serde2.SerDeException. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: presto   Source File: TestOrcReaderPositions.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the values 0, 3, 6, ... 297 as single-column BIGINT rows into an
 * uncompressed ORC file, calling {@code flushWriter} before every value that
 * is a positive multiple of 60.
 *
 * @param file destination handed to {@code createOrcRecordWriter}
 */
private static void createMultiStripeFile(File file)
        throws IOException, ReflectiveOperationException, SerDeException
{
    FileSinkOperator.RecordWriter orcWriter = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);

    Serializer serializer = new OrcSerde();
    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", BIGINT);
    Object reusableRow = inspector.create();
    StructField onlyColumn = inspector.getAllStructFieldRefs().get(0);

    for (long value = 0; value < 300; value += 3) {
        boolean atFlushPoint = (value > 0) && (value % 60 == 0);
        if (atFlushPoint) {
            flushWriter(orcWriter);
        }

        // the same row object is reused; only its single field is overwritten
        inspector.setStructFieldData(reusableRow, onlyColumn, value);
        orcWriter.write(serializer.serialize(reusableRow, inspector));
    }

    orcWriter.close(false);
}
 
Example 2
Source Project: incubator-hivemall   Source File: JsonSerdeUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads tokens from the parser until {@code JsonToken.END_ARRAY} and collects
 * each extracted element into a list.
 *
 * @param p parser to read the array elements from
 * @param columnTypes must be non-null and contain exactly one entry, which is
 *        converted to an HCat schema whose first field supplies the array
 *        element schema
 * @return the extracted elements, in the order they were read
 * @throws SerDeException declared for the null check on {@code columnTypes}
 *         (the exception class is passed to {@code Preconditions.checkNotNull})
 * @throws IOException if {@code columnTypes} does not hold exactly one entry
 */
@Nonnull
private static List<Object> parseArray(@Nonnull final JsonParser p,
        @CheckForNull final List<TypeInfo> columnTypes)
        throws HCatException, IOException, SerDeException {
    Preconditions.checkNotNull(columnTypes, "columnTypes MUST NOT be null",
        SerDeException.class);
    if (columnTypes.size() != 1) {
        // message previously read "but go" (typo)
        throw new IOException("Expected a single array but got " + columnTypes);
    }

    TypeInfo elemType = columnTypes.get(0);
    HCatSchema schema = HCatSchemaUtils.getHCatSchema(elemType);

    HCatFieldSchema listSchema = schema.get(0);
    HCatFieldSchema elemSchema = listSchema.getArrayElementSchema().get(0);

    final List<Object> arr = new ArrayList<Object>();
    while (p.nextToken() != JsonToken.END_ARRAY) {
        arr.add(extractCurrentField(p, elemSchema, true));
    }
    return arr;
}
 
Example 3
Source Project: presto   Source File: GenericHiveRecordCursor.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Moves the cursor to the next record and deserializes it into {@code rowData}.
 *
 * @return {@code true} when a record was read; {@code false} (after calling
 *         {@code close()}) when the cursor is already closed or the reader
 *         has no further record
 */
@Override
public boolean advanceNextPosition()
{
    try {
        boolean hasRecord = !closed && recordReader.next(key, value);
        if (!hasRecord) {
            close();
            return false;
        }

        // nothing from the new row has been loaded yet
        Arrays.fill(loaded, false);

        rowData = deserializer.deserialize(value);
        return true;
    }
    catch (IOException | SerDeException | RuntimeException failure) {
        closeWithSuppression(this, failure);
        boolean lineTooLong = failure instanceof TextLineLengthLimitExceededException;
        if (lineTooLong) {
            throw new PrestoException(HIVE_BAD_DATA, "Line too long in text file: " + path, failure);
        }
        throw new PrestoException(HIVE_CURSOR_ERROR, failure);
    }
}
 
Example 4
Source Project: incubator-hivemall   Source File: JsonSerdeUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Appends the JSON form of a list to {@code sb}: the literal {@code null} when
 * the inspector yields no list, otherwise the elements separated by
 * {@code SerDeUtils.COMMA} between {@code SerDeUtils.LBRACKET} and
 * {@code SerDeUtils.RBRACKET}.
 *
 * <p>The former {@code @Nonnull} on this {@code void} method was dropped since
 * there is no return value to annotate, and {@code loi} is now declared
 * {@code @Nonnull} because it is dereferenced unconditionally.
 *
 * @param sb buffer the JSON text is appended to
 * @param obj list object handed to the inspector; may be null
 * @param loi inspector used to read the list and to obtain the element inspector
 * @throws SerDeException declared for element serialization via {@code buildJSONString}
 */
private static void serializeList(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nonnull final ListObjectInspector loi) throws SerDeException {
    ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
    List<?> olist = loi.getList(obj);

    if (olist == null) {
        sb.append("null");
    } else {
        sb.append(SerDeUtils.LBRACKET);
        for (int i = 0; i < olist.size(); i++) {
            if (i > 0) {
                sb.append(SerDeUtils.COMMA);
            }
            buildJSONString(sb, olist.get(i), listElementObjectInspector);
        }
        sb.append(SerDeUtils.RBRACKET);
    }
}
 
Example 5
Source Project: incubator-hivemall   Source File: HiveUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@code LazySimpleSerDe} initialized with one column per given
 * inspector. Columns are named {@code c1, c2, ...} and typed with each
 * inspector's {@code getTypeName()}.
 *
 * @param OIs inspectors describing the columns; at least one is required
 * @return the initialized serde
 * @throws IllegalArgumentException if no inspector is given
 * @throws SerDeException declared for {@code serde.initialize}
 */
public static LazySimpleSerDe getLineSerde(@Nonnull final PrimitiveObjectInspector... OIs)
        throws SerDeException {
    if (OIs.length == 0) {
        throw new IllegalArgumentException("OIs must be specified");
    }
    LazySimpleSerDe lineSerde = new LazySimpleSerDe();
    Configuration configuration = new Configuration();
    Properties tableProps = new Properties();

    // build the two comma-separated lists, writing the separator up front
    StringBuilder names = new StringBuilder();
    StringBuilder types = new StringBuilder();
    int position = 0;
    for (PrimitiveObjectInspector oi : OIs) {
        if (position > 0) {
            names.append(',');
            types.append(',');
        }
        position++;
        names.append('c').append(position);
        types.append(oi.getTypeName());
    }

    tableProps.setProperty("columns", names.toString());
    tableProps.setProperty("columns.types", types.toString());
    lineSerde.initialize(configuration, tableProps);
    return lineSerde;
}
 
Example 6
Source Project: incubator-iotdb   Source File: TsFileSerDeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the fixture: a {@code TsFileSerDe}, two bigint columns
 * ("time_stamp", "sensor_1"), the matching table properties and a
 * {@code JobConf}, then initializes the serde with them.
 *
 * <p>An initialization failure aborts the setup with an {@link AssertionError}
 * carrying the cause, rather than being printed and ignored, so tests never
 * run against a serde that failed to initialize.
 */
@Before
public void setUp() {
  tsFileSerDer = new TsFileSerDe();
  columnNames = Arrays.asList("time_stamp", "sensor_1");
  columnTypes = new ArrayList<>();
  PrimitiveTypeInfo typeInfo1 = new PrimitiveTypeInfo();
  typeInfo1.setTypeName("bigint");
  columnTypes.add(typeInfo1);
  PrimitiveTypeInfo typeInfo2 = new PrimitiveTypeInfo();
  typeInfo2.setTypeName("bigint");
  columnTypes.add(typeInfo2);
  tbl = new Properties();
  String delimiter = ",";
  tbl.setProperty(serdeConstants.COLUMN_NAME_DELIMITER, delimiter);
  tbl.setProperty(serdeConstants.LIST_COLUMNS, String.join(delimiter, columnNames));
  tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "bigint,bigint");
  tbl.setProperty(TsFileSerDe.DEVICE_ID, "device_1");
  job = new JobConf();
  try {
    tsFileSerDer.initialize(job, tbl);
  } catch (SerDeException e) {
    // fail fast and keep the original exception as the cause
    throw new AssertionError("Failed to initialize TsFileSerDe in test setup", e);
  }
}
 
Example 7
Source Project: multiple-dimension-spread   Source File: MDSSerde.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Logs the table (and, when present, partition) properties and builds the
 * object inspector from the table's column names and types. When the
 * configuration lists projected read columns, the projection type info is
 * used; otherwise the type info for all columns is used.
 *
 * @param conf configuration consulted for the projected column names
 * @param table table properties supplying the column names and types
 * @param part partition properties; only logged, may be null
 */
@Override
public void initialize( final Configuration conf, final Properties table , final Properties part ) throws SerDeException{
  LOG.info( table.toString() );
  if( part != null ){
    LOG.info( part.toString() );
  }

  final String names = table.getProperty(serdeConstants.LIST_COLUMNS);
  final String types = table.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  final String projected = conf.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , "" );

  // an empty projection list means every column is read
  final StructTypeInfo rootType = projected.isEmpty()
      ? getAllReadTypeInfo( names , types )
      : getColumnProjectionTypeInfo( names , types , projected );

  inspector = MDSObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( rootType );
}
 
Example 8
Source Project: indexr   Source File: IndexRSerde.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts a primitive value into a {@code Writable} according to the
 * inspector's primitive category.
 *
 * @param obj value to convert; {@code null} yields {@code null}
 * @param inspector inspector describing and reading {@code obj}
 * @return the writable for {@code obj}, or {@code null} when {@code obj} is null
 * @throws SerDeException if the category is not one of DOUBLE, FLOAT, INT,
 *         LONG, STRING, DATE or TIMESTAMP
 */
private static Writable createPrimitive(Object obj, PrimitiveObjectInspector inspector)
        throws SerDeException {
    if (obj == null) {
        return null;
    }

    final Writable converted;
    switch (inspector.getPrimitiveCategory()) {
        case DOUBLE:
            converted = new DoubleWritable(((DoubleObjectInspector) inspector).get(obj));
            break;
        case FLOAT:
            converted = new FloatWritable(((FloatObjectInspector) inspector).get(obj));
            break;
        case INT:
            converted = new IntWritable(((IntObjectInspector) inspector).get(obj));
            break;
        case LONG:
            converted = new LongWritable(((LongObjectInspector) inspector).get(obj));
            break;
        case STRING:
            converted = new Text(((StringObjectInspector) inspector).getPrimitiveJavaObject(obj));
            break;
        case DATE:
            converted = ((DateObjectInspector) inspector).getPrimitiveWritableObject(obj);
            break;
        case TIMESTAMP:
            converted = ((TimestampObjectInspector) inspector).getPrimitiveWritableObject(obj);
            break;
        default:
            throw new SerDeException("Can't serialize primitive : " + inspector.getPrimitiveCategory());
    }
    return converted;
}
 
Example 9
Source Project: incubator-hivemall   Source File: HiveJsonStructReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts the parser's current scalar token into the primitive described by
 * {@code oi}, then advances the parser by one token.
 *
 * @return the converted value, or {@code null} for a JSON null or when the
 *         parser has no current token (in which case the parser is not advanced)
 * @throws SerDeException if the current token is not a boolean, number,
 *         string or null value token
 */
private Object parsePrimitive(JsonParser parser, PrimitiveObjectInspector oi)
        throws SerDeException, IOException {
    final JsonToken token = parser.getCurrentToken();
    if (token == null) {
        return null;
    }

    try {
        switch (token) {
            case VALUE_NULL:
                return null;
            case VALUE_TRUE:
            case VALUE_FALSE:
            case VALUE_STRING:
            case VALUE_NUMBER_INT:
            case VALUE_NUMBER_FLOAT:
                // every scalar is converted from its textual form
                return getObjectOfCorrespondingPrimitiveType(parser.getText(), oi);
            default:
                throw new SerDeException("unexpected token type: " + token);
        }
    } finally {
        // the token is consumed on success and on failure alike
        parser.nextToken();
    }
}
 
Example 10
Source Project: hadoopoffice   Source File: ExcelSpreadSheetCellDAOSerde.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Turns one spreadsheet cell into a five-element row, in this order:
 * formatted value, comment, formula, address, sheet name.
 *
 * @param arg0 the cell to convert
 * @return the five-element string array, or {@code null} when {@code arg0}
 *         is null or a {@code NullWritable}
 * @throws SerDeException if {@code arg0} is not a {@code SpreadSheetCellDAO}
 */
@Override
public Object deserialize(Writable arg0) throws SerDeException {
	// nothing to convert
	if (arg0 == null || arg0 instanceof NullWritable) {
		return null;
	}
	if (!(arg0 instanceof SpreadSheetCellDAO)) {
		throw new SerDeException("Table does not contain objects of type SpreadSheetCellDAO. Did you use the ExcelCellInputFormat of HadoopOffice?");
	}
	final SpreadSheetCellDAO cell = (SpreadSheetCellDAO) arg0;
	return new String[] {
		cell.getFormattedValue(),
		cell.getComment(),
		cell.getFormula(),
		cell.getAddress(),
		cell.getSheetName()
	};
}
 
Example 11
Source Project: incubator-hivemall   Source File: HiveJsonStructReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts the parser's current field-name token into the primitive described
 * by {@code oi}, then advances the parser by one token.
 *
 * @return the converted key, or {@code null} for a JSON null or when the
 *         parser has no current token (in which case the parser is not advanced)
 * @throws SerDeException if the current token is neither a field name nor a null value
 */
private Object parseMapKey(JsonParser parser, PrimitiveObjectInspector oi)
        throws SerDeException, IOException {
    final JsonToken token = parser.getCurrentToken();
    if (token == null) {
        return null;
    }

    try {
        if (token == JsonToken.FIELD_NAME) {
            return getObjectOfCorrespondingPrimitiveType(parser.getText(), oi);
        }
        if (token == JsonToken.VALUE_NULL) {
            return null;
        }
        throw new SerDeException("unexpected token type: " + token);
    } finally {
        // the token is consumed on success and on failure alike
        parser.nextToken();
    }
}
 
Example 12
/**
 * Serializes a five-element string row and checks that each element ends up
 * in the matching property of the resulting {@code SpreadSheetCellDAO}.
 */
@Test
public void serialize() throws SerDeException {
	// serde initialized with the write locale table property
	final ExcelSpreadSheetCellDAOSerde serde = new ExcelSpreadSheetCellDAOSerde();
	final Configuration conf = new Configuration();
	final Properties tableProps = new Properties();
	tableProps.setProperty("hadoopoffice.write.locale.bcp47", "de");
	serde.initialize(conf, tableProps);

	final ObjectInspector inspector = serde.getObjectInspector();
	// order: formatted value, comment, formula, address, sheet name
	final String[] row = new String[] { "test1", "no comment", "A1*A2", "A3", "Sheet1" };

	final SpreadSheetCellDAO cell = (SpreadSheetCellDAO) serde.serialize(row, inspector);
	assertEquals(row[0], cell.getFormattedValue(), "formatted value correct");
	assertEquals(row[1], cell.getComment(), "comment correct");
	assertEquals(row[2], cell.getFormula(), "formula correct");
	assertEquals(row[3], cell.getAddress(), "address correct");
	assertEquals(row[4], cell.getSheetName(), "sheetname correct");
}
 
Example 13
Source Project: hadoopoffice   Source File: ExcelTextSerdeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Initializes an {@code ExcelSerde} with several table properties and checks
 * that the two {@code hadoopoffice.read.*} options are afterwards readable
 * from the Hadoop configuration.
 */
@Test
public void initializePositive() throws SerDeException {
	final ExcelSerde serde = new ExcelSerde();
	final Configuration hadoopConf = new Configuration();
	final Properties props = new Properties();

	// table schema
	props.setProperty(serdeConstants.LIST_COLUMNS, "column1,column2");
	props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");
	// serde and HadoopOffice options
	props.setProperty(ExcelSerde.CONF_DEFAULTSHEETNAME, "Sheet2");
	props.setProperty("hadoopoffice.write.header.write", "true");
	props.setProperty("hadoopoffice.read.locale.bcp47", "de");
	props.setProperty("hadoopoffice.read.linkedworkbooks", "true");

	serde.initialize(hadoopConf, props);

	final String locale = hadoopConf.get("hadoopoffice.read.locale.bcp47", "us");
	assertEquals("de", locale,
			"HadoopOffice Hadoop configuration option set");
	final boolean linkedWorkbooks = hadoopConf.getBoolean("hadoopoffice.read.linkedworkbooks", false);
	assertTrue(linkedWorkbooks,
			"HaodoopOffice Hadoop configuration option set boolean");
}
 
Example 14
Source Project: hadoopcryptoledger   Source File: EthereumHiveSerdeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads one Ethereum block from the test resource {@code testdata/eth1346406.bin},
 * deserializes it with {@code EthereumBlockSerde} and checks the transaction
 * count, the uncle header count and the parent hash.
 *
 * <p>The input stream is opened in a try-with-resources statement so it is
 * closed even when reading or an assertion fails.
 */
@Test
public void deserialize() throws IOException, EthereumBlockReadException, SerDeException {
	EthereumBlockSerde testSerde = new EthereumBlockSerde();
	// locate the Ethereum block test data on the classpath
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "eth1346406.bin";
	String fullFileNameString = classLoader.getResource("testdata/" + fileName).getFile();
	File file = new File(fullFileNameString);
	boolean direct = false;
	try (FileInputStream fin = new FileInputStream(file)) {
		EthereumBlockReader ebr = new EthereumBlockReader(fin, EthereumHiveSerdeTest.DEFAULT_MAXSIZE_ETHEREUMBLOCK, EthereumHiveSerdeTest.DEFAULT_BUFFERSIZE, direct);
		EthereumBlock block = ebr.readBlock();
		Object deserializedObject = testSerde.deserialize(block);
		assertTrue(deserializedObject instanceof HiveEthereumBlock, "Deserialized Object is of type HiveEthereumBlock");
		HiveEthereumBlock deserializedEthereumBlock = (HiveEthereumBlock) deserializedObject;

		assertEquals(6, deserializedEthereumBlock.getEthereumTransactions().size(), "Block contains 6 transactions");
		assertEquals(0, deserializedEthereumBlock.getUncleHeaders().size(), "Block contains 0 uncleHeaders");
		byte[] expectedParentHash = new byte[] {(byte)0xBA,(byte)0x6D,(byte)0xD2,(byte)0x60,(byte)0x12,(byte)0xB3,(byte)0x71,(byte)0x90,(byte)0x48,(byte)0xF3,(byte)0x16,(byte)0xC6,(byte)0xED,(byte)0xB3,(byte)0x34,(byte)0x9B,(byte)0xDF,(byte)0xBD,(byte)0x61,(byte)0x31,(byte)0x9F,(byte)0xA9,(byte)0x7C,(byte)0x61,(byte)0x6A,(byte)0x61,(byte)0x31,(byte)0x18,(byte)0xA1,(byte)0xAF,(byte)0x30,(byte)0x67};

		assertArrayEquals(expectedParentHash, deserializedEthereumBlock.getEthereumBlockHeader().getParentHash(), "Block contains a correct 32 byte parent hash");
	}
}
 
Example 15
Source Project: emodb   Source File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes a {@code Row} into the shared {@code _values} list, one entry
 * per configured column and in column order.
 *
 * @param writable the row to read; cast to {@code Row} without a type check
 * @return the {@code _values} list, overwritten in place on every call
 */
@Override
public Object deserialize(Writable writable)
        throws SerDeException {
    final Row source = (Row) writable;

    // Values are stored positionally, following the iteration order of _columns.
    int position = 0;
    for (Map.Entry<String, TypeInfo> column : _columns) {
        final String name = column.getKey();
        final TypeInfo expectedType = column.getValue();

        // look up the raw value by column name, then convert it to the column's type
        final Object raw = getRawValue(name, source);
        final Object converted = deserialize(expectedType, raw);

        _values.set(position, converted);
        position++;
    }

    return _values;
}
 
Example 16
Source Project: emodb   Source File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Dispatches a primitive value to the converter matching its primitive
 * category: strings, booleans, the numeric categories and date/timestamp each
 * have their own helper, and VOID always yields {@code null}.
 *
 * @throws SerDeException if the category is not handled here
 */
private Object deserializePrimitive(PrimitiveTypeInfo type, Object value)
        throws SerDeException {
    final Object converted;
    switch (type.getPrimitiveCategory()) {
        case VOID:
            converted = null;
            break;
        case STRING:
            converted = deserializeString(value);
            break;
        case BOOLEAN:
            converted = deserializeBoolean(value);
            break;
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
            converted = deserializeNumber(value, type);
            break;
        case DATE:
        case TIMESTAMP:
            converted = deserializeDate(value, type);
            break;
        default:
            throw new SerDeException("Unsupported type: " + type.getPrimitiveCategory());
    }
    return converted;
}
 
Example 17
Source Project: emodb   Source File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts a {@code Number} or {@code Boolean} to the boxed numeric type of
 * the requested primitive category. Booleans become 1 or 0 first; any other
 * input, including a numeric string, is rejected.
 *
 * @throws SerDeException if {@code value} is neither a number nor a boolean,
 *         or the category is not one of BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
 */
private Object deserializeNumber(Object value, PrimitiveTypeInfo type)
        throws SerDeException {
    if (!(value instanceof Number) && !(value instanceof Boolean)) {
        throw new SerDeException("Value is not a " + type + ": " + value);
    }

    final Number numeric;
    if (value instanceof Number) {
        numeric = (Number) value;
    } else {
        // true -> 1, false -> 0, held as a byte
        numeric = Byte.valueOf((byte) (((Boolean) value) ? 1 : 0));
    }

    switch (type.getPrimitiveCategory()) {
        case BYTE:
            return numeric.byteValue();
        case SHORT:
            return numeric.shortValue();
        case INT:
            return numeric.intValue();
        case LONG:
            return numeric.longValue();
        case FLOAT:
            return numeric.floatValue();
        case DOUBLE:
            return numeric.doubleValue();
        default:
            // only reached for a category outside the six numeric ones
            throw new SerDeException("Primitive number did not match any expected categories");
    }
}
 
Example 18
Source Project: emodb   Source File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes a map into a struct, returned as a list of field values in the
 * same order as the struct's field names.
 *
 * @param type struct type supplying the field names and field types
 * @param data expected to be a {@code Map}
 * @return one deserialized value per struct field
 * @throws SerDeException if {@code data} is not a map
 */
private Object deserializeStruct(StructTypeInfo type, Object data)
        throws SerDeException {
    if (!(data instanceof Map)) {
        throw new SerDeException("Value not of type map");
    }
    //noinspection unchecked
    final Map<String, Object> source = (Map<String, Object>) data;

    final List<String> names = type.getAllStructFieldNames();
    final List<TypeInfo> types = type.getAllStructFieldTypeInfos();

    final int fieldCount = names.size();
    final List<Object> result = Lists.newArrayListWithCapacity(fieldCount);
    for (int field = 0; field < fieldCount; field++) {
        final Object raw = getRawValueOrNullIfAbsent(names.get(field), source);
        result.add(deserialize(types.get(field), raw));
    }

    return result;
}
 
Example 19
Source Project: emodb   Source File: EmoSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes every entry of a list using the list type's element type.
 *
 * @param type list type supplying the element type
 * @param data expected to be a {@code List}
 * @return a new list holding the deserialized entries in their original order
 * @throws SerDeException if {@code data} is not a list
 */
private Object deserializeList(ListTypeInfo type, Object data)
        throws SerDeException {
    final boolean isList = data instanceof List;
    if (!isList) {
        throw new SerDeException("Value not of type list");
    }
    //noinspection unchecked
    final List<Object> source = (List<Object>) data;

    final List<Object> converted = Lists.newArrayListWithCapacity(source.size());
    for (Object element : source) {
        converted.add(deserialize(type.getListElementTypeInfo(), element));
    }

    return converted;
}
 
Example 20
Source Project: emr-dynamodb-connector   Source File: DynamoDBSerDeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes one row of four primitive values (string, double, long, boolean)
 * and checks the resulting item: a string attribute, two number attributes
 * and a boolean attribute.
 */
@Test
public void testPrimitives() throws SerDeException {
  final List<String> names = PRIMITIVE_FIELDS;
  final List<ObjectInspector> inspectors = PRIMITIVE_OIS;
  final List<String> raw = PRIMITIVE_STRING_DATA;

  final String text = raw.get(0);
  final String doubleText = raw.get(1);
  final String longText = raw.get(2);
  final String boolText = raw.get(3);

  final Map<String, AttributeValue> expected = Maps.newHashMap();
  expected.put(names.get(0), new AttributeValue(text));
  expected.put(names.get(1), new AttributeValue().withN(doubleText));
  expected.put(names.get(2), new AttributeValue().withN(longText));
  expected.put(names.get(3), new AttributeValue().withBOOL(Boolean.valueOf(boolText)));

  // the row holds typed values parsed from the same strings
  final List<Object> row = Lists.newArrayList();
  row.add(text);
  row.add(Double.parseDouble(doubleText));
  row.add(Long.parseLong(longText));
  row.add(Boolean.valueOf(boolText));

  final Map<String, AttributeValue> actual = getSerializedItem(names, inspectors, row);

  assertEquals(expected, actual);
}
 
Example 21
Source Project: emr-dynamodb-connector   Source File: DynamoDBSerDeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a two-column row whose second value is null, once with the last
 * {@code getSerializedItem} argument {@code false} (the null column is absent
 * from the expected item) and once with {@code true} (the null column is
 * expected as a NULL attribute).
 */
@Test
public void testNull() throws SerDeException {
  final List<String> names = PRIMITIVE_FIELDS.subList(0, 2);
  final List<ObjectInspector> inspectors = PRIMITIVE_OIS.subList(0, 2);

  // copy the data so the shared constant is not modified
  final List<String> values = Lists.newArrayList(PRIMITIVE_STRING_DATA.subList(0, 2));
  values.set(1, null);

  final List<Object> row = Lists.<Object>newArrayList(values);

  final Map<String, AttributeValue> expected = Maps.newHashMap();
  expected.put(names.get(0), new AttributeValue(values.get(0)));

  // no null serialization
  Map<String, AttributeValue> actual = getSerializedItem(names, inspectors, row, false);
  assertEquals(expected, actual);

  // with null serialization
  expected.put(names.get(1), new AttributeValue().withNULL(true));
  actual = getSerializedItem(names, inspectors, row, true);
  assertEquals(expected, actual);
}
 
Example 22
Source Project: HiveKudu-Handler   Source File: HiveKuduBridgeUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Maps a Kudu column type to the corresponding Java object inspector from
 * {@code PrimitiveObjectInspectorFactory}.
 *
 * @param kuduType Kudu type to map
 * @param hiveType only used in the error message for an unmapped type
 * @return the matching object inspector
 * @throws SerDeException if {@code kuduType} has no mapping here
 */
public static ObjectInspector getObjectInspector(Type kuduType,
                                                 String hiveType) throws SerDeException {
    final ObjectInspector inspector;
    switch (kuduType) {
        case STRING:
            inspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
            break;
        case FLOAT:
            inspector = PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
            break;
        case DOUBLE:
            inspector = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
            break;
        case BOOL:
            inspector = PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
            break;
        case INT8:
            inspector = PrimitiveObjectInspectorFactory.javaByteObjectInspector;
            break;
        case INT16:
            inspector = PrimitiveObjectInspectorFactory.javaShortObjectInspector;
            break;
        case INT32:
            inspector = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
            break;
        case INT64:
            inspector = PrimitiveObjectInspectorFactory.javaLongObjectInspector;
            break;
        case TIMESTAMP:
            inspector = PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
            break;
        case BINARY:
            inspector = PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
            break;
        default:
            throw new SerDeException("Cannot find getObjectInspector for: "
                    + hiveType);
    }
    return inspector;
}
 
Example 23
Source Project: presto   Source File: TestOrcReaderPositions.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code count} single-column VARCHAR rows into an uncompressed ORC
 * file. The cell is a run of '0' characters that starts at
 * {@code initialLength} and grows by another {@code initialLength} characters
 * every {@code step} rows.
 *
 * @param file destination handed to {@code createOrcRecordWriter}
 * @param count number of rows to write
 * @param step number of rows written between two growth steps
 * @param initialLength length of the first cell and of each growth increment
 */
private static void createGrowingSequentialFile(File file, int count, int step, int initialLength)
        throws IOException, SerDeException
{
    FileSinkOperator.RecordWriter orcWriter = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, VARCHAR);

    Serializer serializer = new OrcSerde();
    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", VARCHAR);
    Object reusableRow = inspector.create();
    StructField onlyColumn = inspector.getAllStructFieldRefs().get(0);

    // the seed is initialLength zeros; the cell starts out equal to the seed
    StringBuilder cell = new StringBuilder();
    for (int n = 0; n < initialLength; n++) {
        cell.append("0");
    }
    String seed = cell.toString();

    int currentLength = initialLength;
    for (int rowIndex = 0; rowIndex < count; rowIndex++) {
        // length this row should have; append one more seed when it exceeds the current one
        int targetLength = (rowIndex / step + 1) * initialLength;
        if (targetLength > currentLength) {
            currentLength = targetLength;
            cell.append(seed);
        }
        inspector.setStructFieldData(reusableRow, onlyColumn, cell.toString());
        orcWriter.write(serializer.serialize(reusableRow, inspector));
    }

    orcWriter.close(false);
}
 
Example 24
Source Project: incubator-hivemall   Source File: JsonSerdeUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses the bytes of a JSON text into an object of the given column type
 * using a {@code HiveJsonStructReader} that ignores unknown fields.
 *
 * @param t JSON text; only its first {@code t.getLength()} bytes are read
 * @param columnType type handed to the reader
 * @return the parsed value, cast unchecked to the caller's expected type
 * @throws SerDeException wrapping any {@code IOException} raised while parsing
 */
@SuppressWarnings("unchecked")
@Nonnull
public static <T> T deserialize(@Nonnull final Text t, @Nonnull TypeInfo columnType)
        throws SerDeException {
    final HiveJsonStructReader structReader = new HiveJsonStructReader(columnType);
    structReader.setIgnoreUnknownFields(true);
    try {
        return (T) structReader.parseStruct(new FastByteArrayInputStream(t.getBytes(), t.getLength()));
    } catch (IOException ioe) {
        throw new SerDeException(ioe);
    }
}
 
Example 25
Source Project: incubator-hivemall   Source File: JsonSerdeUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Appends the JSON form of a map to {@code sb}: the literal {@code null} when
 * the inspector yields no map, otherwise comma-separated {@code key:value}
 * pairs between {@code SerDeUtils.LBRACE} and {@code SerDeUtils.RBRACE}.
 * Each key is rendered separately and trimmed; a non-empty key that does not
 * start with {@code SerDeUtils.QUOTE} is passed through {@code appendWithQuotes}.
 *
 * @param sb buffer the JSON text is appended to
 * @param obj map object handed to the inspector; may be null
 * @param moi inspector used to read the map and to obtain key/value inspectors
 */
private static void serializeMap(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nonnull final MapObjectInspector moi) throws SerDeException {
    final ObjectInspector keyOI = moi.getMapKeyObjectInspector();
    final ObjectInspector valueOI = moi.getMapValueObjectInspector();
    final Map<?, ?> entries = moi.getMap(obj);
    if (entries == null) {
        sb.append("null");
        return;
    }

    sb.append(SerDeUtils.LBRACE);
    boolean needsComma = false;
    for (Map.Entry<?, ?> entry : entries.entrySet()) {
        if (needsComma) {
            sb.append(SerDeUtils.COMMA);
        }
        needsComma = true;

        // render the key on its own so its quoting can be inspected
        final StringBuilder keyBuffer = new StringBuilder();
        buildJSONString(keyBuffer, entry.getKey(), keyOI);
        final String key = keyBuffer.toString().trim();

        final boolean appendVerbatim = key.isEmpty() || key.charAt(0) == SerDeUtils.QUOTE;
        if (appendVerbatim) {
            sb.append(key);
        } else {
            appendWithQuotes(sb, key);
        }

        sb.append(SerDeUtils.COLON);
        buildJSONString(sb, entry.getValue(), valueOI);
    }
    sb.append(SerDeUtils.RBRACE);
}
 
Example 26
Source Project: indexr   Source File: IndexRSerde.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the incoming row, re-ordered into table column positions when a
 * column mapping is needed.
 *
 * <p>The mapping is recomputed via {@code mapColIndex} only when the row's
 * {@code columns} reference differs from the one seen last. {@code serdeSize}
 * is updated on every call.
 *
 * @param writable the row to read; cast to {@code SchemaWritable} without a type check
 * @return the row itself when no mapping is needed, otherwise a new
 *         {@code ArrayWritable} with one slot per table column
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    final SchemaWritable row = (SchemaWritable) writable;

    // Reference comparison: remap only when the schema object has changed.
    if (row.columns != this.projectCols) {
        mapColIndex(row.columns);
        projectCols = row.columns;
    }

    if (isMapNeeded) {
        final Writable[] sourceCells = row.get();
        final Writable[] targetCells = new Writable[columnNames.size()];
        for (int k = 0; k < validColIndexes.length; k++) {
            // move the cell from its source position to its table position
            targetCells[validColIndexes[k]] = sourceCells[validColMapIds[k]];
        }

        serdeSize = validColIndexes.length;
        return new ArrayWritable(Writable.class, targetCells);
    }

    serdeSize = columnNames.size();
    return row;
}
 
Example 27
Source Project: multiple-dimension-spread   Source File: MDSSerde.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the given object in a {@code ParserWritable}. The parser is created
 * from {@code objInspector} on first use and kept in the {@code parser} field
 * for later calls.
 *
 * @param obj object to expose through the parser
 * @param objInspector inspector used only when the parser is first created
 * @return a new writable holding the parser
 * @throws SerDeException wrapping any {@code IOException}
 */
@Override
public Writable serialize( final Object obj, final ObjectInspector objInspector ) throws SerDeException{
  final ParserWritable result = new ParserWritable();
  try{
    final boolean firstUse = ( parser == null );
    if( firstUse ){
      parser = (HiveStructParser)( messageReader.create( objInspector ) );
      parser.setFieldIndexMap( filedIndexMap );
    }
    parser.setObject( obj );
    result.set( parser );
    return result;
  }catch( IOException ioe ){
    throw new SerDeException( ioe );
  }
}
 
Example 28
/**
 * Asserts that every given column matches, position by position, the column
 * type string and column name produced by an
 * {@code AvroObjectInspectorGenerator} for the schema.
 */
private void assertColumnSchema(Schema schema, List<FieldSchema> cols) throws SerDeException {
  final AvroObjectInspectorGenerator generator = new AvroObjectInspectorGenerator(schema);
  int position = 0;
  for (FieldSchema column : cols) {
    assertThat(column.getType(), is(generator.getColumnTypes().get(position).toString()));
    assertThat(column.getName(), is(generator.getColumnNames().get(position)));
    position++;
  }
}
 
Example 29
Source Project: incubator-hivemall   Source File: HiveJsonStructReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses a JSON array into a list, dispatching each element to
 * {@code parseDispatcher} with the list's element inspector, and leaves the
 * parser one token past the array.
 *
 * @return the parsed elements, or {@code null} when the current token is a
 *         JSON null (which is consumed)
 * @throws SerDeException if the current token does not start an array, or
 *         wrapping any exception raised while parsing an element
 */
private Object parseList(JsonParser parser, ListObjectInspector oi)
        throws JsonParseException, IOException, SerDeException {
    if (parser.getCurrentToken() == JsonToken.VALUE_NULL) {
        parser.nextToken();
        return null;
    }
    if (parser.getCurrentToken() != JsonToken.START_ARRAY) {
        throw new SerDeException("array expected");
    }

    final List<Object> elements = new ArrayList<>();
    final ObjectInspector elementOI = oi.getListElementObjectInspector();
    JsonToken token = parser.nextToken();
    try {
        // the dispatcher advances the parser, so the token is re-read after each element
        for (; token != null && token != JsonToken.END_ARRAY; token = parser.getCurrentToken()) {
            elements.add(parseDispatcher(parser, elementOI));
        }
    } catch (Exception cause) {
        throw new SerDeException("array: " + cause.getMessage(), cause);
    }

    // step past the end of the array
    parser.nextToken();

    return elements;
}
 
Example 30
Source Project: dremio-oss   Source File: HiveTextReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds a value to {@code footerBuffer} and, once the buffer holds more than
 * {@code footerCount} entries, returns the result of polling it.
 *
 * @param value value to add to the buffer
 * @return the polled entry, or {@code null} while the buffer size does not
 *         exceed {@code footerCount}
 */
protected Object bufferAdd(Object value) throws SerDeException {
  footerBuffer.add(value);
  return (footerBuffer.size() > footerCount) ? footerBuffer.poll() : null;
}