org.apache.hadoop.hive.serde2.avro.AvroSerDe Java Examples
The following examples show how to use
org.apache.hadoop.hive.serde2.avro.AvroSerDe.
You can vote up the examples you like or vote down the ones you don't,
and follow the links above each example to reach the original project or source file. Related API usage is listed in the sidebar.
Example #1
Source File: HiveUtil.java From presto with Apache License 2.0 | 6 votes |
public static InputFormat<?, ?> getInputFormat(Configuration configuration, Properties schema, boolean symlinkTarget) { String inputFormatName = getInputFormatName(schema); try { JobConf jobConf = toJobConf(configuration); configureCompressionCodecs(jobConf); Class<? extends InputFormat<?, ?>> inputFormatClass = getInputFormatClass(jobConf, inputFormatName); if (symlinkTarget && inputFormatClass == SymlinkTextInputFormat.class) { // Symlink targets are assumed to be TEXTFILE unless serde indicates otherwise. inputFormatClass = TextInputFormat.class; if (isDeserializerClass(schema, AvroSerDe.class)) { inputFormatClass = AvroContainerInputFormat.class; } } return ReflectionUtils.newInstance(inputFormatClass, jobConf); } catch (ClassNotFoundException | RuntimeException e) { throw new PrestoException(HIVE_UNSUPPORTED_FORMAT, "Unable to create input format " + inputFormatName, e); } }
Example #2
Source File: HiveAvroORCQueryGenerator.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/** * Referencing org.apache.hadoop.hive.serde2.avro.SchemaToTypeInfo#generateTypeInfo(org.apache.avro.Schema) on * how to deal with logical types that supported by Hive but not by Avro(e.g. VARCHAR). * * If unsupported logical types found, return empty string as a result. * @param schema Avro schema * @return * @throws AvroSerdeException */ public static String generateHiveSpecificLogicalType(Schema schema) throws AvroSerdeException { // For bytes type, it can be mapped to decimal. Schema.Type type = schema.getType(); if (type == Schema.Type.STRING && AvroSerDe.VARCHAR_TYPE_NAME .equalsIgnoreCase(schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE))) { int maxLength = 0; try { maxLength = schema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt(); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex); } return String.format("varchar(%s)", maxLength); } else { return StringUtils.EMPTY; } }
Example #3
Source File: LocalHiveMetastoreTestUtils.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Registers an Avro-backed test table in the local metastore.
 *
 * <p>The table uses {@link AvroSerDe} as its serialization library and carries a dummy schema URL
 * ({@code /tmp/dummy}) as its only serde parameter.
 *
 * @param dbName database the table belongs to; created first unless {@code ignoreDbCreation} is set
 * @param tableName name of the table to create
 * @param tableSdLoc location stored on the table's storage descriptor
 * @param partitionFieldName when present, a single string-typed partition key of this name is added
 * @param ignoreDbCreation if {@code true}, the database creation step is skipped
 * @return the table object that was submitted to the metastore client
 * @throws Exception propagated from database or table creation
 */
public Table createTestAvroTable(String dbName, String tableName, String tableSdLoc,
    Optional<String> partitionFieldName, boolean ignoreDbCreation) throws Exception {
  if (!ignoreDbCreation) {
    createTestDb(dbName);
  }

  Table avroTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(dbName, tableName);

  // Storage descriptor: location plus Avro serde configuration.
  avroTable.getSd().setLocation(tableSdLoc);
  avroTable.getSd().getSerdeInfo().setSerializationLib(AvroSerDe.class.getName());
  avroTable.getSd().getSerdeInfo()
      .setParameters(ImmutableMap.of(HiveAvroSerDeManager.SCHEMA_URL, "/tmp/dummy"));

  if (partitionFieldName.isPresent()) {
    FieldSchema partitionKey = new FieldSchema(partitionFieldName.get(), "string", "some comment");
    avroTable.addToPartitionKeys(partitionKey);
  }

  this.localMetastoreClient.createTable(avroTable);
  return avroTable;
}
Example #4
Source File: OrcTestTools.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/** * From each record, transformed to {@link AvroRow} object for writing. * One can also choose to use OrcSerDe to obtain ORC-associated writable object. * * Using return object of this method would enable a self-maintained ORC writer(not from OrcOutputFormat) * to write object. */ private Writable getAvroWritable(GenericRecord record, Schema avroSchema) { try { // Construct AvroSerDe with proper schema and deserialize into Hive object. AvroSerDe serDe = new AvroSerDe(); Properties propertiesWithSchema = new Properties(); propertiesWithSchema.setProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), avroSchema.toString()); serDe.initialize(null, propertiesWithSchema); AvroGenericRecordWritable avroGenericRecordWritable = new AvroGenericRecordWritable(record); avroGenericRecordWritable.setFileSchema(avroSchema); Object avroDeserialized = serDe.deserialize(avroGenericRecordWritable); ObjectInspector avroOI = new AvroObjectInspectorGenerator(avroSchema).getObjectInspector(); return new AvroRow(avroDeserialized, avroOI); } catch (SerDeException se) { throw new RuntimeException("Failed in SerDe exception:", se); } }
Example #5
Source File: HiveCatalogUtil.java From tajo with Apache License 2.0 | 5 votes |
/**
 * Maps a Hive storage descriptor to the matching {@link BuiltinStorages} format name.
 *
 * <p>The decision is driven by the serde's serialization library. For the lazy-simple,
 * lazy-binary and columnar serdes the input format class is checked as well; the Parquet,
 * Avro, ORC and Regex serdes map directly to their storage name.
 *
 * @param descriptor storage descriptor to classify; must not be null
 * @return the storage format name
 * @throws TajoRuntimeException wrapping an {@link UnknownDataFormatException} (carrying the input
 *         format) when the serde / input format combination is not recognized
 */
public static String getDataFormat(StorageDescriptor descriptor) {
  Preconditions.checkNotNull(descriptor);

  final String serde = descriptor.getSerdeInfo().getSerializationLib();
  final String inputFormat = descriptor.getInputFormat();

  if (LazySimpleSerDe.class.getName().equals(serde)) {
    if (TextInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.TEXT;
    }
    if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    }
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }

  if (LazyBinarySerDe.class.getName().equals(serde)) {
    if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    }
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }

  if (LazyBinaryColumnarSerDe.class.getName().equals(serde)
      || ColumnarSerDe.class.getName().equals(serde)) {
    if (RCFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.RCFILE;
    }
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }

  // The remaining serdes determine the format on their own.
  if (ParquetHiveSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.PARQUET;
  }
  if (AvroSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.AVRO;
  }
  if (OrcSerde.class.getName().equals(serde)) {
    return BuiltinStorages.ORC;
  }
  if (RegexSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.REGEX;
  }

  throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
}
Example #6
Source File: HiveAvroCopyEntityHelper.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Tells whether a Hive table is actually an Avro table.
 *
 * <p>A table counts as Avro when its input format ends with the simple name of
 * {@link AvroContainerInputFormat}, or its output format ends with the simple name of
 * {@link AvroContainerOutputFormat}, or its serialization library ends with the simple name of
 * {@link AvroSerDe}. Any of the three strings may be unset ({@code null}) on the storage
 * descriptor; an unset value is treated as "no match" rather than raising a
 * {@link NullPointerException}.
 *
 * <p>NOTE(review): the storage descriptor and its serde info are still dereferenced
 * unconditionally; this assumes both are populated on the table.
 *
 * @param table a hive {@link Table}
 * @return true if the table is an Avro table by any of the three criteria above
 */
public static boolean isHiveTableAvroType(Table table) {
  String serializationLib = table.getTTable().getSd().getSerdeInfo().getSerializationLib();
  String inputFormat = table.getTTable().getSd().getInputFormat();
  String outputFormat = table.getTTable().getSd().getOutputFormat();

  // Null-guard each string: an unset field must not prevent the other criteria from being checked.
  return (inputFormat != null && inputFormat.endsWith(AvroContainerInputFormat.class.getSimpleName()))
      || (outputFormat != null && outputFormat.endsWith(AvroContainerOutputFormat.class.getSimpleName()))
      || (serializationLib != null && serializationLib.endsWith(AvroSerDe.class.getSimpleName()));
}
Example #7
Source File: AvroSchemaManagerTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Builds a test partition of the given table with partition spec {@code partition_key=1}.
 *
 * <p>The partition's storage descriptor is replaced with one that uses {@link AvroSerDe}
 * (serde name {@code "avro"}) and a single {@code int} column named {@code foo}.
 *
 * @param table table the partition belongs to
 * @return the configured partition
 * @throws HiveException if the partition cannot be constructed
 */
private Partition getTestPartition(Table table) throws HiveException {
  // Avro storage descriptor with one int column.
  StorageDescriptor avroSd = new StorageDescriptor();
  avroSd.setSerdeInfo(new SerDeInfo("avro", AvroSerDe.class.getName(), null));
  avroSd.setCols(Lists.newArrayList(new FieldSchema("foo", "int", null)));

  Partition testPartition = new Partition(table, ImmutableMap.of("partition_key", "1"), null);
  testPartition.getTPartition().setSd(avroSd);
  return testPartition;
}
Example #8
Source File: HiveMetaStoreUtilsTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code HiveMetaStoreUtils.getTable} converts an Avro {@link HiveTable} into a
 * metastore {@link Table}: database and table names, input/output formats and serialization
 * library are carried over, and the column list holds the single {@code int} field {@code a}
 * declared in the {@code avro.schema.literal} serde property.
 */
@Test
public void testGetTableAvro() {
  final String databaseName = "testdb";
  final String tableName = "testtable";

  // Serde properties carrying a one-field record schema.
  State serdeProps = new State();
  serdeProps.setProp("avro.schema.literal",
      "{\"type\": \"record\", \"name\": \"TestEvent\","
          + " \"namespace\": \"test.namespace\", \"fields\": [{\"name\":\"a\","
          + " \"type\": \"int\"}]}");

  HiveTable.Builder tableBuilder = new HiveTable.Builder();
  tableBuilder.withDbName(databaseName).withTableName(tableName);
  tableBuilder.withSerdeProps(serdeProps);

  HiveTable hiveTable = tableBuilder.build();
  hiveTable.setInputFormat(AvroContainerInputFormat.class.getName());
  hiveTable.setOutputFormat(AvroContainerOutputFormat.class.getName());
  hiveTable.setSerDeType(AvroSerDe.class.getName());

  Table table = HiveMetaStoreUtils.getTable(hiveTable);

  Assert.assertEquals(table.getDbName(), databaseName);
  Assert.assertEquals(table.getTableName(), tableName);

  StorageDescriptor sd = table.getSd();
  Assert.assertEquals(sd.getInputFormat(), AvroContainerInputFormat.class.getName());
  Assert.assertEquals(sd.getOutputFormat(), AvroContainerOutputFormat.class.getName());
  Assert.assertNotNull(sd.getSerdeInfo());
  Assert.assertEquals(sd.getSerdeInfo().getSerializationLib(), AvroSerDe.class.getName());

  // Exactly one column, derived from the schema literal.
  List<FieldSchema> fields = sd.getCols();
  Assert.assertTrue(fields != null && fields.size() == 1);

  FieldSchema onlyField = fields.get(0);
  Assert.assertEquals(onlyField.getName(), "a");
  Assert.assertEquals(onlyField.getType(), "int");
}
Example #9
Source File: HiveMetaStoreUtilsTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code HiveMetaStoreUtils.getTable} still produces a table when the
 * {@code avro.schema.literal} serde property is not a valid schema: names, formats and
 * serialization library are carried over, and the column list is non-null but empty.
 */
@Test
public void testGetTableAvroInvalidSchema() {
  final String databaseName = "testdb";
  final String tableName = "testtable";

  // Deliberately unparseable schema literal.
  State serdeProps = new State();
  serdeProps.setProp("avro.schema.literal", "invalid schema");

  HiveTable.Builder tableBuilder = new HiveTable.Builder();
  tableBuilder.withDbName(databaseName).withTableName(tableName);
  tableBuilder.withSerdeProps(serdeProps);

  HiveTable hiveTable = tableBuilder.build();
  hiveTable.setInputFormat(AvroContainerInputFormat.class.getName());
  hiveTable.setOutputFormat(AvroContainerOutputFormat.class.getName());
  hiveTable.setSerDeType(AvroSerDe.class.getName());

  Table table = HiveMetaStoreUtils.getTable(hiveTable);

  Assert.assertEquals(table.getDbName(), databaseName);
  Assert.assertEquals(table.getTableName(), tableName);

  StorageDescriptor sd = table.getSd();
  Assert.assertEquals(sd.getInputFormat(), AvroContainerInputFormat.class.getName());
  Assert.assertEquals(sd.getOutputFormat(), AvroContainerOutputFormat.class.getName());
  Assert.assertNotNull(sd.getSerdeInfo());
  Assert.assertEquals(sd.getSerdeInfo().getSerializationLib(), AvroSerDe.class.getName());

  // No columns can be derived from the invalid schema.
  List<FieldSchema> fields = sd.getCols();
  Assert.assertTrue(fields != null && fields.isEmpty());
}
Example #10
Source File: KafkaStorageHandler.java From HiveKa with Apache License 2.0 | 4 votes |
/**
 * Reports the SerDe class used by this storage handler.
 *
 * @return {@link AvroSerDe}
 */
@Override
public Class<? extends SerDe> getSerDeClass() {
  return AvroSerDe.class;
}
Example #11
Source File: HiveSource.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/**
 * Tells whether the table's serialization library is exactly {@link AvroSerDe}.
 *
 * @param table table whose storage descriptor's serde info is inspected
 * @return {@code true} if the serialization library equals the {@link AvroSerDe} class name;
 *         {@code false} otherwise, including when the serialization library is unset
 */
private boolean isAvro(Table table) {
  String serializationLib = table.getSd().getSerdeInfo().getSerializationLib();
  return AvroSerDe.class.getName().equals(serializationLib);
}