org.apache.hadoop.hive.serde2.avro.AvroSerDe Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.avro.AvroSerDe. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the Hadoop {@link InputFormat} instance named by the given table schema.
 *
 * <p>When {@code symlinkTarget} is set and the resolved class is {@link SymlinkTextInputFormat},
 * the format actually instantiated is {@link AvroContainerInputFormat} if the schema's
 * deserializer is {@link AvroSerDe}, and {@link TextInputFormat} otherwise.
 *
 * @param configuration Hadoop configuration converted into the {@code JobConf} used for instantiation
 * @param schema table properties from which the input format name and deserializer are read
 * @param symlinkTarget whether the format is being resolved for the target of a symlink table
 * @return a new input format instance
 * @throws PrestoException with {@code HIVE_UNSUPPORTED_FORMAT} if the class cannot be found or created
 */
public static InputFormat<?, ?> getInputFormat(Configuration configuration, Properties schema, boolean symlinkTarget)
{
    String inputFormatName = getInputFormatName(schema);
    try {
        JobConf jobConf = toJobConf(configuration);
        configureCompressionCodecs(jobConf);

        Class<? extends InputFormat<?, ?>> resolvedClass = getInputFormatClass(jobConf, inputFormatName);
        boolean replaceSymlinkFormat = symlinkTarget && resolvedClass == SymlinkTextInputFormat.class;
        if (replaceSymlinkFormat) {
            // Symlink targets are assumed to be TEXTFILE unless serde indicates otherwise.
            if (isDeserializerClass(schema, AvroSerDe.class)) {
                resolvedClass = AvroContainerInputFormat.class;
            }
            else {
                resolvedClass = TextInputFormat.class;
            }
        }

        return ReflectionUtils.newInstance(resolvedClass, jobConf);
    }
    catch (ClassNotFoundException | RuntimeException e) {
        throw new PrestoException(HIVE_UNSUPPORTED_FORMAT, "Unable to create input format " + inputFormatName, e);
    }
}
 
Example #2
Source File: HiveAvroORCQueryGenerator.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Produces the Hive type string for logical types that are supported by Hive but not by Avro
 * (e.g. VARCHAR). See
 * org.apache.hadoop.hive.serde2.avro.SchemaToTypeInfo#generateTypeInfo(org.apache.avro.Schema)
 * for the reference handling.
 *
 * @param schema Avro schema
 * @return {@code varchar(<maxLength>)} when the schema is a STRING whose logical-type property is
 *         varchar (case-insensitive); an empty string for every other schema
 * @throws AvroSerdeException if the max-length property cannot be read from the schema
 */
public static String generateHiveSpecificLogicalType(Schema schema) throws AvroSerdeException {
  boolean isStringType = schema.getType() == Schema.Type.STRING;

  // Anything that is not a varchar-annotated string has no Hive-specific logical type here.
  if (!isStringType
      || !AvroSerDe.VARCHAR_TYPE_NAME.equalsIgnoreCase(schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE))) {
    return StringUtils.EMPTY;
  }

  final int maxLength;
  try {
    maxLength = schema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
  } catch (Exception ex) {
    throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex);
  }
  return String.format("varchar(%s)", maxLength);
}
 
Example #3
Source File: LocalHiveMetastoreTestUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a test table that uses {@link AvroSerDe} in the local metastore.
 *
 * @param dbName database of the new table
 * @param tableName name of the new table
 * @param tableSdLoc location set on the table's storage descriptor
 * @param partitionFieldName when present, a {@code string} partition key with this name is added
 * @param ignoreDbCreation when {@code false}, the database is created before the table
 * @return the table object that was passed to the metastore client
 * @throws Exception if database or table creation fails
 */
public Table createTestAvroTable(String dbName, String tableName, String tableSdLoc,
    Optional<String> partitionFieldName, boolean ignoreDbCreation) throws Exception {
  boolean createDb = !ignoreDbCreation;
  if (createDb) {
    createTestDb(dbName);
  }

  Table avroTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(dbName, tableName);

  // Storage descriptor: location plus Avro serde with a placeholder schema URL.
  avroTable.getSd().setLocation(tableSdLoc);
  avroTable.getSd().getSerdeInfo().setSerializationLib(AvroSerDe.class.getName());
  avroTable.getSd().getSerdeInfo()
      .setParameters(ImmutableMap.of(HiveAvroSerDeManager.SCHEMA_URL, "/tmp/dummy"));

  if (partitionFieldName.isPresent()) {
    FieldSchema partitionKey = new FieldSchema(partitionFieldName.get(), "string", "some comment");
    avroTable.addToPartitionKeys(partitionKey);
  }

  this.localMetastoreClient.createTable(avroTable);
  return avroTable;
}
 
Example #4
Source File: OrcTestTools.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a record into an {@link AvroRow} object suitable for writing.
 * OrcSerDe could be used instead to obtain an ORC-associated writable object.
 *
 * The returned object allows a self-maintained ORC writer (not one from OrcOutputFormat)
 * to write the record.
 *
 * @param record the Avro record to convert
 * @param avroSchema schema used both to initialize the serde and as the record's file schema
 * @return an {@link AvroRow} wrapping the deserialized Hive object and its object inspector
 * @throws RuntimeException wrapping any {@link SerDeException} raised during the conversion
 */
private Writable getAvroWritable(GenericRecord record, Schema avroSchema) {
  try {
    // Initialize an AvroSerDe from the literal schema text.
    String schemaLiteral = avroSchema.toString();
    Properties serDeProps = new Properties();
    serDeProps.setProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), schemaLiteral);
    AvroSerDe avroSerDe = new AvroSerDe();
    avroSerDe.initialize(null, serDeProps);

    // Wrap the record with its file schema and deserialize it into a Hive object.
    AvroGenericRecordWritable recordWritable = new AvroGenericRecordWritable(record);
    recordWritable.setFileSchema(avroSchema);
    Object hiveObject = avroSerDe.deserialize(recordWritable);

    ObjectInspector inspector = new AvroObjectInspectorGenerator(avroSchema).getObjectInspector();
    return new AvroRow(hiveObject, inspector);
  } catch (SerDeException se) {
    throw new RuntimeException("Failed in SerDe exception:", se);
  }
}
 
Example #5
Source File: HiveCatalogUtil.java    From tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a Hive storage descriptor to the matching {@code BuiltinStorages} data format name.
 *
 * <p>The serialization library decides the format; for the lazy-simple, lazy-binary and columnar
 * serdes the input format is checked as well.
 *
 * @param descriptor storage descriptor to inspect; must not be null
 * @return the {@code BuiltinStorages} constant for the descriptor
 * @throws TajoRuntimeException wrapping an {@code UnknownDataFormatException} (built from the input
 *         format) when the serde / input format combination is not recognized
 */
public static String getDataFormat(StorageDescriptor descriptor) {
  Preconditions.checkNotNull(descriptor);

  final String serdeLib = descriptor.getSerdeInfo().getSerializationLib();
  final String inputFormat = descriptor.getInputFormat();

  if (LazySimpleSerDe.class.getName().equals(serdeLib)) {
    if (TextInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.TEXT;
    }
    if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    }
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }

  if (LazyBinarySerDe.class.getName().equals(serdeLib)) {
    if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    }
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }

  if (LazyBinaryColumnarSerDe.class.getName().equals(serdeLib) || ColumnarSerDe.class.getName().equals(serdeLib)) {
    if (RCFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.RCFILE;
    }
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }

  // The remaining serdes determine the format on their own.
  if (ParquetHiveSerDe.class.getName().equals(serdeLib)) {
    return BuiltinStorages.PARQUET;
  }
  if (AvroSerDe.class.getName().equals(serdeLib)) {
    return BuiltinStorages.AVRO;
  }
  if (OrcSerde.class.getName().equals(serdeLib)) {
    return BuiltinStorages.ORC;
  }
  if (RegexSerDe.class.getName().equals(serdeLib)) {
    return BuiltinStorages.REGEX;
  }

  throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
}
 
Example #6
Source File: HiveAvroCopyEntityHelper.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether a hive table is actually an Avro table.
 *
 * <p>The table counts as Avro when its input format, output format or serialization library
 * class name ends with the simple name of {@link AvroContainerInputFormat},
 * {@link AvroContainerOutputFormat} or {@link AvroSerDe} respectively. A missing (null) value
 * for any of the three is treated as "not Avro" for that attribute rather than failing.
 *
 * @param table a hive {@link Table}
 * @return true if the table is an Avro table
 */
public static boolean isHiveTableAvroType(Table table) {
  String serializationLib = table.getTTable().getSd().getSerdeInfo().getSerializationLib();
  String inputFormat = table.getTTable().getSd().getInputFormat();
  String outputFormat = table.getTTable().getSd().getOutputFormat();

  // Each attribute may be unset in the metastore; guard before calling endsWith to avoid an NPE.
  return (inputFormat != null && inputFormat.endsWith(AvroContainerInputFormat.class.getSimpleName()))
      || (outputFormat != null && outputFormat.endsWith(AvroContainerOutputFormat.class.getSimpleName()))
      || (serializationLib != null && serializationLib.endsWith(AvroSerDe.class.getSimpleName()));
}
 
Example #7
Source File: AvroSchemaManagerTest.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a test partition of the given table whose storage descriptor uses {@link AvroSerDe}
 * and has a single {@code int} column named {@code foo}.
 *
 * @param table table the partition belongs to
 * @return the partition with its storage descriptor replaced
 * @throws HiveException if the partition cannot be constructed
 */
private Partition getTestPartition(Table table) throws HiveException {
  Partition testPartition = new Partition(table, ImmutableMap.of("partition_key", "1"), null);

  StorageDescriptor avroSd = new StorageDescriptor();
  SerDeInfo avroSerDeInfo = new SerDeInfo("avro", AvroSerDe.class.getName(), null);
  avroSd.setSerdeInfo(avroSerDeInfo);
  FieldSchema fooColumn = new FieldSchema("foo", "int", null);
  avroSd.setCols(Lists.newArrayList(fooColumn));

  testPartition.getTPartition().setSd(avroSd);
  return testPartition;
}
 
Example #8
Source File: HiveMetaStoreUtilsTest.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that an Avro {@code HiveTable} with a valid schema literal converts to a metastore
 * table that keeps its names, formats and serde, and exposes the single schema field as a column.
 */
@Test
public void testGetTableAvro() {
  final String databaseName = "testdb";
  final String tableName = "testtable";
  final String avroInputFormat = AvroContainerInputFormat.class.getName();
  final String avroOutputFormat = AvroContainerOutputFormat.class.getName();
  final String avroSerDe = AvroSerDe.class.getName();
  final String schemaLiteral = "{\"type\": \"record\", \"name\": \"TestEvent\","
      + " \"namespace\": \"test.namespace\", \"fields\": [{\"name\":\"a\","
      + " \"type\": \"int\"}]}";

  HiveTable.Builder tableBuilder = new HiveTable.Builder();
  tableBuilder.withDbName(databaseName).withTableName(tableName);

  State serdeProps = new State();
  serdeProps.setProp("avro.schema.literal", schemaLiteral);
  tableBuilder.withSerdeProps(serdeProps);

  HiveTable hiveTable = tableBuilder.build();
  hiveTable.setInputFormat(avroInputFormat);
  hiveTable.setOutputFormat(avroOutputFormat);
  hiveTable.setSerDeType(avroSerDe);

  // Convert and check the table identity.
  Table converted = HiveMetaStoreUtils.getTable(hiveTable);
  Assert.assertEquals(converted.getDbName(), databaseName);
  Assert.assertEquals(converted.getTableName(), tableName);

  // Storage descriptor must carry the Avro formats and serde.
  StorageDescriptor storage = converted.getSd();
  Assert.assertEquals(storage.getInputFormat(), avroInputFormat);
  Assert.assertEquals(storage.getOutputFormat(), avroOutputFormat);
  Assert.assertNotNull(storage.getSerdeInfo());
  Assert.assertEquals(storage.getSerdeInfo().getSerializationLib(), avroSerDe);

  // The schema literal declares exactly one int field named "a".
  List<FieldSchema> columns = storage.getCols();
  Assert.assertTrue(columns != null && columns.size() == 1);
  FieldSchema onlyColumn = columns.get(0);
  Assert.assertEquals(onlyColumn.getName(), "a");
  Assert.assertEquals(onlyColumn.getType(), "int");
}
 
Example #9
Source File: HiveMetaStoreUtilsTest.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that an Avro {@code HiveTable} whose schema literal is not a valid schema still converts
 * to a metastore table with the expected names, formats and serde, but with no columns.
 */
@Test
public void testGetTableAvroInvalidSchema() {
  final String databaseName = "testdb";
  final String tableName = "testtable";
  final String avroInputFormat = AvroContainerInputFormat.class.getName();
  final String avroOutputFormat = AvroContainerOutputFormat.class.getName();
  final String avroSerDe = AvroSerDe.class.getName();

  HiveTable.Builder tableBuilder = new HiveTable.Builder();
  tableBuilder.withDbName(databaseName).withTableName(tableName);

  State serdeProps = new State();
  serdeProps.setProp("avro.schema.literal", "invalid schema");
  tableBuilder.withSerdeProps(serdeProps);

  HiveTable hiveTable = tableBuilder.build();
  hiveTable.setInputFormat(avroInputFormat);
  hiveTable.setOutputFormat(avroOutputFormat);
  hiveTable.setSerDeType(avroSerDe);

  // Convert and check the table identity.
  Table converted = HiveMetaStoreUtils.getTable(hiveTable);
  Assert.assertEquals(converted.getDbName(), databaseName);
  Assert.assertEquals(converted.getTableName(), tableName);

  // Storage descriptor must carry the Avro formats and serde.
  StorageDescriptor storage = converted.getSd();
  Assert.assertEquals(storage.getInputFormat(), avroInputFormat);
  Assert.assertEquals(storage.getOutputFormat(), avroOutputFormat);
  Assert.assertNotNull(storage.getSerdeInfo());
  Assert.assertEquals(storage.getSerdeInfo().getSerializationLib(), avroSerDe);

  // An unparseable schema yields an empty (but non-null) column list.
  List<FieldSchema> columns = storage.getCols();
  Assert.assertTrue(columns != null && columns.size() == 0);
}
 
Example #10
Source File: KafkaStorageHandler.java    From HiveKa with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the SerDe class used by this handler.
 *
 * @return {@link AvroSerDe}'s class object
 */
@Override
public Class<? extends SerDe> getSerDeClass() {
  return AvroSerDe.class;
}
 
Example #11
Source File: HiveSource.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Checks whether the table's serialization library is exactly {@link AvroSerDe}.
 *
 * @param table table whose storage descriptor is inspected
 * @return true if the serialization library equals the fully-qualified {@link AvroSerDe} class name
 */
private boolean isAvro(Table table) {
  String serializationLib = table.getSd().getSerdeInfo().getSerializationLib();
  String avroSerDeName = AvroSerDe.class.getName();
  return avroSerDeName.equals(serializationLib);
}