org.apache.hadoop.hive.metastore.api.SerDeInfo Java Examples

The following examples show how to use org.apache.hadoop.hive.metastore.api.SerDeInfo. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: presto   Author: prestosql   File: ThriftMetastoreUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies the storage-related fields of a metastore storage descriptor into {@code builder}:
 * storage format, location, bucket property, skew flag and SerDe parameters.
 *
 * @param tableParameters table parameters, forwarded to the bucket-property conversion
 * @param storageDescriptor descriptor to read from
 * @param builder builder that receives the converted values
 * @param tablePartitionName name forwarded to the bucket-property conversion
 * @throws PrestoException with {@code HIVE_INVALID_METADATA} when the descriptor has no SerDe info
 */
private static void fromMetastoreApiStorageDescriptor(
        Map<String, String> tableParameters,
        StorageDescriptor storageDescriptor,
        Storage.Builder builder,
        String tablePartitionName)
{
    SerDeInfo serde = storageDescriptor.getSerdeInfo();
    if (serde == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    // A missing SerDe parameter map is passed on as an empty map.
    Map<String, String> serdeParameters = serde.getParameters();
    if (serdeParameters == null) {
        serdeParameters = ImmutableMap.of();
    }

    // Skewed only when skew info is set, its column names are set, and that list is non-empty.
    boolean skewed = storageDescriptor.isSetSkewedInfo()
            && storageDescriptor.getSkewedInfo().isSetSkewedColNames()
            && !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty();

    builder.setStorageFormat(StorageFormat.createNullable(
                    serde.getSerializationLib(),
                    storageDescriptor.getInputFormat(),
                    storageDescriptor.getOutputFormat()))
            .setLocation(nullToEmpty(storageDescriptor.getLocation()))
            .setBucketProperty(HiveBucketProperty.fromStorageDescriptor(tableParameters, storageDescriptor, tablePartitionName))
            .setSkewed(skewed)
            .setSerdeParameters(serdeParameters);
}
 
Example #2
Source Project: flink   Author: flink-tpc-ds   File: HiveTableUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Create properties info to initialize a SerDe.
 *
 * <p>The result holds the serialization format (when the SerDe parameters define one), the
 * comma-joined column names and the joined column types of the descriptor's columns, the
 * column-name delimiter and a {@code "NULL"} null format. All SerDe parameters are copied in
 * last, so they override any of the values set before.
 *
 * @param storageDescriptor storage descriptor supplying the SerDe parameters and the columns
 * @return the properties assembled from the descriptor
 */
public static Properties createPropertiesFromStorageDescriptor(StorageDescriptor storageDescriptor) {
	SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
	Map<String, String> parameters = serDeInfo.getParameters();
	Properties properties = new Properties();
	// The parameter map may be null and Properties rejects null values, so the
	// serialization format is only copied when it is actually present.
	String serializationFormat = parameters == null ? null : parameters.get(serdeConstants.SERIALIZATION_FORMAT);
	if (serializationFormat != null) {
		properties.setProperty(serdeConstants.SERIALIZATION_FORMAT, serializationFormat);
	}
	List<String> colTypes = new ArrayList<>();
	List<String> colNames = new ArrayList<>();
	List<FieldSchema> cols = storageDescriptor.getCols();
	for (FieldSchema col : cols) {
		colTypes.add(col.getType());
		colNames.add(col.getName());
	}
	properties.setProperty(serdeConstants.LIST_COLUMNS, StringUtils.join(colNames, String.valueOf(SerDeUtils.COMMA)));
	// Note: serdeConstants.COLUMN_NAME_DELIMITER is not defined in previous Hive. We use a literal to save on shim
	properties.setProperty("column.name.delimiter", String.valueOf(SerDeUtils.COMMA));
	properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, StringUtils.join(colTypes, DEFAULT_LIST_COLUMN_TYPES_SEPARATOR));
	properties.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
	// SerDe parameters go in last so that they take precedence over the defaults above.
	if (parameters != null) {
		properties.putAll(parameters);
	}
	return properties;
}
 
Example #3
Source Project: beeju   Author: HotelsDotCom   File: HiveServer2CoreTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates an unpartitioned text table with columns {@code id int} and {@code name string}
 * through a new metastore client obtained from {@code server}.
 *
 * @param databaseName database the table is created in
 * @param tableName name of the new table
 * @param server server whose core supplies the metastore client
 * @return the table definition that was submitted to the metastore
 * @throws Exception if the client cannot be obtained or the table cannot be created
 */
private Table createUnpartitionedTable(String databaseName, String tableName, HiveServer2Core server)
    throws Exception {
  Table table = new Table();
  table.setDbName(databaseName);
  table.setTableName(tableName);
  table.setSd(new StorageDescriptor());
  table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().setSerdeInfo(new SerDeInfo());
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  HiveMetaStoreClient client = server.getCore().newClient();
  try {
    client.createTable(table);
  } finally {
    // Release the client even when table creation fails.
    client.close();
  }
  return table;
}
 
Example #4
Source Project: beeju   Author: HotelsDotCom   File: HiveServer2CoreTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a text table partitioned by {@code partcol int}, with columns {@code id int} and
 * {@code name string}, through a new metastore client obtained from {@code server}.
 *
 * @param databaseName database the table is created in
 * @param tableName name of the new table
 * @param server server whose core supplies the metastore client
 * @return the table definition that was submitted to the metastore
 * @throws Exception if the client cannot be obtained or the table cannot be created
 */
private Table createPartitionedTable(String databaseName, String tableName, HiveServer2Core server) throws Exception {
  Table table = new Table();
  // Honour the caller-supplied database instead of silently falling back to a constant.
  table.setDbName(databaseName);
  table.setTableName(tableName);
  table.setPartitionKeys(Arrays.asList(new FieldSchema("partcol", "int", null)));
  table.setSd(new StorageDescriptor());
  table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().setSerdeInfo(new SerDeInfo());
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  HiveMetaStoreClient client = server.getCore().newClient();
  try {
    client.createTable(table);
  } finally {
    // Release the client even when table creation fails.
    client.close();
  }
  return table;
}
 
Example #5
Source Project: circus-train   Author: HotelsDotCom   File: TestUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Defines a virtual view named {@code view} in {@code database} and creates it through the
 * given metastore client.
 *
 * @param metaStoreClient client used to create the view
 * @param database database that holds the view
 * @param view name of the view
 * @param table table name passed to the HQL helpers that produce the view text
 * @param partitionCols partition keys of the view
 * @return the view definition that was submitted to the metastore
 * @throws TException if the metastore call fails
 */
private static Table createView(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String view,
    String table,
    List<FieldSchema> partitionCols)
  throws TException {
  // Storage descriptor: data columns, empty parameters, default SerDe info.
  StorageDescriptor viewSd = new StorageDescriptor();
  viewSd.setSerdeInfo(new SerDeInfo());
  viewSd.setParameters(new HashMap<String, String>());
  viewSd.setCols(DATA_COLUMNS);

  Table viewDefinition = new Table();
  viewDefinition.setSd(viewSd);
  viewDefinition.setPartitionKeys(partitionCols);
  viewDefinition.setTableType(TableType.VIRTUAL_VIEW.name());
  viewDefinition.setTableName(view);
  viewDefinition.setDbName(database);
  viewDefinition.setViewOriginalText(hql(database, table));
  viewDefinition.setViewExpandedText(expandHql(database, table, DATA_COLUMNS, partitionCols));

  metaStoreClient.createTable(viewDefinition);
  return viewDefinition;
}
 
Example #6
Source Project: waggle-dance   Author: HotelsDotCom   File: TestUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Defines an external, unpartitioned table at {@code location} and creates it through the
 * given metastore client.
 *
 * @param metaStoreClient client used to create the table
 * @param database database that holds the table
 * @param table name of the table
 * @param location directory whose URI becomes the table location
 * @return the table definition that was submitted to the metastore
 * @throws TException if the metastore call fails
 */
static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    File location)
  throws TException {
  Table definition = new Table();
  definition.setTableName(table);
  definition.setDbName(database);
  definition.putToParameters("EXTERNAL", "TRUE");
  definition.setTableType(TableType.EXTERNAL_TABLE.name());

  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setSerdeInfo(new SerDeInfo());
  descriptor.setParameters(new HashMap<>());
  descriptor.setLocation(location.toURI().toString());
  descriptor.setCols(DATA_COLUMNS);
  definition.setSd(descriptor);

  metaStoreClient.createTable(definition);
  return definition;
}
 
Example #7
Source Project: waggle-dance   Author: HotelsDotCom   File: TestUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Defines an external table at {@code location}, partitioned by {@code PARTITION_COLUMNS}, and
 * creates it through the given metastore client.
 *
 * @param metaStoreClient client used to create the table
 * @param database database that holds the table
 * @param table name of the table
 * @param location directory whose URI becomes the table location
 * @return the table definition that was submitted to the metastore
 * @throws Exception if the metastore call fails
 */
static Table createPartitionedTable(HiveMetaStoreClient metaStoreClient, String database, String table, File location)
  throws Exception {
  Table definition = new Table();
  definition.setTableName(table);
  definition.setDbName(database);
  definition.putToParameters("EXTERNAL", "TRUE");
  definition.setTableType(TableType.EXTERNAL_TABLE.name());
  definition.setPartitionKeys(PARTITION_COLUMNS);

  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setSerdeInfo(new SerDeInfo());
  descriptor.setParameters(new HashMap<>());
  descriptor.setLocation(location.toURI().toString());
  descriptor.setCols(DATA_COLUMNS);
  definition.setSd(descriptor);

  metaStoreClient.createTable(definition);
  return definition;
}
 
Example #8
Source Project: metacat   Author: Netflix   File: HiveConnectorTableService.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Finds the {@link HiveStorageFormat} whose output format and SerDe both match the table's
 * storage descriptor.
 *
 * @param table table to inspect
 * @return the first matching storage format
 * @throws MetaException if the table has no storage descriptor, the descriptor has no SerDe
 *     info, or no known format matches
 */
private HiveStorageFormat extractHiveStorageFormat(final Table table) throws MetaException {
    final StorageDescriptor sd = table.getSd();
    if (sd == null) {
        throw new MetaException("Table is missing storage descriptor");
    }
    final SerDeInfo serde = sd.getSerdeInfo();
    if (serde == null) {
        throw new MetaException("Table storage descriptor is missing SerDe info");
    }
    final String serdeLib = serde.getSerializationLib();
    final String tableOutputFormat = sd.getOutputFormat();

    for (HiveStorageFormat candidate : HiveStorageFormat.values()) {
        // Both the output format and the SerDe have to match.
        if (!candidate.getOutputFormat().equals(tableOutputFormat)) {
            continue;
        }
        if (candidate.getSerde().equals(serdeLib)) {
            return candidate;
        }
    }
    final String message =
        String.format("Output format %s with SerDe %s is not supported", tableOutputFormat, serdeLib);
    throw new MetaException(message);
}
 
Example #9
Source Project: metacat   Author: Netflix   File: HiveConvertersImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts a storage descriptor into a {@code StorageDto}.
 *
 * @param sd descriptor to convert; when {@code null} an empty DTO is returned
 * @param owner owner recorded on the DTO when {@code sd} is present
 * @return the populated DTO, never {@code null}
 */
private StorageDto toStorageDto(@Nullable final StorageDescriptor sd, final String owner) {
    final StorageDto dto = new StorageDto();
    if (sd == null) {
        // No descriptor: nothing to copy, not even the owner.
        return dto;
    }

    dto.setOwner(owner);
    dto.setUri(sd.getLocation());
    dto.setInputFormat(sd.getInputFormat());
    dto.setOutputFormat(sd.getOutputFormat());
    dto.setParameters(sd.getParameters());

    final SerDeInfo serdeInfo = sd.getSerdeInfo();
    if (serdeInfo != null) {
        dto.setSerializationLib(serdeInfo.getSerializationLib());
        dto.setSerdeInfoParameters(serdeInfo.getParameters());
    }
    return dto;
}
 
Example #10
Source Project: flink   Author: apache   File: HiveWriterFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a writer factory and stores the given settings.
 *
 * <p>{@code hiveOutputFormatClz} is checked with {@code Preconditions.checkArgument} to be
 * assignable to {@code HiveOutputFormat}; the column names and types are read from {@code schema}.
 */
public HiveWriterFactory(
		JobConf jobConf,
		Class hiveOutputFormatClz,
		SerDeInfo serDeInfo,
		TableSchema schema,
		String[] partitionColumns,
		Properties tableProperties,
		HiveShim hiveShim,
		boolean isCompressed) {
	boolean isHiveOutputFormat = HiveOutputFormat.class.isAssignableFrom(hiveOutputFormatClz);
	Preconditions.checkArgument(isHiveOutputFormat, "The output format should be an instance of HiveOutputFormat");

	// Output format and serialization settings.
	this.hiveOutputFormatClz = hiveOutputFormatClz;
	this.serDeInfo = serDeInfo;
	this.isCompressed = isCompressed;
	this.hiveShim = hiveShim;
	this.confWrapper = new JobConfWrapper(jobConf);

	// Table layout.
	this.allColumns = schema.getFieldNames();
	this.allTypes = schema.getFieldDataTypes();
	this.partitionColumns = partitionColumns;
	this.tableProperties = tableProperties;
}
 
Example #11
Source Project: flink   Author: apache   File: HiveOutputFormatFactoryTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a writer factory around {@code VerifyURIOutputFormat} and asks the output format
 * factory to create an output format for a path using the test URI scheme and authority.
 */
@Test
public void testCreateOutputFormat() {
	SerDeInfo lazySimpleSerDe = new SerDeInfo("name", LazySimpleSerDe.class.getName(), Collections.emptyMap());
	TableSchema singleIntColumn = TableSchema.builder().field("x", DataTypes.INT()).build();
	String[] noPartitionColumns = new String[0];
	Properties emptyTableProperties = new Properties();

	HiveWriterFactory hiveWriterFactory = new HiveWriterFactory(
			new JobConf(),
			VerifyURIOutputFormat.class,
			lazySimpleSerDe,
			singleIntColumn,
			noPartitionColumns,
			emptyTableProperties,
			HiveShimLoader.loadHiveShim(HiveShimLoader.getHiveVersion()),
			false);

	HiveOutputFormatFactory outputFormatFactory = new HiveOutputFormatFactory(hiveWriterFactory);
	outputFormatFactory.createOutputFormat(
			new org.apache.flink.core.fs.Path(TEST_URI_SCHEME, TEST_URI_AUTHORITY, "/foo/path"));
}
 
Example #12
Source Project: incubator-gobblin   Author: apache   File: LocalHiveMetastoreTestUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Adds a partition with the given values to the local metastore, using a dummy SerDe and the
 * table's partition keys as the partition's columns.
 *
 * @param tbl table the partition belongs to
 * @param values partition values
 * @param createTime create time stored on the partition
 * @return the partition as returned by the metastore client
 * @throws Exception if the metastore call fails
 */
public Partition addTestPartition(Table tbl, List<String> values, int createTime) throws Exception {
  String tableLocation = tbl.getSd().getLocation();
  // NOTE(review): `values` is appended via List.toString(), e.g. "<location>[a, b]" -
  // confirm that this is the intended partition location format.
  String partitionLocation = StringUtils.isNotBlank(tableLocation)
      ? tableLocation + values
      : "/tmp/" + tbl.getTableName() + "/part1";

  SerDeInfo dummySerDe =
      new SerDeInfo("name", "serializationLib", ImmutableMap.of(HiveAvroSerDeManager.SCHEMA_URL, "/tmp/dummy"));

  StorageDescriptor partitionSd = new StorageDescriptor();
  partitionSd.setLocation(partitionLocation);
  partitionSd.setSerdeInfo(dummySerDe);
  partitionSd.setCols(tbl.getPartitionKeys());

  Partition newPartition =
      new Partition(values, tbl.getDbName(), tbl.getTableName(), 1, 1, partitionSd, new HashMap<String, String>());
  newPartition.setCreateTime(createTime);
  return this.getLocalMetastoreClient().add_partition(newPartition);
}
 
Example #13
/**
 * Builds an in-memory metastore table object; nothing is sent to the metastore and the
 * {@code client} argument is not used.
 *
 * @param client unused
 * @param dbName database name set on the table
 * @param tabName table name, also used as the SerDe name
 * @param cols columns of the table
 * @return the populated table object
 */
public Table makeMetastoreTableObject(HiveMetaStoreClient client,
    String dbName, String tabName, List<FieldSchema> cols) throws Exception {
  Table result = new Table();
  result.setDbName(dbName);
  result.setTableName(tabName);
  result.setParameters(new HashMap<String, String>());

  // SerDe named after the table, with serialization format "1".
  HashMap<String, String> serdeParameters = new HashMap<String, String>();
  serdeParameters.put(serdeConstants.SERIALIZATION_FORMAT, "1");
  SerDeInfo serde = new SerDeInfo();
  serde.setName(result.getTableName());
  serde.setParameters(serdeParameters);

  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setCols(cols);
  descriptor.setCompressed(false);
  descriptor.setParameters(new HashMap<String, String>());
  descriptor.setSerdeInfo(serde);
  descriptor.setSortCols(new ArrayList<Order>());
  result.setSd(descriptor);

  return result;
}
 
Example #14
Source Project: kite   Author: kite-sdk   File: HiveUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a table object whose collections are all initialised to empty, mutable instances.
 *
 * @param namespace database name set on the table
 * @param name table name
 * @return the empty table object
 */
static Table createEmptyTable(String namespace, String name) {
  // Skew information with empty containers.
  SkewedInfo emptySkew = new SkewedInfo();
  emptySkew.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
  emptySkew.setSkewedColValues(new ArrayList<List<String>>());
  emptySkew.setSkewedColNames(new ArrayList<String>());

  SerDeInfo emptySerDe = new SerDeInfo();
  emptySerDe.setParameters(new HashMap<String, String>());

  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setSerdeInfo(emptySerDe);
  descriptor.setSkewedInfo(emptySkew);
  descriptor.setNumBuckets(-1);
  descriptor.setBucketCols(new ArrayList<String>());
  descriptor.setSortCols(new ArrayList<Order>());
  descriptor.setCols(new ArrayList<FieldSchema>());
  descriptor.setParameters(new HashMap<String, String>());

  Table emptyTable = new Table();
  emptyTable.setDbName(namespace);
  emptyTable.setTableName(name);
  emptyTable.setParameters(new HashMap<String, String>());
  emptyTable.setPartitionKeys(new ArrayList<FieldSchema>());
  emptyTable.setSd(descriptor);
  return emptyTable;
}
 
Example #15
Source Project: data-highway   Author: HotelsDotCom   File: AvroStorageDescriptorFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a storage descriptor for the given location with the Avro input format, output
 * format and SerDe constants, and no columns.
 *
 * @param location location set on the descriptor
 * @return the new storage descriptor
 */
public static StorageDescriptor create(String location) {
  SerDeInfo avroSerDe = new SerDeInfo();
  avroSerDe.setSerializationLib(AVRO_SERDE);

  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setSerdeInfo(avroSerDe);
  descriptor.setCols(emptyList());
  descriptor.setLocation(location);
  descriptor.setOutputFormat(AVRO_OUTPUT_FORMAT);
  descriptor.setInputFormat(AVRO_INPUT_FORMAT);
  return descriptor;
}
 
Example #16
Source Project: data-highway   Author: HotelsDotCom   File: AvroHiveTableStrategyTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies identity, parameters, partition keys and storage descriptor of a table produced by
 * {@code newHiveTable} for schema version 1.
 */
@Test
public void newHiveTable() throws URISyntaxException {
  String schemaUrl = "https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc";
  when(uriResolver.resolve(schema1, TABLE, 1)).thenReturn(new URI(schemaUrl));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table created = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  // Identity and type.
  assertThat(created.getDbName(), is(DATABASE));
  assertThat(created.getTableName(), is(TABLE));
  assertThat(created.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));

  // Table parameters.
  Map<String, String> tableParameters = created.getParameters();
  assertThat(tableParameters.get("EXTERNAL"), is("TRUE"));
  assertThat(tableParameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(tableParameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(tableParameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL), is(schemaUrl));
  assertThat(tableParameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("1"));

  // Partition keys.
  List<FieldSchema> keys = created.getPartitionKeys();
  assertThat(keys.size(), is(1));
  assertThat(keys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));

  // Storage descriptor and SerDe.
  StorageDescriptor sd = created.getSd();
  assertThat(sd.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(sd.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(sd.getLocation(), is(LOCATION));
  assertThat(sd.getCols().size(), is(0));
  assertThat(sd.getSerdeInfo().getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example #17
Source Project: data-highway   Author: HotelsDotCom   File: AvroHiveTableStrategyTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that altering a version-1 table to schema version 2 updates the schema URL and
 * version parameters while identity, partition keys and storage descriptor stay as expected.
 */
@Test
public void alterHiveTable() throws URISyntaxException {
  String schemaUrlV1 = "https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc";
  String schemaUrlV2 = "https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/2/table_v2.avsc";
  when(uriResolver.resolve(schema1, TABLE, 1)).thenReturn(new URI(schemaUrlV1));
  when(uriResolver.resolve(schema2, TABLE, 2)).thenReturn(new URI(schemaUrlV2));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table original = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);
  Table altered = underTest.alterHiveTable(original, schema2, 2);

  // Identity and type.
  assertThat(altered.getDbName(), is(DATABASE));
  assertThat(altered.getTableName(), is(TABLE));
  assertThat(altered.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));

  // Table parameters now point at version 2 of the schema.
  Map<String, String> tableParameters = altered.getParameters();
  assertThat(tableParameters.get("EXTERNAL"), is("TRUE"));
  assertThat(tableParameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(tableParameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(tableParameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL), is(schemaUrlV2));
  assertThat(tableParameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("2"));

  // Partition keys.
  List<FieldSchema> keys = altered.getPartitionKeys();
  assertThat(keys.size(), is(1));
  assertThat(keys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));

  // Storage descriptor and SerDe.
  StorageDescriptor sd = altered.getSd();
  assertThat(sd.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(sd.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(sd.getLocation(), is(LOCATION));
  assertThat(sd.getCols().size(), is(0));
  assertThat(sd.getSerdeInfo().getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example #18
/**
 * Converts a Glue catalog SerDe info into a Hive {@code SerDeInfo}, copying name, parameters and
 * serialization library. Null catalog parameters become a new empty map.
 *
 * @param catalogSerDeInfo Glue SerDe info to convert
 * @return the equivalent Hive SerDe info
 */
public static SerDeInfo convertSerDeInfo(com.amazonaws.services.glue.model.SerDeInfo catalogSerDeInfo) {
  SerDeInfo converted = new SerDeInfo();
  converted.setSerializationLib(catalogSerDeInfo.getSerializationLibrary());
  converted.setParameters(firstNonNull(catalogSerDeInfo.getParameters(), Maps.<String, String>newHashMap()));
  converted.setName(catalogSerDeInfo.getName());
  return converted;
}
 
Example #19
/**
 * Converts a Hive {@code SerDeInfo} into a Glue catalog SerDe info, copying name, parameters and
 * serialization library as they are.
 *
 * @param hiveSerDeInfo Hive SerDe info to convert
 * @return the equivalent Glue SerDe info
 */
public static com.amazonaws.services.glue.model.SerDeInfo convertSerDeInfo(
        SerDeInfo hiveSerDeInfo) {
  com.amazonaws.services.glue.model.SerDeInfo converted = new com.amazonaws.services.glue.model.SerDeInfo();
  converted.setSerializationLibrary(hiveSerDeInfo.getSerializationLib());
  converted.setParameters(hiveSerDeInfo.getParameters());
  converted.setName(hiveSerDeInfo.getName());
  return converted;
}
 
Example #20
Source Project: presto   Author: prestosql   File: ThriftMetastoreUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the table uses the Avro SerDe and has a value for {@code AVRO_SCHEMA_URL_KEY}
 * in either its table parameters or its SerDe parameters.
 *
 * @param table metastore table to inspect
 * @return {@code false} when the table has no parameters, no serialization library, no value
 *     for the key, or a serialization library other than the Avro SerDe
 */
public static boolean isAvroTableWithSchemaSet(org.apache.hadoop.hive.metastore.api.Table table)
{
    if (table.getParameters() == null) {
        return false;
    }
    SerDeInfo serde = getSerdeInfo(table);

    String serializationLib = serde.getSerializationLib();
    if (serializationLib == null) {
        return false;
    }

    boolean setOnTable = table.getParameters().get(AVRO_SCHEMA_URL_KEY) != null;
    boolean setOnSerde = serde.getParameters() != null
            && serde.getParameters().get(AVRO_SCHEMA_URL_KEY) != null;
    if (!setOnTable && !setOnSerde) {
        return false;
    }

    return serializationLib.equals(AVRO.getSerDe());
}
 
Example #21
Source Project: presto   Author: prestosql   File: ThriftMetastoreUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the SerDe info of the table's storage descriptor.
 *
 * @param table metastore table to read from
 * @return the SerDe info, never {@code null}
 * @throws PrestoException with {@code HIVE_INVALID_METADATA} when the storage descriptor or its
 *     SerDe info is missing
 */
private static SerDeInfo getSerdeInfo(org.apache.hadoop.hive.metastore.api.Table table)
{
    StorageDescriptor sd = table.getSd();
    if (sd != null) {
        SerDeInfo serde = sd.getSerdeInfo();
        if (serde != null) {
            return serde;
        }
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }
    throw new PrestoException(HIVE_INVALID_METADATA, "Table does not contain a storage descriptor: " + table);
}
 
Example #22
Source Project: presto   Author: prestosql   File: ThriftMetastoreUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a metastore storage descriptor from the given columns and storage settings.
 *
 * @param tableName name given to the SerDe info
 * @param columns columns converted with {@code toMetastoreApiFieldSchema}
 * @param storage source of location, storage format, SerDe parameters, skew flag and bucketing
 * @return the populated storage descriptor
 */
private static StorageDescriptor makeStorageDescriptor(String tableName, List<Column> columns, Storage storage)
{
    StorageDescriptor descriptor = new StorageDescriptor();
    descriptor.setLocation(emptyToNull(storage.getLocation()));
    descriptor.setCols(columns.stream()
            .map(ThriftMetastoreUtil::toMetastoreApiFieldSchema)
            .collect(toImmutableList()));

    SerDeInfo serde = new SerDeInfo();
    serde.setName(tableName);
    serde.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serde.setParameters(storage.getSerdeParameters());
    descriptor.setSerdeInfo(serde);

    descriptor.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    descriptor.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    descriptor.setSkewedInfoIsSet(storage.isSkewed());
    descriptor.setParameters(ImmutableMap.of());

    Optional<HiveBucketProperty> bucketProperty = storage.getBucketProperty();
    if (!bucketProperty.isPresent()) {
        return descriptor;
    }

    // Bucketing is configured: copy bucket count and columns, plus the sort order if there is one.
    HiveBucketProperty bucketing = bucketProperty.get();
    descriptor.setNumBuckets(bucketing.getBucketCount());
    descriptor.setBucketCols(bucketing.getBucketedBy());
    if (!bucketing.getSortedBy().isEmpty()) {
        descriptor.setSortCols(bucketing.getSortedBy().stream()
                .map(column -> new Order(column.getColumnName(), column.getOrder().getHiveOrder()))
                .collect(toImmutableList()));
    }
    return descriptor;
}
 
Example #23
Source Project: circus-train   Author: HotelsDotCom   File: FilterToolIntegrationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes two tab-separated data files under {@code sourceTableUri}, creates the external source
 * table partitioned by {@code local_date} and {@code local_hour}, and registers one partition
 * per data directory.
 *
 * @param sourceTableUri root directory of the source table
 * @throws Exception if writing the files or talking to the metastore fails
 */
private void createTable(File sourceTableUri) throws Exception {
  // First partition: local_date=2000-01-01/local_hour=0
  File firstDateDir = new File(sourceTableUri, "local_date=2000-01-01");
  File firstHourDir = new File(firstDateDir, "local_hour=0");
  FileUtils.writeStringToFile(new File(firstHourDir, PART_00000), "1\tadam\tlondon\n2\tsusan\tglasgow\n");

  // Second partition: local_date=2000-01-02/local_hour=0
  File secondDateDir = new File(sourceTableUri, "local_date=2000-01-02");
  File secondHourDir = new File(secondDateDir, "local_hour=0");
  FileUtils.writeStringToFile(new File(secondHourDir, PART_00000), "1\tchun\tbeijing\n2\tshanghai\tmilan\n");

  HiveMetaStoreClient metastore = sourceCatalog.client();

  List<FieldSchema> partitionKeys = Arrays.asList(
      new FieldSchema("local_date", "string", ""),
      new FieldSchema("local_hour", "string", ""));
  List<FieldSchema> columns = Arrays.asList(
      new FieldSchema("id", "bigint", ""),
      new FieldSchema("name", "string", ""),
      new FieldSchema("city", "tinyint", ""));

  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setCols(columns);
  descriptor.setLocation(sourceTableUri.toURI().toString());
  descriptor.setParameters(new HashMap<String, String>());
  descriptor.setSerdeInfo(new SerDeInfo());

  Table sourceTable = new Table();
  sourceTable.setDbName(DATABASE);
  sourceTable.setTableName(TABLE);
  sourceTable.setTableType(TableType.EXTERNAL_TABLE.name());
  sourceTable.setParameters(new HashMap<String, String>());
  sourceTable.setPartitionKeys(partitionKeys);
  sourceTable.setSd(descriptor);

  metastore.createTable(sourceTable);
  LOG.info(">>>> Partitions added: {}",
      metastore.add_partitions(Arrays.asList(
          newPartition(descriptor, Arrays.asList("2000-01-01", "0"), firstHourDir),
          newPartition(descriptor, Arrays.asList("2000-01-02", "0"), secondHourDir))));
}
 
Example #24
Source Project: circus-train   Author: HotelsDotCom   File: CircusTrainTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the external source table, with a single string column {@code col1}, before each test.
 */
@Before
public void before() throws TException, IOException {
  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setSerdeInfo(new SerDeInfo());
  descriptor.setCols(Arrays.asList(new FieldSchema("col1", "string", null)));

  Table sourceTable = new Table();
  sourceTable.setTableName("source_" + TABLE);
  sourceTable.setDbName(DATABASE);
  sourceTable.putToParameters("EXTERNAL", "TRUE");
  sourceTable.setTableType(TableType.EXTERNAL_TABLE.name());
  sourceTable.setSd(descriptor);

  hive.client().createTable(sourceTable);
}
 
Example #25
Source Project: circus-train   Author: HotelsDotCom   File: HiveEntityFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a storage descriptor at {@code location} with one field schema per column name and a
 * default SerDe info.
 *
 * @param location directory whose URI becomes the descriptor's location
 * @param columns column names, each converted with {@code newFieldSchema}
 * @return the new storage descriptor
 */
public static StorageDescriptor newStorageDescriptor(File location, String... columns) {
  StorageDescriptor descriptor = new StorageDescriptor();
  List<FieldSchema> fieldSchemas = new ArrayList<>(columns.length);
  for (int i = 0; i < columns.length; i++) {
    fieldSchemas.add(newFieldSchema(columns[i]));
  }
  descriptor.setCols(fieldSchemas);
  descriptor.setSerdeInfo(new SerDeInfo());
  descriptor.setLocation(location.toURI().toString());
  return descriptor;
}
 
Example #26
Source Project: circus-train   Author: HotelsDotCom   File: TestUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates an external, unpartitioned text table using the OpenCSV SerDe and then stores long
 * column statistics for the {@code id} column.
 *
 * @param metaStoreClient client used for both metastore calls
 * @param database database that holds the table
 * @param table name of the table
 * @param location table location
 * @return the table definition that was submitted to the metastore
 * @throws TException if a metastore call fails
 */
public static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location)
  throws TException {
  Table definition = new Table();
  definition.setDbName(database);
  definition.setTableName(table);
  definition.setTableType(TableType.EXTERNAL_TABLE.name());
  definition.putToParameters("EXTERNAL", "TRUE");

  SerDeInfo csvSerDe = new SerDeInfo();
  csvSerDe.setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");

  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setCols(DATA_COLUMNS);
  descriptor.setLocation(location.toString());
  descriptor.setParameters(new HashMap<String, String>());
  descriptor.setInputFormat(TextInputFormat.class.getName());
  descriptor.setOutputFormat(TextOutputFormat.class.getName());
  descriptor.setSerdeInfo(csvSerDe);
  definition.setSd(descriptor);

  metaStoreClient.createTable(definition);

  // Table-level statistics for the "id" column.
  ColumnStatisticsObj idStatistics = new ColumnStatisticsObj("id", "bigint",
      new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L)));
  ColumnStatistics statistics = new ColumnStatistics(
      new ColumnStatisticsDesc(true, database, table),
      Collections.singletonList(idStatistics));
  metaStoreClient.updateTableColumnStatistics(statistics);

  return definition;
}
 
Example #27
Source Project: circus-train   Author: HotelsDotCom   File: TestUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a fully populated test table: fixed type, owner, create time and retention, a
 * {@code "read"} user privilege, a storage descriptor and one custom parameter.
 *
 * @param database database name set on the table
 * @param tableName table name
 * @return the populated table object
 */
public static Table newTable(String database, String tableName) {
  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setCols(COLS);
  descriptor.setInputFormat(INPUT_FORMAT);
  descriptor.setOutputFormat(OUTPUT_FORMAT);
  descriptor.setSerdeInfo(new SerDeInfo(SERDE_INFO_NAME, SERIALIZATION_LIB, new HashMap<String, String>()));
  descriptor.setSkewedInfo(new SkewedInfo());
  descriptor.setParameters(new HashMap<String, String>());
  // NOTE(review): the location is built from the DATABASE constant, not the `database`
  // argument - confirm that this is intended.
  descriptor.setLocation(DATABASE + "/" + tableName + "/");

  Map<String, List<PrivilegeGrantInfo>> grantsByUser = new HashMap<>();
  grantsByUser.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privilegeSet = new PrincipalPrivilegeSet();
  privilegeSet.setUserPrivileges(grantsByUser);

  Map<String, String> tableParameters = new HashMap<>();
  tableParameters.put("com.company.parameter", "abc");

  Table result = new Table();
  result.setDbName(database);
  result.setTableName(tableName);
  result.setTableType(TABLE_TYPE);
  result.setOwner(OWNER);
  result.setCreateTime(CREATE_TIME);
  result.setRetention(RETENTION);
  result.setPrivileges(privilegeSet);
  result.setSd(descriptor);
  result.setParameters(tableParameters);
  return result;
}
 
Example #28
Source Project: circus-train   Author: HotelsDotCom   File: TestUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a fully populated test partition with a single partition value, a {@code "read"} user
 * privilege, a storage descriptor and one custom parameter.
 *
 * @param database database name set on the partition
 * @param tableName table name set on the partition
 * @param partitionValue the partition's only value
 * @return the populated partition object
 */
public static Partition newPartition(String database, String tableName, String partitionValue) {
  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setCols(COLS);
  descriptor.setInputFormat(INPUT_FORMAT);
  descriptor.setOutputFormat(OUTPUT_FORMAT);
  descriptor.setSerdeInfo(new SerDeInfo(SERDE_INFO_NAME, SERIALIZATION_LIB, new HashMap<String, String>()));
  descriptor.setSkewedInfo(new SkewedInfo());
  descriptor.setParameters(new HashMap<String, String>());
  // NOTE(review): the location is built from the DATABASE constant, not the `database`
  // argument - confirm that this is intended.
  descriptor.setLocation(DATABASE + "/" + tableName + "/" + partitionValue + "/");

  Map<String, List<PrivilegeGrantInfo>> grantsByUser = new HashMap<>();
  grantsByUser.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privilegeSet = new PrincipalPrivilegeSet();
  privilegeSet.setUserPrivileges(grantsByUser);

  Map<String, String> partitionParameters = new HashMap<>();
  partitionParameters.put("com.company.parameter", "abc");

  Partition result = new Partition();
  result.setDbName(database);
  result.setTableName(tableName);
  result.setCreateTime(CREATE_TIME);
  result.setValues(ImmutableList.of(partitionValue));
  result.setPrivileges(privilegeSet);
  result.setSd(descriptor);
  result.setParameters(partitionParameters);
  return result;
}
 
Example #29
Source Project: circus-train   Author: HotelsDotCom   File: TableTransformationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Populates the {@code table} fixture with a {@code "read"} user privilege, a storage
 * descriptor with one int column, and one custom parameter.
 */
@Before
public void init() {
  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setCols(Arrays.asList(new FieldSchema("a", "int", null)));
  descriptor.setInputFormat("input_format");
  descriptor.setOutputFormat("output_format");
  descriptor.setSerdeInfo(new SerDeInfo("serde", "lib", new HashMap<String, String>()));
  descriptor.setSkewedInfo(new SkewedInfo());
  descriptor.setParameters(new HashMap<String, String>());
  descriptor.setLocation("database/table/");

  Map<String, List<PrivilegeGrantInfo>> grantsByUser = new HashMap<>();
  grantsByUser.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privilegeSet = new PrincipalPrivilegeSet();
  privilegeSet.setUserPrivileges(grantsByUser);

  Map<String, String> tableParameters = new HashMap<>();
  tableParameters.put("com.company.parameter", "abc");

  table = new Table();
  table.setDbName("database");
  table.setTableName("table");
  table.setTableType("type");
  table.setPrivileges(privilegeSet);
  table.setSd(descriptor);
  table.setParameters(tableParameters);
}
 
Example #30
Source Project: circus-train   Author: HotelsDotCom   File: PartitionTransformationTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Populates the {@code partition} fixture with a single value, a {@code "read"} user privilege,
 * a storage descriptor with one int column, and one custom parameter.
 */
@Before
public void init() {
  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setCols(Arrays.asList(new FieldSchema("a", "int", null)));
  descriptor.setInputFormat("input_format");
  descriptor.setOutputFormat("output_format");
  descriptor.setSerdeInfo(new SerDeInfo("serde", "lib", new HashMap<String, String>()));
  descriptor.setSkewedInfo(new SkewedInfo());
  descriptor.setParameters(new HashMap<String, String>());
  descriptor.setLocation("database/table/part/");

  Map<String, List<PrivilegeGrantInfo>> grantsByUser = new HashMap<>();
  grantsByUser.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privilegeSet = new PrincipalPrivilegeSet();
  privilegeSet.setUserPrivileges(grantsByUser);

  Map<String, String> partitionParameters = new HashMap<>();
  partitionParameters.put("com.company.parameter", "abc");

  partition = new Partition();
  partition.setDbName("database");
  partition.setTableName("table");
  partition.setValues(ImmutableList.of("part"));
  partition.setPrivileges(privilegeSet);
  partition.setSd(descriptor);
  partition.setParameters(partitionParameters);
}