org.apache.hadoop.hive.metastore.api.SerDeInfo Java Examples

The following examples show how to use org.apache.hadoop.hive.metastore.api.SerDeInfo. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveUtils.java    From kite with Apache License 2.0 6 votes vote down vote up
static Table createEmptyTable(String namespace, String name) {
  Table table = new Table();
  table.setDbName(namespace);
  table.setTableName(name);
  table.setPartitionKeys(new ArrayList<FieldSchema>());
  table.setParameters(new HashMap<String, String>());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setSerdeInfo(new SerDeInfo());
  sd.setNumBuckets(-1);
  sd.setBucketCols(new ArrayList<String>());
  sd.setCols(new ArrayList<FieldSchema>());
  sd.setParameters(new HashMap<String, String>());
  sd.setSortCols(new ArrayList<Order>());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  SkewedInfo skewInfo = new SkewedInfo();
  skewInfo.setSkewedColNames(new ArrayList<String>());
  skewInfo.setSkewedColValues(new ArrayList<List<String>>());
  skewInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
  sd.setSkewedInfo(skewInfo);
  table.setSd(sd);

  return table;
}
 
Example #2
Source File: TestUtils.java    From circus-train with Apache License 2.0 6 votes vote down vote up
private static Table createView(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String view,
    String table,
    List<FieldSchema> partitionCols)
  throws TException {
  Table hiveView = new Table();
  hiveView.setDbName(database);
  hiveView.setTableName(view);
  hiveView.setTableType(TableType.VIRTUAL_VIEW.name());
  hiveView.setViewOriginalText(hql(database, table));
  hiveView.setViewExpandedText(expandHql(database, table, DATA_COLUMNS, partitionCols));
  hiveView.setPartitionKeys(partitionCols);

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());
  hiveView.setSd(sd);

  metaStoreClient.createTable(hiveView);

  return hiveView;
}
 
Example #3
Source File: TestUtils.java    From waggle-dance with Apache License 2.0 6 votes vote down vote up
static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    File location)
  throws TException {
  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setLocation(location.toURI().toString());
  sd.setParameters(new HashMap<>());
  sd.setSerdeInfo(new SerDeInfo());

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  return hiveTable;
}
 
Example #4
Source File: TestUtils.java    From waggle-dance with Apache License 2.0 6 votes vote down vote up
static Table createPartitionedTable(HiveMetaStoreClient metaStoreClient, String database, String table, File location)
  throws Exception {

  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  hiveTable.setPartitionKeys(PARTITION_COLUMNS);

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setLocation(location.toURI().toString());
  sd.setParameters(new HashMap<>());
  sd.setSerdeInfo(new SerDeInfo());

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  return hiveTable;
}
 
Example #5
Source File: HiveConnectorTableService.java    From metacat with Apache License 2.0 6 votes vote down vote up
private HiveStorageFormat extractHiveStorageFormat(final Table table) throws MetaException {
    final StorageDescriptor descriptor = table.getSd();
    if (descriptor == null) {
        throw new MetaException("Table is missing storage descriptor");
    }
    final SerDeInfo serdeInfo = descriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new MetaException(
            "Table storage descriptor is missing SerDe info");
    }
    final String outputFormat = descriptor.getOutputFormat();
    final String serializationLib = serdeInfo.getSerializationLib();

    for (HiveStorageFormat format : HiveStorageFormat.values()) {
        if (format.getOutputFormat().equals(outputFormat) && format.getSerde().equals(serializationLib)) {
            return format;
        }
    }
    throw new MetaException(
        String.format("Output format %s with SerDe %s is not supported", outputFormat, serializationLib));
}
 
Example #6
Source File: HiveServer2CoreTest.java    From beeju with Apache License 2.0 6 votes vote down vote up
private Table createPartitionedTable(String databaseName, String tableName, HiveServer2Core server) throws Exception {
  Table table = new Table();
  table.setDbName(DATABASE);
  table.setTableName(tableName);
  table.setPartitionKeys(Arrays.asList(new FieldSchema("partcol", "int", null)));
  table.setSd(new StorageDescriptor());
  table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().setSerdeInfo(new SerDeInfo());
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  HiveMetaStoreClient client = server.getCore().newClient();
  client.createTable(table);
  client.close();
  return table;
}
 
Example #7
Source File: HiveServer2CoreTest.java    From beeju with Apache License 2.0 6 votes vote down vote up
private Table createUnpartitionedTable(String databaseName, String tableName, HiveServer2Core server)
    throws Exception {
  Table table = new Table();
  table.setDbName(databaseName);
  table.setTableName(tableName);
  table.setSd(new StorageDescriptor());
  table.getSd().setCols(Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)));
  table.getSd().setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
  table.getSd().setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
  table.getSd().setSerdeInfo(new SerDeInfo());
  table.getSd().getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
  HiveMetaStoreClient client = server.getCore().newClient();
  client.createTable(table);
  client.close();
  return table;
}
 
Example #8
Source File: HiveConvertersImpl.java    From metacat with Apache License 2.0 6 votes vote down vote up
private StorageDto toStorageDto(@Nullable final StorageDescriptor sd, final String owner) {
    final StorageDto result = new StorageDto();
    if (sd != null) {
        result.setOwner(owner);
        result.setUri(sd.getLocation());
        result.setInputFormat(sd.getInputFormat());
        result.setOutputFormat(sd.getOutputFormat());
        result.setParameters(sd.getParameters());
        final SerDeInfo serde = sd.getSerdeInfo();
        if (serde != null) {
            result.setSerializationLib(serde.getSerializationLib());
            result.setSerdeInfoParameters(serde.getParameters());
        }
    }
    return result;
}
 
Example #9
Source File: HiveTableUtil.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Create properties info to initialize a SerDe.
 * @param storageDescriptor
 * @return
 */
public static Properties createPropertiesFromStorageDescriptor(StorageDescriptor storageDescriptor) {
	SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
	Map<String, String> parameters = serDeInfo.getParameters();
	Properties properties = new Properties();
	properties.setProperty(
			serdeConstants.SERIALIZATION_FORMAT,
			parameters.get(serdeConstants.SERIALIZATION_FORMAT));
	List<String> colTypes = new ArrayList<>();
	List<String> colNames = new ArrayList<>();
	List<FieldSchema> cols = storageDescriptor.getCols();
	for (FieldSchema col: cols){
		colTypes.add(col.getType());
		colNames.add(col.getName());
	}
	properties.setProperty(serdeConstants.LIST_COLUMNS, StringUtils.join(colNames, String.valueOf(SerDeUtils.COMMA)));
	// Note: serdeConstants.COLUMN_NAME_DELIMITER is not defined in previous Hive. We use a literal to save on shim
	properties.setProperty("column.name.delimite", String.valueOf(SerDeUtils.COMMA));
	properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, StringUtils.join(colTypes, DEFAULT_LIST_COLUMN_TYPES_SEPARATOR));
	properties.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
	properties.putAll(parameters);
	return properties;
}
 
Example #10
Source File: HiveWriterFactory.java    From flink with Apache License 2.0 6 votes vote down vote up
public HiveWriterFactory(
		JobConf jobConf,
		Class hiveOutputFormatClz,
		SerDeInfo serDeInfo,
		TableSchema schema,
		String[] partitionColumns,
		Properties tableProperties,
		HiveShim hiveShim,
		boolean isCompressed) {
	Preconditions.checkArgument(HiveOutputFormat.class.isAssignableFrom(hiveOutputFormatClz),
			"The output format should be an instance of HiveOutputFormat");
	this.confWrapper = new JobConfWrapper(jobConf);
	this.hiveOutputFormatClz = hiveOutputFormatClz;
	this.serDeInfo = serDeInfo;
	this.allColumns = schema.getFieldNames();
	this.allTypes = schema.getFieldDataTypes();
	this.partitionColumns = partitionColumns;
	this.tableProperties = tableProperties;
	this.hiveShim = hiveShim;
	this.isCompressed = isCompressed;
}
 
Example #11
Source File: ThriftMetastoreUtil.java    From presto with Apache License 2.0 6 votes vote down vote up
private static void fromMetastoreApiStorageDescriptor(
        Map<String, String> tableParameters,
        StorageDescriptor storageDescriptor,
        Storage.Builder builder,
        String tablePartitionName)
{
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    builder.setStorageFormat(StorageFormat.createNullable(serdeInfo.getSerializationLib(), storageDescriptor.getInputFormat(), storageDescriptor.getOutputFormat()))
            .setLocation(nullToEmpty(storageDescriptor.getLocation()))
            .setBucketProperty(HiveBucketProperty.fromStorageDescriptor(tableParameters, storageDescriptor, tablePartitionName))
            .setSkewed(storageDescriptor.isSetSkewedInfo() && storageDescriptor.getSkewedInfo().isSetSkewedColNames() && !storageDescriptor.getSkewedInfo().getSkewedColNames().isEmpty())
            .setSerdeParameters(serdeInfo.getParameters() == null ? ImmutableMap.of() : serdeInfo.getParameters());
}
 
Example #12
Source File: HiveOutputFormatFactoryTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testCreateOutputFormat() {
	TableSchema schema = TableSchema.builder().field("x", DataTypes.INT()).build();
	SerDeInfo serDeInfo = new SerDeInfo("name", LazySimpleSerDe.class.getName(), Collections.emptyMap());
	HiveWriterFactory writerFactory = new HiveWriterFactory(
			new JobConf(),
			VerifyURIOutputFormat.class,
			serDeInfo, schema,
			new String[0],
			new Properties(),
			HiveShimLoader.loadHiveShim(HiveShimLoader.getHiveVersion()),
			false);
	HiveOutputFormatFactory factory = new HiveOutputFormatFactory(writerFactory);
	org.apache.flink.core.fs.Path path = new org.apache.flink.core.fs.Path(TEST_URI_SCHEME, TEST_URI_AUTHORITY, "/foo/path");
	factory.createOutputFormat(path);
}
 
Example #13
Source File: LocalHiveMetastoreTestUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
public Partition addTestPartition(Table tbl, List<String> values, int createTime) throws Exception {
  StorageDescriptor partitionSd = new StorageDescriptor();
  if (StringUtils.isNotBlank(tbl.getSd().getLocation())) {
    partitionSd.setLocation(tbl.getSd().getLocation() + values);
  } else {
    partitionSd.setLocation("/tmp/" + tbl.getTableName() + "/part1");
  }

  partitionSd.setSerdeInfo(
      new SerDeInfo("name", "serializationLib", ImmutableMap.of(HiveAvroSerDeManager.SCHEMA_URL, "/tmp/dummy")));
  partitionSd.setCols(tbl.getPartitionKeys());
  Partition partition =
      new Partition(values, tbl.getDbName(), tbl.getTableName(), 1, 1, partitionSd, new HashMap<String, String>());
  partition.setCreateTime(createTime);
  return this.getLocalMetastoreClient().add_partition(partition);

}
 
Example #14
Source File: AbstractMetastoreTestWithStaticConfiguration.java    From incubator-sentry with Apache License 2.0 6 votes vote down vote up
public Table makeMetastoreTableObject(HiveMetaStoreClient client,
    String dbName, String tabName, List<FieldSchema> cols) throws Exception {
  Table tbl = new Table();
  tbl.setDbName(dbName);
  tbl.setTableName(tabName);
  StorageDescriptor sd = new StorageDescriptor();
  tbl.setSd(sd);
  tbl.setParameters(new HashMap<String, String>());
  sd.setCols(cols);
  sd.setCompressed(false);
  sd.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setName(tbl.getTableName());
  sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  sd.getSerdeInfo().getParameters()
      .put(serdeConstants.SERIALIZATION_FORMAT, "1");
  sd.setSortCols(new ArrayList<Order>());
  return tbl;
}
 
Example #15
Source File: HiveSchemaEvolutionTest.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
private Optional<Table> createEvolvedDestinationTable(String tableName, String dbName, String location,
    boolean withComment) {
  List<FieldSchema> cols = new ArrayList<>();
  // Existing columns that match avroToOrcSchemaEvolutionTest/source_schema_evolution_enabled.ddl
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldString", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldString" : ""));
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldRecord__superNestedFieldInt", "int",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldRecord.superNestedFieldInt" : ""));
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldString", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldString" : ""));
  // The following column is skipped (simulating un-evolved schema):
  // Column name   : parentFieldRecord__nestedFieldInt
  // Column type   : int
  // Column comment: from flatten_source parentFieldRecord.nestedFieldInt
  cols.add(new FieldSchema("parentFieldInt", "int",
      withComment ? "from flatten_source parentFieldInt" : ""));
  // Extra schema
  cols.add(new FieldSchema("parentFieldRecord__nestedFieldString2", "string",
      withComment ? "from flatten_source parentFieldRecord.nestedFieldString2" : ""));

  String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
  String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
  StorageDescriptor storageDescriptor = new StorageDescriptor(cols, location, inputFormat, outputFormat, false, 0,
      new SerDeInfo(), null, Lists.<Order>newArrayList(), null);
  Table table = new Table(tableName, dbName, "ketl_dev", 0, 0, 0, storageDescriptor,
      Lists.<FieldSchema>newArrayList(), Maps.<String,String>newHashMap(), "", "", "");

  return Optional.of(table);
}
 
Example #16
Source File: AvroSchemaManagerTest.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
private Partition getTestPartition(Table table) throws HiveException {
  Partition partition = new Partition(table, ImmutableMap.of("partition_key", "1"), null);
  StorageDescriptor sd = new StorageDescriptor();
  sd.setSerdeInfo(new SerDeInfo("avro", AvroSerDe.class.getName(), null));
  sd.setCols(Lists.newArrayList(new FieldSchema("foo", "int", null)));
  partition.getTPartition().setSd(sd);
  return partition;
}
 
Example #17
Source File: HiveConnectorPartitionService.java    From metacat with Apache License 2.0 5 votes vote down vote up
private void copyTableSdToPartitionSd(final List<Partition> hivePartitions, final Table table) {
    //
    // Update the partition info based on that of the table.
    //
    for (Partition partition : hivePartitions) {
        final StorageDescriptor sd = partition.getSd();
        final StorageDescriptor tableSdCopy = table.getSd().deepCopy();
        if (tableSdCopy.getSerdeInfo() == null) {
            final SerDeInfo serDeInfo = new SerDeInfo(null, null, new HashMap<>());
            tableSdCopy.setSerdeInfo(serDeInfo);
        }

        tableSdCopy.setLocation(sd.getLocation());
        if (!Strings.isNullOrEmpty(sd.getInputFormat())) {
            tableSdCopy.setInputFormat(sd.getInputFormat());
        }
        if (!Strings.isNullOrEmpty(sd.getOutputFormat())) {
            tableSdCopy.setOutputFormat(sd.getOutputFormat());
        }
        if (sd.getParameters() != null && !sd.getParameters().isEmpty()) {
            tableSdCopy.setParameters(sd.getParameters());
        }
        if (sd.getSerdeInfo() != null) {
            if (!Strings.isNullOrEmpty(sd.getSerdeInfo().getName())) {
                tableSdCopy.getSerdeInfo().setName(sd.getSerdeInfo().getName());
            }
            if (!Strings.isNullOrEmpty(sd.getSerdeInfo().getSerializationLib())) {
                tableSdCopy.getSerdeInfo().setSerializationLib(sd.getSerdeInfo().getSerializationLib());
            }
            if (sd.getSerdeInfo().getParameters() != null && !sd.getSerdeInfo().getParameters().isEmpty()) {
                tableSdCopy.getSerdeInfo().setParameters(sd.getSerdeInfo().getParameters());
            }
        }
        partition.setSd(tableSdCopy);
    }
}
 
Example #18
Source File: HiveConnectorInfoConverter.java    From metacat with Apache License 2.0 5 votes vote down vote up
private StorageDescriptor fromStorageInfo(final StorageInfo storageInfo, final List<FieldSchema> cols) {
    if (storageInfo == null) {
        return new StorageDescriptor(
            Collections.emptyList(),
            "",
            null,
            null,
            false,
            0,
            new SerDeInfo("", null, new HashMap<>()),
            Collections.emptyList(),
            Collections.emptyList(),
            new HashMap<>());
    }
    // Set all required fields to a non-null value
    final String inputFormat = storageInfo.getInputFormat();
    final String location = notNull(storageInfo.getUri()) ? storageInfo.getUri() : "";
    final String outputFormat = storageInfo.getOutputFormat();
    final Map<String, String> sdParams = notNull(storageInfo.getParameters())
        ? storageInfo.getParameters() : new HashMap<>();
    final Map<String, String> serdeParams = notNull(storageInfo.getSerdeInfoParameters())
        ? storageInfo.getSerdeInfoParameters() : new HashMap<>();
    final String serializationLib = storageInfo.getSerializationLib();
    return new StorageDescriptor(
        cols,
        location,
        inputFormat,
        outputFormat,
        false,
        0,
        new SerDeInfo("", serializationLib, serdeParams),
        Collections.emptyList(),
        Collections.emptyList(),
        sdParams);
}
 
Example #19
Source File: HiveConnectorFastPartitionService.java    From metacat with Apache License 2.0 5 votes vote down vote up
private void copyTableSdToPartitionInfoSd(final PartitionInfo partitionInfo, final Table table) {
    StorageInfo sd = partitionInfo.getSerde();
    //
    // Partitions can be provided in the request without the storage information.
    //
    if (sd == null) {
        sd = new StorageInfo();
        partitionInfo.setSerde(sd);
    }
    final StorageDescriptor tableSd = table.getSd();

    if (StringUtils.isBlank(sd.getInputFormat())) {
        sd.setInputFormat(tableSd.getInputFormat());
    }
    if (StringUtils.isBlank(sd.getOutputFormat())) {
        sd.setOutputFormat(tableSd.getOutputFormat());
    }
    if (sd.getParameters() == null || sd.getParameters().isEmpty()) {
        sd.setParameters(tableSd.getParameters());
    }
    final SerDeInfo tableSerde = tableSd.getSerdeInfo();
    if (tableSerde != null) {
        if (StringUtils.isBlank(sd.getSerializationLib())) {
            sd.setSerializationLib(tableSerde.getSerializationLib());
        }
        if (sd.getSerdeInfoParameters() == null || sd.getSerdeInfoParameters().isEmpty()) {
            sd.setSerdeInfoParameters(tableSerde.getParameters());
        }
    }
}
 
Example #20
Source File: HiveMetaStoreUtils.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
public static SerDeInfo getSerDeInfo(HiveRegistrationUnit unit) {
  State props = unit.getSerDeProps();
  SerDeInfo si = new SerDeInfo();
  si.setParameters(getParameters(props));
  si.setName(unit.getTableName());
  if (unit.getSerDeType().isPresent()) {
    si.setSerializationLib(unit.getSerDeType().get());
  }
  return si;
}
 
Example #21
Source File: HiveMetaStoreUtils.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
private static State getSerDeProps(SerDeInfo si) {
  State serDeProps = new State();
  for (Map.Entry<String, String> entry : si.getParameters().entrySet()) {
    serDeProps.setProp(entry.getKey(), entry.getValue());
  }
  if (si.isSetSerializationLib()) {
    serDeProps.setProp(HiveConstants.SERDE_TYPE, si.getSerializationLib());
  }
  return serDeProps;
}
 
Example #22
Source File: TestUtils.java    From circus-train with Apache License 2.0 5 votes vote down vote up
public static Partition newPartition() {
  Partition partition = new Partition();
  StorageDescriptor sd = new StorageDescriptor();
  SerDeInfo info = new SerDeInfo();
  info.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(info);
  partition.setSd(sd);
  partition.setParameters(new HashMap<String, String>());
  return partition;
}
 
Example #23
Source File: TestUtils.java    From circus-train with Apache License 2.0 5 votes vote down vote up
public static Table newTable(String database, String tableName) {
  Table table = new Table();
  table.setDbName(database);
  table.setTableName(tableName);
  table.setTableType(TABLE_TYPE);
  table.setOwner(OWNER);
  table.setCreateTime(CREATE_TIME);
  table.setRetention(RETENTION);

  Map<String, List<PrivilegeGrantInfo>> userPrivileges = new HashMap<>();
  userPrivileges.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privileges = new PrincipalPrivilegeSet();
  privileges.setUserPrivileges(userPrivileges);
  table.setPrivileges(privileges);

  StorageDescriptor storageDescriptor = new StorageDescriptor();
  storageDescriptor.setCols(COLS);
  storageDescriptor.setInputFormat(INPUT_FORMAT);
  storageDescriptor.setOutputFormat(OUTPUT_FORMAT);
  storageDescriptor.setSerdeInfo(new SerDeInfo(SERDE_INFO_NAME, SERIALIZATION_LIB, new HashMap<String, String>()));
  storageDescriptor.setSkewedInfo(new SkewedInfo());
  storageDescriptor.setParameters(new HashMap<String, String>());
  storageDescriptor.setLocation(DATABASE + "/" + tableName + "/");
  table.setSd(storageDescriptor);

  Map<String, String> parameters = new HashMap<>();
  parameters.put("com.company.parameter", "abc");
  table.setParameters(parameters);

  return table;
}
 
Example #24
Source File: TestUtils.java    From circus-train with Apache License 2.0 5 votes vote down vote up
public static Table newTable() {
  Table table = new Table();
  StorageDescriptor sd = new StorageDescriptor();
  SerDeInfo info = new SerDeInfo();
  info.setParameters(new HashMap<String, String>());
  sd.setSerdeInfo(info);
  table.setSd(sd);
  table.setParameters(new HashMap<String, String>());
  return table;
}
 
Example #25
Source File: PartitionTransformationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Before
public void init() {
  partition = new Partition();
  partition.setDbName("database");
  partition.setTableName("table");
  partition.setValues(ImmutableList.of("part"));

  Map<String, List<PrivilegeGrantInfo>> userPrivileges = new HashMap<>();
  userPrivileges.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privileges = new PrincipalPrivilegeSet();
  privileges.setUserPrivileges(userPrivileges);
  partition.setPrivileges(privileges);

  StorageDescriptor storageDescriptor = new StorageDescriptor();
  storageDescriptor.setCols(Arrays.asList(new FieldSchema("a", "int", null)));
  storageDescriptor.setInputFormat("input_format");
  storageDescriptor.setOutputFormat("output_format");
  storageDescriptor.setSerdeInfo(new SerDeInfo("serde", "lib", new HashMap<String, String>()));
  storageDescriptor.setSkewedInfo(new SkewedInfo());
  storageDescriptor.setParameters(new HashMap<String, String>());
  storageDescriptor.setLocation("database/table/part/");
  partition.setSd(storageDescriptor);

  Map<String, String> parameters = new HashMap<>();
  parameters.put("com.company.parameter", "abc");
  partition.setParameters(parameters);
}
 
Example #26
Source File: TableTransformationTest.java    From circus-train with Apache License 2.0 5 votes vote down vote up
@Before
public void init() {
  table = new Table();
  table.setDbName("database");
  table.setTableName("table");
  table.setTableType("type");

  Map<String, List<PrivilegeGrantInfo>> userPrivileges = new HashMap<>();
  userPrivileges.put("read", ImmutableList.of(new PrivilegeGrantInfo()));
  PrincipalPrivilegeSet privileges = new PrincipalPrivilegeSet();
  privileges.setUserPrivileges(userPrivileges);
  table.setPrivileges(privileges);

  StorageDescriptor storageDescriptor = new StorageDescriptor();
  storageDescriptor.setCols(Arrays.asList(new FieldSchema("a", "int", null)));
  storageDescriptor.setInputFormat("input_format");
  storageDescriptor.setOutputFormat("output_format");
  storageDescriptor.setSerdeInfo(new SerDeInfo("serde", "lib", new HashMap<String, String>()));
  storageDescriptor.setSkewedInfo(new SkewedInfo());
  storageDescriptor.setParameters(new HashMap<String, String>());
  storageDescriptor.setLocation("database/table/");
  table.setSd(storageDescriptor);

  Map<String, String> parameters = new HashMap<>();
  parameters.put("com.company.parameter", "abc");
  table.setParameters(parameters);
}
 
Example #27
Source File: AvroStorageDescriptorFactory.java    From data-highway with Apache License 2.0 5 votes vote down vote up
public static StorageDescriptor create(String location) {
  StorageDescriptor storageDescriptor = new StorageDescriptor();
  storageDescriptor.setInputFormat(AVRO_INPUT_FORMAT);
  storageDescriptor.setOutputFormat(AVRO_OUTPUT_FORMAT);
  storageDescriptor.setLocation(location);
  storageDescriptor.setCols(emptyList());

  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setSerializationLib(AVRO_SERDE);
  storageDescriptor.setSerdeInfo(serdeInfo);

  return storageDescriptor;
}
 
Example #28
Source File: AvroHiveTableStrategyTest.java    From data-highway with Apache License 2.0 5 votes vote down vote up
@Test
public void newHiveTable() throws URISyntaxException {
  when(uriResolver.resolve(schema1, TABLE, 1))
      .thenReturn(new URI("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  doReturn(Instant.ofEpochSecond(1526462225L)).when(clock).instant();

  Table result = underTest.newHiveTable(DATABASE, TABLE, PARTITION_COLUMN, LOCATION, schema1, 1);

  assertThat(result.getDbName(), is(DATABASE));
  assertThat(result.getTableName(), is(TABLE));
  assertThat(result.getTableType(), is(TableType.EXTERNAL_TABLE.toString()));
  Map<String, String> parameters = result.getParameters();
  assertThat(parameters.get("EXTERNAL"), is("TRUE"));
  assertThat(parameters.get("data-highway.version"), is(DataHighwayVersion.VERSION));
  assertThat(parameters.get("data-highway.last-revision"), is("2018-05-16T09:17:05Z"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_URL),
      is("https://s3.amazonaws.com/road-schema-bucket/roads/table/schemas/1/table_v1.avsc"));
  assertThat(parameters.get(AvroHiveTableStrategy.AVRO_SCHEMA_VERSION), is("1"));
  List<FieldSchema> partitionKeys = result.getPartitionKeys();
  assertThat(partitionKeys.size(), is(1));
  assertThat(partitionKeys.get(0), is(new FieldSchema(PARTITION_COLUMN, "string", null)));
  StorageDescriptor storageDescriptor = result.getSd();
  assertThat(storageDescriptor.getInputFormat(), is(AvroStorageDescriptorFactory.AVRO_INPUT_FORMAT));
  assertThat(storageDescriptor.getOutputFormat(), is(AvroStorageDescriptorFactory.AVRO_OUTPUT_FORMAT));
  assertThat(storageDescriptor.getLocation(), is(LOCATION));
  assertThat(storageDescriptor.getCols().size(), is(0));
  SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
  assertThat(serdeInfo.getSerializationLib(), is(AvroStorageDescriptorFactory.AVRO_SERDE));
}
 
Example #29
Source File: TestUtils.java    From circus-train with Apache License 2.0 5 votes vote down vote up
public static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location)
  throws TException {
  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setLocation(location.toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setInputFormat(TextInputFormat.class.getName());
  sd.setOutputFormat(TextOutputFormat.class.getName());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, database, table);
  ColumnStatisticsData statsData = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
  ColumnStatisticsObj cso1 = new ColumnStatisticsObj("id", "bigint", statsData);
  List<ColumnStatisticsObj> statsObj = Collections.singletonList(cso1);
  metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDesc, statsObj));

  return hiveTable;
}
 
Example #30
Source File: HiveEntityFactory.java    From circus-train with Apache License 2.0 5 votes vote down vote up
public static StorageDescriptor newStorageDescriptor(File location, String... columns) {
  StorageDescriptor sd = new StorageDescriptor();
  List<FieldSchema> cols = new ArrayList<>(columns.length);
  for (String name : columns) {
    cols.add(newFieldSchema(name));
  }
  sd.setCols(cols);
  sd.setSerdeInfo(new SerDeInfo());
  sd.setLocation(location.toURI().toString());
  return sd;
}