Java Code Examples for org.apache.hadoop.hive.ql.metadata.Partition

The following examples show how to use org.apache.hadoop.hive.ql.metadata.Partition. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Runs as the login user and, for every table in {@code ComplianceRetentionJob.tableNamesList} whose name
 * contains one of {@code patterns}, fetches the table's partitions and hands them, together with
 * {@code name}, to {@code addPartitionsToVersions} so matching partitions end up in {@code versions}.
 *
 * <p>A table whose name contains several patterns is processed once per matching pattern.</p>
 *
 * @param name partition name forwarded to {@code addPartitionsToVersions}
 * @param state not read by this method
 * @throws IOException if the privileged action fails, or if the calling thread is interrupted while
 *         running it (the interrupt flag is restored before throwing)
 */
private void setVersions(final String name, final State state)
    throws IOException {
  try {
    UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
    loginUser.doAs(new PrivilegedExceptionAction<Void>() {
      @Override
      public Void run()
          throws IOException {
        synchronized (lock) {
          for (String tableName : ComplianceRetentionJob.tableNamesList) {
            for (String pattern : patterns) {
              if (tableName.contains(pattern)) {
                addPartitionsToVersions(versions, name, getPartitions(tableName));
              }
            }
          }
        }
        return null;
      }
    });
  } catch (InterruptedException e) {
    // Restore the interrupt status so code further up the stack can still observe the interruption.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  }
  // IOExceptions propagate unchanged; wrapping them in a second IOException only obscured the cause.
}
 
Example 2
/**
 * Looks up the partitions of the Hive table named by {@code completeTableName}.
 *
 * <p>The name is split with {@code At_SPLITTER} and must yield exactly two parts, which are passed to the
 * metastore client as the two arguments of {@code getTable}.</p>
 *
 * @param completeTableName table name to split and look up
 * @return the partitions reported by {@link HiveDataset#getPartitionsFromDataset()}, or an empty list when the
 *         name is malformed or the metastore / file system call fails (the failure is logged, not thrown)
 */
private static List<Partition> getPartitions(String completeTableName) {
  List<String> tableList = At_SPLITTER.splitToList(completeTableName);
  if (tableList.size() != 2) {
    log.warn("Invalid table name " + completeTableName);
    // Type-safe replacement for the raw Collections.EMPTY_LIST constant.
    return Collections.emptyList();
  }
  try (AutoReturnableObject<IMetaStoreClient> client = ComplianceRetentionJob.pool.getClient()) {
    Table table = client.get().getTable(tableList.get(0), tableList.get(1));
    HiveDataset dataset = new HiveDataset(FileSystem.newInstance(new Configuration()), ComplianceRetentionJob.pool,
        new org.apache.hadoop.hive.ql.metadata.Table(table), new Properties());
    return dataset.getPartitionsFromDataset();
  } catch (IOException | TException e) {
    // Best effort: log with the exception attached so the stack trace is kept, then fall through.
    log.warn("Unable to get Partitions for table " + completeTableName + " " + e.getMessage(), e);
  }
  return Collections.emptyList();
}
 
Example 3
Source Project: incubator-gobblin   Source File: HivePartitionFinder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wraps every partition of every dataset in {@code hiveDatasets} in a {@link HivePartitionDataset} and returns
 * the subset chosen by the configured selection policy.
 *
 * <p>The policy class name is read from {@link ComplianceConfigurationKeys#DATASET_SELECTION_POLICY_CLASS},
 * falling back to {@link ComplianceConfigurationKeys#DEFAULT_DATASET_SELECTION_POLICY_CLASS}.</p>
 *
 * <p>NOTE(review): exclusion of backup, trash and staging tables is not visible in this method; presumably it
 * happens in the selection policy or when {@code hiveDatasets} is built. Confirm.</p>
 *
 * @return the partition datasets selected by the policy
 * @throws IOException if the partitions of a dataset cannot be read
 */
@Override
public List<HivePartitionDataset> findDatasets()
    throws IOException {
  List<HivePartitionDataset> candidates = new ArrayList<>();
  for (HiveDataset dataset : this.hiveDatasets) {
    List<Partition> datasetPartitions = dataset.getPartitionsFromDataset();
    for (Partition datasetPartition : datasetPartitions) {
      HivePartitionDataset wrapped = new HivePartitionDataset(datasetPartition);
      candidates.add(wrapped);
    }
  }

  String policyClassName = this.state.getProp(
      ComplianceConfigurationKeys.DATASET_SELECTION_POLICY_CLASS,
      ComplianceConfigurationKeys.DEFAULT_DATASET_SELECTION_POLICY_CLASS);
  Policy<HivePartitionDataset> policy = GobblinConstructorUtils.invokeConstructor(Policy.class, policyClassName);
  return policy.selectedList(candidates);
}
 
Example 4
/**
 * Create a {@link TimestampedHiveDatasetVersion} from a {@link Partition}. The hive table is expected
 * to be date partitioned by {@link #partitionKeyName}. The partition value format must be {@link #pattern}.
 *
 * <p>Only the leading {@code pattern.length()} characters of the trimmed partition value are parsed. A value
 * shorter than the pattern is parsed in full rather than failing inside {@code substring}.</p>
 *
 * @param partition the partition to derive a version from
 * @return a version whose timestamp is parsed from the partition value of {@link #partitionKeyName}
 * @throws IllegalArgumentException when {@link #partitionKeyName} is not found in the <code>table</code>
 * @throws IllegalArgumentException when a value can not be found for {@link #partitionKeyName} in the <code>partition</code>
 * @throws IllegalArgumentException if the partition value can not be parsed with {@link #pattern}
 *         (presumably raised by the formatter; confirm against its type)
 * {@inheritDoc}
 */
@Override
protected TimestampedHiveDatasetVersion getDatasetVersion(Partition partition) {

  int index = Iterables.indexOf(partition.getTable().getPartitionKeys(), this.partitionKeyNamePredicate);

  if (index == -1) {
    throw new IllegalArgumentException(String
        .format("Failed to find partition key %s in the table %s", this.partitionKeyName,
            partition.getTable().getCompleteName()));
  }

  if (index >= partition.getValues().size()) {
    throw new IllegalArgumentException(String
        .format("Failed to find partition value for key %s in the partition %s", this.partitionKeyName,
            partition.getName()));
  }

  String partitionValue = partition.getValues().get(index).trim();
  // Clamp the cut-off: the pattern string can be longer than a valid value (e.g. quoted literals such as 'T'),
  // and an unclamped substring would throw StringIndexOutOfBoundsException instead of letting the formatter decide.
  int parseLength = Math.min(partitionValue.length(), this.pattern.length());
  return new TimestampedHiveDatasetVersion(
      this.formatter.parseDateTime(partitionValue.substring(0, parseLength)),
      partition);
}
 
Example 5
/**
 * Create a {@link TimestampedHiveDatasetVersion} from a {@link Partition} based on the modification time of the
 * partition's underlying data location, as reported by {@code this.fs}.
 *
 * @param partition the partition to derive a version from; must not be null
 * @return a version stamped with the modification time of the partition's data location
 * @throws IllegalArgumentException when argument is null
 * @throws IllegalArgumentException when data location of partition is null
 * @throws IllegalArgumentException when data location of partition doesn't exist
 * @throws RuntimeException wrapping any {@link IOException} raised by the file system calls
 * {@inheritDoc}
 */
@Override
protected TimestampedHiveDatasetVersion getDatasetVersion(Partition partition) {
  try {
    // The original message was cut off mid-sentence ("Argument to method ").
    Preconditions.checkArgument(partition != null, "Argument to method getDatasetVersion cannot be null");

    Path dataLocation = partition.getDataLocation();
    Preconditions
        .checkArgument(dataLocation != null, "Data location is null for partition " + partition.getCompleteName());
    boolean exists = this.fs.exists(dataLocation);
    Preconditions.checkArgument(exists, "Data location doesn't exist for partition " + partition.getCompleteName());

    long modificationTS = this.fs.getFileStatus(dataLocation).getModificationTime();
    return new TimestampedHiveDatasetVersion(new DateTime(modificationTS), partition);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 6
/**
 * Per-test fixture: builds a {@link HiveConf} with a temporary scratch directory, starts a Hive
 * {@link SessionState}, registers the Sentry authorization task factory, and stubs a mocked {@link Hive}
 * so that lookups of {@code TABLE} and of its (empty-spec) partition return the fixture objects.
 */
@Before
public void setup() throws Exception {
  conf = new HiveConf();
  // Scratch directory lives in a fresh temp dir, made writable for everyone (ownerOnly = false).
  baseDir = Files.createTempDir();
  baseDir.setWritable(true, false);
  conf.setVar(HiveConf.ConfVars.SCRATCHDIR, baseDir.getAbsolutePath());
  SessionState.start(conf);
  // Route authorization DDL through the Sentry task factory.
  conf.setVar(ConfVars.HIVE_AUTHORIZATION_TASK_FACTORY,
      SentryHiveAuthorizationTaskFactoryImpl.class.getName());

  db = Mockito.mock(Hive.class);
  table = new Table(DB, TABLE);
  partition = new Partition(table);
  context = new Context(conf);
  parseDriver = new ParseDriver();
  analyzer = new DDLSemanticAnalyzer(conf, db);
  // NOTE(review): SessionState.start(conf) is invoked a second time here, after the task factory was set on
  // conf. Confirm whether both calls are required before removing either one.
  SessionState.start(conf);
  Mockito.when(db.getTable(TABLE, false)).thenReturn(table);
  // Only a lookup with an empty partition spec is stubbed.
  Mockito.when(db.getPartition(table, new HashMap<String, String>(), false))
  .thenReturn(partition);

  // Record the user name the default Hadoop authenticator resolves for this configuration.
  HadoopDefaultAuthenticator auth = new HadoopDefaultAuthenticator();
  auth.setConf(conf);
  currentUser = auth.getUserName();

}
 
Example 7
/**
 * Reads the {@link #REPLACED_PARTITIONS_HIVE_METASTORE_KEY} partition parameter and converts it into the
 * key/value description of every partition that should be dropped.
 *
 * <p>The parameter lists partitions separated by {@code |}; within one partition the values are separated by
 * {@code ,}. Values are paired positionally with the table's partition keys. An entry whose values equal those
 * of {@code hivePartition} itself is skipped.</p>
 *
 * @param hivePartition the partition whose parameters are inspected
 * @return A {@link List} of partitions to be dropped. Each element of the list is a {@link Map} which maps a
 * partition's key and value. Empty when the parameter is missing or blank.
 */
public static List<Map<String, String>> getDropPartitionsDDLInfo(Partition hivePartition) {
  List<Map<String, String>> dropInfos = Lists.newArrayList();
  List<FieldSchema> keys = hivePartition.getTable().getPartitionKeys();

  String replacedPartitions = hivePartition.getParameters().get(REPLACED_PARTITIONS_HIVE_METASTORE_KEY);
  if (StringUtils.isNotBlank(replacedPartitions)) {

    // "|" separates the replaced partitions from each other
    for (String replacedPartition : Splitter.on("|").omitEmptyStrings().split(replacedPartitions)) {

      // "," separates the values inside one partition
      List<String> values = Splitter.on(",").omitEmptyStrings().trimResults().splitToList(replacedPartition);

      // Never schedule the partition being processed for dropping: a partition may have replaced another
      // partition that carries the very same values.
      if (values.equals(hivePartition.getValues())) {
        continue;
      }

      ImmutableMap.Builder<String, String> ddlInfo = ImmutableMap.builder();
      int position = 0;
      for (FieldSchema key : keys) {
        ddlInfo.put(key.getName(), values.get(position));
        position++;
      }
      dropInfos.add(ddlInfo.build());
    }
  }
  return dropInfos;
}
 
Example 8
/**
 * Verifies that when partition {@code 2015} declares it replaced {@code 2015-01} and {@code 2015-02}, the
 * expected high watermarks for the table contain only the {@code 2015} entry.
 */
@Test
public void testDroppedPartitions() throws Exception {
  // The dataset URN literal had been mangled by e-mail obfuscation ("[email protected]..."); a complete Hive
  // table name has the form db@table. Presumably mockTable(...) uses the "db" database; confirm.
  WorkUnitState previousWus = new WorkUnitState();
  previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  previousWus
      .setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015-01", 100L, "2015-02", 101L)));

  SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state);

  Table table = mockTable("test_dataset_urn");
  Mockito.when(table.getPartitionKeys()).thenReturn(ImmutableList.of(new FieldSchema("year", "string", "")));

  Partition partition2015 = mockPartition(table, ImmutableList.of("2015"));

  // partition 2015 replaces 2015-01 and 2015-02
  Mockito.when(partition2015.getParameters()).thenReturn(
      ImmutableMap.of(AbstractAvroToOrcConverter.REPLACED_PARTITIONS_HIVE_METASTORE_KEY, "2015-01|2015-02"));
  watermarker.onPartitionProcessBegin(partition2015, 0L, 0L);

  Assert.assertEquals(watermarker.getExpectedHighWatermarks().get("db@test_dataset_urn"), ImmutableMap.of("2015", 0L));
}
 
Example 9
/**
 * Verifies that per-partition watermarks stored in the previous work unit state are returned by
 * {@code getPreviousHighWatermark} for the matching partitions.
 */
@Test
public void testGetPreviousHighWatermarkForPartition() throws Exception {
  // The dataset URN literal had been mangled by e-mail obfuscation ("[email protected]..."); a complete Hive
  // table name has the form db@table. Presumably mockTable(...) uses the "db" database; confirm.
  WorkUnitState previousWus = new WorkUnitState();
  previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  previousWus.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L, "2016", 101L)));

  SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state);

  Table table = mockTable("test_dataset_urn");
  Partition partition2015 = mockPartition(table, ImmutableList.of("2015"));
  Partition partition2016 = mockPartition(table, ImmutableList.of("2016"));

  Assert.assertEquals(watermarker.getPreviousHighWatermark(partition2015), new LongWatermark(100L));
  Assert.assertEquals(watermarker.getPreviousHighWatermark(partition2016), new LongWatermark(101L));
}
 
Example 10
Source Project: incubator-gobblin   Source File: HiveSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the creation time of a partition in milliseconds.
 *
 * <p>Resolution order: the positive {@code createTime} of the underlying Thrift partition (treated as seconds
 * and converted to milliseconds); otherwise the value stored under
 * {@code DISTCP_REGISTRATION_GENERATION_TIME_KEY} in the partition parameters, parsed as a long and returned
 * as-is; otherwise 0.</p>
 *
 * @param partition partition to inspect
 * @return the creation time, or 0 when none could be determined
 */
@VisibleForTesting
public static long getCreateTime(Partition partition) {
  org.apache.hadoop.hive.metastore.api.Partition thriftPartition = partition.getTPartition();

  // A positive create time wins. It is always set when HiveJDBC or the Hive metastore created the partition,
  // but may be left at 0 when the partition was created through a Thrift API call.
  if (thriftPartition.getCreateTime() > 0) {
    return TimeUnit.SECONDS.toMillis(thriftPartition.getCreateTime());
  }

  // Next best: the distcp-ng registration generation time, when present.
  if (thriftPartition.isSetParameters()
      && thriftPartition.getParameters().containsKey(DISTCP_REGISTRATION_GENERATION_TIME_KEY)) {
    log.debug("Did not find createTime in Hive partition, used distcp registration generation time.");
    return Long.parseLong(thriftPartition.getParameters().get(DISTCP_REGISTRATION_GENERATION_TIME_KEY));
  }

  log.warn(String.format("Could not find create time for partition %s. Will return createTime as 0",
      partition.getCompleteName()));
  return 0;
}
 
Example 11
/**
 * Builds a mocked {@link HivePartitionFileSet} for tests.
 *
 * <p>The mock's {@code getPartition()} returns a mocked partition whose data location is {@code location}.
 * Its {@code getExistingTargetPartition()} returns either an absent optional, or a mocked target partition
 * whose parameters hold {@code registrationGenerationTime} under
 * {@link HiveDataset#REGISTRATION_GENERATION_TIME_MILLIS}.</p>
 *
 * @param location data location reported by the source partition mock
 * @param registrationGenerationTime value stored on the target partition mock; only used when
 *        {@code targetPartitionExists} is true
 * @param targetPartitionExists whether an existing target partition should be reported
 * @return the configured file set mock
 */
public HivePartitionFileSet createPartitionCopy(Path location, long registrationGenerationTime,
    boolean targetPartitionExists) {
  Partition sourcePartition = Mockito.mock(Partition.class);
  Mockito.doReturn(location).when(sourcePartition).getDataLocation();

  HivePartitionFileSet fileSet = Mockito.mock(HivePartitionFileSet.class);
  Mockito.doReturn(sourcePartition).when(fileSet).getPartition();

  Optional<Partition> existingTarget = Optional.absent();
  if (targetPartitionExists) {
    Map<String, String> targetParameters = Maps.newHashMap();
    targetParameters.put(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS,
        Long.toString(registrationGenerationTime));

    Partition existingPartition = Mockito.mock(Partition.class);
    Mockito.doReturn(targetParameters).when(existingPartition).getParameters();
    existingTarget = Optional.of(existingPartition);
  }
  Mockito.doReturn(existingTarget).when(fileSet).getExistingTargetPartition();

  return fileSet;
}
 
Example 12
/**
 * Rewrites the {@code HIVE_TABLE_AVRO_SCHEMA_URL} serde parameter of {@code sd} to the corresponding target
 * path, when the parameter is set and the current URL points into the source file system.
 *
 * <p>The URL counts as pointing into the source file system when
 * {@code PathUtils.isAbsoluteAndSchemeAuthorityNull} accepts it, or when both its scheme and its authority
 * equal those of the source file system URI. The comparison is null-safe, so URLs without an authority
 * (e.g. {@code file:///...}) no longer trigger a {@link NullPointerException}.</p>
 *
 * @param entity, name of the entity to be changed, e.g. hive table or partition (used for logging only)
 * @param sd, StorageDescriptor of the entity; its serde parameters are modified in place
 * @param hiveHelper supplies the source file system, the target file system and the target path helper
 */
public static void updateAvroSchemaURL(String entity, StorageDescriptor sd, HiveCopyEntityHelper hiveHelper) {
  String oldAvroSchemaURL = sd.getSerdeInfo().getParameters().get(HIVE_TABLE_AVRO_SCHEMA_URL);
  if (oldAvroSchemaURL != null) {

    Path oldAvroSchemaPath = new Path(oldAvroSchemaURL);
    URI oldAvroSchemaURI = oldAvroSchemaPath.toUri();
    URI sourceFileSystemURI = hiveHelper.getDataset().getFs().getUri();

    // URI#getScheme and URI#getAuthority may return null, so compare with the null-safe Objects.equals.
    // Fully qualified to avoid relying on (or clashing with) an Objects import in this file.
    if (PathUtils.isAbsoluteAndSchemeAuthorityNull(oldAvroSchemaPath)
        || (java.util.Objects.equals(oldAvroSchemaURI.getScheme(), sourceFileSystemURI.getScheme())
        && java.util.Objects.equals(oldAvroSchemaURI.getAuthority(), sourceFileSystemURI.getAuthority()))) {

      String newAvroSchemaURL = hiveHelper.getTargetPathHelper().getTargetPath(oldAvroSchemaPath, hiveHelper.getTargetFileSystem(),
          Optional.<Partition>absent(), true).toString();

      sd.getSerdeInfo().getParameters().put(HIVE_TABLE_AVRO_SCHEMA_URL, newAvroSchemaURL);
      log.info(String.format("For entity %s, change %s from %s to %s", entity,
          HIVE_TABLE_AVRO_SCHEMA_URL, oldAvroSchemaURL, newAvroSchemaURL));
    }
  }
}
 
Example 13
/**
 * Verifies that {@link DatePartitionHiveVersionFinder} honours a user supplied partition key name and date
 * pattern ({@code yyyy/MM/dd/HH}) when deriving the version timestamp from a partition value.
 */
@Test
public void testUserDefinedDatePattern() throws Exception {
  String tableName = "VfTb2";
  ImmutableMap<String, String> finderSettings = ImmutableMap.<String, String> of(
      DatePartitionHiveVersionFinder.PARTITION_KEY_NAME_KEY, "field1",
      DatePartitionHiveVersionFinder.PARTITION_VALUE_DATE_TIME_PATTERN_KEY, "yyyy/MM/dd/HH");
  Config conf = ConfigFactory.parseMap(finderSettings);

  DatePartitionHiveVersionFinder versionFinder = new DatePartitionHiveVersionFinder(this.fs, conf);

  Table metastoreTable =
      this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, ImmutableList.of("field1"));
  // NOTE(review): casting epoch milliseconds to int overflows; if the third argument is a create time in
  // seconds, this passes an arbitrary (possibly negative) value. The assertions below do not depend on it.
  org.apache.hadoop.hive.metastore.api.Partition metastorePartition = this.hiveMetastoreTestUtils
      .addTestPartition(metastoreTable, ImmutableList.of("2016/01/01/20"), (int) System.currentTimeMillis());

  org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(metastoreTable);
  Partition partition = new Partition(qlTable, metastorePartition);

  // The partition name is URL-encoded, hence the decode before comparing.
  String decodedName = URLDecoder.decode(partition.getName(), "UTF-8");
  Assert.assertEquals(decodedName, "field1=2016/01/01/20");

  TimestampedHiveDatasetVersion version = versionFinder.getDatasetVersion(partition);
  Assert.assertEquals(version.getDateTime(), formatter.parseDateTime("2016/01/01/20"));
}
 
Example 14
Source Project: atlas   Source File: HiveMetaStoreBridgeTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that importing Hive metadata does not fail when a partition reports an empty list of values.
 * The Atlas client is stubbed to return existing entities for the table and its process.
 */
@Test
public void testImportWhenPartitionKeysAreNull() throws Exception {
    // Single GUID shared by all stubbed Atlas entities in this test.
    final String entityGuid = "82e06b34-9151-4023-aa9d-b82103a50e77";

    setupDB(hiveClient, TEST_DB_NAME);
    List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME);
    Table hiveTable = hiveTables.get(0);

    returnExistingDatabase(TEST_DB_NAME, atlasClientV2, METADATA_NAMESPACE);


    when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(),
            Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                    HiveMetaStoreBridge.getTableQualifiedName(METADATA_NAMESPACE, TEST_DB_NAME, TEST_TABLE_NAME))))
    .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
                    getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, entityGuid)));

    String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(METADATA_NAMESPACE, hiveTable);

    when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(),
            Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                    processQualifiedName)))
    .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
                    getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, entityGuid)));

    when(atlasEntityWithExtInfo.getEntity(entityGuid))
    .thenReturn(createTableReference());

    // Partition without any values: typed list instead of the raw List used before.
    Partition partition = mock(Partition.class);
    when(partition.getTable()).thenReturn(hiveTable);
    List<String> partitionValues = Arrays.asList(new String[]{});
    when(partition.getValues()).thenReturn(partitionValues);

    when(hiveClient.getPartitions(hiveTable)).thenReturn(Arrays.asList(partition));

    HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(METADATA_NAMESPACE, hiveClient, atlasClientV2);
    try {
        bridge.importHiveMetadata(null, null, true);
    } catch (Exception e) {
        Assert.fail("Partition with null key caused import to fail with exception ", e);
    }
}
 
Example 15
Source Project: incubator-atlas   Source File: HiveMetaStoreBridgeTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that importing Hive metadata does not fail when a partition reports an empty list of values.
 * The Atlas client is stubbed to return existing references for the table and its process.
 */
@Test
public void testImportWhenPartitionKeysAreNull() throws Exception {
    // Single GUID shared by all stubbed Atlas references in this test.
    final String entityGuid = "82e06b34-9151-4023-aa9d-b82103a50e77";

    setupDB(hiveClient, TEST_DB_NAME);
    List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME);
    Table hiveTable = hiveTables.get(0);

    returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);

    when(atlasClient.getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
        HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, TEST_DB_NAME, TEST_TABLE_NAME))).thenReturn(
        getEntityReference(HiveDataTypes.HIVE_TABLE.getName(), entityGuid));
    String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(CLUSTER_NAME, hiveTable);
    when(atlasClient.getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
            processQualifiedName)).thenReturn(getEntityReference(HiveDataTypes.HIVE_PROCESS.getName(), entityGuid));
    when(atlasClient.getEntity(entityGuid)).thenReturn(createTableReference());

    // Partition without any values: typed list instead of the raw List used before.
    Partition partition = mock(Partition.class);
    when(partition.getTable()).thenReturn(hiveTable);
    List<String> partitionValues = Arrays.asList(new String[]{});
    when(partition.getValues()).thenReturn(partitionValues);

    when(hiveClient.getPartitions(hiveTable)).thenReturn(Arrays.asList(partition));

    HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
    try {
        bridge.importHiveMetadata(true);
    } catch (Exception e) {
        Assert.fail("Partition with null key caused import to fail with exception ", e);
    }
}
 
Example 16
/**
 * Appends a {@link HivePartitionRetentionVersion} to {@code versions} for every partition whose name equals
 * {@code name}, ignoring case.
 *
 * @param versions list that receives the new versions
 * @param name partition name to match
 * @param partitions candidate partitions
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private void addPartitionsToVersions(List<HivePartitionVersion> versions, String name,
    List<Partition> partitions)
    throws IOException {
  for (Partition candidate : partitions) {
    boolean nameMatches = candidate.getName().equalsIgnoreCase(name);
    if (!nameMatches) {
      continue;
    }
    versions.add(new HivePartitionRetentionVersion(candidate));
  }
}
 
Example 17
Source Project: incubator-gobblin   Source File: ComplianceRetentionJob.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the configured dataset finder and classifies every Hive dataset found.
 *
 * <p>Tables that have partitions are recorded in {@code tableNamesList}. Tables without partitions are
 * recorded in {@code tablesToDrop}, but only when
 * {@link ComplianceConfigurationKeys#SHOULD_DROP_EMPTY_TABLES} is enabled and the complete table name contains
 * the trash, backup or staging marker.</p>
 *
 * @param properties job properties; must contain {@code GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS}
 * @throws IllegalArgumentException if the dataset finder class property is missing
 * @throws IOException if the file system or the datasets cannot be accessed
 */
public void initDatasetFinder(Properties properties)
    throws IOException {
  Preconditions.checkArgument(properties.containsKey(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS),
      "Missing required property " + GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS);
  String finderClass = properties.getProperty(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS);
  this.finder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, finderClass, new State(properties));

  Iterator<HiveDataset> datasetsIterator =
      new HiveDatasetFinder(FileSystem.newInstance(new Configuration()), properties).getDatasetsIterator();

  // The flag does not change between iterations, so parse it once.
  boolean shouldDropEmptyTables =
      Boolean.parseBoolean(properties.getProperty(ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES,
          ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES));

  while (datasetsIterator.hasNext()) {
    HiveDataset hiveDataset = datasetsIterator.next();
    List<Partition> partitionsFromDataset = hiveDataset.getPartitionsFromDataset();
    String completeTableName = hiveDataset.getTable().getCompleteName();

    // Tables with partitions are kept for retention processing.
    if (!partitionsFromDataset.isEmpty()) {
      this.tableNamesList.add(completeTableName);
      continue;
    }

    // Empty tables are only dropped when the property is set; otherwise skip the table.
    if (!shouldDropEmptyTables) {
      continue;
    }
    if (completeTableName.contains(ComplianceConfigurationKeys.TRASH) || completeTableName
        .contains(ComplianceConfigurationKeys.BACKUP) || completeTableName
        .contains(ComplianceConfigurationKeys.STAGING)) {
      this.tablesToDrop.add(hiveDataset);
    }
  }
}
 
Example 18
/**
 * Builds a {@link HiveDatasetVersion} for each {@link Partition} of the given {@link HiveDataset} by calling
 * {@link #getDatasetVersion(Partition)}.
 * <p>
 * Note: a partition whose processing throws, or for which no version is produced, is logged (in the former
 * case) and left out of the returned collection.
 * </p>
 *
 * @param dataset the dataset to inspect; must be a partitioned {@link HiveDataset}
 * @return the versions of all partitions that could be processed
 * @throws IllegalArgumentException if <code>dataset</code> is not a {@link HiveDataset}. Or if {@link HiveDataset#getTable()}
 * is not partitioned.
 * @throws IOException if the partitions cannot be fetched from the metastore
 */
@Override
public Collection<HiveDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
  if (!(dataset instanceof HiveDataset)) {
    throw new IllegalArgumentException("HiveDatasetVersionFinder is only compatible with HiveDataset");
  }
  final HiveDataset hiveDataset = (HiveDataset) dataset;

  if (!hiveDataset.getTable().isPartitioned()) {
    throw new IllegalArgumentException("HiveDatasetVersionFinder is only compatible with partitioned hive tables");
  }

  try (AutoReturnableObject<IMetaStoreClient> client = hiveDataset.getClientPool().getClient()) {

    List<Partition> partitions = HiveUtils.getPartitions(client.get(), hiveDataset.getTable(), Optional.<String> absent());
    List<HiveDatasetVersion> versions = Lists.newArrayList();

    for (Partition partition : partitions) {
      HiveDatasetVersion version;
      try {
        version = getDatasetVersion(partition);
      } catch (Throwable e) {
        // Deliberately broad: one bad partition must not abort the whole scan.
        log.warn(String.format("Failed to get DatasetVersion %s. Skipping.", partition.getCompleteName()), e);
        continue;
      }
      if (version != null) {
        versions.add(version);
      }
    }
    return versions;
  }
}
 
Example 19
/**
 * With data file relocation enabled and a target table root of {@code /target}, a file of a partition with
 * values {@code part} and {@code 123} is expected at {@code /target/tableName/part/123/file1}.
 */
@Test
public void testRelocateFilesPartitioned() {
  Properties relocationSettings = new Properties();
  relocationSettings.setProperty(HiveTargetPathHelper.RELOCATE_DATA_FILES_KEY, String.valueOf(true));
  relocationSettings.setProperty(HiveTargetPathHelper.COPY_TARGET_TABLE_ROOT, "/target");
  HiveTargetPathHelper pathHelper = createTestTargetPathHelper(relocationSettings);

  Partition mockedPartition = Mockito.mock(Partition.class);
  Mockito.when(mockedPartition.getValues()).thenReturn(Lists.newArrayList("part", "123"));

  Path sourceFile = new Path(TABLE_ROOT, "partition/file1");
  Path expectedTarget = new Path("/target/tableName/part/123/file1");

  Assert.assertEquals(pathHelper.getTargetPath(sourceFile, this.fs, Optional.of(mockedPartition), true),
      expectedTarget);
}
 
Example 20
/**
 * Registers {@code partitionUpdateTime} as the expected high watermark of {@code partition} and removes the
 * watermarks of every partition it replaces. The replaced partitions are obtained from
 * {@link AbstractAvroToOrcConverter#getDropPartitionsDDLInfo(Partition)}, which reads the partition parameter
 * {@link AbstractAvroToOrcConverter#REPLACED_PARTITIONS_HIVE_METASTORE_KEY}.
 *
 * @param partition partition whose processing begins; neither it nor its table may be null
 * @param partitionProcessTime not read by this implementation
 * @param partitionUpdateTime value stored as the partition's high watermark
 * @throws IllegalStateException if no watermarks have been registered for the partition's table yet
 *
 * {@inheritDoc}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#onPartitionProcessBegin(org.apache.hadoop.hive.ql.metadata.Partition, long, long)
 */
@Override
public void onPartitionProcessBegin(Partition partition, long partitionProcessTime, long partitionUpdateTime) {

  Preconditions.checkNotNull(partition);
  Preconditions.checkNotNull(partition.getTable());

  final String tableWatermarkKey = tableKey(partition.getTable());
  boolean tableRegistered = this.expectedHighWatermarks.hasPartitionWatermarks(tableWatermarkKey);
  if (!tableRegistered) {
    throw new IllegalStateException(String.format(
        "onPartitionProcessBegin called before onTableProcessBegin for table: %s, partitions: %s",
        tableWatermarkKey, partitionKey(partition)));
  }

  // Turns the key/value description of a replaced partition into its joined-values key.
  Function<Map<String, String>, String> joinPartitionValues = new Function<Map<String, String>, String>() {
    @Override
    public String apply(Map<String, String> ddlInfo) {
      return PARTITION_VALUES_JOINER.join(ddlInfo.values());
    }
  };
  Collection<String> replacedPartitionKeys =
      Collections2.transform(AbstractAvroToOrcConverter.getDropPartitionsDDLInfo(partition), joinPartitionValues);

  // Forget the replaced partitions first, then record the watermark of the current one.
  this.expectedHighWatermarks.removePartitionWatermarks(tableWatermarkKey, replacedPartitionKeys);
  this.expectedHighWatermarks.addPartitionWatermark(tableWatermarkKey, partitionKey(partition),
      partitionUpdateTime);
}
 
Example 21
/**
 * Looks up the watermark recorded for {@code partition} in the previous state.
 *
 * @param partition partition to look up
 * @return the recorded watermark, or a {@link LongWatermark} of 0 when either the table or the partition has
 *         no entry in the previous watermarks
 * {@inheritDoc}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#getPreviousHighWatermark(org.apache.hadoop.hive.ql.metadata.Partition)
 */
@Override
public LongWatermark getPreviousHighWatermark(Partition partition) {
  final String tableWatermarkKey = tableKey(partition.getTable());

  // The partition key is only computed once the table is known to have watermarks.
  boolean watermarkKnown = this.previousWatermarks.hasPartitionWatermarks(tableWatermarkKey)
      && this.previousWatermarks.get(tableWatermarkKey).containsKey(partitionKey(partition));
  if (!watermarkKnown) {
    return new LongWatermark(0);
  }

  return new LongWatermark(
      this.previousWatermarks.getPartitionWatermark(tableWatermarkKey, partitionKey(partition)));
}
 
Example 22
Source Project: incubator-gobblin   Source File: BackfillHiveSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether a work unit is created for {@code sourcePartition}.
 *
 * @param sourcePartition candidate partition
 * @param lowWatermark not consulted by this implementation
 * @return {@code true} when no partition whitelist is configured (every partition is backfilled), otherwise
 *         whether the whitelist contains the partition's complete name
 */
@Override
public boolean shouldCreateWorkunit(Partition sourcePartition, LongWatermark lowWatermark) {
  // An empty whitelist means "backfill everything"; the name lookup only runs when a whitelist exists.
  return this.partitionsWhitelist.isEmpty()
      || this.partitionsWhitelist.contains(sourcePartition.getCompleteName());
}
 
Example 23
Source Project: incubator-gobblin   Source File: BackfillHiveSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Applies the lookback check only when no partition whitelist is configured.
 *
 * @param partition partition to check
 * @return {@code false} whenever a whitelist is present; otherwise the result of the superclass check
 */
@Override
public boolean isOlderThanLookback(Partition partition) {
  // A configured whitelist overrides the lookback window entirely.
  return this.partitionsWhitelist.isEmpty() && super.isOlderThanLookback(partition);
}
 
Example 24
Source Project: incubator-gobblin   Source File: HiveSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@link HiveWorkUnit} for one partition of a dataset.
 *
 * <p>The table and partition schema URLs are filled in from {@code avroSchemaManager} when
 * {@code disableAvroCheck} is true or when {@code isAvro} accepts the dataset's table; otherwise the work unit
 * is returned without them.</p>
 *
 * @param hiveDataset dataset the partition belongs to
 * @param partition partition to create the work unit for
 * @param disableAvroCheck when true, the {@code isAvro} check is skipped and the schema URLs are always set
 * @return the new work unit
 * @throws IOException presumably raised while resolving schema URLs; confirm against the schema manager
 */
protected HiveWorkUnit workUnitForPartition(HiveDataset hiveDataset, Partition partition, boolean disableAvroCheck) throws IOException {
  HiveWorkUnit workUnit = new HiveWorkUnit(hiveDataset, partition);

  boolean skipSchemaUrls = !disableAvroCheck && !isAvro(hiveDataset.getTable());
  if (skipSchemaUrls) {
    return workUnit;
  }

  workUnit.setTableSchemaUrl(this.avroSchemaManager.getSchemaUrl(hiveDataset.getTable()));
  workUnit.setPartitionSchemaUrl(this.avroSchemaManager.getSchemaUrl(partition));
  return workUnit;
}
 
Example 25
Source Project: incubator-gobblin   Source File: HdfsBasedUpdateProvider.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Get the update time of a {@link Partition}, computed from the partition's data location by the
 * location-based {@code getUpdateTime} overload.
 *
 * @param partition partition whose data location is inspected
 * @return the update time reported for the partition's data location
 * @throws UpdateNotFoundException if reading the update time fails with an {@link IOException}; the original
 *         exception is kept as the cause
 *
 * {@inheritDoc}
 * @see HiveUnitUpdateProvider#getUpdateTime(org.apache.hadoop.hive.ql.metadata.Partition)
 */
@Override
public long getUpdateTime(Partition partition) throws UpdateNotFoundException {

  try {
    return getUpdateTime(partition.getDataLocation());
  } catch (IOException ioe) {
    String failure = String.format("Failed to get update time for %s", partition.getCompleteName());
    throw new UpdateNotFoundException(failure, ioe);
  }
}
 
Example 26
Source Project: incubator-gobblin   Source File: EventWorkunitUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stores SLA event metadata for a partition in the work unit. The publisher will use this metadata to publish
 * SLA events.
 *
 * @param state work unit that receives the properties
 * @param table not read by this method
 * @param partition source of the partition name and data location
 * @param updateTime stored as the upstream timestamp
 * @param lowWatermark stored as the previous publish timestamp
 * @param beginGetWorkunitsTime stored as the begin-get-workunits time
 */
public static void setPartitionSlaEventMetadata(WorkUnit state, Table table, Partition partition, long updateTime,
    long lowWatermark, long beginGetWorkunitsTime) {
  // Identity of what is being processed
  state.setProp(SlaEventKeys.DATASET_URN_KEY, state.getProp(ConfigurationKeys.DATASET_URN_KEY));
  state.setProp(SlaEventKeys.PARTITION_KEY, partition.getName());
  state.setProp(SlaEventKeys.UPSTREAM_TS_IN_MILLI_SECS_KEY, Long.toString(updateTime));

  // Work unit creation time: written as the origin timestamp, then read back from the work unit so both
  // properties carry the same stored value.
  long workUnitCreatedAt = System.currentTimeMillis();
  state.setProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY, workUnitCreatedAt);
  state.setProp(EventConstants.WORK_UNIT_CREATE_TIME, state.getProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY));

  state.setProp(SlaEventKeys.PREVIOUS_PUBLISH_TS_IN_MILLI_SECS_KEY, lowWatermark);
  state.setProp(EventConstants.BEGIN_GET_WORKUNITS_TIME, beginGetWorkunitsTime);

  state.setProp(EventConstants.SOURCE_DATA_LOCATION, partition.getDataLocation());
}
 
Example 27
/**
 * With no target path properties set, the target path of a file is expected to equal its source path.
 */
@Test
public void testReplicatePaths() {
  HiveTargetPathHelper pathHelper = createTestTargetPathHelper(new Properties());

  Path sourceFile = new Path(TABLE_ROOT, "partition/file1");
  Path expectedTarget = new Path(TABLE_ROOT, "partition/file1");

  Assert.assertEquals(pathHelper.getTargetPath(sourceFile, this.fs, Optional.<Partition>absent(), true),
      expectedTarget);
}
 
Example 28
/**
 * Creates a mocked {@link Partition} whose Thrift partition has a storage descriptor reporting
 * {@code location}. All three mocks use {@code Mockito.RETURNS_SMART_NULLS} for unstubbed calls.
 *
 * @param location value returned by the storage descriptor's {@code getLocation()}
 * @return the mocked partition
 */
public static Partition createMockPartitionWithLocation(String location) {
  // Built inside-out: storage descriptor, then Thrift partition, then the ql-level partition.
  StorageDescriptor storageDescriptor = Mockito.mock(StorageDescriptor.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(storageDescriptor.getLocation()).thenReturn(location);

  org.apache.hadoop.hive.metastore.api.Partition thriftPartition =
      Mockito.mock(org.apache.hadoop.hive.metastore.api.Partition.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(thriftPartition.getSd()).thenReturn(storageDescriptor);

  Partition qlPartition = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(qlPartition.getTPartition()).thenReturn(thriftPartition);
  return qlPartition;
}
 
Example 29
Source Project: incubator-gobblin   Source File: HiveUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the partitions of a table and indexes them by their list of partition values.
 *
 * @param client an {@link IMetaStoreClient} for the correct metastore.
 * @param table the {@link Table} for which we should get partitions.
 * @param filter an optional filter for partitions as would be used in Hive. Can only filter on String columns.
 *               (e.g. "part = \"part1\"" or "date > \"2015\"").
 * @param hivePartitionExtendedFilterOptional an optional extended filter, passed through to
 *               {@code getPartitions} unchanged.
 * @return a map of values to {@link Partition} for input {@link Table} filtered and non-nullified.
 * @throws IOException if the partitions cannot be fetched
 */
public static Map<List<String>, Partition> getPartitionsMap(IMetaStoreClient client, Table table,
    Optional<String> filter, Optional<? extends HivePartitionExtendedFilter> hivePartitionExtendedFilterOptional) throws IOException {
  // Key extractor: a partition is indexed by its values; a null partition yields a null key.
  Function<Partition, List<String>> valuesOfPartition = new Function<Partition, List<String>>() {
    @Override
    public List<String> apply(@Nullable Partition partition) {
      return partition == null ? null : partition.getValues();
    }
  };
  return Maps.uniqueIndex(getPartitions(client, table, filter, hivePartitionExtendedFilterOptional),
      valuesOfPartition);
}
 
Example 30
/**
 * With the prefix {@code /table} configured to be replaced by {@code /replaced}, the target path of a file
 * under {@code TABLE_ROOT} is expected to be {@code /replaced/path/partition/file1}.
 */
@Test
public void testReplacePrefix() {
  Properties prefixSettings = new Properties();
  prefixSettings.setProperty(HiveTargetPathHelper.COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED, "/table");
  prefixSettings.setProperty(HiveTargetPathHelper.COPY_TARGET_TABLE_PREFIX_REPLACEMENT, "/replaced");
  HiveTargetPathHelper pathHelper = createTestTargetPathHelper(prefixSettings);

  Path sourceFile = new Path(TABLE_ROOT, "partition/file1");
  Path expectedTarget = new Path("/replaced/path/partition/file1");

  Assert.assertEquals(pathHelper.getTargetPath(sourceFile, this.fs, Optional.<Partition>absent(), true),
      expectedTarget);
}