org.apache.hadoop.hive.ql.metadata.Partition Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.metadata.Partition. Each example lists its source file and the open-source project it was taken from.
Example #1
Source File: RegistrationTimeSkipPredicateTest.java    From incubator-gobblin with Apache License 2.0
public HivePartitionFileSet createPartitionCopy(Path location, long registrationGenerationTime,
    boolean targetPartitionExists) {
  HivePartitionFileSet partitionCopy = Mockito.mock(HivePartitionFileSet.class);

  Partition partition = Mockito.mock(Partition.class);
  Mockito.doReturn(location).when(partition).getDataLocation();
  Mockito.doReturn(partition).when(partitionCopy).getPartition();

  if (targetPartitionExists) {

    Partition targetPartition = Mockito.mock(Partition.class);

    Map<String, String> parameters = Maps.newHashMap();
    parameters.put(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS,
        Long.toString(registrationGenerationTime));
    Mockito.doReturn(parameters).when(targetPartition).getParameters();

    Mockito.doReturn(Optional.of(targetPartition)).when(partitionCopy).getExistingTargetPartition();
  } else {
    Mockito.doReturn(Optional.absent()).when(partitionCopy).getExistingTargetPartition();
  }

  return partitionCopy;
}
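A minimal usage sketch for the factory above, relying only on the stubbed accessors it defines (the location and timestamp values are hypothetical):

HivePartitionFileSet fileSet = createPartitionCopy(new Path("/data/db/table/part1"), 1454284800000L, true);
Assert.assertEquals(fileSet.getPartition().getDataLocation(), new Path("/data/db/table/part1"));
Assert.assertTrue(fileSet.getExistingTargetPartition().isPresent());
// The factory stores the timestamp as a string parameter via Long.toString(...).
Assert.assertEquals(fileSet.getExistingTargetPartition().get().getParameters()
    .get(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS), "1454284800000");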
 
Example #2
Source File: HiveSource.java    From incubator-gobblin with Apache License 2.0
@VisibleForTesting
public static long getCreateTime(Partition partition) {
  // If create time is set, use it.
  // .. this is always set if HiveJDBC or the Hive metastore is used to create the partition.
  // .. it might not be set (i.e. equals 0) if a Thrift API call is used to create the partition.
  if (partition.getTPartition().getCreateTime() > 0) {
    return TimeUnit.MILLISECONDS.convert(partition.getTPartition().getCreateTime(), TimeUnit.SECONDS);
  }
  // Try to use distcp-ng registration generation time if it is available
  else if (partition.getTPartition().isSetParameters()
      && partition.getTPartition().getParameters().containsKey(DISTCP_REGISTRATION_GENERATION_TIME_KEY)) {
    log.debug("Did not find createTime in Hive partition, used distcp registration generation time.");
    return Long.parseLong(partition.getTPartition().getParameters().get(DISTCP_REGISTRATION_GENERATION_TIME_KEY));
  } else {
    log.warn(String.format("Could not find create time for partition %s. Will return createTime as 0",
        partition.getCompleteName()));
    return 0;
  }
}
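The Hive metastore stores a partition's createTime in epoch seconds, which is why the method above converts to milliseconds before returning. A standalone sketch of that conversion (the timestamp is a made-up example), assuming java.util.concurrent.TimeUnit:

// TPartition.getCreateTime() returns epoch seconds as an int.
int createTimeSeconds = 1454284800; // 2016-02-01T00:00:00Z
long createTimeMillis = TimeUnit.MILLISECONDS.convert(createTimeSeconds, TimeUnit.SECONDS);
// createTimeMillis == 1454284800000L, i.e. seconds * 1000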
 
Example #3
Source File: DatePartitionedHiveVersionFinderTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testUserDefinedDatePattern() throws Exception {
  String tableName = "VfTb2";
  Config conf =
      ConfigFactory.parseMap(ImmutableMap.<String, String> of(DatePartitionHiveVersionFinder.PARTITION_KEY_NAME_KEY, "field1",
          DatePartitionHiveVersionFinder.PARTITION_VALUE_DATE_TIME_PATTERN_KEY, "yyyy/MM/dd/HH"));

  DatePartitionHiveVersionFinder versionFinder = new DatePartitionHiveVersionFinder(this.fs, conf);

  Table tbl = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, ImmutableList.of("field1"));
  org.apache.hadoop.hive.metastore.api.Partition tp =
      this.hiveMetastoreTestUtils.addTestPartition(tbl, ImmutableList.of("2016/01/01/20"), (int) System.currentTimeMillis());
  Partition partition = new Partition(new org.apache.hadoop.hive.ql.metadata.Table(tbl), tp);
  Assert.assertEquals(URLDecoder.decode(partition.getName(), "UTF-8"), "field1=2016/01/01/20");
  TimestampedHiveDatasetVersion dv = versionFinder.getDatasetVersion(partition);
  Assert.assertEquals(dv.getDateTime(), formatter.parseDateTime("2016/01/01/20"));
}
 
Example #4
Source File: DatePartitionHiveVersionFinder.java    From incubator-gobblin with Apache License 2.0
/**
 * Create a {@link TimestampedHiveDatasetVersion} from a {@link Partition}. The Hive table is expected
 * to be date-partitioned by {@link #partitionKeyName}. The partition value format must match {@link #pattern}
 *
 * @throws IllegalArgumentException when {@link #partitionKeyName} is not found in the <code>table</code>
 * @throws IllegalArgumentException when a value cannot be found for {@link #partitionKeyName} in the <code>partition</code>
 * @throws IllegalArgumentException if the partition value cannot be parsed with {@link #pattern}
 * {@inheritDoc}
 */
@Override
protected TimestampedHiveDatasetVersion getDatasetVersion(Partition partition) {

  int index = Iterables.indexOf(partition.getTable().getPartitionKeys(), this.partitionKeyNamePredicate);

  if (index == -1) {
    throw new IllegalArgumentException(String
        .format("Failed to find partition key %s in the table %s", this.partitionKeyName,
            partition.getTable().getCompleteName()));
  }

  if (index >= partition.getValues().size()) {
    throw new IllegalArgumentException(String
        .format("Failed to find partition value for key %s in the partition %s", this.partitionKeyName,
            partition.getName()));
  }
  return new TimestampedHiveDatasetVersion(
      this.formatter.parseDateTime(partition.getValues().get(index).trim().substring(0, this.pattern.length())),
      partition);
}
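The formatter used above is a Joda-Time DateTimeFormatter built from the configured pattern. A minimal sketch of the parse step in isolation, assuming the "yyyy/MM/dd/HH" pattern from the earlier test (the time zone is an assumption; the real finder configures its own):

DateTimeFormatter formatter = DateTimeFormat.forPattern("yyyy/MM/dd/HH").withZone(DateTimeZone.UTC);
String partitionValue = "2016/01/01/20";
// The partition value is truncated to the pattern length before parsing, as in getDatasetVersion.
DateTime version = formatter.parseDateTime(partitionValue.substring(0, "yyyy/MM/dd/HH".length()));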
 
Example #5
Source File: HivePartitionVersionFinder.java    From incubator-gobblin with Apache License 2.0
private void setVersions(final String name, final State state)
    throws IOException {
  try {
    UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
    loginUser.doAs(new PrivilegedExceptionAction<Void>() {
      @Override
      public Void run()
          throws IOException {
        synchronized (lock) {
          List<Partition> partitions = null;
          for (String tableName : ComplianceRetentionJob.tableNamesList) {
            for (String pattern : patterns) {
              if (tableName.contains(pattern)) {
                partitions = getPartitions(tableName);
                addPartitionsToVersions(versions, name, partitions);
              }
            }
          }
        }
        return null;
      }
    });
  } catch (InterruptedException | IOException e) {
    throw new IOException(e);
  }
}
 
Example #6
Source File: HivePartitionVersionFinder.java    From incubator-gobblin with Apache License 2.0
private static List<Partition> getPartitions(String completeTableName) {
  List<String> tableList = At_SPLITTER.splitToList(completeTableName);
  if (tableList.size() != 2) {
    log.warn("Invalid table name " + completeTableName);
    return Collections.emptyList();
  }
  try (AutoReturnableObject<IMetaStoreClient> client = ComplianceRetentionJob.pool.getClient()) {
    Table table = client.get().getTable(tableList.get(0), tableList.get(1));
    HiveDataset dataset = new HiveDataset(FileSystem.newInstance(new Configuration()), ComplianceRetentionJob.pool,
        new org.apache.hadoop.hive.ql.metadata.Table(table), new Properties());
    return dataset.getPartitionsFromDataset();
  } catch (IOException | TException e) {
    log.warn("Unable to get Partitions for table " + completeTableName + " " + e.getMessage());
  }
  return Collections.emptyList();
}
 
Example #7
Source File: HdfsModifiedTimeHiveVersionFinder.java    From incubator-gobblin with Apache License 2.0
/**
 * Create a {@link TimestampedHiveDatasetVersion} from a {@link Partition} based on the modification time of the
 * underlying HDFS data location
 * @throws IllegalArgumentException when argument is null
 * @throws IllegalArgumentException when data location of partition is null
 * @throws IllegalArgumentException when data location of partition doesn't exist
 * {@inheritDoc}
 */
@Override
protected TimestampedHiveDatasetVersion getDatasetVersion(Partition partition) {
  try {
    Preconditions.checkArgument(partition != null, "Partition must not be null");

    Path dataLocation = partition.getDataLocation();
    Preconditions
        .checkArgument(dataLocation != null, "Data location is null for partition " + partition.getCompleteName());
    boolean exists = this.fs.exists(dataLocation);
    Preconditions.checkArgument(exists, "Data location doesn't exist for partition " + partition.getCompleteName());

    long modificationTS = this.fs.getFileStatus(dataLocation).getModificationTime();
    return new TimestampedHiveDatasetVersion(new DateTime(modificationTS), partition);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
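The version timestamp here comes straight from HDFS file metadata. A standalone sketch of that lookup against a Hadoop FileSystem (the path is hypothetical):

FileSystem fs = FileSystem.get(new Configuration());
Path dataLocation = new Path("/data/db/table/datepartition=2016-01-01");
if (fs.exists(dataLocation)) {
  // getModificationTime() returns milliseconds since the epoch.
  long modificationTs = fs.getFileStatus(dataLocation).getModificationTime();
  DateTime version = new DateTime(modificationTs);
}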
 
Example #8
Source File: HiveAvroCopyEntityHelper.java    From incubator-gobblin with Apache License 2.0
/**
 * @param entity name of the entity to be changed, e.g. Hive table or partition
 * @param sd StorageDescriptor of the entity
 */
public static void updateAvroSchemaURL(String entity, StorageDescriptor sd, HiveCopyEntityHelper hiveHelper) {
  String oldAvroSchemaURL = sd.getSerdeInfo().getParameters().get(HIVE_TABLE_AVRO_SCHEMA_URL);
  if (oldAvroSchemaURL != null) {

    Path oldAvroSchemaPath = new Path(oldAvroSchemaURL);
    URI sourceFileSystemURI = hiveHelper.getDataset().getFs().getUri();

    if (PathUtils.isAbsoluteAndSchemeAuthorityNull(oldAvroSchemaPath)
        || (oldAvroSchemaPath.toUri().getScheme().equals(sourceFileSystemURI.getScheme())
        && oldAvroSchemaPath.toUri().getAuthority().equals(sourceFileSystemURI.getAuthority()))) {

      String newAvroSchemaURL = hiveHelper.getTargetPathHelper().getTargetPath(oldAvroSchemaPath, hiveHelper.getTargetFileSystem(),
          Optional.<Partition>absent(), true).toString();

      sd.getSerdeInfo().getParameters().put(HIVE_TABLE_AVRO_SCHEMA_URL, newAvroSchemaURL);
      log.info(String.format("For entity %s, change %s from %s to %s", entity,
          HIVE_TABLE_AVRO_SCHEMA_URL, oldAvroSchemaURL, newAvroSchemaURL));
    }
  }
}
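The condition above only rewrites the avro.schema.url when it is scheme-less or points at the source file system. A small sketch of that scheme/authority comparison using the Hadoop Path API (the URLs are hypothetical):

Path oldSchemaPath = new Path("hdfs://source-nn:8020/schemas/table.avsc");
URI sourceFsUri = URI.create("hdfs://source-nn:8020");
boolean onSourceFs = oldSchemaPath.toUri().getScheme().equals(sourceFsUri.getScheme())
    && oldSchemaPath.toUri().getAuthority().equals(sourceFsUri.getAuthority());
// onSourceFs == true here, so the URL would be rewritten to the target file system.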
 
Example #9
Source File: PartitionLevelWatermarkerTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testDroppedPartitions() throws Exception {
  WorkUnitState previousWus = new WorkUnitState();
  previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  previousWus
      .setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015-01", 100L, "2015-02", 101L)));

  SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state);

  Table table = mockTable("test_dataset_urn");
  Mockito.when(table.getPartitionKeys()).thenReturn(ImmutableList.of(new FieldSchema("year", "string", "")));

  Partition partition2015 = mockPartition(table, ImmutableList.of("2015"));

  // partition 2015 replaces 2015-01 and 2015-02
  Mockito.when(partition2015.getParameters()).thenReturn(
      ImmutableMap.of(AbstractAvroToOrcConverter.REPLACED_PARTITIONS_HIVE_METASTORE_KEY, "2015-01|2015-02"));
  watermarker.onPartitionProcessBegin(partition2015, 0L, 0L);

  Assert.assertEquals(watermarker.getExpectedHighWatermarks().get("db@test_dataset_urn"), ImmutableMap.of("2015", 0L));
}
 
Example #10
Source File: AbstractAvroToOrcConverter.java    From incubator-gobblin with Apache License 2.0
/**
 * Parses the {@link #REPLACED_PARTITIONS_HIVE_METASTORE_KEY} from the partition parameters and returns DDL info for
 * all the partitions to be dropped.
 *
 * @return A {@link List} of partitions to be dropped. Each element of the list is a {@link Map} from a partition's
 * keys to its values.
 */
public static List<Map<String, String>> getDropPartitionsDDLInfo(Partition hivePartition) {
  List<Map<String, String>> replacedPartitionsDDLInfo = Lists.newArrayList();
  List<FieldSchema> partitionKeys = hivePartition.getTable().getPartitionKeys();

  if (StringUtils.isNotBlank(hivePartition.getParameters().get(REPLACED_PARTITIONS_HIVE_METASTORE_KEY))) {

    // Partitions are separated by "|"
    for (String partitionsInfoString : Splitter.on("|").omitEmptyStrings().split(hivePartition.getParameters().get(REPLACED_PARTITIONS_HIVE_METASTORE_KEY))) {

      // Values for a partition are separated by ","
      List<String> partitionValues = Splitter.on(",").omitEmptyStrings().trimResults().splitToList(partitionsInfoString);

      // Do not drop the partition being processed. Sometimes a partition may have replaced another partition of the same values.
      if (!partitionValues.equals(hivePartition.getValues())) {
        ImmutableMap.Builder<String, String> partitionDDLInfoMap = ImmutableMap.builder();
        for (int i = 0; i < partitionKeys.size(); i++) {
          partitionDDLInfoMap.put(partitionKeys.get(i).getName(), partitionValues.get(i));
        }
        replacedPartitionsDDLInfo.add(partitionDDLInfoMap.build());
      }
    }
  }
  return replacedPartitionsDDLInfo;
}
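Each map in the returned list pairs one replaced partition's keys with its values. As a hedged sketch (dropPartitionDdl is a hypothetical helper, not part of this class, and the DDL actually emitted elsewhere may differ), one entry could be rendered like this, assuming Guava's Lists and Joiner:

static String dropPartitionDdl(String tableName, Map<String, String> partitionSpec) {
  List<String> specParts = Lists.newArrayList();
  for (Map.Entry<String, String> entry : partitionSpec.entrySet()) {
    specParts.add(String.format("%s='%s'", entry.getKey(), entry.getValue()));
  }
  // E.g. {year=2015, month=01} -> ALTER TABLE db.tbl DROP IF EXISTS PARTITION (year='2015', month='01')
  return String.format("ALTER TABLE %s DROP IF EXISTS PARTITION (%s)", tableName, Joiner.on(", ").join(specParts));
}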
 
Example #11
Source File: TestSentryHiveAuthorizationTaskFactory.java    From incubator-sentry with Apache License 2.0
@Before
public void setup() throws Exception {
  conf = new HiveConf();
  baseDir = Files.createTempDir();
  baseDir.setWritable(true, false);
  conf.setVar(HiveConf.ConfVars.SCRATCHDIR, baseDir.getAbsolutePath());
  SessionState.start(conf);
  conf.setVar(ConfVars.HIVE_AUTHORIZATION_TASK_FACTORY,
      SentryHiveAuthorizationTaskFactoryImpl.class.getName());

  db = Mockito.mock(Hive.class);
  table = new Table(DB, TABLE);
  partition = new Partition(table);
  context = new Context(conf);
  parseDriver = new ParseDriver();
  analyzer = new DDLSemanticAnalyzer(conf, db);
  SessionState.start(conf);
  Mockito.when(db.getTable(TABLE, false)).thenReturn(table);
  Mockito.when(db.getPartition(table, new HashMap<String, String>(), false))
  .thenReturn(partition);

  HadoopDefaultAuthenticator auth = new HadoopDefaultAuthenticator();
  auth.setConf(conf);
  currentUser = auth.getUserName();
}
 
Example #12
Source File: HivePartitionFinder.java    From incubator-gobblin with Apache License 2.0
/**
 * Finds all datasets according to the whitelist, except the backup, trash, and staging tables.
 */
@Override
public List<HivePartitionDataset> findDatasets()
    throws IOException {
  List<HivePartitionDataset> list = new ArrayList<>();
  for (HiveDataset hiveDataset : this.hiveDatasets) {
    for (Partition partition : hiveDataset.getPartitionsFromDataset()) {
      list.add(new HivePartitionDataset(partition));
    }
  }
  String selectionPolicyString = this.state.getProp(ComplianceConfigurationKeys.DATASET_SELECTION_POLICY_CLASS,
      ComplianceConfigurationKeys.DEFAULT_DATASET_SELECTION_POLICY_CLASS);
  Policy<HivePartitionDataset> selectionPolicy =
      GobblinConstructorUtils.invokeConstructor(Policy.class, selectionPolicyString);
  return selectionPolicy.selectedList(list);
}
 
Example #13
Source File: PartitionLevelWatermarkerTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testGetPreviousHighWatermarkForPartition() throws Exception {
  WorkUnitState previousWus = new WorkUnitState();
  previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  previousWus.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L, "2016", 101L)));

  SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state);

  Table table = mockTable("test_dataset_urn");
  Partition partition2015 = mockPartition(table, ImmutableList.of("2015"));
  Partition partition2016 = mockPartition(table, ImmutableList.of("2016"));

  Assert.assertEquals(watermarker.getPreviousHighWatermark(partition2015), new LongWatermark(100L));
  Assert.assertEquals(watermarker.getPreviousHighWatermark(partition2016), new LongWatermark(101L));
}
 
Example #14
Source File: HiveDataset.java    From incubator-gobblin with Apache License 2.0
/**
 * Returns a sorted list of this dataset's partitions.
 */
public List<Partition> getPartitionsFromDataset() throws IOException {
  try (AutoReturnableObject<IMetaStoreClient> client = getClientPool().getClient()) {
    List<Partition> partitions =
        HiveUtils.getPartitions(client.get(), getTable(), Optional.<String>absent());
    return sortPartitions(partitions);
  }
}
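sortPartitions is not shown on this page; a plausible implementation, as a hedged sketch, orders partitions by name:

// Hedged sketch: sorts in place by partition name and returns the same list.
private static List<Partition> sortPartitions(List<Partition> partitions) {
  Collections.sort(partitions, new Comparator<Partition>() {
    @Override
    public int compare(Partition p1, Partition p2) {
      return p1.getName().compareTo(p2.getName());
    }
  });
  return partitions;
}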
 
Example #15
Source File: HivePartitionVersionFinder.java    From incubator-gobblin with Apache License 2.0
private void addPartitionsToVersions(List<HivePartitionVersion> versions, String name,
    List<Partition> partitions)
    throws IOException {
  for (Partition partition : partitions) {
    if (partition.getName().equalsIgnoreCase(name)) {
      versions.add(new HivePartitionRetentionVersion(partition));
    }
  }
}
 
Example #16
Source File: HiveAvroCopyEntityHelper.java    From incubator-gobblin with Apache License 2.0
/**
 * Currently updates the {@link #HIVE_TABLE_AVRO_SCHEMA_URL} location for new Hive partitions
 * @param targetTable new Table to be registered in Hive
 * @param sourcePartitions source partitions
 * @throws IOException
 */
public static void updatePartitionAttributesIfAvro(Table targetTable, Map<List<String>, Partition> sourcePartitions, HiveCopyEntityHelper hiveHelper) throws IOException {
  if (isHiveTableAvroType(targetTable)) {
    for (Map.Entry<List<String>, Partition> partition : sourcePartitions.entrySet()) {
      updateAvroSchemaURL(partition.getValue().getCompleteName(), partition.getValue().getTPartition().getSd(), hiveHelper);
    }
  }
}
 
Example #17
Source File: HiveTargetPathHelperTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testRelocateFilesPartitioned() {
  Properties properties = new Properties();
  properties.setProperty(HiveTargetPathHelper.RELOCATE_DATA_FILES_KEY, Boolean.toString(true));
  properties.setProperty(HiveTargetPathHelper.COPY_TARGET_TABLE_ROOT, "/target");

  HiveTargetPathHelper helper = createTestTargetPathHelper(properties);

  Path source = new Path(TABLE_ROOT, "partition/file1");

  Partition partition = Mockito.mock(Partition.class);
  Mockito.when(partition.getValues()).thenReturn(Lists.newArrayList("part", "123"));

  Assert.assertEquals(helper.getTargetPath(source, this.fs, Optional.of(partition), true), new Path("/target/tableName/part/123/file1"));
}
 
Example #18
Source File: ComplianceRetentionJob.java    From incubator-gobblin with Apache License 2.0
public void initDatasetFinder(Properties properties)
    throws IOException {
  Preconditions.checkArgument(properties.containsKey(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS),
      "Missing required propety " + GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS);
  String finderClass = properties.getProperty(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS);
  this.finder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, finderClass, new State(properties));

  Iterator<HiveDataset> datasetsIterator =
      new HiveDatasetFinder(FileSystem.newInstance(new Configuration()), properties).getDatasetsIterator();

  while (datasetsIterator.hasNext()) {
    // Drop partitions from empty tables if property is set, otherwise skip the table
    HiveDataset hiveDataset = datasetsIterator.next();
    List<Partition> partitionsFromDataset = hiveDataset.getPartitionsFromDataset();
    String completeTableName = hiveDataset.getTable().getCompleteName();
    if (!partitionsFromDataset.isEmpty()) {
      this.tableNamesList.add(completeTableName);
      continue;
    }
    if (!Boolean.parseBoolean(properties.getProperty(ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES,
        ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES))) {
      continue;
    }
    if (completeTableName.contains(ComplianceConfigurationKeys.TRASH) || completeTableName
        .contains(ComplianceConfigurationKeys.BACKUP) || completeTableName
        .contains(ComplianceConfigurationKeys.STAGING)) {
      this.tablesToDrop.add(hiveDataset);
    }
  }
}
 
Example #19
Source File: HiveMetaStoreBridgeTest.java    From atlas with Apache License 2.0
@Test
public void testImportWhenPartitionKeysAreNull() throws Exception {
    setupDB(hiveClient, TEST_DB_NAME);
    List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME);
    Table hiveTable = hiveTables.get(0);

    returnExistingDatabase(TEST_DB_NAME, atlasClientV2, METADATA_NAMESPACE);


    when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(),
            Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                    HiveMetaStoreBridge.getTableQualifiedName(METADATA_NAMESPACE, TEST_DB_NAME, TEST_TABLE_NAME))))
    .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
                    getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77")));

    String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(METADATA_NAMESPACE, hiveTable);

    when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(),
            Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                    processQualifiedName)))
    .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo(
                    getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77")));

    when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77"))
    .thenReturn(createTableReference());

    Partition partition = mock(Partition.class);
    when(partition.getTable()).thenReturn(hiveTable);
    List<String> partitionValues = Collections.emptyList();
    when(partition.getValues()).thenReturn(partitionValues);

    when(hiveClient.getPartitions(hiveTable)).thenReturn(Collections.singletonList(partition));

    HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(METADATA_NAMESPACE, hiveClient, atlasClientV2);
    try {
        bridge.importHiveMetadata(null, null, true);
    } catch (Exception e) {
        Assert.fail("Partition with null key caused import to fail with exception ", e);
    }
}
 
Example #20
Source File: PartitionLevelWatermarker.java    From incubator-gobblin with Apache License 2.0
/**
 * Adds an expected high watermark for this {@link Partition}. Also removes any watermarks for partitions being replaced.
 * Replaced partitions are read from the partition parameter {@link AbstractAvroToOrcConverter#REPLACED_PARTITIONS_HIVE_METASTORE_KEY}.
 * Uses the <code>partitionUpdateTime</code> as the high watermark for this <code>partition</code>
 *
 * {@inheritDoc}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#onPartitionProcessBegin(org.apache.hadoop.hive.ql.metadata.Partition, long, long)
 */
@Override
public void onPartitionProcessBegin(Partition partition, long partitionProcessTime, long partitionUpdateTime) {

  Preconditions.checkNotNull(partition);
  Preconditions.checkNotNull(partition.getTable());

  if (!this.expectedHighWatermarks.hasPartitionWatermarks(tableKey(partition.getTable()))) {
    throw new IllegalStateException(String.format(
        "onPartitionProcessBegin called before onTableProcessBegin for table: %s, partitions: %s",
        tableKey(partition.getTable()), partitionKey(partition)));
  }

  // Remove dropped partitions
  Collection<String> droppedPartitions =
      Collections2.transform(AbstractAvroToOrcConverter.getDropPartitionsDDLInfo(partition),
          new Function<Map<String, String>, String>() {
            @Override
            public String apply(Map<String, String> input) {
              return PARTITION_VALUES_JOINER.join(input.values());
            }
          });

  this.expectedHighWatermarks.removePartitionWatermarks(tableKey(partition.getTable()), droppedPartitions);
  this.expectedHighWatermarks.addPartitionWatermark(tableKey(partition.getTable()), partitionKey(partition),
      partitionUpdateTime);
}
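The Guava Function above only joins each dropped partition's values into the same key format produced by partitionKey. A standalone sketch of that transformation, assuming PARTITION_VALUES_JOINER is Joiner.on(","):

// A dropped-partition spec {year=2015, month=01} becomes the watermark key "2015,01".
Map<String, String> droppedSpec = ImmutableMap.of("year", "2015", "month", "01");
String droppedKey = Joiner.on(",").join(droppedSpec.values());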
 
Example #21
Source File: PartitionLevelWatermarker.java    From incubator-gobblin with Apache License 2.0
/**
 * Returns the previous high watermark if found in the previous state; otherwise returns 0.
 * {@inheritDoc}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#getPreviousHighWatermark(org.apache.hadoop.hive.ql.metadata.Partition)
 */
@Override
public LongWatermark getPreviousHighWatermark(Partition partition) {
  if (this.previousWatermarks.hasPartitionWatermarks(tableKey(partition.getTable()))) {

    // If partition has a watermark return.
    if (this.previousWatermarks.get(tableKey(partition.getTable())).containsKey(partitionKey(partition))) {
      return new LongWatermark(this.previousWatermarks.getPartitionWatermark(tableKey(partition.getTable()),
          partitionKey(partition)));
    }
  }
  return new LongWatermark(0);
}
 
Example #22
Source File: HiveTargetPathHelperTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testReplacePrefix() {
  Properties properties = new Properties();
  properties.setProperty(HiveTargetPathHelper.COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED, "/table");
  properties.setProperty(HiveTargetPathHelper.COPY_TARGET_TABLE_PREFIX_REPLACEMENT, "/replaced");

  HiveTargetPathHelper helper = createTestTargetPathHelper(properties);

  Path source = new Path(TABLE_ROOT, "partition/file1");
  Assert.assertEquals(helper.getTargetPath(source, this.fs, Optional.<Partition>absent(), true), new Path("/replaced/path/partition/file1"));
}
 
Example #23
Source File: HiveUtils.java    From incubator-gobblin with Apache License 2.0
/**
 * @param client an {@link IMetaStoreClient} for the correct metastore.
 * @param table the {@link Table} for which we should get partitions.
 * @param filter an optional filter for partitions as would be used in Hive. Can only filter on String columns
 *               (e.g. "part = \"part1\"" or "date > \"2015\"").
 * @return a map from partition values to {@link Partition} for the input {@link Table}, filtered as requested and with null partitions removed.
 */
public static Map<List<String>, Partition> getPartitionsMap(IMetaStoreClient client, Table table,
    Optional<String> filter, Optional<? extends HivePartitionExtendedFilter> hivePartitionExtendedFilterOptional) throws IOException {
  return Maps.uniqueIndex(getPartitions(client, table, filter, hivePartitionExtendedFilterOptional), new Function<Partition, List<String>>() {
    @Override
    public List<String> apply(@Nullable Partition partition) {
      if (partition == null) {
        return null;
      }
      return partition.getValues();
    }
  });
}
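A hedged usage sketch: build the map and look up one partition by its value list (pool and table are assumed to be set up as in the other examples; the filter string and partition value are hypothetical):

try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
  Map<List<String>, Partition> partitionsByValues = HiveUtils.getPartitionsMap(client.get(), table,
      Optional.of("datepartition > \"2015\""), Optional.<HivePartitionExtendedFilter>absent());
  // Keys are partition value lists, e.g. ["2016-01-01"] for a table with a single partition key.
  Partition partition = partitionsByValues.get(ImmutableList.of("2016-01-01"));
}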
 
Example #24
Source File: BackfillHiveSource.java    From incubator-gobblin with Apache License 2.0
@Override
public boolean shouldCreateWorkunit(Partition sourcePartition, LongWatermark lowWatermark) {
  // If a whitelist is provided only create workunits for those partitions
  if (!this.partitionsWhitelist.isEmpty()) {
    return this.partitionsWhitelist.contains(sourcePartition.getCompleteName());
  }
  // If no whitelist is set, all partitions of a dataset are backfilled
  return true;
}
 
Example #25
Source File: BackfillHiveSource.java    From incubator-gobblin with Apache License 2.0
@Override
public boolean isOlderThanLookback(Partition partition) {
  // If partition whitelist is provided, ignore lookback
  if (!this.partitionsWhitelist.isEmpty()) {
    return false;
  } else {
    return super.isOlderThanLookback(partition);
  }
}
 
Example #26
Source File: DatePatternUpdateProviderTest.java    From incubator-gobblin with Apache License 2.0
public static Partition createMockPartitionWithLocation(String location) {
  Partition mockPartition = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  org.apache.hadoop.hive.metastore.api.Partition mockTPartition =
      Mockito.mock(org.apache.hadoop.hive.metastore.api.Partition.class, Mockito.RETURNS_SMART_NULLS);
  StorageDescriptor mockSd = Mockito.mock(StorageDescriptor.class, Mockito.RETURNS_SMART_NULLS);
  Mockito.when(mockSd.getLocation()).thenReturn(location);
  Mockito.when(mockTPartition.getSd()).thenReturn(mockSd);
  Mockito.when(mockPartition.getTPartition()).thenReturn(mockTPartition);
  return mockPartition;
}
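A usage sketch for the mock factory above; the assertion only exercises the stubbed getTPartition().getSd().getLocation() chain (the location string is hypothetical):

Partition partition = createMockPartitionWithLocation("/data/tracking/event/2016-01-01");
Assert.assertEquals(partition.getTPartition().getSd().getLocation(), "/data/tracking/event/2016-01-01");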
 
Example #27
Source File: HiveSource.java    From incubator-gobblin with Apache License 2.0
protected HiveWorkUnit workUnitForPartition(HiveDataset hiveDataset, Partition partition, boolean disableAvroCheck) throws IOException {
  HiveWorkUnit hiveWorkUnit = new HiveWorkUnit(hiveDataset, partition);
  if (disableAvroCheck || isAvro(hiveDataset.getTable())) {
    hiveWorkUnit.setTableSchemaUrl(this.avroSchemaManager.getSchemaUrl(hiveDataset.getTable()));
    hiveWorkUnit.setPartitionSchemaUrl(this.avroSchemaManager.getSchemaUrl(partition));
  }
  return hiveWorkUnit;
}
 
Example #28
Source File: HiveTargetPathHelperTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testReplicatePaths() {
  Properties properties = new Properties();

  HiveTargetPathHelper helper = createTestTargetPathHelper(properties);

  Path source = new Path(TABLE_ROOT, "partition/file1");
  Assert.assertEquals(helper.getTargetPath(source, this.fs, Optional.<Partition>absent(), true), new Path(TABLE_ROOT, "partition/file1"));
}
 
Example #29
Source File: EventWorkunitUtils.java    From incubator-gobblin with Apache License 2.0
/**
 * Sets SLA event metadata in the workunit. The publisher will use this metadata to publish SLA events.
 */
public static void setPartitionSlaEventMetadata(WorkUnit state, Table table, Partition partition, long updateTime,
    long lowWatermark, long beginGetWorkunitsTime) {
  state.setProp(SlaEventKeys.DATASET_URN_KEY, state.getProp(ConfigurationKeys.DATASET_URN_KEY));
  state.setProp(SlaEventKeys.PARTITION_KEY, partition.getName());
  state.setProp(SlaEventKeys.UPSTREAM_TS_IN_MILLI_SECS_KEY, String.valueOf(updateTime));

  // Time when the workunit was created
  state.setProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY, System.currentTimeMillis());
  state.setProp(EventConstants.WORK_UNIT_CREATE_TIME, state.getProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY));
  state.setProp(SlaEventKeys.PREVIOUS_PUBLISH_TS_IN_MILLI_SECS_KEY, lowWatermark);
  state.setProp(EventConstants.BEGIN_GET_WORKUNITS_TIME, beginGetWorkunitsTime);

  state.setProp(EventConstants.SOURCE_DATA_LOCATION, partition.getDataLocation());
}
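A hedged sketch of reading the same keys back from the workunit when the SLA event is later published (getProp is the standard Gobblin State accessor):

String partitionName = state.getProp(SlaEventKeys.PARTITION_KEY);
long upstreamTs = Long.parseLong(state.getProp(SlaEventKeys.UPSTREAM_TS_IN_MILLI_SECS_KEY));
long originTs = Long.parseLong(state.getProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY));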
 
Example #30
Source File: HdfsBasedUpdateProvider.java    From incubator-gobblin with Apache License 2.0
/**
 * Get the update time of a {@link Partition}
 *
 * @return the update time if available, 0 otherwise
 *
 * {@inheritDoc}
 * @see HiveUnitUpdateProvider#getUpdateTime(org.apache.hadoop.hive.ql.metadata.Partition)
 */
@Override
public long getUpdateTime(Partition partition) throws UpdateNotFoundException {

  try {
    return getUpdateTime(partition.getDataLocation());
  } catch (IOException e) {
    throw new UpdateNotFoundException(String.format("Failed to get update time for %s", partition.getCompleteName()),
        e);
  }
}