org.apache.hive.hcatalog.data.transfer.ReaderContext Java Examples

The following examples show how to use org.apache.hive.hcatalog.data.transfer.ReaderContext. Each example is taken from an open source project; the source file and project are noted above each snippet.
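ReaderContext is the serializable handle returned by HCatReader.prepareRead(): it is created on the master side and then handed to worker-side readers, one per split. As a rough orientation before the project-specific examples, here is a minimal end-to-end sketch of that flow; the database and table names are placeholders.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatReader;
import org.apache.hive.hcatalog.data.transfer.ReadEntity;
import org.apache.hive.hcatalog.data.transfer.ReaderContext;

public class ReaderContextSketch {
    public static void main(String[] args) throws Exception {
        // Master side: describe what to read and prepare the read.
        ReadEntity entity = new ReadEntity.Builder()
                .withDatabase("default")   // placeholder database
                .withTable("my_table")     // placeholder table
                .build();
        Map<String, String> config = new HashMap<>();
        ReaderContext context = DataTransferFactory.getHCatReader(entity, config).prepareRead();

        // Worker side: obtain one reader per split and iterate its records.
        for (int split = 0; split < context.numSplits(); split++) {
            HCatReader splitReader = DataTransferFactory.getHCatReader(context, split);
            Iterator<HCatRecord> records = splitReader.read();
            while (records.hasNext()) {
                HCatRecord record = records.next();
                System.out.println(record);
            }
        }
    }
}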
Example #1
Source File: HiveTableReader.java    From kylin-on-parquet-v2 with Apache License 2.0
private static ReaderContext getHiveReaderContext(String database, String table, Map<String, String> partitionKV) throws Exception {
    HiveConf hiveConf = new HiveConf(HiveTableReader.class);
    Iterator<Entry<String, String>> itr = hiveConf.iterator();
    Map<String, String> map = new HashMap<String, String>();
    while (itr.hasNext()) {
        Entry<String, String> kv = itr.next();
        map.put(kv.getKey(), kv.getValue());
    }

    ReadEntity entity;
    if (partitionKV == null || partitionKV.size() == 0) {
        entity = new ReadEntity.Builder().withDatabase(database).withTable(table).build();
    } else {
        entity = new ReadEntity.Builder().withDatabase(database).withTable(table).withPartition(partitionKV).build();
    }

    HCatReader reader = DataTransferFactory.getHCatReader(entity, map);
    ReaderContext cntxt = reader.prepareRead();

    return cntxt;
}
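
For context, the ReaderContext prepared here is later consumed split by split through loadHCatRecordItr (Example #10, from the same HiveTableReader.java). A minimal sketch of how a caller might combine the two helpers, with hypothetical database and table names:

ReaderContext context = getHiveReaderContext("default", "my_table", null);   // no partition filter
for (int split = 0; split < context.numSplits(); split++) {
    Iterator<HCatRecord> records = loadHCatRecordItr(context, split);
    while (records.hasNext()) {
        HCatRecord record = records.next();
        // convert or process the record as needed
    }
}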
 
Example #2
Source File: HCatalogIO.java    From beam with Apache License 2.0
/**
 * Calculates the 'desired' number of splits based on desiredBundleSizeBytes, which is passed as
 * a hint to the native API. Retrieves the actual splits generated by the native API, which may
 * differ from the 'desired' split count calculated from desiredBundleSizeBytes.
 */
@Override
public List<BoundedSource<HCatRecord>> split(
    long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
  int desiredSplitCount = 1;
  long estimatedSizeBytes = getEstimatedSizeBytes(options);
  if (desiredBundleSizeBytes > 0 && estimatedSizeBytes > 0) {
    desiredSplitCount = (int) Math.ceil((double) estimatedSizeBytes / desiredBundleSizeBytes);
  }
  ReaderContext readerContext = getReaderContext(desiredSplitCount);
  // process the splits returned by the native API;
  // this may differ from the 'desiredSplitCount' calculated above
  LOG.info(
      "Splitting into bundles of {} bytes: "
          + "estimated size {}, desired split count {}, actual split count {}",
      desiredBundleSizeBytes,
      estimatedSizeBytes,
      desiredSplitCount,
      readerContext.numSplits());

  List<BoundedSource<HCatRecord>> res = new ArrayList<>();
  for (int split = 0; split < readerContext.numSplits(); split++) {
    res.add(new BoundedHCatalogSource(spec.withContext(readerContext).withSplitId(split)));
  }
  return res;
}
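
As a worked example with illustrative numbers: an estimated table size of 1 GiB and a desired bundle size of 64 MiB give a desired split count of ceil(1073741824 / 67108864) = 16, while the actual split count reported by readerContext.numSplits() may differ.

// Illustrative numbers only, mirroring the calculation above.
long estimatedSizeBytes = 1_073_741_824L;   // ~1 GiB
long desiredBundleSizeBytes = 67_108_864L;  // 64 MiB
int desiredSplitCount = (int) Math.ceil((double) estimatedSizeBytes / desiredBundleSizeBytes); // 16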
 
Example #3
Source File: HCatalogIOTest.java    From beam with Apache License 2.0
/** Test of Read using SourceTestUtils.readFromSource(..). */
@Test
@NeedsTestData
public void testReadFromSource() throws Exception {
  ReaderContext context = getReaderContext(getConfigPropertiesAsMap(service.getHiveConf()));
  HCatalogIO.Read spec =
      HCatalogIO.read()
          .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
          .withContext(context)
          .withTable(TEST_TABLE);

  List<String> records = new ArrayList<>();
  for (int i = 0; i < context.numSplits(); i++) {
    BoundedHCatalogSource source = new BoundedHCatalogSource(spec.withSplitId(i));
    for (HCatRecord record : SourceTestUtils.readFromSource(source, OPTIONS)) {
      records.add(record.get(0).toString());
    }
  }
  assertThat(records, containsInAnyOrder(getExpectedRecords(TEST_RECORDS_COUNT).toArray()));
}
 
Example #4
Source File: HCatalogIOTest.java    From beam with Apache License 2.0
/** Test of Read using SourceTestUtils.assertSourcesEqualReferenceSource(..). */
@Test
@NeedsTestData
public void testSourceEqualsSplits() throws Exception {
  final int numRows = 1500;
  final int numSamples = 10;
  final long bytesPerRow = 15;
  ReaderContext context = getReaderContext(getConfigPropertiesAsMap(service.getHiveConf()));
  HCatalogIO.Read spec =
      HCatalogIO.read()
          .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
          .withContext(context)
          .withTable(TEST_TABLE);

  BoundedHCatalogSource source = new BoundedHCatalogSource(spec);
  List<BoundedSource<HCatRecord>> unSplitSource = source.split(-1, OPTIONS);
  assertEquals(1, unSplitSource.size());

  List<BoundedSource<HCatRecord>> splits =
      source.split(numRows * bytesPerRow / numSamples, OPTIONS);
  assertTrue(splits.size() >= 1);

  SourceTestUtils.assertSourcesEqualReferenceSource(unSplitSource.get(0), splits, OPTIONS);
}
 
Example #5
Source File: HiveTableReader.java    From kylin with Apache License 2.0
private static ReaderContext getHiveReaderContext(String database, String table, Map<String, String> partitionKV) throws Exception {
    HiveConf hiveConf = new HiveConf(HiveTableReader.class);
    Iterator<Entry<String, String>> itr = hiveConf.iterator();
    Map<String, String> map = new HashMap<String, String>();
    while (itr.hasNext()) {
        Entry<String, String> kv = itr.next();
        map.put(kv.getKey(), kv.getValue());
    }

    ReadEntity entity;
    if (partitionKV == null || partitionKV.size() == 0) {
        entity = new ReadEntity.Builder().withDatabase(database).withTable(table).build();
    } else {
        entity = new ReadEntity.Builder().withDatabase(database).withTable(table).withPartition(partitionKV).build();
    }

    HCatReader reader = DataTransferFactory.getHCatReader(entity, map);
    ReaderContext cntxt = reader.prepareRead();

    return cntxt;
}
 
Example #6
Source File: HiveTableReader.java    From Kylin with Apache License 2.0
private static ReaderContext getHiveReaderContext(String database, String table, Map<String, String> partitionKV) throws Exception {
    HiveConf hiveConf = new HiveConf(HiveTableReader.class);
    Iterator<Entry<String, String>> itr = hiveConf.iterator();
    Map<String, String> map = new HashMap<String, String>();
    while (itr.hasNext()) {
        Entry<String, String> kv = itr.next();
        map.put(kv.getKey(), kv.getValue());
    }

    ReadEntity entity;
    if (partitionKV == null || partitionKV.size() == 0) {
        entity = new ReadEntity.Builder().withDatabase(database).withTable(table).build();
    } else {
        entity = new ReadEntity.Builder().withDatabase(database).withTable(table).withPartition(partitionKV).build();
    }

    HCatReader reader = DataTransferFactory.getHCatReader(entity, map);
    ReaderContext cntxt = reader.prepareRead();

    return cntxt;
}
 
Example #7
Source File: PartitionReaderFn.java    From beam with Apache License 2.0
private ReaderContext getReaderContext(Read readRequest, Integer partitionIndexToRead)
    throws Exception {
  final List<Partition> partitions =
      metaStoreClient.listPartitions(
          readRequest.getDatabase(), readRequest.getTable(), Short.MAX_VALUE);
  final Partition partition = partitions.get(partitionIndexToRead);
  checkArgument(
      partition != null, "Unable to find a partition to read at index " + partitionIndexToRead);

  final int desiredSplitCount = HCatalogUtils.getSplitCount(readRequest, partition);
  final List<String> values = partition.getValues();
  final List<String> partitionCols = readRequest.getPartitionCols();
  checkArgument(
      values.size() == partitionCols.size(),
      "Number of input partitions should be equal to the values of list partition values.");

  List<String> filter = new ArrayList<>();
  for (int i = 0; i < partitionCols.size(); i++) {
    filter.add(partitionCols.get(i) + "=" + "'" + values.get(i) + "'");
  }
  final String filterString = String.join(" and ", filter);

  ReadEntity entity =
      new ReadEntity.Builder()
          .withDatabase(readRequest.getDatabase())
          .withFilter(filterString)
          .withTable(readRequest.getTable())
          .build();
  // pass the 'desired' split count as a hint to the API
  Map<String, String> configProps = new HashMap<>(readRequest.getConfigProperties());
  configProps.put(
      HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, String.valueOf(desiredSplitCount));
  return DataTransferFactory.getHCatReader(entity, configProps).prepareRead();
}
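
For illustration, here is a sketch of what the filter construction above produces for a hypothetical partition with columns year and month and values 2021 and 06 (assuming the usual java.util imports):

// Hypothetical partition columns and values, mirroring the loop above.
List<String> partitionCols = Arrays.asList("year", "month");
List<String> values = Arrays.asList("2021", "06");
List<String> filter = new ArrayList<>();
for (int i = 0; i < partitionCols.size(); i++) {
    filter.add(partitionCols.get(i) + "=" + "'" + values.get(i) + "'");
}
String filterString = String.join(" and ", filter);  // "year='2021' and month='06'"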
 
Example #8
Source File: PartitionReaderFn.java    From beam with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  final Read readRequest = c.element().getKey();
  final Integer partitionIndexToRead = c.element().getValue();
  ReaderContext readerContext = getReaderContext(readRequest, partitionIndexToRead);
  for (int i = 0; i < readerContext.numSplits(); i++) {
    HCatReader reader = DataTransferFactory.getHCatReader(readerContext, i);
    Iterator<HCatRecord> hcatIterator = reader.read();
    while (hcatIterator.hasNext()) {
      final HCatRecord record = hcatIterator.next();
      c.output(record);
    }
  }
}
 
Example #9
Source File: HCatalogIO.java    From beam with Apache License 2.0
private ReaderContext getReaderContext(long desiredSplitCount) throws HCatException {
  ReadEntity entity =
      new ReadEntity.Builder()
          .withDatabase(spec.getDatabase())
          .withTable(spec.getTable())
          .withFilter(spec.getFilter())
          .build();
  // pass the 'desired' split count as a hint to the API
  Map<String, String> configProps = new HashMap<>(spec.getConfigProperties());
  configProps.put(
      HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, String.valueOf(desiredSplitCount));
  return DataTransferFactory.getHCatReader(entity, configProps).prepareRead();
}
 
Example #10
Source File: HiveTableReader.java    From kylin-on-parquet-v2 with Apache License 2.0
private static Iterator<HCatRecord> loadHCatRecordItr(ReaderContext readCntxt, int dataSplit) throws HCatException {
    HCatReader currentHCatReader = DataTransferFactory.getHCatReader(readCntxt, dataSplit);

    return currentHCatReader.read();
}
 
Example #11
Source File: HCatalogIOTestUtils.java    From beam with Apache License 2.0
/** Returns a ReaderContext instance for the passed datastore config params. */
public static ReaderContext getReaderContext(Map<String, String> config) throws HCatException {
  return DataTransferFactory.getHCatReader(READ_ENTITY, config).prepareRead();
}
 
Example #12
Source File: HCatalogIO.java    From beam with Apache License 2.0
@Nullable
abstract ReaderContext getContext();
 
Example #13
Source File: HCatalogIO.java    From beam with Apache License 2.0
Read withContext(ReaderContext context) {
  return toBuilder().setContext(context).build();
}
 
Example #14
Source File: HiveTableReader.java    From kylin with Apache License 2.0
private static Iterator<HCatRecord> loadHCatRecordItr(ReaderContext readCntxt, int dataSplit) throws HCatException {
    HCatReader currentHCatReader = DataTransferFactory.getHCatReader(readCntxt, dataSplit);

    return currentHCatReader.read();
}
 
Example #15
Source File: HiveTableReader.java    From Kylin with Apache License 2.0
private static Iterator<HCatRecord> loadHCatRecordItr(ReaderContext readCntxt, int dataSplit) throws HCatException {
    HCatReader currentHCatReader = DataTransferFactory.getHCatReader(readCntxt, dataSplit);
    return currentHCatReader.read();
}
 
Example #16
Source File: HCatalogIO.java    From beam with Apache License 2.0
abstract Builder setContext(ReaderContext context);