Java Code Examples for org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker#tryClaim()

The following examples show how to use org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker#tryClaim(). Each example is an excerpt from an open-source project; the source file, originating project, and license are noted above each snippet.
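For orientation, the sketch below shows the core pattern the examples on this page share: a splittable DoFn walks its current restriction and calls tryClaim() on each position before emitting output for it, stopping as soon as a claim fails, since a failed claim means the runner has split off the remainder of the restriction. This is a minimal, hypothetical sketch rather than code from any of the projects below; the class name MinimalOffsetDoFn and the fixed 0-100 range are illustrative assumptions. OffsetRange provides a default tracker, so no @NewTracker method is needed here.

import org.apache.beam.sdk.io.range.OffsetRange;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;

// Hypothetical minimal splittable DoFn illustrating the tryClaim() contract.
class MinimalOffsetDoFn extends DoFn<String, Long> {

  @GetInitialRestriction
  public OffsetRange getInitialRestriction(@Element String element) {
    // Illustrative fixed range; a real DoFn would derive this from the element.
    return new OffsetRange(0, 100);
  }

  @ProcessElement
  public void processElement(
      @Element String element,
      RestrictionTracker<OffsetRange, Long> tracker,
      OutputReceiver<Long> out) {
    // Claim each offset before producing output for it. Stop immediately when a
    // claim fails: the unclaimed remainder may now belong to another worker.
    for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
      out.output(i);
    }
  }
}
 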
Example 1
Source File: OutputAndTimeBoundedSplittableProcessElementInvokerTest.java    From beam with Apache License 2.0
@ProcessElement
public ProcessContinuation process(
    ProcessContext context, RestrictionTracker<OffsetRange, Long> tracker) {
  Uninterruptibles.sleepUninterruptibly(
      sleepBeforeFirstClaim.getMillis(), TimeUnit.MILLISECONDS);
  for (long i = tracker.currentRestriction().getFrom(), numIterations = 1;
      tracker.tryClaim(i);
      ++i, ++numIterations) {
    Uninterruptibles.sleepUninterruptibly(
        sleepBeforeEachOutput.getMillis(), TimeUnit.MILLISECONDS);
    context.output("" + i);
    if (numIterations == numOutputsPerProcessCall) {
      return resume();
    }
  }
  return stop();
}
 
Example 2
Source File: HBaseReadSplittableDoFn.java    From beam with Apache License 2.0
@ProcessElement
public void processElement(
    @Element Read read,
    OutputReceiver<Result> out,
    RestrictionTracker<ByteKeyRange, ByteKey> tracker)
    throws Exception {
  Connection connection = ConnectionFactory.createConnection(read.getConfiguration());
  TableName tableName = TableName.valueOf(read.getTableId());
  Table table = connection.getTable(tableName);
  final ByteKeyRange range = tracker.currentRestriction();
  try (ResultScanner scanner =
      table.getScanner(HBaseUtils.newScanInRange(read.getScan(), range))) {
    for (Result result : scanner) {
      ByteKey key = ByteKey.copyFrom(result.getRow());
      if (!tracker.tryClaim(key)) {
        return;
      }
      out.output(result);
    }
    tracker.tryClaim(ByteKey.EMPTY);
  }
}
 
Example 3
Source File: SplittableDoFnTest.java    From beam with Apache License 2.0
@ProcessElement
public ProcessContinuation process(
    @Element String element,
    OutputReceiver<String> receiver,
    RestrictionTracker<OffsetRange, Long> tracker,
    BundleFinalizer bundleFinalizer)
    throws InterruptedException {
  if (wasFinalized.get()) {
    // Claim beyond the end now that we know we have been finalized.
    tracker.tryClaim(Long.MAX_VALUE);
    receiver.output(element);
    return stop();
  }
  if (tracker.tryClaim(tracker.currentRestriction().getFrom() + 1)) {
    bundleFinalizer.afterBundleCommit(
        Instant.now().plus(Duration.standardSeconds(MAX_ATTEMPTS)),
        () -> wasFinalized.set(true));
    // We sleep here instead of setting a resume time since the resume time doesn't need to
    // be honored.
    sleep(1000L); // 1 second
    return resume();
  }
  return stop();
}
 
Example 4
Source File: SplittableDoFnTest.java    From beam with Apache License 2.0
@ProcessElement
public ProcessContinuation processElement(
    ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  int[] blockStarts = {-1, 0, 12, 123, 1234, 12345, 34567, MAX_INDEX};
  int trueStart = snapToNextBlock((int) tracker.currentRestriction().getFrom(), blockStarts);
  for (int i = trueStart, numIterations = 1;
      tracker.tryClaim((long) blockStarts[i]);
      ++i, ++numIterations) {
    for (int index = blockStarts[i]; index < blockStarts[i + 1]; ++index) {
      c.output(KV.of(c.sideInput(sideInput) + ":" + c.element(), index));
    }
    if (numIterations == numClaimsPerCall) {
      return resume();
    }
  }
  return stop();
}
 
Example 5
Source File: SplittableDoFnTest.java    From beam with Apache License 2.0
@ProcessElement
public ProcessContinuation processElement(
    ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  int[] blockStarts = {-1, 0, 12, 123, 1234, 12345, 34567, MAX_INDEX};
  int trueStart = snapToNextBlock((int) tracker.currentRestriction().getFrom(), blockStarts);
  for (int i = trueStart, numIterations = 1;
      tracker.tryClaim((long) blockStarts[i]);
      ++i, ++numIterations) {
    for (int index = blockStarts[i]; index < blockStarts[i + 1]; ++index) {
      c.output(index);
    }
    if (numIterations == numClaimsPerCall) {
      return resume();
    }
  }
  return stop();
}
 
Example 6
Source File: SplittableDoFnTest.java    From beam with Apache License 2.0
@ProcessElement
public ProcessContinuation process(
    ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  for (long i = tracker.currentRestriction().getFrom(), numIterations = 0;
      tracker.tryClaim(i);
      ++i, ++numIterations) {
    c.output(KV.of(c.element(), (int) i));
    if (numIterations % 3 == 0) {
      return resume();
    }
  }
  return stop();
}
 
Example 7
Source File: SplittableParDoProcessFnTest.java    From beam with Apache License 2.0
@ProcessElement
public ProcessContinuation process(
    ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  for (long i = tracker.currentRestriction().getFrom(), numIterations = 0;
      tracker.tryClaim(i);
      ++i, ++numIterations) {
    c.output(String.valueOf(c.element() + i));
    if (numIterations == numOutputsPerCall - 1) {
      return resume();
    }
  }
  return stop();
}
 
Example 8
Source File: SplittableParDoProcessFnTest.java    From beam with Apache License 2.0
@ProcessElement
public void process(
    ProcessContext c,
    RestrictionTracker<OffsetRange, Long> tracker,
    ManualWatermarkEstimator<Instant> watermarkEstimator) {
  for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
    watermarkEstimator.setWatermark(c.element().plus(Duration.standardSeconds(i)));
    c.output(String.valueOf(i));
  }
}
 
Example 9
Source File: SplittableParDoExpanderTest.java    From beam with Apache License 2.0
@ProcessElement
public ProcessContinuation process(
    ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  for (long i = tracker.currentRestriction().getFrom(), numIterations = 0;
      tracker.tryClaim(i);
      ++i, ++numIterations) {
    c.output(KV.of(c.element(), (int) i));
    if (numIterations % 3 == 0) {
      return resume();
    }
  }
  return stop();
}
 
Example 10
Source File: HL7v2IO.java    From beam with Apache License 2.0
/**
 * List messages.
 *
 * @param hl7v2Store the HL7v2 store to list messages from
 * @throws IOException the io exception
 */
@ProcessElement
public void listMessages(
    @Element String hl7v2Store,
    RestrictionTracker<OffsetRange, Long> tracker,
    OutputReceiver<HL7v2Message> outputReceiver)
    throws IOException {
  OffsetRange currentRestriction = tracker.currentRestriction();
  Instant startRestriction = Instant.ofEpochMilli(currentRestriction.getFrom());
  Instant endRestriction = Instant.ofEpochMilli(currentRestriction.getTo());
  HttpHealthcareApiClient.HL7v2MessagePages pages =
      new HttpHealthcareApiClient.HL7v2MessagePages(
          client, hl7v2Store, startRestriction, endRestriction, filter.get(), "sendTime");
  Instant cursor;
  long lastClaimedMilliSecond = startRestriction.getMillis() - 1;
  for (HL7v2Message msg : FluentIterable.concat(pages)) {
    cursor = Instant.parse(msg.getSendTime());
    if (cursor.getMillis() > lastClaimedMilliSecond) {
      // Return early after the first claim failure preventing us from iterating
      // through the remaining messages.
      if (!tracker.tryClaim(cursor.getMillis())) {
        return;
      }
      lastClaimedMilliSecond = cursor.getMillis();
    }

    outputReceiver.output(msg);
  }

  // We've paginated through all messages for this restriction but the last message may be
  // before the end of the restriction
  tracker.tryClaim(currentRestriction.getTo());
}
 
Example 11
Source File: S3Import.java    From dlp-dataflow-deidentification with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  // create the channel
  String fileName = c.element().getKey();
  try (SeekableByteChannel channel = getReader(c.element().getValue())) {
    ByteBuffer readBuffer = ByteBuffer.allocate(BATCH_SIZE);
    ByteString buffer = ByteString.EMPTY;
    for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
      long startOffset = (i * BATCH_SIZE) - BATCH_SIZE;
      channel.position(startOffset);
      readBuffer = ByteBuffer.allocate(BATCH_SIZE);
      buffer = ByteString.EMPTY;
      channel.read(readBuffer);
      readBuffer.flip();
      buffer = ByteString.copyFrom(readBuffer);
      readBuffer.clear();
      LOG.debug(
          "Current Restriction {}, Content Size{}",
          tracker.currentRestriction(),
          buffer.size());
      c.output(KV.of(fileName, buffer.toStringUtf8().trim()));
    }
  } catch (Exception e) {

    c.output(textReaderFailedElements, e.getMessage());
  }
}
 
Example 12
Source File: Read.java    From beam with Apache License 2.0
@ProcessElement
public void processElement(
    RestrictionTracker<BoundedSource<T>, TimestampedValue<T>[]> tracker,
    OutputReceiver<T> receiver)
    throws IOException {
  TimestampedValue<T>[] out = new TimestampedValue[1];
  while (tracker.tryClaim(out)) {
    receiver.outputWithTimestamp(out[0].getValue(), out[0].getTimestamp());
  }
}
 
Example 13
Source File: Watch.java    From beam with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  long position = tracker.currentRestriction().getFrom();
  while (tracker.tryClaim(position)) {
    TimestampedValue<OutputT> value = c.element().getValue().get((int) position);
    c.outputWithTimestamp(KV.of(c.element().getKey(), value.getValue()), value.getTimestamp());
    position += 1L;
  }
}
 
Example 14
Source File: PeriodicSequence.java    From beam with Apache License 2.0
@ProcessElement
public ProcessContinuation processElement(
    @Element SequenceDefinition srcElement,
    OutputReceiver<Instant> out,
    RestrictionTracker<OffsetRange, Long> restrictionTracker) {

  OffsetRange restriction = restrictionTracker.currentRestriction();
  Long interval = srcElement.durationMilliSec;
  Long nextOutput = restriction.getFrom() + interval;

  boolean claimSuccess = true;

  while (claimSuccess && Instant.ofEpochMilli(nextOutput).isBeforeNow()) {
    claimSuccess = restrictionTracker.tryClaim(nextOutput);
    if (claimSuccess) {
      Instant output = Instant.ofEpochMilli(nextOutput);
      out.outputWithTimestamp(output, output);
      nextOutput = nextOutput + interval;
    }
  }

  ProcessContinuation continuation = ProcessContinuation.stop();
  if (claimSuccess) {
    Duration offset = new Duration(Instant.now(), Instant.ofEpochMilli(nextOutput));
    continuation = ProcessContinuation.resume().withResumeDelay(offset);
  }
  return continuation;
}
 
Example 15
Source File: FnApiDoFnRunnerTest.java    From beam with Apache License 2.0
@Override
@ProcessElement
public ProcessContinuation processElement(
    ProcessContext context,
    RestrictionTracker<OffsetRange, Long> tracker,
    ManualWatermarkEstimator<Instant> watermarkEstimator)
    throws Exception {
  long checkpointUpperBound = Long.parseLong(context.sideInput(singletonSideInput));
  long position = tracker.currentRestriction().getFrom();
  boolean claimStatus;
  while (true) {
    claimStatus = (tracker.tryClaim(position));
    if (!claimStatus) {
      break;
    } else if (position == NonWindowObservingTestSplittableDoFn.SPLIT_ELEMENT) {
      enableAndWaitForTrySplitToHappen();
    }
    context.outputWithTimestamp(
        context.element() + ":" + position, GlobalWindow.TIMESTAMP_MIN_VALUE.plus(position));
    watermarkEstimator.setWatermark(GlobalWindow.TIMESTAMP_MIN_VALUE.plus(position));
    position += 1L;
    if (position == checkpointUpperBound) {
      break;
    }
  }
  if (!claimStatus) {
    return ProcessContinuation.stop();
  } else {
    return ProcessContinuation.resume().withResumeDelay(Duration.millis(54321L));
  }
}
 
Example 16
Source File: FnApiDoFnRunnerTest.java    From beam with Apache License 2.0
@ProcessElement
public ProcessContinuation processElement(
    ProcessContext context,
    RestrictionTracker<OffsetRange, Long> tracker,
    ManualWatermarkEstimator<Instant> watermarkEstimator)
    throws Exception {
  long checkpointUpperBound = CHECKPOINT_UPPER_BOUND;
  long position = tracker.currentRestriction().getFrom();
  boolean claimStatus;
  while (true) {
    claimStatus = (tracker.tryClaim(position));
    if (!claimStatus) {
      break;
    } else if (position == SPLIT_ELEMENT) {
      enableAndWaitForTrySplitToHappen();
    }
    context.outputWithTimestamp(
        context.element() + ":" + position, GlobalWindow.TIMESTAMP_MIN_VALUE.plus(position));
    watermarkEstimator.setWatermark(GlobalWindow.TIMESTAMP_MIN_VALUE.plus(position));
    position += 1L;
    if (position == checkpointUpperBound) {
      break;
    }
  }
  if (!claimStatus) {
    return ProcessContinuation.stop();
  } else {
    return ProcessContinuation.resume().withResumeDelay(Duration.millis(54321L));
  }
}
 
Example 17
Source File: Watch.java    From beam with Apache License 2.0
@ProcessElement
public ProcessContinuation process(
    ProcessContext c,
    RestrictionTracker<GrowthState, KV<Growth.PollResult<OutputT>, TerminationStateT>> tracker,
    ManualWatermarkEstimator<Instant> watermarkEstimator)
    throws Exception {

  GrowthState currentRestriction = tracker.currentRestriction();
  if (currentRestriction instanceof NonPollingGrowthState) {
    Growth.PollResult<OutputT> priorPoll =
        ((NonPollingGrowthState<OutputT>) currentRestriction).getPending();
    if (tracker.tryClaim(KV.of(priorPoll, null))) {
      if (!priorPoll.getOutputs().isEmpty()) {
        LOG.info(
            "{} - re-emitting output of prior poll containing {} results.",
            c.element(),
            priorPoll.getOutputs().size());
        c.output(KV.of(c.element(), priorPoll.getOutputs()));
      }
      watermarkEstimator.setWatermark(priorPoll.getWatermark());
    }
    return stop();
  }

  // Poll for additional elements.
  Instant now = Instant.now();
  Growth.PollResult<OutputT> res =
      spec.getPollFn().getClosure().apply(c.element(), wrapProcessContext(c));

  PollingGrowthState<TerminationStateT> pollingRestriction =
      (PollingGrowthState<TerminationStateT>) currentRestriction;
  // Produce a poll result that only contains never seen before results.
  Growth.PollResult<OutputT> newResults =
      computeNeverSeenBeforeResults(pollingRestriction, res);

  // If we had zero new results, attempt to update the watermark if the poll result
  // provided a watermark. Otherwise attempt to claim all pending outputs.
  LOG.info(
      "{} - current round of polling took {} ms and returned {} results, "
          + "of which {} were new.",
      c.element(),
      new Duration(now, Instant.now()).getMillis(),
      res.getOutputs().size(),
      newResults.getOutputs().size());

  TerminationStateT terminationState = pollingRestriction.getTerminationState();
  if (!newResults.getOutputs().isEmpty()) {
    terminationState =
        getTerminationCondition().onSeenNewOutput(Instant.now(), terminationState);
  }

  if (!tracker.tryClaim(KV.of(newResults, terminationState))) {
    LOG.info("{} - will not emit poll result tryClaim failed.", c.element());
    return stop();
  }

  if (!newResults.getOutputs().isEmpty()) {
    c.output(KV.of(c.element(), newResults.getOutputs()));
  }

  if (newResults.getWatermark() != null) {
    watermarkEstimator.setWatermark(newResults.getWatermark());
  }

  Instant currentTime = Instant.now();
  if (getTerminationCondition().canStopPolling(currentTime, terminationState)) {
    LOG.info(
        "{} - told to stop polling by polling function at {} with termination state {}.",
        c.element(),
        currentTime,
        getTerminationCondition().toString(terminationState));
    return stop();
  }

  if (BoundedWindow.TIMESTAMP_MAX_VALUE.equals(newResults.getWatermark())) {
    LOG.info("{} - will stop polling, reached max timestamp.", c.element());
    return stop();
  }

  LOG.info(
      "{} - will resume polling in {} ms.", c.element(), spec.getPollInterval().getMillis());
  return resume().withResumeDelay(spec.getPollInterval());
}
 
Example 18
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker)
    throws IOException {
  for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
    String fileKey = c.element().getKey();
    try (BufferedReader br = getReader(c.element().getValue())) {

      csvHeaders = getHeaders(c.sideInput(headerMap), fileKey);
      if (csvHeaders != null) {
        List<FieldId> dlpTableHeaders =
            csvHeaders.stream()
                .map(header -> FieldId.newBuilder().setName(header).build())
                .collect(Collectors.toList());
        List<Table.Row> rows = new ArrayList<>();
        Table dlpTable = null;
        /** finding out EOL for this restriction so that we know the SOL */
        int endOfLine = (int) (i * batchSize.get().intValue());
        int startOfLine = (endOfLine - batchSize.get().intValue());
        /** skipping all the rows that's not part of this restriction */
        br.readLine();
        Iterator<CSVRecord> csvRows =
            CSVFormat.DEFAULT.withSkipHeaderRecord().parse(br).iterator();
        for (int line = 0; line < startOfLine; line++) {
          if (csvRows.hasNext()) {
            csvRows.next();
          }
        }
        /** looping through buffered reader and creating DLP Table Rows equals to batch */
        while (csvRows.hasNext() && lineCount <= batchSize.get()) {

          CSVRecord csvRow = csvRows.next();
          rows.add(convertCsvRowToTableRow(csvRow));
          lineCount += 1;
        }
        /** creating DLP table and output for next transformation */
        dlpTable = Table.newBuilder().addAllHeaders(dlpTableHeaders).addAllRows(rows).build();
        c.output(KV.of(fileKey, dlpTable));

        LOG.debug(
            "Current Restriction From: {}, Current Restriction To: {},"
                + " StartofLine: {}, End Of Line {}, BatchData {}",
            tracker.currentRestriction().getFrom(),
            tracker.currentRestriction().getTo(),
            startOfLine,
            endOfLine,
            dlpTable.getRowsCount());

      } else {

        throw new RuntimeException("Header Values Can't be found For file Key " + fileKey);
      }
    }
  }
}
 
Example 19
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker)
    throws IOException {
  for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
    String fileKey = c.element().getKey();
    try (BufferedReader br = getReader(c.element().getValue())) {

      csvHeaders = getHeaders(c.sideInput(headerMap), fileKey);
      if (csvHeaders != null) {
        List<FieldId> dlpTableHeaders =
            csvHeaders.stream()
                .map(header -> FieldId.newBuilder().setName(header).build())
                .collect(Collectors.toList());
        List<Table.Row> rows = new ArrayList<>();
        Table dlpTable = null;
        /** finding out EOL for this restriction so that we know the SOL */
        int endOfLine = (int) (i * batchSize.get().intValue());
        int startOfLine = (endOfLine - batchSize.get().intValue());
        /** skipping all the rows that's not part of this restriction */
        br.readLine();
        Iterator<CSVRecord> csvRows =
            CSVFormat.DEFAULT.withSkipHeaderRecord().parse(br).iterator();
        for (int line = 0; line < startOfLine; line++) {
          if (csvRows.hasNext()) {
            csvRows.next();
          }
        }
        /** looping through buffered reader and creating DLP Table Rows equals to batch */
        while (csvRows.hasNext() && lineCount <= batchSize.get()) {

          CSVRecord csvRow = csvRows.next();
          rows.add(convertCsvRowToTableRow(csvRow));
          lineCount += 1;
        }
        /** creating DLP table and output for next transformation */
        dlpTable = Table.newBuilder().addAllHeaders(dlpTableHeaders).addAllRows(rows).build();
        c.output(KV.of(fileKey, dlpTable));

        LOG.debug(
            "Current Restriction From: {}, Current Restriction To: {},"
                + " StartofLine: {}, End Of Line {}, BatchData {}",
            tracker.currentRestriction().getFrom(),
            tracker.currentRestriction().getTo(),
            startOfLine,
            endOfLine,
            dlpTable.getRowsCount());

      } else {

        throw new RuntimeException("Header Values Can't be found For file Key " + fileKey);
      }
    }
  }
}