com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream Java Examples

The following examples show how to use com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DynamoDBSourceConfigTests.java    From pulsar with Apache License 2.0 6 votes vote down vote up
/**
 * Loads {@code sourceConfig.yaml} through {@code DynamoDBSourceConfig.load(String)} and checks
 * every field of the resulting configuration, including the start-at timestamp.
 *
 * @throws IOException declared for the file-based load performed by this test
 */
@Test
public final void loadFromYamlFileTest() throws IOException {
    File yamlFile = getFile("sourceConfig.yaml");
    DynamoDBSourceConfig config = DynamoDBSourceConfig.load(yamlFile.getAbsolutePath());
    assertNotNull(config);
    assertEquals(config.getAwsEndpoint(), "https://some.endpoint.aws");
    assertEquals(config.getAwsRegion(), "us-east-1");
    assertEquals(config.getAwsDynamodbStreamArn(), "arn:aws:dynamodb:us-west-2:111122223333:table/TestTable/stream/2015-05-11T21:21:33.291");
    assertEquals(config.getAwsCredentialPluginParam(),
            "{\"accessKey\":\"myKey\",\"secretKey\":\"my-Secret\"}");
    assertEquals(config.getApplicationName(), "My test application");
    assertEquals(config.getCheckpointInterval(), 30000);
    assertEquals(config.getBackoffTime(), 4000);
    assertEquals(config.getNumRetries(), 3);
    assertEquals(config.getReceiveQueueSize(), 2000);
    assertEquals(config.getInitialPositionInStream(), InitialPositionInStream.TRIM_HORIZON);

    // Compare both timestamps as UTC date-times. The Date is converted directly;
    // routing it through a Calendar first yields the same instant and adds nothing.
    ZonedDateTime actual = ZonedDateTime.ofInstant(config.getStartAtTime().toInstant(), ZoneOffset.UTC);
    ZonedDateTime expected = ZonedDateTime.ofInstant(DAY.toInstant(), ZoneOffset.UTC);
    assertEquals(actual, expected);
}
 
Example #2
Source File: KinesisSourceIT.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a consumer configuration bean for the given stream: hard-coded credentials,
 * a custom endpoint, JSON parsing and a random application name.
 *
 * @param streamName name of the stream to read from
 * @return the populated configuration bean
 */
private KinesisConsumerConfigBean getKinesisConsumerConfig(String streamName) {
  KinesisConsumerConfigBean bean = new KinesisConsumerConfigBean();

  // Parser settings: multiple JSON objects, UTF-8, at most 1024 characters per object.
  DataParserFormatConfig parserConfig = new DataParserFormatConfig();
  bean.dataFormatConfig = parserConfig;

  // Credentials are supplied lazily through lambdas.
  AWSConfig aws = new AWSConfig();
  bean.awsConfig = aws;
  aws.awsAccessKeyId = () -> "foo";
  aws.awsSecretAccessKey = () -> "boo";

  // Region OTHER goes together with an explicit endpoint.
  bean.region = AwsRegion.OTHER;
  bean.endpoint = getKinesisEndpoint();
  bean.streamName = streamName;

  bean.dataFormat = DataFormat.JSON;
  parserConfig.jsonContent = JsonMode.MULTIPLE_OBJECTS;
  parserConfig.charset = "UTF-8";
  parserConfig.jsonMaxObjectLen = 1024;

  bean.applicationName = UUID.randomUUID().toString();
  bean.idleTimeBetweenReads = 250;
  bean.initialPositionInStream = InitialPositionInStream.TRIM_HORIZON;
  bean.maxBatchSize = 1000;
  bean.maxRecordProcessors = 2; // Must be at least 1

  return bean;
}
 
Example #3
Source File: DynamicCheckpointGeneratorTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Stubs the shard finder to return two of the three shards and checks that the generated
 * checkpoint has two entries and none for the third shard.
 */
@Test
public void shouldMapAllValidShardsToCheckpoints() throws Exception {
  final String streamName = "stream";

  when(shard1.getShardId()).thenReturn("shard-01");
  when(shard2.getShardId()).thenReturn("shard-02");
  when(shard3.getShardId()).thenReturn("shard-03");

  // Only shard1 and shard2 are reported at the starting point.
  Set<Shard> reportedShards = Sets.newHashSet(shard1, shard2);
  StartingPoint latest = new StartingPoint(InitialPositionInStream.LATEST);
  when(startingPointShardsFinder.findShardsAtStartingPoint(kinesisClient, "stream", latest))
      .thenReturn(reportedShards);

  DynamicCheckpointGenerator generator =
      new DynamicCheckpointGenerator(streamName, latest, startingPointShardsFinder);
  KinesisReaderCheckpoint generated = generator.generate(kinesisClient);

  assertThat(generated)
      .hasSize(2)
      .doesNotContain(new ShardCheckpoint(streamName, shard3.getShardId(), latest));
}
 
Example #4
Source File: DynamicCheckpointGeneratorTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Stubs the shard finder to return all three shards and checks that the generated
 * checkpoint has three entries.
 */
@Test
public void shouldMapAllShardsToCheckpoints() throws Exception {
  when(shard1.getShardId()).thenReturn("shard-01");
  when(shard2.getShardId()).thenReturn("shard-02");
  when(shard3.getShardId()).thenReturn("shard-03");

  Set<Shard> reportedShards = Sets.newHashSet(shard1, shard2, shard3);
  StartingPoint latest = new StartingPoint(InitialPositionInStream.LATEST);
  when(startingPointShardsFinder.findShardsAtStartingPoint(kinesisClient, "stream", latest))
      .thenReturn(reportedShards);

  DynamicCheckpointGenerator generator =
      new DynamicCheckpointGenerator("stream", latest, startingPointShardsFinder);
  KinesisReaderCheckpoint generated = generator.generate(kinesisClient);

  assertThat(generated).hasSize(3);
}
 
Example #5
Source File: KinesisSourceUpgrader.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Moves the legacy top-level settings into the Kinesis consumer config bean, commits the
 * move, and appends an {@code initialPositionInStream} entry set to {@code LATEST}.
 *
 * @param configs the configuration list to upgrade in place
 */
private void upgradeToConsumerConfigBeanV1(List<Config> configs) {
  for (Config candidate : configs) {
    switch (candidate.getName()) {
      // Every setting listed here was relocated into the consumer config bean.
      case "applicationName":
      case "maxBatchSize":
      case "idleTimeBetweenReads":
      case "maxWaitTime":
      case "previewWaitTime":
        moveConfigToBean(candidate, KINESIS_CONFIG_BEAN);
        break;
      default:
        // Any other setting stays where it is.
        break;
    }
  }
  commitMove(configs);

  String positionKey = KINESIS_CONFIG_BEAN + ".initialPositionInStream";
  configs.add(new Config(positionKey, InitialPositionInStream.LATEST));
}
 
Example #6
Source File: StartingPointShardsFinderTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expects an {@link IllegalStateException} when the shard list returned for the stream is
 * {@code shard00}..{@code shard09} plus a closed shard "0010" and the LATEST starting point
 * is requested.
 */
@Test(expected = IllegalStateException.class)
public void shouldThrowExceptionWhenSuccessorsNotFoundForExpiredShard() throws Exception {
  // given: a closed shard 0010 with parents 0008 and 0005
  Shard closedTail =
      createClosedShard("0010").withParentShardId("0008").withAdjacentParentShardId("0005");
  List<Shard> listedShards =
      ImmutableList.of(
          shard00, shard01, shard02, shard03, shard04, shard05,
          shard06, shard07, shard08, shard09, closedTail);
  when(kinesis.listShards(STREAM_NAME)).thenReturn(listedShards);
  StartingPoint latest = new StartingPoint(InitialPositionInStream.LATEST);

  // when: the lookup is expected to throw
  underTest.findShardsAtStartingPoint(kinesis, STREAM_NAME, latest);
}
 
Example #7
Source File: ShardRecordsIterator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the stream's shards and creates a records iterator, starting at TRIM_HORIZON, for
 * every shard whose parent shard id equals this iterator's {@code shardId}.
 *
 * @return iterators for the matching child shards; empty when none match
 * @throws TransientKinesisException declared for the shard listing call
 */
List<ShardRecordsIterator> findSuccessiveShardRecordIterators() throws TransientKinesisException {
  List<Shard> listedShards = kinesis.listShards(streamName);
  List<ShardRecordsIterator> childIterators = new ArrayList<>();
  for (Shard candidate : listedShards) {
    if (!shardId.equals(candidate.getParentShardId())) {
      continue; // not a direct successor of this shard
    }
    String childId = candidate.getShardId();
    StartingPoint fromOldest = new StartingPoint(InitialPositionInStream.TRIM_HORIZON);
    ShardCheckpoint childCheckpoint = new ShardCheckpoint(streamName, childId, fromOldest);
    childIterators.add(new ShardRecordsIterator(childCheckpoint, kinesis, watermarkPolicyFactory));
  }
  return childIterators;
}
 
Example #8
Source File: KinesisConfig.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the KCL configuration for one system stream. Defaults are set first (LATEST
 * start position, process-records calls even for empty lists), then system-scoped
 * overrides are applied, then stream-scoped overrides.
 *
 * @param system name of the system
 * @param stream name of the stream
 * @param appName name of the application; also the prefix of the generated worker id
 * @return Stream scoped KCL configs required to build
 *         {@link KinesisClientLibConfiguration}
 */
public KinesisClientLibConfiguration getKinesisClientLibConfig(String system, String stream, String appName) {
  ClientConfiguration awsClientConfig = getAWSClientConfig(system);
  String workerId = appName + "-" + UUID.randomUUID();
  AWSCredentialsProvider credentials = credentialsProviderForStream(system, stream);

  KinesisClientLibConfiguration kclConfig =
      new KinesisClientLibConfiguration(appName, stream, credentials, workerId);
  kclConfig = kclConfig.withRegionName(getRegion(system, stream).getName());
  // The same AWS client configuration is shared by all three service clients.
  kclConfig = kclConfig.withKinesisClientConfig(awsClientConfig);
  kclConfig = kclConfig.withCloudWatchClientConfig(awsClientConfig);
  kclConfig = kclConfig.withDynamoDBClientConfig(awsClientConfig);
  kclConfig = kclConfig.withInitialPositionInStream(InitialPositionInStream.LATEST);
  // For health monitoring metrics.
  kclConfig = kclConfig.withCallProcessRecordsEvenForEmptyRecordList(true);

  // System-scoped settings go first so that stream-scoped settings can override them.
  String systemScope = String.format(CONFIG_SYSTEM_KINESIS_CLIENT_LIB_CONFIG, system);
  setKinesisClientLibConfigs(subset(systemScope), kclConfig);
  String streamScope = String.format(CONFIG_STREAM_KINESIS_CLIENT_LIB_CONFIG, system, stream);
  setKinesisClientLibConfigs(subset(streamScope), kclConfig);
  return kclConfig;
}
 
Example #9
Source File: TestKinesisConfig.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that system-scoped KCL settings reach the generated configuration and that a
 * stream-scoped setting overrides the system-scoped one for that stream only.
 */
@Test
public void testKclConfigs() {
  final String system = "kinesis";
  final String stream = "kinesis-stream";
  final String prefix = String.format("systems.%s.", system);
  final String kclPrefix = prefix + "aws.kcl.";

  Map<String, String> settings = new HashMap<>();
  // region config is required for setting kcl config.
  settings.put(prefix + "aws.region", "us-east-1");

  // System-scoped KCL settings.
  settings.put(kclPrefix + "TableName", "sample-table");
  settings.put(kclPrefix + "MaxRecords", "100");
  settings.put(kclPrefix + "CallProcessRecordsEvenForEmptyRecordList", "true");
  settings.put(kclPrefix + "InitialPositionInStream", "TRIM_HORIZON");
  // Stream-scoped override that applies to kinesis-stream1 only.
  settings.put(prefix + "streams.kinesis-stream1.aws.kcl.InitialPositionInStream", "LATEST");

  KinesisConfig kinesisConfig = new KinesisConfig(new MapConfig(settings));

  KinesisClientLibConfiguration systemScoped =
      kinesisConfig.getKinesisClientLibConfig(system, stream, "sample-app");
  assertEquals("sample-table", systemScoped.getTableName());
  assertEquals(100, systemScoped.getMaxRecords());
  assertTrue(systemScoped.shouldCallProcessRecordsEvenForEmptyRecordList());
  assertEquals(InitialPositionInStream.TRIM_HORIZON, systemScoped.getInitialPositionInStream());

  // The override must be visible for kinesis-stream1.
  KinesisClientLibConfiguration streamScoped =
      kinesisConfig.getKinesisClientLibConfig(system, "kinesis-stream1", "sample-app");
  assertEquals(InitialPositionInStream.LATEST, streamScoped.getInitialPositionInStream());
}
 
Example #10
Source File: DynamoDBSourceConfigTests.java    From pulsar with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a {@code DynamoDBSourceConfig} from a map and checks every field of the result,
 * including the numeric values supplied as strings and the start-at timestamp.
 *
 * @throws IOException declared for the configuration load performed by this test
 */
@Test
public final void loadFromMapTest() throws IOException {
    Map<String, Object> map = new HashMap<>();
    map.put("awsEndpoint", "https://some.endpoint.aws");
    map.put("awsRegion", "us-east-1");
    map.put("awsDynamodbStreamArn", "arn:aws:dynamodb:us-west-2:111122223333:table/TestTable/stream/2015-05-11T21:21:33.291");
    map.put("awsCredentialPluginParam", "{\"accessKey\":\"myKey\",\"secretKey\":\"my-Secret\"}");
    // The next three values are deliberately supplied as strings.
    map.put("checkpointInterval", "30000");
    map.put("backoffTime", "4000");
    map.put("numRetries", "3");
    map.put("receiveQueueSize", 2000);
    map.put("applicationName", "My test application");
    map.put("initialPositionInStream", InitialPositionInStream.TRIM_HORIZON);
    map.put("startAtTime", DAY);

    DynamoDBSourceConfig config = DynamoDBSourceConfig.load(map);

    assertNotNull(config);
    assertEquals(config.getAwsEndpoint(), "https://some.endpoint.aws");
    assertEquals(config.getAwsRegion(), "us-east-1");
    assertEquals(config.getAwsDynamodbStreamArn(), "arn:aws:dynamodb:us-west-2:111122223333:table/TestTable/stream/2015-05-11T21:21:33.291");
    assertEquals(config.getAwsCredentialPluginParam(),
            "{\"accessKey\":\"myKey\",\"secretKey\":\"my-Secret\"}");
    assertEquals(config.getApplicationName(), "My test application");
    assertEquals(config.getCheckpointInterval(), 30000);
    assertEquals(config.getBackoffTime(), 4000);
    assertEquals(config.getNumRetries(), 3);
    assertEquals(config.getReceiveQueueSize(), 2000);
    assertEquals(config.getInitialPositionInStream(), InitialPositionInStream.TRIM_HORIZON);

    // Compare both timestamps as UTC date-times. The Date is converted directly;
    // routing it through a Calendar first yields the same instant and adds nothing.
    ZonedDateTime actual = ZonedDateTime.ofInstant(config.getStartAtTime().toInstant(), ZoneOffset.UTC);
    ZonedDateTime expected = ZonedDateTime.ofInstant(DAY.toInstant(), ZoneOffset.UTC);
    assertEquals(actual, expected);
}
 
Example #11
Source File: DynamoDBSourceConfigTests.java    From pulsar with Apache License 2.0 5 votes vote down vote up
/**
 * Expects opening a {@code DynamoDBSource} to fail with "Timestamp must be specified" when
 * the initial position is AT_TIMESTAMP and the map contains no start time.
 */
@Test(expectedExceptions = IllegalArgumentException.class,
        expectedExceptionsMessageRegExp = "Timestamp must be specified")
public final void missingStartTimeTest() throws Exception {
    Map<String, Object> settings = new HashMap<>();
    settings.put("awsEndpoint", "https://some.endpoint.aws");
    settings.put("awsRegion", "us-east-1");
    settings.put("awsDynamodbStreamArn", "arn:aws:dynamodb:us-west-2:111122223333:table/TestTable/stream/2015-05-11T21:21:33.291");
    settings.put("awsCredentialPluginParam", "{\"accessKey\":\"myKey\",\"secretKey\":\"my-Secret\"}");
    // AT_TIMESTAMP is set without any start time in the map.
    settings.put("initialPositionInStream", InitialPositionInStream.AT_TIMESTAMP);

    new DynamoDBSource().open(settings, null);
}
 
Example #12
Source File: KinesisConfig.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Applies string-valued settings to a {@link KinesisClientLibConfiguration} by reflectively
 * invoking the public single-argument method named {@code "with" + key} for each entry.
 *
 * <p>Supported parameter types are {@code long}, {@code int}, {@code boolean},
 * {@link String} and {@link InitialPositionInStream}. Entries with an empty value are
 * skipped. When a method name is overloaded, overloads with an unsupported parameter type
 * are passed over so that a supported overload can still be used; reflection does not
 * return methods in a defined order, so the first name match is not necessarily usable.
 *
 * @param config key/value pairs where the key is the method name without the
 *        {@code with} prefix
 * @param kinesisLibConfig the configuration object to update
 * @throws IllegalArgumentException if a value cannot be converted or the invocation fails
 */
private void setKinesisClientLibConfigs(Map<String, String> config, KinesisClientLibConfiguration kinesisLibConfig) {
  for (Entry<String, String> entry : config.entrySet()) {
    String key = entry.getKey();
    String value = entry.getValue();
    if (StringUtils.isEmpty(value)) {
      continue;
    }
    String setterName = "with" + key;
    boolean nameMatched = false;
    boolean applied = false;
    for (Method method : KinesisClientLibConfiguration.class.getMethods()) {
      Class<?>[] parameterTypes = method.getParameterTypes();
      // Only single-argument methods can take the configured value.
      if (!method.getName().equals(setterName) || parameterTypes.length != 1) {
        continue;
      }
      nameMatched = true;
      Class<?> type = parameterTypes[0];
      try {
        Object argument;
        if (type == long.class) {
          argument = Long.valueOf(value);
        } else if (type == int.class) {
          argument = Integer.valueOf(value);
        } else if (type == boolean.class) {
          argument = Boolean.valueOf(value);
        } else if (type == String.class) {
          argument = value;
        } else if (type == InitialPositionInStream.class) {
          // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
          argument = InitialPositionInStream.valueOf(value.toUpperCase(java.util.Locale.ROOT));
        } else {
          // Unsupported parameter type: try the next overload instead of reporting success.
          continue;
        }
        method.invoke(kinesisLibConfig, argument);
        LOG.info("Loaded property " + key + " = " + value);
        applied = true;
        break;
      } catch (Exception e) {
        throw new IllegalArgumentException(
            String.format("Error trying to set field %s with the value '%s'", key, value), e);
      }
    }
    if (!applied) {
      if (nameMatched) {
        LOG.warn("Property " + key + " ignored as its set method has an unsupported parameter type");
      } else {
        LOG.warn("Property " + key + " ignored as there is no corresponding set method");
      }
    }
  }
}
 
Example #13
Source File: KinesisConnectorConfiguration.java    From amazon-kinesis-connectors with Apache License 2.0 5 votes vote down vote up
/**
 * Reads an {@link InitialPositionInStream} from the given properties.
 *
 * @param property name of the property to read
 * @param defaultInitialPositionInInputStream value used when the property is absent or
 *        cannot be parsed
 * @param properties the properties to read from
 * @return the parsed position, or the default when parsing throws (the exception is logged)
 */
private InitialPositionInStream getInitialPositionInStreamProperty(String property,
        InitialPositionInStream defaultInitialPositionInInputStream,
        Properties properties) {
    String fallbackText = defaultInitialPositionInInputStream.toString();
    String configured = properties.getProperty(property, fallbackText);
    InitialPositionInStream resolved;
    try {
        resolved = InitialPositionInStream.valueOf(configured);
    } catch (Exception e) {
        LOG.error(e);
        resolved = defaultInitialPositionInInputStream;
    }
    return resolved;
}
 
Example #14
Source File: KinesisEventConsumer.java    From koupler with MIT License 5 votes vote down vote up
/**
 * Prepares a worker builder that consumes the given stream with this object as the record
 * processor factory.
 *
 * @param propertiesFile properties file from which the region is read
 * @param streamName name of the stream to consume
 * @param appName application name; also passed as the worker id
 * @param initialPosition name of an {@link InitialPositionInStream} constant
 */
public KinesisEventConsumer(String propertiesFile, String streamName, String appName, String initialPosition) {
    KinesisProducerConfiguration producerConfig = KinesisProducerConfiguration.fromPropertiesFile(propertiesFile);
    InitialPositionInStream startFrom = InitialPositionInStream.valueOf(initialPosition);

    KinesisClientLibConfiguration kclConfig = new KinesisClientLibConfiguration(
            appName, streamName, new DefaultAWSCredentialsProviderChain(), appName);
    kclConfig = kclConfig.withRegionName(producerConfig.getRegion());
    kclConfig = kclConfig.withInitialPositionInStream(startFrom);

    this.builder = new Worker.Builder().recordProcessorFactory(this).config(kclConfig);
}
 
Example #15
Source File: KinesisSource.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a KCL worker from the stage configuration.
 *
 * @param recordProcessorFactory factory handed to the worker builder
 * @param maxBatchSize value used as the KCL max-records setting
 * @return the built worker
 */
private Worker createKinesisWorker(IRecordProcessorFactory recordProcessorFactory, int maxBatchSize) {
  KinesisClientLibConfiguration kclConfig = new KinesisClientLibConfiguration(
      conf.applicationName, conf.streamName, credentials, getWorkerId());

  kclConfig
      .withMaxRecords(maxBatchSize)
      .withCallProcessRecordsEvenForEmptyRecordList(false)
      .withIdleTimeBetweenReadsInMillis(conf.idleTimeBetweenReads)
      .withKinesisClientConfig(clientConfiguration);

  // AT_TIMESTAMP is configured through the timestamp setter; the two named positions
  // are passed through unchanged. Any other value leaves the configuration untouched.
  InitialPositionInStream startPosition = conf.initialPositionInStream;
  boolean namedPosition = startPosition == InitialPositionInStream.LATEST
      || startPosition == InitialPositionInStream.TRIM_HORIZON;
  if (startPosition == InitialPositionInStream.AT_TIMESTAMP) {
    kclConfig.withTimestampAtInitialPositionInStream(new Date(conf.initialTimestamp));
  } else if (namedPosition) {
    kclConfig.withInitialPositionInStream(conf.initialPositionInStream);
  }

  // Region OTHER means an explicit endpoint is used instead of a region name.
  boolean customEndpoint = conf.region == AwsRegion.OTHER;
  if (customEndpoint) {
    kclConfig.withKinesisEndpoint(conf.endpoint);
  } else {
    kclConfig.withRegionName(conf.region.getId());
  }

  Worker worker = new Worker.Builder()
      .recordProcessorFactory(recordProcessorFactory)
      .metricsFactory(metricsFactory)
      .dynamoDBClient(dynamoDBClient)
      .cloudWatchClient(cloudWatchClient)
      .execService(executor)
      .config(kclConfig)
      .build();
  return worker;
}
 
Example #16
Source File: KinesisSource.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches preview records from the stream's last shard and adds the parsed records to the
 * batch.
 *
 * @param maxBatchSize upper bound on the number of Kinesis records processed
 * @param batchMaker receives every parsed record
 * @throws IOException declared by this method; the throwing call is not visible here
 * @throws StageException declared by this method; the throwing call is not visible here
 */
private void previewProcess(
    int maxBatchSize,
    BatchMaker batchMaker
) throws IOException, StageException {
  ClientConfiguration awsClientConfig = AWSUtil.getClientConfiguration(conf.proxyConfig);
  String lastShardId = KinesisUtil.getLastShardId(awsClientConfig, conf, conf.streamName);

  GetShardIteratorRequest iteratorRequest = new GetShardIteratorRequest();
  iteratorRequest.setStreamName(conf.streamName);
  iteratorRequest.setShardId(lastShardId);
  iteratorRequest.setShardIteratorType(conf.initialPositionInStream.name());

  // A timestamp is only set on the request for AT_TIMESTAMP.
  boolean startAtTimestamp = conf.initialPositionInStream == InitialPositionInStream.AT_TIMESTAMP;
  if (startAtTimestamp) {
    iteratorRequest.setTimestamp(new Date(conf.initialTimestamp));
  }

  // Outside preview mode, report a configured batch size above the given limit.
  boolean configuredSizeTooLarge = conf.maxBatchSize > maxBatchSize;
  if (!getContext().isPreview() && configuredSizeTooLarge) {
    getContext().reportError(Errors.KINESIS_18, maxBatchSize);
  }

  int fetchLimit = Math.min(conf.maxBatchSize, maxBatchSize);
  List<com.amazonaws.services.kinesis.model.Record> fetched =
      KinesisUtil.getPreviewRecords(awsClientConfig, conf, fetchLimit, iteratorRequest);

  // Never process more records than the caller's limit, whatever was returned.
  int processCount = Math.min(fetched.size(), maxBatchSize);
  int position = 0;
  while (position < processCount) {
    UserRecord userRecord = new UserRecord(fetched.get(position));
    KinesisUtil.processKinesisRecord(iteratorRequest.getShardId(), userRecord, parserFactory)
        .forEach(batchMaker::addRecord);
    position++;
  }
}
 
Example #17
Source File: KinesisInputRuntime.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Maps an offset type to the matching {@link InitialPositionInStream}:
 * {@code LATEST} to {@code LATEST} and {@code EARLIEST} to {@code TRIM_HORIZON}.
 *
 * @param offsetType the offset type to convert
 * @return the matching position; for any other offset type an UNEXPECTED_ARGUMENT error is
 *         raised through {@code setAndThrow}, with {@code null} as the formal fallback
 */
private InitialPositionInStream convertToPosition(KinesisInputProperties.OffsetType offsetType) {
    InitialPositionInStream position = null;
    switch (offsetType) {
    case LATEST:
        position = InitialPositionInStream.LATEST;
        break;
    case EARLIEST:
        position = InitialPositionInStream.TRIM_HORIZON;
        break;
    default:
        String message = String.format("Do not support OffsetType %s", offsetType);
        TalendRuntimeException.build(CommonErrorCodes.UNEXPECTED_ARGUMENT).setAndThrow(message);
        break;
    }
    return position;
}
 
Example #18
Source File: StartingPointShardsFinderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * With all shards listed and a TRIM_HORIZON starting point, the finder must return exactly
 * {@code shard00} and {@code shard01}, in any order.
 */
@Test
public void shouldFindEarliestShardsWhenTrimHorizonStartingPointRequested() throws Exception {
  // given
  when(kinesis.listShards(STREAM_NAME)).thenReturn(allShards);
  StartingPoint trimHorizon = new StartingPoint(InitialPositionInStream.TRIM_HORIZON);

  // when
  Iterable<Shard> found = underTest.findShardsAtStartingPoint(kinesis, STREAM_NAME, trimHorizon);

  // then
  assertThat(found).containsExactlyInAnyOrder(shard00, shard01);
}
 
Example #19
Source File: StartingPointShardsFinderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * With all shards listed and a LATEST starting point, the finder must return exactly
 * {@code shard09} and {@code shard10}, in any order.
 */
@Test
public void shouldFindLastShardsWhenLatestStartingPointRequested() throws Exception {
  // given
  when(kinesis.listShards(STREAM_NAME)).thenReturn(allShards);
  StartingPoint latest = new StartingPoint(InitialPositionInStream.LATEST);

  // when
  Iterable<Shard> found = underTest.findShardsAtStartingPoint(kinesis, STREAM_NAME, latest);

  // then
  assertThat(found).containsExactlyInAnyOrder(shard09, shard10);
}
 
Example #20
Source File: KinesisMockWriteTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes two records through {@code KinesisIO.write()} into the mock service, then reads
 * them back through {@code KinesisIO.read()} in a second pipeline and compares the result
 * with the data held by the mock.
 */
@Test
public void testWriteAndReadFromMockKinesis() {
  final int noOfShards = 1;
  final int noOfEventsPerShard = 2;
  KinesisServiceMock mockService = KinesisServiceMock.getInstance();

  // Write phase: two single-character payloads.
  byte[] first = "1".getBytes(StandardCharsets.UTF_8);
  byte[] second = "2".getBytes(StandardCharsets.UTF_8);
  Iterable<byte[]> payloads = ImmutableList.of(first, second);
  p.apply(Create.of(payloads))
      .apply(
          KinesisIO.write()
              .withStreamName(STREAM)
              .withPartitionKey(PARTITION_KEY)
              .withAWSClientsProvider(new FakeKinesisProvider()));
  p.run().waitUntilFinish();
  assertEquals(2, mockService.getAddedRecords().get());

  // Read phase: replay what the mock recorded, starting from TRIM_HORIZON.
  List<List<AmazonKinesisMock.TestData>> shardedData = mockService.getShardedData();
  PCollection<AmazonKinesisMock.TestData> readBack =
      p2.apply(
              KinesisIO.read()
                  .withStreamName(STREAM)
                  .withInitialPositionInStream(InitialPositionInStream.TRIM_HORIZON)
                  .withAWSClientsProvider(new AmazonKinesisMock.Provider(shardedData, 10))
                  .withMaxNumRecords(noOfShards * noOfEventsPerShard))
          .apply(ParDo.of(new KinesisMockReadTest.KinesisRecordToTestData()));
  PAssert.that(readBack).containsInAnyOrder(Iterables.concat(shardedData));
  p2.run().waitUntilFinish();
}
 
Example #21
Source File: KinesisMockReadTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads from the stream "stream" through the given provider, starting at TRIM_HORIZON with
 * the arrival-time watermark policy, and asserts the records equal the flattened test data.
 *
 * @param provider AWS clients provider used by the read transform
 * @param testData expected records; the nested lists are concatenated for the comparison
 */
public void verifyReadWithProvider(
    AmazonKinesisMock.Provider provider, List<List<AmazonKinesisMock.TestData>> testData) {
  int maxRecords = noOfShards * noOfEventsPerShard;
  PCollection<AmazonKinesisMock.TestData> readBack =
      p.apply(
              KinesisIO.read()
                  .withStreamName("stream")
                  .withInitialPositionInStream(InitialPositionInStream.TRIM_HORIZON)
                  .withAWSClientsProvider(provider)
                  .withArrivalTimeWatermarkPolicy()
                  .withMaxNumRecords(maxRecords))
          .apply(ParDo.of(new KinesisRecordToTestData()));

  PAssert.that(readBack).containsInAnyOrder(Iterables.concat(testData));
  p.run();
}
 
Example #22
Source File: KinesisIOIT.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the test dataset back from the Kinesis stream, starting at the timestamp
 * {@code now}, and asserts both the record count and the combined hash of the contents.
 */
private void runRead() {
  PCollection<KinesisRecord> records =
      pipelineRead.apply(
          KinesisIO.read()
              .withStreamName(options.getAwsKinesisStream())
              .withAWSClientsProvider(
                  options.getAwsAccessKey(),
                  options.getAwsSecretKey(),
                  Regions.fromName(options.getAwsKinesisRegion()))
              .withMaxNumRecords(numberOfRows)
              // to prevent endless running in case of error
              .withMaxReadTime(Duration.standardMinutes(10))
              .withInitialPositionInStream(InitialPositionInStream.AT_TIMESTAMP)
              .withInitialTimestampInStream(now)
              .withRequestRecordsLimit(1000));

  // First check: the number of records read equals the number of rows.
  PAssert.thatSingleton(records.apply("Count All", Count.globally()))
      .isEqualTo((long) numberOfRows);

  // Second check: the hash over all row contents equals the expected hash for that count.
  PCollection<String> contentHash =
      records
          .apply(ParDo.of(new ExtractDataValues()))
          .apply("Hash row contents", Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(contentHash)
      .containsInAnyOrder(TestRow.getExpectedHashForRowCount(numberOfRows));

  pipelineRead.run().waitUntilFinish();
}
 
Example #23
Source File: DynamoDBSourceConfig.java    From pulsar with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the configured initial position into its extended form.
 *
 * @return a timestamp-based position built from {@code getStartAtTime()} when the position
 *         is AT_TIMESTAMP, otherwise a position wrapping the configured value
 */
public InitialPositionInStreamExtended getStreamStartPosition() {
    boolean startAtTimestamp = initialPositionInStream == InitialPositionInStream.AT_TIMESTAMP;
    return startAtTimestamp
            ? InitialPositionInStreamExtended.newInitialPositionAtTimestamp(getStartAtTime())
            : InitialPositionInStreamExtended.newInitialPosition(this.getInitialPositionInStream());
}
 
Example #24
Source File: DynamoDBTableReplicator.java    From podyn with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a KCL worker on the executor that consumes the table's stream from TRIM_HORIZON.
 *
 * @throws StreamNotEnabledException if no stream ARN is available for the table
 */
public void startReplicatingChanges() throws StreamNotEnabledException {
	// Precondition: a table schema must be present.
	if (tableSchema == null) {
		throw new TableExistsException("table %s does not exist in destination", dynamoTableName);
	}

	// Precondition: a stream ARN must be available.
	String streamArn = getStreamArn();
	if (streamArn == null) {
		throw new StreamNotEnabledException("table %s does not have a stream enabled\n", dynamoTableName);
	}

	AmazonDynamoDBStreamsAdapterClient streamsAdapter = new AmazonDynamoDBStreamsAdapterClient(streamsClient);
	AmazonCloudWatch cloudWatch = AmazonCloudWatchClientBuilder.standard().build();
	String workerId = generateWorkerId();

	// The lease table name is derived from the table name.
	final KinesisClientLibConfiguration kclConfig =
			new KinesisClientLibConfiguration(APPLICATION_NAME, streamArn, awsCredentialsProvider, workerId)
					.withMaxRecords(1000)
					.withIdleTimeBetweenReadsInMillis(500)
					.withCallProcessRecordsEvenForEmptyRecordList(false)
					.withCleanupLeasesUponShardCompletion(false)
					.withFailoverTimeMillis(20000)
					.withTableName(LEASE_TABLE_PREFIX + dynamoTableName)
					.withInitialPositionInStream(InitialPositionInStream.TRIM_HORIZON);

	Worker replicationWorker = new Worker.Builder()
			.recordProcessorFactory(recordProcessorFactory)
			.config(kclConfig)
			.kinesisClient(streamsAdapter)
			.cloudWatchClient(cloudWatch)
			.dynamoDBClient(dynamoDBClient)
			.execService(executor)
			.build();

	executor.execute(replicationWorker);
}
 
Example #25
Source File: StreamsAdapterDemo.java    From aws-doc-sdk-examples with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the demo: builds the AWS clients, sets up the tables, runs a streams worker for 25
 * seconds, then compares scans of the source and destination tables and exits.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting demo...");

    // All three service clients use the same region.
    dynamoDBClient = AmazonDynamoDBClientBuilder.standard().withRegion(awsRegion).build();
    cloudWatchClient = AmazonCloudWatchClientBuilder.standard().withRegion(awsRegion).build();
    dynamoDBStreamsClient = AmazonDynamoDBStreamsClientBuilder.standard().withRegion(awsRegion).build();
    adapterClient = new AmazonDynamoDBStreamsAdapterClient(dynamoDBStreamsClient);

    String sourceTable = tablePrefix + "-src";
    String destinationTable = tablePrefix + "-dest";
    recordProcessorFactory = new StreamsRecordProcessorFactory(dynamoDBClient, destinationTable);

    setUpTables();

    workerConfig = new KinesisClientLibConfiguration(
            "streams-adapter-demo", streamArn, awsCredentialsProvider, "streams-demo-worker")
            .withMaxRecords(1000)
            .withIdleTimeBetweenReadsInMillis(500)
            .withInitialPositionInStream(InitialPositionInStream.TRIM_HORIZON);

    System.out.println("Creating worker for stream: " + streamArn);
    worker = StreamsWorkerFactory.createDynamoDbStreamsWorker(
            recordProcessorFactory, workerConfig, adapterClient, dynamoDBClient, cloudWatchClient);

    System.out.println("Starting worker...");
    Thread workerThread = new Thread(worker);
    workerThread.start();

    // Let the worker run for 25 seconds, then shut it down and wait for its thread.
    Thread.sleep(25000);
    worker.shutdown();
    workerThread.join();

    boolean tablesMatch = StreamsAdapterDemoHelper.scanTable(dynamoDBClient, sourceTable).getItems()
            .equals(StreamsAdapterDemoHelper.scanTable(dynamoDBClient, destinationTable).getItems());
    System.out.println(tablesMatch ? "Scan result is equal." : "Tables are different!");

    System.out.println("Done.");
    cleanupAndExit(0);
}
 
Example #26
Source File: DynamoStreamsManager.java    From dynamo-cassandra-proxy with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the connection settings from the proxy configuration, builds the DynamoDB,
 * Streams and CloudWatch clients, and starts a streams worker on a new thread for the
 * hard-coded table "test".
 *
 * @param config source of the endpoints, region and access keys
 */
public void configure(DCProxyConfiguration config) {

        //TODO make table name dynamic
        String tableName = "test";

        this.dynamodbEndpoint = config.getAwsDynamodbEndpoint();
        this.streamsEndpoint = config.getStreamsEndpoint();
        this.signinRegion = config.getDynamoRegion();
        this.accessKey = config.getDynamoAccessKey();
        this.secretKey = config.getDynamoSecretKey();

        // The keys are published as JVM system properties, which the credentials
        // provider created below reads.
        Properties systemProps = System.getProperties();
        systemProps.setProperty("aws.accessKeyId", accessKey);
        systemProps.setProperty("aws.secretKey", secretKey);

        AwsClientBuilder.EndpointConfiguration streamsEndpointConfig =
                new AwsClientBuilder.EndpointConfiguration(streamsEndpoint, signinRegion);
        SystemPropertiesCredentialsProvider credentials = new SystemPropertiesCredentialsProvider();

        // The DynamoDB client is pinned to US_EAST_2 rather than the configured endpoint.
        realDDB = AmazonDynamoDBClientBuilder.standard()
                .withRegion(Regions.US_EAST_2)
                .withCredentials(credentials)
                .build();

        DescribeTableResult description = realDDB.describeTable(tableName);
        streamArn = description.getTable().getLatestStreamArn();
        streamsClient = AmazonDynamoDBStreamsClientBuilder.standard()
                .withEndpointConfiguration(streamsEndpointConfig)
                .build();

        adapterClient = new AmazonDynamoDBStreamsAdapterClient(streamsClient);

        recordProcessorFactory = new StreamsRecordProcessorFactory(ddbProxy, tableName);

        workerConfig = new KinesisClientLibConfiguration("test-app", streamArn, credentials, "streams-worker")
                .withMaxRecords(1000)
                .withIdleTimeBetweenReadsInMillis(500)
                .withInitialPositionInStream(InitialPositionInStream.TRIM_HORIZON);

        AmazonCloudWatch cloudWatch = AmazonCloudWatchClientBuilder.standard()
                .withRegion(signinRegion)
                .build();

        System.out.println("Creating worker for stream: " + streamArn);

        Worker streamsWorker = StreamsWorkerFactory.createDynamoDbStreamsWorker(
                recordProcessorFactory, workerConfig, adapterClient, realDDB, cloudWatch);

        System.out.println("Starting worker...");
        Thread workerThread = new Thread(streamsWorker);
        workerThread.start();
    }
 
Example #27
Source File: StreamsAdapterDemo.java    From aws-dynamodb-examples with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the demo: creates the adapter, DynamoDB and CloudWatch clients, sets up the tables,
 * runs a KCL worker for 25 seconds, then compares scans of the source and destination
 * tables and exits.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting demo...");

    String sourceTable = tablePrefix + "-src";
    String destinationTable = tablePrefix + "-dest";
    streamsCredentials = new ProfileCredentialsProvider();
    dynamoDBCredentials = new ProfileCredentialsProvider();
    recordProcessorFactory = new StreamsRecordProcessorFactory(
            dynamoDBCredentials, dynamodbEndpoint, serviceName, destinationTable);

    /* ===== REQUIRED =====
     * The adapter must be instantiated and configured explicitly and then handed to
     * the KCL worker.
     */
    adapterClient = new AmazonDynamoDBStreamsAdapterClient(streamsCredentials, new ClientConfiguration());
    adapterClient.setEndpoint(streamsEndpoint);

    dynamoDBClient = new AmazonDynamoDBClient(dynamoDBCredentials, new ClientConfiguration());
    dynamoDBClient.setEndpoint(dynamodbEndpoint);

    cloudWatchClient = new AmazonCloudWatchClient(dynamoDBCredentials, new ClientConfiguration());

    setUpTables();

    workerConfig = new KinesisClientLibConfiguration(
            "streams-adapter-demo", streamArn, streamsCredentials, "streams-demo-worker")
            .withMaxRecords(1)
            .withInitialPositionInStream(InitialPositionInStream.TRIM_HORIZON);

    System.out.println("Creating worker for stream: " + streamArn);
    worker = new Worker(recordProcessorFactory, workerConfig, adapterClient, dynamoDBClient, cloudWatchClient);

    System.out.println("Starting worker...");
    Thread workerThread = new Thread(worker);
    workerThread.start();

    // Let the worker run for 25 seconds, then shut it down and wait for its thread.
    Thread.sleep(25000);
    worker.shutdown();
    workerThread.join();

    boolean tablesMatch = StreamsAdapterDemoHelper.scanTable(dynamoDBClient, sourceTable).getItems()
            .equals(StreamsAdapterDemoHelper.scanTable(dynamoDBClient, destinationTable).getItems());
    System.out.println(tablesMatch ? "Scan result is equal." : "Tables are different!");

    System.out.println("Done.");
    cleanupAndExit(0);
}
 
Example #28
Source File: AmazonDynamoDBStreamstoIgnite.java    From aws-big-data-blog with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the DynamoDB Streams adapter, DynamoDB and CloudWatch clients, starts an Ignite
 * node in client mode using the hosts listed under the {@code hostList} property, obtains
 * the cache named by the {@code cacheName} property, and then runs a KCL worker on the
 * calling thread.
 *
 * <p>When the worker returns or fails, the JVM is terminated through {@code System.exit}:
 * exit code 0 on normal return, 1 if the worker threw.
 *
 * @throws Exception if any step before the worker starts fails
 */
public void run() throws Exception {
	adapterClient = new AmazonDynamoDBStreamsAdapterClient(new ClientConfiguration());
	adapterClient.setEndpoint(streamsEndpoint);
	dynamoDBClient = new AmazonDynamoDBClient(new ClientConfiguration());
	dynamoDBClient.setEndpoint(dynamodbEndpoint);

	cloudWatchClient = new AmazonCloudWatchClient(dynamoDBCredentials, new ClientConfiguration());

	// Static IP discovery: the comma-separated host list comes from configuration.
	TcpDiscoverySpi spi = new TcpDiscoverySpi();
	TcpDiscoveryVmIpFinder ipFinder = new TcpDiscoveryVmIpFinder();
	List<String> hostList = Arrays.asList(Properties.getString("hostList").split(","));
	ipFinder.setAddresses(hostList);
	spi.setIpFinder(ipFinder);
	IgniteConfiguration cfg = new IgniteConfiguration();
	cfg.setDiscoverySpi(spi);
	cfg.setClientMode(true);
	cfg.setPeerClassLoadingEnabled(true);

	// Use the instance returned by start() directly instead of looking up the default
	// instance again through Ignition.ignite().
	Ignite ignite = Ignition.start(cfg);
	cache = ignite.cache(Properties.getString("cacheName"));
	LOG.info(">>> cache acquired");

	recordProcessorFactory = new StreamsRecordProcessorFactory(cache);
	workerConfig = new KinesisClientLibConfiguration(Properties.getString("applicationName"), streamArn,
			streamsCredentials, "ddbstreamsworker")
					.withMaxRecords(Integer.parseInt(Properties.getString("maxRecords")))
					.withInitialPositionInStream(
							InitialPositionInStream.valueOf(Properties.getString("initialPositionInStream")));

	LOG.info("Creating worker for stream: " + streamArn);
	worker = new Worker(recordProcessorFactory, workerConfig, adapterClient, dynamoDBClient, cloudWatchClient);
	LOG.info("Starting worker...");

	int exitCode = 0;
	try {
		worker.run();
	} catch (Throwable t) {
		// Pass the throwable to the logger so the stack trace lands in the application log
		// rather than only on stderr.
		LOG.error("Caught throwable while processing data.", t);
		exitCode = 1;
	}
	System.exit(exitCode);
}
 
Example #29
Source File: KinesisApplication.java    From aws-big-data-blog with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the given properties file and applies any overrides it contains to the
 * application name, stream name, Kinesis endpoint, initial stream position, Redis
 * endpoint and Redis port. A setting whose key is absent keeps its current value.
 * The effective value of each setting is logged.
 *
 * <p>A Redis port that is not a valid integer is ignored with a warning and the current
 * port is kept.
 *
 * @param propertiesFile path of the properties file to load
 * @throws IOException Thrown when we run into issues reading properties
 * @throws IllegalArgumentException if the initial-position override is not the name of an
 *         {@code InitialPositionInStream} constant
 */
private static void loadProperties(String propertiesFile) throws IOException {
    Properties properties = new Properties();
    // try-with-resources closes the stream even when load() fails.
    try (FileInputStream inputStream = new FileInputStream(propertiesFile)) {
        properties.load(inputStream);
    }

    String appNameOverride = properties.getProperty(ConfigKeys.APPLICATION_NAME_KEY);
    if (appNameOverride != null) {
        applicationName = appNameOverride;
    }
    LOG.info("Using application name " + applicationName);

    String streamNameOverride = properties.getProperty(ConfigKeys.STREAM_NAME_KEY);
    if (streamNameOverride != null) {
        streamName = streamNameOverride;
    }
    LOG.info("Using stream name " + streamName);

    String kinesisEndpointOverride = properties.getProperty(ConfigKeys.KINESIS_ENDPOINT_KEY);
    if (kinesisEndpointOverride != null) {
        kinesisEndpoint = kinesisEndpointOverride;
    }
    LOG.info("Using Kinesis endpoint " + kinesisEndpoint);

    String initialPositionOverride = properties.getProperty(ConfigKeys.INITIAL_POSITION_IN_STREAM_KEY);
    if (initialPositionOverride != null) {
        initialPositionInStream = InitialPositionInStream.valueOf(initialPositionOverride);
    }
    LOG.info("Using initial position " + initialPositionInStream.toString() + " (if a checkpoint is not found).");

    String redisEndpointOverride = properties.getProperty(ConfigKeys.REDIS_ENDPOINT);
    if (redisEndpointOverride != null) {
        redisEndpoint = redisEndpointOverride;
    }
    LOG.info("Using Redis endpoint " + redisEndpoint);

    String redisPortOverride = properties.getProperty(ConfigKeys.REDIS_PORT);
    if (redisPortOverride != null) {
        try {
            // Property values keep trailing whitespace, which parseInt rejects.
            redisPort = Integer.parseInt(redisPortOverride.trim());
        } catch (NumberFormatException e) {
            // Best effort: keep the current port, but make the bad value visible.
            LOG.warn("Ignoring invalid Redis port '" + redisPortOverride + "'; keeping " + redisPort, e);
        }
    }
    LOG.info("Using Redis port " + redisPort);
}
 
Example #30
Source File: ManagedConsumer.java    From aws-big-data-blog with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the {@code KinesisClientLibConfiguration} for this consumer from its configured
 * fields. Does nothing when the consumer is already configured.
 *
 * <p>The initial stream position defaults to {@code LATEST} when none was supplied, the
 * environment name (if any) is appended to the application name, and credentials fall
 * back to {@code DefaultAWSCredentialsProviderChain} when no provider was supplied.
 *
 * @throws Exception whatever {@code validateConfig()} throws, or an
 *         {@code InvalidConfigurationException} wrapping any failure raised while the
 *         configuration is being built
 */
public void configure() throws Exception {
    if (!isConfigured) {
        validateConfig();

        try {
            String userAgent = "AWSKinesisManagedConsumer/" + this.version;

            if (this.positionInStream != null) {
                streamPosition = InitialPositionInStream.valueOf(this.positionInStream);
            } else {
                streamPosition = InitialPositionInStream.LATEST;
            }

            // append the environment name to the application name
            if (environmentName != null) {
                appName = String.format("%s-%s", appName, environmentName);
            }

            // ensure the JVM will refresh the cached IP values of AWS
            // resources
            // (e.g. service endpoints).
            java.security.Security.setProperty("networkaddress.cache.ttl", "60");

            // Identify the worker by host name plus a random UUID. Concatenating the
            // Enumeration returned by NetworkInterface.getNetworkInterfaces() only yields
            // an object identity string, which says nothing about the host.
            String workerId = java.net.InetAddress.getLocalHost().getCanonicalHostName()
                    + ":" + UUID.randomUUID();
            LOG.info("Using Worker ID: " + workerId);

            // obtain credentials using the default provider chain or the
            // credentials provider supplied
            AWSCredentialsProvider credentialsProvider = this.credentialsProvider == null ? new DefaultAWSCredentialsProviderChain()
                    : this.credentialsProvider;

            LOG.info("Using credentials with Access Key ID: "
                    + credentialsProvider.getCredentials().getAWSAccessKeyId());

            // streamPosition already reflects positionInStream (or LATEST), so it is
            // applied exactly once here.
            config = new KinesisClientLibConfiguration(appName, streamName,
                    credentialsProvider, workerId).withInitialPositionInStream(streamPosition).withKinesisEndpoint(
                    kinesisEndpoint);

            config.getKinesisClientConfiguration().setUserAgent(userAgent);

            if (regionName != null) {
                Region region = Region.getRegion(Regions.fromName(regionName));
                config.withRegionName(region.getName());
            }

            // -1 means "not set": keep the library default for max records.
            if (this.maxRecords != -1) {
                config.withMaxRecords(maxRecords);
            }

            LOG.info(String.format(
                    "Amazon Kinesis Managed Client prepared for %s on %s in %s (%s) using %s Max Records",
                    config.getApplicationName(), config.getStreamName(),
                    config.getRegionName(), config.getWorkerIdentifier(),
                    config.getMaxRecords()));

            isConfigured = true;
        } catch (Exception e) {
            throw new InvalidConfigurationException(e);
        }
    }
}