org.apache.samza.system.SystemFactory Java Examples

The following examples show how to use org.apache.samza.system.SystemFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractSamzaBench.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the benchmark's command-line options, loads the properties file they point to and
 * resolves the system name, physical stream name and {@link SystemFactory} for the chosen stream.
 *
 * @throws IOException if the properties file cannot be opened or read
 * @throws InterruptedException declared by the signature
 */
public void start() throws IOException, InterruptedException {
  startPartition = Integer.parseInt(cmd.getOptionValue(OPT_SHORT_START_PARTITION));
  endPartition = Integer.parseInt(cmd.getOptionValue(OPT_SHORT_END_PARTITION));
  totalEvents = Integer.parseInt(cmd.getOptionValue(OPT_SHORT_NUM_EVENTS));
  String propsFile = cmd.getOptionValue(OPT_SHORT_PROPERTIES_FILE);
  streamId = cmd.getOptionValue(OPT_SHORT_STREAM);

  Properties props = new Properties();
  // Properties.load does not close the stream it is given, so close it here to avoid leaking
  // the file handle.
  try (FileInputStream propsStream = new FileInputStream(propsFile)) {
    props.load(propsStream);
  }
  addMoreSystemConfigs(props);
  config = convertToSamzaConfig(props);

  // The config key patterns are parameterised by stream id / system name.
  systemName = config.get(String.format(CFG_STREAM_SYSTEM_NAME, streamId));
  String systemFactory = config.get(String.format(CFG_SYSTEM_FACTORY, systemName));
  physicalStreamName = config.get(String.format(CFG_PHYSICAL_STREAM_NAME, streamId));

  factory = ReflectionUtil.getObj(systemFactory, SystemFactory.class);
}
 
Example #2
Source File: TestKafkaCheckpointManagerJava.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Expects a {@link TopicAlreadyMarkedForDeletionException} to escape when the admin's
 * {@code createStream} is stubbed to throw it for the checkpoint stream spec.
 */
@Test(expected = TopicAlreadyMarkedForDeletionException.class)
public void testStartFailsOnTopicCreationErrors() {
  KafkaStreamSpec spec = new KafkaStreamSpec(CHECKPOINT_TOPIC, CHECKPOINT_TOPIC, CHECKPOINT_SYSTEM, 1);

  // admin stub whose createStream fails for this spec
  SystemAdmin failingAdmin = newAdmin("0", "10");
  doThrow(new TopicAlreadyMarkedForDeletionException("invalid stream"))
      .when(failingAdmin)
      .createStream(spec);

  SystemFactory systemFactory =
      newFactory(mock(SystemProducer.class), mock(SystemConsumer.class), failingAdmin);
  KafkaCheckpointManager manager = new KafkaCheckpointManager(
      spec,
      systemFactory,
      true,
      mock(Config.class),
      mock(MetricsRegistry.class),
      null,
      new KafkaCheckpointLogKeySerde());

  // the expected exception should surface from one of these two calls
  manager.createResources();
  manager.start();
}
 
Example #3
Source File: TestKafkaCheckpointManagerJava.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Expects a {@link StreamValidationException} to escape when the admin's {@code validateStream}
 * is stubbed to throw it for the checkpoint stream spec.
 */
@Test(expected = StreamValidationException.class)
public void testStartFailsOnTopicValidationErrors() {
  KafkaStreamSpec spec = new KafkaStreamSpec(CHECKPOINT_TOPIC, CHECKPOINT_TOPIC, CHECKPOINT_SYSTEM, 1);

  // admin stub whose validateStream fails for this spec
  SystemAdmin failingAdmin = newAdmin("0", "10");
  doThrow(new StreamValidationException("invalid stream"))
      .when(failingAdmin)
      .validateStream(spec);

  SystemFactory systemFactory =
      newFactory(mock(SystemProducer.class), mock(SystemConsumer.class), failingAdmin);
  KafkaCheckpointManager manager = new KafkaCheckpointManager(
      spec,
      systemFactory,
      true,
      mock(Config.class),
      mock(MetricsRegistry.class),
      null,
      new KafkaCheckpointLogKeySerde());

  // the expected exception should surface from one of these two calls
  manager.createResources();
  manager.start();
}
 
Example #4
Source File: TestKafkaCheckpointManagerJava.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Expects a {@link SamzaException} when the manager is wired with an
 * {@code ExceptionThrowingCheckpointSerde} and the last checkpoint is read.
 */
@Test(expected = SamzaException.class)
public void testReadFailsOnSerdeExceptions() throws Exception {
  KafkaStreamSpec spec = new KafkaStreamSpec(CHECKPOINT_TOPIC, CHECKPOINT_TOPIC, CHECKPOINT_SYSTEM, 1);
  Config config = mock(Config.class);
  when(config.get(JobConfig.SSP_GROUPER_FACTORY)).thenReturn(GROUPER_FACTORY_CLASS);

  // consumer stub that hands back a single checkpoint envelope
  SystemStreamPartition inputSsp = new SystemStreamPartition("system-1", "input-topic", new Partition(0));
  List<List<IncomingMessageEnvelope>> polls =
      ImmutableList.of(ImmutableList.of(newCheckpointEnvelope(TASK1, inputSsp, "0")));
  SystemConsumer consumer = newConsumer(polls);

  SystemAdmin admin = newAdmin("0", "1");
  SystemFactory systemFactory = newFactory(mock(SystemProducer.class), consumer, admin);

  // checkpoint manager backed by a value serde that always throws
  KafkaCheckpointManager manager = new KafkaCheckpointManager(
      spec,
      systemFactory,
      true,
      config,
      mock(MetricsRegistry.class),
      new ExceptionThrowingCheckpointSerde(),
      new KafkaCheckpointLogKeySerde());
  manager.register(TASK1);
  manager.start();

  // the read is where the throwing serde is expected to fail
  manager.readLastCheckpoint(TASK1);
}
 
Example #5
Source File: TestKafkaCheckpointManagerJava.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that reading the last checkpoint completes without throwing even though both serdes
 * are exception-throwing ones, when the manager is built with {@code false} as its third
 * constructor argument (validation disabled, per the test name).
 */
@Test
public void testReadSucceedsOnKeySerdeExceptionsWhenValidationIsDisabled() throws Exception {
  KafkaStreamSpec spec = new KafkaStreamSpec(CHECKPOINT_TOPIC, CHECKPOINT_TOPIC, CHECKPOINT_SYSTEM, 1);
  Config config = mock(Config.class);
  when(config.get(JobConfig.SSP_GROUPER_FACTORY)).thenReturn(GROUPER_FACTORY_CLASS);

  // consumer stub that hands back a single checkpoint envelope
  SystemStreamPartition inputSsp = new SystemStreamPartition("system-1", "input-topic", new Partition(0));
  List<List<IncomingMessageEnvelope>> polls =
      ImmutableList.of(ImmutableList.of(newCheckpointEnvelope(TASK1, inputSsp, "0")));
  SystemConsumer consumer = newConsumer(polls);

  SystemAdmin admin = newAdmin("0", "1");
  SystemFactory systemFactory = newFactory(mock(SystemProducer.class), consumer, admin);

  // checkpoint manager whose value serde and key serde both always throw
  KafkaCheckpointManager manager = new KafkaCheckpointManager(
      spec,
      systemFactory,
      false,
      config,
      mock(MetricsRegistry.class),
      new ExceptionThrowingCheckpointSerde(),
      new ExceptionThrowingCheckpointKeySerde());
  manager.register(TASK1);
  manager.start();

  // the read must succeed in spite of the exception-throwing serdes
  manager.readLastCheckpoint(TASK1);
}
 
Example #6
Source File: TranslationContext.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the input descriptor for a dummy in-memory stream (only used in Beam tests) and
 * pre-populates that stream with one element, a max-timestamp watermark and an end-of-stream
 * message.
 *
 * @param id used as the system name, stream id and physical stream name alike
 * @return the input descriptor of the dummy stream
 */
private static InputDescriptor<OpMessage<String>, ?> createDummyStreamDescriptor(String id) {
  final GenericSystemDescriptor systemDescriptor =
      new GenericSystemDescriptor(id, InMemorySystemFactory.class.getName());
  final GenericInputDescriptor<OpMessage<String>> inputDescriptor =
      systemDescriptor.getInputDescriptor(id, new NoOpSerde<>());
  inputDescriptor.withOffsetDefault(SystemStreamMetadata.OffsetType.OLDEST);

  // Create the single-partition stream in the in-memory system.
  final Config config = new MapConfig(inputDescriptor.toConfig(), systemDescriptor.toConfig());
  final SystemFactory inMemoryFactory = new InMemorySystemFactory();
  final StreamSpec streamSpec = new StreamSpec(id, id, id, 1);
  inMemoryFactory.getAdmin(id, config).createStream(streamSpec);

  // Every message goes to partition key 0 with a null key.
  final SystemProducer producer = inMemoryFactory.getProducer(id, config, null);
  final SystemStream stream = new SystemStream(id, id);
  final Consumer<Object> publish =
      payload -> producer.send(id, new OutgoingMessageEnvelope(stream, 0, null, payload));

  final WindowedValue<String> dummyElement =
      WindowedValue.timestampedValueInGlobalWindow("dummy", new Instant());
  publish.accept(OpMessage.ofElement(dummyElement));
  publish.accept(new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()));
  publish.accept(new EndOfStreamMessage(null));

  return inputDescriptor;
}
 
Example #7
Source File: TestDiagnosticsUtil.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code DiagnosticsUtil.buildDiagnosticsManager} returns a present pair whose value
 * is the reporter produced by the (mocked) configured reporter factory.
 */
@Test
public void testBuildDiagnosticsManagerReturnsConfiguredReporter() {
  Config testConfig = new MapConfig(buildTestConfigs());
  JobModel jobModel = mock(JobModel.class);
  SystemFactory factoryMock = mock(SystemFactory.class);
  SystemProducer producerMock = mock(SystemProducer.class);
  MetricsReporterFactory reporterFactoryMock = mock(MetricsReporterFactory.class);
  MetricsSnapshotReporter reporterMock = mock(MetricsSnapshotReporter.class);

  // instance-level stubs
  when(factoryMock.getProducer(anyString(), any(Config.class), any(MetricsRegistry.class)))
      .thenReturn(producerMock);
  when(reporterFactoryMock.getMetricsReporter(anyString(), anyString(), any(Config.class)))
      .thenReturn(reporterMock);

  // reflection lookups are redirected to the mocks above
  PowerMockito.mockStatic(ReflectionUtil.class);
  when(ReflectionUtil.getObj(REPORTER_FACTORY, MetricsReporterFactory.class)).thenReturn(reporterFactoryMock);
  when(ReflectionUtil.getObj(SYSTEM_FACTORY, SystemFactory.class)).thenReturn(factoryMock);

  Optional<Pair<DiagnosticsManager, MetricsSnapshotReporter>> result = DiagnosticsUtil.buildDiagnosticsManager(
      JOB_NAME, JOB_ID, jobModel, CONTAINER_ID, Optional.of(ENV_ID), testConfig);

  Assert.assertTrue(result.isPresent());
  Assert.assertEquals(reporterMock, result.get().getValue());
}
 
Example #8
Source File: ContainerStorageManager.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@link SystemConsumer}s used for store restoration, one per store system.
 *
 * @param storeSystems names of the systems that need a consumer
 * @param systemFactories system name to {@link SystemFactory} lookup
 * @param config passed through to {@link SystemFactory#getConsumer}
 * @param registry passed through to {@link SystemFactory#getConsumer}
 * @return a map with one entry per store system, keyed by system name
 * @throws SamzaException if a store system has no entry in {@code systemFactories}
 */
private static Map<String, SystemConsumer> createConsumers(Set<String> storeSystems,
    Map<String, SystemFactory> systemFactories, Config config, MetricsRegistry registry) {
  Map<String, SystemConsumer> consumersBySystem = new HashMap<>();

  for (String systemName : storeSystems) {
    SystemFactory factory = systemFactories.get(systemName);
    if (factory != null) {
      consumersBySystem.put(systemName, factory.getConsumer(systemName, config, registry));
    } else {
      // a store refers to a system that was never configured
      throw new SamzaException("System " + systemName + " does not exist in config");
    }
  }

  return consumersBySystem;
}
 
Example #9
Source File: TestRunner.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the in-memory stream described by {@code descriptor} through an
 * {@link InMemorySystemFactory} and publishes the supplied messages into its partitions,
 * followed by an end-of-stream message per partition.
 *
 * @param descriptor describes the stream to initialize with the in memory system
 * @param partitionData maps a partition id to the messages that go into that partition
 */
private <StreamMessageType> void initializeInMemoryInputStream(InMemoryInputDescriptor<?> descriptor,
    Map<Integer, Iterable<StreamMessageType>> partitionData) {
  String systemName = descriptor.getSystemName();
  String streamName = (String) descriptor.getPhysicalName().orElse(descriptor.getStreamId());

  if (this.app instanceof LegacyTaskApplication) {
    // Legacy applications only specify task.class, so task.inputs is not derived from
    // descriptors and has to be assembled here: append to an existing value or start a new one.
    String qualifiedStream = systemName + "." + streamName;
    String inputStreams = configs.containsKey(TaskConfig.INPUT_STREAMS)
        ? configs.get(TaskConfig.INPUT_STREAMS).concat("," + qualifiedStream)
        : qualifiedStream;
    configs.put(TaskConfig.INPUT_STREAMS, inputStreams);
  }

  InMemorySystemDescriptor systemDescriptor = (InMemorySystemDescriptor) descriptor.getSystemDescriptor();
  systemDescriptor.withInMemoryScope(this.inMemoryScope);
  addConfig(descriptor.toConfig());
  addConfig(descriptor.getSystemDescriptor().toConfig());
  addSerdeConfigs(descriptor);

  // One partition per entry of partitionData.
  StreamSpec streamSpec =
      new StreamSpec(descriptor.getStreamId(), streamName, systemName, partitionData.size());
  SystemFactory inMemoryFactory = new InMemorySystemFactory();
  Config streamConfig = new MapConfig(descriptor.toConfig(), descriptor.getSystemDescriptor().toConfig());
  inMemoryFactory.getAdmin(systemName, streamConfig).createStream(streamSpec);

  InMemorySystemProducer producer =
      (InMemorySystemProducer) inMemoryFactory.getProducer(systemName, streamConfig, null);
  SystemStream stream = new SystemStream(systemName, streamName);

  for (Map.Entry<Integer, Iterable<StreamMessageType>> partitionEntry : partitionData.entrySet()) {
    Integer partitionId = partitionEntry.getKey();

    for (StreamMessageType message : partitionEntry.getValue()) {
      // KV messages are split into key and value; anything else is sent as a key-less value.
      Object key = null;
      Object value = message;
      if (message instanceof KV) {
        key = ((KV) message).getKey();
        value = ((KV) message).getValue();
      }

      if (value instanceof IncomingMessageEnvelope) {
        producer.send((IncomingMessageEnvelope) value);
      } else {
        producer.send(systemName,
            new OutgoingMessageEnvelope(stream, Integer.valueOf(partitionId), key, value));
      }
    }

    // Close the partition with an end-of-stream message.
    producer.send(systemName,
        new OutgoingMessageEnvelope(stream, Integer.valueOf(partitionId), null, new EndOfStreamMessage(null)));
  }
}
 
Example #10
Source File: TestSystemConfig.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code SystemConfig.getSystemFactories()} returns exactly one factory for a config
 * holding a single factory entry, and that the factory stored under {@code MOCK_SYSTEM_NAME1}
 * is a {@code MockSystemFactory}.
 */
@Test
public void testGetSystemFactories() {
  Map<String, String> map = ImmutableMap.of(MOCK_SYSTEM_FACTORY_NAME1, MockSystemFactory.class.getName());
  SystemConfig systemConfig = new SystemConfig(new MapConfig(map));
  Map<String, SystemFactory> actual = systemConfig.getSystemFactories();
  // assertEquals takes (expected, actual); with the arguments reversed a failure message would
  // report the expected and actual values the wrong way round.
  assertEquals(1, actual.size());
  assertTrue(actual.get(MOCK_SYSTEM_NAME1) instanceof MockSystemFactory);
}
 
Example #11
Source File: SystemConfig.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates a {@link SystemFactory} for every system name returned by
 * {@code getSystemNames()}.
 *
 * @return a map from system name to its {@link SystemFactory} instance
 * @throws SamzaException if a system name has no factory class configured
 */
public Map<String, SystemFactory> getSystemFactories() {
  return getSystemNames().stream().collect(Collectors.toMap(
      name -> name,
      name -> ReflectionUtil.getObj(
          getSystemFactory(name).orElseThrow(() -> new SamzaException(
              String.format("A stream uses system %s, which is missing from the configuration.", name))),
          SystemFactory.class)));
}
 
Example #12
Source File: CoordinatorStreamStore.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the coordinator system stream from {@code config} and obtains a producer, a consumer
 * and an admin for that system from the coordinator system factory.
 *
 * @param config job configuration used to locate the coordinator stream and its system factory
 * @param metricsRegistry registry handed to the producer and the consumer
 */
public CoordinatorStreamStore(Config config, MetricsRegistry metricsRegistry) {
  this.config = config;
  this.coordinatorSystemStream = CoordinatorStreamUtil.getCoordinatorSystemStream(config);
  // The store always addresses partition 0 of the coordinator stream.
  this.coordinatorSystemStreamPartition = new SystemStreamPartition(coordinatorSystemStream, new Partition(0));

  SystemFactory factory = CoordinatorStreamUtil.getCoordinatorSystemFactory(config);
  String coordinatorSystem = this.coordinatorSystemStream.getSystem();
  this.systemProducer = factory.getProducer(coordinatorSystem, config, metricsRegistry);
  this.systemConsumer = factory.getConsumer(coordinatorSystem, config, metricsRegistry);
  this.systemAdmin = factory.getAdmin(coordinatorSystem, config);
}
 
Example #13
Source File: CoordinatorStreamSystemProducer.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the coordinator system stream from {@code config}, obtains an admin and a producer
 * for its system from the coordinator system factory, and sets up JSON key/message serdes.
 *
 * @param config job configuration used to locate the coordinator stream and its system factory
 * @param registry registry handed to the producer
 */
public CoordinatorStreamSystemProducer(Config config, MetricsRegistry registry) {
  SystemStream coordinatorStream = CoordinatorStreamUtil.getCoordinatorSystemStream(config);
  SystemFactory factory = CoordinatorStreamUtil.getCoordinatorSystemFactory(config);
  String coordinatorSystem = coordinatorStream.getSystem();

  this.systemStream = coordinatorStream;
  this.systemAdmin = factory.getAdmin(coordinatorSystem, config);
  this.systemProducer = factory.getProducer(coordinatorSystem, config, registry);
  this.keySerde = new JsonSerde<>();
  this.messageSerde = new JsonSerde<>();
}
 
Example #14
Source File: CoordinatorStreamSystemConsumer.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the coordinator system stream from {@code config}, obtains an admin and a consumer
 * for its system from the coordinator system factory, and initialises an empty, not yet
 * bootstrapped state with JSON key/message serdes.
 *
 * @param config job configuration used to locate the coordinator stream and its system factory
 * @param registry registry handed to the consumer
 */
public CoordinatorStreamSystemConsumer(Config config, MetricsRegistry registry) {
  SystemStream coordinatorStream = CoordinatorStreamUtil.getCoordinatorSystemStream(config);
  SystemFactory factory = CoordinatorStreamUtil.getCoordinatorSystemFactory(config);
  String coordinatorSystem = coordinatorStream.getSystem();

  this.systemAdmin = factory.getAdmin(coordinatorSystem, config);
  this.systemConsumer = factory.getConsumer(coordinatorSystem, config, registry);
  // The consumer always addresses partition 0 of the coordinator stream.
  this.coordinatorSystemStreamPartition = new SystemStreamPartition(coordinatorStream, new Partition(0));

  this.configMap = new HashMap<>();
  this.isBootstrapped = false;
  this.keySerde = new JsonSerde<>();
  this.messageSerde = new JsonSerde<>();
}
 
Example #15
Source File: TestKafkaCheckpointManagerJava.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a mock {@link SystemFactory} that returns the given producer, consumer and admin for
 * any system name and config.
 */
private SystemFactory newFactory(SystemProducer producer, SystemConsumer consumer, SystemAdmin admin) {
  SystemFactory stubbedFactory = mock(SystemFactory.class);

  when(stubbedFactory.getProducer(anyString(), any(Config.class), any(MetricsRegistry.class)))
      .thenReturn(producer);
  when(stubbedFactory.getConsumer(anyString(), any(Config.class), any(MetricsRegistry.class)))
      .thenReturn(consumer);
  when(stubbedFactory.getAdmin(anyString(), any(Config.class)))
      .thenReturn(admin);

  return stubbedFactory;
}
 
Example #16
Source File: TestKafkaCheckpointManagerJava.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that every checkpoint message in the log is consumed and that the checkpoint returned
 * for the task is the one carried by the newest message.
 */
@Test
public void testAllMessagesInTheLogAreRead() throws Exception {
  KafkaStreamSpec checkpointSpec = new KafkaStreamSpec(CHECKPOINT_TOPIC, CHECKPOINT_TOPIC,
      CHECKPOINT_SYSTEM, 1);
  Config mockConfig = mock(Config.class);
  when(mockConfig.get(JobConfig.SSP_GROUPER_FACTORY)).thenReturn(GROUPER_FACTORY_CLASS);

  SystemStreamPartition ssp = new SystemStreamPartition("system-1", "input-topic", new Partition(0));

  int oldestOffset = 0;
  int newestOffset = 10;

  // mock out a consumer that returns one checkpoint IME per offset for the same ssp; the range
  // is inclusive on both ends, so offsets 0..10 yield eleven envelopes
  List<List<IncomingMessageEnvelope>> pollOutputs = new ArrayList<>();
  for (int offset = oldestOffset; offset <= newestOffset; offset++) {
    pollOutputs.add(ImmutableList.of(newCheckpointEnvelope(TASK1, ssp, Integer.toString(offset))));
  }

  // return one message at a time from each poll simulating a KafkaConsumer with max.poll.records = 1
  SystemConsumer mockConsumer = newConsumer(pollOutputs);
  SystemAdmin mockAdmin = newAdmin(Integer.toString(oldestOffset), Integer.toString(newestOffset));
  SystemFactory factory = newFactory(mock(SystemProducer.class), mockConsumer, mockAdmin);

  KafkaCheckpointManager checkpointManager = new KafkaCheckpointManager(checkpointSpec, factory,
      true, mockConfig, mock(MetricsRegistry.class), new CheckpointSerde(), new KafkaCheckpointLogKeySerde());
  checkpointManager.register(TASK1);
  checkpointManager.start();

  // check that all messages are read, and the checkpoint is the newest message
  Checkpoint checkpoint = checkpointManager.readLastCheckpoint(TASK1);
  // JUnit's assertEquals takes (expected, actual)
  Assert.assertEquals(ImmutableMap.of(ssp, Integer.toString(newestOffset)), checkpoint.getOffsets());
}
 
Example #17
Source File: TestKafkaCheckpointManagerJava.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Checks when and how the checkpoint consumer is registered: never before
 * {@code checkpointManager.start()}, exactly once with the oldest offset during start, and not
 * again when the last checkpoint is read afterwards.
 */
@Test
public void testCheckpointsAreReadFromOldestOffset() throws Exception {
  KafkaStreamSpec spec = new KafkaStreamSpec(CHECKPOINT_TOPIC, CHECKPOINT_TOPIC, CHECKPOINT_SYSTEM, 1);
  Config config = mock(Config.class);
  when(config.get(JobConfig.SSP_GROUPER_FACTORY)).thenReturn(GROUPER_FACTORY_CLASS);

  // consumer stub that hands back a single checkpoint envelope
  SystemStreamPartition inputSsp = new SystemStreamPartition("system-1", "input-topic", new Partition(0));
  SystemConsumer consumer =
      newConsumer(ImmutableList.of(ImmutableList.of(newCheckpointEnvelope(TASK1, inputSsp, "0"))));

  String oldestOffset = "0";
  SystemAdmin admin = newAdmin(oldestOffset, "1");
  SystemFactory systemFactory = newFactory(mock(SystemProducer.class), consumer, admin);
  KafkaCheckpointManager manager = new KafkaCheckpointManager(
      spec,
      systemFactory,
      true,
      config,
      mock(MetricsRegistry.class),
      new CheckpointSerde(),
      new KafkaCheckpointLogKeySerde());
  manager.register(TASK1);

  // before start(): the consumer has not been registered yet
  verify(consumer, times(0)).register(CHECKPOINT_SSP, oldestOffset);

  // start(): registers once, with the oldest offset
  manager.start();
  verify(consumer, times(1)).register(CHECKPOINT_SSP, oldestOffset);

  // later operations must not register again, since start offsets are set during register
  manager.readLastCheckpoint(TASK1);
  verify(consumer, times(1)).register(CHECKPOINT_SSP, oldestOffset);
}
 
Example #18
Source File: DiagnosticsUtil.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the {@link DiagnosticsManager} together with its {@link MetricsSnapshotReporter} when
 * diagnostics is enabled in the job config.
 *
 * @param jobName name of the job
 * @param jobId id of the job
 * @param jobModel job model whose containers are handed to the diagnostics manager
 * @param containerId id of the container; also used to name the reporter ("samza-container-" + id)
 * @param execEnvContainerId ID assigned to the container by the cluster manager (e.g., YARN);
 *     an empty string is used when absent
 * @param config job configuration
 * @return the manager/reporter pair, or an empty Optional when diagnostics is disabled
 * @throws SamzaException if the diagnostics reporter has no factory class configured, or the
 *     diagnostics system has no system factory configured
 * @throws ConfigException if the diagnostics reporter has no stream configured
 */
public static Optional<Pair<DiagnosticsManager, MetricsSnapshotReporter>> buildDiagnosticsManager(String jobName,
    String jobId, JobModel jobModel, String containerId, Optional<String> execEnvContainerId, Config config) {

  JobConfig jobConfig = new JobConfig(config);
  MetricsConfig metricsConfig = new MetricsConfig(config);

  if (!jobConfig.getDiagnosticsEnabled()) {
    return Optional.empty();
  }

  // Metrics reporter used for diagnostics
  String reporterName = MetricsConfig.METRICS_SNAPSHOT_REPORTER_NAME_FOR_DIAGNOSTICS;
  String reporterFactoryClassName = metricsConfig.getMetricsFactoryClass(reporterName)
      .orElseThrow(() -> new SamzaException(
          String.format("Diagnostics reporter %s missing .class config", reporterName)));
  MetricsReporterFactory reporterFactory =
      ReflectionUtil.getObj(reporterFactoryClassName, MetricsReporterFactory.class);
  MetricsSnapshotReporter reporter = (MetricsSnapshotReporter) reporterFactory.getMetricsReporter(
      reporterName, "samza-container-" + containerId, config);

  // Container facts reported by the diagnostics manager
  ClusterManagerConfig clusterManagerConfig = new ClusterManagerConfig(config);
  int containerMemoryMb = clusterManagerConfig.getContainerMemoryMb();
  int containerNumCores = clusterManagerConfig.getNumCores();
  long maxHeapSizeBytes = Runtime.getRuntime().maxMemory();
  int containerThreadPoolSize = jobConfig.getThreadPoolSize();
  String taskClassVersion = Util.getTaskClassVersion(config);
  String samzaVersion = Util.getSamzaVersion();
  String hostName = Util.getLocalHost().getHostName();

  String reporterStreamName = metricsConfig.getMetricsSnapshotReporterStream(reporterName)
      .orElseThrow(() -> new ConfigException(
          "Missing required config: " + String.format(MetricsConfig.METRICS_SNAPSHOT_REPORTER_STREAM,
              reporterName)));
  SystemStream diagnosticsStream = StreamUtil.getSystemStreamFromNames(reporterStreamName);
  String diagnosticsSystem = diagnosticsStream.getSystem();

  // The diagnostics manager gets its own producer, writing to the same stream as the
  // MetricsSnapshotReporter called `diagnosticsreporter`.
  String systemFactoryClassName = new SystemConfig(config).getSystemFactory(diagnosticsSystem)
      .orElseThrow(() -> new SamzaException("Missing factory in config for system " + diagnosticsSystem));
  SystemFactory systemFactory = ReflectionUtil.getObj(systemFactoryClassName, SystemFactory.class);
  SystemProducer producer = systemFactory.getProducer(diagnosticsSystem, config, new MetricsRegistryMap());

  DiagnosticsManager diagnosticsManager =
      new DiagnosticsManager(jobName, jobId, jobModel.getContainers(), containerMemoryMb, containerNumCores,
          new StorageConfig(config).getNumPersistentStores(), maxHeapSizeBytes, containerThreadPoolSize,
          containerId, execEnvContainerId.orElse(""), taskClassVersion, samzaVersion, hostName,
          diagnosticsStream, producer,
          Duration.ofMillis(new TaskConfig(config).getShutdownMs()), jobConfig.getAutosizingEnabled());

  return Optional.of(new ImmutablePair<>(diagnosticsManager, reporter));
}
 
Example #19
Source File: StorageRecovery.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Creates one {@link ContainerStorageManager} for each container model in {@code containers}
 * and stores it in {@code containerStorageManagers}, keyed by the container model's id.
 * Despite the "get" prefix this method returns nothing; it only populates that map.
 */
@SuppressWarnings("rawtypes")
private void getContainerStorageManagers() {
  Clock clock = SystemClock.instance();
  // NOTE(review): 5000 is presumably a cache TTL in milliseconds (the SSPMetadataCache below uses
  // Duration.ofMillis(5000)) — confirm against StreamMetadataCache.
  StreamMetadataCache streamMetadataCache = new StreamMetadataCache(systemAdmins, 5000, clock);
  // don't worry about prefetching for this; looks like the tool doesn't flush to offset files anyways
  Map<String, SystemFactory> systemFactories = new SystemConfig(jobConfig).getSystemFactories();
  // The checkpoint manager is optional: null is passed on when none is configured.
  CheckpointManager checkpointManager = new TaskConfig(jobConfig)
      .getCheckpointManager(new MetricsRegistryMap()).orElse(null);

  for (ContainerModel containerModel : containers.values()) {
    ContainerContext containerContext = new ContainerContextImpl(containerModel, new MetricsRegistryMap());

    // One SSP per (changelog system stream, task of this container) pair, using the task's
    // changelog partition.
    Set<SystemStreamPartition> changelogSSPs = changeLogSystemStreams.values().stream()
        .flatMap(ss -> containerModel.getTasks().values().stream()
            .map(tm -> new SystemStreamPartition(ss, tm.getChangelogPartition())))
        .collect(Collectors.toSet());
    SSPMetadataCache sspMetadataCache = new SSPMetadataCache(systemAdmins, Duration.ofMillis(5000), clock, changelogSSPs);

    // NOTE(review): the arguments are positional; the empty maps, the null argument and
    // storeBaseDir being passed for two consecutive parameters cannot be interpreted from this
    // snippet alone — check the ContainerStorageManager constructor before reordering anything.
    ContainerStorageManager containerStorageManager =
        new ContainerStorageManager(
            checkpointManager,
            containerModel,
            streamMetadataCache,
            sspMetadataCache,
            systemAdmins,
            changeLogSystemStreams,
            new HashMap<>(),
            storageEngineFactories,
            systemFactories,
            this.getSerdes(),
            jobConfig,
            new HashMap<>(),
            new SamzaContainerMetrics(containerModel.getId(), new MetricsRegistryMap()),
            JobContextImpl.fromConfigWithDefaults(jobConfig),
            containerContext,
            new HashMap<>(),
            storeBaseDir,
            storeBaseDir,
            maxPartitionNumber,
            null,
            new SystemClock());
    this.containerStorageManagers.put(containerModel.getId(), containerStorageManager);
  }
}
 
Example #20
Source File: StreamAppender.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the appender for shipping log events to a stream: loads the config, resolves the
 * stream name and system factory, configures the serde, optionally creates the stream, then
 * registers and starts the producer and the transfer thread.
 *
 * @throws SamzaException if no system factory is configured for the log4j system
 */
protected void setupSystem() {
  config = getConfig();
  Log4jSystemConfig log4jSystemConfig = new Log4jSystemConfig(config);

  // Derive the stream name from the job name and id unless one was already set.
  if (streamName == null) {
    streamName = getStreamName(log4jSystemConfig.getJobName(), log4jSystemConfig.getJobId());
  }

  // TODO we need the ACTUAL metrics registry, or the metrics won't get reported by the metric reporters!
  MetricsRegistry metricsRegistry = new MetricsRegistryMap();
  metrics = new StreamAppenderMetrics("stream-appender", metricsRegistry);

  String systemName = log4jSystemConfig.getSystemName();
  String systemFactoryName = log4jSystemConfig.getSystemFactory(systemName)
      .orElseThrow(() -> new SamzaException(
          "Could not figure out \"" + systemName + "\" system factory for log4j StreamAppender to use"));
  SystemFactory systemFactory = ReflectionUtil.getObj(systemFactoryName, SystemFactory.class);

  setSerde(log4jSystemConfig, systemName, streamName);

  if (config.getBoolean(CREATE_STREAM_ENABLED, false)) {
    // Explicitly create stream appender stream with the partition count the same as the number of containers.
    System.out.println("[StreamAppender] creating stream " + streamName + " with partition count " + getPartitionCount());
    StreamSpec streamSpec =
        StreamSpec.createStreamAppenderStreamSpec(streamName, systemName, getPartitionCount());

    // SystemAdmin only needed for stream creation here.
    SystemAdmin systemAdmin = systemFactory.getAdmin(systemName, config);
    systemAdmin.start();
    try {
      systemAdmin.createStream(streamSpec);
    } finally {
      // Stop the admin even when stream creation fails, so a started admin is never left behind.
      systemAdmin.stop();
    }
  }

  systemProducer = systemFactory.getProducer(systemName, config, metricsRegistry);
  systemStream = new SystemStream(systemName, streamName);
  systemProducer.register(SOURCE);
  systemProducer.start();

  log.info(SOURCE + " has been registered in " + systemName + ". So all the logs will be sent to " + streamName
      + " in " + systemName + ". Logs are partitioned by " + key);

  startTransferThread();
}
 
Example #21
Source File: StreamAppender.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the appender for shipping log events to a stream: loads the config, resolves the
 * stream name and system factory, configures the serde, optionally creates the stream, then
 * registers and starts the producer and the transfer thread.
 *
 * @throws SamzaException if no system factory is configured for the log4j system
 */
protected void setupSystem() {
  config = getConfig();
  Log4jSystemConfig log4jSystemConfig = new Log4jSystemConfig(config);

  // Derive the stream name from the job name and id unless one was already set.
  if (streamName == null) {
    streamName = getStreamName(log4jSystemConfig.getJobName(), log4jSystemConfig.getJobId());
  }

  // TODO we need the ACTUAL metrics registry, or the metrics won't get reported by the metric reporters!
  MetricsRegistry metricsRegistry = new MetricsRegistryMap();
  metrics = new StreamAppenderMetrics("stream-appender", metricsRegistry);

  String systemName = log4jSystemConfig.getSystemName();
  String systemFactoryName = log4jSystemConfig.getSystemFactory(systemName)
      .orElseThrow(() -> new SamzaException(
          "Could not figure out \"" + systemName + "\" system factory for log4j StreamAppender to use"));
  SystemFactory systemFactory = ReflectionUtil.getObj(systemFactoryName, SystemFactory.class);

  setSerde(log4jSystemConfig, systemName, streamName);

  if (config.getBoolean(CREATE_STREAM_ENABLED, false)) {
    // Explicitly create stream appender stream with the partition count the same as the number of containers.
    System.out.println("[StreamAppender] creating stream " + streamName + " with partition count " + getPartitionCount());
    StreamSpec streamSpec = StreamSpec.createStreamAppenderStreamSpec(streamName, systemName, getPartitionCount());

    // SystemAdmin only needed for stream creation here.
    SystemAdmin systemAdmin = systemFactory.getAdmin(systemName, config);
    systemAdmin.start();
    try {
      systemAdmin.createStream(streamSpec);
    } finally {
      // Stop the admin even when stream creation fails, so a started admin is never left behind.
      systemAdmin.stop();
    }
  }

  systemProducer = systemFactory.getProducer(systemName, config, metricsRegistry);
  systemStream = new SystemStream(systemName, streamName);
  systemProducer.register(SOURCE);
  systemProducer.start();

  log.info(SOURCE + " has been registered in " + systemName + ". So all the logs will be sent to " + streamName
      + " in " + systemName + ". Logs are partitioned by " + key);

  startTransferThread();
}
 
Example #22
Source File: TestRunner.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Gets the contents of the output stream represented by {@code outputDescriptor} after {@link TestRunner#run(Duration)}
 * has completed
 *
 * @param outputDescriptor describes the stream to be consumed
 * @param timeout timeout for consumption of stream in Ms
 * @param <StreamMessageType> type of message
 *
 * @return a map whose key is {@code partitionId} and value is messages in partition
 * @throws SamzaException Thrown when the thread is interrupted while polling
 * @throws IllegalStateException Thrown when not every partition was fully consumed within {@code timeout}
 */
public static <StreamMessageType> Map<Integer, List<StreamMessageType>> consumeStream(
    InMemoryOutputDescriptor outputDescriptor, Duration timeout) throws SamzaException {
  Preconditions.checkNotNull(outputDescriptor);
  String streamId = outputDescriptor.getStreamId();
  String systemName = outputDescriptor.getSystemName();
  Set<SystemStreamPartition> ssps = new HashSet<>();
  Set<String> streamIds = new HashSet<>();
  streamIds.add(streamId);
  SystemFactory factory = new InMemorySystemFactory();
  Config config = new MapConfig(outputDescriptor.toConfig(), outputDescriptor.getSystemDescriptor().toConfig());
  Map<String, SystemStreamMetadata> metadata = factory.getAdmin(systemName, config).getSystemStreamMetadata(streamIds);
  SystemConsumer consumer = factory.getConsumer(systemName, config, null);
  // NOTE(review): the partitions are looked up by physical name here but by stream id inside the
  // polling loop below — confirm both keys resolve to the same metadata entry.
  String name = (String) outputDescriptor.getPhysicalName().orElse(streamId);
  // Register every partition of the stream with the consumer, starting at offset "0".
  metadata.get(name).getSystemStreamPartitionMetadata().keySet().forEach(partition -> {
    SystemStreamPartition temp = new SystemStreamPartition(systemName, streamId, partition);
    ssps.add(temp);
    consumer.register(temp, "0");
  });

  long deadline = System.currentTimeMillis() + timeout.toMillis();
  Map<SystemStreamPartition, List<IncomingMessageEnvelope>> output = new HashMap<>();
  Set<SystemStreamPartition> didNotReachEndOfStream = new HashSet<>(ssps);
  while (System.currentTimeMillis() < deadline) {
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> currentState;
    try {
      currentState = consumer.poll(ssps, 10);
    } catch (InterruptedException e) {
      // Restore the interrupt flag for code further up the stack and keep the cause attached.
      Thread.currentThread().interrupt();
      throw new SamzaException("Timed out while consuming stream \n" + e.getMessage(), e);
    }
    for (Map.Entry<SystemStreamPartition, List<IncomingMessageEnvelope>> entry : currentState.entrySet()) {
      SystemStreamPartition ssp = entry.getKey();
      List<IncomingMessageEnvelope> consumed = output.computeIfAbsent(ssp, k -> new LinkedList<>());
      List<IncomingMessageEnvelope> currentBuffer = entry.getValue();
      // The partition's upcoming offset is used as the number of messages to expect from it.
      int totalMessagesToFetch = Integer.valueOf(metadata.get(streamId)
          .getSystemStreamPartitionMetadata()
          .get(ssp.getPartition())
          .getUpcomingOffset());
      if (consumed.size() + currentBuffer.size() == totalMessagesToFetch) {
        // Partition fully consumed: stop tracking and stop polling it.
        didNotReachEndOfStream.remove(ssp);
        ssps.remove(ssp);
      }
      consumed.addAll(currentBuffer);
    }
    if (didNotReachEndOfStream.isEmpty()) {
      break;
    }
  }

  if (!didNotReachEndOfStream.isEmpty()) {
    throw new IllegalStateException("Could not poll for all system stream partitions");
  }

  // Re-key by partition id and unwrap each envelope's message.
  return output.entrySet()
      .stream()
      .collect(Collectors.toMap(entry -> entry.getKey().getPartition().getPartitionId(),
        entry -> entry.getValue().stream().map(e -> (StreamMessageType) e.getMessage()).collect(Collectors.toList())));
}