Java Code Examples for org.apache.samza.config.Config#get()

The following examples show how to use org.apache.samza.config.Config#get(). Each example is taken from an open-source project; the source file and project are listed above each snippet.
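Config implements java.util.Map<String, String>: get(key) returns the raw string value, or null when the key is absent, while the two-argument overload get(key, defaultValue) substitutes a default instead. A minimal sketch of both patterns (the key names mirror those used in the examples below; the surrounding class is illustrative only):

import org.apache.samza.SamzaException;
import org.apache.samza.config.Config;

public class ConfigGetSketch {

  public static String requiredJobName(Config config) {
    // Required key: get() returns null if the key is missing, so fail fast.
    String jobName = config.get("job.name");
    if (jobName == null) {
      throw new SamzaException("Must define job.name.");
    }
    return jobName;
  }

  public static String optionalOutputTopic(Config config) {
    // Optional key: the two-argument overload falls back to "output".
    return config.get("app.outputTopic", "output");
  }
}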
Example 1
Source File: SamzaProcessingItem.java    From samoa with Apache License 2.0
@Override
public void init(Config config, TaskContext context) throws Exception {
	String yarnConfHome = config.get(SamzaConfigFactory.YARN_CONF_HOME_KEY);
	// If the property is set, use it; otherwise assume we are running in local mode and ignore it.
	if (yarnConfHome != null && yarnConfHome.length() > 0) {
		SystemsUtils.setHadoopConfigHome(yarnConfHome);
	}
	
	String filename = config.get(SamzaConfigFactory.FILE_KEY);
	String filesystem = config.get(SamzaConfigFactory.FILESYSTEM_KEY);
	this.setName(config.get(SamzaConfigFactory.JOB_NAME_KEY));
	SerializationProxy wrapper = (SerializationProxy) SystemsUtils.deserializeObjectFromFileAndKey(filesystem, filename, this.getName());
	this.setProcessor(wrapper.processor);
	this.outputStreams = wrapper.outputStreams;
	
	// Init Processor and Streams
	this.getProcessor().onCreate(0);
	for (SamzaStream stream : this.outputStreams) {
		stream.onCreate();
	}
	
}
 
Example 2
Source File: SamzaEntranceProcessingItem.java    From samoa with Apache License 2.0
public SamoaSystemConsumer(String systemName, Config config) {
	String yarnConfHome = config.get(SamzaConfigFactory.YARN_CONF_HOME_KEY);
	// If the property is set, use it; otherwise assume we are running in local mode and ignore it.
	if (yarnConfHome != null && yarnConfHome.length() > 0) {
		SystemsUtils.setHadoopConfigHome(yarnConfHome);
	}
	
	String filename = config.get(SamzaConfigFactory.FILE_KEY);
	String filesystem = config.get(SamzaConfigFactory.FILESYSTEM_KEY);
	String name = config.get(SamzaConfigFactory.JOB_NAME_KEY);
	SerializationProxy wrapper = (SerializationProxy) SystemsUtils.deserializeObjectFromFileAndKey(filesystem, filename, name);
	
	this.entranceProcessor = wrapper.processor;
	this.entranceProcessor.onCreate(0);
	
	// Internal stream from SystemConsumer to EntranceTask, so we
	// need only one partition
	this.systemStreamPartition = new SystemStreamPartition(systemName, wrapper.name, new Partition(0));
}
 
Example 3
Source File: SamzaEntranceProcessingItem.java    From incubator-samoa with Apache License 2.0
@Override
public void init(Config config, TaskContext context) throws Exception {
  String yarnConfHome = config.get(SamzaConfigFactory.YARN_CONF_HOME_KEY);
  // If the property is set, use it; otherwise assume we are running in local mode and ignore it.
  if (yarnConfHome != null && yarnConfHome.length() > 0) {
    SystemsUtils.setHadoopConfigHome(yarnConfHome);
  }

  String filename = config.get(SamzaConfigFactory.FILE_KEY);
  String filesystem = config.get(SamzaConfigFactory.FILESYSTEM_KEY);

  this.setName(config.get(SamzaConfigFactory.JOB_NAME_KEY));
  SerializationProxy wrapper = (SerializationProxy) SystemsUtils.deserializeObjectFromFileAndKey(filesystem,
      filename, this.getName());
  this.setOutputStream(wrapper.outputStream);
  SamzaStream output = (SamzaStream) this.getOutputStream();
  if (output != null) // if output stream exists, set it up
    output.onCreate();
}
 
Example 4
Source File: SamzaEntranceProcessingItem.java    From samoa with Apache License 2.0
@Override
public void init(Config config, TaskContext context) throws Exception {
	String yarnConfHome = config.get(SamzaConfigFactory.YARN_CONF_HOME_KEY);
	// If the property is set, use it; otherwise assume we are running in local mode and ignore it.
	if (yarnConfHome != null && yarnConfHome.length() > 0) {
		SystemsUtils.setHadoopConfigHome(yarnConfHome);
	}
	
	String filename = config.get(SamzaConfigFactory.FILE_KEY);
	String filesystem = config.get(SamzaConfigFactory.FILESYSTEM_KEY);
	
	this.setName(config.get(SamzaConfigFactory.JOB_NAME_KEY));
	SerializationProxy wrapper = (SerializationProxy) SystemsUtils.deserializeObjectFromFileAndKey(filesystem, filename, this.getName());
	this.setOutputStream(wrapper.outputStream);
	SamzaStream output = (SamzaStream) this.getOutputStream();
	if (output != null) // if output stream exists, set it up
		output.onCreate();
}
 
Example 5
Source File: BroadcastAssertApp.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Config config = appDescriptor.getConfig();
  String inputTopic = config.get(INPUT_TOPIC_NAME_PROP);

  final JsonSerdeV2<PageView> serde = new JsonSerdeV2<>(PageView.class);
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> isd = ksd.getInputDescriptor(inputTopic, serde);
  final MessageStream<PageView> broadcastPageViews = appDescriptor
      .getInputStream(isd)
      .broadcast(serde, "pv");

  // Each task will see all the PageView events.
  MessageStreamAssert.that("Each task contains all broadcast PageView events", broadcastPageViews, serde)
      .forEachTask()
      .containsInAnyOrder(
          Arrays.asList(
              new PageView("v1", "p1", "u1"),
              new PageView("v2", "p2", "u1"),
              new PageView("v3", "p1", "u2"),
              new PageView("v4", "p3", "u2")
          ));
}
 
Example 6
Source File: MockCoordinatorStreamSystemFactory.java    From samza with Apache License 2.0
/**
 * Returns a consumer that sends all configs to the coordinator stream.
 *
 * @param config along with the configs, you can pass checkpoint and changelog stream messages into the stream.
 *               Expected patterns:
 *               cp:source:taskname -> ssp,offset for checkpoints (use the sspToString util)
 *               ch:source:taskname -> changelogPartition for changelogs
 *               Everything else is processed as normal config.
 */
@Override
public SystemConsumer getConsumer(String systemName, Config config, MetricsRegistry registry) {
  if (useCachedConsumer && mockConsumer != null) {
    return mockConsumer;
  }

  String jobName = config.get("job.name");
  String jobId = config.get("job.id");
  if (jobName == null) {
    throw new ConfigException("Must define job.name.");
  }
  if (jobId == null) {
    jobId = "1";
  }
  String streamName = CoordinatorStreamUtil.getCoordinatorStreamName(jobName, jobId);
  SystemStreamPartition systemStreamPartition = new SystemStreamPartition(systemName, streamName, new Partition(0));
  mockConsumer = new MockCoordinatorStreamWrappedConsumer(systemStreamPartition, config);
  mockConsumer.register(systemStreamPartition, "0");
  return mockConsumer;
}
 
Example 7
Source File: TestZkStreamProcessorBase.java    From samza with Apache License 2.0
@Override
public void init(Context context) {
  Config config = context.getJobContext().getConfig();
  this.processorId = config.get(ApplicationConfig.PROCESSOR_ID);
  this.outputTopic = config.get("app.outputTopic", "output");
  this.outputSystem = config.get("app.outputSystem", "test-system");
  this.processorIdToFail = config.get("processor.id.to.fail", "1");
}
 
Example 8
Source File: SimpleInstallationFinder.java    From samza with Apache License 2.0
/**
 * Finds all the job instances in the specified path and adds a corresponding {@link JobInstance} and
 * {@link InstallationRecord} for each instance.
 *
 * @param jobInstallPath  the path to search for job instances.
 * @param jobs            the map to which the job instances will be added.
 */
private void findJobInstances(final File jobInstallPath, final Map<JobInstance, InstallationRecord> jobs) {
  try {
    String jobInstallCanonPath = jobInstallPath.getCanonicalPath();
    File configPath = Paths.get(jobInstallCanonPath, CFG_SUBPATH).toFile();
    if (!(configPath.exists() && configPath.isDirectory())) {
      log.debug("Config path not found: " + configPath);
      return;
    }

    for (File configFile : configPath.listFiles()) {

      if (configFile.isFile()) {

        String configFilePath = configFile.getCanonicalPath();
        Config config = jobConfigFactory.getConfig(new URI("file://" + configFilePath));

        if (config.containsKey(JobConfig.JOB_NAME) && config.containsKey(JobConfig.STREAM_JOB_FACTORY_CLASS)) {

          String jobName = config.get(JobConfig.JOB_NAME);
          String jobId = config.get(JobConfig.JOB_ID, "1");
          JobInstance jobInstance = new JobInstance(jobName, jobId);

          if (jobs.containsKey(jobInstance)) {
            throw new IllegalStateException(
                String.format("Found more than one job config with jobName:%s and jobId:%s", jobName, jobId));
          }
          InstallationRecord jobInstall =
              new InstallationRecord(jobName, jobId, jobInstallCanonPath, configFilePath, getBinPath(jobInstallCanonPath));
          jobs.put(jobInstance, jobInstall);
        }
      }
    }
  } catch (Exception e) {
    throw new SamzaException("Exception finding job instance in path: " + jobInstallPath, e);
  }
}
 
Example 9
Source File: SamzaSqlApplicationConfig.java    From samza with Apache License 2.0
public static SqlIOResolver createIOResolver(Config config) {
  String sourceResolveValue = config.get(CFG_IO_RESOLVER);
  // Validate before use so a missing or empty ioResolver config fails fast.
  Validate.notEmpty(sourceResolveValue, "ioResolver config is not set or empty");
  Map<String, String> metadataPrefixProperties = new HashMap<>();
  metadataPrefixProperties.put(
      String.format(CFG_FMT_SOURCE_RESOLVER_DOMAIN, sourceResolveValue) + CFG_METADATA_TOPIC_PREFIX,
      config.get(CFG_METADATA_TOPIC_PREFIX, DEFAULT_METADATA_TOPIC_PREFIX));
  Config newConfig = new MapConfig(Arrays.asList(config, metadataPrefixProperties));
  return initializePlugin("SqlIOResolver", sourceResolveValue, newConfig, CFG_FMT_SOURCE_RESOLVER_DOMAIN,
    (o, c) -> ((SqlIOResolverFactory) o).create(c, newConfig));
}
 
Example 10
Source File: IdentityStreamTask.java    From samza with Apache License 2.0
@Override
public void init(Context context) throws Exception {
  Config config = context.getJobContext().getConfig();
  this.expectedMessageCount = config.getInt("app.messageCount");
  this.outputTopic = config.get("app.outputTopic", "output");
  this.outputSystem = config.get("app.outputSystem", "test-system");
}
 
Example 11
Source File: FaultInjectionTest.java    From samza with Apache License 2.0
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  Config config = appDescriptor.getConfig();
  String inputTopic = config.get(INPUT_TOPIC_NAME_PROP);

  final JsonSerdeV2<PageView> serde = new JsonSerdeV2<>(PageView.class);
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> isd = ksd.getInputDescriptor(inputTopic, serde);
  appDescriptor
      .withInputStream(isd)
      .withTaskFactory((StreamTaskFactory) () -> new FaultInjectionTask(containerShutdownLatch));
}
 
Example 12
Source File: TestJobNodeConfigurationGenerator.java    From samza with Apache License 2.0
private void validateStreamSerdeConfigure(String streamId, Config config, Map<String, Serde> deserializedSerdes) {
  Config streamConfig = config.subset(String.format("streams.%s.samza.", streamId));
  String keySerdeName = streamConfig.get("key.serde");
  String valueSerdeName = streamConfig.get("msg.serde");
  assertTrue(String.format("Serialized serdes should contain %s key serde", streamId), deserializedSerdes.containsKey(keySerdeName));
  assertTrue(String.format("Serialized %s key serde should be a StringSerde", streamId), keySerdeName.startsWith(StringSerde.class.getSimpleName()));
  assertTrue(String.format("Serialized serdes should contain %s msg serde", streamId), deserializedSerdes.containsKey(valueSerdeName));
  assertTrue(String.format("Serialized %s msg serde should be a JsonSerdeV2", streamId), valueSerdeName.startsWith(JsonSerdeV2.class.getSimpleName()));
}
 
Example 13
Source File: TestStreamApplication.java    From samza with Apache License 2.0
public static StreamApplication getInstance(
    String systemName,
    List<String> inputTopics,
    String outputTopic,
    CountDownLatch processedMessageLatch,
    StreamApplicationCallback callback,
    CountDownLatch kafkaEventsConsumedLatch,
    Config config) {
  String appName = new ApplicationConfig(config).getGlobalAppId();
  String processorName = config.get(JobConfig.PROCESSOR_ID);
  registerLatches(processedMessageLatch, kafkaEventsConsumedLatch, callback, appName, processorName);

  StreamApplication app = new TestStreamApplication(systemName, inputTopics, outputTopic, appName, processorName);
  return app;
}
 
Example 14
Source File: PropertiesConfigLoaderFactory.java    From samza with Apache License 2.0
@Override
public ConfigLoader getLoader(Config config) {
  String path = config.get(PATH_KEY);

  if (path == null) {
    throw new SamzaException("path is required to read config from properties file");
  }

  return new PropertiesConfigLoader(path);
}
 
Example 15
Source File: JobNodeConfigurationGenerator.java    From samza with Apache License 2.0
private void configureBroadcastInputs(Map<String, String> configs, Config config, Set<String> broadcastStreams) {
  // TODO: SAMZA-1841: remove this once we support defining broadcast input streams in the high-level API.
  // task.broadcast.input should be generated by the planner in the future.
  if (broadcastStreams.isEmpty()) {
    return;
  }
  String broadcastInputs = config.get(TaskConfig.BROADCAST_INPUT_STREAMS);
  if (StringUtils.isNotBlank(broadcastInputs)) {
    broadcastStreams.add(broadcastInputs);
  }
  configs.put(TaskConfig.BROADCAST_INPUT_STREAMS, Joiner.on(',').join(broadcastStreams));
}
 
Example 16
Source File: SamzaSqlApplicationConfig.java    From samza with Apache License 2.0
public SamzaSqlApplicationConfig(Config staticConfig, List<String> inputSystemStreams,
    List<String> outputSystemStreams) {

  ioResolver = createIOResolver(staticConfig);

  this.outputSystemStreams = new LinkedList<>(outputSystemStreams);

  // There could be duplicate streams across different queries. Let's dedupe them.
  Set<String> inputSystemStreamSet = new HashSet<>(inputSystemStreams);
  Set<String> outputSystemStreamSet = new HashSet<>(outputSystemStreams);

  // Get the output system stream configs before the input system stream configs, to account for
  // table descriptors that can be both input and output. Note that there can be only one instance
  // of a table descriptor; a writable table is also readable, but not vice versa.
  outputSystemStreamConfigsBySource = outputSystemStreamSet.stream()
       .collect(Collectors.toMap(Function.identity(), x -> ioResolver.fetchSinkInfo(x)));

  inputSystemStreamConfigBySource = inputSystemStreamSet.stream()
      .collect(Collectors.toMap(Function.identity(), src -> ioResolver.fetchSourceInfo(src)));

  Map<String, SqlIOConfig> systemStreamConfigsBySource = new HashMap<>(inputSystemStreamConfigBySource);
  systemStreamConfigsBySource.putAll(outputSystemStreamConfigsBySource);

  Set<SqlIOConfig> systemStreamConfigs = new HashSet<>(systemStreamConfigsBySource.values());

  relSchemaProvidersBySource = systemStreamConfigs.stream()
      .collect(Collectors.toMap(SqlIOConfig::getSource,
        x -> initializePlugin("RelSchemaProvider", x.getRelSchemaProviderName(), staticConfig,
          CFG_FMT_REL_SCHEMA_PROVIDER_DOMAIN,
          (o, c) -> ((RelSchemaProviderFactory) o).create(x.getSystemStream(), c))));

  samzaRelConvertersBySource = systemStreamConfigs.stream()
      .collect(Collectors.toMap(SqlIOConfig::getSource,
        x -> initializePlugin("SamzaRelConverter", x.getSamzaRelConverterName(), staticConfig,
          CFG_FMT_SAMZA_REL_CONVERTER_DOMAIN, (o, c) -> ((SamzaRelConverterFactory) o).create(x.getSystemStream(),
            relSchemaProvidersBySource.get(x.getSource()), c))));

  samzaRelTableKeyConvertersBySource = systemStreamConfigs.stream()
      .filter(SqlIOConfig::isRemoteTable)
      .collect(Collectors.toMap(SqlIOConfig::getSource,
        x -> initializePlugin("SamzaRelTableKeyConverter", x.getSamzaRelTableKeyConverterName(),
          staticConfig, CFG_FMT_SAMZA_REL_TABLE_KEY_CONVERTER_DOMAIN,
          (o, c) -> ((SamzaRelTableKeyConverterFactory) o).create(x.getSystemStream(), c))));

  udfResolver = createUdfResolver(staticConfig);
  udfMetadata = udfResolver.getUdfs();

  metadataTopicPrefix =
      staticConfig.get(CFG_METADATA_TOPIC_PREFIX, DEFAULT_METADATA_TOPIC_PREFIX);

  processSystemEvents = staticConfig.getBoolean(CFG_SQL_PROCESS_SYSTEM_EVENTS, true);
  windowDurationMs = staticConfig.getLong(CFG_GROUPBY_WINDOW_DURATION_MS, DEFAULT_GROUPBY_WINDOW_DURATION_MS);
}
 
Example 17
Source File: RemoteStoreIOResolverTestFactory.java    From samza with Apache License 2.0
public TestRemoteStoreIOResolver(Config config) {
  this.config = config;
  String metadataTopicPrefix = config.get(CFG_METADATA_TOPIC_PREFIX, DEFAULT_METADATA_TOPIC_PREFIX);
  this.changeLogStorePrefix = metadataTopicPrefix + (metadataTopicPrefix.isEmpty() ? "" : "_");
}
 
Example 18
Source File: StringSerdeFactory.java    From samza with Apache License 2.0
public Serde<String> getSerde(String name, Config config) {
  return new StringSerde(config.get("encoding", "UTF-8"));
}
 
Example 19
Source File: RepartitionJoinWindowApp.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // offset.default = oldest required for tests since checkpoint topic is empty on start and messages are published
  // before the application is run
  Config config = appDescriptor.getConfig();
  String inputTopic1 = config.get(INPUT_TOPIC_1_CONFIG_KEY);
  String inputTopic2 = config.get(INPUT_TOPIC_2_CONFIG_KEY);
  String outputTopic = config.get(OUTPUT_TOPIC_CONFIG_KEY);
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> id1 = ksd.getInputDescriptor(inputTopic1, new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<AdClick> id2 = ksd.getInputDescriptor(inputTopic2, new JsonSerdeV2<>(AdClick.class));

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(id1);
  MessageStream<AdClick> adClicks = appDescriptor.getInputStream(id2);

  MessageStream<KV<String, PageView>> pageViewsRepartitionedByViewId = pageViews
      .partitionBy(PageView::getViewId, pv -> pv,
          new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)), "pageViewsByViewId");

  MessageStream<PageView> pageViewsRepartitionedByViewIdValueOnly = pageViewsRepartitionedByViewId.map(KV::getValue);

  MessageStream<KV<String, AdClick>> adClicksRepartitionedByViewId = adClicks
      .partitionBy(AdClick::getViewId, ac -> ac,
          new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)), "adClicksByViewId");
  MessageStream<AdClick> adClicksRepartitionedByViewIdValueOnly = adClicksRepartitionedByViewId.map(KV::getValue);

  MessageStream<UserPageAdClick> userPageAdClicks = pageViewsRepartitionedByViewIdValueOnly
      .join(adClicksRepartitionedByViewIdValueOnly, new UserPageViewAdClicksJoiner(),
          new StringSerde(), new JsonSerdeV2<>(PageView.class), new JsonSerdeV2<>(AdClick.class),
          Duration.ofMinutes(1), "pageViewAdClickJoin");

  MessageStream<KV<String, UserPageAdClick>> userPageAdClicksByUserId = userPageAdClicks
      .partitionBy(UserPageAdClick::getUserId, upac -> upac,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userPageAdClicksByUserId");

  userPageAdClicksByUserId.map(KV::getValue)
      .window(Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userAdClickWindow")
      .map(windowPane -> KV.of(windowPane.getKey().getKey(), String.valueOf(windowPane.getMessage().size())))
      .sink((message, messageCollector, taskCoordinator) -> {
        taskCoordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
        messageCollector.send(
            new OutgoingMessageEnvelope(
                new SystemStream("kafka", outputTopic), null, message.getKey(), message.getValue()));
      });


  intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsRepartitionedByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksRepartitionedByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) userPageAdClicksByUserId).getStreamId());
}