org.apache.samza.system.descriptors.InputDescriptor Java Examples

The following examples show how to use org.apache.samza.system.descriptors.InputDescriptor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TranslationContext.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Associates {@code pvalue} with the message stream described by {@code inputDescriptor}.
 *
 * <p>An i/o stream is registered with the Samza graph only once per stream id. When the stream id
 * is already known, the previously stored message stream is reused for {@code pvalue}; otherwise
 * a new stream is obtained from the application descriptor and remembered under the stream id.
 *
 * @param pvalue the value to map to the message stream
 * @param inputDescriptor descriptor of the keyed input stream
 * @param <OutT> element type carried inside the {@code OpMessage}s of the stream
 */
public <OutT> void registerInputMessageStream(
    PValue pvalue,
    InputDescriptor<org.apache.samza.operators.KV<?, OpMessage<OutT>>, ?> inputDescriptor) {
  final String id = inputDescriptor.getStreamId();

  if (!registeredInputStreams.containsKey(id)) {
    // Stream id not seen before: obtain the stream, register it and remember it for later calls.
    @SuppressWarnings("unchecked")
    final MessageStream<OpMessage<OutT>> freshStream =
        getValueStream(appDescriptor.getInputStream(inputDescriptor));
    registerMessageStream(pvalue, freshStream);
    registeredInputStreams.put(id, freshStream);
    return;
  }

  // Stream id already registered: map this pvalue onto the stored message stream.
  final MessageStream<OpMessage<OutT>> knownStream = registeredInputStreams.get(id);
  final String notice =
      String.format(
          "Stream id %s has already been mapped to %s stream. Mapping %s to the same message stream.",
          id, knownStream, pvalue);
  LOG.info(notice);
  registerMessageStream(pvalue, knownStream);
}
 
Example #2
Source File: TranslationContext.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the descriptor of a dummy input stream. The dummy stream created will only be used in
 * Beam tests.
 *
 * <p>The stream is created on an in-memory system named after {@code id} and is pre-populated
 * with three messages, in this order: one {@code "dummy"} element in the global window, a
 * watermark at {@code BoundedWindow.TIMESTAMP_MAX_VALUE}, and an end-of-stream message.
 *
 * @param id used as the system name, the stream id and the physical stream name
 * @return the input descriptor of the populated dummy stream
 */
private static InputDescriptor<OpMessage<String>, ?> createDummyStreamDescriptor(String id) {
  final GenericSystemDescriptor system =
      new GenericSystemDescriptor(id, InMemorySystemFactory.class.getName());
  final GenericInputDescriptor<OpMessage<String>> descriptor =
      system.getInputDescriptor(id, new NoOpSerde<>());
  descriptor.withOffsetDefault(SystemStreamMetadata.OffsetType.OLDEST);

  // Create the stream through the in-memory system's admin before producing into it.
  final Config systemConfig = new MapConfig(descriptor.toConfig(), system.toConfig());
  final SystemFactory systemFactory = new InMemorySystemFactory();
  final StreamSpec spec = new StreamSpec(id, id, id, 1);
  systemFactory.getAdmin(id, systemConfig).createStream(spec);

  final SystemProducer producer = systemFactory.getProducer(id, systemConfig, null);
  final SystemStream target = new SystemStream(id, id);
  final Consumer<Object> publish =
      payload -> producer.send(id, new OutgoingMessageEnvelope(target, 0, null, payload));

  final WindowedValue<String> dummyElement =
      WindowedValue.timestampedValueInGlobalWindow("dummy", new Instant());
  publish.accept(OpMessage.ofElement(dummyElement));
  publish.accept(new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()));
  publish.accept(new EndOfStreamMessage(null));

  return descriptor;
}
 
Example #3
Source File: StreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the input {@link MessageStream} for {@code inputDescriptor}.
 *
 * <p>If the descriptor's system descriptor supplies a stream expander, the expander's result is
 * returned directly. Otherwise the descriptor is added to this application, an input operator
 * spec is created and stored under the stream id, and a message stream backed by that spec is
 * returned.
 */
@Override
public <M> MessageStream<M> getInputStream(InputDescriptor<M, ?> inputDescriptor) {
  SystemDescriptor owningSystem = inputDescriptor.getSystemDescriptor();
  Optional<StreamExpander> maybeExpander = owningSystem.getExpander();
  if (maybeExpander.isPresent()) {
    StreamExpander streamExpander = maybeExpander.get();
    return streamExpander.apply(this, inputDescriptor);
  }

  // TODO: SAMZA-1841: need to add to the broadcast streams if inputDescriptor is for a broadcast stream
  addInputDescriptor(inputDescriptor);

  String id = inputDescriptor.getStreamId();
  Serde streamSerde = inputDescriptor.getSerde();
  KV<Serde, Serde> serdePair = getOrCreateStreamSerdes(id, streamSerde);
  // The stream is treated as keyed exactly when its serde is a KVSerde.
  boolean keyed = streamSerde instanceof KVSerde;
  InputTransformer inputTransformer = inputDescriptor.getTransformer().orElse(null);

  InputOperatorSpec spec =
      OperatorSpecs.createInputOperatorSpec(
          id,
          serdePair.getKey(),
          serdePair.getValue(),
          inputTransformer,
          keyed,
          this.getNextOpId(OpCode.INPUT, null));
  inputOperators.put(id, spec);

  return new MessageStreamImpl(this, inputOperators.get(id));
}
 
Example #4
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Registers an input stream under the given config id.
 *
 * <p>Each stream id is registered with the Samza graph only once; a call for an already
 * registered stream id does nothing.
 *
 * @param id config id the message stream is registered under
 * @param inputDescriptor descriptor of the keyed input stream
 * @param <T> element type carried inside the {@code OpMessage}s of the stream
 */
public <T> void registerInputMessageStream(
    String id, InputDescriptor<KV<?, OpMessage<T>>, ?> inputDescriptor) {
  final String descriptorStreamId = inputDescriptor.getStreamId();

  if (!registeredInputStreams.contains(descriptorStreamId)) {
    // Only the value part of each keyed message is kept in the registered stream.
    final MessageStream<OpMessage<T>> valueStream =
        appDescriptor.getInputStream(inputDescriptor).map(org.apache.samza.operators.KV::getValue);

    registerMessageStream(id, valueStream);
    registeredInputStreams.add(descriptorStreamId);
  }
}
 
Example #5
Source File: ApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Stores {@code inputDescriptor} under its stream id and adds its system descriptor.
 *
 * <p>Adding the very same descriptor instance again is accepted. A different instance whose
 * stream id is already in use fails the {@code Preconditions.checkState} check.
 *
 * @param inputDescriptor the input descriptor to add
 */
final void addInputDescriptor(InputDescriptor inputDescriptor) {
  final String id = inputDescriptor.getStreamId();

  // Conflict: the id is taken by some other descriptor instance (identity comparison).
  final boolean takenByOtherInstance =
      inputDescriptors.containsKey(id) && inputDescriptors.get(id) != inputDescriptor;
  Preconditions.checkState(
      !takenByOtherInstance,
      String.format("Cannot add multiple input descriptors with the same streamId: %s", id));

  inputDescriptors.put(id, inputDescriptor);
  addSystemDescriptor(inputDescriptor.getSystemDescriptor());
}
 
Example #6
Source File: TaskApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Adds {@code inputDescriptor} to this application and looks up or creates the serdes for its
 * stream id.
 *
 * @return this descriptor
 */
@Override
public TaskApplicationDescriptor withInputStream(InputDescriptor inputDescriptor) {
  // TODO: SAMZA-1841: need to add to the broadcast streams if inputDescriptor is for a broadcast stream
  addInputDescriptor(inputDescriptor);

  final String streamId = inputDescriptor.getStreamId();
  getOrCreateStreamSerdes(streamId, inputDescriptor.getSerde());

  return this;
}
 
Example #7
Source File: TranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a dummy stream descriptor whose id is a random UUID and returns the input stream the
 * application descriptor provides for it.
 *
 * @return the message stream of the newly created dummy input
 */
public MessageStream<OpMessage<String>> getDummyStream() {
  final String randomId = UUID.randomUUID().toString();
  return appDescriptor.getInputStream(createDummyStreamDescriptor(randomId));
}
 
Example #8
Source File: ScanTranslator.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a table scan into a message stream of {@code SamzaSqlRelMessage}s and registers it
 * with the translator context under the scan's id.
 *
 * <p>Nothing is registered when the source is backed by a remote or caching table descriptor.
 *
 * @param tableScan the scan to translate
 * @param queryLogicalId passed through to the scan map function
 * @param logicalOpId passed through to the scan map function
 * @param context translator context providing the stream application descriptor
 * @param systemDescriptors system descriptors by system name; a new one is added when missing
 * @param inputMsgStreams input message streams by source; a new one is added when missing
 */
void translate(final TableScan tableScan, final String queryLogicalId, final String logicalOpId,
    final TranslatorContext context, Map<String, DelegatingSystemDescriptor> systemDescriptors,
    Map<String, MessageStream<SamzaSqlInputMessage>> inputMsgStreams) {
  StreamApplicationDescriptor applicationDescriptor = context.getStreamAppDescriptor();
  List<String> qualifiedNameParts = tableScan.getTable().getQualifiedName();
  String sourceName = SqlIOConfig.getSourceFromSourceParts(qualifiedNameParts);

  Validate.isTrue(relMsgConverters.containsKey(sourceName), String.format("Unknown source %s", sourceName));
  SqlIOConfig ioConfig = systemStreamConfig.get(sourceName);
  final String systemName = ioConfig.getSystemName();
  final String streamId = ioConfig.getStreamId();
  final String source = ioConfig.getSource();

  // For remote table, we don't have an input stream descriptor. The table descriptor is already defined by the
  // SqlIOResolverFactory.
  // For local table, even though table descriptor is already defined, we still need to create the input stream
  // descriptor to load the local table.
  boolean remoteTable = false;
  if (ioConfig.getTableDescriptor().isPresent()) {
    Object tableDescriptor = ioConfig.getTableDescriptor().get();
    remoteTable = tableDescriptor instanceof RemoteTableDescriptor
        || tableDescriptor instanceof CachingTableDescriptor;
  }
  if (remoteTable) {
    return;
  }

  // set the wrapper input transformer (SamzaSqlInputTransformer) in system descriptor
  DelegatingSystemDescriptor sqlSystemDescriptor = systemDescriptors.get(systemName);
  if (sqlSystemDescriptor != null) {
    /* in SamzaSQL, there should be no systemDescriptor setup by user, so this branch happens only
     * in case of Fan-OUT (i.e., same input stream used in multiple sql statements), or when same input
     * used twice in same sql statement (e.g., select ... from input as i1, input as i2 ...), o.w., throw error */
    if (sqlSystemDescriptor.getTransformer().isPresent()
        && !(sqlSystemDescriptor.getTransformer().get() instanceof SamzaSqlInputTransformer)) {
      throw new SamzaException(
          "SamzaSQL Exception: existing transformer for " + systemName + " is not SamzaSqlInputTransformer");
    }
  } else {
    sqlSystemDescriptor = new DelegatingSystemDescriptor(systemName, new SamzaSqlInputTransformer());
    systemDescriptors.put(systemName, sqlSystemDescriptor);
  }

  InputDescriptor scanInputDescriptor = sqlSystemDescriptor.getInputDescriptor(streamId, new NoOpSerde<>());

  // Create the mapped input stream for this source only once; later scans of the same source reuse it.
  if (!inputMsgStreams.containsKey(source)) {
    MessageStream<SamzaSqlInputMessage> rawInput = applicationDescriptor.getInputStream(scanInputDescriptor);
    inputMsgStreams.put(source, rawInput.map(new SystemMessageMapperFunction(source, queryId)));
  }

  MessageStream<SamzaSqlInputMessage> filteredInput = inputMsgStreams.get(source)
      .filter(new FilterSystemMessageFunction(sourceName, queryId));
  MessageStream<SamzaSqlRelMessage> relMessages =
      filteredInput.map(new ScanMapFunction(sourceName, queryId, queryLogicalId, logicalOpId));

  context.registerMessageStream(tableScan.getId(), relMessages);
}
 
Example #9
Source File: StreamApplicationDescriptor.java    From samza with Apache License 2.0 2 votes vote down vote up
/**
 * Gets the input {@link MessageStream} corresponding to the {@code inputDescriptor}.
 * <p>
 * A {@code MessageStream<KV<K, V>>}, obtained by calling this method with a descriptor with a
 * {@code KVSerde<K, V>}, can receive messages of type {@code KV<K, V>}. An input {@code MessageStream<M>},
 * obtained using a descriptor with any other {@code Serde<M>}, can receive messages of type {@code M} - the key
 * in the incoming message is ignored.
 * <p>
 * A {@code KVSerde<NoOpSerde, NoOpSerde>} or {@code NoOpSerde} may be used for the descriptor if the
 * {@code SystemConsumer} deserializes the incoming messages itself, and no further deserialization is required from
 * the framework.
 * <p>
 * Multiple invocations of this method with the same {@code inputDescriptor} will throw an
 * {@link IllegalStateException}.
 *
 * @param inputDescriptor the descriptor for the stream
 * @param <M> the type of messages in the input {@link MessageStream}
 * @return the input {@link MessageStream}
 * @throws IllegalStateException when invoked multiple times with the same {@code inputDescriptor}
 */
<M> MessageStream<M> getInputStream(InputDescriptor<M, ?> inputDescriptor);
 
Example #10
Source File: TaskApplicationDescriptor.java    From samza with Apache License 2.0 2 votes vote down vote up
/**
 * Adds the input stream described by {@code isd} to the application.
 *
 * @param isd the {@link InputDescriptor} of the input stream to add
 * @return this {@link TaskApplicationDescriptor}
 */
TaskApplicationDescriptor withInputStream(InputDescriptor isd);
 
Example #11
Source File: ApplicationDescriptorImpl.java    From samza with Apache License 2.0 2 votes vote down vote up
/**
 * Returns all the {@link InputDescriptor}s added to this application.
 *
 * @return an unmodifiable view of the map from streamId to {@link InputDescriptor}
 */
public Map<String, InputDescriptor> getInputDescriptors() {
  final Map<String, InputDescriptor> readOnlyView = Collections.unmodifiableMap(inputDescriptors);
  return readOnlyView;
}