Java Code Examples for org.apache.samza.SamzaException

The following examples show how to use org.apache.samza.SamzaException. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: beam   Source File: UnboundedSourceSystem.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link SystemStreamMetadata} entry for every requested stream name.
 *
 * <p>For each name the source is split, and one partition (numbered from 0) is registered per
 * split with a {@link SystemStreamPartitionMetadata} whose three fields are all {@code null}.
 *
 * @param streamNames the stream names to describe
 * @return a map from each stream name to its metadata
 * @throws SamzaException wrapping any exception raised while splitting or building the metadata
 */
@Override
public Map<String, SystemStreamMetadata> getSystemStreamMetadata(Set<String> streamNames) {
  return streamNames.stream()
      .collect(
          Collectors.toMap(
              name -> name,
              name -> {
                final Map<Partition, SystemStreamPartitionMetadata> metadataByPartition =
                    new HashMap<>();
                try {
                  // The generated splits are assumed to be stable; the partition-to-source
                  // mapping must be the same in every container.
                  final int splitCount = split(source, pipelineOptions).size();
                  int partitionId = 0;
                  while (partitionId < splitCount) {
                    metadataByPartition.put(
                        new Partition(partitionId),
                        new SystemStreamPartitionMetadata(null, null, null));
                    partitionId++;
                  }
                  return new SystemStreamMetadata(name, metadataByPartition);
                } catch (Exception e) {
                  throw new SamzaException("Fail to read stream metadata", e);
                }
              }));
}
 
Example 2
Source Project: samza   Source File: TestQueryTranslator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Expects a {@link SamzaException} when translating a stream-table join whose ON clause uses
 * an inequality ({@code <>}) between the two sides.
 */
@Test (expected = SamzaException.class)
public void testTranslateStreamTableJoinWithThetaCondition() {
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(configs, 1);
  staticConfigs.put(
      SamzaSqlApplicationConfig.CFG_SQL_STMT,
      "Insert into testavro.enrichedPageViewTopic(profileName, pageKey)"
          + " select p.name as profileName, pv.pageKey"
          + " from testavro.PAGEVIEW as pv"
          + " join testavro.PROFILE.`$table` as p"
          + " on p.id <> pv.profileId");
  Config samzaConfig = SamzaSqlApplicationRunner.computeSamzaConfigs(true, new MapConfig(staticConfigs));

  List<SamzaSqlQueryParser.QueryInfo> queries = fetchQueryInfo(fetchSqlFromConfig(staticConfigs));
  SamzaSqlApplicationConfig sqlAppConfig = new SamzaSqlApplicationConfig(
      new MapConfig(staticConfigs),
      queries.stream()
          .flatMap(query -> query.getSources().stream())
          .collect(Collectors.toList()),
      queries.stream()
          .map(query -> query.getSink())
          .collect(Collectors.toList()));

  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(streamApp -> { }, samzaConfig);
  new QueryTranslator(appDescriptor, sqlAppConfig).translate(queries.get(0), appDescriptor, 0);
}
 
Example 3
Source Project: samza   Source File: HdfsSystemAdmin.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the JSON form of the partition descriptor map to the descriptor path derived from the
 * staging directory and stream name, unless a file already exists at that path.
 *
 * <p>Does nothing (beyond a warning) when the staging directory or the stream name is blank.
 *
 * @param streamName the stream whose partition descriptor is persisted
 * @param partitionDescriptorMap the partition-to-descriptor mapping to serialize
 * @throws SamzaException wrapping any {@link IOException} raised while checking or writing
 */
private void persistPartitionDescriptor(String streamName,
  Map<Partition, List<String>> partitionDescriptorMap) {
  final boolean missingInput = StringUtils.isBlank(stagingDirectory) || StringUtils.isBlank(streamName);
  if (missingInput) {
    LOG.warn("Staging directory ({}) or stream name ({}) is empty", stagingDirectory, streamName);
    return;
  }

  final Path descriptorPath = PartitionDescriptorUtil.getPartitionDescriptorPath(stagingDirectory, streamName);
  try (FileSystem fileSystem = descriptorPath.getFileSystem(new Configuration())) {
    // The partition descriptor is meant to be immutable, so an existing one is never overwritten.
    if (fileSystem.exists(descriptorPath)) {
      LOG.warn(descriptorPath + " exists. Skip persisting partition descriptor.");
      return;
    }

    LOG.info("About to persist partition descriptors to path: " + descriptorPath);
    try (FSDataOutputStream descriptorStream = fileSystem.create(descriptorPath)) {
      // Serialization stays inside the stream scope so the target is created before the map is rendered.
      final String descriptorJson = PartitionDescriptorUtil.getJsonFromDescriptorMap(partitionDescriptorMap);
      descriptorStream.write(descriptorJson.getBytes(StandardCharsets.UTF_8));
    }
  } catch (IOException e) {
    throw new SamzaException("Failed to validate/persist partition description on hdfs.", e);
  }
}
 
Example 4
Source Project: samza   Source File: StreamAppender.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Performs the extra configuration required before logging to the stream.
 * Called once in the container before the first log event is sent.
 *
 * <p>The container name is read from the {@code JAVA_OPTS_CONTAINER_NAME} system property and
 * becomes the key for the logs. When the name carries the job coordinator tag the system is
 * left uninitialized; otherwise it is set up immediately.
 *
 * @throws SamzaException if the container name system property is not set
 */
@Override
public void activateOptions() {
  final String containerName = System.getProperty(JAVA_OPTS_CONTAINER_NAME);
  if (containerName == null) {
    throw new SamzaException("Got null container name from system property: " + JAVA_OPTS_CONTAINER_NAME +
        ". This is used as the key for the log appender, so can't proceed.");
  }

  final boolean runsInApplicationMaster = containerName.contains(JOB_COORDINATOR_TAG);
  isApplicationMaster = runsInApplicationMaster;
  key = containerName; // the container name doubles as the key for the logs

  if (runsInApplicationMaster) {
    // In the AM the appender has to wait until the JobCoordinator is up.
    systemInitialized = false;
    return;
  }

  setupSystem();
  systemInitialized = true;
}
 
Example 5
Source Project: samza   Source File: PartialJoinOperatorImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Stores the incoming message in this side's state and, if the other side holds an unexpired
 * entry for the same key, emits the join of the two.
 *
 * @param message the incoming message
 * @param collector the message collector (not used by this method)
 * @param coordinator the task coordinator (not used by this method)
 * @return an already-completed stage holding either an empty collection or the single join result
 * @throws SamzaException wrapping any exception raised while reading or updating state or joining
 */
@Override
protected CompletionStage<Collection<JM>> handleMessageAsync(M message, MessageCollector collector,
    TaskCoordinator coordinator) {
  Collection<JM> joined = Collections.emptyList();

  try {
    final KeyValueStore<K, TimestampedValue<M>> ownStore = thisPartialJoinFn.getState();
    final KeyValueStore<K, TimestampedValue<OM>> peerStore = otherPartialJoinFn.getState();

    final K joinKey = thisPartialJoinFn.getKey(message);
    ownStore.put(joinKey, new TimestampedValue<>(message, clock.currentTimeMillis()));

    final TimestampedValue<OM> peerEntry = peerStore.get(joinKey);
    final long expiryCutoff = clock.currentTimeMillis() - ttlMs;
    final boolean peerIsLive = peerEntry != null && peerEntry.getTimestamp() > expiryCutoff;
    if (peerIsLive) {
      joined = Collections.singletonList(thisPartialJoinFn.apply(message, peerEntry.getValue()));
    }
  } catch (Exception e) {
    throw new SamzaException("Error handling message in PartialJoinOperatorImpl " + getOpImplId(), e);
  }

  return CompletableFuture.completedFuture(joined);
}
 
Example 6
Source Project: samza   Source File: TestKafkaSystemAdminWithMock.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Expects {@code getSSPMetadata} to give up with a {@link SamzaException} when the mocked
 * consumer's {@code beginningOffsets} call throws on two consecutive invocations.
 */
@Test(expected = SamzaException.class)
public void testGetSSPMetadataShouldTerminateAfterFiniteRetriesOnException() throws Exception {
  ImmutableSet<SystemStreamPartition> requestedSsps = ImmutableSet.of(
      new SystemStreamPartition(TEST_SYSTEM, VALID_TOPIC, new Partition(0)),
      new SystemStreamPartition(TEST_SYSTEM, "otherTopic", new Partition(1)));

  List<TopicPartition> kafkaPartitions = requestedSsps.stream()
      .map(ssp -> new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId()))
      .collect(Collectors.toList());

  // Both the first and the second lookup fail.
  when(mockKafkaConsumer.beginningOffsets(kafkaPartitions))
      .thenThrow(new RuntimeException())
      .thenThrow(new RuntimeException());

  ExponentialSleepStrategy retryStrategy = new ExponentialSleepStrategy(2, 1, 1);
  kafkaSystemAdmin.getSSPMetadata(requestedSsps, retryStrategy);
}
 
Example 7
Source Project: samza   Source File: ZkMetadataStore.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Appends a CRC checksum to the input value and splits the result into independent segments of
 * at most {@code VALUE_SEGMENT_SIZE_IN_BYTES} bytes each.
 *
 * @param value the input byte array to split.
 * @return the value followed by its checksum, split into consecutive byte array chunks.
 * @throws SamzaException wrapping any exception raised while computing the checksum or splitting.
 */
private static List<byte[]> chunkMetadataStoreValue(byte[] value) {
  try {
    byte[] checksum = getCRCChecksum(value);
    byte[] valueWithChecksum = ArrayUtils.addAll(value, checksum);
    List<byte[]> valueSegments = new ArrayList<>();
    int length = valueWithChecksum.length;
    for (int index = 0; index < length; index += VALUE_SEGMENT_SIZE_IN_BYTES) {
      // The last segment may be shorter than the segment size.
      byte[] valueSegment = ArrayUtils.subarray(valueWithChecksum, index, Math.min(index + VALUE_SEGMENT_SIZE_IN_BYTES, length));
      valueSegments.add(valueSegment);
    }
    return valueSegments;
  } catch (Exception e) {
    // Formatting a byte[] with %s only prints its identity (e.g. "[B@1a2b3c"), so report the size instead.
    String valueSize = (value == null) ? "null" : value.length + " bytes";
    throw new SamzaException(
        String.format("Exception occurred when splitting the value of size: %s to small chunks.", valueSize), e);
  }
}
 
Example 8
Source Project: samza   Source File: IntermediateMessageSerde.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes the object with the serde matching its {@link MessageType} and prefixes the
 * result with a single byte holding the type's ordinal.
 *
 * @param object the message to serialize
 * @return one type byte followed by the serde output
 * @throws SamzaException if the message type is not one of the handled types
 */
@Override
public byte[] toBytes(Object object) {
  final MessageType messageType = MessageType.of(object);

  final byte[] payload;
  switch (messageType) {
    case USER_MESSAGE:
      payload = userMessageSerde.toBytes(object);
      break;
    case WATERMARK:
      payload = watermarkSerde.toBytes((WatermarkMessage) object);
      break;
    case END_OF_STREAM:
      payload = eosSerde.toBytes((EndOfStreamMessage) object);
      break;
    default:
      throw new SamzaException("Unknown message type: " + messageType.name());
  }

  // Layout: [type ordinal][payload...]
  final byte[] framed = new byte[1 + payload.length];
  System.arraycopy(payload, 0, framed, 1, payload.length);
  framed[0] = (byte) messageType.ordinal();
  return framed;
}
 
Example 9
Source Project: samza   Source File: TaskPartitionAssignmentManager.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Persists the {@link SystemStreamPartition}-to-task-names assignments in the metadata store
 * and flushes the store afterwards.
 *
 * @param sspToTaskNameMapping the assignments to write. An entry whose task name list is null or
 *                             empty is deleted from the metadata store instead of being written.
 * @throws SamzaException wrapping any exception raised while serializing or storing an entry
 */
public void writeTaskPartitionAssignments(Map<SystemStreamPartition, List<String>> sspToTaskNameMapping) {
  for (Map.Entry<SystemStreamPartition, List<String>> assignment : sspToTaskNameMapping.entrySet()) {
    final SystemStreamPartition ssp = assignment.getKey();
    final List<String> assignedTasks = assignment.getValue();
    // With broadcast streams one input partition is mapped to several tasks in a SamzaContainer.
    // The store is keyed by the partition (value: list of task names) rather than by task name
    // because of the 1 MB limit on value size in kafka.
    final String sspJson = serializeSSPToJson(ssp);

    if (assignedTasks == null || assignedTasks.isEmpty()) {
      LOG.info("Deleting the key: {} from the metadata store.", ssp);
      metadataStore.delete(sspJson);
      continue;
    }

    try {
      final byte[] serializedTaskNames = valueSerde.toBytes(taskNamesMapper.writeValueAsString(assignedTasks));
      LOG.info("Storing the partition: {} and taskNames: {} into the metadata store.", sspJson, assignedTasks);
      metadataStore.put(sspJson, serializedTaskNames);
    } catch (Exception e) {
      throw new SamzaException("Exception occurred when writing task to partition assignment.", e);
    }
  }
  metadataStore.flush();
}
 
Example 10
Source Project: samza   Source File: TestJoinOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Expects a {@link SamzaException} when an application joins an input stream with itself and
 * a stream operator task is then created for it.
 */
@Test(expected = SamzaException.class)
public void joinWithSelfThrowsException() throws Exception {
  Map<String, String> rawConfig = new HashMap<>();
  rawConfig.put("job.name", "jobName");
  rawConfig.put("job.id", "jobId");
  StreamTestUtils.addStreamConfigs(rawConfig, "inStream", "insystem", "instream");
  Config testConfig = new MapConfig(rawConfig);

  StreamApplicationDescriptorImpl selfJoinApp = new StreamApplicationDescriptorImpl(appDesc -> {
    IntegerSerde intSerde = new IntegerSerde();
    KVSerde<Integer, Integer> pairSerde = KVSerde.of(intSerde, intSerde);
    GenericInputDescriptor<KV<Integer, Integer>> input =
        new GenericSystemDescriptor("insystem", "mockFactoryClassName").getInputDescriptor("inStream", pairSerde);

    MessageStream<KV<Integer, Integer>> stream = appDesc.getInputStream(input);

    // The same stream is used as both sides of the join.
    stream.join(stream, new TestJoinFunction(), intSerde, pairSerde, pairSerde, JOIN_TTL, "join");
  }, testConfig);

  // Creating the task is the step expected to throw.
  createStreamOperatorTask(new SystemClock(), selfJoinApp);
}
 
Example 11
Source Project: samza   Source File: SerdeUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a Java object to its JSON string form.
 *
 * <p>Transient and static fields are excluded, and values of type {@link Class} are written as
 * their class name.
 *
 * @param name name of the object, used only in the failure message
 * @param object object to be serialized
 * @return JSON representation of the object
 * @throws SamzaException wrapping any exception raised during serialization
 */
public static String toJson(String name, Object object) {
  // Teaches Gson how to write fields of type Class; reading them back is deliberately unsupported.
  final TypeAdapter<Class> classNameAdapter = new TypeAdapter<Class>() {
    @Override
    public void write(JsonWriter out, Class value) throws IOException {
      if (value != null) {
        out.value(value.getName());
      } else {
        out.nullValue();
      }
    }

    @Override
    public Class read(JsonReader in) {
      throw new SamzaException("Deserialization from json is not supported.");
    }
  };

  final Gson serializer = new GsonBuilder()
      .excludeFieldsWithModifiers(Modifier.TRANSIENT, Modifier.STATIC)
      .registerTypeHierarchyAdapter(Class.class, classNameAdapter)
      .create();

  try {
    return serializer.toJson(object);
  } catch (Exception e) {
    throw new SamzaException(String.format("Failed to serialize %s to json", name), e);
  }
}
 
Example 12
Source Project: samza   Source File: TestQueryTranslator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Expects a {@link SamzaException} when translating a join between {@code testavro.PAGEVIEW}
 * and {@code testavro.PROFILE} where neither side uses the {@code $table} suffix.
 */
@Test (expected = SamzaException.class)
public void testTranslateStreamStreamJoin() {
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(configs, 1);
  staticConfigs.put(
      SamzaSqlApplicationConfig.CFG_SQL_STMT,
      "Insert into testavro.enrichedPageViewTopic(profileName, pageKey)"
          + " select p.name as profileName, pv.pageKey"
          + " from testavro.PAGEVIEW as pv"
          + " join testavro.PROFILE as p"
          + " on p.id = pv.profileId");
  Config samzaConfig = SamzaSqlApplicationRunner.computeSamzaConfigs(true, new MapConfig(staticConfigs));

  List<SamzaSqlQueryParser.QueryInfo> queries = fetchQueryInfo(fetchSqlFromConfig(staticConfigs));
  SamzaSqlApplicationConfig sqlAppConfig = new SamzaSqlApplicationConfig(
      new MapConfig(staticConfigs),
      queries.stream()
          .flatMap(query -> query.getSources().stream())
          .collect(Collectors.toList()),
      queries.stream()
          .map(query -> query.getSink())
          .collect(Collectors.toList()));

  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(streamApp -> { }, samzaConfig);
  new QueryTranslator(appDescriptor, sqlAppConfig).translate(queries.get(0), appDescriptor, 0);
}
 
Example 13
Source Project: samza   Source File: AzureBlobAvroWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Encodes an Avro record to bytes with a binary encoder, using the schema carried by the record.
 *
 * <p>A {@link SpecificDatumWriter} is used for {@link SpecificRecord} instances and a
 * {@link GenericDatumWriter} for everything else.
 *
 * @param record the record to encode
 * @return the binary-encoded record
 * @throws SamzaException wrapping any exception raised while encoding
 */
@VisibleForTesting
byte[] encodeRecord(IndexedRecord record) {
  final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  final Schema recordSchema = record.getSchema();
  try {
    final BinaryEncoder binaryEncoder = new EncoderFactory().binaryEncoder(buffer, null);
    final DatumWriter<IndexedRecord> datumWriter = (record instanceof SpecificRecord)
        ? new SpecificDatumWriter<IndexedRecord>(recordSchema)
        : new GenericDatumWriter<IndexedRecord>(recordSchema);
    datumWriter.write(record, binaryEncoder);
    // The encoder may buffer, so push everything through to the byte stream.
    binaryEncoder.flush();
  } catch (Exception e) {
    throw new SamzaException("Unable to serialize Avro record using schema within the record: " + recordSchema.toString(), e);
  }
  return buffer.toByteArray();
}
 
Example 14
Source Project: samza   Source File: TestAzureBlobAvroWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates (but does not start) a thread that writes the given envelope to the writer
 * {@code numberOfSends} times.
 *
 * @param ome the envelope to write repeatedly
 * @param azureBlobAvroWriter the writer to write to
 * @param numberOfSends how many times the envelope is written
 * @return the unstarted thread; an {@link IOException} from a write surfaces in it as a {@link SamzaException}
 */
private Thread writeInThread(OutgoingMessageEnvelope ome, AzureBlobAvroWriter azureBlobAvroWriter,
    int numberOfSends) {
  return new Thread() {
    @Override
    public void run() {
      int sent = 0;
      try {
        while (sent < numberOfSends) {
          azureBlobAvroWriter.write(ome);
          sent++;
        }
      } catch (IOException e) {
        throw new SamzaException(e);
      }
    }
  };
}
 
Example 15
Source Project: samza   Source File: CouchbaseTableWriteFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Bridges a single-item {@link Observable} of documents to a {@link CompletableFuture}.
 *
 * @param observable the observable to subscribe to (as a single)
 * @param errorMessage message for the {@link SamzaException} that wraps a failure
 * @param isVoid when true the future completes with {@code null}; otherwise it completes with
 *               the document's content cast to {@code T}
 * @param <T> the result type of the returned future
 * @return a future completed on success, or completed exceptionally with a {@link SamzaException} on error
 */
protected <T> CompletableFuture<T>  asyncWriteHelper(Observable<? extends Document> observable, String errorMessage,
    boolean isVoid) {
  final CompletableFuture<T> result = new CompletableFuture<>();
  final SingleSubscriber<Document> subscriber = new SingleSubscriber<Document>() {
    @Override
    public void onSuccess(Document document) {
      result.complete(isVoid ? null : (T) document.content());
    }

    @Override
    public void onError(Throwable error) {
      result.completeExceptionally(new SamzaException(errorMessage, error));
    }
  };
  observable.toSingle().subscribe(subscriber);
  return result;
}
 
Example 16
Source Project: samza   Source File: CoordinatorStreamSystemProducer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a coordinator stream message and sends it through the system producer.
 *
 * <p>The key is always serialized. The value is left {@code null} for delete messages and is
 * the serialized message map otherwise.
 *
 * @param message
 *          The message to send.
 * @throws SamzaException wrapping any exception raised while serializing or sending
 */
public void send(CoordinatorStreamMessage message) {
  log.debug("Sending {}", message);
  try {
    final String source = message.getSource();
    final byte[] keyBytes = keySerde.toBytes(Arrays.asList(message.getKeyArray()));
    final byte[] valueBytes = message.isDelete() ? null : messageSerde.toBytes(message.getMessageMap());
    systemProducer.send(
        source,
        new OutgoingMessageEnvelope(systemStream, Integer.valueOf(0), keyBytes, valueBytes));
  } catch (Exception e) {
    throw new SamzaException(e);
  }
}
 
Example 17
Source Project: samza   Source File: KinesisSystemConsumer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns a factory that, on each invocation, allocates an ssp for the stream and registers a
 * new {@link KinesisRecordProcessor} for it.
 *
 * <p>Any failure is recorded in {@code callbackException} and rethrown as a {@link SamzaException}.
 *
 * @param stream the stream for which ssps are allocated
 * @return the record processor factory
 */
IRecordProcessorFactory createRecordProcessorFactory(String stream) {
  return () -> {
    // Runs in the Kinesis thread context.
    try {
      final SystemStreamPartition allocatedSsp = sspAllocator.allocate(stream);
      final KinesisRecordProcessor newProcessor =
          new KinesisRecordProcessor(allocatedSsp, KinesisSystemConsumer.this);
      final KinesisRecordProcessor displaced = processors.put(allocatedSsp, newProcessor);
      final String conflictMessage = String.format("Adding new kinesis record processor %s while the"
          + " previous processor %s for the same ssp %s is still active.", newProcessor, displaced, allocatedSsp);
      Validate.isTrue(displaced == null, conflictMessage);
      return newProcessor;
    } catch (Exception e) {
      // This exception results from kinesis dynamic shard splits that left sspAllocator without free ssps.
      // Recording the failure in the consumer eventually stops the container, and a manual job restart is
      // then required. After the restart the new shards are discovered and enough ssps are added to the
      // sspAllocator freePool.
      callbackException = e;
      throw new SamzaException(e);
    }
  };
}
 
Example 18
Source Project: samza   Source File: TestQueryTranslator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Expects a {@link SamzaException} when translating a stream-table join whose ON clause
 * combines the key equality with an additional comparison against a string literal.
 */
@Test (expected = SamzaException.class)
public void testTranslateStreamTableJoinWithAndLiteralCondition() {
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(configs, 1);
  staticConfigs.put(
      SamzaSqlApplicationConfig.CFG_SQL_STMT,
      "Insert into testavro.enrichedPageViewTopic(profileName, pageKey)"
          + " select p.name as profileName, pv.pageKey"
          + " from testavro.PAGEVIEW as pv"
          + " join testavro.PROFILE.`$table` as p"
          + " on p.id = pv.profileId and p.name = 'John'");
  Config samzaConfig = SamzaSqlApplicationRunner.computeSamzaConfigs(true, new MapConfig(staticConfigs));

  List<SamzaSqlQueryParser.QueryInfo> queries = fetchQueryInfo(fetchSqlFromConfig(staticConfigs));
  SamzaSqlApplicationConfig sqlAppConfig = new SamzaSqlApplicationConfig(
      new MapConfig(staticConfigs),
      queries.stream()
          .flatMap(query -> query.getSources().stream())
          .collect(Collectors.toList()),
      queries.stream()
          .map(query -> query.getSink())
          .collect(Collectors.toList()));

  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(streamApp -> { }, samzaConfig);
  new QueryTranslator(appDescriptor, sqlAppConfig).translate(queries.get(0), appDescriptor, 0);
}
 
Example 19
Source Project: samza-hello-samza   Source File: CouchbaseTableExample.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Dispatches a custom asynchronous write operation. Only {@code OP_COUNTER} is supported:
 * it expects exactly two arguments, a {@code String} id and an {@code int} delta, and invokes
 * the bucket's asynchronous counter with them.
 *
 * @param opId the operation identifier
 * @param args the operation arguments
 * @param <T> the result type of the returned future
 * @return the future produced by {@code asyncWriteHelper} for the counter call
 * @throws SamzaException if {@code opId} is not a known operation
 */
@Override
public <T> CompletableFuture<T> writeAsync(int opId, Object... args) {
  if (opId != OP_COUNTER) {
    throw new SamzaException("Unknown opId: " + opId);
  }

  Preconditions.checkArgument(2 == args.length,
      String.format("Two arguments (String and int) are expected for counter operation (opId=%d)", opId));
  final String counterId = (String) args[0];
  final int increment = (int) args[1];
  return asyncWriteHelper(
      bucket.async().counter(counterId, increment, 1, timeout.toMillis(), TimeUnit.MILLISECONDS),
      String.format("Failed to invoke counter with Id %s from bucket %s.", counterId, bucketName),
      false);
}
 
Example 20
Source Project: samza   Source File: StorageRecovery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the changelog streams and the storage engine factories of every configured store
 * from the job config and records them in {@code changeLogSystemStreams} and
 * {@code storageEngineFactories}.
 *
 * @throws SamzaException if a store has no storage factory class configured
 */
private void getChangeLogSystemStreamsAndStorageFactories() {
  final StorageConfig storageConfig = new StorageConfig(jobConfig);
  final List<String> storeNames = storageConfig.getStoreNames();

  LOG.info("Got store names: " + storeNames.toString());

  for (String storeName : storeNames) {
    final Optional<String> changelogStream = storageConfig.getChangelogStream(storeName);

    LOG.info("stream name for " + storeName + " is " + changelogStream.orElse(null));

    // A changelog stream is optional; only stores that have one are registered.
    if (changelogStream.isPresent()) {
      changeLogSystemStreams.put(storeName, StreamUtil.getSystemStreamFromNames(changelogStream.get()));
    }

    final Optional<String> factoryClassName = storageConfig.getStorageFactoryClassName(storeName);
    if (!factoryClassName.isPresent()) {
      throw new SamzaException("Missing storage factory for " + storeName + ".");
    }

    @SuppressWarnings("unchecked")
    StorageEngineFactory<Object, Object> engineFactory =
        (StorageEngineFactory<Object, Object>) ReflectionUtil.getObj(factoryClassName.get(), StorageEngineFactory.class);
    storageEngineFactories.put(storeName, engineFactory);
  }
}
 
Example 21
Source Project: samza   Source File: ContainerStorageManager.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Computes the changelog system-streams that remain after the standby tasks' changelog SSPs
 * have been moved to side inputs.
 *
 * <p>For each standby task, its changelog SSPs are removed from the changelog SSP map and added
 * to that task's entry in {@code taskSideInputStoreSSPs}. The task's sideInputManager will
 * consume and restore these as well.
 *
 * @param containerModel the container's model
 * @param changelogSystemStreams the passed in map of store name to changelog system-stream
 * @return a map of store name to changelog system-stream, built from the changelog SSPs that
 *         were not moved to side inputs
 * @throws SamzaException if two stores share the same changelog system-stream
 */
private Map<String, SystemStream> getChangelogSystemStreams(ContainerModel containerModel, Map<String, SystemStream> changelogSystemStreams) {

  // If inverting the map loses entries, at least two stores point at the same system-stream.
  final int distinctStreams = MapUtils.invertMap(changelogSystemStreams).size();
  if (distinctStreams != changelogSystemStreams.size()) {
    throw new SamzaException("Two stores cannot have the same changelog system-stream");
  }

  // Start with the changelog SSP of every store for every task in the container.
  final Map<SystemStreamPartition, String> storeByChangelogSsp = new HashMap<>();
  changelogSystemStreams.forEach((storeName, systemStream) ->
      containerModel.getTasks().forEach((taskName, taskModel) ->
          storeByChangelogSsp.put(
              new SystemStreamPartition(systemStream, taskModel.getChangelogPartition()), storeName)));

  // Move the standby tasks' changelog SSPs over to their side inputs.
  getTasks(containerModel, TaskMode.Standby).forEach((standbyTaskName, standbyTaskModel) -> {
    this.taskSideInputStoreSSPs.putIfAbsent(standbyTaskName, new HashMap<>());
    changelogSystemStreams.forEach((storeName, systemStream) -> {
      final SystemStreamPartition standbySsp =
          new SystemStreamPartition(systemStream, standbyTaskModel.getChangelogPartition());
      storeByChangelogSsp.remove(standbySsp);
      this.taskSideInputStoreSSPs.get(standbyTaskName).put(storeName, Collections.singleton(standbySsp));
    });
  });

  // What is left corresponds only to active tasks (those of standby tasks were moved above).
  final Map<String, SystemStreamPartition> remainingSspByStore = MapUtils.invertMap(storeByChangelogSsp);
  return remainingSspByStore.entrySet().stream()
      .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().getSystemStream()));
}
 
Example 22
Source Project: samza   Source File: TestAzureBlobAvroWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates (but does not start) a thread that writes the given envelope to the writer
 * {@code numberOfSends} times and then flushes the writer.
 *
 * @param ome the envelope to write repeatedly
 * @param azureBlobAvroWriter the writer to write to and flush
 * @param numberOfSends how many times the envelope is written before the flush
 * @return the unstarted thread; an {@link IOException} from a write or the flush surfaces in it as a {@link SamzaException}
 */
private Thread writeFlushInThread(OutgoingMessageEnvelope ome, AzureBlobAvroWriter azureBlobAvroWriter,
    int numberOfSends) {
  return new Thread() {
    @Override
    public void run() {
      int sent = 0;
      try {
        while (sent < numberOfSends) {
          azureBlobAvroWriter.write(ome);
          sent++;
        }
        azureBlobAvroWriter.flush();
      } catch (IOException e) {
        throw new SamzaException(e);
      }
    }
  };
}
 
Example 23
/**
 * Converts every config entry into an {@link IncomingMessageEnvelope} and puts it on the
 * coordinator ssp, then marks that ssp as being at head.
 *
 * <p>Entries whose key starts with {@code CHANGELOGPREFIX} become {@link SetChangelogMapping}
 * messages built from the second and third colon-separated parts of the key and the integer
 * value. All other entries become {@link SetConfig} messages with source {@code "source"}.
 *
 * @param config the config to convert
 * @throws SamzaException wrapping any exception raised during the conversion
 */
private void convertConfigToCoordinatorMessage(Config config) {
  try {
    for (Map.Entry<String, String> entry : config.entrySet()) {
      final String configKey = entry.getKey();
      final String configValue = entry.getValue();
      final byte[] keyBytes;
      final byte[] messageBytes;
      if (!configKey.startsWith(CHANGELOGPREFIX)) {
        final SetConfig setConfig = new SetConfig("source", configKey, configValue);
        keyBytes = MAPPER.writeValueAsString(setConfig.getKeyArray()).getBytes("UTF-8");
        messageBytes = MAPPER.writeValueAsString(setConfig.getMessageMap()).getBytes("UTF-8");
      } else {
        final String[] keyParts = configKey.split(":");
        final SetChangelogMapping changelogMapping =
            new SetChangelogMapping(keyParts[1], keyParts[2], Integer.parseInt(configValue));
        keyBytes = MAPPER.writeValueAsString(changelogMapping.getKeyArray()).getBytes("UTF-8");
        messageBytes = MAPPER.writeValueAsString(changelogMapping.getMessageMap()).getBytes("UTF-8");
      }
      // The ssp used here is the coordinator ssp (which is always fixed), not the task ssp.
      put(systemStreamPartition, new IncomingMessageEnvelope(systemStreamPartition, "", keyBytes, messageBytes));
    }
    setIsAtHead(systemStreamPartition, true);
  } catch (Exception e) {
    throw new SamzaException(e);
  }
}
 
Example 24
Source Project: samza   Source File: AsyncSystemProducer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Default implementation of the flush that just waits for all the pendingFutures to be complete.
 * SystemProducer should override this, if the underlying system provides flush semantics.
 *
 * <p>Send-callback errors are checked both before waiting and after the pending futures have
 * been cleared. If the wait is interrupted, the calling thread's interrupt status is restored
 * before the failure is reported.
 *
 * @param source String representing the source of the message.
 * @throws SamzaException if waiting for the pending sends is interrupted, fails, or times out
 */
@Override
public synchronized void flush(String source) {
  long incompleteSends = pendingFutures.stream().filter(x -> !x.isDone()).count();
  LOG.info("Trying to flush pending {} sends.", incompleteSends);
  checkForSendCallbackErrors("Received exception on message send.");
  CompletableFuture<Void> future =
      CompletableFuture.allOf(pendingFutures.toArray(new CompletableFuture[pendingFutures.size()]));

  try {
    // Block until all the pending sends are complete or timeout.
    future.get(DEFAULT_FLUSH_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);
  } catch (InterruptedException | ExecutionException | TimeoutException e) {
    if (e instanceof InterruptedException) {
      // Catching InterruptedException clears the interrupt flag; restore it so callers further
      // up the stack can still observe that this thread was asked to stop.
      Thread.currentThread().interrupt();
    }
    incompleteSends = pendingFutures.stream().filter(x -> !x.isDone()).count();
    String msg = String.format("Flush failed with error. Total pending sends %d", incompleteSends);
    LOG.error(msg, e);
    throw new SamzaException(msg, e);
  }

  pendingFutures.clear();

  checkForSendCallbackErrors("Sending one or more of the messages failed during flush.");
}
 
Example 25
Source Project: samza   Source File: EventHubSystemProducer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts a partition key to its string form and truncates it to
 * {@code ClientConstants.MAX_PARTITION_KEY_LENGTH} characters when it is longer.
 *
 * <p>Supported key types are {@link String}, {@link Integer} and {@code byte[]} (decoded with
 * the platform default charset).
 *
 * @param partitionKey the partition key to convert
 * @return the (possibly truncated) string form of the key
 * @throws SamzaException if the key is {@code null} or of an unsupported type
 */
private String convertPartitionKeyToString(Object partitionKey) {
  final String partitionKeyStr;
  if (partitionKey instanceof String) {
    partitionKeyStr = (String) partitionKey;
  } else if (partitionKey instanceof Integer) {
    partitionKeyStr = String.valueOf(partitionKey);
  } else if (partitionKey instanceof byte[]) {
    partitionKeyStr = new String((byte[]) partitionKey, Charset.defaultCharset());
  } else {
    // A null key fails every instanceof check above; report it instead of hitting an NPE on getClass().
    String keyType = (partitionKey == null) ? "null" : partitionKey.getClass().toString();
    throw new SamzaException("Unsupported key type: " + keyType);
  }

  // Every branch that reaches this point produced a non-null string.
  if (partitionKeyStr.length() > ClientConstants.MAX_PARTITION_KEY_LENGTH) {
    LOG.debug("Length of partition key: {} exceeds limit: {}. Truncating.", partitionKeyStr.length(),
        ClientConstants.MAX_PARTITION_KEY_LENGTH);
    return partitionKeyStr.substring(0, ClientConstants.MAX_PARTITION_KEY_LENGTH);
  }
  return partitionKeyStr;
}
 
Example 26
Source Project: samza   Source File: TestTaskFactoryUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@code TaskFactoryUtil.finalizeTaskFactory} in three situations: a plain
 * {@link TaskFactory} (expected to be rejected), a {@link StreamTaskFactory} with a thread pool
 * (expected to be wrapped so tasks become {@link AsyncStreamTaskAdapter}s using that pool), and
 * an {@link AsyncStreamTaskFactory} (expected to be returned as is).
 */
@Test
public void testFinalizeTaskFactory() throws NoSuchFieldException, IllegalAccessException {
  TaskFactory genericFactory = mock(TaskFactory.class);
  try {
    TaskFactoryUtil.finalizeTaskFactory(genericFactory, null);
    fail("Should have failed with validation");
  } catch (SamzaException expected) {
    // expected
  }

  StreamTaskFactory streamFactory = mock(StreamTaskFactory.class);
  ExecutorService threadPool = mock(ExecutorService.class);
  TaskFactory finalized = TaskFactoryUtil.finalizeTaskFactory(streamFactory, threadPool);
  assertTrue(finalized instanceof AsyncStreamTaskFactory);
  assertTrue(((AsyncStreamTaskFactory) finalized).createInstance() instanceof AsyncStreamTaskAdapter);

  // Read the adapter's private "executor" field to confirm the supplied pool was passed through.
  AsyncStreamTaskAdapter adapter = (AsyncStreamTaskAdapter) ((AsyncStreamTaskFactory) finalized).createInstance();
  Field executorField = AsyncStreamTaskAdapter.class.getDeclaredField("executor");
  executorField.setAccessible(true);
  ExecutorService adapterExecutor = (ExecutorService) executorField.get(adapter);
  assertEquals(adapterExecutor, threadPool);

  AsyncStreamTaskFactory asyncFactory = mock(AsyncStreamTaskFactory.class);
  finalized = TaskFactoryUtil.finalizeTaskFactory(asyncFactory, null);
  assertEquals(finalized, asyncFactory);
}
 
Example 27
Source Project: samza   Source File: KafkaConsumerProxy.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Computes the initial lag of every registered ssp from the consumer's end offsets, records
 * it, tells the sink whether the ssp starts at the high watermark, and refreshes the lag metrics.
 *
 * @throws SamzaException if the consumer returns no end offsets
 */
private void initializeLags() {
  // Fetching end offsets is expensive, so it is done only once at the beginning. After the
  // first poll the lag can be taken from metrics.
  final Map<TopicPartition, Long> endOffsets;
  // Synchronize, in case the consumer is used in some other thread (metadata or something else)
  synchronized (kafkaConsumer) {
    endOffsets = kafkaConsumer.endOffsets(topicPartitionToSSP.keySet());
  }
  if (endOffsets == null) {
    throw new SamzaException("Failed to fetch kafka consumer endoffsets for system " + systemName);
  }

  endOffsets.forEach((topicPartition, endOffset) -> {
    final SystemStreamPartition ssp = topicPartitionToSSP.get(topicPartition);
    final long startingOffset = nextOffsets.get(ssp);
    // An end offset is the offset of the newest message + 1, so a starting offset below it
    // means consumption begins with a lag.
    final long initialLag = endOffset - startingOffset;

    LOG.info("Initial lag for SSP {} is {} (end={}, startOffset={})", ssp, initialLag, endOffset, startingOffset);
    latestLags.put(ssp, initialLag);
    sink.setIsAtHighWatermark(ssp, initialLag == 0);
  });

  // initialize lag metrics
  refreshLagMetrics();
}
 
Example 28
Source Project: samza   Source File: TestSamzaObjectMapper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Deserialization is expected to fail with a {@link SamzaException} for a {@link ContainerModel}
 * JSON that has neither a processor-id nor a container-id. The test removes the
 * {@code "processor-id"} field from container {@code "1"} of the built job model JSON.
 */
@Test(expected = SamzaException.class)
public void testDeserializeContainerModelMissingProcessorIdAndContainerId() throws IOException {
  ObjectNode jobModel = buildJobModelJson();
  ((ObjectNode) jobModel.get("containers").get("1")).remove("processor-id");
  deserializeFromObjectNode(jobModel);
}
 
Example 29
Source Project: beam   Source File: BoundedSourceSystem.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a consumer by splitting the given source and remembering the supplied options,
 * metrics container and step name.
 *
 * @param source the bounded source to split; it is only passed to {@code split} here
 * @param pipelineOptions the pipeline options, used for splitting and stored on the consumer
 * @param metricsContainer the metrics container stored on the consumer
 * @param stepName the step name stored on the consumer
 * @throws SamzaException wrapping any exception raised while splitting the source
 */
Consumer(
    BoundedSource<T> source,
    SamzaPipelineOptions pipelineOptions,
    SamzaMetricsContainer metricsContainer,
    String stepName) {
  try {
    // Splitting happens first; if it fails, none of the other fields are assigned.
    splits = split(source, pipelineOptions);
  } catch (Exception e) {
    throw new SamzaException("Fail to split source", e);
  }
  this.pipelineOptions = pipelineOptions;
  this.metricsContainer = metricsContainer;
  this.stepName = stepName;
}
 
Example 30
Source Project: samza   Source File: DefaultIndexRequestFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sets the source of the index request from the envelope's message.
 *
 * <p>Messages of type {@code byte[]} and {@link Map} are passed to the request's matching
 * {@code source} overload.
 *
 * @param envelope the envelope whose message becomes the request source
 * @param indexRequest the request to populate
 * @throws SamzaException if the message is of any other type
 */
protected void setSource(OutgoingMessageEnvelope envelope, IndexRequest indexRequest) {
  final Object payload = envelope.getMessage();
  if (payload instanceof byte[]) {
    indexRequest.source((byte[]) payload);
    return;
  }
  if (payload instanceof Map) {
    indexRequest.source((Map) payload);
    return;
  }
  throw new SamzaException("Unsupported message type: " + payload.getClass().getCanonicalName());
}