Java Code Examples for org.apache.beam.sdk.coders.Coder#encode()

The following examples show how to use org.apache.beam.sdk.coders.Coder#encode(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: DataStreamsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Encodes each expected value back to back into one byte stream, wraps that stream in a {@code
 * DataStreamDecoder}, and asserts that iterating the decoder yields exactly the expected values
 * and then reports exhaustion.
 */
private <T> void testDecoderWith(Coder<T> coder, T... expected) throws IOException {
  ByteArrayOutputStream encoded = new ByteArrayOutputStream();
  for (int i = 0; i < expected.length; i++) {
    int sizeBefore = encoded.size();
    coder.encode(expected[i], encoded);
    boolean wroteNothing = encoded.size() == sizeBefore;
    if (wroteNothing) {
      // A value that encodes to zero bytes still gets one arbitrary padding byte.
      encoded.write(0);
    }
  }

  byte[] payload = encoded.toByteArray();
  Iterator<T> decoder = new DataStreamDecoder<>(coder, new ByteArrayInputStream(payload));

  Object[] decoded = Iterators.toArray(decoder, Object.class);
  assertArrayEquals(expected, decoded);

  // hasNext() is asked twice on the drained iterator; both answers must be false.
  assertFalse(decoder.hasNext());
  assertFalse(decoder.hasNext());

  // Calling next() past the end is expected to raise NoSuchElementException.
  thrown.expect(NoSuchElementException.class);
  decoder.next();
}
 
Example 2
Source File: StreamingGroupAlsoByWindowFnsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Appends one message to {@code messageBundle}. The message data is {@code value} encoded with
 * {@code valueCoder} in the outer context, the metadata comes from {@code
 * WindmillSink.encodeMetadata} applied to {@code windows} with {@code PaneInfo.NO_FIRING}, and
 * the timestamp is converted with {@code WindmillTimeUtils.harnessToWindmillTimestamp}.
 */
private <V> void addElement(
    InputMessageBundle.Builder messageBundle,
    Collection<IntervalWindow> windows,
    Instant timestamp,
    Coder<V> valueCoder,
    V value)
    throws IOException {
  // Raw cast: the collection coder built from windowCoder (declared outside this method) is
  // viewed as a coder for collections of any BoundedWindow subtype.
  @SuppressWarnings({"unchecked", "rawtypes"})
  Coder<Collection<? extends BoundedWindow>> collectionOfWindowsCoder =
      (Coder) CollectionCoder.of(windowCoder);

  ByteString.Output encodedValue = ByteString.newOutput();
  valueCoder.encode(value, encodedValue, Context.OUTER);
  ByteString payload = encodedValue.toByteString();

  messageBundle
      .addMessagesBuilder()
      .setMetadata(
          WindmillSink.encodeMetadata(collectionOfWindowsCoder, windows, PaneInfo.NO_FIRING))
      .setData(payload)
      .setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(timestamp));
}
 
Example 3
Source File: StreamingGroupAlsoByWindowsReshuffleDoFnTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Adds a single message to the given bundle builder.
 *
 * <p>The value is serialized with {@code valueCoder} using {@code Context.OUTER}; the windows are
 * turned into metadata via {@code WindmillSink.encodeMetadata} together with {@code
 * PaneInfo.NO_FIRING}.
 *
 * @throws IOException if encoding the value or the metadata fails
 */
private <V> void addElement(
    InputMessageBundle.Builder messageBundle,
    Collection<IntervalWindow> windows,
    Instant timestamp,
    Coder<V> valueCoder,
    V value)
    throws IOException {
  // windowCoder is declared outside this method; the raw cast adapts its collection coder to the
  // wildcard collection type that encodeMetadata accepts.
  @SuppressWarnings({"unchecked", "rawtypes"})
  Coder<Collection<? extends BoundedWindow>> wildcardWindowsCoder =
      (Coder) CollectionCoder.of(windowCoder);

  ByteString.Output valueBytes = ByteString.newOutput();
  valueCoder.encode(value, valueBytes, Context.OUTER);
  ByteString serializedValue = valueBytes.toByteString();

  messageBundle
      .addMessagesBuilder()
      .setMetadata(WindmillSink.encodeMetadata(wildcardWindowsCoder, windows, PaneInfo.NO_FIRING))
      .setData(serializedValue)
      .setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(timestamp));
}
 
Example 4
Source File: SamzaTimerInternalsFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code KeyedTimerData} from a timer key, a timestamp and a time domain.
 *
 * <p>The key bytes are the key encoded with {@code keyCoder}; they stay {@code null} when either
 * the coder or the key itself is {@code null}. The same {@code timestamp} is passed for both
 * {@code Instant} arguments of {@code TimerData.of}.
 *
 * @throws RuntimeException wrapping the {@link IOException} if the key cannot be encoded
 */
static <K> KeyedTimerData<K> toKeyedTimerData(
    TimerKey<K> timerKey, long timestamp, TimeDomain domain, Coder<K> keyCoder) {
  final byte[] encodedKey;
  if (keyCoder == null || timerKey.key == null) {
    encodedKey = null;
  } else {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try {
      keyCoder.encode(timerKey.key, buffer);
    } catch (IOException e) {
      throw new RuntimeException("Could not encode key: " + timerKey.key, e);
    }
    encodedKey = buffer.toByteArray();
  }

  TimerInternals.TimerData timerData =
      TimerInternals.TimerData.of(
          timerKey.timerId,
          timerKey.stateNamespace,
          new Instant(timestamp),
          new Instant(timestamp),
          domain);

  return new KeyedTimerData<K>(encodedKey, timerKey.key, timerData);
}
 
Example 5
Source File: CoderProperties.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a coder's {@code registerByteSizeObserver()} and {@code encode()} agree with each
 * other for the given (unencoded) elements. Intended for testing coder implementations.
 *
 * <p>Each element is registered with an observer and then encoded into a counting stream. The
 * observed byte sum is compared with the number of bytes actually written unless {@code
 * context.isWholeStream} is set; the observed element count is always compared with {@code
 * elements.length}.
 */
public static <T> void testByteCount(Coder<T> coder, Coder.Context context, T[] elements)
    throws Exception {
  TestElementByteSizeObserver sizeObserver = new TestElementByteSizeObserver();

  try (CountingOutputStream counted = new CountingOutputStream(ByteStreams.nullOutputStream())) {
    for (int i = 0; i < elements.length; i++) {
      T element = elements[i];
      coder.registerByteSizeObserver(element, sizeObserver);
      coder.encode(element, counted, context);
      sizeObserver.advance();
    }
    long bytesWritten = counted.getCount();

    boolean sizesComparable = !context.isWholeStream;
    if (sizesComparable) {
      assertEquals(bytesWritten, sizeObserver.getSum());
    }
    assertEquals(elements.length, sizeObserver.getCount());
  }
}
 
Example 6
Source File: Watch.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the given spec, coders and key function, and sets up {@code coderFunnel} so that it
 * feeds the encoded output key (not the output itself) into the sink it is handed.
 */
private WatchGrowthFn(
    Growth<InputT, OutputT, KeyT> spec,
    Coder<OutputT> outputCoder,
    SerializableFunction<OutputT, KeyT> outputKeyFn,
    Coder<KeyT> outputKeyCoder) {
  this.spec = spec;
  this.outputCoder = outputCoder;
  this.outputKeyFn = outputKeyFn;
  this.outputKeyCoder = outputKeyCoder;
  this.coderFunnel =
      (element, sink) -> {
        try {
          // Hash the key derived from the output rather than the output itself.
          outputKeyCoder.encode(outputKeyFn.apply(element), Funnels.asOutputStream(sink));
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      };
}
 
Example 7
Source File: CoderUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the encoding of {@code value} to {@code stream}. The stream is expected to be one that
 * never throws {@code IOException}, such as {@code ByteArrayOutputStream} or {@link
 * ExposedByteArrayOutputStream}.
 *
 * <p>The stream is wrapped in an {@code UnownedOutputStream} before being handed to the coder. An
 * {@code IOException} that is a {@code CoderException} is propagated; any other {@code
 * IOException} is reported as an {@code IllegalArgumentException}.
 */
private static <T> void encodeToSafeStream(
    Coder<T> coder, T value, OutputStream stream, Coder.Context context) throws CoderException {
  try {
    OutputStream unowned = new UnownedOutputStream(stream);
    coder.encode(value, unowned, context);
  } catch (IOException ioFailure) {
    Throwables.propagateIfPossible(ioFailure, CoderException.class);
    throw new IllegalArgumentException(
        "Forbidden IOException when writing to OutputStream", ioFailure);
  }
}
 
Example 8
Source File: CoderProperties.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes {@code value} in the given context using a clone of {@code coder} produced by {@code
 * SerializableUtils.clone}, and returns the bytes that were written.
 */
@VisibleForTesting
static <T> byte[] encode(Coder<T> coder, Coder.Context context, T value)
    throws CoderException, IOException {
  @SuppressWarnings("unchecked")
  Coder<T> clonedCoder = SerializableUtils.clone(coder);

  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  UnownedOutputStream unowned = new UnownedOutputStream(buffer);
  clonedCoder.encode(value, unowned, context);
  return buffer.toByteArray();
}
 
Example 9
Source File: IterableCombinerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a populated accumulator through its coder and checks that both the original and the
 * decoded accumulator produce the items that were added, in any order.
 */
@Test
public void testSerializing() throws IOException {
  IterableCombinerFn<String> combinerFn = new IterableCombinerFn<>(STRING_TYPE_DESCRIPTOR);
  IterableCombinerFn.CollectionAccumulator<String> original = combinerFn.createAccumulator();
  FIRST_ITEMS.forEach(item -> original.addInput(item));

  Coder<IterableCombinerFn.CollectionAccumulator<String>> accumulatorCoder =
      combinerFn.getAccumulatorCoder(null, StringUtf8Coder.of());

  // Serialize the accumulator into an in-memory buffer.
  byte[] serialized;
  try (ByteArrayOutputStream sink = new ByteArrayOutputStream()) {
    accumulatorCoder.encode(original, sink);
    sink.flush();
    serialized = sink.toByteArray();
  }

  // Read it back from those bytes.
  IterableCombinerFn.CollectionAccumulator<String> roundTripped;
  try (ByteArrayInputStream source = new ByteArrayInputStream(serialized)) {
    roundTripped = accumulatorCoder.decode(source);
  }

  String[] expectedItems = FIRST_ITEMS.toArray(new String[0]);
  MatcherAssert.assertThat(
      original.extractOutput(), Matchers.containsInAnyOrder(expectedItems));
  MatcherAssert.assertThat(
      roundTripped.extractOutput(), Matchers.containsInAnyOrder(expectedItems));
}
 
Example 10
Source File: CoderHelpers.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes a value to a byte array with the supplied coder.
 *
 * @param value the value to serialize
 * @param coder the coder used to serialize {@code value}
 * @param <T> the type of the serialized value
 * @return the bytes the coder wrote for {@code value}
 * @throws IllegalStateException if the coder fails with an {@link IOException}
 */
public static <T> byte[] toByteArray(T value, Coder<T> coder) {
  final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  try {
    coder.encode(value, buffer);
    return buffer.toByteArray();
  } catch (IOException e) {
    throw new IllegalStateException("Error encoding value: " + value, e);
  }
}
 
Example 11
Source File: BillingEvent.java    From nomulus with Apache License 2.0 5 votes vote down vote up
/**
 * Writes an {@code InvoiceGroupingKey} as eight consecutive UTF-8 strings: start date, end date,
 * product account key, usage grouping key, description, unit price (via {@code String.valueOf}),
 * unit price currency and PO number, in that order.
 */
@Override
public void encode(InvoiceGroupingKey value, OutputStream outStream) throws IOException {
  final Coder<String> utf8 = StringUtf8Coder.of();

  // Field order defines the wire layout; it must not be changed.
  utf8.encode(value.startDate(), outStream);
  utf8.encode(value.endDate(), outStream);
  utf8.encode(value.productAccountKey(), outStream);
  utf8.encode(value.usageGroupingKey(), outStream);
  utf8.encode(value.description(), outStream);
  // The unit price is not a string, so it is written in its String.valueOf form.
  utf8.encode(String.valueOf(value.unitPrice()), outStream);
  utf8.encode(value.unitPriceCurrency(), outStream);
  utf8.encode(value.poNumber(), outStream);
}
 
Example 12
Source File: CoderHelpers.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes a value with the supplied coder and appends the timestamp's millisecond value as
 * eight bytes. Useful when sorting by timestamp.
 *
 * @param value the value to serialize
 * @param coder the coder used to serialize {@code value}
 * @param timestamp the timestamp whose milliseconds are appended after the encoded value
 * @param <T> the type of the serialized value
 * @return the encoded value followed by the 8-byte timestamp
 * @throws IllegalStateException if encoding or writing fails with an {@link IOException}
 */
public static <T> byte[] toByteArrayWithTs(T value, Coder<T> coder, Instant timestamp) {
  final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  try {
    coder.encode(value, buffer);
    // A freshly allocated ByteBuffer is big-endian, so the millis are written high byte first.
    byte[] timestampBytes = ByteBuffer.allocate(8).putLong(timestamp.getMillis()).array();
    buffer.write(timestampBytes);
  } catch (IOException e) {
    throw new IllegalStateException("Error encoding value: " + value, e);
  }
  return buffer.toByteArray();
}
 
Example 13
Source File: ShuffleSink.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Appends {@code value} to {@code chunk} as a length-prefixed record: {@code Ints.BYTES} bytes
 * holding the encoded size (most significant byte first) followed by the encoding itself.
 *
 * @return the number of bytes the coder wrote, excluding the size prefix
 */
private <EncodeT> int encodeToChunk(Coder<EncodeT> coder, EncodeT value) throws IOException {
  // Leave a gap for the size prefix; it can only be filled in once the element has been written.
  final int prefixOffset = chunk.size();
  chunk.resetTo(prefixOffset + Ints.BYTES);
  coder.encode(value, chunk.asOutputStream(), Context.OUTER);
  final int elementSize = chunk.size() - prefixOffset - Ints.BYTES;

  // Fetch the backing array only after encoding, then write the size into the reserved gap.
  byte[] backing = chunk.array();
  for (int i = 0; i < Ints.BYTES; i++) {
    int shift = Byte.SIZE * (Ints.BYTES - 1 - i);
    backing[prefixOffset + i] = (byte) (elementSize >>> shift);
  }
  return elementSize;
}
 
Example 14
Source File: StreamingSideInputFetcher.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@code GlobalDataRequest} for a side-input view as seen from {@code mainWindow}.
 *
 * <p>The main window is mapped to a side-input window with the view's window mapping function.
 * The encoded side-input window becomes the data id's version, the view's tag id becomes its tag,
 * and the existence watermark deadline is derived from the trigger of the view's windowing
 * strategy.
 *
 * @throws RuntimeException wrapping the {@link IOException} if the window cannot be encoded
 */
private <SideWindowT extends BoundedWindow> Windmill.GlobalDataRequest buildGlobalDataRequest(
    PCollectionView<?> view, BoundedWindow mainWindow) {
  @SuppressWarnings("unchecked")
  WindowingStrategy<?, SideWindowT> strategy =
      (WindowingStrategy<?, SideWindowT>) view.getWindowingStrategyInternal();

  Coder<SideWindowT> sideWindowCoder = strategy.getWindowFn().windowCoder();

  SideWindowT sideWindow =
      (SideWindowT) view.getWindowMappingFn().getSideInputWindow(mainWindow);

  ByteString.Output encodedWindow = ByteString.newOutput();
  try {
    sideWindowCoder.encode(sideWindow, encodedWindow, Coder.Context.OUTER);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  Windmill.GlobalDataId dataId =
      Windmill.GlobalDataId.newBuilder()
          .setTag(view.getTagInternal().getId())
          .setVersion(encodedWindow.toByteString())
          .build();

  return Windmill.GlobalDataRequest.newBuilder()
      .setDataId(dataId)
      .setExistenceWatermarkDeadline(
          WindmillTimeUtils.harnessToWindmillTimestamp(
              strategy.getTrigger().getWatermarkThatGuaranteesFiring(sideWindow)))
      .build();
}
 
Example 15
Source File: ApproximateUnique.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Hashes the encoding of {@code element}: the element is encoded with {@code coder} in the outer
 * context into a murmur3_128 hashing stream whose bytes are otherwise discarded, and the hash is
 * returned as a {@code long}.
 */
static <T> long hash(T element, Coder<T> coder) throws CoderException, IOException {
  try (HashingOutputStream hashingSink =
      new HashingOutputStream(Hashing.murmur3_128(), ByteStreams.nullOutputStream())) {
    coder.encode(element, hashingSink, Context.OUTER);
    long digest = hashingSink.hash().asLong();
    return digest;
  }
}
 
Example 16
Source File: StreamingModeExecutionContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Queues a global-data update carrying the view contents for one window.
 *
 * <p>The data and the window are each encoded in the outer context. The update is identified by
 * the tag id and the encoded window (used as the version), and is added to {@code outputBuilder}.
 *
 * @throws IllegalStateException if the serialized key is non-empty, or if {@code stateFamily} is
 *     {@code null}
 * @throws IOException if encoding the data or the window fails
 */
@Override
public <T, W extends BoundedWindow> void writePCollectionViewData(
    TupleTag<?> tag,
    Iterable<T> data,
    Coder<Iterable<T>> dataCoder,
    W window,
    Coder<W> windowCoder)
    throws IOException {
  if (getSerializedKey().size() != 0) {
    throw new IllegalStateException("writePCollectionViewData must follow a Combine.globally");
  }

  ByteString.Output encodedData = ByteString.newOutput();
  dataCoder.encode(data, encodedData, Coder.Context.OUTER);

  ByteString.Output encodedWindow = ByteString.newOutput();
  windowCoder.encode(window, encodedWindow, Coder.Context.OUTER);

  // This check deliberately stays after the encoding steps, matching the established order.
  if (stateFamily == null) {
    throw new IllegalStateException(
        "Tried to write view data for stateless step: " + getNameContext());
  }

  Windmill.GlobalDataId dataId =
      Windmill.GlobalDataId.newBuilder()
          .setTag(tag.getId())
          .setVersion(encodedWindow.toByteString())
          .build();

  Windmill.GlobalData update =
      Windmill.GlobalData.newBuilder()
          .setDataId(dataId)
          .setData(encodedData.toByteString())
          .setStateFamily(stateFamily)
          .build();

  outputBuilder.addGlobalDataUpdates(update);
}
 
Example 17
Source File: WindmillSink.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes message metadata as the pane info followed by the windows (the latter in the outer
 * context) and returns the combined bytes.
 *
 * @throws IOException if either coder fails to write
 */
public static ByteString encodeMetadata(
    Coder<Collection<? extends BoundedWindow>> windowsCoder,
    Collection<? extends BoundedWindow> windows,
    PaneInfo pane)
    throws IOException {
  final ByteString.Output metadata = ByteString.newOutput();
  // Pane first, windows second: both are written to the same output.
  PaneInfoCoder.INSTANCE.encode(pane, metadata);
  windowsCoder.encode(windows, metadata, Coder.Context.OUTER);
  return metadata.toByteString();
}
 
Example 18
Source File: DoFnOperator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes the pair as its integer key (var-int encoded) followed by the value, where the value is
 * encoded with the coder that {@code idsToCoders} holds for that key.
 */
@Override
public void encode(KV<Integer, WindowedValue<?>> kv, OutputStream out) throws IOException {
  // The coder is looked up before anything is written.
  Coder<WindowedValue<?>> valueCoder = idsToCoders.get(kv.getKey());
  VarIntCoder.of().encode(kv.getKey(), out);
  valueCoder.encode(kv.getValue(), out);
}
 
Example 19
Source File: StateFetcherTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Fetches side inputs for two views through a {@code StateFetcher}, clears the cache part-way
 * through, and verifies that exactly three requests reach the (mocked) server for four fetches.
 */
@Test
public void testFetchGlobalDataCacheOverflow() throws Exception {
  Coder<List<String>> coder = ListCoder.of(StringUtf8Coder.of());

  // Pre-encode the two single-element payloads the stubbed server will hand back.
  ByteString.Output stream = ByteString.newOutput();
  coder.encode(Arrays.asList("data1"), stream, Coder.Context.OUTER);
  ByteString encodedIterable1 = stream.toByteString();
  stream = ByteString.newOutput();
  coder.encode(Arrays.asList("data2"), stream, Coder.Context.OUTER);
  ByteString encodedIterable2 = stream.toByteString();

  // The cache is built without any size limit; entries are dropped below via invalidateAll().
  Cache<StateFetcher.SideInputId, StateFetcher.SideInputCacheEntry> cache =
      CacheBuilder.newBuilder().build();

  StateFetcher fetcher = new StateFetcher(server, cache);

  PCollectionView<String> view1 =
      TestPipeline.create().apply(Create.empty(StringUtf8Coder.of())).apply(View.asSingleton());

  PCollectionView<String> view2 =
      TestPipeline.create().apply(Create.empty(StringUtf8Coder.of())).apply(View.asSingleton());

  String tag1 = view1.getTagInternal().getId();
  String tag2 = view2.getTagInternal().getId();

  // Test four calls in a row. First, fetch view1, then view2, then clear the cache (which drops
  // view1), then view 1 again twice. Three responses are stubbed for the four fetches.
  when(server.getSideInputData(any(Windmill.GlobalDataRequest.class)))
      .thenReturn(
          buildGlobalDataResponse(tag1, ByteString.EMPTY, true, encodedIterable1),
          buildGlobalDataResponse(tag2, ByteString.EMPTY, true, encodedIterable2),
          buildGlobalDataResponse(tag1, ByteString.EMPTY, true, encodedIterable1));

  // Fetch 1: view1.
  assertEquals(
      "data1",
      fetcher
          .fetchSideInput(
              view1,
              GlobalWindow.INSTANCE,
              STATE_FAMILY,
              SideInputState.UNKNOWN,
              readStateSupplier)
          .orNull());
  // Fetch 2: view2.
  assertEquals(
      "data2",
      fetcher
          .fetchSideInput(
              view2,
              GlobalWindow.INSTANCE,
              STATE_FAMILY,
              SideInputState.UNKNOWN,
              readStateSupplier)
          .orNull());
  // Drop everything cached so far, so the next view1 fetch has nothing cached to use.
  cache.invalidateAll();
  // Fetch 3: view1 again, after the cache was cleared.
  assertEquals(
      "data1",
      fetcher
          .fetchSideInput(
              view1,
              GlobalWindow.INSTANCE,
              STATE_FAMILY,
              SideInputState.UNKNOWN,
              readStateSupplier)
          .orNull());
  // Fetch 4: view1 once more; the verification below allows no fourth server request.
  assertEquals(
      "data1",
      fetcher
          .fetchSideInput(
              view1,
              GlobalWindow.INSTANCE,
              STATE_FAMILY,
              SideInputState.UNKNOWN,
              readStateSupplier)
          .orNull());

  ArgumentCaptor<Windmill.GlobalDataRequest> captor =
      ArgumentCaptor.forClass(Windmill.GlobalDataRequest.class);

  // Exactly three server requests in total, and nothing else touched on the mock.
  verify(server, times(3)).getSideInputData(captor.capture());
  verifyNoMoreInteractions(server);

  // The captured requests must be for tag1, tag2, tag1, in that order.
  assertThat(
      captor.getAllValues(),
      contains(
          buildGlobalDataRequest(tag1, ByteString.EMPTY),
          buildGlobalDataRequest(tag2, ByteString.EMPTY),
          buildGlobalDataRequest(tag1, ByteString.EMPTY)));
}
 
Example 20
Source File: ExpansionServiceTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an external configuration payload with four entries, each tagged with a list of coder
 * URNs and carrying a pre-encoded payload, populates a {@code TestConfig} from it, and checks
 * that every field was populated with the value that was originally encoded.
 */
@Test
public void testCompoundCodersForExternalConfiguration() throws Exception {
  ExternalTransforms.ExternalConfigurationPayload.Builder builder =
      ExternalTransforms.ExternalConfigurationPayload.newBuilder();

  // config_key1: a VARINT-tagged single byte {1}; asserted below to populate as 1L.
  builder.putConfiguration(
      "config_key1",
      ExternalTransforms.ConfigValue.newBuilder()
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.VARINT))
          .setPayload(ByteString.copyFrom(new byte[] {1}))
          .build());

  // config_key2: an iterable of byte arrays, tagged ITERABLE then BYTES.
  List<byte[]> byteList =
      ImmutableList.of("testing", "compound", "coders").stream()
          .map(str -> str.getBytes(Charsets.UTF_8))
          .collect(Collectors.toList());
  IterableCoder<byte[]> compoundCoder = IterableCoder.of(ByteArrayCoder.of());
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  compoundCoder.encode(byteList, baos);

  builder.putConfiguration(
      "config_key2",
      ExternalTransforms.ConfigValue.newBuilder()
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.ITERABLE))
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.BYTES))
          .setPayload(ByteString.copyFrom(baos.toByteArray()))
          .build());

  // config_key3: an iterable of (bytes, long) pairs; the long is each string's length.
  // The long component is encoded with VarLongCoder and tagged with the VARINT URN.
  List<KV<byte[], Long>> byteKvList =
      ImmutableList.of("testing", "compound", "coders").stream()
          .map(str -> KV.of(str.getBytes(Charsets.UTF_8), (long) str.length()))
          .collect(Collectors.toList());
  IterableCoder<KV<byte[], Long>> compoundCoder2 =
      IterableCoder.of(KvCoder.of(ByteArrayCoder.of(), VarLongCoder.of()));
  ByteArrayOutputStream baos2 = new ByteArrayOutputStream();
  compoundCoder2.encode(byteKvList, baos2);

  builder.putConfiguration(
      "config_key3",
      ExternalTransforms.ConfigValue.newBuilder()
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.ITERABLE))
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.KV))
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.BYTES))
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.VARINT))
          .setPayload(ByteString.copyFrom(baos2.toByteArray()))
          .build());

  // config_key4: an iterable of (list-of-long, bytes) pairs, i.e. a nested iterable as the key.
  List<KV<List<Long>, byte[]>> byteKvListWithListKey =
      ImmutableList.of("testing", "compound", "coders").stream()
          .map(
              str ->
                  KV.of(
                      Collections.singletonList((long) str.length()),
                      str.getBytes(Charsets.UTF_8)))
          .collect(Collectors.toList());
  // NOTE(review): declared as a raw Coder, presumably because the List-typed value does not
  // match the Iterable-typed coder's generic signature — confirm before tightening the type.
  Coder compoundCoder3 =
      IterableCoder.of(KvCoder.of(IterableCoder.of(VarLongCoder.of()), ByteArrayCoder.of()));
  ByteArrayOutputStream baos3 = new ByteArrayOutputStream();
  compoundCoder3.encode(byteKvListWithListKey, baos3);

  // URNs are listed in the order the component coders are nested above.
  builder.putConfiguration(
      "config_key4",
      ExternalTransforms.ConfigValue.newBuilder()
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.ITERABLE))
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.KV))
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.ITERABLE))
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.VARINT))
          .addCoderUrn(BeamUrns.getUrn(RunnerApi.StandardCoders.Enum.BYTES))
          .setPayload(ByteString.copyFrom(baos3.toByteArray()))
          .build());

  // Populate a fresh TestConfig from the assembled payload.
  ExternalTransforms.ExternalConfigurationPayload externalConfig = builder.build();
  TestConfig config = new TestConfig();
  ExpansionService.ExternalTransformRegistrarLoader.populateConfiguration(config, externalConfig);

  // Each populated field must match the value that was encoded for its key.
  assertThat(config.configKey1, Matchers.is(1L));
  assertArrayEquals(Iterables.toArray(config.configKey2, byte[].class), byteList.toArray());
  assertArrayEquals(Iterables.toArray(config.configKey3, KV.class), byteKvList.toArray());
  assertArrayEquals(
      Iterables.toArray(config.configKey4, KV.class), byteKvListWithListKey.toArray());
}