Java Code Examples for org.apache.beam.sdk.util.CoderUtils#decodeFromByteArray()

The following examples show how to use org.apache.beam.sdk.util.CoderUtils#decodeFromByteArray(). You can vote up the examples you like and vote down the ones you don't, and you can go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: ByteToWindowFunction.java    From twister2 with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes an encoded key and its encoded values into a {@code KV}.
 *
 * <p>The key bytes are decoded with {@code keyCoder}; every value byte array is decoded through
 * {@code TranslationUtils.fromByteArray} with {@code wvCoder} and collected into a list.
 *
 * <p>If a {@link CoderException} is thrown, its stack trace is printed and the result is built
 * from whatever was decoded before the failure ({@code null} for anything not yet assigned).
 *
 * @param input tuple of the encoded key and an iterator over the encoded values
 * @return the decoded key paired with the decoded values
 */
@Override
public KV<K, Iterable<WindowedValue<V>>> map(Tuple<byte[], Iterator<byte[]>> input) {
  K decodedKey = null;
  Iterable<WindowedValue<V>> decodedValues = null;
  try {
    decodedKey = CoderUtils.decodeFromByteArray(keyCoder, input.getKey());

    // Adapt the one-shot iterator to a sequential, ordered stream so it can be mapped.
    Spliterator<byte[]> encodedValues =
        Spliterators.spliteratorUnknownSize(input.getValue(), Spliterator.ORDERED);
    decodedValues =
        StreamSupport.stream(encodedValues, false)
            .map(encoded -> TranslationUtils.fromByteArray(encoded, wvCoder))
            .collect(Collectors.toList());
  } catch (CoderException e) {
    e.printStackTrace();
  }
  return KV.of(decodedKey, decodedValues);
}
 
Example 2
Source File: FlinkKeyUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes a key from a {@link ByteBuffer} that is backed by a byte array.
 *
 * <p>The whole backing array is handed to the coder, using the {@code NESTED} coder context.
 *
 * @param byteBuffer buffer holding the encoded key; must be non-null and array-backed
 * @param keyCoder coder used to decode the key; must be non-null
 * @return the decoded key
 * @throws RuntimeException wrapping any exception raised while decoding
 */
public static <K> K decodeKey(ByteBuffer byteBuffer, Coder<K> keyCoder) {
  checkNotNull(byteBuffer, "Provided ByteBuffer must not be null");
  checkNotNull(keyCoder, "Provided coder must not be null");
  checkState(byteBuffer.hasArray(), "ByteBuffer key must contain an array.");
  @SuppressWarnings("ByteBufferBackingArray")
  final byte[] encodedKey = byteBuffer.array();
  try {
    return CoderUtils.decodeFromByteArray(keyCoder, encodedKey, Coder.Context.NESTED);
  } catch (Exception e) {
    // Include the raw bytes in the message so a failing key can be identified.
    final String message =
        String.format(
            Locale.ENGLISH, "Failed to decode encoded key: %s", Arrays.toString(encodedKey));
    throw new RuntimeException(message, e);
  }
}
 
Example 3
Source File: View.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the configured default value, decoding it from its encoded form.
 *
 * @return the decoded default value, or {@code null} if a default is configured but its encoded
 *     form is {@code null}
 * @throws IllegalArgumentException if no default is configured, or if the encoded default cannot
 *     be decoded with {@code valueCoder} (the {@link CoderException} is attached as the cause)
 */
@Override
public T identity() {
  if (!hasDefault) {
    throw new IllegalArgumentException(
        "Empty PCollection accessed as a singleton view. "
            + "Consider setting withDefault to provide a default value");
  }
  if (defaultValue == null) {
    return null;
  }
  try {
    return CoderUtils.decodeFromByteArray(valueCoder, defaultValue);
  } catch (CoderException e) {
    // Chain the CoderException so the root decoding failure is not lost.
    throw new IllegalArgumentException(
        String.format(
            "Could not decode the default value with the provided coder %s", valueCoder),
        e);
  }
}
 
Example 4
Source File: PCollectionViews.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the default value that was specified, decoding it on first access.
 *
 * <p>For internal use only.
 *
 * @return the decoded default value
 * @throws NoSuchElementException if no default was specified.
 */
public T getDefaultValue() {
  if (!hasDefault) {
    throw new NoSuchElementException("Empty PCollection accessed as a singleton view.");
  }
  synchronized (this) {
    // Nothing left to decode: either already decoded or there were no encoded bytes.
    if (encodedDefaultValue == null) {
      return defaultValue;
    }
    try {
      defaultValue = CoderUtils.decodeFromByteArray(valueCoder, encodedDefaultValue);
    } catch (IOException e) {
      throw new RuntimeException("Unexpected IOException: ", e);
    }
    // Drop the encoded bytes once the object form exists; this also ensures the value is
    // decoded only once.
    encodedDefaultValue = null;
    return defaultValue;
  }
}
 
Example 5
Source File: PCollectionViews.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the default value that was specified, decoding it lazily under the instance lock.
 *
 * <p>For internal use only.
 *
 * @return the decoded default value
 * @throws NoSuchElementException if no default was specified.
 */
public T getDefaultValue() {
  if (!hasDefault) {
    throw new NoSuchElementException("Empty PCollection accessed as a singleton view.");
  }
  synchronized (this) {
    final boolean needsDecoding = encodedDefaultValue != null;
    if (needsDecoding) {
      try {
        defaultValue = CoderUtils.decodeFromByteArray(valueCoder, encodedDefaultValue);
      } catch (IOException e) {
        throw new RuntimeException("Unexpected IOException: ", e);
      }
      // Release the encoded form after a successful decode so later calls skip decoding.
      encodedDefaultValue = null;
    }
    return defaultValue;
  }
}
 
Example 6
Source File: ByteToWindowFunctionPrimitive.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes an encoded key/value pair into a windowed {@code KV}.
 *
 * <p>The key is decoded with {@code keyCoder} and the value with {@code wvCoder}. If a {@link
 * CoderException} is thrown, its message is logged at info level and processing continues with
 * whatever was decoded. When no windowed value is available, a global-window value wrapping
 * {@code null} is substituted so timestamp, windows and pane can still be read from it.
 *
 * @param input tuple of encoded key bytes and encoded windowed-value bytes
 * @return the decoded key and value, carrying the value's timestamp, windows and pane
 */
@Override
public WindowedValue<KV<K, V>> map(Tuple<byte[], byte[]> input) {
  K decodedKey = null;
  WindowedValue<V> decodedValue = null;
  try {
    decodedKey = CoderUtils.decodeFromByteArray(keyCoder, input.getKey());
    decodedValue = TranslationUtils.fromByteArray(input.getValue(), wvCoder);
  } catch (CoderException e) {
    LOG.info(e.getMessage());
  }

  if (decodedValue == null) {
    decodedValue = WindowedValue.valueInGlobalWindow(null);
  }

  return WindowedValue.of(
      KV.of(decodedKey, decodedValue.getValue()),
      decodedValue.getTimestamp(),
      decodedValue.getWindows(),
      decodedValue.getPane());
}
 
Example 7
Source File: PairWithConstantKeyDoFnFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code PairWithConstantKeyParDoFn} whose key is decoded from the given cloud object.
 *
 * <p>The coder is reconstructed from the {@code PropertyNames.ENCODING} entry of {@code
 * cloudUserFn}, and the key bytes are read from its {@code WorkerPropertyNames.ENCODED_KEY}
 * entry. The remaining parameters are not used by this factory.
 *
 * @return a ParDoFn holding the decoded key
 * @throws Exception if reading the spec or decoding the key fails
 */
@Override
public ParDoFn create(
    PipelineOptions options,
    CloudObject cloudUserFn,
    List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag,
    Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  CloudObject encodingSpec =
      CloudObject.fromSpec(Structs.getObject(cloudUserFn, PropertyNames.ENCODING));
  Coder<?> keyCoder = CloudObjects.coderFromCloudObject(encodingSpec);

  byte[] encodedKey = Structs.getBytes(cloudUserFn, WorkerPropertyNames.ENCODED_KEY);
  Object decodedKey = CoderUtils.decodeFromByteArray(keyCoder, encodedKey);

  return new PairWithConstantKeyParDoFn(decodedKey);
}
 
Example 8
Source File: LengthPrefixCoderTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a value encoded with a length-prefixed long coder can be decoded to raw bytes
 * with a length-prefixed byte-array coder, re-encoded, and decoded back to the original long.
 */
@Test
public void testMultiCoderCycle() throws Exception {
  LengthPrefixCoder<Long> longCoder = LengthPrefixCoder.of(BigEndianLongCoder.of());
  LengthPrefixCoder<byte[]> bytesCoder = LengthPrefixCoder.of(ByteArrayCoder.of());

  // Length prefix followed by the big-endian long: [0x08, 0, 0, 0, 0, 0, 0, 0, 0x16]
  byte[] encodedLong = CoderUtils.encodeToByteArray(longCoder, 22L);

  // Payload only, prefix stripped: [0, 0, 0, 0, 0, 0, 0, 0x16]
  byte[] payload = CoderUtils.decodeFromByteArray(bytesCoder, encodedLong);

  // Prefix restored: [0x08, 0, 0, 0, 0, 0, 0, 0, 0x16]
  byte[] reencoded = CoderUtils.encodeToByteArray(bytesCoder, payload);

  long roundTripped = CoderUtils.decodeFromByteArray(longCoder, reencoded);

  assertFalse(
      "Length-prefix decoding to bytes should drop the length",
      Arrays.equals(encodedLong, payload));
  assertArrayEquals(encodedLong, reencoded);
  assertEquals(22L, roundTripped);
}
 
Example 9
Source File: GroupingShuffleReader.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Moves to the next group of values, if any.
 *
 * <p>The underlying group iterator is advanced inside the tracked read state. When a group is
 * available its key is decoded with the parent reader's key coder, set on the execution context,
 * and exposed through {@code current} together with an iterable over the group's values.
 *
 * @return {@code true} if a new group is available, {@code false} once the groups are exhausted
 * @throws IOException if advancing or decoding the key fails
 */
@Override
public boolean advance() throws IOException {
  try (Closeable trackedRead = tracker.enterState(readState)) {
    if (!groups.advance()) {
      current = null;
      return false;
    }
  }

  K decodedKey = CoderUtils.decodeFromByteArray(parentReader.keyCoder, groups.getCurrent().key);
  parentReader.executionContext.setKey(decodedKey);

  KV<K, Reiterable<V>> keyedValues =
      KV.<K, Reiterable<V>>of(decodedKey, new ValuesIterable(groups.getCurrent().values));
  current = new ValueInEmptyWindows<>(keyedValues);
  return true;
}
 
Example 10
Source File: TestStreamP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a serialized {@code TestStream} from its byte payload.
 *
 * @param payload encoded bytes of the stream
 * @param coder coder used for decoding
 * @return the decoded stream
 */
private static TestStream decodePayload(byte[] payload, TestStream.TestStreamCoder coder) {
  final Object decoded;
  try {
    decoded = CoderUtils.decodeFromByteArray(coder, payload);
  } catch (CoderException e) {
    // Delegate to the shared rethrow helper instead of declaring a checked exception.
    throw ExceptionUtil.rethrow(e);
  }
  return (TestStream) decoded;
}
 
Example 11
Source File: TranslationUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a byte array with the given coder. (From spark code)
 *
 * <p>A {@link CoderException} is not propagated: it is logged at {@code SEVERE} level and
 * {@code null} is returned instead.
 *
 * @param serialized byte array to be deserialized.
 * @param coder Coder to deserialize with.
 * @param <T> Type of object to be returned.
 * @return Deserialized object, or {@code null} if decoding failed.
 */
public static <T> T fromByteArray(byte[] serialized, Coder<T> coder) {
  try {
    return CoderUtils.decodeFromByteArray(coder, serialized);
  } catch (CoderException e) {
    LOG.log(Level.SEVERE, "Error while decoding message", e);
    return null;
  }
}
 
Example 12
Source File: UngroupedShuffleReader.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Moves to the next shuffle record, if any, and decodes its value into {@code current}.
 *
 * <p>Only the record's value is used. The record's length is reported to the shuffle reader
 * before the value is decoded.
 *
 * @return {@code true} if a record was read, {@code false} once the iterator is exhausted
 * @throws IOException if decoding the value fails
 */
@Override
public boolean advance() throws IOException {
  if (iterator.hasNext()) {
    ShuffleEntry entry = iterator.next();
    // The primary and secondary keys are discarded; only the value is decoded.
    byte[] encodedValue = entry.getValue();
    shuffleReader.notifyElementRead(entry.length());
    current = CoderUtils.decodeFromByteArray(shuffleReader.coder, encodedValue);
    return true;
  }
  current = null;
  return false;
}
 
Example 13
Source File: TranslationUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a byte array into a windowed value with the given coder. (From spark code)
 *
 * <p>A {@link CoderException} is not propagated: it is logged at {@code SEVERE} level and
 * {@code null} is returned instead.
 *
 * @param <T> Type of object to be returned.
 * @param serialized byte array to be deserialized.
 * @param coder Coder to deserialize with.
 * @return Deserialized windowed value, or {@code null} if decoding failed.
 */
public static <T> WindowedValue<T> fromByteArray(
    byte[] serialized, WindowedValue.WindowedValueCoder<T> coder) {
  try {
    return CoderUtils.decodeFromByteArray(coder, serialized);
  } catch (CoderException e) {
    LOG.log(Level.SEVERE, "Error while decoding message", e);
    return null;
  }
}
 
Example 14
Source File: StructuralKey.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes and returns the key from its encoded form.
 *
 * @return the key decoded with {@code coder}
 * @throws IllegalArgumentException if the encoded bytes cannot be decoded; the {@link
 *     CoderException} is attached as the cause
 */
@Override
public K getKey() {
  try {
    return CoderUtils.decodeFromByteArray(coder, encoded);
  } catch (CoderException e) {
    final String coderName = coder.getClass().getSimpleName();
    throw new IllegalArgumentException(
        "Could not decode Key with coder of type " + coderName, e);
  }
}
 
Example 15
Source File: SerializableCoderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a list of records survives an encode/decode round trip through an iterable
 * coder built on {@code SerializableCoder}.
 */
@Test
public void testSerializableCoder() throws Exception {
  IterableCoder<MyRecord> recordsCoder = IterableCoder.of(SerializableCoder.of(MyRecord.class));

  List<MyRecord> expected = new ArrayList<>();
  for (String line : LINES) {
    expected.add(new MyRecord(line));
  }

  byte[] bytes = CoderUtils.encodeToByteArray(recordsCoder, expected);
  Iterable<MyRecord> roundTripped = CoderUtils.decodeFromByteArray(recordsCoder, bytes);

  assertEquals(expected, roundTripped);
}
 
Example 16
Source File: SerializableMatchers.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the value, decoding it from {@code encodedValue} when no decoded value is cached.
 *
 * <p>Decoding is attempted whenever the cached value is {@code null}.
 *
 * @return the decoded value
 * @throws RuntimeException if decoding with {@code coder} fails; the {@link CoderException} is
 *     attached as the cause
 */
@Override
public T get() {
  if (value != null) {
    return value;
  }
  try {
    value = CoderUtils.decodeFromByteArray(coder, encodedValue);
  } catch (CoderException exc) {
    throw new RuntimeException("Error deserializing via Coder", exc);
  }
  return value;
}
 
Example 17
Source File: UnboundedReadFromBoundedSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads every element of a bounded source through the unbounded adapter, restarting the reader
 * from an encoded-then-decoded checkpoint after every ninth element.
 *
 * <p>Afterwards asserts that the final checkpoint has no residual elements and that the elements
 * read match {@code expectedElements} in count and as a set.
 *
 * @param boundedSource source to wrap in the adapter
 * @param expectedElements elements the source is expected to produce
 */
private <T> void testBoundedToUnboundedSourceAdapterCheckpointRestart(
    BoundedSource<T> boundedSource, List<T> expectedElements) throws Exception {
  BoundedToUnboundedSourceAdapter<T> unboundedSource =
      new BoundedToUnboundedSourceAdapter<>(boundedSource);

  PipelineOptions options = PipelineOptionsFactory.create();
  BoundedToUnboundedSourceAdapter<T>.Reader reader = unboundedSource.createReader(options, null);

  List<T> actual = Lists.newArrayList();
  boolean hasNext = reader.start();
  while (hasNext) {
    actual.add(reader.getCurrent());

    // Between checkpoints, simply move on to the next element.
    if (actual.size() % 9 != 0) {
      hasNext = reader.advance();
      continue;
    }

    // Every 9 elements: take a checkpoint and round-trip it through its coder.
    Checkpoint<T> checkpoint = reader.getCheckpointMark();
    Coder<Checkpoint<T>> checkpointCoder = unboundedSource.getCheckpointMarkCoder();
    byte[] encodedCheckpoint = CoderUtils.encodeToByteArray(checkpointCoder, checkpoint);
    Checkpoint<T> decodedCheckpoint =
        CoderUtils.decodeFromByteArray(checkpointCoder, encodedCheckpoint);
    reader.close();
    checkpoint.finalizeCheckpoint();

    // Resume reading with a fresh reader built from the decoded checkpoint.
    reader = unboundedSource.createReader(options, decodedCheckpoint);
    hasNext = reader.start();
  }

  Checkpoint<T> checkpointDone = reader.getCheckpointMark();
  assertTrue(
      checkpointDone.getResidualElements() == null
          || checkpointDone.getResidualElements().isEmpty());

  assertEquals(expectedElements.size(), actual.size());
  assertEquals(Sets.newHashSet(expectedElements), Sets.newHashSet(actual));
}
 
Example 18
Source File: ShuffleSinkTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes the given key/value pairs through a {@code GROUP_KEYS} shuffle sink and checks the
 * records that reach the test shuffle writer.
 *
 * <p>Each record's secondary key must decode to the test timestamp, its key and value must
 * decode back to the written pair, and the sizes returned by the sink writer must match those
 * reported by the shuffle writer.
 *
 * @param expected pairs to write and expect back, in order
 */
void runTestWriteGroupingShuffleSink(List<KV<Integer, String>> expected) throws Exception {
  BatchModeExecutionContext executionContext =
      BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "STAGE");
  ShuffleSink<KV<Integer, String>> shuffleSink =
      new ShuffleSink<>(
          PipelineOptionsFactory.create(),
          null,
          ShuffleSink.ShuffleKind.GROUP_KEYS,
          WindowedValue.getFullCoder(
              KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()),
              IntervalWindow.getCoder()),
          executionContext,
          TestOperationContext.create());

  TestShuffleWriter shuffleWriter = new TestShuffleWriter();
  List<Long> actualSizes = new ArrayList<>();
  try (SinkWriter<WindowedValue<KV<Integer, String>>> sinkWriter =
      shuffleSink.writer(shuffleWriter, "dataset")) {
    for (KV<Integer, String> pair : expected) {
      WindowedValue<KV<Integer, String>> windowedPair =
          WindowedValue.of(
              KV.of(pair.getKey(), pair.getValue()),
              timestamp,
              Lists.newArrayList(window),
              PaneInfo.NO_FIRING);
      actualSizes.add(sinkWriter.add(windowedPair));
    }
  }

  // Decode every record captured by the test writer back into a key/value pair.
  List<KV<Integer, String>> actual = new ArrayList<>();
  for (ShuffleEntry entry : shuffleWriter.getRecords()) {
    byte[] encodedKey = entry.getKey();
    byte[] encodedValue = entry.getValue();
    Assert.assertEquals(
        timestamp, CoderUtils.decodeFromByteArray(InstantCoder.of(), entry.getSecondaryKey()));

    Integer decodedKey = CoderUtils.decodeFromByteArray(BigEndianIntegerCoder.of(), encodedKey);
    String decodedValue = CoderUtils.decodeFromByteArray(StringUtf8Coder.of(), encodedValue);

    actual.add(KV.of(decodedKey, decodedValue));
  }

  Assert.assertEquals(expected, actual);
  Assert.assertEquals(shuffleWriter.getSizes(), actualSizes);
}
 
Example 19
Source File: PubsubUnboundedSourceTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises restoring a reader from a checkpoint that was round-tripped through the checkpoint
 * coder.
 *
 * <p>Two messages are prepared. The first reader reads one, takes and finalizes a checkpoint
 * that still lists the second message's ack id as not yet read, then reads the second message.
 * The checkpoint is encoded and decoded, and a second reader created from it must read the
 * second message again and then report no further messages.
 */
@Test
public void multipleReaders() throws IOException {
  // Build two incoming messages: data_0/ackid_0 and data_1/ackid_1.
  List<IncomingMessage> incoming = new ArrayList<>();
  for (int i = 0; i < 2; i++) {
    String data = String.format("data_%d", i);
    String ackid = String.format("ackid_%d", i);
    incoming.add(
        IncomingMessage.of(
            com.google.pubsub.v1.PubsubMessage.newBuilder()
                .setData(ByteString.copyFromUtf8(data))
                .build(),
            TIMESTAMP,
            0,
            ackid,
            RECORD_ID));
  }
  // NOTE(review): setupOneMessage is defined outside this snippet; presumably it installs the
  // messages and creates primSource — confirm against the test fixture.
  setupOneMessage(incoming);
  PubsubReader reader = primSource.createReader(p.getOptions(), null);
  // Consume two messages, only read one.
  assertTrue(reader.start());
  assertEquals("data_0", data(reader.getCurrent()));

  // Grab checkpoint.
  PubsubCheckpoint checkpoint = reader.getCheckpointMark();
  checkpoint.finalizeCheckpoint();
  // The second message has not been read yet, so its ack id must be carried in the checkpoint.
  assertEquals(1, checkpoint.notYetReadIds.size());
  assertEquals("ackid_1", checkpoint.notYetReadIds.get(0));

  // Read second message.
  assertTrue(reader.advance());
  assertEquals("data_1", data(reader.getCurrent()));

  // Restore from checkpoint.
  // The checkpoint is round-tripped through its coder; the not-yet-read id must survive.
  byte[] checkpointBytes =
      CoderUtils.encodeToByteArray(primSource.getCheckpointMarkCoder(), checkpoint);
  checkpoint =
      CoderUtils.decodeFromByteArray(primSource.getCheckpointMarkCoder(), checkpointBytes);
  assertEquals(1, checkpoint.notYetReadIds.size());
  assertEquals("ackid_1", checkpoint.notYetReadIds.get(0));

  // Re-read second message.
  reader = primSource.createReader(p.getOptions(), checkpoint);
  assertTrue(reader.start());
  assertEquals("data_1", data(reader.getCurrent()));

  // We are done.
  assertFalse(reader.advance());

  // ACK final message.
  checkpoint = reader.getCheckpointMark();
  checkpoint.finalizeCheckpoint();
  reader.close();
}
 
Example 20
Source File: ShuffleSinkTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes the given entries through a {@code GROUP_KEYS_AND_SORT_VALUES} shuffle sink and checks
 * the records that reach the test shuffle writer.
 *
 * <p>For each record the key is decoded from the record key, the sort key from the record's
 * secondary key, and the sort value from the record value. The rebuilt entries must equal
 * {@code expected}, and the sizes returned by the sink writer must match those reported by the
 * shuffle writer.
 *
 * @param expected entries to write and expect back, in order
 */
void runTestWriteGroupingSortingShuffleSink(List<KV<Integer, KV<String, Integer>>> expected)
    throws Exception {
  BatchModeExecutionContext executionContext =
      BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "STAGE");
  ShuffleSink<KV<Integer, KV<String, Integer>>> shuffleSink =
      new ShuffleSink<>(
          PipelineOptionsFactory.create(),
          null,
          ShuffleSink.ShuffleKind.GROUP_KEYS_AND_SORT_VALUES,
          WindowedValue.getFullCoder(
              KvCoder.of(
                  BigEndianIntegerCoder.of(),
                  KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())),
              new GlobalWindows().windowCoder()),
          executionContext,
          TestOperationContext.create());

  TestShuffleWriter shuffleWriter = new TestShuffleWriter();
  List<Long> actualSizes = new ArrayList<>();
  try (Sink.SinkWriter<WindowedValue<KV<Integer, KV<String, Integer>>>> sinkWriter =
      shuffleSink.writer(shuffleWriter, "dataset")) {
    for (KV<Integer, KV<String, Integer>> entry : expected) {
      actualSizes.add(sinkWriter.add(WindowedValue.valueInGlobalWindow(entry)));
    }
  }

  // Rebuild each key / (sort key, sort value) entry from the captured shuffle records.
  List<KV<Integer, KV<String, Integer>>> actual = new ArrayList<>();
  for (ShuffleEntry shuffled : shuffleWriter.getRecords()) {
    byte[] encodedKey = shuffled.getKey();
    byte[] encodedValue = shuffled.getValue();
    byte[] encodedSortKey = shuffled.getSecondaryKey();

    Integer decodedKey = CoderUtils.decodeFromByteArray(BigEndianIntegerCoder.of(), encodedKey);
    // The sort key is decoded straight from a stream rather than through CoderUtils.
    String decodedSortKey = StringUtf8Coder.of().decode(new ByteArrayInputStream(encodedSortKey));
    Integer decodedSortValue =
        CoderUtils.decodeFromByteArray(BigEndianIntegerCoder.of(), encodedValue);

    actual.add(KV.of(decodedKey, KV.of(decodedSortKey, decodedSortValue)));
  }

  Assert.assertEquals(expected, actual);
  Assert.assertEquals(shuffleWriter.getSizes(), actualSizes);
}