org.apache.beam.sdk.util.CoderUtils Java Examples

The following examples show how to use org.apache.beam.sdk.util.CoderUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IsmSideInputReader.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the metadata record stored under the given key components and decodes it with
 * {@code metadataCoder}.
 *
 * @param readers the readers to search
 * @param keyComponents the key components identifying the metadata record
 * @param metadataCoder the coder used to decode the raw metadata bytes
 * @return the decoded metadata, or {@code null} when no reader contains the key components
 * @throws IOException if locating the readers or decoding the metadata fails
 */
private <V, T> T findMetadata(
    List<IsmReader<WindowedValue<V>>> readers, List<?> keyComponents, Coder<T> metadataCoder)
    throws IOException {

  List<IsmReader<WindowedValue<V>>.IsmPrefixReaderIterator> matches =
      findAndStartReaders(readers, keyComponents);
  if (matches.isEmpty()) {
    // No reader holds the requested key components, so there is no metadata to decode.
    return null;
  }

  // At most one matching iterator is expected; getOnlyElement rejects anything else.
  byte[] encodedMetadata =
      Iterables.getOnlyElement(matches).getCurrent().getValue().getMetadata();
  return CoderUtils.decodeFromByteArray(metadataCoder, encodedMetadata);
}
 
Example #2
Source File: TDigestQuantilesTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips {@code tDigest} through {@link MergingDigestCoder} and reports whether the decoded
 * sketch matches the original.
 *
 * <p>The only way to compare the two sketches is centroid by centroid. The means are doubles but
 * are encoded as floats and cast back during decoding, which introduces a small approximation,
 * so means are compared after narrowing both sides to {@code float}.
 *
 * @param tDigest the sketch to encode and decode
 * @return {@code true} if both sketches hold the same number of centroids and every pair of
 *     centroids agrees on (float) mean and count
 * @throws IOException if encoding or decoding fails
 */
private <T> boolean encodeDecodeEquals(MergingDigest tDigest) throws IOException {
  MergingDigest decoded = CoderUtils.clone(new MergingDigestCoder(), tDigest);

  Iterator<Centroid> decodedIt = decoded.centroids().iterator();
  Iterator<Centroid> originalIt = tDigest.centroids().iterator();

  while (decodedIt.hasNext() && originalIt.hasNext()) {
    Centroid c1 = decodedIt.next();
    Centroid c2 = originalIt.next();
    if ((float) c1.mean() != (float) c2.mean() || c1.count() != c2.count()) {
      return false;
    }
  }

  // Both iterators must be exhausted together: a sketch that gained or lost centroids during
  // the round trip is not equal, even if the shared prefix matches.
  return !decodedIt.hasNext() && !originalIt.hasNext();
}
 
Example #3
Source File: TestStreamTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** Checks that a {@link TestStream} is unchanged by an encode/decode cycle through its coder. */
@Test
public void testTestStreamCoder() throws Exception {
  TestStream.TestStreamCoder<String> streamCoder =
      TestStream.TestStreamCoder.of(StringUtf8Coder.of());

  // A stream mixing element, watermark and processing-time events.
  TestStream<String> original =
      TestStream.create(StringUtf8Coder.of())
          .addElements("hey")
          .advanceWatermarkTo(Instant.ofEpochMilli(22521600))
          .advanceProcessingTime(Duration.millis(42))
          .addElements("hey", "joe")
          .advanceWatermarkToInfinity();

  byte[] encoded = CoderUtils.encodeToByteArray(streamCoder, original);
  TestStream<String> roundTripped = CoderUtils.decodeFromByteArray(streamCoder, encoded);

  assertThat(roundTripped, is(original));
}
 
Example #4
Source File: SortValues.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Encodes every secondary-key/value pair of the current element to bytes, feeds the pairs to a
 * freshly created sorter, and outputs the element's key paired with an iterable that decodes the
 * sorted result.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while encoding or sorting
 */
@ProcessElement
public void processElement(ProcessContext c) {
  Iterable<KV<SecondaryKeyT, ValueT>> unsorted = c.element().getValue();

  try {
    Sorter sorter = BufferedExternalSorter.create(sorterOptions);
    for (KV<SecondaryKeyT, ValueT> entry : unsorted) {
      byte[] encodedKey = CoderUtils.encodeToByteArray(keyCoder, entry.getKey());
      byte[] encodedValue = CoderUtils.encodeToByteArray(valueCoder, entry.getValue());
      sorter.add(KV.of(encodedKey, encodedValue));
    }

    c.output(KV.of(c.element().getKey(), new DecodingIterable(sorter.sort())));
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example #5
Source File: PubsubIOTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Encodes each input with {@code coder}, wraps the bytes in an {@link IncomingMessage}, and
 * installs a pull-based test client factory that serves those messages.
 *
 * @param inputs the values to publish through the test client
 * @param coder the coder used to turn each value into the message payload
 * @throws RuntimeException wrapping a {@link CoderException} if an input cannot be encoded
 */
private <T> void setupTestClient(List<T> inputs, Coder<T> coder) {
  List<IncomingMessage> messages =
      inputs.stream()
          .map(
              input -> {
                byte[] payload;
                try {
                  payload = CoderUtils.encodeToByteArray(coder, input);
                } catch (CoderException e) {
                  throw new RuntimeException(e);
                }
                return IncomingMessage.of(
                    com.google.pubsub.v1.PubsubMessage.newBuilder()
                        .setData(ByteString.copyFrom(payload))
                        .build(),
                    1234L,
                    0,
                    UUID.randomUUID().toString(),
                    UUID.randomUUID().toString());
              })
          .collect(Collectors.toList());

  clientFactory = PubsubTestClient.createFactoryForPull(CLOCK, SUBSCRIPTION, 60, messages);
}
 
Example #6
Source File: TextIOReadTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Writes each expected element on its own line of a temporary file, reads the file back with
 * {@link TextIO}, and asserts that the pipeline produces exactly the expected elements.
 *
 * @param expected the lines to write and expect back, in any order
 * @throws Exception if the temporary file cannot be created or written, or encoding fails
 */
private void runTestRead(String[] expected) throws Exception {
  File tmpFile = tempFolder.newFile();
  String filename = tmpFile.getPath();

  // Write with an explicit UTF-8 charset. Without it PrintStream re-encodes the lines in the
  // platform default charset, which corrupts non-ASCII elements on non-UTF-8 platforms.
  try (PrintStream writer =
      new PrintStream(new FileOutputStream(tmpFile), false, Charsets.UTF_8.name())) {
    for (String elem : expected) {
      byte[] encodedElem = CoderUtils.encodeToByteArray(StringUtf8Coder.of(), elem);
      String line = new String(encodedElem, Charsets.UTF_8);
      writer.println(line);
    }
  }

  TextIO.Read read = TextIO.read().from(filename);
  PCollection<String> output = p.apply(read);

  PAssert.that(output).containsInAnyOrder(expected);
  p.run();
}
 
Example #7
Source File: ByteToWindowFunction.java    From twister2 with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the byte-encoded key and the iterator of byte-encoded values of {@code input} into a
 * key paired with the list of decoded windowed values.
 *
 * <p>If a {@link CoderException} is raised, its stack trace is printed and whatever had not been
 * decoded yet is returned as {@code null} inside the resulting KV.
 */
@Override
public KV<K, Iterable<WindowedValue<V>>> map(Tuple<byte[], Iterator<byte[]>> input) {
  K decodedKey = null;
  Iterable<WindowedValue<V>> decodedValues = null;
  try {
    decodedKey = CoderUtils.decodeFromByteArray(keyCoder, input.getKey());

    // Expose the value iterator as an ordered, sequential stream so it can be mapped.
    Spliterator<byte[]> encodedValues =
        Spliterators.spliteratorUnknownSize(input.getValue(), Spliterator.ORDERED);
    decodedValues =
        StreamSupport.stream(encodedValues, false)
            .map(encoded -> TranslationUtils.fromByteArray(encoded, wvCoder))
            .collect(Collectors.toList());
  } catch (CoderException e) {
    e.printStackTrace();
  }
  return KV.of(decodedKey, decodedValues);
}
 
Example #8
Source File: ByteToWindowFunction.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Turns a tuple of an encoded key and an iterator of encoded values into a KV of the decoded key
 * and the list of decoded windowed values.
 *
 * <p>A {@link CoderException} is only logged (message only); anything not decoded by then is
 * returned as {@code null} inside the resulting KV.
 */
@Override
public KV<K, Iterable<WindowedValue<V>>> map(Tuple<byte[], Iterator<byte[]>> input) {
  K decodedKey = null;
  Iterable<WindowedValue<V>> decodedValues = null;
  try {
    decodedKey = CoderUtils.decodeFromByteArray(keyCoder, input.getKey());
    // TODO need to replace this with a custom iterator
    Spliterator<byte[]> encodedValues =
        Spliterators.spliteratorUnknownSize(input.getValue(), Spliterator.ORDERED);
    decodedValues =
        StreamSupport.stream(encodedValues, false)
            .map(encoded -> TranslationUtils.fromByteArray(encoded, wvCoder))
            .collect(Collectors.toList());
  } catch (CoderException e) {
    LOG.info(e.getMessage());
  }
  return KV.of(decodedKey, decodedValues);
}
 
Example #9
Source File: IsmSideInputReaderTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the metadata records for a map: one record at position 0 holding the number of keys,
 * followed by one record per key at positions 1, 2, ... holding the encoded key.
 *
 * <p>Each windowed value is expected to be within the same window.
 *
 * @param keyCoder the coder used to encode each key
 * @param keys the keys to describe
 * @param window the window the records belong to
 * @return the size record followed by the positional key records
 */
<K, V> List<IsmRecord<WindowedValue<V>>> forMapMetadata(
    Coder<K> keyCoder, Collection<K> keys, BoundedWindow window) throws Exception {

  List<IsmRecord<WindowedValue<V>>> records = new ArrayList<>();

  // Position 0 carries the total number of keys.
  records.add(
      IsmRecord.<WindowedValue<V>>meta(
          ImmutableList.of(IsmFormat.getMetadataKey(), window, 0L),
          CoderUtils.encodeToByteArray(VarLongCoder.of(), (long) keys.size())));

  // Positions 1..n carry the encoded keys in iteration order.
  long position = 1L;
  for (K key : keys) {
    records.add(
        IsmRecord.<WindowedValue<V>>meta(
            ImmutableList.of(IsmFormat.getMetadataKey(), window, position),
            CoderUtils.encodeToByteArray(keyCoder, key)));
    position++;
  }
  return records;
}
 
Example #10
Source File: GroupingShuffleReader.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Moves to the next group, decodes its key, records the key on the execution context, and
 * exposes the group as the current element.
 *
 * @return {@code true} if a group is now current, {@code false} if the groups are exhausted
 * @throws IOException if advancing the groups or decoding the key fails
 */
@Override
public boolean advance() throws IOException {
  // The underlying group advance is performed while the tracker is in the read state.
  try (Closeable read = tracker.enterState(readState)) {
    if (!groups.advance()) {
      current = null;
      return false;
    }
  }

  byte[] encodedKey = groups.getCurrent().key;
  K key = CoderUtils.decodeFromByteArray(parentReader.keyCoder, encodedKey);
  parentReader.executionContext.setKey(key);

  Reiterable<V> values = new ValuesIterable(groups.getCurrent().values);
  current = new ValueInEmptyWindows<>(KV.<K, Reiterable<V>>of(key, values));
  return true;
}
 
Example #11
Source File: PCollectionViews.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the default value that was specified, decoding it from its encoded form on first use.
 *
 * <p>For internal use only.
 *
 * @return the decoded default value
 * @throws NoSuchElementException if no default was specified.
 */
public T getDefaultValue() {
  if (!hasDefault) {
    throw new NoSuchElementException("Empty PCollection accessed as a singleton view.");
  }
  synchronized (this) {
    if (encodedDefaultValue == null) {
      // Already decoded (or nothing to decode): hand back the cached object.
      return defaultValue;
    }
    try {
      defaultValue = CoderUtils.decodeFromByteArray(valueCoder, encodedDefaultValue);
      // Dropping the encoded bytes frees them and ensures the decode happens only once.
      encodedDefaultValue = null;
    } catch (IOException e) {
      throw new RuntimeException("Unexpected IOException: ", e);
    }
    return defaultValue;
  }
}
 
Example #12
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a state request builder addressing bag user state {@code userStateId} for {@code key}
 * in the global window, under the fixed transform id {@code "transform"}.
 *
 * @param key the encoded user key
 * @param userStateId the id of the user state cell
 * @return a builder with the state key populated
 * @throws Exception if the global window cannot be encoded
 */
private static BeamFnApi.StateRequest.Builder stateRequest(ByteString key, String userStateId)
    throws Exception {
  ByteString encodedWindow =
      ByteString.copyFrom(
          CoderUtils.encodeToByteArray(GlobalWindow.Coder.INSTANCE, GlobalWindow.INSTANCE));

  BeamFnApi.StateKey.BagUserState bagUserState =
      BeamFnApi.StateKey.BagUserState.newBuilder()
          .setTransformId("transform")
          .setKey(key)
          .setUserStateId(userStateId)
          .setWindow(encodedWindow)
          .build();

  BeamFnApi.StateKey.Builder stateKey =
      BeamFnApi.StateKey.newBuilder().setBagUserState(bagUserState);
  return BeamFnApi.StateRequest.newBuilder().setStateKey(stateKey);
}
 
Example #13
Source File: PublishResultCodersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the header-less full coder: a minimal result must satisfy decode/encode equality, and a
 * fully populated result must keep its request id and HTTP status code while coming back with
 * empty HTTP headers.
 */
@Test
public void testFullPublishResultWithoutHeadersDecodeEncodeEquals() throws Exception {
  CoderProperties.coderDecodeEncodeEqual(
      PublishResultCoders.fullPublishResultWithoutHeaders(),
      new PublishResult().withMessageId(UUID.randomUUID().toString()));

  PublishResult original = buildFullPublishResult();
  PublishResult copy =
      CoderUtils.clone(PublishResultCoders.fullPublishResultWithoutHeaders(), original);

  // Response metadata and status code survive the round trip.
  assertThat(
      copy.getSdkResponseMetadata().getRequestId(),
      equalTo(original.getSdkResponseMetadata().getRequestId()));
  assertThat(
      copy.getSdkHttpMetadata().getHttpStatusCode(),
      equalTo(original.getSdkHttpMetadata().getHttpStatusCode()));
  // Headers are expected to come back empty with this coder.
  assertThat(copy.getSdkHttpMetadata().getHttpHeaders().isEmpty(), equalTo(true));
}
 
Example #14
Source File: PairWithConstantKeyDoFnFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link PairWithConstantKeyParDoFn} whose key is decoded from the bytes stored on
 * {@code cloudUserFn}, using the coder described by the same object's encoding property.
 *
 * @throws Exception if the coder cannot be reconstructed or the key cannot be decoded
 */
@Override
public ParDoFn create(
    PipelineOptions options,
    CloudObject cloudUserFn,
    List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag,
    Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  // Rebuild the key coder from the encoding spec attached to the user fn.
  Coder<?> keyCoder =
      CloudObjects.coderFromCloudObject(
          CloudObject.fromSpec(Structs.getObject(cloudUserFn, PropertyNames.ENCODING)));

  byte[] encodedKey = Structs.getBytes(cloudUserFn, WorkerPropertyNames.ENCODED_KEY);
  Object constantKey = CoderUtils.decodeFromByteArray(keyCoder, encodedKey);
  return new PairWithConstantKeyParDoFn(constantKey);
}
 
Example #15
Source File: StateNamespaces.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the string key for this namespace in the form {@code /<window>/<trigger>/}, where
 * {@code <window>} is the Base64 encoding of the window and {@code <trigger>} is the trigger
 * index rendered in radix {@code TRIGGER_RADIX} and upper-cased.
 *
 * @return the string key
 * @throws RuntimeException if the window cannot be encoded
 */
@Override
public String stringKey() {
  try {
    String encodedWindow = CoderUtils.encodeToBase64(windowCoder, window);
    // Use base 36 so that can address 36 triggers in a single byte and still be human
    // readable. Upper-case with Locale.ROOT: under the default locale a digit such as 'i'
    // becomes a non-ASCII dotted capital I in Turkish/Azeri environments, which would make
    // the key depend on the JVM's locale.
    String encodedTrigger =
        Integer.toString(triggerIndex, TRIGGER_RADIX).toUpperCase(java.util.Locale.ROOT);
    // equivalent to String.format("/%s/%s/", ...)
    return "/" + encodedWindow + "/" + encodedTrigger + "/";
  } catch (CoderException e) {
    throw new RuntimeException("Unable to generate string key from window " + window, e);
  }
}
 
Example #16
Source File: PAssert.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expands into a view over the actual contents: filters and gathers the panes, extracts the
 * requested pane, flattens and re-windows the elements, clones every element through the
 * collection's coder, and finally applies the configured view transform.
 */
@Override
public PCollectionView<ActualT> expand(PBegin input) {
  final Coder<T> elementCoder = actual.getCoder();

  // Emits a coder-made copy of each element instead of the element itself.
  DoFn<T, T> cloneEachElement =
      new DoFn<T, T>() {
        @ProcessElement
        public void processElement(ProcessContext context) throws CoderException {
          context.output(CoderUtils.clone(elementCoder, context.element()));
        }
      };

  return actual
      .apply("FilterActuals", rewindowActuals.prepareActuals())
      .apply("GatherPanes", GatherAllPanes.globally())
      .apply("ExtractPane", MapElements.via(extractPane))
      .setCoder(IterableCoder.of(actual.getCoder()))
      .apply(Flatten.iterables())
      .apply("RewindowActuals", rewindowActuals.windowActuals())
      .apply(ParDo.of(cloneEachElement))
      .apply(actualView);
}
 
Example #17
Source File: PublishResultCodersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the full coder including headers: a minimal result must satisfy decode/encode equality,
 * and a fully populated result must keep its request id, HTTP status code and HTTP headers.
 */
@Test
public void testFullPublishResultIncludingHeadersDecodeEncodeEquals() throws Exception {
  CoderProperties.coderDecodeEncodeEqual(
      PublishResultCoders.fullPublishResult(),
      new PublishResult().withMessageId(UUID.randomUUID().toString()));

  PublishResult original = buildFullPublishResult();
  PublishResult copy = CoderUtils.clone(PublishResultCoders.fullPublishResult(), original);

  // Response metadata, status code and headers must all survive the round trip.
  assertThat(
      copy.getSdkResponseMetadata().getRequestId(),
      equalTo(original.getSdkResponseMetadata().getRequestId()));
  assertThat(
      copy.getSdkHttpMetadata().getHttpStatusCode(),
      equalTo(original.getSdkHttpMetadata().getHttpStatusCode()));
  assertThat(
      copy.getSdkHttpMetadata().getHttpHeaders(),
      equalTo(original.getSdkHttpMetadata().getHttpHeaders()));
}
 
Example #18
Source File: MapToTupleFunction.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a windowed key/value pair into a tuple of the encoded key and the encoded windowed
 * value, where the windowed value carries the input's timestamp, windows and pane.
 *
 * @return the encoded tuple, or {@code null} if encoding fails (only the message is logged)
 */
@Override
public Tuple<byte[], byte[]> map(WindowedValue<KV<K, V>> input) {
  // Push the window information down onto the value so it travels with the encoded bytes.
  WindowedValue<V> windowedElement =
      WindowedValue.of(
          input.getValue().getValue(),
          input.getTimestamp(),
          input.getWindows(),
          input.getPane());
  WindowedValue<KV<K, WindowedValue<V>>> rewrapped =
      WindowedValue.of(
          KV.of(input.getValue().getKey(), windowedElement),
          input.getTimestamp(),
          input.getWindows(),
          input.getPane());

  try {
    byte[] encodedKey = CoderUtils.encodeToByteArray(keyCoder, rewrapped.getValue().getKey());
    byte[] encodedValue =
        CoderUtils.encodeToByteArray(wvCoder, rewrapped.getValue().getValue());
    return new Tuple<>(encodedKey, encodedValue);
  } catch (CoderException e) {
    LOG.info(e.getMessage());
    return null;
  }
}
 
Example #19
Source File: LengthPrefixCoderTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a value encoded with a length-prefixed long coder can be decoded as raw bytes by a
 * length-prefixed byte-array coder, re-encoded to the identical bytes, and finally decoded back
 * to the original long.
 */
@Test
public void testMultiCoderCycle() throws Exception {
  LengthPrefixCoder<Long> longCoder = LengthPrefixCoder.of(BigEndianLongCoder.of());
  LengthPrefixCoder<byte[]> bytesCoder = LengthPrefixCoder.of(ByteArrayCoder.of());

  // Length prefix plus payload: [0x08, 0, 0, 0, 0, 0, 0, 0, 0x16]
  byte[] userEncoded = CoderUtils.encodeToByteArray(longCoder, 22L);

  // Payload only: [0, 0, 0, 0, 0, 0, 0, 0x16]
  byte[] decodedToBytes = CoderUtils.decodeFromByteArray(bytesCoder, userEncoded);

  // Length prefix restored: [0x08, 0, 0, 0, 0, 0, 0, 0, 0x16]
  byte[] reencodedBytes = CoderUtils.encodeToByteArray(bytesCoder, decodedToBytes);

  long userDecoded = CoderUtils.decodeFromByteArray(longCoder, reencodedBytes);

  assertFalse(
      "Length-prefix decoding to bytes should drop the length",
      Arrays.equals(userEncoded, decodedToBytes));
  assertArrayEquals(userEncoded, reencodedBytes);
  assertEquals(22L, userDecoded);
}
 
Example #20
Source File: DAGBuilder.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the encoded key from an encoded windowed key/value element.
 *
 * @param b the encoded windowed KV
 * @return the constant {@code "ALL"} when no coder is configured; otherwise the key re-encoded
 *     with the KV coder's key coder
 */
@Override
public Object applyEx(byte[] b) throws Exception {
  if (coder == null) {
    return "ALL";
  }

  // todo: decoding twice....
  WindowedValue<KV<K, V>> decoded = CoderUtils.decodeFromByteArray(coder, b);
  KvCoder<K, V> kvCoder = (KvCoder<K, V>) coder.getValueCoder();
  K key = decoded.getValue().getKey();
  return CoderUtils.encodeToByteArray(kvCoder.getKeyCoder(), key);
}
 
Example #21
Source File: ByteArrayCoderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that mutating the input array after it was encoded does not affect what is decoded
 * from the previously produced bytes.
 */
@Test
public void testEncodeThenMutate() throws Exception {
  byte[] input = new byte[] {0x7, 0x3, 0xA, 0xf};
  byte[] encoded = CoderUtils.encodeToByteArray(TEST_CODER, input);

  // Change the source array only after the encoded snapshot was taken.
  input[1] = 0x9;

  byte[] decoded = CoderUtils.decodeFromByteArray(TEST_CODER, encoded);

  // The decoded bytes reflect the pre-mutation contents, so they must differ from the input.
  assertThat(input, not(equalTo(decoded)));
}
 
Example #22
Source File: AvroCoderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that {@link AvroCoder} works around issues in Avro where cache classes might be from the
 * wrong ClassLoader, causing confusing "Cannot cast X to X" error messages.
 *
 * <p>The same POJO class is loaded through two separate intercepting class loaders, and an
 * instance of each is cloned with a coder built for its own class.
 */
@Test
public void testTwoClassLoaders() throws Exception {
  String pojoClassName = AvroCoderTestPojo.class.getName();
  ClassLoader parentLoader = Thread.currentThread().getContextClassLoader();
  ClassLoader firstLoader = new InterceptingUrlClassLoader(parentLoader, pojoClassName);
  ClassLoader secondLoader = new InterceptingUrlClassLoader(parentLoader, pojoClassName);

  Class<?> firstPojoClass = firstLoader.loadClass(pojoClassName);
  Class<?> secondPojoClass = secondLoader.loadClass(pojoClassName);

  Object firstPojo = InstanceBuilder.ofType(firstPojoClass).withArg(String.class, "hello").build();
  Object secondPojo =
      InstanceBuilder.ofType(secondPojoClass).withArg(String.class, "goodbye").build();

  // Sanity check: the two classes must be incompatible, otherwise nothing is being tested.
  try {
    secondPojoClass.cast(firstPojo);
    fail("Expected ClassCastException; without it, this test is vacuous");
  } catch (ClassCastException e) {
    // Expected: same class name, different class loaders.
  }

  // The first coder is expected to populate the Avro SpecificData cache.
  // The second coder is expected to be corrupted if the caching is done wrong.
  AvroCoder<Object> firstCoder = (AvroCoder) AvroCoder.of(firstPojoClass);
  AvroCoder<Object> secondCoder = (AvroCoder) AvroCoder.of(secondPojoClass);

  Object firstClone = CoderUtils.clone(firstCoder, firstPojo);
  Object secondClone = CoderUtils.clone(secondCoder, secondPojo);

  // The uncorrupted coder must produce an instance of its own class.
  firstPojoClass.cast(firstClone);

  // Confirmed to fail prior to the fix
  secondPojoClass.cast(secondClone);
}
 
Example #23
Source File: KvCoderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Expects encoding {@code null} to fail with a {@link CoderException} naming a null KV. */
@Test
public void encodeNullThrowsCoderException() throws Exception {
  // Register both expectations before triggering the failure.
  thrown.expectMessage("cannot encode a null KV");
  thrown.expect(CoderException.class);

  CoderUtils.encodeToBase64(TEST_CODER, null);
}
 
Example #24
Source File: HadoopFormatIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns {@code input} unchanged when it is of a known immutable type, and a coder-made clone
 * of it otherwise.
 *
 * <p>Beam expects immutable objects, but the Hadoop InputFormats tend to re-use the same object
 * when returning them, so possibly mutable objects are cloned.
 *
 * @param input the object returned by the input format
 * @param coder the coder used to clone the object
 * @return {@code input} itself or its clone
 */
private <T> T cloneIfPossiblyMutable(T input, Coder<T> coder)
    throws CoderException, ClassCastException {
  if (isKnownImmutable(input)) {
    return input;
  }
  return CoderUtils.clone(coder, input);
}
 
Example #25
Source File: BigDecimalCoderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks, for every test value, that the size reported to a byte-size observer equals the length
 * of the value's encoding in the nested context.
 */
@Test
public void testGetEncodedElementByteSize() throws Exception {
  TestElementByteSizeObserver observer = new TestElementByteSizeObserver();
  for (BigDecimal value : TEST_VALUES) {
    TEST_CODER.registerByteSizeObserver(value, observer);
    observer.advance();

    long observedSize = observer.getSumAndReset();
    long encodedSize =
        CoderUtils.encodeToByteArray(TEST_CODER, value, Coder.Context.NESTED).length;
    assertThat(observedSize, equalTo(encodedSize));
  }
}
 
Example #26
Source File: UnboundedReadFromBoundedSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a checkpoint built from two nulls still reports null residual elements and a null
 * residual source after an encode/decode round trip.
 */
@Test
public void testCheckpointCoderNulls() throws Exception {
  CheckpointCoder<String> checkpointCoder = new CheckpointCoder<>(StringUtf8Coder.of());
  Checkpoint<String> empty = new Checkpoint<>(null, null);

  byte[] encoded = CoderUtils.encodeToByteArray(checkpointCoder, empty);
  Checkpoint<String> roundTripped = CoderUtils.decodeFromByteArray(checkpointCoder, encoded);

  assertNull(roundTripped.getResidualElements());
  assertNull(roundTripped.getResidualSource());
}
 
Example #27
Source File: TextualIntegerCoderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Expects encoding {@code null} to fail with a {@link CoderException} naming a null Integer. */
@Test
public void encodeNullThrowsCoderException() throws Exception {
  // Register both expectations before triggering the failure.
  thrown.expectMessage("cannot encode a null Integer");
  thrown.expect(CoderException.class);

  CoderUtils.encodeToBase64(TEST_CODER, null);
}
 
Example #28
Source File: AmqpMessageCoderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a message with a very large body (32 Mi space characters) can be cloned through
 * {@link AmqpMessageCoder} and that the clone's body has the same string form as the original.
 */
@Test
public void encodeDecodeLargeMessage() throws Exception {
  Message message = Message.Factory.create();
  message.setAddress("address");
  message.setSubject("subject");
  String body = Joiner.on("").join(Collections.nCopies(32 * 1024 * 1024, " "));
  message.setBody(new AmqpValue(body));

  AmqpMessageCoder coder = AmqpMessageCoder.of();

  Message clone = CoderUtils.clone(coder, message);

  // The comparison result used to be discarded, so the test could never fail on a mismatch.
  // The bodies are deliberately left out of the failure message because they are huge.
  if (!clone.getBody().toString().equals(message.getBody().toString())) {
    throw new AssertionError("Decoded AMQP message body differs from the original body");
  }
}
 
Example #29
Source File: CommonCoderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Utility for adding new entries to the common coder spec: encodes {@code value} with {@code
 * coder} in the given context and renders the resulting bytes as a JSON string literal with
 * non-ASCII characters escaped.
 *
 * @param coder the coder to encode with
 * @param value the value to encode
 * @param context the coder context to encode in
 * @return the JSON-escaped string holding one character per encoded byte
 * @throws CoderException if encoding fails or the JSON string cannot be produced
 */
private static <T> String jsonByteString(Coder<T> coder, T value, Context context)
    throws CoderException {
  byte[] encoded = CoderUtils.encodeToByteArray(coder, value, context);

  // ISO-8859-1 maps every byte to exactly one character, so no byte is lost in the string.
  String oneCharPerByte = new String(encoded, StandardCharsets.ISO_8859_1);

  ObjectMapper escapingMapper = new ObjectMapper();
  escapingMapper.configure(JsonGenerator.Feature.ESCAPE_NON_ASCII, true);
  try {
    return escapingMapper.writeValueAsString(oneCharPerByte);
  } catch (JsonProcessingException e) {
    throw new CoderException(String.format("Unable to encode %s with coder %s", value, coder), e);
  }
}
 
Example #30
Source File: ByteArrayCoderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Expects encoding {@code null} to fail with a {@link CoderException} naming a null byte[]. */
@Test
public void encodeNullThrowsCoderException() throws Exception {
  // Register both expectations before triggering the failure.
  thrown.expectMessage("cannot encode a null byte[]");
  thrown.expect(CoderException.class);

  CoderUtils.encodeToBase64(TEST_CODER, null);
}