Java Code Examples for org.apache.beam.sdk.io.FileIO#ReadableFile

The following examples show how to use org.apache.beam.sdk.io.FileIO#ReadableFile. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: ThriftIO.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes every Thrift record contained in a single file and emits each one to {@code out}.
 *
 * <p>The file is opened as a stream, its content is loaded into a buffer-backed read transport,
 * and new instances of {@code tBaseType} are read through the protocol created by
 * {@code tProtocol} for as long as the transport reports bytes remaining in its buffer.
 *
 * @param file the file to read records from
 * @param out receiver for the decoded records
 * @throws RuntimeException wrapping any exception raised while opening, reading, decoding or
 *     closing the file; the failure is logged together with the file's resource id first
 */
@ProcessElement
public void processElement(@Element FileIO.ReadableFile file, OutputReceiver<T> out) {
  // try-with-resources guarantees the stream (and the channel it wraps) is released on every
  // path; previously it was never closed, leaking the underlying channel.
  try (InputStream inputStream = Channels.newInputStream(file.open())) {
    TIOStreamTransport streamTransport =
        new TIOStreamTransport(new BufferedInputStream(inputStream));
    AutoExpandingBufferReadTransport readTransport =
        new AutoExpandingBufferReadTransport(262_144_000);
    // NOTE(review): InputStream.available() is only an estimate of the bytes readable without
    // blocking, not necessarily the file length - confirm this loads the whole file on every
    // filesystem this transform is used with.
    readTransport.fill(streamTransport, inputStream.available());
    TProtocol protocol = tProtocol.getProtocol(readTransport);
    // Keep decoding until the buffered content has been fully consumed.
    while (protocol.getTransport().getBytesRemainingInBuffer() > 0) {
      TBase<?, ?> tb = (TBase<?, ?>) tBaseType.getDeclaredConstructor().newInstance();
      tb.read(protocol);
      out.output((T) tb);
    }
  } catch (Exception ioe) {
    String filename = file.getMetadata().resourceId().toString();
    LOG.error(String.format("Error in reading file: %1$s%n%2$s", filename, ioe));
    throw new RuntimeException(ioe);
  }
}
 
Example 2
Source File: ParquetIO.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Reads all records from one Parquet file and outputs them to the process context.
 *
 * <p>The file must report efficient seeking in its metadata; otherwise the element is rejected.
 * When {@code modelClass} is set, its static {@code get} method is invoked reflectively to obtain
 * the data model passed to the reader builder.
 *
 * @param processContext context supplying the input file and receiving the records read
 * @throws RuntimeException if the file's metadata says it is not efficiently seekable
 * @throws Exception if opening the file, the reflective data-model lookup, or reading fails
 */
@ProcessElement
public void processElement(ProcessContext processContext) throws Exception {
  FileIO.ReadableFile file = processContext.element();

  if (!file.getMetadata().isReadSeekEfficient()) {
    ResourceId filename = file.getMetadata().resourceId();
    throw new RuntimeException(String.format("File has to be seekable: %s", filename));
  }

  // The channel is owned here so it is closed even when building the reader fails (e.g. the
  // reflective call below throws). Closing an already-closed channel has no effect, so this is
  // safe even if closing the reader also closes the channel.
  try (SeekableByteChannel seekableByteChannel = file.openSeekable()) {
    AvroParquetReader.Builder builder =
        AvroParquetReader.<GenericRecord>builder(new BeamParquetInputFile(seekableByteChannel));
    if (modelClass != null) {
      // all GenericData implementations have a static get method
      builder = builder.withDataModel((GenericData) modelClass.getMethod("get").invoke(null));
    }

    try (ParquetReader<GenericRecord> reader = builder.build()) {
      GenericRecord read;
      // read() returning null ends the loop.
      while ((read = reader.read()) != null) {
        processContext.output(read);
      }
    }
  }
}
 
Example 3
Source File: ThriftIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies the Thrift {@code ReadFn} to each readable file and sets a {@code ThriftCoder} on the
 * resulting collection.
 *
 * <p>Both the record class and the Thrift protocol factory are checked for null before the
 * transform is applied.
 *
 * @param input the files to read
 * @return the collection produced by the read function, with its coder set
 */
@Override
public PCollection<T> expand(PCollection<FileIO.ReadableFile> input) {
  // Fail early if the transform was not fully configured.
  checkNotNull(getRecordClass(), "Record class cannot be null");
  checkNotNull(getTProtocolFactory(), "Thrift protocol cannot be null");

  PCollection<T> records =
      input.apply(ParDo.of(new ReadFn<>(getRecordClass(), getTProtocolFactory())));
  return records.setCoder(ThriftCoder.of(getRecordClass(), getTProtocolFactory()));
}
 
Example 4
Source File: ParquetIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies the Parquet {@code ReadFn} to each readable file and sets an {@code AvroCoder} built
 * from the configured schema on the resulting collection.
 *
 * @param input the files to read
 * @return the collection produced by the read function, with its coder set
 */
@Override
public PCollection<GenericRecord> expand(PCollection<FileIO.ReadableFile> input) {
  // The schema is needed for the coder below, so reject a missing one up front.
  checkNotNull(getSchema(), "Schema can not be null");

  PCollection<GenericRecord> records =
      input.apply(ParDo.of(new ReadFn(getAvroDataModel())));
  return records.setCoder(AvroCoder.of(getSchema()));
}