Java Code Examples for org.apache.parquet.hadoop.api.WriteSupport

The following examples show how to use org.apache.parquet.hadoop.api.WriteSupport. They are extracted from open source projects; where known, the project and source file are noted above each example.
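
Before the project examples, here is a minimal sketch of a custom implementation, since most of the snippets below only show how an existing WriteSupport is wired up. Everything in it (the Pair record, the class name, the schema) is hypothetical, chosen only to illustrate the contract: init declares the schema, prepareForWrite receives the RecordConsumer, and write emits one record.

import java.util.Collections;

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

/** Hypothetical record type used only for this sketch (Java 16+). */
record Pair(int id, String name) {}

public class PairWriteSupport extends WriteSupport<Pair> {

  private static final MessageType SCHEMA = MessageTypeParser.parseMessageType(
      "message pair { required int32 id; required binary name (UTF8); }");

  private RecordConsumer recordConsumer;

  @Override
  public WriteContext init(Configuration configuration) {
    // Declare the file schema and any extra key/value metadata for the footer.
    return new WriteContext(SCHEMA, Collections.emptyMap());
  }

  @Override
  public void prepareForWrite(RecordConsumer recordConsumer) {
    // Called with the consumer that records must be emitted to.
    this.recordConsumer = recordConsumer;
  }

  @Override
  public void write(Pair record) {
    recordConsumer.startMessage();
    recordConsumer.startField("id", 0);
    recordConsumer.addInteger(record.id());
    recordConsumer.endField("id", 0);
    recordConsumer.startField("name", 1);
    recordConsumer.addBinary(Binary.fromString(record.name()));
    recordConsumer.endField("name", 1);
    recordConsumer.endMessage();
  }
}
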
Example 1
protected WriteSupport<T> getWriteSupport(Configuration conf) {
  AvroLogicalTypeSupport avroLogicalTypeSupport = AvroLogicalTypeSupport.getAvroLogicalTypeSupport();
  if (avroLogicalTypeSupport.isLogicalTypeSupported()) {
    LOG.debug("Returning write support with converter = AvroSchemaConverter190Int96Avro18");
    return new AvroWriteSupportInt96Avro18<>(
        (new AvroSchemaConverter190Int96Avro18(conf)).convert(this.schema),
        this.schema,
        this.model,
        this.timeZoneId
    );
  } else {
    LOG.debug("Returning write support with converter = AvroSchemaConverter190Int96Avro17");
    return new AvroWriteSupportInt96Avro17<>(
        (new AvroSchemaConverter190Int96Avro17(conf)).convert(this.schema),
        this.schema,
        this.model,
        this.timeZoneId
    );
  }
}
 
Example 2
Source Project: parquet-mr   Source File: InternalParquetRecordWriter.java    License: Apache License 2.0
/**
 * @param parquetFileWriter the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param rowGroupSize the size of a block in the file (this will be approximate)
 * @param compressor the codec used to compress
 * @param validating if schema validation should be turned on
 * @param props parquet encoding properties
 */
public InternalParquetRecordWriter(
    ParquetFileWriter parquetFileWriter,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long rowGroupSize,
    BytesCompressor compressor,
    boolean validating,
    ParquetProperties props) {
  this.parquetFileWriter = parquetFileWriter;
  this.writeSupport = Objects.requireNonNull(writeSupport, "writeSupport cannot be null");
  this.schema = schema;
  this.extraMetaData = extraMetaData;
  this.rowGroupSize = rowGroupSize;
  this.rowGroupSizeThreshold = rowGroupSize;
  this.nextRowGroupSize = rowGroupSizeThreshold;
  this.compressor = compressor;
  this.validating = validating;
  this.props = props;
  initStore();
}
 
Example 3
Source Project: parquet-mr   Source File: ParquetRecordWriter.java    License: Apache License 2.0
/**
 *
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the size of a page in the file (this will be approximate)
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for dictionary size
 * @param enableDictionary to enable the dictionary
 * @param validating if schema validation should be turned on
 * @param writerVersion writer compatibility version
 * @param memoryManager memory manager for the write
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long blockSize, int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    MemoryManager memoryManager) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, compressor, validating, props);
  this.memoryManager = Objects.requireNonNull(memoryManager, "memoryManager cannot be null");
  memoryManager.addWriter(internalWriter, blockSize);
  this.codecFactory = null;
}
 
Example 4
Source Project: parquet-mr   Source File: ParquetRecordWriter.java    License: Apache License 2.0
/**
 *
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param codec the compression codec used to compress the pages
 * @param validating if schema validation should be turned on
 * @param props parquet encoding properties
 * @param memoryManager memory manager for the write
 * @param conf Hadoop configuration to use while accessing the filesystem
 */
ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long blockSize,
    CompressionCodecName codec,
    boolean validating,
    ParquetProperties props,
    MemoryManager memoryManager,
    Configuration conf) {
  this.codecFactory = new CodecFactory(conf, props.getPageSizeThreshold());
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, codecFactory.getCompressor(codec), validating,
      props);
  this.memoryManager = Objects.requireNonNull(memoryManager, "memoryManager cannot be null");
  memoryManager.addWriter(internalWriter, blockSize);
}
 
Example 5
Source Project: parquet-mr   Source File: ParquetWriter.java    License: Apache License 2.0
/**
 * Create a new ParquetWriter.
 *
 * @param file the file to create
 * @param writeSupport the implementation to write a record to a RecordConsumer
 * @param compressionCodecName the compression codec to use
 * @param blockSize the block size threshold
 * @param pageSize the page size threshold
 * @param dictionaryPageSize the page size threshold for the dictionary pages
 * @param enableDictionary to turn dictionary encoding on
 * @param validating to turn on validation using the schema
 * @param writerVersion version of parquetWriter from {@link ParquetProperties.WriterVersion}
 * @param conf Hadoop configuration to use while accessing the filesystem
 * @throws IOException if there is an error while writing
 * @deprecated will be removed in 2.0.0
 */
@Deprecated
public ParquetWriter(
    Path file,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int blockSize,
    int pageSize,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    Configuration conf) throws IOException {
  this(file, ParquetFileWriter.Mode.CREATE, writeSupport,
      compressionCodecName, blockSize, pageSize, dictionaryPageSize,
      enableDictionary, validating, writerVersion, conf);
}
 
Example 6
Source Project: parquet-mr   Source File: ParquetWriter.java    License: Apache License 2.0
/**
 * Create a new ParquetWriter.
 *
 * @param file the file to create
 * @param mode file creation mode
 * @param writeSupport the implementation to write a record to a RecordConsumer
 * @param compressionCodecName the compression codec to use
 * @param blockSize the block size threshold
 * @param pageSize the page size threshold
 * @param dictionaryPageSize the page size threshold for the dictionary pages
 * @param enableDictionary to turn dictionary encoding on
 * @param validating to turn on validation using the schema
 * @param writerVersion version of parquetWriter from {@link ParquetProperties.WriterVersion}
 * @param conf Hadoop configuration to use while accessing the filesystem
 * @throws IOException if there is an error while writing
 * @deprecated will be removed in 2.0.0
 */
@Deprecated
public ParquetWriter(
    Path file,
    ParquetFileWriter.Mode mode,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int blockSize,
    int pageSize,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    Configuration conf) throws IOException {
  this(HadoopOutputFile.fromPath(file, conf),
      mode, writeSupport, compressionCodecName, blockSize,
      validating, conf, MAX_PADDING_SIZE_DEFAULT,
      ParquetProperties.builder()
          .withPageSize(pageSize)
          .withDictionaryPageSize(dictionaryPageSize)
          .withDictionaryEncoding(enableDictionary)
          .withWriterVersion(writerVersion)
          .build());
}
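
Both constructors above are deprecated; the replacement is a ParquetWriter.Builder subclass. A minimal sketch, reusing the hypothetical PairWriteSupport from the opening example:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.api.WriteSupport;

public class PairParquetWriterBuilder
    extends ParquetWriter.Builder<Pair, PairParquetWriterBuilder> {

  public PairParquetWriterBuilder(Path path) {
    super(path);
  }

  @Override
  protected PairParquetWriterBuilder self() {
    return this;
  }

  @Override
  protected WriteSupport<Pair> getWriteSupport(Configuration conf) {
    // The builder defers WriteSupport creation until build() assembles the writer.
    return new PairWriteSupport();
  }
}

Callers then write new PairParquetWriterBuilder(path).withCompressionCodec(...).build() instead of picking one of the deprecated constructors.
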
 
Example 7
Source Project: iceberg   Source File: Parquet.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
private <T> WriteSupport<T> getWriteSupport(MessageType type) {
  if (writeSupport != null) {
    return (WriteSupport<T>) writeSupport;
  } else {
    return new AvroWriteSupport<>(
        type,
        ParquetAvro.parquetAvroSchema(AvroSchemaUtil.convert(schema, name)),
        ParquetAvro.DEFAULT_MODEL);
  }
}
 
Example 8
Source Project: iceberg   Source File: Parquet.java    License: Apache License 2.0
@Override
protected WriteSupport<T> getWriteSupport(Configuration configuration) {
  for (Map.Entry<String, String> entry : config.entrySet()) {
    configuration.set(entry.getKey(), entry.getValue());
  }
  return new ParquetWriteSupport<>(type, keyValueMetadata, writeSupport);
}
 
Example 9
Source Project: hudi   Source File: HoodieAvroWriteSupport.java    License: Apache License 2.0
@Override
public WriteSupport.FinalizedWriteContext finalizeWrite() {
  HashMap<String, String> extraMetaData = new HashMap<>();
  if (bloomFilter != null) {
    extraMetaData.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
    if (minRecordKey != null && maxRecordKey != null) {
      extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey);
      extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey);
    }
    if (bloomFilter.getBloomFilterTypeCode().name().contains(HoodieDynamicBoundedBloomFilter.TYPE_CODE_PREFIX)) {
      extraMetaData.put(HOODIE_BLOOM_FILTER_TYPE_CODE, bloomFilter.getBloomFilterTypeCode().name());
    }
  }
  return new WriteSupport.FinalizedWriteContext(extraMetaData);
}
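
The map returned here ends up in the Parquet footer's key/value metadata. A hedged sketch of reading those entries back with parquet-mr's ParquetFileReader (the method name is illustrative):

static Map<String, String> readFooterMetadata(Configuration conf, Path path) throws IOException {
  // Uses org.apache.parquet.hadoop.ParquetFileReader and
  // org.apache.parquet.hadoop.util.HadoopInputFile.
  try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, conf))) {
    // Entries such as HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY above appear in this map.
    return reader.getFooter().getFileMetaData().getKeyValueMetaData();
  }
}
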
 
Example 10
Source Project: parquet-mr   Source File: GroupWriteSupport.java    License: Apache License 2.0
@Override
public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) {
  // if present, prefer the schema passed to the constructor
  if (schema == null) {
    schema = getSchema(configuration);
  }
  return new WriteContext(schema, this.extraMetaData);
}
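
When no schema is passed to the constructor, GroupWriteSupport reads it from the Configuration, where GroupWriteSupport.setSchema stores it. A minimal end-to-end sketch using parquet-mr's example module (ExampleParquetWriter, Group, SimpleGroupFactory); the schema and output path are illustrative:

Configuration conf = new Configuration();
MessageType schema = MessageTypeParser.parseMessageType(
    "message example { required int32 id; required binary name (UTF8); }");
// Stores the schema under the configuration key that init(...) reads it back from.
GroupWriteSupport.setSchema(schema, conf);

try (ParquetWriter<Group> writer =
    ExampleParquetWriter.builder(new Path("/tmp/example.parquet"))
        .withConf(conf)
        .withType(schema)
        .build()) {
  Group group = new SimpleGroupFactory(schema).newGroup()
      .append("id", 1)
      .append("name", "first");
  writer.write(group);
}
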
 
Example 11
Source Project: parquet-mr   Source File: ParquetRecordWriter.java    License: Apache License 2.0
/**
 *
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the size of a page in the file (this will be approximate)
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for dictionary size
 * @param enableDictionary to enable the dictionary
 * @param validating if schema validation should be turned on
 * @param writerVersion writer compatibility version
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    int blockSize, int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, compressor, validating, props);
  this.memoryManager = null;
  this.codecFactory = null;
}
 
Example 12
Source Project: parquet-mr   Source File: ParquetOutputFormat.java    License: Apache License 2.0
public static Class<?> getWriteSupportClass(Configuration configuration) {
  final String className = configuration.get(WRITE_SUPPORT_CLASS);
  if (className == null) {
    return null;
  }
  final Class<?> writeSupportClass = ConfigurationUtil.getClassFromConfig(configuration, WRITE_SUPPORT_CLASS, WriteSupport.class);
  return writeSupportClass;
}
 
Example 13
Source Project: parquet-mr   Source File: ParquetOutputFormat.java    License: Apache License 2.0
/**
 * @param configuration to find the configuration for the write support class
 * @return the configured write support
 */
@SuppressWarnings("unchecked")
public WriteSupport<T> getWriteSupport(Configuration configuration) {
  if (writeSupport != null) return writeSupport;
  Class<?> writeSupportClass = getWriteSupportClass(configuration);
  try {
    return (WriteSupport<T>) Objects
        .requireNonNull(writeSupportClass, "writeSupportClass cannot be null")
        .newInstance();
  } catch (InstantiationException | IllegalAccessException e) {
    throw new BadConfigurationException("could not instantiate write support class: " + writeSupportClass, e);
  }
}
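
In a MapReduce job, the class name checked above is normally stored through the matching setter rather than set by hand. A hedged sketch, with GroupWriteSupport standing in for any concrete WriteSupport and an illustrative output path:

Job job = Job.getInstance(new Configuration(), "write-parquet");
job.setOutputFormatClass(ParquetOutputFormat.class);
// Records the class name under WRITE_SUPPORT_CLASS, which getWriteSupportClass(...) reads back.
ParquetOutputFormat.setWriteSupportClass(job, GroupWriteSupport.class);
GroupWriteSupport.setSchema(schema, job.getConfiguration()); // schema as in the GroupWriteSupport sketch
FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));
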
 
Example 14
Source Project: parquet-mr   Source File: ParquetWriter.java    License: Apache License 2.0
/**
 * Create a new ParquetWriter.
 *
 * @param file the file to create
 * @param writeSupport the implementation to write a record to a RecordConsumer
 * @param compressionCodecName the compression codec to use
 * @param blockSize the block size threshold
 * @param pageSize the page size threshold
 * @param dictionaryPageSize the page size threshold for the dictionary pages
 * @param enableDictionary to turn dictionary encoding on
 * @param validating to turn on validation using the schema
 * @throws IOException if there is an error while writing
 * @deprecated will be removed in 2.0.0
 */
@Deprecated
public ParquetWriter(
    Path file,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int blockSize,
    int pageSize,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating) throws IOException {
  this(file, writeSupport, compressionCodecName, blockSize, pageSize,
      dictionaryPageSize, enableDictionary, validating,
      DEFAULT_WRITER_VERSION);
}
 
Example 15
Source Project: parquet-mr   Source File: ParquetWriter.java    License: Apache License 2.0
@Deprecated
public ParquetWriter(Path file, Configuration conf, WriteSupport<T> writeSupport) throws IOException {
  this(file,
      writeSupport,
      DEFAULT_COMPRESSION_CODEC_NAME,
      DEFAULT_BLOCK_SIZE,
      DEFAULT_PAGE_SIZE,
      DEFAULT_PAGE_SIZE,
      DEFAULT_IS_DICTIONARY_ENABLED,
      DEFAULT_IS_VALIDATING_ENABLED,
      DEFAULT_WRITER_VERSION,
      conf);
}
 
Example 16
Source Project: parquet-mr   Source File: ParquetWriter.java    License: Apache License 2.0
ParquetWriter(
    OutputFile file,
    ParquetFileWriter.Mode mode,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int rowGroupSize,
    boolean validating,
    Configuration conf,
    int maxPaddingSize,
    ParquetProperties encodingProps) throws IOException {

  WriteSupport.WriteContext writeContext = writeSupport.init(conf);
  MessageType schema = writeContext.getSchema();

  ParquetFileWriter fileWriter = new ParquetFileWriter(
    file, schema, mode, rowGroupSize, maxPaddingSize,
    encodingProps.getColumnIndexTruncateLength(), encodingProps.getStatisticsTruncateLength(),
    encodingProps.getPageWriteChecksumEnabled());
  fileWriter.start();

  this.codecFactory = new CodecFactory(conf, encodingProps.getPageSizeThreshold());
  CodecFactory.BytesCompressor compressor = codecFactory.getCompressor(compressionCodecName);
  this.writer = new InternalParquetRecordWriter<T>(
      fileWriter,
      writeSupport,
      schema,
      writeContext.getExtraMetaData(),
      rowGroupSize,
      compressor,
      validating,
      encodingProps);
}
 
Example 17
Source Project: garmadon   Source File: ExtraMetadataWriteSupport.java    License: Apache License 2.0
public ExtraMetadataWriteSupport(WriteSupport<T> delegate) {
    super(delegate);
}
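
This one-line constructor only makes sense against its delegating superclass. A hypothetical stand-alone version of that wrapper pattern, forwarding every method of the WriteSupport contract so subclasses can override just one of them:

public class ForwardingWriteSupport<T> extends WriteSupport<T> {
  private final WriteSupport<T> delegate;

  public ForwardingWriteSupport(WriteSupport<T> delegate) {
    this.delegate = delegate;
  }

  @Override
  public WriteContext init(Configuration configuration) {
    return delegate.init(configuration);
  }

  @Override
  public void prepareForWrite(RecordConsumer recordConsumer) {
    delegate.prepareForWrite(recordConsumer);
  }

  @Override
  public void write(T record) {
    delegate.write(record);
  }

  @Override
  public FinalizedWriteContext finalizeWrite() {
    // A subclass like ExtraMetadataWriteSupport would override this to add footer entries.
    return delegate.finalizeWrite();
  }
}
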
 
Example 18
Source Project: iceberg   Source File: ParquetWriteSupport.java    License: Apache License 2.0
ParquetWriteSupport(MessageType type, Map<String, String> keyValueMetadata, WriteSupport<T> writeSupport) {
  this.type = type;
  this.keyValueMetadata = keyValueMetadata;
  this.wrapped = writeSupport;
}
 
Example 19
Source Project: iceberg   Source File: Parquet.java    License: Apache License 2.0
public WriteBuilder writeSupport(WriteSupport<?> newWriteSupport) {
  this.writeSupport = newWriteSupport;
  return this;
}
 
Example 20
Source Project: iceberg   Source File: Parquet.java    License: Apache License 2.0
public ParquetWriteBuilder<T> setWriteSupport(WriteSupport<T> writeSupport) {
  this.writeSupport = writeSupport;
  return self();
}
 
Example 21
Source Project: osm-parquetizer   Source File: ParquetWriterFactory.java    License: Apache License 2.0
@Override
protected WriteSupport<Way> getWriteSupport(Configuration conf) {
    return new WayWriteSupport(excludeMetadata);
}
 
Example 22
Source Project: osm-parquetizer   Source File: ParquetWriterFactory.java    License: Apache License 2.0
@Override
protected WriteSupport<Node> getWriteSupport(Configuration conf) {
    return new NodeWriteSupport(excludeMetadata);
}
 
Example 23
Source Project: osm-parquetizer   Source File: ParquetWriterFactory.java    License: Apache License 2.0
@Override
protected WriteSupport<Relation> getWriteSupport(Configuration conf) {
    return new RelationWriteSupport(excludeMetadata);
}
 
Example 24
Source Project: embulk-output-parquet   Source File: EmbulkWriterBuilder.java    License: MIT License
@Override
protected WriteSupport<PageReader> getWriteSupport(Configuration conf)
{
    return new EmbulkWriteSupport(schema, timestampFormatters, addUTF8);
}
 
Example 25
Source Project: datacollector   Source File: AvroParquetWriterBuilder.java    License: Apache License 2.0
protected WriteSupport<T> getWriteSupport(Configuration conf) {
  return new AvroWriteSupport((new AvroSchemaConverterLogicalTypesPre19(conf)).convert(this.schema), this.schema, this.model);
}
 
Example 26
Source Project: flink   Source File: ParquetRowDataBuilder.java    License: Apache License 2.0
@Override
protected WriteSupport<RowData> getWriteSupport(Configuration conf) {
	return new ParquetWriteSupport();
}
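
A short usage sketch for this builder, assuming Flink's ParquetRowDataBuilder.createWriterFactory entry point; the row type and field names are illustrative, so check the signature against your Flink version:

RowType rowType = RowType.of(
    new LogicalType[] {new IntType(), new VarCharType(VarCharType.MAX_LENGTH)},
    new String[] {"id", "name"});
ParquetWriterFactory<RowData> factory =
    ParquetRowDataBuilder.createWriterFactory(rowType, new Configuration(), true /* utcTimestamp */);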