Java Code Examples for org.apache.parquet.column.ParquetProperties#getPageSizeThreshold()

The following examples show how to use org.apache.parquet.column.ParquetProperties#getPageSizeThreshold(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ColumnWriteStoreBase.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a store whose column writers are created on demand: a writer for a
 * column is only instantiated the first time it is requested through the
 * column writer provider, and is then kept for later requests.
 *
 * @param pageWriteStore source of the page writer handed to each newly created column writer
 * @param props properties providing the page size threshold, the row counts used for the
 *        first size check, and the settings passed on to each column writer
 */
@Deprecated
ColumnWriteStoreBase(
    final PageWriteStore pageWriteStore,
    final ParquetProperties props) {
  this.props = props;
  this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
  this.rowCountForNextSizeCheck = min(props.getMinRowCountForPageSizeCheck(), props.getPageRowCountLimit());

  // Starts empty; entries are added by the provider below as columns are requested.
  this.columns = new TreeMap<>();

  columnWriterProvider = new ColumnWriterProvider() {
    @Override
    public ColumnWriter getColumnWriter(ColumnDescriptor descriptor) {
      ColumnWriterBase existing = columns.get(descriptor);
      if (existing != null) {
        return existing;
      }
      // First request for this column: create its writer (without a bloom filter writer) and remember it.
      ColumnWriterBase created =
          createColumnWriter(descriptor, pageWriteStore.getPageWriter(descriptor), null, props);
      columns.put(descriptor, created);
      return created;
    }
  };
}
 
Example 2
Source File: ColumnWriteStoreBase.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a store with one column writer per column of the given schema. All
 * writers are created up front and the resulting map is exposed as unmodifiable,
 * so the column writer provider only performs lookups.
 *
 * @param schema the schema whose columns each get a writer
 * @param pageWriteStore source of the page writer for every column
 * @param props properties providing the page size threshold, the row counts used for the
 *        first size check, and the settings passed on to each column writer
 */
ColumnWriteStoreBase(
    MessageType schema,
    PageWriteStore pageWriteStore,
    ParquetProperties props) {
  this.props = props;
  this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);

  Map<ColumnDescriptor, ColumnWriterBase> writersByColumn = new TreeMap<>();
  for (ColumnDescriptor descriptor : schema.getColumns()) {
    // No bloom filter writer is supplied by this constructor.
    writersByColumn.put(
        descriptor,
        createColumnWriter(descriptor, pageWriteStore.getPageWriter(descriptor), null, props));
  }
  this.columns = unmodifiableMap(writersByColumn);

  this.rowCountForNextSizeCheck = min(props.getMinRowCountForPageSizeCheck(), props.getPageRowCountLimit());

  columnWriterProvider = new ColumnWriterProvider() {
    @Override
    public ColumnWriter getColumnWriter(ColumnDescriptor descriptor) {
      return columns.get(descriptor);
    }
  };
}
 
Example 3
Source File: ParquetRecordWriter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a record writer backed by an internal writer and registers that
 * internal writer with the given memory manager.
 *
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate);
 *        also the size passed to the memory manager when registering the writer
 * @param codec the compression codec used to compress the pages
 * @param validating if schema validation should be turned on
 * @param props parquet encoding properties
 * @param memoryManager the memory manager the internal writer is registered with; must not be null
 * @param conf the configuration handed to the codec factory
 * @throws NullPointerException if {@code memoryManager} is null
 */
ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long blockSize,
    CompressionCodecName codec,
    boolean validating,
    ParquetProperties props,
    MemoryManager memoryManager,
    Configuration conf) {
  // Validate first so a null manager is rejected before the codec factory and
  // the internal writer are built.
  this.memoryManager = Objects.requireNonNull(memoryManager, "memoryManager cannot be null");
  this.codecFactory = new CodecFactory(conf, props.getPageSizeThreshold());
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, codecFactory.getCompressor(codec), validating,
      props);
  memoryManager.addWriter(internalWriter, blockSize);
}
 
Example 4
Source File: ColumnWriteStoreBase.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a store with one column writer per column of the given schema,
 * attaching a bloom filter writer to the columns for which the properties
 * enable bloom filters. All writers are created up front and the resulting map
 * is exposed as unmodifiable, so the column writer provider only performs lookups.
 *
 * @param schema the schema whose columns each get a writer
 * @param pageWriteStore source of the page writer for every column
 * @param bloomFilterWriteStore source of the bloom filter writer for columns with bloom filters enabled
 * @param props properties providing the page size threshold, the row counts used for the
 *        first size check, the per-column bloom filter switch, and the settings passed on
 *        to each column writer
 */
ColumnWriteStoreBase(
  MessageType schema,
  PageWriteStore pageWriteStore,
  BloomFilterWriteStore bloomFilterWriteStore,
  ParquetProperties props) {
  this.props = props;
  this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
  Map<ColumnDescriptor, ColumnWriterBase> mcolumns = new TreeMap<>();
  for (ColumnDescriptor path : schema.getColumns()) {
    PageWriter pageWriter = pageWriteStore.getPageWriter(path);
    // Only consult the bloom filter store for columns that have bloom filters enabled.
    BloomFilterWriter bloomFilterWriter = props.isBloomFilterEnabled(path)
        ? bloomFilterWriteStore.getBloomFilterWriter(path)
        : null;
    mcolumns.put(path, createColumnWriter(path, pageWriter, bloomFilterWriter, props));
  }
  this.columns = unmodifiableMap(mcolumns);

  // Cap the first size check at the page row count limit, as the other
  // constructors of this class do.
  this.rowCountForNextSizeCheck =
      Math.min(props.getMinRowCountForPageSizeCheck(), props.getPageRowCountLimit());

  columnWriterProvider = new ColumnWriterProvider() {
    @Override
    public ColumnWriter getColumnWriter(ColumnDescriptor path) {
      return columns.get(path);
    }
  };
}
 
Example 5
Source File: ParquetWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up a writer for the given output file. The write support is initialised
 * with the configuration to obtain the schema and extra metadata, the underlying
 * file writer is created and started, and an internal record writer is built on
 * top of it using a compressor for the requested codec.
 *
 * @param file the output file to write to
 * @param mode the file writer mode passed to the underlying file writer
 * @param writeSupport converts records and, once initialised, supplies the schema and extra metadata
 * @param compressionCodecName the codec for which a compressor is obtained
 * @param rowGroupSize the row group size given to both the file writer and the internal record writer
 * @param validating validation flag forwarded to the internal record writer
 * @param conf the configuration used to initialise the write support and the codec factory
 * @param maxPaddingSize the maximum padding size passed to the underlying file writer
 * @param encodingProps encoding properties; also the source of the truncate lengths,
 *        the page checksum flag and the page size threshold
 * @throws IOException declared by this constructor; presumably raised while creating or
 *         starting the underlying file writer
 */
ParquetWriter(
    OutputFile file,
    ParquetFileWriter.Mode mode,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int rowGroupSize,
    boolean validating,
    Configuration conf,
    int maxPaddingSize,
    ParquetProperties encodingProps) throws IOException {

  final WriteSupport.WriteContext context = writeSupport.init(conf);
  final MessageType recordSchema = context.getSchema();

  // The file writer is started before any codec resources are set up.
  final ParquetFileWriter parquetFile = new ParquetFileWriter(
      file,
      recordSchema,
      mode,
      rowGroupSize,
      maxPaddingSize,
      encodingProps.getColumnIndexTruncateLength(),
      encodingProps.getStatisticsTruncateLength(),
      encodingProps.getPageWriteChecksumEnabled());
  parquetFile.start();

  this.codecFactory = new CodecFactory(conf, encodingProps.getPageSizeThreshold());
  final CodecFactory.BytesCompressor pageCompressor = codecFactory.getCompressor(compressionCodecName);

  this.writer = new InternalParquetRecordWriter<T>(
      parquetFile, writeSupport, recordSchema, context.getExtraMetaData(),
      rowGroupSize, pageCompressor, validating, encodingProps);
}