org.apache.parquet.Preconditions Java Examples

The following examples show how to use org.apache.parquet.Preconditions. They are taken from open source projects (parquet-mr, Apache Iceberg, Apache Flink); the source file, project, and license are noted above each example.
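Most call sites follow one of two patterns: Preconditions.checkArgument throws IllegalArgumentException for bad caller input, Preconditions.checkState throws IllegalStateException for bad internal state, and both accept a String.format-style message whose %s placeholders are filled from trailing varargs. A minimal sketch of those two patterns (this class and its methods are hypothetical, written only to illustrate the API):

import org.apache.parquet.Preconditions;

public class PreconditionsSketch {
  private boolean started = false;

  // Hypothetical setter: validate a caller-supplied argument.
  public void setPageSize(int pageSize) {
    // Fails with IllegalArgumentException, e.g. "Invalid page size: 0".
    Preconditions.checkArgument(pageSize > 0, "Invalid page size: %s", pageSize);
  }

  // Hypothetical lifecycle method: validate internal state, not an argument.
  public void start() {
    // Fails with IllegalStateException on a second call.
    Preconditions.checkState(!started, "Already started");
    started = true;
  }
}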
Example #1
Source File: AvroRecordConverter.java    From parquet-mr with Apache License 2.0
public AvroUnionConverter(ParentValueContainer parent, Type parquetSchema,
                          Schema avroSchema, GenericData model) {
  super(parent);
  GroupType parquetGroup = parquetSchema.asGroupType();
  this.memberConverters = new Converter[parquetGroup.getFieldCount()];

  int parquetIndex = 0;
  for (int index = 0; index < avroSchema.getTypes().size(); index++) {
    Schema memberSchema = avroSchema.getTypes().get(index);
    if (!memberSchema.getType().equals(Schema.Type.NULL)) {
      Type memberType = parquetGroup.getType(parquetIndex);
      memberConverters[parquetIndex] = newConverter(memberSchema, memberType, model, new ParentValueContainer() {
        @Override
        public void add(Object value) {
          Preconditions.checkArgument(
              AvroUnionConverter.this.memberValue == null,
              "Union is resolving to more than one type");
          memberValue = value;
        }
      });
      parquetIndex++; // Note: parquetIndex is not incremented for null members
    }
  }
}
 
Example #2
Source File: AvroRecordConverter.java    From parquet-mr with Apache License 2.0
public AvroArrayConverter(ParentValueContainer parent, GroupType type,
                          Schema avroSchema, GenericData model,
                          Class<?> arrayClass) {
  this.parent = parent;
  this.avroSchema = avroSchema;

  Preconditions.checkArgument(arrayClass.isArray(),
      "Cannot convert non-array: " + arrayClass.getName());
  this.elementClass = arrayClass.getComponentType();

  ParentValueContainer setter = createSetterAndContainer();
  Schema elementSchema = this.avroSchema.getElementType();
  Type repeatedType = type.getType(0);

  // always determine whether the repeated type is the element type by
  // matching it against the element schema.
  if (isElementType(repeatedType, elementSchema)) {
    // the element type is the repeated type (and required)
    converter = newConverter(elementSchema, repeatedType, model, elementClass, setter);
  } else {
    // the element is wrapped in a synthetic group and may be optional
    converter = new ArrayElementConverter(
        repeatedType.asGroupType(), elementSchema, model, setter);
  }
}
 
Example #3
Source File: ColumnarBatchReader.java    From iceberg with Apache License 2.0
@Override
public final ColumnarBatch read(ColumnarBatch reuse, int numRowsToRead) {
  Preconditions.checkArgument(numRowsToRead > 0, "Invalid number of rows to read: %s", numRowsToRead);
  ColumnVector[] arrowColumnVectors = new ColumnVector[readers.length];

  if (reuse == null) {
    closeVectors();
  }

  for (int i = 0; i < readers.length; i += 1) {
    vectorHolders[i] = readers[i].read(vectorHolders[i], numRowsToRead);
    int numRowsInVector = vectorHolders[i].numValues();
    Preconditions.checkState(
        numRowsInVector == numRowsToRead,
        "Number of rows in the vector %s didn't match expected %s ", numRowsInVector,
        numRowsToRead);
    arrowColumnVectors[i] =
        IcebergArrowColumnVector.forHolder(vectorHolders[i], numRowsInVector);
  }
  ColumnarBatch batch = new ColumnarBatch(arrowColumnVectors);
  batch.setNumRows(numRowsToRead);
  return batch;
}
 
Example #4
Source File: MessageTypeToType.java    From iceberg with Apache License 2.0
@Override
public Type list(GroupType array, Type elementType) {
  GroupType repeated = array.getType(0).asGroupType();
  org.apache.parquet.schema.Type element = repeated.getType(0);

  Preconditions.checkArgument(
      !element.isRepetition(Repetition.REPEATED),
      "Elements cannot have repetition REPEATED: {}", element);

  int elementFieldId = getId(element);

  addAlias(element.getName(), elementFieldId);

  if (element.isRepetition(Repetition.OPTIONAL)) {
    return Types.ListType.ofOptional(elementFieldId, elementType);
  } else {
    return Types.ListType.ofRequired(elementFieldId, elementType);
  }
}
 
Example #5
Source File: AvroIndexedRecordConverter.java    From parquet-mr with Apache License 2.0
public AvroUnionConverter(ParentValueContainer parent, Type parquetSchema,
                          Schema avroSchema, GenericData model) {
  this.parent = parent;
  GroupType parquetGroup = parquetSchema.asGroupType();
  this.memberConverters = new Converter[parquetGroup.getFieldCount()];

  int parquetIndex = 0;
  for (int index = 0; index < avroSchema.getTypes().size(); index++) {
    Schema memberSchema = avroSchema.getTypes().get(index);
    if (!memberSchema.getType().equals(Schema.Type.NULL)) {
      Type memberType = parquetGroup.getType(parquetIndex);
      memberConverters[parquetIndex] = newConverter(memberSchema, memberType, model, new ParentValueContainer() {
        @Override
        public void add(Object value) {
          Preconditions.checkArgument(memberValue == null, "Union is resolving to more than one type");
          memberValue = value;
        }
      });
      parquetIndex++; // Note: parquetIndex is not incremented for null members
    }
  }
}
 
Example #6
Source File: RunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0
public BytesInput toBytes() throws IOException {
  Preconditions.checkArgument(!toBytesCalled,
      "You cannot call toBytes() more than once without calling reset()");

  // write anything that is buffered / queued up for an rle-run
  if (repeatCount >= 8) {
    writeRleRun();
  } else if (numBufferedValues > 0) {
    for (int i = numBufferedValues; i < 8; i++) {
      bufferedValues[i] = 0;
    }
    writeOrAppendBitPackedRun();
    endPreviousBitPackedRun();
  } else {
    endPreviousBitPackedRun();
  }

  toBytesCalled = true;
  return BytesInput.from(baos);
}
 
Example #7
Source File: Types.java    From parquet-mr with Apache License 2.0
@Override
protected Type build(String name) {
  Preconditions.checkState(logicalTypeAnnotation == null,
      "MAP is already a logical type and can't be changed.");
  if (keyType == null) {
    keyType = STRING_KEY;
  }

  GroupBuilder<GroupType> builder = buildGroup(repetition).as(OriginalType.MAP);
  if (id != null) {
    builder.id(id.intValue());
  }

  if (valueType != null) {
    return builder
        .repeatedGroup().addFields(keyType, valueType).named("map")
        .named(name);
  } else {
    return builder
        .repeatedGroup().addFields(keyType).named("map")
        .named(name);
  }
}
 
Example #8
Source File: Types.java    From parquet-mr with Apache License 2.0
protected DecimalMetadata decimalMetadata() {
  DecimalMetadata meta = null;
  if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
    LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalType = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation;
    if (newLogicalTypeSet) {
      if (scaleAlreadySet) {
        Preconditions.checkArgument(this.scale == decimalType.getScale(),
          "Decimal scale should match with the scale of the logical type");
      }
      if (precisionAlreadySet) {
        Preconditions.checkArgument(this.precision == decimalType.getPrecision(),
          "Decimal precision should match with the precision of the logical type");
      }
      scale = decimalType.getScale();
      precision = decimalType.getPrecision();
    }
    Preconditions.checkArgument(precision > 0,
        "Invalid DECIMAL precision: " + precision);
    Preconditions.checkArgument(this.scale >= 0,
        "Invalid DECIMAL scale: " + this.scale);
    Preconditions.checkArgument(this.scale <= precision,
        "Invalid DECIMAL scale: cannot be greater than precision");
    meta = new DecimalMetadata(precision, scale);
  }
  return meta;
}
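The checks above enforce the Parquet decimal invariants: precision > 0, scale >= 0, and scale <= precision, whether the values come from the builder's precision/scale setters or from a decimal logical type. The sketch below builds a column that satisfies them; it assumes the LogicalTypeAnnotation.decimalType(scale, precision) factory and the Types builder's as(LogicalTypeAnnotation) overload from the same library version.

import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Types;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;

public class DecimalTypeSketch {
  public static void main(String[] args) {
    // DECIMAL(9, 2) stored on int32: precision 9 > 0, scale 2 within [0, 9].
    PrimitiveType price = Types.required(INT32)
        .as(LogicalTypeAnnotation.decimalType(2, 9)) // (scale, precision)
        .named("price");
    System.out.println(price);

    // Swapping the arguments (scale 9, precision 2) would fail validation,
    // since scale cannot be greater than precision.
  }
}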
 
Example #9
Source File: BlockSplitBloomFilter.java    From parquet-mr with Apache License 2.0
/**
 * Calculate optimal size according to the number of distinct values and false positive probability.
 *
 * @param n the number of distinct values
 * @param p the false positive probability
 *
 * @return optimal number of bits of given n and p.
 */
public static int optimalNumOfBits(long n, double p) {
  Preconditions.checkArgument((p > 0.0 && p < 1.0),
    "FPP should be less than 1.0 and greater than 0.0");
  final double m = -8 * n / Math.log(1 - Math.pow(p, 1.0 / 8));
  int numBits = (int) m;

  // Handle overflow.
  if (numBits > UPPER_BOUND_BYTES << 3 || m < 0) {
    numBits = UPPER_BOUND_BYTES << 3;
  }

  // Round numBits up to a multiple of BITS_PER_BLOCK (a power of two),
  // so the mask must clear the low bits: ~(BITS_PER_BLOCK - 1).
  numBits = (numBits + BITS_PER_BLOCK - 1) & ~(BITS_PER_BLOCK - 1);

  if (numBits < (LOWER_BOUND_BYTES << 3)) {
    numBits = LOWER_BOUND_BYTES << 3;
  }

  return numBits;
}
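For a sense of the magnitudes, the standalone sketch below evaluates the same formula, m = -8n / ln(1 - p^(1/8)), and rounds up to whole 256-bit blocks. The block size constant mirrors the library's BITS_PER_BLOCK, but the bounds clamping is omitted, so this is an illustration of the math rather than the library's exact code.

public class BloomSizingSketch {
  static final int BITS_PER_BLOCK = 256;

  // Same sizing formula as optimalNumOfBits above, minus the byte-bound clamps.
  static long optimalBits(long n, double p) {
    double m = -8.0 * n / Math.log(1 - Math.pow(p, 1.0 / 8));
    long numBits = (long) m;
    // Round up to a whole number of 256-bit blocks.
    return (numBits + BITS_PER_BLOCK - 1) & ~(long) (BITS_PER_BLOCK - 1);
  }

  public static void main(String[] args) {
    // 1M distinct values at 1% FPP: about 9.7 million bits, roughly 1.2 MB.
    System.out.println(optimalBits(1_000_000L, 0.01) + " bits");
  }
}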
 
Example #10
Source File: SnappyCompressor.java    From parquet-mr with Apache License 2.0
@Override
public synchronized void setInput(byte[] buffer, int off, int len) {  
  SnappyUtil.validateBuffer(buffer, off, len);
  
  Preconditions.checkArgument(!outputBuffer.hasRemaining(), 
      "Output buffer should be empty. Caller must call compress()");

  if (inputBuffer.capacity() - inputBuffer.position() < len) {
    ByteBuffer tmp = ByteBuffer.allocateDirect(inputBuffer.position() + len);
    inputBuffer.rewind();
    tmp.put(inputBuffer);
    ByteBuffer oldBuffer = inputBuffer;
    inputBuffer = tmp;
    CleanUtil.cleanDirectBuffer(oldBuffer);
  } else {
    inputBuffer.limit(inputBuffer.position() + len);
  }

  // Append the current bytes to the input buffer
  inputBuffer.put(buffer, off, len);
  bytesRead += len;
}
 
Example #11
Source File: ParquetInputFormat.java    From parquet-mr with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
  Configuration configuration = ContextUtil.getConfiguration(jobContext);
  List<InputSplit> splits = new ArrayList<InputSplit>();

  if (isTaskSideMetaData(configuration)) {
    // Although not required by the API, some clients may depend on always
    // receiving ParquetInputSplit. Translation is required at some point.
    for (InputSplit split : super.getSplits(jobContext)) {
      Preconditions.checkArgument(split instanceof FileSplit,
          "Cannot wrap non-FileSplit: " + split);
      splits.add(ParquetInputSplit.from((FileSplit) split));
    }
    return splits;

  } else {
    splits.addAll(getSplits(configuration, getFooters(jobContext)));
  }

  return splits;
}
 
Example #12
Source File: ParquetFileWriter.java    From parquet-mr with Apache License 2.0
/**
 * writes _common_metadata file, and optionally a _metadata file depending on the {@link JobSummaryLevel} provided
 * @param configuration the configuration to use to get the FileSystem
 * @param outputPath the directory to write the _metadata file to
 * @param footers the list of footers to merge
 * @param level level of summary to write
 * @throws IOException if there is an error while writing
 * @deprecated metadata files are not recommended and will be removed in 2.0.0
 */
@Deprecated
public static void writeMetadataFile(Configuration configuration, Path outputPath, List<Footer> footers, JobSummaryLevel level) throws IOException {
  Preconditions.checkArgument(level == JobSummaryLevel.ALL || level == JobSummaryLevel.COMMON_ONLY,
      "Unsupported level: " + level);

  FileSystem fs = outputPath.getFileSystem(configuration);
  outputPath = outputPath.makeQualified(fs);
  ParquetMetadata metadataFooter = mergeFooters(outputPath, footers);

  if (level == JobSummaryLevel.ALL) {
    writeMetadataFile(outputPath, metadataFooter, fs, PARQUET_METADATA_FILE);
  }

  metadataFooter.getBlocks().clear();
  writeMetadataFile(outputPath, metadataFooter, fs, PARQUET_COMMON_METADATA_FILE);
}
 
Example #13
Source File: ParquetFileWriter.java    From parquet-mr with Apache License 2.0
/**
 * Given a list of metadata files, merge them into a single ParquetMetadata
 * Requires that the schemas be compatible, and the extraMetadata be exactly equal.
 * @param files a list of files to merge metadata from
 * @param conf a configuration
 * @return merged parquet metadata for the files
 * @throws IOException if there is an error while writing
 * @deprecated metadata files are not recommended and will be removed in 2.0.0
 */
@Deprecated
public static ParquetMetadata mergeMetadataFiles(List<Path> files, Configuration conf) throws IOException {
  Preconditions.checkArgument(!files.isEmpty(), "Cannot merge an empty list of metadata");

  GlobalMetaData globalMetaData = null;
  List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();

  for (Path p : files) {
    ParquetMetadata pmd = ParquetFileReader.readFooter(conf, p, ParquetMetadataConverter.NO_FILTER);
    FileMetaData fmd = pmd.getFileMetaData();
    globalMetaData = mergeInto(fmd, globalMetaData, true);
    blocks.addAll(pmd.getBlocks());
  }

  // collapse GlobalMetaData into a single FileMetaData, which will throw if they are not compatible
  return new ParquetMetadata(globalMetaData.merge(), blocks);
}
 
Example #14
Source File: Types.java    From parquet-mr with Apache License 2.0
@Override
protected Type build(String name) {
  Preconditions.checkState(logicalTypeAnnotation == null,
      "LIST is already the logical type and can't be changed");
  Objects.requireNonNull(elementType, "List element type cannot be null");

  GroupBuilder<GroupType> builder = buildGroup(repetition).as(OriginalType.LIST);
  if (id != null) {
    builder.id(id.intValue());
  }

  return builder
      .repeatedGroup().addFields(elementType).named("list")
      .named(name);
}
 
Example #15
Source File: ParquetProperties.java    From parquet-mr with Apache License 2.0
/**
 * Set the Parquet format page size.
 *
 * @param pageSize an integer size in bytes
 * @return this builder for method chaining.
 */
public Builder withPageSize(int pageSize) {
  Preconditions.checkArgument(pageSize > 0,
      "Invalid page size (negative): %s", pageSize);
  this.pageSize = pageSize;
  return this;
}
 
Example #16
Source File: ParquetProperties.java    From parquet-mr with Apache License 2.0
/**
 * Set the Parquet format dictionary page size.
 *
 * @param dictionaryPageSize an integer size in bytes
 * @return this builder for method chaining.
 */
public Builder withDictionaryPageSize(int dictionaryPageSize) {
  Preconditions.checkArgument(dictionaryPageSize > 0,
      "Invalid dictionary page size (negative): %s", dictionaryPageSize);
  this.dictPageSize = dictionaryPageSize;
  return this;
}
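Both setters validate eagerly, so a misconfigured size fails while the properties are being built rather than midway through a write. A minimal usage sketch (the sizes chosen here are arbitrary):

import org.apache.parquet.column.ParquetProperties;

public class PageSizeSketch {
  public static void main(String[] args) {
    ParquetProperties props = ParquetProperties.builder()
        .withPageSize(1024 * 1024)          // 1 MiB data pages
        .withDictionaryPageSize(512 * 1024) // 512 KiB dictionary pages
        .build();
    System.out.println(props.getPageSizeThreshold());

    // ParquetProperties.builder().withPageSize(0) would throw
    // IllegalArgumentException: "Invalid page size (negative): 0".
  }
}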
 
Example #17
Source File: NanoTime.java    From parquet-mr with Apache License 2.0
public static NanoTime fromBinary(Binary bytes) {
  Preconditions.checkArgument(bytes.length() == 12, "Must be 12 bytes");
  ByteBuffer buf = bytes.toByteBuffer();
  buf.order(ByteOrder.LITTLE_ENDIAN);
  long timeOfDayNanos = buf.getLong();
  int julianDay = buf.getInt();
  return new NanoTime(julianDay, timeOfDayNanos);
}
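A round-trip sketch for the 12-byte INT96 layout decoded above (8 bytes of little-endian nanos-of-day, then 4 bytes of Julian day). It assumes the NanoTime(julianDay, timeOfDayNanos) constructor used above plus the class's toBinary() and getter methods:

import org.apache.parquet.example.data.simple.NanoTime;
import org.apache.parquet.io.api.Binary;

public class NanoTimeRoundTrip {
  public static void main(String[] args) {
    // Noon (12 hours expressed as nanos of day) on an arbitrary Julian day.
    NanoTime original = new NanoTime(2458850, 12L * 3600 * 1_000_000_000L);

    Binary encoded = original.toBinary();        // always 12 bytes
    NanoTime decoded = NanoTime.fromBinary(encoded);

    System.out.println(decoded.getJulianDay());      // 2458850
    System.out.println(decoded.getTimeOfDayNanos()); // 43200000000000
  }
}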
 
Example #18
Source File: RunLengthDecoder.java    From flink with Apache License 2.0
/**
 * Initializes the internal state for decoding ints of `bitWidth`.
 */
private void initWidthAndPacker(int bitWidth) {
	Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");
	this.bitWidth = bitWidth;
	this.bytesWidth = BytesUtils.paddedByteCountFromBits(bitWidth);
	this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
}
 
Example #19
Source File: TimestampColumnReader.java    From flink with Apache License 2.0
public static TimestampData decodeInt96ToTimestamp(
		boolean utcTimestamp,
		org.apache.parquet.column.Dictionary dictionary,
		int id) {
	Binary binary = dictionary.decodeToBinary(id);
	Preconditions.checkArgument(
			binary.length() == 12,
			"Timestamp with int96 should be 12 bytes.");
	ByteBuffer buffer = binary.toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
	return int96ToTimestamp(utcTimestamp, buffer.getLong(), buffer.getInt());
}
 
Example #20
Source File: GenericParquetWriter.java    From iceberg with Apache License 2.0
@Override
public Optional<PrimitiveWriter<?>> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intType) {
  Preconditions.checkArgument(intType.isSigned() || intType.getBitWidth() < 64,
      "Cannot write uint64: not a supported Java type");
  if (intType.getBitWidth() < 64) {
    return Optional.of(ParquetValueWriters.ints(desc));
  } else {
    return Optional.of(ParquetValueWriters.longs(desc));
  }
}
 
Example #21
Source File: LogicalTypeAnnotation.java    From parquet-mr with Apache License 2.0
public static IntLogicalTypeAnnotation intType(final int bitWidth, final boolean isSigned) {
  Preconditions.checkArgument(
    bitWidth == 8 || bitWidth == 16 || bitWidth == 32 || bitWidth == 64,
    "Invalid bit width for integer logical type, " + bitWidth + " is not allowed, " +
      "valid bit width values: 8, 16, 32, 64");
  return new IntLogicalTypeAnnotation(bitWidth, isSigned);
}
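A short usage sketch of the factory above; only the four standard widths pass the check:

import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation;

public class IntTypeSketch {
  public static void main(String[] args) {
    // Accepted: 8, 16, 32, or 64 bits, signed or unsigned.
    IntLogicalTypeAnnotation uint32 = LogicalTypeAnnotation.intType(32, false);
    System.out.println(uint32);

    // Rejected: throws IllegalArgumentException naming the valid widths.
    // LogicalTypeAnnotation.intType(24, true);
  }
}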
 
Example #22
Source File: PrimitiveType.java    From parquet-mr with Apache License 2.0
private ColumnOrder requireValidColumnOrder(ColumnOrder columnOrder) {
  if (primitive == PrimitiveTypeName.INT96) {
    Preconditions.checkArgument(columnOrder.getColumnOrderName() == ColumnOrderName.UNDEFINED,
        "The column order %s is not supported by INT96", columnOrder);
  }
  if (getLogicalTypeAnnotation() != null) {
    Preconditions.checkArgument(getLogicalTypeAnnotation().isValidColumnOrder(columnOrder),
        "The column order %s is not supported by %s (%s)", columnOrder, primitive, getLogicalTypeAnnotation());
  }
  return columnOrder;
}
 
Example #23
Source File: DirectCodecFactory.java    From parquet-mr with Apache License 2.0
/**
 * See the docs on CodecFactory#createDirectCodecFactory, the public pass-through
 * factory method for this constructor; it exists to hide the rest of this class
 * from public access.
 *
 * @throws NullPointerException if allocator is {@code null}
 */
DirectCodecFactory(Configuration config, ByteBufferAllocator allocator, int pageSize) {
  super(config, pageSize);

  this.allocator = Objects.requireNonNull(allocator, "allocator cannot be null");
  Preconditions.checkState(allocator.isDirect(),
      "A %s requires a direct buffer allocator be provided.",
      getClass().getSimpleName());
}
 
Example #24
Source File: DirectCodecFactory.java    From parquet-mr with Apache License 2.0
@Override
public void decompress(ByteBuffer input, int compressedSize, ByteBuffer output, int uncompressedSize)
    throws IOException {
  Preconditions.checkArgument(compressedSize == uncompressedSize,
      "Non-compressed data did not have matching compressed and uncompressed sizes.");
  output.clear();
  output.put((ByteBuffer) input.duplicate().position(0).limit(compressedSize));
}
 
Example #25
Source File: DirectCodecFactory.java    From parquet-mr with Apache License 2.0
public Object borrowDirectDecompressor() {
  Preconditions.checkArgument(supportDirectDecompressor, "Tried to get a direct Decompressor from a non-direct codec.");
  try {
    return directDecompressorPool.borrowObject();
  } catch (Exception e) {
    throw new ParquetCompressionCodecException(e);
  }
}
 
Example #26
Source File: SnappyDecompressor.java    From parquet-mr with Apache License 2.0
/**
 * Fills the specified buffer with uncompressed data. Returns the actual number
 * of bytes of uncompressed data. A return value of 0 indicates that
 * {@link #needsInput()} should be called in order to determine if more
 * input data is required.
 *
 * @param buffer Buffer for the uncompressed data
 * @param off Start offset of the data
 * @param len Size of the buffer
 * @return The actual number of bytes of uncompressed data.
 * @throws IOException if reading or decompression fails
 */
@Override
public synchronized int decompress(byte[] buffer, int off, int len) throws IOException {
  SnappyUtil.validateBuffer(buffer, off, len);
  if (inputBuffer.position() == 0 && !outputBuffer.hasRemaining()) {
    return 0;
  }

  if (!outputBuffer.hasRemaining()) {
    inputBuffer.rewind();
    Preconditions.checkArgument(inputBuffer.position() == 0, "Input buffer position must be 0");
    Preconditions.checkArgument(outputBuffer.position() == 0, "Output buffer position must be 0");
    // There is compressed input, decompress it now.
    int decompressedSize = Snappy.uncompressedLength(inputBuffer);
    if (decompressedSize > outputBuffer.capacity()) {
      ByteBuffer oldBuffer = outputBuffer;
      outputBuffer = ByteBuffer.allocateDirect(decompressedSize);
      CleanUtil.cleanDirectBuffer(oldBuffer);
    }

    // Reset the previous outputBuffer (i.e. set position to 0)
    outputBuffer.clear();
    int size = Snappy.uncompress(inputBuffer, outputBuffer);
    outputBuffer.limit(size);
    // We've decompressed the entire input, reset the input now
    inputBuffer.clear();
    inputBuffer.limit(0);
    finished = true;
  }

  // Return uncompressed output, up to 'len' bytes
  int numBytes = Math.min(len, outputBuffer.remaining());
  outputBuffer.get(buffer, off, numBytes);
  return numBytes;
}
 
Example #27
Source File: DynConstructors.java    From parquet-mr with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public <R> R invoke(Object target, Object... args) {
  Preconditions.checkArgument(target == null,
      "Invalid call to constructor: target must be null");
  return (R) newInstance(args);
}
 
Example #28
Source File: DynConstructors.java    From parquet-mr with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public <R> R invokeChecked(Object target, Object... args) throws Exception {
  Preconditions.checkArgument(target == null,
      "Invalid call to constructor: target must be null");
  return (R) newInstanceChecked(args);
}
 
Example #29
Source File: DynMethods.java    From parquet-mr with Apache License 2.0
/**
 * Returns this method as a BoundMethod for the given receiver.
 *
 * @param receiver an Object to receive the method invocation
 * @return a {@link BoundMethod} for this method and the receiver
 * @throws IllegalStateException if the method is static
 * @throws IllegalArgumentException if the receiver's class is incompatible
 */
public BoundMethod bind(Object receiver) {
  Preconditions.checkState(!isStatic(),
      "Cannot bind static method " + method.toGenericString());
  Preconditions.checkArgument(
      method.getDeclaringClass().isAssignableFrom(receiver.getClass()),
      "Cannot bind " + method.toGenericString() + " to instance of " +
          receiver.getClass());

  return new BoundMethod(this, receiver);
}
 
Example #30
Source File: AvroWriteSupport.java    From parquet-mr with Apache License 2.0
public void writeList(GroupType schema, Schema avroSchema, Object value) {
  recordConsumer.startGroup(); // group wrapper (original type LIST)
  if (value instanceof Collection) {
    writeCollection(schema, avroSchema, (Collection) value);
  } else {
    Class<?> arrayClass = value.getClass();
    Preconditions.checkArgument(arrayClass.isArray(),
        "Cannot write unless collection or array: " + arrayClass.getName());
    writeJavaArray(schema, avroSchema, arrayClass, value);
  }
  recordConsumer.endGroup();
}