org.apache.parquet.io.InvalidRecordException Java Examples

The following examples show how to use org.apache.parquet.io.InvalidRecordException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetFileReaderTest.java — from kafka-connect-fs, Apache License 2.0 (6 votes)
/**
 * Configures a reader with a projection schema ("test_projection", holding a single
 * string field "field1"), reads the whole data file through it and inspects the
 * exception that reading raises.
 *
 * NOTE(review): the assertions live only in the catch block, so this test also passes
 * when readAllData completes without throwing — confirm that this is intended.
 */
@ParameterizedTest
@MethodSource("fileSystemConfigProvider")
public void readerWithInvalidProjection(ReaderFsTestConfig fsConfig) throws IOException {
    Schema projection = SchemaBuilder.record("test_projection")
            .namespace("test.avro")
            .fields()
            .name("field1").type("string").noDefault()
            .endRecord();

    Map<String, Object> config = getReaderConfig();
    config.put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, projection.toString());
    config.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension());

    FileSystem fs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration());
    fsConfig.setReader(getReader(fs, fsConfig.getDataFile(), config));

    try {
        readAllData(fsConfig);
    } catch (Exception e) {
        // The failure must surface as a ConnectException wrapping the Parquet error.
        Throwable cause = e.getCause();
        assertEquals(ConnectException.class, e.getClass());
        assertEquals(InvalidRecordException.class, cause.getClass());
    }
}
 
Example #2
Source File: AvroRecordConverter.java — from parquet-mr, Apache License 2.0 (6 votes)
/**
 * Finds the Avro field that corresponds to a Parquet field name. An exact name match
 * in the Avro schema wins; otherwise the first field that lists the name among its
 * aliases is returned.
 *
 * @param parquetFieldName the field name as it appears in the Parquet schema
 * @return the matching Avro field
 * @throws InvalidRecordException if neither a name nor an alias matches
 */
private Schema.Field getAvroField(String parquetFieldName) {
  Schema.Field direct = avroSchema.getField(parquetFieldName);
  if (direct == null) {
    // No field carries that exact name: fall back to alias lookup.
    for (Schema.Field candidate : avroSchema.getFields()) {
      if (candidate.aliases().contains(parquetFieldName)) {
        return candidate;
      }
    }
    throw new InvalidRecordException(String.format(
        "Parquet/Avro schema mismatch: Avro field '%s' not found",
        parquetFieldName));
  }
  return direct;
}
 
Example #3
Source File: ParquetRowiseReader.java — from dremio-oss, Apache License 2.0 (5 votes)
/**
 * Checks every decimal vector in {@code output} against the corresponding Parquet column
 * and rejects the read when the Parquet column is not a decimal of the same precision
 * and scale.
 *
 * Vectors whose column cannot be found in the Parquet schema are skipped.
 *
 * @param output         mutator whose vectors are inspected
 * @param columnResolver maps a vector's field name to its Parquet column name
 * @throws UserException (schema change error) when a decimal vector is backed by a
 *                       Parquet column of a different type, precision or scale
 */
private void verifyDecimalTypesAreSame(OutputMutator output, ParquetColumnResolver columnResolver) {
  for (ValueVector vector : output.getVectors()) {
    Field fieldInSchema = vector.getField();
    if (fieldInSchema.getType().getTypeID() != ArrowType.ArrowTypeID.Decimal) {
      continue;
    }

    ArrowType.Decimal typeInTable = (ArrowType.Decimal) fieldInSchema.getType();
    Type typeInParquet = null;
    // the field in arrow schema may not be present in hive schema
    try {
      typeInParquet = schema.getType(columnResolver.getParquetColumnName(fieldInSchema.getName()));
    } catch (InvalidRecordException ignored) {
      // Deliberately ignored: a column missing from the Parquet schema is simply
      // skipped by the null check below.
    }
    if (typeInParquet == null) {
      continue;
    }

    boolean schemaMisMatch = true;
    // getOriginalType() may be null for a column without a logical-type annotation;
    // comparing with == keeps that case on the "mismatch" path instead of raising a
    // NullPointerException.
    if (typeInParquet.getOriginalType() == OriginalType.DECIMAL) {
      int precision = typeInParquet.asPrimitiveType().getDecimalMetadata().getPrecision();
      int scale = typeInParquet.asPrimitiveType().getDecimalMetadata().getScale();
      ArrowType decimalType = new ArrowType.Decimal(precision, scale);
      if (decimalType.equals(typeInTable)) {
        schemaMisMatch = false;
      }
    }

    if (schemaMisMatch) {
      throw UserException.schemaChangeError().message("Mixed types "+ fieldInSchema.getType()
        + " , " + typeInParquet + " is not supported.")
        .build(logger);
    }
  }
}
 
Example #4
Source File: MessageType.java — from parquet-mr, Apache License 2.0 (5 votes)
/**
 * Verifies that {@code subType} is a message type and then delegates to
 * {@code checkGroupContains} for the field-level check.
 *
 * @param subType the type expected to be contained in this one
 * @throws InvalidRecordException if {@code subType} is not a {@code MessageType}
 */
@Override
public void checkContains(Type subType) {
  if (subType instanceof MessageType) {
    checkGroupContains(subType);
    return;
  }
  throw new InvalidRecordException(subType + " found: expected " + this);
}
 
Example #5
Source File: GroupType.java — from parquet-mr, Apache License 2.0 (5 votes)
/**
 * Looks up the position of a field by its name.
 *
 * @param name string name of a field
 * @return the index of the field with that name
 * @throws InvalidRecordException if no index is registered under {@code name}
 */
public int getFieldIndex(String name) {
  final Integer position = indexByName.get(name);
  if (position != null) {
    return position;
  }
  throw new InvalidRecordException(name + " not found in " + this);
}
 
Example #6
Source File: GroupType.java — from parquet-mr, Apache License 2.0 (5 votes)
/**
 * Checks that every field of the group {@code subType} is contained in the field of
 * the same name in this group.
 *
 * @param subType the group whose fields are checked against this one
 * @throws InvalidRecordException if {@code subType} is a primitive type
 */
void checkGroupContains(Type subType) {
  if (subType.isPrimitive()) {
    throw new InvalidRecordException(subType + " found: expected " + this);
  }
  for (Type candidate : subType.asGroupType().getFields()) {
    // Resolve our own field of the same name, then let it validate the candidate.
    String fieldName = candidate.getName();
    getType(fieldName).checkContains(candidate);
  }
}
 
Example #7
Source File: PrimitiveType.java — from parquet-mr, Apache License 2.0 (5 votes)
/**
 * Returns the repetition level contributed by this primitive node: 1 when it is
 * REPEATED, 0 otherwise.
 *
 * @param path the column path being resolved
 * @param i    the position in {@code path}; must equal {@code path.length} here
 * @throws InvalidRecordException if {@code i} differs from {@code path.length}
 */
@Override
public int getMaxRepetitionLevel(String[] path, int i) {
  if (i == path.length) {
    if (isRepetition(Repetition.REPEATED)) {
      return 1;
    }
    return 0;
  }
  throw new InvalidRecordException("Arrived at primitive node, path invalid");
}
 
Example #8
Source File: PrimitiveType.java — from parquet-mr, Apache License 2.0 (5 votes)
/**
 * Returns the definition level contributed by this primitive node: 0 when it is
 * REQUIRED, 1 otherwise.
 *
 * @param path the column path being resolved
 * @param i    the position in {@code path}; must equal {@code path.length} here
 * @throws InvalidRecordException if {@code i} differs from {@code path.length}
 */
@Override
public int getMaxDefinitionLevel(String[] path, int i) {
  if (i == path.length) {
    if (isRepetition(Repetition.REQUIRED)) {
      return 0;
    }
    return 1;
  }
  throw new InvalidRecordException("Arrived at primitive node, path invalid");
}
 
Example #9
Source File: PrimitiveType.java — from parquet-mr, Apache License 2.0 (5 votes)
/**
 * Returns this primitive type when the path has been consumed in full.
 *
 * @param path the column path being resolved
 * @param i    the position in {@code path}; must equal {@code path.length} here
 * @return this type
 * @throws InvalidRecordException if {@code i} differs from {@code path.length}
 */
@Override
public Type getType(String[] path, int i) {
  if (path.length == i) {
    return this;
  }
  String message = "Arrived at primitive node at index " + i + " , path invalid: " + Arrays.toString(path);
  throw new InvalidRecordException(message);
}
 
Example #10
Source File: PrimitiveType.java — from parquet-mr, Apache License 2.0 (5 votes)
/**
 * Runs the superclass check and then requires {@code subType} to be a primitive type
 * with the same {@code primitive} value as this one.
 *
 * @param subType the type expected to be contained in this one
 * @throws InvalidRecordException if {@code subType} is not primitive or its primitive
 *                                differs from this type's
 */
@Override
void checkContains(Type subType) {
  super.checkContains(subType);
  // Short-circuit keeps asPrimitiveType() from being called on a non-primitive type.
  boolean samePrimitive = subType.isPrimitive()
      && this.primitive == subType.asPrimitiveType().primitive;
  if (!samePrimitive) {
    throw new InvalidRecordException(subType + " found: expected " + this);
  }
}
 
Example #11
Source File: ProtoMessageConverter.java — from parquet-mr, Apache License 2.0 (5 votes)
/**
 * Translates the given Parquet enum value to the protocol buffer enum value
 * registered for it in {@code enumLookup}.
 *
 * @param binaryValue the Parquet enum value to translate
 * @return the protocol buffer enum value mapped to {@code binaryValue}
 * @throws org.apache.parquet.io.InvalidRecordException if there is no corresponding value.
 */
private Descriptors.EnumValueDescriptor translateEnumValue(Binary binaryValue) {
  Descriptors.EnumValueDescriptor translated = enumLookup.get(binaryValue);
  if (translated != null) {
    return translated;
  }

  // Unknown value: report it together with every value the lookup does know.
  Set<Binary> legalValues = enumLookup.keySet();
  throw new InvalidRecordException(
      "Illegal enum value \"" + binaryValue + "\""
          + " in protocol buffer \"" + fieldType.getFullName() + "\""
          + " legal values are: \"" + legalValues + "\"");
}
 
Example #12
Source File: AvroIndexedRecordConverter.java — from parquet-mr, Apache License 2.0 (5 votes)
/**
 * Finds the Avro field that corresponds to a Parquet field name.
 *
 * The alias scan runs before the exact-name match is returned, so a field that lists
 * {@code parquetFieldName} as an alias is returned in preference to a field that
 * carries that exact name.
 *
 * @param parquetFieldName the field name as it appears in the Parquet schema
 * @return the matching Avro field
 * @throws InvalidRecordException if neither an alias nor a name matches
 */
private Schema.Field getAvroField(String parquetFieldName) {
  Schema.Field byName = avroSchema.getField(parquetFieldName);

  for (Schema.Field candidate : avroSchema.getFields()) {
    if (candidate.aliases().contains(parquetFieldName)) {
      return candidate;
    }
  }

  if (byName != null) {
    return byName;
  }
  throw new InvalidRecordException(String.format(
      "Parquet/Avro schema mismatch. Avro field '%s' not found.",
      parquetFieldName));
}
 
Example #13
Source File: ParquetFilePOJOReader.java — from attic-apex-malhar, Apache License 2.0 (4 votes)
/**
 * Converts a Parquet <b>Group</b>(parquet.example.data.Group) to a POJO.
 * Supported parquet primitive types are BOOLEAN, INT32, INT64, FLOAT, DOUBLE
 * and BINARY.
 *
 * Each active field is read from the group as a string, parsed to the target
 * primitive type and written to the POJO through the field's setter. A field that
 * raises an {@code InvalidRecordException} is logged and skipped, and the remaining
 * fields are still processed.
 *
 * @param group the Parquet group to convert
 * @return a new instance of the POJO class with the readable fields populated
 * @throws ParquetEncodingException
 *           if group contains unsupported type
 * @throws RuntimeException
 *           if the POJO class cannot be instantiated
 */
@Override
protected Object convertGroup(Group group)
{
  Object obj;
  try {
    obj = pojoClass.newInstance();
  } catch (InstantiationException | IllegalAccessException ex) {
    throw new RuntimeException(ex);
  }

  for (int i = 0; i < activeFieldInfos.size(); i++) {
    try {
      ParquetFilePOJOReader.ActiveFieldInfo afi = activeFieldInfos.get(i);

      switch (afi.primitiveTypeName) {

        case BOOLEAN:
          Boolean booleanVal = Boolean.parseBoolean(group.getValueToString(afi.fieldIndex, 0));
          ((PojoUtils.SetterBoolean<Object>)afi.setter).set(obj, booleanVal);
          break;

        case INT32:
          Integer intVal = Integer.parseInt(group.getValueToString(afi.fieldIndex, 0));
          ((PojoUtils.SetterInt<Object>)afi.setter).set(obj, intVal);
          break;

        case INT64:
          Long longVal = Long.parseLong(group.getValueToString(afi.fieldIndex, 0));
          ((PojoUtils.SetterLong<Object>)afi.setter).set(obj, longVal);
          break;

        case FLOAT:
          Float floatVal = Float.parseFloat(group.getValueToString(afi.fieldIndex, 0));
          ((PojoUtils.SetterFloat<Object>)afi.setter).set(obj, floatVal);
          break;

        case DOUBLE:
          Double doubleVal = Double.parseDouble(group.getValueToString(afi.fieldIndex, 0));
          ((PojoUtils.SetterDouble<Object>)afi.setter).set(obj, doubleVal);
          break;

        case BINARY:
          ((PojoUtils.Setter<Object, String>)afi.setter).set(obj, group.getValueToString(afi.fieldIndex, 0));
          break;

        default:
          throw new ParquetEncodingException("Unsupported column type: " + afi.primitiveTypeName);

      }
    } catch (InvalidRecordException e) {
      // Best effort: keep converting the remaining fields. The exception is passed as
      // the throwable argument (no dangling "{}" placeholder) so its message and stack
      // trace are logged.
      logger.error("Field not found in schema", e);
    }
  }
  return obj;
}
 
Example #14
Source File: ParquetTableSource.java — from flink, Apache License 2.0 (4 votes)
/**
 * Resolves the column and the literal of a binary comparison into a Parquet filter
 * column paired with a literal converted to that column's type.
 *
 * @param comp the comparison to translate
 * @return the column/literal pair, or {@code null} when the column is not part of the
 *         Parquet schema, the literal is missing or not {@link Comparable}, or the
 *         column type is not one of the supported basic types
 */
@Nullable
private Tuple2<Column, Comparable> extractColumnAndLiteral(BinaryComparison comp) {
	String columnName = getColumnName(comp);
	ColumnPath columnPath = ColumnPath.fromDotString(columnName);
	TypeInformation<?> typeInfo;
	try {
		Type type = parquetSchema.getType(columnPath.toArray());
		typeInfo = ParquetSchemaConverter.convertParquetTypeToTypeInfo(type);
	} catch (InvalidRecordException e) {
		LOG.error("Pushed predicate on undefined field name {} in schema", columnName);
		return null;
	}

	// fetch literal and ensure it is comparable
	Object value = getLiteral(comp);
	if (!(value instanceof Comparable)) {
		// instanceof is false for null as well, so guard the class lookup to avoid a
		// NullPointerException while building the warning.
		String literalType = value == null ? "null" : value.getClass().getCanonicalName();
		LOG.warn("Encountered a non-comparable literal of type {}. " +
			"Cannot push predicate [{}] into ParquetTablesource. " +
			"This is a bug and should be reported.", literalType, comp);
		return null;
	}

	if (typeInfo == BasicTypeInfo.BYTE_TYPE_INFO ||
		typeInfo == BasicTypeInfo.SHORT_TYPE_INFO ||
		typeInfo == BasicTypeInfo.INT_TYPE_INFO) {
		return new Tuple2<>(FilterApi.intColumn(columnName), ((Number) value).intValue());
	} else if (typeInfo == BasicTypeInfo.LONG_TYPE_INFO) {
		return new Tuple2<>(FilterApi.longColumn(columnName), ((Number) value).longValue());
	} else if (typeInfo == BasicTypeInfo.FLOAT_TYPE_INFO) {
		return new Tuple2<>(FilterApi.floatColumn(columnName), ((Number) value).floatValue());
	} else if (typeInfo == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
		return new Tuple2<>(FilterApi.booleanColumn(columnName), (Boolean) value);
	} else if (typeInfo == BasicTypeInfo.DOUBLE_TYPE_INFO) {
		return new Tuple2<>(FilterApi.doubleColumn(columnName), ((Number) value).doubleValue());
	} else if (typeInfo == BasicTypeInfo.STRING_TYPE_INFO) {
		return new Tuple2<>(FilterApi.binaryColumn(columnName), Binary.fromString((String) value));
	} else {
		// unsupported type
		return null;
	}
}
 
Example #15
Source File: Type.java — from parquet-mr, Apache License 2.0 (4 votes)
/**
 * Requires {@code subType} to have the same name and the same repetition as this type.
 *
 * @param subType the type expected to be contained in this one
 * @throws InvalidRecordException if the name or the repetition differs
 */
void checkContains(Type subType) {
  boolean matches = this.name.equals(subType.name)
      && this.repetition == subType.repetition;
  if (matches) {
    return;
  }
  throw new InvalidRecordException(subType + " found: expected " + this);
}
 
Example #16
Source File: ProtoWriteSupport.java — from parquet-mr, Apache License 2.0 (4 votes)
/**
 * Always fails: builds a message naming the descriptor and its Java type and throws.
 * The declared return type only lets callers use this method in a return position.
 *
 * @param fieldDescriptor descriptor of the field whose type is not handled
 * @return never returns normally
 * @throws InvalidRecordException always
 */
private FieldWriter unknownType(FieldDescriptor fieldDescriptor) {
  StringBuilder message = new StringBuilder();
  message.append("Unknown type with descriptor \"").append(fieldDescriptor);
  message.append("\" and type \"").append(fieldDescriptor.getJavaType());
  message.append("\".");
  throw new InvalidRecordException(message.toString());
}