Java Code Examples for org.apache.orc.TypeDescription#getId()

The following examples show how to use org.apache.orc.TypeDescription#getId(). Each example links to its original project and source file.
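
Before the examples, a minimal sketch (not taken from any of the projects below) of what getId() actually returns: ORC assigns ids to a schema in a pre-order walk of the type tree, with the root type at id 0, and getMaximumId() returns the highest id in a type's subtree.

TypeDescription schema =
    TypeDescription.fromString("struct<a:int,b:struct<c:string,d:double>>");
TypeDescription b = schema.getChildren().get(1);
System.out.println(schema.getId());    // 0 (the root struct)
System.out.println(b.getId());         // 2 (a is 1, b is 2)
System.out.println(b.getMaximumId());  // 4 (b's subtree covers c=3 and d=4)
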
Example 1
Source File: OrcRowInputFormat.java    From Flink-CEPplus with Apache License 2.0
/**
 * Computes the ORC projection mask of the fields to include from the selected fields.
 *
 * @return The ORC projection mask.
 */
private boolean[] computeProjectionMask() {
	// mask with all fields of the schema
	boolean[] projectionMask = new boolean[schema.getMaximumId() + 1];
	// for each selected field
	for (int inIdx : selectedFields) {
		// set all nested fields of a selected field to true
		TypeDescription fieldSchema = schema.getChildren().get(inIdx);
		for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) {
			projectionMask[i] = true;
		}
	}
	return projectionMask;
}
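
The inner loop relies on ORC's id layout: a field's subtree occupies the contiguous id range from getId() to getMaximumId(), so marking that whole range includes the field together with all of its nested columns. A self-contained sketch of the same logic (the schema and selected field are hypothetical, chosen for illustration):

// Select only field b (index 1) of struct<a:int,b:struct<c:string,d:double>>.
TypeDescription schema =
		TypeDescription.fromString("struct<a:int,b:struct<c:string,d:double>>");
boolean[] mask = new boolean[schema.getMaximumId() + 1];
TypeDescription fieldSchema = schema.getChildren().get(1);
for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) {
	mask[i] = true;
}
// mask is now [false, false, true, true, true]: b (id 2) and its nested
// columns c (3) and d (4) are included; the root (0) and a (1) are not.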
 
Example 2
Source File: ORCSchemaUtil.java    From iceberg with Apache License 2.0
private static Map<Integer, OrcField> icebergToOrcMapping(String name, TypeDescription orcType) {
  Map<Integer, OrcField> icebergToOrc = Maps.newHashMap();
  switch (orcType.getCategory()) {
    case STRUCT:
      List<String> childrenNames = orcType.getFieldNames();
      List<TypeDescription> children = orcType.getChildren();
      for (int i = 0; i < children.size(); i++) {
        icebergToOrc.putAll(icebergToOrcMapping(childrenNames.get(i), children.get(i)));
      }
      break;
    case LIST:
      icebergToOrc.putAll(icebergToOrcMapping("element", orcType.getChildren().get(0)));
      break;
    case MAP:
      icebergToOrc.putAll(icebergToOrcMapping("key", orcType.getChildren().get(0)));
      icebergToOrc.putAll(icebergToOrcMapping("value", orcType.getChildren().get(1)));
      break;
  }

  if (orcType.getId() > 0) {
    // Only add to non-root types.
    icebergID(orcType)
        .ifPresent(integer -> icebergToOrc.put(integer, new OrcField(name, orcType)));
  }

  return icebergToOrc;
}
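
The icebergID helper is referenced above but not shown. A plausible sketch, assuming Iceberg stores its field id as an ORC type attribute (the attribute key "iceberg.id" and the helper body are assumptions, not confirmed by this snippet; it relies on java.util.Optional and TypeDescription#getAttributeValue):

private static Optional<Integer> icebergID(TypeDescription orcType) {
  // Hypothetical: read the Iceberg field id from a type attribute,
  // returning empty for types that carry no id.
  return Optional.ofNullable(orcType.getAttributeValue("iceberg.id"))
      .map(Integer::parseInt);
}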
 
Example 3
Source File: OrcNoHiveShim.java    From flink with Apache License 2.0
@Override
public RecordReader createRecordReader(
		Configuration conf,
		TypeDescription schema,
		int[] selectedFields,
		List<OrcSplitReader.Predicate> conjunctPredicates,
		org.apache.flink.core.fs.Path path,
		long splitStart,
		long splitLength) throws IOException {
	// open ORC file and create reader
	org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(path.toUri());

	Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf));

	// get offset and length for the stripes that start in the split
	Tuple2<Long, Long> offsetAndLength = getOffsetAndLengthForSplit(
			splitStart, splitLength, orcReader.getStripes());

	// create ORC row reader configuration
	Reader.Options options = new Reader.Options()
			.schema(schema)
			.range(offsetAndLength.f0, offsetAndLength.f1)
			.useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf))
			.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
			.tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));

	// TODO configure filters

	// configure selected fields
	options.include(computeProjectionMask(schema, selectedFields));

	// create ORC row reader
	RecordReader orcRowsReader = orcReader.rows(options);

	// getId() is called here for its side effect: ORC assigns column ids
	// lazily, and the call forces assignment across the whole schema tree
	schema.getId();

	return orcRowsReader;
}
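
The getOffsetAndLengthForSplit helper is not shown above. A minimal sketch of what such a helper might do (hypothetical, not the actual Flink implementation; it uses org.apache.orc.StripeInformation and Flink's Tuple2): assign each stripe to the split that contains the stripe's first byte, so every stripe is read exactly once across splits.

private static Tuple2<Long, Long> getOffsetAndLengthForSplit(
		long splitStart, long splitLength, List<StripeInformation> stripes) {
	long splitEnd = splitStart + splitLength;
	long readStart = Long.MAX_VALUE;
	long readEnd = Long.MIN_VALUE;
	for (StripeInformation stripe : stripes) {
		// a stripe belongs to this split if its first byte falls inside it
		if (stripe.getOffset() >= splitStart && stripe.getOffset() < splitEnd) {
			readStart = Math.min(readStart, stripe.getOffset());
			readEnd = Math.max(readEnd, stripe.getOffset() + stripe.getLength());
		}
	}
	if (readStart == Long.MAX_VALUE) {
		// no stripe starts in this split, so there is nothing to read
		return new Tuple2<>(0L, 0L);
	}
	return new Tuple2<>(readStart, readEnd - readStart);
}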
 
Example 4
Source File: OrcShimV200.java    From flink with Apache License 2.0
/**
 * Computes the ORC projection mask of the fields to include from the selected fields.
 *
 * @return The ORC projection mask.
 */
public static boolean[] computeProjectionMask(TypeDescription schema, int[] selectedFields) {
	// mask with all fields of the schema
	boolean[] projectionMask = new boolean[schema.getMaximumId() + 1];
	// for each selected field
	for (int inIdx : selectedFields) {
		// set all nested fields of a selected field to true
		TypeDescription fieldSchema = schema.getChildren().get(inIdx);
		for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) {
			projectionMask[i] = true;
		}
	}
	return projectionMask;
}
 
Example 5
Source File: OrcShimV200.java    From flink with Apache License 2.0
@Override
public RecordReader createRecordReader(
		Configuration conf,
		TypeDescription schema,
		int[] selectedFields,
		List<Predicate> conjunctPredicates,
		org.apache.flink.core.fs.Path path,
		long splitStart,
		long splitLength) throws IOException {
	// open ORC file and create reader
	Path hPath = new Path(path.toUri());

	Reader orcReader = createReader(hPath, conf);

	// get offset and length for the stripes that start in the split
	Tuple2<Long, Long> offsetAndLength = getOffsetAndLengthForSplit(
			splitStart, splitLength, orcReader.getStripes());

	// create ORC row reader configuration
	Reader.Options options = readOrcConf(
			new Reader.Options().schema(schema).range(offsetAndLength.f0, offsetAndLength.f1),
			conf);

	// configure filters
	if (!conjunctPredicates.isEmpty()) {
		SearchArgument.Builder b = SearchArgumentFactory.newBuilder();
		b = b.startAnd();
		for (Predicate predicate : conjunctPredicates) {
			predicate.add(b);
		}
		b = b.end();
		options.searchArgument(b.build(), new String[]{});
	}

	// configure selected fields
	options.include(computeProjectionMask(schema, selectedFields));

	// create ORC row reader
	RecordReader orcRowsReader = createRecordReader(orcReader, options);

	// getId() is called here for its side effect: ORC assigns column ids
	// lazily, and the call forces assignment across the whole schema tree
	schema.getId();

	return orcRowsReader;
}
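
Once the reader is created, rows are consumed batch by batch. A hypothetical usage sketch (the shim variable and all arguments are placeholders, not part of the example above):

// 'shim', 'conf', 'schema', etc. are assumed to be in scope.
RecordReader reader = shim.createRecordReader(
		conf, schema, selectedFields, conjunctPredicates, path, splitStart, splitLength);
VectorizedRowBatch batch = schema.createRowBatch();
while (reader.nextBatch(batch)) {
	for (int row = 0; row < batch.size; row++) {
		// process the projected column vectors for this row ...
	}
}
reader.close();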