Java Code Examples for org.apache.orc.Reader#Options

The following examples show how to use org.apache.orc.Reader#Options. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: OrcCompactionTaskTest.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a compacted ORC output file fully into memory.
 * This only works if fields are int value.
 *
 * <p>The record reader is opened in a try-with-resources block so it is closed
 * even when reading or up-converting a record fails.
 *
 * @param orcFilePath path of the ORC file to read
 * @return one up-converted {@link OrcStruct} per record, in the order they were read
 * @throws IOException if the file cannot be opened, read, or closed
 * @throws InterruptedException propagated from the underlying record reader
 */
public List<OrcStruct> readOrcFile(Path orcFilePath)
    throws IOException, InterruptedException {
  ReaderImpl orcReader = new ReaderImpl(orcFilePath, new OrcFile.ReaderOptions(new Configuration()));

  Reader.Options options = new Reader.Options().schema(orcReader.getSchema());
  List<OrcStruct> result = new ArrayList<>();

  // Previously the record reader was never closed; try-with-resources releases it on every path.
  try (OrcMapreduceRecordReader recordReader = new OrcMapreduceRecordReader(orcReader, options)) {
    while (recordReader.nextKeyValue()) {
      // A fresh container per record: the up-converted value is stored in the result list.
      OrcStruct recordContainer = (OrcStruct) OrcUtils.createValueRecursively(orcReader.getSchema());
      OrcUtils.upConvertOrcStruct((OrcStruct) recordReader.getCurrentValue(), recordContainer, orcReader.getSchema());
      result.add(recordContainer);
    }
  }

  return result;
}
 
Example 2
Source File: OrcRowInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that opening each of three generated splits configures the ORC reader
 * options with the expected (offset, length) range.
 */
@Test
public void testSplitStripesGivenSplits() throws IOException {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
	OrcRowInputFormat formatSpy = spy(rowOrcInputFormat);

	// hand the input format a spied Options instance so its range(...) calls can be verified
	Reader.Options spiedOptions = spy(new Reader.Options());
	doReturn(spiedOptions).when(formatSpy).getOptions(any());

	FileInputSplit[] splits = formatSpy.createInputSplits(3);
	formatSpy.openInputFormat();

	// expected {offset, length} handed to range(...) for split 0, 1 and 2, in that order
	long[][] expectedRanges = {
		{3L, 137005L},
		{137008L, 136182L},
		{273190L, 123633L}};
	for (int i = 0; i < expectedRanges.length; i++) {
		formatSpy.open(splits[i]);
		verify(spiedOptions).range(eq(expectedRanges[i][0]), eq(expectedRanges[i][1]));
	}
}
 
Example 3
Source File: ORC.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@code OrcIterator} over the configured file.
 *
 * <p>Requires a non-null schema. The read is restricted to {@code [start, length]}
 * only when {@code start} is set. Any {@link IOException} raised while opening the
 * file is rethrown as a {@link RuntimeException}.
 *
 * @return an iterator over the rows of the file, read with the converted ORC schema
 */
public OrcIterator build() {
  Preconditions.checkNotNull(schema, "Schema is required");
  try {
    Path orcPath = new Path(file.location());
    Reader orcReader = OrcFile.createReader(orcPath, OrcFile.readerOptions(conf));
    TypeDescription orcSchema = TypeConversion.toOrc(schema, new ColumnIdMap());

    Reader.Options readOptions = orcReader.options();
    if (start != null) {
      // limit the read to the requested byte range
      readOptions.range(start, length);
    }

    return new OrcIterator(orcPath, orcSchema, orcReader.rows(readOptions.schema(orcSchema)));
  } catch (IOException e) {
    throw new RuntimeException("Can't open " + file.location(), e);
  }
}
 
Example 4
Source File: OrcIterable.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a row-batch iterator over {@code file} using the given ORC reader.
 *
 * @param file          input file; its location is passed to the iterator and used in the error message
 * @param readerSchema  schema to read with
 * @param start         start of the range to read, or {@code null} to leave the range unset
 * @param length        length of the range; only used when {@code start} is non-null
 * @param orcFileReader reader that supplies the options and the rows
 * @param sarg          search argument applied to the read
 * @return iterator over the rows selected by the configured options
 */
private static VectorizedRowBatchIterator newOrcIterator(InputFile file,
                                                         TypeDescription readerSchema,
                                                         Long start, Long length,
                                                         Reader orcFileReader, SearchArgument sarg) {
  final Reader.Options readOptions = orcFileReader.options();
  if (start != null) {
    readOptions.range(start, length);
  }
  // schema first, then the search argument with an empty column-name array
  readOptions.schema(readerSchema).searchArgument(sarg, new String[0]);

  try {
    return new VectorizedRowBatchIterator(file.location(), readerSchema, orcFileReader.rows(readOptions));
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Failed to get ORC rows for file: %s", file);
  }
}
 
Example 5
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that opening each of three generated splits configures the ORC reader
 * options with the expected (offset, length) range.
 */
@Test
public void testSplitStripesGivenSplits() throws IOException {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
	OrcRowInputFormat formatSpy = spy(rowOrcInputFormat);

	// hand the input format a spied Options instance so its range(...) calls can be verified
	Reader.Options spiedOptions = spy(new Reader.Options());
	doReturn(spiedOptions).when(formatSpy).getOptions(any());

	FileInputSplit[] splits = formatSpy.createInputSplits(3);
	formatSpy.openInputFormat();

	// expected {offset, length} handed to range(...) for split 0, 1 and 2, in that order
	long[][] expectedRanges = {
		{3L, 137005L},
		{137008L, 136182L},
		{273190L, 123633L}};
	for (int i = 0; i < expectedRanges.length; i++) {
		formatSpy.open(splits[i]);
		verify(spiedOptions).range(eq(expectedRanges[i][0]), eq(expectedRanges[i][1]));
	}
}
 
Example 6
Source File: OrcNoHiveShim.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the ORC file at {@code path} and creates a row reader restricted to the
 * stripes selected for the given split and to the selected fields.
 *
 * <p>{@code conjunctPredicates} is not applied here (see the TODO below).
 *
 * @throws IOException if the file cannot be opened or the row reader cannot be created
 */
@Override
public RecordReader createRecordReader(
		Configuration conf,
		TypeDescription schema,
		int[] selectedFields,
		List<OrcSplitReader.Predicate> conjunctPredicates,
		org.apache.flink.core.fs.Path path,
		long splitStart,
		long splitLength) throws IOException {
	// translate the Flink path into a Hadoop path and open the file
	org.apache.hadoop.fs.Path hadoopPath = new org.apache.hadoop.fs.Path(path.toUri());
	Reader fileReader = OrcFile.createReader(hadoopPath, OrcFile.readerOptions(conf));

	// (offset, length) covering the stripes that start inside the split
	Tuple2<Long, Long> stripeRange = getOffsetAndLengthForSplit(
			splitStart, splitLength, fileReader.getStripes());

	// TODO configure filters

	// row reader configuration: schema, byte range, ORC conf flags, then the projection mask
	Reader.Options readOptions = new Reader.Options()
			.schema(schema)
			.range(stripeRange.f0, stripeRange.f1)
			.useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf))
			.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
			.tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf))
			.include(computeProjectionMask(schema, selectedFields));

	RecordReader rowReader = fileReader.rows(readOptions);

	// assign ids
	schema.getId();

	return rowReader;
}
 
Example 7
Source File: OrcRowInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an OR of a timestamp predicate and a date predicate ends up as the
 * expected search argument on the ORC reader options.
 */
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	// timestamp pred
	OrcRowInputFormat.Equals timeEquals = new OrcRowInputFormat.Equals(
		"time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100"));
	// date pred
	OrcRowInputFormat.Equals dateEquals = new OrcRowInputFormat.Equals(
		"date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"));
	// OR
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Or(timeEquals, dateEquals));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// stub getOptions(...) so open() configures an Options instance this test can inspect
	OrcRowInputFormat formatSpy = spy(rowOrcInputFormat);
	Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(formatSpy).getOptions(any());

	formatSpy.openInputFormat();
	formatSpy.open(splits[0]);

	// verify predicate configuration
	SearchArgument searchArg = capturedOptions.getSearchArgument();
	assertNotNull(searchArg);
	assertEquals("(or leaf-0 leaf-1)", searchArg.getExpression().toString());
	List<PredicateLeaf> leaves = searchArg.getLeaves();
	assertEquals(2, leaves.size());
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString());
	assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString());
}
 
Example 8
Source File: OrcRowInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a negated decimal equality predicate ends up as the expected
 * search argument on the ORC reader options.
 */
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	// decimal pred
	OrcRowInputFormat.Equals decimalEquals =
		new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5));
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Not(decimalEquals));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// stub getOptions(...) so open() configures an Options instance this test can inspect
	OrcRowInputFormat formatSpy = spy(rowOrcInputFormat);
	Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(formatSpy).getOptions(any());

	formatSpy.openInputFormat();
	formatSpy.open(splits[0]);

	// verify predicate configuration
	SearchArgument searchArg = capturedOptions.getSearchArgument();
	assertNotNull(searchArg);
	assertEquals("(not leaf-0)", searchArg.getExpression().toString());
	List<PredicateLeaf> leaves = searchArg.getLeaves();
	assertEquals(1, leaves.size());
	assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}
 
Example 9
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a negated decimal equality predicate ends up as the expected
 * search argument on the ORC reader options.
 */
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	// decimal pred
	OrcRowInputFormat.Equals decimalEquals =
		new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5));
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Not(decimalEquals));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// stub getOptions(...) so open() configures an Options instance this test can inspect
	OrcRowInputFormat formatSpy = spy(rowOrcInputFormat);
	Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(formatSpy).getOptions(any());

	formatSpy.openInputFormat();
	formatSpy.open(splits[0]);

	// verify predicate configuration
	SearchArgument searchArg = capturedOptions.getSearchArgument();
	assertNotNull(searchArg);
	assertEquals("(not leaf-0)", searchArg.getExpression().toString());
	List<PredicateLeaf> leaves = searchArg.getLeaves();
	assertEquals(1, leaves.size());
	assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}
 
Example 10
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an OR of a timestamp predicate and a date predicate ends up as the
 * expected search argument on the ORC reader options.
 */
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	// timestamp pred
	OrcRowInputFormat.Equals timeEquals = new OrcRowInputFormat.Equals(
		"time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100"));
	// date pred
	OrcRowInputFormat.Equals dateEquals = new OrcRowInputFormat.Equals(
		"date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"));
	// OR
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Or(timeEquals, dateEquals));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// stub getOptions(...) so open() configures an Options instance this test can inspect
	OrcRowInputFormat formatSpy = spy(rowOrcInputFormat);
	Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(formatSpy).getOptions(any());

	formatSpy.openInputFormat();
	formatSpy.open(splits[0]);

	// verify predicate configuration
	SearchArgument searchArg = capturedOptions.getSearchArgument();
	assertNotNull(searchArg);
	assertEquals("(or leaf-0 leaf-1)", searchArg.getExpression().toString());
	List<PredicateLeaf> leaves = searchArg.getLeaves();
	assertEquals(2, leaves.size());
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString());
	assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString());
}
 
Example 11
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the include mask set on the ORC reader options after selecting
 * fields 9, 11 and 2 of the nested test schema.
 */
@Test
public void testProjectionMaskNested() throws IOException{
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());
	OrcRowInputFormat formatSpy = spy(rowOrcInputFormat);

	// stub getOptions(...) so open() configures an Options instance this test can inspect
	Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(formatSpy).getOptions(any());

	formatSpy.selectFields(9, 11, 2);
	formatSpy.openInputFormat();
	FileInputSplit[] splits = formatSpy.createInputSplits(1);
	formatSpy.open(splits[0]);

	boolean[] expectedMask = {
		// top-level struct is false
		false,
		// flat fields 0, 1 are out
		false, false,
		// flat field 2 is in
		true,
		// flat fields 3, 4, 5, 6, 7, 8 are out
		false, false, false, false, false, false,
		// nested field 9 is in
		true, true, true, true, true,
		// nested field 10 is out
		false, false, false, false,
		// nested field 11 is in
		true, true, true, true, true};
	assertArrayEquals(expectedMask, capturedOptions.getInclude());
}
 
Example 12
Source File: OrcShimV230.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the row reader by delegating to {@code reader.rows(options)}.
 *
 * @param reader  ORC file reader to read rows from
 * @param options row reader configuration passed through unchanged
 * @return the record reader returned by {@code reader.rows(options)}
 * @throws IOException if {@code reader.rows(options)} throws it
 */
@Override
protected RecordReader createRecordReader(Reader reader, Reader.Options options) throws IOException {
	return reader.rows(options);
}
 
Example 13
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Adds nine predicates (boolean, byte, short, int, long, float, double and two
 * string predicates) and checks that they are combined into a single AND search
 * argument on the ORC reader options, with the leaves in the order they were added.
 */
@Test
public void testNumericBooleanStringPredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	rowOrcInputFormat.selectFields(0, 1, 2, 3, 4, 5, 6, 8);

	// boolean pred
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("boolean1", PredicateLeaf.Type.BOOLEAN, false));
	// byte pred (declared with the LONG leaf type)
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.LessThan("byte1", PredicateLeaf.Type.LONG, 1));
	// short pred (declared with the LONG leaf type)
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.LessThanEquals("short1", PredicateLeaf.Type.LONG, 1024));
	// int pred (declared with the LONG leaf type)
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Between("int1", PredicateLeaf.Type.LONG, -1, 65536));
	// long pred
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("long1", PredicateLeaf.Type.LONG, 9223372036854775807L));
	// float pred
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("float1", PredicateLeaf.Type.FLOAT, 1.0));
	// double pred (declared with the FLOAT leaf type)
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("double1", PredicateLeaf.Type.FLOAT, -15.0));
	// string is-null pred
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.IsNull("string1", PredicateLeaf.Type.STRING));
	// string equality pred
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("string1", PredicateLeaf.Type.STRING, "hello"));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// stub getOptions(...) on a spy so open() configures this Options instance,
	// which is inspected below
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	assertEquals("(and leaf-0 leaf-1 leaf-2 leaf-3 leaf-4 leaf-5 leaf-6 leaf-7 leaf-8)", sarg.getExpression().toString());
	assertEquals(9, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS boolean1 false)", leaves.get(0).toString());
	assertEquals("(LESS_THAN byte1 1)", leaves.get(1).toString());
	assertEquals("(LESS_THAN_EQUALS short1 1024)", leaves.get(2).toString());
	assertEquals("(BETWEEN int1 -1 65536)", leaves.get(3).toString());
	assertEquals("(EQUALS long1 9223372036854775807)", leaves.get(4).toString());
	assertEquals("(EQUALS float1 1.0)", leaves.get(5).toString());
	assertEquals("(EQUALS double1 -15.0)", leaves.get(6).toString());
	assertEquals("(IS_NULL string1)", leaves.get(7).toString());
	assertEquals("(EQUALS string1 hello)", leaves.get(8).toString());
}
 
Example 14
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the (offset, length) range configured on the ORC reader options when
 * custom splits are opened against five mocked stripes.
 */
@Test
public void testSplitStripesCustomSplits() throws IOException {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
	OrcRowInputFormat formatSpy = spy(rowOrcInputFormat);

	// mock list of stripes: five stripes at offsets 10, 100, 200, 300, 400
	long[] stripeOffsets = {10L, 100L, 200L, 300L, 400L};
	long[] stripeLengths = {90L, 100L, 100L, 100L, 100L};
	List<StripeInformation> mockedStripes = new ArrayList<>();
	for (int i = 0; i < stripeOffsets.length; i++) {
		StripeInformation stripe = mock(StripeInformation.class);
		when(stripe.getOffset()).thenReturn(stripeOffsets[i]);
		when(stripe.getLength()).thenReturn(stripeLengths[i]);
		mockedStripes.add(stripe);
	}
	doReturn(mockedStripes).when(formatSpy).getStripes(any());

	// hand the input format a spied Options instance so its range(...) calls can be verified
	Reader.Options spiedOptions = spy(new Reader.Options());
	doReturn(spiedOptions).when(formatSpy).getOptions(any());

	formatSpy.openInputFormat();

	// split ranging 2 stripes
	formatSpy.open(new FileInputSplit(0, new Path(getPath(TEST_FILE_FLAT)), 0, 150, new String[]{}));
	verify(spiedOptions).range(eq(10L), eq(190L));

	// split ranging 0 stripes
	formatSpy.open(new FileInputSplit(1, new Path(getPath(TEST_FILE_FLAT)), 150, 10, new String[]{}));
	verify(spiedOptions).range(eq(0L), eq(0L));

	// split ranging 1 stripe
	formatSpy.open(new FileInputSplit(2, new Path(getPath(TEST_FILE_FLAT)), 160, 41, new String[]{}));
	verify(spiedOptions).range(eq(200L), eq(100L));

	// split ranging 2 stripes
	formatSpy.open(new FileInputSplit(3, new Path(getPath(TEST_FILE_FLAT)), 201, 299, new String[]{}));
	verify(spiedOptions).range(eq(300L), eq(200L));
}
 
Example 15
Source File: OrcRowInputFormat.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the options object obtained from {@code orcReader.options()}.
 *
 * <p>Kept as a separate, non-private method annotated {@code @VisibleForTesting}
 * so that it can be reached from tests.
 *
 * @param orcReader reader to obtain the options from
 * @return the result of {@code orcReader.options()}
 */
@VisibleForTesting
Reader.Options getOptions(Reader orcReader) {
	return orcReader.options();
}
 
Example 16
Source File: OrcRowInputFormat.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Opens the ORC file behind {@code fileSplit} and prepares this input format for reading.
 *
 * <p>Creates an ORC reader for the file, restricts it to the stripes selected for the
 * split, applies the ORC configuration flags, the conjunctive predicates (if any) and
 * the projection mask, then creates the row reader and a fresh row batch.
 *
 * @param fileSplit the split to read
 * @throws IOException if the file cannot be opened or the row reader cannot be created
 */
@Override
public void open(FileInputSplit fileSplit) throws IOException {

	LOG.debug("Opening ORC file {}", fileSplit.getPath());

	// open ORC file and create reader
	// Build the Hadoop path from the full URI: getPath() returns only the path component and
	// would drop the scheme and authority of the split's file system.
	org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(fileSplit.getPath().toUri());
	Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf));

	// get offset and length for the stripes that start in the split
	Tuple2<Long, Long> offsetAndLength = getOffsetAndLengthForSplit(fileSplit, getStripes(orcReader));

	// create ORC row reader configuration
	Reader.Options options = getOptions(orcReader)
		.schema(schema)
		.range(offsetAndLength.f0, offsetAndLength.f1)
		.useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf))
		.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
		.tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));

	// configure filters: all predicates are combined into a single AND search argument
	if (!conjunctPredicates.isEmpty()) {
		SearchArgument.Builder b = SearchArgumentFactory.newBuilder();
		b = b.startAnd();
		for (Predicate predicate : conjunctPredicates) {
			predicate.add(b);
		}
		b = b.end();
		options.searchArgument(b.build(), new String[]{});
	}

	// configure selected fields
	options.include(computeProjectionMask());

	// create ORC row reader
	this.orcRowsReader = orcReader.rows(options);

	// assign ids
	this.schema.getId();
	// create row batch
	this.rowBatch = schema.createRowBatch(batchSize);
	// reset the batch cursor so reading starts with an empty batch
	rowsInBatch = 0;
	nextRow = 0;
}
 
Example 17
Source File: OrcRowInputFormatTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Adds nine predicates (boolean, byte, short, int, long, float, double and two
 * string predicates) and checks that they are combined into a single AND search
 * argument on the ORC reader options, with the leaves in the order they were added.
 */
@Test
public void testNumericBooleanStringPredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	rowOrcInputFormat.selectFields(0, 1, 2, 3, 4, 5, 6, 8);

	// boolean pred
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("boolean1", PredicateLeaf.Type.BOOLEAN, false));
	// byte pred (declared with the LONG leaf type)
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.LessThan("byte1", PredicateLeaf.Type.LONG, 1));
	// short pred (declared with the LONG leaf type)
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.LessThanEquals("short1", PredicateLeaf.Type.LONG, 1024));
	// int pred (declared with the LONG leaf type)
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Between("int1", PredicateLeaf.Type.LONG, -1, 65536));
	// long pred
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("long1", PredicateLeaf.Type.LONG, 9223372036854775807L));
	// float pred
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("float1", PredicateLeaf.Type.FLOAT, 1.0));
	// double pred (declared with the FLOAT leaf type)
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("double1", PredicateLeaf.Type.FLOAT, -15.0));
	// string is-null pred
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.IsNull("string1", PredicateLeaf.Type.STRING));
	// string equality pred
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("string1", PredicateLeaf.Type.STRING, "hello"));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// stub getOptions(...) on a spy so open() configures this Options instance,
	// which is inspected below
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	assertEquals("(and leaf-0 leaf-1 leaf-2 leaf-3 leaf-4 leaf-5 leaf-6 leaf-7 leaf-8)", sarg.getExpression().toString());
	assertEquals(9, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS boolean1 false)", leaves.get(0).toString());
	assertEquals("(LESS_THAN byte1 1)", leaves.get(1).toString());
	assertEquals("(LESS_THAN_EQUALS short1 1024)", leaves.get(2).toString());
	assertEquals("(BETWEEN int1 -1 65536)", leaves.get(3).toString());
	assertEquals("(EQUALS long1 9223372036854775807)", leaves.get(4).toString());
	assertEquals("(EQUALS float1 1.0)", leaves.get(5).toString());
	assertEquals("(EQUALS double1 -15.0)", leaves.get(6).toString());
	assertEquals("(IS_NULL string1)", leaves.get(7).toString());
	assertEquals("(EQUALS string1 hello)", leaves.get(8).toString());
}
 
Example 18
Source File: OrcShimV200.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Applies ORC settings read from {@code conf} to the given reader options.
 *
 * <p>Sets zero-copy from {@code OrcConf.USE_ZEROCOPY} and skip-corrupt-records
 * from {@code OrcConf.SKIP_CORRUPT_DATA}.
 *
 * @param options reader options to configure
 * @param conf    configuration the two flags are read from
 * @return the options object returned by the chained setter calls
 */
protected Reader.Options readOrcConf(Reader.Options options, Configuration conf) {
	return options.useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf))
			.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf));
}
 
Example 19
Source File: OrcShimV200.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Opens the ORC file at {@code path} and creates a row reader restricted to the
 * stripes selected for the given split, filtered by the conjunction of
 * {@code conjunctPredicates} (if any) and projected to {@code selectedFields}.
 *
 * @throws IOException if the file cannot be opened or the row reader cannot be created
 */
@Override
public RecordReader createRecordReader(
		Configuration conf,
		TypeDescription schema,
		int[] selectedFields,
		List<Predicate> conjunctPredicates,
		org.apache.flink.core.fs.Path path,
		long splitStart,
		long splitLength) throws IOException {
	// translate the Flink path into a Hadoop path and open the file
	Path hadoopPath = new Path(path.toUri());
	Reader fileReader = createReader(hadoopPath, conf);

	// (offset, length) covering the stripes that start inside the split
	Tuple2<Long, Long> stripeRange = getOffsetAndLengthForSplit(
			splitStart, splitLength, fileReader.getStripes());

	// schema and byte range first, then the ORC conf flags
	Reader.Options baseOptions = new Reader.Options()
			.schema(schema)
			.range(stripeRange.f0, stripeRange.f1);
	Reader.Options readOptions = readOrcConf(baseOptions, conf);

	// filters: every predicate is added inside one AND group
	if (!conjunctPredicates.isEmpty()) {
		SearchArgument.Builder andBuilder = SearchArgumentFactory.newBuilder().startAnd();
		for (Predicate conjunct : conjunctPredicates) {
			conjunct.add(andBuilder);
		}
		readOptions.searchArgument(andBuilder.end().build(), new String[]{});
	}

	// projection mask for the selected fields
	readOptions.include(computeProjectionMask(schema, selectedFields));

	RecordReader rowReader = createRecordReader(fileReader, readOptions);

	// assign ids
	schema.getId();

	return rowReader;
}
 
Example 20
Source File: OrcShimV230.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the settings of the superclass implementation and additionally sets
 * tolerate-missing-schema from {@code OrcConf.TOLERATE_MISSING_SCHEMA}.
 *
 * @param options reader options to configure
 * @param conf    configuration the flags are read from
 * @return the options object returned by the chained setter calls
 */
@Override
protected Reader.Options readOrcConf(Reader.Options options, Configuration conf) {
	return super.readOrcConf(options, conf)
			.tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
}