org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory. Each example is taken from an open-source project; the source file and project are noted above the code.
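As a quick orientation before the project examples, the following minimal sketch builds a SearchArgument equivalent to age > 30 AND name IS NOT NULL. The column names age and name are hypothetical placeholders, and age > 30 is expressed as NOT (age <= 30) because the builder exposes lessThan and lessThanEquals rather than a greater-than method.

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class SearchArgumentSketch {
    public static void main(String[] args) {
        // Column names "age" and "name" are hypothetical.
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
                .startAnd()
                .startNot()
                .lessThanEquals("age", PredicateLeaf.Type.LONG, 30L)  // NOT (age <= 30), i.e. age > 30
                .end()
                .startNot()
                .isNull("name", PredicateLeaf.Type.STRING)            // name IS NOT NULL
                .end()
                .end()
                .build();
        System.out.println(sarg);
    }
}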
Example #1
Source File: HiveORCAccessorTest.java    From pxf with Apache License 2.0
@Test
public void parseFilterWithIn() {

    SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .in("FOO", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
            .end()
            .build();
    String expected = toKryo(sarg);

    // _1_ IN (1,2,3)
    context.setFilterString("a1m1007s1d1s1d2s1d3o10");
    try {
        accessor.openForRead();
    } catch (Exception e) {
        // Ignore exception thrown by openForRead complaining about file foo not found
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example #2
Source File: OrcStorage.java    From spork with Apache License 2.0
@VisibleForTesting
SearchArgument getSearchArgument(Expression expr) {
    if (expr == null) {
        return null;
    }
    Builder builder = SearchArgumentFactory.newBuilder();
    // The builder expects a logical operator (AND/OR/NOT) at the root, so a bare
    // comparison expression is wrapped in an AND before it is translated.
    boolean beginWithAnd = !(expr.getOpType().equals(OpType.OP_AND)
            || expr.getOpType().equals(OpType.OP_OR)
            || expr.getOpType().equals(OpType.OP_NOT));
    if (beginWithAnd) {
        builder.startAnd();
    }
    buildSearchArgument(expr, builder);
    if (beginWithAnd) {
        builder.end();
    }
    return builder.build();
}
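As an illustration of the wrapping above, a lone comparison such as x == 5 carries no logical operator at its root, so the resulting SearchArgument places it under a single AND. A hand-built equivalent (the column name x is hypothetical) would be:

SearchArgument single = SearchArgumentFactory.newBuilder()
        .startAnd()
        .equals("x", PredicateLeaf.Type.LONG, 5L)  // hypothetical column "x"
        .end()
        .build();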
 
Example #3
Source File: HiveORCAccessorTest.java    From pxf with Apache License 2.0
@Test
public void parseFilterWithISNULL() {
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .isNull("FOO", PredicateLeaf.Type.STRING)
            .end()
            .build();
    String expected = toKryo(sarg);

    // _1_ IS NULL
    context.setFilterString("a1o8");
    try {
        accessor.openForRead();
    } catch (Exception e) {
        // Ignore exception thrown by openForRead complaining about file foo not found
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example #4
Source File: HiveORCAccessorTest.java    From pxf with Apache License 2.0
@Test
public void parseFilterWithISNOTNULL() {

    SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .startNot()
            .isNull("FOO", PredicateLeaf.Type.STRING)
            .end()
            .end()
            .build();
    String expected = toKryo(sarg);

    // _1_ IS NOT NULL
    context.setFilterString("a1o9");
    try {
        accessor.openForRead();
    } catch (Exception e) {
        // Ignore exception thrown by openForRead complaining about file foo not found
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example #5
Source File: ORCSearchArgumentGenerator.java    From dremio-oss with Apache License 2.0
ORCSearchArgumentGenerator(final List<String> columnNames, List<HiveReaderProto.ColumnInfo> columnInfos) {
  super(true);
  this.columnNames = columnNames;
  this.columnInfos = columnInfos;
  // All predicates produced by this generator are conjoined under a single AND.
  sargBuilder = SearchArgumentFactory.newBuilder();
  sargBuilder.startAnd();
}
 
Example #6
Source File: OrcRowInputFormat.java    From Flink-CEPplus with Apache License 2.0
@Override
public void open(FileInputSplit fileSplit) throws IOException {

	LOG.debug("Opening ORC file {}", fileSplit.getPath());

	// open ORC file and create reader
	org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(fileSplit.getPath().getPath());
	Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf));

	// get offset and length for the stripes that start in the split
	Tuple2<Long, Long> offsetAndLength = getOffsetAndLengthForSplit(fileSplit, getStripes(orcReader));

	// create ORC row reader configuration
	Reader.Options options = getOptions(orcReader)
		.schema(schema)
		.range(offsetAndLength.f0, offsetAndLength.f1)
		.useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf))
		.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
		.tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));

	// configure filters
	if (!conjunctPredicates.isEmpty()) {
		SearchArgument.Builder b = SearchArgumentFactory.newBuilder();
		b = b.startAnd();
		for (Predicate predicate : conjunctPredicates) {
			predicate.add(b);
		}
		b = b.end();
		options.searchArgument(b.build(), new String[]{});
	}

	// configure selected fields
	options.include(computeProjectionMask());

	// create ORC row reader
	this.orcRowsReader = orcReader.rows(options);

	// assign ids
	this.schema.getId();
	// create row batch
	this.rowBatch = schema.createRowBatch(batchSize);
	rowsInBatch = 0;
	nextRow = 0;
}
 
Example #7
Source File: HiveORCSearchArgumentBuilder.java    From pxf with Apache License 2.0
public HiveORCSearchArgumentBuilder(List<ColumnDescriptor> tupleDescription, Configuration configuration) {
    // The Configuration overload passes job settings through to the builder.
    this.filterBuilder = SearchArgumentFactory.newBuilder(configuration);
    this.columnDescriptors = tupleDescription;
}
 
Example #8
Source File: OrcShimV200.java    From flink with Apache License 2.0
@Override
public RecordReader createRecordReader(
		Configuration conf,
		TypeDescription schema,
		int[] selectedFields,
		List<Predicate> conjunctPredicates,
		org.apache.flink.core.fs.Path path,
		long splitStart,
		long splitLength) throws IOException {
	// open ORC file and create reader
	Path hPath = new Path(path.toUri());

	Reader orcReader = createReader(hPath, conf);

	// get offset and length for the stripes that start in the split
	Tuple2<Long, Long> offsetAndLength = getOffsetAndLengthForSplit(
			splitStart, splitLength, orcReader.getStripes());

	// create ORC row reader configuration
	Reader.Options options = readOrcConf(
			new Reader.Options().schema(schema).range(offsetAndLength.f0, offsetAndLength.f1),
			conf);

	// configure filters
	if (!conjunctPredicates.isEmpty()) {
		SearchArgument.Builder b = SearchArgumentFactory.newBuilder();
		b = b.startAnd();
		for (Predicate predicate : conjunctPredicates) {
			predicate.add(b);
		}
		b = b.end();
		options.searchArgument(b.build(), new String[]{});
	}

	// configure selected fields
	options.include(computeProjectionMask(schema, selectedFields));

	// create ORC row reader
	RecordReader orcRowsReader = createRecordReader(orcReader, options);

	// assign ids
	schema.getId();

	return orcRowsReader;
}