Java Code Examples for org.apache.hadoop.hive.ql.io.sarg.SearchArgument

The following examples show how to use org.apache.hadoop.hive.ql.io.sarg.SearchArgument. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: pxf   Source File: HiveORCAccessor.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates the filter string held by the request context into a Hive
 * {@link SearchArgument} by means of a {@link HiveORCSearchArgumentBuilder}
 * and stores the serialized result in the JobConf under
 * {@code ConvertAstToSearchArg.SARG_PUSHDOWN}. Nothing is done when the
 * context carries no filter.
 *
 * @throws Exception if any step of the translation fails
 */
private void addFilters() throws Exception {
    if (context.hasFilter()) {
        /* Predicate push-down configuration */
        final String serializedFilter = context.getFilterString();

        final HiveORCSearchArgumentBuilder sargVisitor =
                new HiveORCSearchArgumentBuilder(context.getTupleDescription(), configuration);

        // Turn the filter string into an expression tree
        final Node expressionTree = new FilterParser().parse(serializedFilter);
        // PRUNER and sargVisitor are applied during a single traversal: the tree is
        // pruned to supported operators and the visitor accumulates the ORC SearchArgument
        TRAVERSER.traverse(expressionTree, PRUNER, sargVisitor);

        final SearchArgument pushDown = sargVisitor.getFilterBuilder().build();
        jobConf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(pushDown));
    }
}
 
Example 2
Source Project: pxf   Source File: HiveORCAccessorTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void parseFilterWithIn() {
    // Expected pushdown value: AND( FOO IN (1, 2, 3) ), serialized with toKryo
    String expected = toKryo(
            SearchArgumentFactory.newBuilder()
                    .startAnd()
                    .in("FOO", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
                    .end()
                    .build());

    // _1_ IN (1,2,3)
    context.setFilterString("a1m1007s1d1s1d2s1d3o10");
    try {
        accessor.openForRead();
    } catch (Exception ignored) {
        // Deliberately ignored: openForRead complains about file foo not being found;
        // the test only inspects what ended up in the JobConf
    }

    String actual = accessor.getJobConf().get(SARG_PUSHDOWN);
    assertEquals(expected, actual);
}
 
Example 3
Source Project: spork   Source File: OrcStorage.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link SearchArgument} from the given expression.
 *
 * @param expr expression to convert; may be {@code null}
 * @return the built search argument, or {@code null} when {@code expr} is {@code null}
 */
@VisibleForTesting
SearchArgument getSearchArgument(Expression expr) {
    if (expr == null) {
        return null;
    }
    Builder sargBuilder = SearchArgumentFactory.newBuilder();
    // A root that is not itself AND / OR / NOT is wrapped in an explicit AND group
    boolean wrapInAnd = !expr.getOpType().equals(OpType.OP_AND)
            && !expr.getOpType().equals(OpType.OP_OR)
            && !expr.getOpType().equals(OpType.OP_NOT);
    if (wrapInAnd) {
        sargBuilder.startAnd();
    }
    buildSearchArgument(expr, sargBuilder);
    if (wrapInAnd) {
        sargBuilder.end();
    }
    return sargBuilder.build();
}
 
Example 4
Source Project: Flink-CEPplus   Source File: OrcRowInputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds an IN leaf for {@code columnName} to the given builder, after passing
 * every literal through {@code castLiteral}.
 *
 * @param builder builder to extend
 * @return the builder returned by {@code builder.in(...)}
 */
@Override
protected SearchArgument.Builder add(SearchArgument.Builder builder) {
	Object[] converted = new Object[literals.length];
	int pos = 0;
	while (pos < literals.length) {
		converted[pos] = castLiteral(literals[pos]);
		pos++;
	}
	// converted is already an Object[], so it is passed as the varargs array itself
	return builder.in(columnName, literalType, converted);
}
 
Example 5
Source Project: Flink-CEPplus   Source File: OrcRowInputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens an OR group on the given builder, lets each child predicate add
 * itself to it in order, and closes the group.
 *
 * @param builder builder to extend
 * @return the builder returned by closing the OR group
 */
@Override
protected SearchArgument.Builder add(SearchArgument.Builder builder) {
	SearchArgument.Builder orGroup = builder.startOr();
	for (Predicate child : preds) {
		// each child returns the builder to continue with
		orGroup = child.add(orGroup);
	}
	SearchArgument.Builder closed = orGroup.end();
	return closed;
}
 
Example 6
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	// timestamp pred
	OrcRowInputFormat.Equals timePred = new OrcRowInputFormat.Equals(
		"time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100"));
	// date pred
	OrcRowInputFormat.Equals datePred = new OrcRowInputFormat.Equals(
		"date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"));
	// register both predicates combined with OR
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Or(timePred, datePred));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// spy on the format so getOptions(..) returns an Options instance this test can inspect
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);
	Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(spiedFormat).getOptions(any());

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// the search argument set on the options must reflect the OR of both leaves
	SearchArgument pushedDown = capturedOptions.getSearchArgument();
	assertNotNull(pushedDown);
	assertEquals("(or leaf-0 leaf-1)", pushedDown.getExpression().toString());
	assertEquals(2, pushedDown.getLeaves().size());
	List<PredicateLeaf> leafList = pushedDown.getLeaves();
	PredicateLeaf timeLeaf = leafList.get(0);
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", timeLeaf.toString());
	PredicateLeaf dateLeaf = leafList.get(1);
	assertEquals("(EQUALS date 1900-12-25)", dateLeaf.toString());
}
 
Example 7
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	// decimal pred
	OrcRowInputFormat.Equals decimalPred = new OrcRowInputFormat.Equals(
		"_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5));
	// register the negated predicate
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Not(decimalPred));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// spy on the format so getOptions(..) returns an Options instance this test can inspect
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);
	Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(spiedFormat).getOptions(any());

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// the search argument set on the options must be NOT over a single leaf
	SearchArgument pushedDown = capturedOptions.getSearchArgument();
	assertNotNull(pushedDown);
	assertEquals("(not leaf-0)", pushedDown.getExpression().toString());
	assertEquals(1, pushedDown.getLeaves().size());
	List<PredicateLeaf> leafList = pushedDown.getLeaves();
	PredicateLeaf onlyLeaf = leafList.get(0);
	assertEquals("(EQUALS _col0 -1000.5)", onlyLeaf.toString());
}
 
Example 8
Source Project: flink   Source File: OrcRowInputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds an IN leaf for {@code columnName} to the given builder, after passing
 * every literal through {@code castLiteral}.
 *
 * @param builder builder to extend
 * @return the builder returned by {@code builder.in(...)}
 */
@Override
protected SearchArgument.Builder add(SearchArgument.Builder builder) {
	Object[] converted = new Object[literals.length];
	int pos = 0;
	while (pos < literals.length) {
		converted[pos] = castLiteral(literals[pos]);
		pos++;
	}
	// converted is already an Object[], so it is passed as the varargs array itself
	return builder.in(columnName, literalType, converted);
}
 
Example 9
Source Project: flink   Source File: OrcRowInputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens an OR group on the given builder, lets each child predicate add
 * itself to it in order, and closes the group.
 *
 * @param builder builder to extend
 * @return the builder returned by closing the OR group
 */
@Override
protected SearchArgument.Builder add(SearchArgument.Builder builder) {
	SearchArgument.Builder orGroup = builder.startOr();
	for (Predicate child : preds) {
		// each child returns the builder to continue with
		orGroup = child.add(orGroup);
	}
	SearchArgument.Builder closed = orGroup.end();
	return closed;
}
 
Example 10
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	// timestamp pred
	OrcRowInputFormat.Equals timePred = new OrcRowInputFormat.Equals(
		"time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100"));
	// date pred
	OrcRowInputFormat.Equals datePred = new OrcRowInputFormat.Equals(
		"date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"));
	// register both predicates combined with OR
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Or(timePred, datePred));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// spy on the format so getOptions(..) returns an Options instance this test can inspect
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);
	Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(spiedFormat).getOptions(any());

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// the search argument set on the options must reflect the OR of both leaves
	SearchArgument pushedDown = capturedOptions.getSearchArgument();
	assertNotNull(pushedDown);
	assertEquals("(or leaf-0 leaf-1)", pushedDown.getExpression().toString());
	assertEquals(2, pushedDown.getLeaves().size());
	List<PredicateLeaf> leafList = pushedDown.getLeaves();
	PredicateLeaf timeLeaf = leafList.get(0);
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", timeLeaf.toString());
	PredicateLeaf dateLeaf = leafList.get(1);
	assertEquals("(EQUALS date 1900-12-25)", dateLeaf.toString());
}
 
Example 11
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	// decimal pred
	OrcRowInputFormat.Equals decimalPred = new OrcRowInputFormat.Equals(
		"_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5));
	// register the negated predicate
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Not(decimalPred));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// spy on the format so getOptions(..) returns an Options instance this test can inspect
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);
	Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(spiedFormat).getOptions(any());

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// the search argument set on the options must be NOT over a single leaf
	SearchArgument pushedDown = capturedOptions.getSearchArgument();
	assertNotNull(pushedDown);
	assertEquals("(not leaf-0)", pushedDown.getExpression().toString());
	assertEquals(1, pushedDown.getLeaves().size());
	List<PredicateLeaf> leafList = pushedDown.getLeaves();
	PredicateLeaf onlyLeaf = leafList.get(0);
	assertEquals("(EQUALS _col0 -1000.5)", onlyLeaf.toString());
}
 
Example 12
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testIsNotNull() throws Exception {
    // NOT (_1_ IS NULL) -- ORCA transforms "is not null" to NOT ( a IS NULL )
    final String serializedFilter = "a1o8l2";
    final String expected = "leaf-0 = (IS_NULL cdate), expr = (not leaf-0)";

    SearchArgument.Builder sargBuilder = helper(serializedFilter, columnDescriptors);

    assertNotNull(sargBuilder);
    assertEquals(expected, sargBuilder.build().toString());
}
 
Example 13
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testIdFilter() throws Exception {
    // id = 1
    final String serializedFilter = "a0c20s1d1o5";
    // single filters are wrapped in and; the built argument prints the bare leaf as its expression
    final String expected = "leaf-0 = (EQUALS id 1), expr = leaf-0";

    SearchArgument.Builder sargBuilder = helper(serializedFilter, columnDescriptors);

    assertNotNull(sargBuilder);
    assertEquals(expected, sargBuilder.build().toString());
}
 
Example 14
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testDateAndAmtFilter() throws Exception {
    // cdate > '2008-02-01' and cdate < '2008-12-01' and amt > 1200
    final String serializedFilter = "a1c25s10d2008-02-01o2a1c25s10d2008-12-01o1l0a2c20s4d1200o2l0";
    // each "greater than" shows up as a negated LESS_THAN_EQUALS leaf
    final String expected = "leaf-0 = (LESS_THAN_EQUALS cdate 2008-02-01), leaf-1 = (LESS_THAN cdate 2008-12-01), leaf-2 = (LESS_THAN_EQUALS amt 1200), expr = (and (not leaf-0) leaf-1 (not leaf-2))";

    SearchArgument.Builder sargBuilder = helper(serializedFilter, columnDescriptors);

    assertNotNull(sargBuilder);
    assertEquals(expected, sargBuilder.build().toString());
}
 
Example 15
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testDateWithOrAndAmtFilter() throws Exception {
    // cdate > '2008-02-01' OR (cdate < '2008-12-01' AND <a0> > 1200)
    // NOTE(review): the third operand references attribute a0, which the expected
    // string renders as "id" (the test name suggests amt) -- confirm which was intended
    final String serializedFilter = "a1c1082s10d2008-02-01o2a1c1082s10d2008-12-01o1a0c23s4d1200o2l0l1";
    // the expected expression is the OR distributed over the AND
    final String expected = "leaf-0 = (LESS_THAN_EQUALS cdate 2008-02-01), leaf-1 = (LESS_THAN cdate 2008-12-01), leaf-2 = (LESS_THAN_EQUALS id 1200), expr = (and (or (not leaf-0) leaf-1) (or (not leaf-0) (not leaf-2)))";

    SearchArgument.Builder sargBuilder = helper(serializedFilter, columnDescriptors);

    assertNotNull(sargBuilder);
    assertEquals(expected, sargBuilder.build().toString());
}
 
Example 16
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testDateOrAmtFilter() throws Exception {
    // cdate > '2008-02-01' or amt > 1200
    final String serializedFilter = "a1c25s10d2008-02-01o2a2c20s4d1200o2l1";
    // each "greater than" shows up as a negated LESS_THAN_EQUALS leaf
    final String expected = "leaf-0 = (LESS_THAN_EQUALS cdate 2008-02-01), leaf-1 = (LESS_THAN_EQUALS amt 1200), expr = (or (not leaf-0) (not leaf-1))";

    SearchArgument.Builder sargBuilder = helper(serializedFilter, columnDescriptors);

    assertNotNull(sargBuilder);
    assertEquals(expected, sargBuilder.build().toString());
}
 
Example 17
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testIsNotNullOperator() throws Exception {
    // a3 IS NOT NULL
    final String serializedFilter = "a3o9";
    // expected form: a negated IS_NULL leaf
    final String expected = "leaf-0 = (IS_NULL grade), expr = (not leaf-0)";

    SearchArgument.Builder sargBuilder = helper(serializedFilter, columnDescriptors);

    assertNotNull(sargBuilder);
    assertEquals(expected, sargBuilder.build().toString());
}
 
Example 18
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testInOperator() throws Exception {
    // id IN (194 , 82756)
    final String serializedFilter = "a0m1016s3d194s5d82756o10";
    final String expected = "leaf-0 = (IN id 194 82756), expr = leaf-0";

    SearchArgument.Builder sargBuilder = helper(serializedFilter, columnDescriptors);

    assertNotNull(sargBuilder);
    assertEquals(expected, sargBuilder.build().toString());
}
 
Example 19
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testNotBoolean() throws Exception {
    // NOT a4
    final String serializedFilter = "a4c16s4dtrueo0l2";
    // expected form: negation of an EQUALS-true leaf
    final String expected = "leaf-0 = (EQUALS b true), expr = (not leaf-0)";

    SearchArgument.Builder sargBuilder = helper(serializedFilter, columnDescriptors);

    assertNotNull(sargBuilder);
    assertEquals(expected, sargBuilder.build().toString());
}
 
Example 20
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testBoolean() throws Exception {
    // a4
    final String serializedFilter = "a4c16s4dtrueo0";
    // expected form: an EQUALS-true leaf
    final String expected = "leaf-0 = (EQUALS b true), expr = leaf-0";

    SearchArgument.Builder sargBuilder = helper(serializedFilter, columnDescriptors);

    assertNotNull(sargBuilder);
    assertEquals(expected, sargBuilder.build().toString());
}
 
Example 21
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testNotInteger() throws Exception {
    // NOT a0 = 5
    final String serializedFilter = "a0c23s1d5o6";
    // expected form: negation of an EQUALS leaf
    final String expected = "leaf-0 = (EQUALS id 5), expr = (not leaf-0)";

    SearchArgument.Builder sargBuilder = helper(serializedFilter, columnDescriptors);

    assertNotNull(sargBuilder);
    assertEquals(expected, sargBuilder.build().toString());
}
 
Example 22
Source Project: pxf   Source File: HiveORCSearchArgumentBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses the serialized filter string, traverses the resulting tree with
 * {@code PRUNER} and a fresh {@link HiveORCSearchArgumentBuilder}, and
 * returns the filter builder that visitor accumulated.
 *
 * @param filterString serialized filter to parse
 * @param columnDescriptors column descriptors handed to the visitor
 * @return the visitor's {@link SearchArgument.Builder}
 * @throws Exception if parsing or traversal fails
 */
private SearchArgument.Builder helper(String filterString, List<ColumnDescriptor> columnDescriptors) throws Exception {
    final HiveORCSearchArgumentBuilder sargVisitor =
            new HiveORCSearchArgumentBuilder(columnDescriptors, new Configuration());
    // The parsed expression tree is fed straight into the traversal
    TRAVERSER.traverse(new FilterParser().parse(filterString), PRUNER, sargVisitor);
    return sargVisitor.getFilterBuilder();
}
 
Example 23
Source Project: pxf   Source File: HiveORCAccessorTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void parseFilterWithISNULL() {
    // Expected pushdown value: AND( FOO IS NULL ), serialized with toKryo
    SearchArgument expectedSarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .isNull("FOO", PredicateLeaf.Type.STRING)
            .end()
            .build();
    String expected = toKryo(expectedSarg);

    context.setFilterString("a1o8");
    try {
        accessor.openForRead();
    } catch (Exception ignored) {
        // Deliberately ignored: openForRead complains about file foo not being found;
        // the test only inspects what ended up in the JobConf
    }

    String actual = accessor.getJobConf().get(SARG_PUSHDOWN);
    assertEquals(expected, actual);
}
 
Example 24
Source Project: pxf   Source File: HiveORCAccessorTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void parseFilterWithISNOTNULL() {
    // Expected pushdown value: AND( NOT( FOO IS NULL ) ), serialized with toKryo
    SearchArgument expectedSarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .startNot()
            .isNull("FOO", PredicateLeaf.Type.STRING)
            .end()
            .end()
            .build();
    String expected = toKryo(expectedSarg);

    context.setFilterString("a1o9");
    try {
        accessor.openForRead();
    } catch (Exception ignored) {
        // Deliberately ignored: openForRead complains about file foo not being found;
        // the test only inspects what ended up in the JobConf
    }

    String actual = accessor.getJobConf().get(SARG_PUSHDOWN);
    assertEquals(expected, actual);
}
 
Example 25
Source Project: dremio-oss   Source File: HiveUtilities.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Serializes a {@link SearchArgument} with Kryo and encodes the resulting
 * bytes as a Base64 string.
 *
 * @param sarg the search argument to serialize
 * @return the Base64 text of the Kryo-serialized search argument
 */
public static String encodeSearchArgumentAsBas64(final SearchArgument sarg) {
  // Output is created with sizes of 4 KiB and 10 MiB (presumably initial and maximum buffer size)
  try (Output buffer = new Output(4 * 1024, 10 * 1024 * 1024)) {
    final Kryo serializer = new Kryo();
    serializer.writeObject(buffer, sarg);
    buffer.flush();
    final byte[] serialized = buffer.toBytes();
    return Base64.encodeBase64String(serialized);
  }
}
 
Example 26
Source Project: dremio-oss   Source File: HiveUtilities.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Serializes a {@link SearchArgument} with Kryo and encodes the resulting
 * bytes as a Base64 string.
 *
 * @param sarg the search argument to serialize
 * @return the Base64 text of the Kryo-serialized search argument
 */
public static String encodeSearchArgumentAsBas64(final SearchArgument sarg) {
  // Output is created with sizes of 4 KiB and 10 MiB (presumably initial and maximum buffer size)
  try (Output buffer = new Output(4 * 1024, 10 * 1024 * 1024)) {
    final Kryo serializer = new Kryo();
    serializer.writeObject(buffer, sarg);
    buffer.flush();
    final byte[] serialized = buffer.toBytes();
    return Base64.encodeBase64String(serialized);
  }
}
 
Example 27
Source Project: flink   Source File: OrcSplitReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds an IN leaf for {@code columnName} to the given builder, after passing
 * every literal through {@code castLiteral}.
 *
 * @param builder builder to extend
 * @return the builder returned by {@code builder.in(...)}
 */
@Override
public SearchArgument.Builder add(SearchArgument.Builder builder) {
	Object[] converted = new Object[literals.length];
	int pos = 0;
	while (pos < literals.length) {
		converted[pos] = castLiteral(literals[pos]);
		pos++;
	}
	// converted is already an Object[], so it is passed as the varargs array itself
	return builder.in(columnName, literalType, converted);
}
 
Example 28
Source Project: flink   Source File: OrcSplitReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens an OR group on the given builder, lets each child predicate add
 * itself to it in order, and closes the group.
 *
 * @param builder builder to extend
 * @return the builder returned by closing the OR group
 */
@Override
public SearchArgument.Builder add(SearchArgument.Builder builder) {
	SearchArgument.Builder orGroup = builder.startOr();
	for (Predicate child : preds) {
		// each child returns the builder to continue with
		orGroup = child.add(orGroup);
	}
	SearchArgument.Builder closed = orGroup.end();
	return closed;
}
 
Example 29
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	// timestamp pred
	OrcSplitReader.Equals timePred = new OrcSplitReader.Equals(
		"time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100"));
	// date pred
	OrcSplitReader.Equals datePred = new OrcSplitReader.Equals(
		"date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"));
	// register both predicates combined with OR
	rowOrcInputFormat.addPredicate(new OrcSplitReader.Or(timePred, datePred));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// wrap the format in a spy and open it on the first split
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// the search argument read back from the record reader must reflect the OR of both leaves
	SearchArgument pushedDown = getSearchArgument(spiedFormat.getReader().getRecordReader());
	assertNotNull(pushedDown);
	assertEquals("(or leaf-0 leaf-1)", pushedDown.getExpression().toString());
	assertEquals(2, pushedDown.getLeaves().size());
	List<PredicateLeaf> leafList = pushedDown.getLeaves();
	PredicateLeaf timeLeaf = leafList.get(0);
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", timeLeaf.toString());
	PredicateLeaf dateLeaf = leafList.get(1);
	assertEquals("(EQUALS date 1900-12-25)", dateLeaf.toString());
}
 
Example 30
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	// decimal pred
	OrcSplitReader.Equals decimalPred = new OrcSplitReader.Equals(
		"_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5));
	// register the negated predicate
	rowOrcInputFormat.addPredicate(new OrcSplitReader.Not(decimalPred));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// wrap the format in a spy and open it on the first split
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// the search argument read back from the record reader must be NOT over a single leaf
	SearchArgument pushedDown = getSearchArgument(spiedFormat.getReader().getRecordReader());
	assertNotNull(pushedDown);
	assertEquals("(not leaf-0)", pushedDown.getExpression().toString());
	assertEquals(1, pushedDown.getLeaves().size());
	List<PredicateLeaf> leafList = pushedDown.getLeaves();
	PredicateLeaf onlyLeaf = leafList.get(0);
	assertEquals("(EQUALS _col0 -1000.5)", onlyLeaf.toString());
}