org.apache.hadoop.hive.ql.io.sarg.SearchArgument Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.sarg.SearchArgument. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveORCAccessor.java    From pxf with Apache License 2.0 6 votes vote down vote up
/**
 * Configures ORC predicate push-down for the current request.
 * <p>
 * When the request context carries a filter, the filter string is parsed
 * into an expression tree, pruned, and traversed with a
 * {@link HiveORCSearchArgumentBuilder} to produce a Hive
 * {@link SearchArgument}. The {@code toKryo} form of that SearchArgument is
 * stored in the JobConf under {@code ConvertAstToSearchArg.SARG_PUSHDOWN}.
 * Nothing is set when the context has no filter.
 *
 * @throws Exception propagated from the parse, traverse, build or
 *                   serialization steps below
 */
private void addFilters() throws Exception {
    if (!context.hasFilter()) {
        return;
    }

    /* Predicate push-down configuration */
    String filterStr = context.getFilterString();

    HiveORCSearchArgumentBuilder searchArgumentBuilder =
            new HiveORCSearchArgumentBuilder(context.getTupleDescription(), configuration);

    // Parse the filter string into an expression tree
    Node root = new FilterParser().parse(filterStr);

    // Prune the parsed tree down to the supported operators, then walk the
    // pruned tree with the builder so it accumulates the ORC SearchArgument
    TRAVERSER.traverse(root, PRUNER, searchArgumentBuilder);

    SearchArgument searchArgument = searchArgumentBuilder.getFilterBuilder().build();
    jobConf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(searchArgument));
}
 
Example #2
Source File: HiveORCAccessorTest.java    From pxf with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an IN filter string ends up in the JobConf as the
 * {@code toKryo} form of the equivalent hand-built SearchArgument.
 */
@Test
public void parseFilterWithIn() {
    SearchArgument expectedSarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .in("FOO", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
            .end()
            .build();
    String expected = toKryo(expectedSarg);

    // _1_ IN (1,2,3)
    context.setFilterString("a1m1007s1d1s1d2s1d3o10");
    try {
        accessor.openForRead();
    } catch (Exception ignored) {
        // openForRead complains that file foo is not found; irrelevant for this check
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example #3
Source File: OrcStorage.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a SearchArgument from the given expression.
 * <p>
 * When the top-level operator is not AND, OR or NOT, the expression is
 * wrapped in an explicit {@code startAnd()} / {@code end()} pair.
 *
 * @param expr expression to convert; may be null
 * @return the built SearchArgument, or null when {@code expr} is null
 */
@VisibleForTesting
SearchArgument getSearchArgument(Expression expr) {
    if (expr == null) {
        return null;
    }

    Builder sargBuilder = SearchArgumentFactory.newBuilder();
    boolean rootIsLogicalOp = expr.getOpType().equals(OpType.OP_AND)
            || expr.getOpType().equals(OpType.OP_OR)
            || expr.getOpType().equals(OpType.OP_NOT);

    if (rootIsLogicalOp) {
        // The root already opens its own group
        buildSearchArgument(expr, sargBuilder);
    } else {
        sargBuilder.startAnd();
        buildSearchArgument(expr, sargBuilder);
        sargBuilder.end();
    }
    return sargBuilder.build();
}
 
Example #4
Source File: TestOrcStoragePushdown.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testUnSupportedFields() throws Exception {
    // Struct, Map and Bag are not supported
    // TODO: Change the test to use ORCStorage to test OrcStorage.getPredicateFields()
    final String expected = "leaf-0 = (EQUALS srcid 10)\n" +
            "expr = leaf-0";
    String script = query
            + "b = filter a by srcid == 10 and browser#'type' == 'IE';"
            + "store b into 'out';";

    Expression predicate = getExpressionForTest(script, Arrays.asList("srcid"));
    SearchArgument pushed = orcStorage.getSearchArgument(predicate);

    // Only the srcid comparison is expected in the SearchArgument
    assertEquals(expected, pushed.toString());
}
 
Example #5
Source File: OrcStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the given predicate expression into a SearchArgument and stores
 * its {@code toKryo()} form in the UDF properties of this class under the
 * key {@code signature + SearchArgsSuffix}. Nothing is stored when
 * {@code getSearchArgument} returns null.
 *
 * @param expr predicate expression to push down
 * @throws IOException if the SearchArgument cannot be serialized or stored;
 *                     the underlying failure is attached as the cause
 */
@Override
public void setPushdownPredicate(Expression expr) throws IOException {
    SearchArgument sArg = getSearchArgument(expr);
    if (sArg == null) {
        return;
    }

    log.info("Pushdown predicate expression is " + expr);
    log.info("Pushdown predicate SearchArgument is:\n" + sArg);
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    try {
        p.setProperty(signature + SearchArgsSuffix, sArg.toKryo());
    } catch (Exception e) {
        // Chain the original exception so the root failure is not lost
        throw new IOException("Cannot serialize SearchArgument: " + sArg, e);
    }
}
 
Example #6
Source File: TestOrcStoragePushdown.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testNegativeMatchesExpr() throws Exception {
    final String storeStmt = "store b into 'out';";
    final String srcidOnly = "leaf-0 = (EQUALS srcid 10)\n" +
            "expr = leaf-0";

    // matches operator is not a supported op type
    String script = query + "b = filter a by name matches 'foo*';" + storeStmt;
    Expression predicate = getExpressionForTest(script, Arrays.asList("name"));
    Assert.assertNull(predicate);
    SearchArgument pushed = orcStorage.getSearchArgument(predicate);
    Assert.assertNull(pushed);

    // AND with the matches clause on the left-hand side
    script = query + "b = filter a by name matches 'foo*' and srcid == 10;" + storeStmt;
    predicate = getExpressionForTest(script, Arrays.asList("srcid", "name"));
    pushed = orcStorage.getSearchArgument(predicate);
    assertEquals(srcidOnly, pushed.toString());

    // AND with the matches clause on the right-hand side
    script = query + "b = filter a by srcid == 10 and name matches 'foo*';" + storeStmt;
    predicate = getExpressionForTest(script, Arrays.asList("srcid", "name"));
    pushed = orcStorage.getSearchArgument(predicate);
    assertEquals(srcidOnly, pushed.toString());

    // OR - Nothing should be pushed
    script = query + "b = filter a by name matches 'foo*' or srcid == 10;" + storeStmt;
    predicate = getExpressionForTest(script, Arrays.asList("srcid", "name"));
    Assert.assertNull(predicate);
    pushed = orcStorage.getSearchArgument(predicate);
    Assert.assertNull(pushed);
}
 
Example #7
Source File: TestOrcStoragePushdown.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testInExpression() throws Exception {
    // TODO: Add support for OP_IN expression type
    final String expected = "leaf-0 = (EQUALS srcid 10)\n" +
            "leaf-1 = (EQUALS srcid 11)\n" +
            "expr = (or leaf-0 leaf-1)";
    String script = query + "b = filter a by srcid == 10 or srcid == 11;" + "store b into 'out';";

    Expression predicate = getExpressionForTest(script, Arrays.asList("srcid"));
    SearchArgument pushed = orcStorage.getSearchArgument(predicate);

    // Two equality leaves joined by OR stand in for the IN list
    assertEquals(expected, pushed.toString());
}
 
Example #8
Source File: TestOrcStoragePushdown.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testBetweenExpression() throws Exception {
    // TODO: Add support for OP_BETWEEN expression type
    final String expected = "leaf-0 = (LESS_THAN_EQUALS srcid 10)\n" +
            "leaf-1 = (LESS_THAN srcid 20)\n" +
            "expr = (or (not leaf-0) leaf-1)";
    String script = query + "b = filter a by srcid > 10 or srcid < 20;" + "store b into 'out';";

    Expression predicate = getExpressionForTest(script, Arrays.asList("srcid"));
    SearchArgument pushed = orcStorage.getSearchArgument(predicate);

    // "srcid > 10" is expected as a negated LESS_THAN_EQUALS leaf
    assertEquals(expected, pushed.toString());
}
 
Example #9
Source File: TestOrcStoragePushdown.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testNot() throws Exception {
    final String expected = "leaf-0 = (EQUALS srcid 10)\n" +
            "leaf-1 = (IS_NULL mrkt)\n" +
            "expr = (and (not leaf-0) (not leaf-1))";
    String script = query + "b = filter a by srcid != 10 and mrkt is not null;" + "store b into 'out';";

    Expression predicate =
            getExpressionForTest(script, Arrays.asList("srcid", "dstid", "name", "mrkt"));
    SearchArgument pushed = orcStorage.getSearchArgument(predicate);

    // Both "!=" and "is not null" are expected as negated positive leaves
    assertEquals(expected, pushed.toString());
}
 
Example #10
Source File: TestOrcStoragePushdown.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testAndOr() throws Exception {
    final String expected = "leaf-0 = (LESS_THAN_EQUALS srcid 10)\n" +
            "leaf-1 = (LESS_THAN_EQUALS dstid 5)\n" +
            "leaf-2 = (EQUALS name foo)\n" +
            "leaf-3 = (IS_NULL mrkt)\n" +
            "expr = (and (or (not leaf-0) leaf-1) leaf-2 leaf-3)";
    String script = query + "b = filter a by (srcid > 10 or dstid <= 5) and name == 'foo' and mrkt is null;" + "store b into 'out';";

    Expression predicate =
            getExpressionForTest(script, Arrays.asList("srcid", "dstid", "name", "mrkt"));
    SearchArgument pushed = orcStorage.getSearchArgument(predicate);

    // The nested OR group and the two AND-ed conditions appear under a single AND
    assertEquals(expected, pushed.toString());
}
 
Example #11
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat = new OrcRowInputFormat(
		getPath(TEST_FILE_TIMETYPES),
		TEST_SCHEMA_TIMETYPES,
		new Configuration());

	// timestamp pred
	OrcSplitReader.Equals timeEquals = new OrcSplitReader.Equals(
		"time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100"));
	// date pred
	OrcSplitReader.Equals dateEquals = new OrcSplitReader.Equals(
		"date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"));
	// OR of both predicates
	rowOrcInputFormat.addPredicate(new OrcSplitReader.Or(timeEquals, dateEquals));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// verify predicate configuration
	SearchArgument pushed = getSearchArgument(spiedFormat.getReader().getRecordReader());
	assertNotNull(pushed);
	assertEquals("(or leaf-0 leaf-1)", pushed.getExpression().toString());
	assertEquals(2, pushed.getLeaves().size());

	List<PredicateLeaf> predicateLeaves = pushed.getLeaves();
	PredicateLeaf timeLeaf = predicateLeaves.get(0);
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", timeLeaf.toString());
	PredicateLeaf dateLeaf = predicateLeaves.get(1);
	assertEquals("(EQUALS date 1900-12-25)", dateLeaf.toString());
}
 
Example #12
Source File: HiveUtilities.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a SearchArgument with Kryo and returns the written bytes as a
 * base64 string.
 *
 * @param sarg the SearchArgument to encode
 * @return base64 text of the Kryo-written bytes
 */
public static String encodeSearchArgumentAsBas64(final SearchArgument sarg) {
  final int initialBufferSize = 4 * 1024;
  final int maxBufferSize = 10 * 1024 * 1024;

  try (Output kryoOut = new Output(initialBufferSize, maxBufferSize)) {
    final Kryo kryo = new Kryo();
    kryo.writeObject(kryoOut, sarg);
    kryoOut.flush();

    final byte[] serialized = kryoOut.toBytes();
    return Base64.encodeBase64String(serialized);
  }
}
 
Example #13
Source File: OrcSplitReader.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Passes every literal through {@code castLiteral} and adds an IN leaf for
 * {@code columnName} with the resulting values to the builder.
 *
 * @param builder builder to extend
 * @return the builder returned by the {@code in(...)} call
 */
@Override
public SearchArgument.Builder add(SearchArgument.Builder builder) {
	final int literalCount = literals.length;
	Object[] inValues = new Object[literalCount];
	for (int pos = 0; pos < literalCount; pos++) {
		inValues[pos] = castLiteral(literals[pos]);
	}
	// inValues is already an Object[], so it is handed over as the varargs array
	return builder.in(columnName, literalType, inValues);
}
 
Example #14
Source File: OrcSplitReader.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens an OR group on the builder, lets each child predicate add itself
 * in iteration order, and closes the group.
 *
 * @param builder builder to extend
 * @return the builder returned by closing the OR group
 */
@Override
public SearchArgument.Builder add(SearchArgument.Builder builder) {
	SearchArgument.Builder orGroup = builder.startOr();
	for (Predicate child : preds) {
		// each child returns the builder to continue with
		orGroup = child.add(orGroup);
	}
	return orGroup.end();
}
 
Example #15
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Test
public void testBoolean() throws Exception {
    // a4
    final String expected = "leaf-0 = (EQUALS b true), expr = leaf-0";

    SearchArgument.Builder builder = helper("a4c16s4dtrueo0", columnDescriptors);

    assertNotNull(builder);
    SearchArgument built = builder.build();
    assertEquals(expected, built.toString());
}
 
Example #16
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat = new OrcRowInputFormat(
		getPath(TEST_FILE_DECIMAL),
		TEST_SCHEMA_DECIMAL,
		new Configuration());

	// decimal pred
	OrcSplitReader.Equals decimalEquals = new OrcSplitReader.Equals(
		"_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5));
	rowOrcInputFormat.addPredicate(new OrcSplitReader.Not(decimalEquals));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// verify predicate configuration
	SearchArgument pushed = getSearchArgument(spiedFormat.getReader().getRecordReader());
	assertNotNull(pushed);
	assertEquals("(not leaf-0)", pushed.getExpression().toString());
	assertEquals(1, pushed.getLeaves().size());

	List<PredicateLeaf> predicateLeaves = pushed.getLeaves();
	PredicateLeaf decimalLeaf = predicateLeaves.get(0);
	assertEquals("(EQUALS _col0 -1000.5)", decimalLeaf.toString());
}
 
Example #17
Source File: HiveUtilities.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes a SearchArgument as base64 text: the object is written with a
 * fresh Kryo instance into an in-memory output, and the written bytes are
 * base64-encoded.
 *
 * @param sarg the SearchArgument to encode
 * @return the base64 string of the written bytes
 */
public static String encodeSearchArgumentAsBas64(final SearchArgument sarg) {
  try (Output sink = new Output(4 * 1024, 10 * 1024 * 1024)) {
    final Kryo serializer = new Kryo();
    serializer.writeObject(sink, sarg);
    sink.flush();

    final byte[] payload = sink.toBytes();
    return Base64.encodeBase64String(payload);
  }
}
 
Example #18
Source File: TestOrcStoragePushdown.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testSimple() throws Exception {
    final String expected = "leaf-0 = (EQUALS srcid 10)\n" +
            "expr = leaf-0";
    String script = query + "b = filter a by srcid == 10;" + "store b into 'out';";

    Expression predicate = getExpressionForTest(script, Arrays.asList("srcid"));
    SearchArgument pushed = orcStorage.getSearchArgument(predicate);

    // A single equality is expected as one leaf with no enclosing operator
    assertEquals(expected, pushed.toString());
}
 
Example #19
Source File: HiveORCAccessorTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the "a1o9" filter string ends up in the JobConf as the
 * {@code toKryo} form of a NOT(IS NULL) SearchArgument on "FOO".
 */
@Test
public void parseFilterWithISNOTNULL() {
    SearchArgument expectedSarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .startNot()
            .isNull("FOO", PredicateLeaf.Type.STRING)
            .end()
            .end()
            .build();
    String expected = toKryo(expectedSarg);

    context.setFilterString("a1o9");
    try {
        accessor.openForRead();
    } catch (Exception ignored) {
        // openForRead complains that file foo is not found; irrelevant for this check
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example #20
Source File: HiveORCAccessorTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the "a1o8" filter string ends up in the JobConf as the
 * {@code toKryo} form of an IS NULL SearchArgument on "FOO".
 */
@Test
public void parseFilterWithISNULL() {
    SearchArgument expectedSarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .isNull("FOO", PredicateLeaf.Type.STRING)
            .end()
            .build();
    String expected = toKryo(expectedSarg);

    context.setFilterString("a1o8");
    try {
        accessor.openForRead();
    } catch (Exception ignored) {
        // openForRead complains that file foo is not found; irrelevant for this check
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example #21
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a filter string, traverses the resulting tree with a new
 * HiveORCSearchArgumentBuilder, and returns that builder's filter builder.
 *
 * @param filterString      serialized filter to parse
 * @param columnDescriptors column descriptors handed to the builder
 * @return the SearchArgument.Builder accumulated by the traversal
 * @throws Exception propagated from parsing or traversal
 */
private SearchArgument.Builder helper(String filterString, List<ColumnDescriptor> columnDescriptors) throws Exception {
    Configuration conf = new Configuration();
    HiveORCSearchArgumentBuilder sargVisitor = new HiveORCSearchArgumentBuilder(columnDescriptors, conf);

    // Turn the filter string into an expression tree
    FilterParser parser = new FilterParser();
    Node expressionRoot = parser.parse(filterString);

    TRAVERSER.traverse(expressionRoot, PRUNER, sargVisitor);
    return sargVisitor.getFilterBuilder();
}
 
Example #22
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Test
public void testNotInteger() throws Exception {
    // NOT a0 = 5
    final String expected = "leaf-0 = (EQUALS id 5), expr = (not leaf-0)";

    SearchArgument.Builder builder = helper("a0c23s1d5o6", columnDescriptors);

    assertNotNull(builder);
    SearchArgument built = builder.build();
    assertEquals(expected, built.toString());
}
 
Example #23
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Test
public void testNotBoolean() throws Exception {
    // NOT a4
    final String expected = "leaf-0 = (EQUALS b true), expr = (not leaf-0)";

    SearchArgument.Builder builder = helper("a4c16s4dtrueo0l2", columnDescriptors);

    assertNotNull(builder);
    SearchArgument built = builder.build();
    assertEquals(expected, built.toString());
}
 
Example #24
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Test
public void testInOperator() throws Exception {
    // id IN (194 , 82756)
    final String expected = "leaf-0 = (IN id 194 82756), expr = leaf-0";

    SearchArgument.Builder builder = helper("a0m1016s3d194s5d82756o10", columnDescriptors);

    assertNotNull(builder);
    SearchArgument built = builder.build();
    assertEquals(expected, built.toString());
}
 
Example #25
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Test
public void testIsNotNullOperator() throws Exception {
    // a3 IS NOT NULL
    final String expected = "leaf-0 = (IS_NULL grade), expr = (not leaf-0)";

    SearchArgument.Builder builder = helper("a3o9", columnDescriptors);

    assertNotNull(builder);
    SearchArgument built = builder.build();
    assertEquals(expected, built.toString());
}
 
Example #26
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Test
public void testDateOrAmtFilter() throws Exception {
    // cdate > '2008-02-01' or amt > 1200
    final String expected = "leaf-0 = (LESS_THAN_EQUALS cdate 2008-02-01), leaf-1 = (LESS_THAN_EQUALS amt 1200), expr = (or (not leaf-0) (not leaf-1))";

    SearchArgument.Builder builder = helper("a1c25s10d2008-02-01o2a2c20s4d1200o2l1", columnDescriptors);

    assertNotNull(builder);
    SearchArgument built = builder.build();
    assertEquals(expected, built.toString());
}
 
Example #27
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Test
public void testDateWithOrAndAmtFilter() throws Exception {
    // cdate > '2008-02-01' OR (cdate < '2008-12-01' AND amt > 1200)
    final String expected = "leaf-0 = (LESS_THAN_EQUALS cdate 2008-02-01), leaf-1 = (LESS_THAN cdate 2008-12-01), leaf-2 = (LESS_THAN_EQUALS id 1200), expr = (and (or (not leaf-0) leaf-1) (or (not leaf-0) (not leaf-2)))";

    SearchArgument.Builder builder =
            helper("a1c1082s10d2008-02-01o2a1c1082s10d2008-12-01o1a0c23s4d1200o2l0l1", columnDescriptors);

    assertNotNull(builder);
    SearchArgument built = builder.build();
    assertEquals(expected, built.toString());
}
 
Example #28
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Test
public void testDateAndAmtFilter() throws Exception {
    // cdate > '2008-02-01' and cdate < '2008-12-01' and amt > 1200
    final String expected = "leaf-0 = (LESS_THAN_EQUALS cdate 2008-02-01), leaf-1 = (LESS_THAN cdate 2008-12-01), leaf-2 = (LESS_THAN_EQUALS amt 1200), expr = (and (not leaf-0) leaf-1 (not leaf-2))";

    SearchArgument.Builder builder =
            helper("a1c25s10d2008-02-01o2a1c25s10d2008-12-01o1l0a2c20s4d1200o2l0", columnDescriptors);

    assertNotNull(builder);
    SearchArgument built = builder.build();
    assertEquals(expected, built.toString());
}
 
Example #29
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Test
public void testIdFilter() throws Exception {
    // id = 1
    final String expected = "leaf-0 = (EQUALS id 1), expr = leaf-0";

    SearchArgument.Builder builder = helper("a0c20s1d1o5", columnDescriptors);

    assertNotNull(builder);
    // NOTE(review): an earlier comment said single filters are wrapped in "and",
    // but the asserted string shows the bare leaf as the expression - confirm
    SearchArgument built = builder.build();
    assertEquals(expected, built.toString());
}
 
Example #30
Source File: HiveORCSearchArgumentBuilderTest.java    From pxf with Apache License 2.0 5 votes vote down vote up
@Test
public void testIsNotNull() throws Exception {
    // NOT (_1_ IS NULL)
    // ORCA transforms is not null to NOT ( a IS NULL )
    final String expected = "leaf-0 = (IS_NULL cdate), expr = (not leaf-0)";

    SearchArgument.Builder builder = helper("a1o8l2", columnDescriptors);

    assertNotNull(builder);
    SearchArgument built = builder.build();
    assertEquals(expected, built.toString());
}