Java Code Examples for org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf

The following examples show how to use org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: OrcTableSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates Flink type information into the corresponding {@link PredicateLeaf.Type}.
 *
 * @param type the Flink type information to translate
 * @return the matching predicate leaf type, or {@code null} if the type is not supported
 */
private PredicateLeaf.Type toOrcType(TypeInformation<?> type) {
	// every integral type is mapped to LONG
	if (type == BasicTypeInfo.BYTE_TYPE_INFO
			|| type == BasicTypeInfo.SHORT_TYPE_INFO
			|| type == BasicTypeInfo.INT_TYPE_INFO
			|| type == BasicTypeInfo.LONG_TYPE_INFO) {
		return PredicateLeaf.Type.LONG;
	}
	// both floating point types are mapped to FLOAT
	if (type == BasicTypeInfo.FLOAT_TYPE_INFO || type == BasicTypeInfo.DOUBLE_TYPE_INFO) {
		return PredicateLeaf.Type.FLOAT;
	}
	if (type == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
		return PredicateLeaf.Type.BOOLEAN;
	}
	if (type == BasicTypeInfo.STRING_TYPE_INFO) {
		return PredicateLeaf.Type.STRING;
	}
	if (type == SqlTimeTypeInfo.TIMESTAMP) {
		return PredicateLeaf.Type.TIMESTAMP;
	}
	if (type == SqlTimeTypeInfo.DATE) {
		return PredicateLeaf.Type.DATE;
	}
	if (type == BasicTypeInfo.BIG_DEC_TYPE_INFO) {
		return PredicateLeaf.Type.DECIMAL;
	}
	// no mapping available for this type
	return null;
}
 
Example 2
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a configured {@code OrcRowInputFormat}, restores it, and checks that the
 * restored copy can open a split and return the expected first row.
 */
@Test
public void testSerialization() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());

	// configure projection and a predicate before serializing
	rowOrcInputFormat.selectFields(0, 4, 1);
	OrcRowInputFormat.Equals col1EqualsM =
		new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M");
	rowOrcInputFormat.addPredicate(col1EqualsM);

	// round trip through serialization
	byte[] serialized = InstantiationUtil.serializeObject(rowOrcInputFormat);
	ClassLoader loader = getClass().getClassLoader();
	OrcRowInputFormat restored = InstantiationUtil.deserializeObject(serialized, loader);

	FileInputSplit[] inputSplits = restored.createInputSplits(1);
	restored.openInputFormat();
	restored.open(inputSplits[0]);
	assertFalse(restored.reachedEnd());
	Row firstRow = restored.nextRecord(null);

	assertNotNull(firstRow);
	assertEquals(3, firstRow.getArity());
	// values of the first row
	assertEquals(1, firstRow.getField(0));
	assertEquals(500, firstRow.getField(1));
	assertEquals("M", firstRow.getField(2));
}
 
Example 3
Source Project: flink   Source File: OrcTableSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the {@link PredicateLeaf.Type} that corresponds to a Flink type.
 *
 * @param type the Flink type information to convert
 * @return the corresponding predicate leaf type, or {@code null} for unsupported types
 */
private PredicateLeaf.Type toOrcType(TypeInformation<?> type) {
	// BYTE, SHORT, INT and LONG all share the LONG leaf type
	final boolean integral = type == BasicTypeInfo.BYTE_TYPE_INFO
		|| type == BasicTypeInfo.SHORT_TYPE_INFO
		|| type == BasicTypeInfo.INT_TYPE_INFO
		|| type == BasicTypeInfo.LONG_TYPE_INFO;
	if (integral) {
		return PredicateLeaf.Type.LONG;
	}

	// FLOAT and DOUBLE share the FLOAT leaf type
	final boolean floatingPoint = type == BasicTypeInfo.FLOAT_TYPE_INFO
		|| type == BasicTypeInfo.DOUBLE_TYPE_INFO;
	if (floatingPoint) {
		return PredicateLeaf.Type.FLOAT;
	}

	if (type == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
		return PredicateLeaf.Type.BOOLEAN;
	}
	if (type == BasicTypeInfo.STRING_TYPE_INFO) {
		return PredicateLeaf.Type.STRING;
	}
	if (type == SqlTimeTypeInfo.TIMESTAMP) {
		return PredicateLeaf.Type.TIMESTAMP;
	}
	if (type == SqlTimeTypeInfo.DATE) {
		return PredicateLeaf.Type.DATE;
	}
	if (type == BasicTypeInfo.BIG_DEC_TYPE_INFO) {
		return PredicateLeaf.Type.DECIMAL;
	}

	// unsupported type
	return null;
}
 
Example 4
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that an {@code OrcRowInputFormat} with selected fields and a predicate survives a
 * serialize/deserialize round trip and still reads the expected first row.
 */
@Test
public void testSerialization() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());

	rowOrcInputFormat.selectFields(0, 4, 1);
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

	// serialize, then rebuild the format from the raw bytes
	final byte[] payload = InstantiationUtil.serializeObject(rowOrcInputFormat);
	final OrcRowInputFormat deserialized =
		InstantiationUtil.deserializeObject(payload, getClass().getClassLoader());

	// open the first split on the deserialized instance
	final FileInputSplit[] allSplits = deserialized.createInputSplits(1);
	deserialized.openInputFormat();
	deserialized.open(allSplits[0]);
	assertFalse(deserialized.reachedEnd());

	final Row record = deserialized.nextRecord(null);
	assertNotNull(record);
	assertEquals(3, record.getArity());
	// expected content of the first row
	assertEquals(1, record.getField(0));
	assertEquals(500, record.getField(1));
	assertEquals("M", record.getField(2));
}
 
Example 5
Source Project: pxf   Source File: HiveORCAccessorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a filter string describing an IN condition results in the same serialized
 * search argument as one built directly with the search argument builder.
 */
@Test
public void parseFilterWithIn() {
    SearchArgument inArgument = SearchArgumentFactory
            .newBuilder()
            .startAnd()
            .in("FOO", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
            .end()
            .build();
    String expectedKryo = toKryo(inArgument);

    // _1_ IN (1,2,3)
    context.setFilterString("a1m1007s1d1s1d2s1d3o10");
    try {
        accessor.openForRead();
    } catch (Exception ignored) {
        // openForRead complains that file foo cannot be found; that failure is irrelevant here
    }

    assertEquals(expectedKryo, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example 6
Source Project: flink   Source File: OrcTableSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts Flink type information to the equivalent {@link PredicateLeaf.Type}.
 *
 * @param type the Flink type information to convert
 * @return the equivalent predicate leaf type, or {@code null} when no equivalent exists
 */
private PredicateLeaf.Type toOrcType(TypeInformation<?> type) {
	// single-type mappings first; the comparisons are independent of each other
	if (type == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
		return PredicateLeaf.Type.BOOLEAN;
	}
	if (type == BasicTypeInfo.STRING_TYPE_INFO) {
		return PredicateLeaf.Type.STRING;
	}
	if (type == SqlTimeTypeInfo.TIMESTAMP) {
		return PredicateLeaf.Type.TIMESTAMP;
	}
	if (type == SqlTimeTypeInfo.DATE) {
		return PredicateLeaf.Type.DATE;
	}
	if (type == BasicTypeInfo.BIG_DEC_TYPE_INFO) {
		return PredicateLeaf.Type.DECIMAL;
	}

	// grouped mappings: floating point types, then integral types
	if (type == BasicTypeInfo.FLOAT_TYPE_INFO || type == BasicTypeInfo.DOUBLE_TYPE_INFO) {
		return PredicateLeaf.Type.FLOAT;
	}
	if (type == BasicTypeInfo.BYTE_TYPE_INFO
			|| type == BasicTypeInfo.SHORT_TYPE_INFO
			|| type == BasicTypeInfo.INT_TYPE_INFO
			|| type == BasicTypeInfo.LONG_TYPE_INFO) {
		return PredicateLeaf.Type.LONG;
	}

	// unsupported type
	return null;
}
 
Example 7
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes an {@code OrcRowInputFormat} configured with a projection and an
 * {@code OrcSplitReader.Equals} predicate, deserializes it, and checks the first row read
 * by the copy.
 */
@Test
public void testSerialization() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());

	rowOrcInputFormat.selectFields(0, 4, 1);
	OrcSplitReader.Equals col1EqualsM =
		new OrcSplitReader.Equals("_col1", PredicateLeaf.Type.STRING, "M");
	rowOrcInputFormat.addPredicate(col1EqualsM);

	// serialization round trip
	byte[] rawBytes = InstantiationUtil.serializeObject(rowOrcInputFormat);
	ClassLoader testClassLoader = getClass().getClassLoader();
	OrcRowInputFormat clone = InstantiationUtil.deserializeObject(rawBytes, testClassLoader);

	FileInputSplit[] cloneSplits = clone.createInputSplits(1);
	clone.openInputFormat();
	clone.open(cloneSplits[0]);
	assertFalse(clone.reachedEnd());
	Row head = clone.nextRecord(null);

	assertNotNull(head);
	assertEquals(3, head.getArity());
	// expected values of the first row
	assertEquals(1, head.getField(0));
	assertEquals(500, head.getField(1));
	assertEquals("M", head.getField(2));
}
 
Example 8
Source Project: Flink-CEPplus   Source File: OrcTableSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Determines the predicate leaf type of the literal operand of a binary comparison.
 *
 * @param comp the comparison whose literal operand is inspected
 * @return the result of {@code toOrcType} for the literal's result type
 */
private PredicateLeaf.Type getLiteralType(BinaryComparison comp) {
	// pick whichever side of the comparison holds the literal
	final Literal literal = literalOnRight(comp)
		? (Literal) comp.right()
		: (Literal) comp.left();
	return toOrcType(literal.resultType());
}
 
Example 9
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds an OR of a timestamp predicate and a date predicate and verifies the search argument
 * that ends up in the reader options.
 */
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	// timestamp predicate
	OrcRowInputFormat.Equals timeEquals = new OrcRowInputFormat.Equals(
		"time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100"));
	// date predicate
	OrcRowInputFormat.Equals dateEquals = new OrcRowInputFormat.Equals(
		"date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"));
	// combine both with OR
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Or(timeEquals, dateEquals));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// hand our own options object to the spied format so its configuration can be inspected
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);
	Reader.Options readerOptions = new Reader.Options();
	doReturn(readerOptions).when(spiedFormat).getOptions(any());

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// inspect the configured search argument
	SearchArgument searchArgument = readerOptions.getSearchArgument();
	assertNotNull(searchArgument);
	assertEquals("(or leaf-0 leaf-1)", searchArgument.getExpression().toString());
	List<PredicateLeaf> predicateLeaves = searchArgument.getLeaves();
	assertEquals(2, predicateLeaves.size());
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", predicateLeaves.get(0).toString());
	assertEquals("(EQUALS date 1900-12-25)", predicateLeaves.get(1).toString());
}
 
Example 10
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds a negated decimal equality predicate and verifies the search argument that ends up
 * in the reader options.
 */
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	// decimal predicate, wrapped in NOT
	OrcRowInputFormat.Equals decimalEquals = new OrcRowInputFormat.Equals(
		"_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5));
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Not(decimalEquals));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// hand our own options object to the spied format so its configuration can be inspected
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);
	Reader.Options readerOptions = new Reader.Options();
	doReturn(readerOptions).when(spiedFormat).getOptions(any());

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// inspect the configured search argument
	SearchArgument searchArgument = readerOptions.getSearchArgument();
	assertNotNull(searchArgument);
	assertEquals("(not leaf-0)", searchArgument.getExpression().toString());
	List<PredicateLeaf> predicateLeaves = searchArgument.getLeaves();
	assertEquals(1, predicateLeaves.size());
	assertEquals("(EQUALS _col0 -1000.5)", predicateLeaves.get(0).toString());
}
 
Example 11
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalArgumentException} when a predicate that names the column
 * "unknown" is added to the input format.
 */
@Test(expected = IllegalArgumentException.class)
public void testPredicateWithInvalidColumn() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	OrcRowInputFormat.Equals unknownColumnPredicate =
		new OrcRowInputFormat.Equals("unknown", PredicateLeaf.Type.LONG, 42);
	rowOrcInputFormat.addPredicate(unknownColumnPredicate);
}
 
Example 12
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the flat test file with two predicates applied and checks the number of rows
 * returned.
 */
@Test
public void testReadFileWithFilter() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
	rowOrcInputFormat.selectFields(0, 1);

	// read head and tail of file: _col0 < 10 OR NOT (_col0 <= 1920000)
	OrcRowInputFormat.LessThan headOfFile =
		new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L);
	OrcRowInputFormat.Not tailOfFile = new OrcRowInputFormat.Not(
		new OrcRowInputFormat.LessThanEquals("_col0", PredicateLeaf.Type.LONG, 1920000L));
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Or(headOfFile, tailOfFile));
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, inputSplits.length);
	rowOrcInputFormat.openInputFormat();

	// open the only split
	rowOrcInputFormat.open(inputSplits[0]);

	// consume every record, counting as we go
	long rowCount = 0;
	for (; !rowOrcInputFormat.reachedEnd(); rowCount++) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
	}
	// check that only the first and last stripes of the file have been read.
	// Each stripe has 5000 rows, except the last which has 800 rows.
	assertEquals(5800, rowCount);
}
 
Example 13
Source Project: Flink-CEPplus   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the flat test file using a previous version of its schema, validates the first row
 * and checks the total number of rows returned.
 */
@Test
public void testReadFileWithEvolvedSchema() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(
		getPath(TEST_FILE_FLAT),
		"struct<_col0:int,_col1:string,_col4:string,_col3:string>", // previous version of schema
		new Configuration());
	rowOrcInputFormat.selectFields(3, 0, 2);

	OrcRowInputFormat.LessThan col0Below10 =
		new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L);
	rowOrcInputFormat.addPredicate(col0Below10);

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, inputSplits.length);
	rowOrcInputFormat.openInputFormat();

	// open the only split
	rowOrcInputFormat.open(inputSplits[0]);

	// the first row is validated field by field
	assertFalse(rowOrcInputFormat.reachedEnd());
	Row firstRow = rowOrcInputFormat.nextRecord(null);
	assertNotNull(firstRow);
	assertEquals(3, firstRow.getArity());
	assertEquals("Primary", firstRow.getField(0));
	assertEquals(1, firstRow.getField(1));
	assertEquals("M", firstRow.getField(2));

	// count the remaining rows; the first row has already been consumed
	long rowCount = 1;
	for (; !rowOrcInputFormat.reachedEnd(); rowCount++) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
	}
	// the expected total of 5000 rows matches a single full stripe
	// (presumably only the first stripe passes the _col0 < 10 predicate)
	assertEquals(5000, rowCount);
}
 
Example 14
Source Project: flink   Source File: OrcTableSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the predicate leaf type for the literal side of the given comparison.
 *
 * @param comp the binary comparison to inspect
 * @return the {@code toOrcType} mapping of the literal's result type
 */
private PredicateLeaf.Type getLiteralType(BinaryComparison comp) {
	// literal on the left-hand side
	if (!literalOnRight(comp)) {
		return toOrcType(((Literal) comp.left()).resultType());
	}
	// literal on the right-hand side
	return toOrcType(((Literal) comp.right()).resultType());
}
 
Example 15
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers a timestamp-or-date predicate and checks the expression and leaves of the
 * search argument set on the reader options.
 */
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	// OR of a timestamp predicate and a date predicate
	final OrcRowInputFormat.Equals onTimestamp = new OrcRowInputFormat.Equals(
		"time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100"));
	final OrcRowInputFormat.Equals onDate = new OrcRowInputFormat.Equals(
		"date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"));
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Or(onTimestamp, onDate));

	final FileInputSplit[] fileSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// stub getOptions on a spy so the options passed to the ORC reader can be examined
	final OrcRowInputFormat formatSpy = spy(rowOrcInputFormat);
	final Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(formatSpy).getOptions(any());

	formatSpy.openInputFormat();
	formatSpy.open(fileSplits[0]);

	// check the predicate configuration
	final SearchArgument pushedDown = capturedOptions.getSearchArgument();
	assertNotNull(pushedDown);
	assertEquals("(or leaf-0 leaf-1)", pushedDown.getExpression().toString());
	final List<PredicateLeaf> leafList = pushedDown.getLeaves();
	assertEquals(2, leafList.size());
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leafList.get(0).toString());
	assertEquals("(EQUALS date 1900-12-25)", leafList.get(1).toString());
}
 
Example 16
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers a negated decimal predicate and checks the expression and leaves of the search
 * argument set on the reader options.
 */
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	// NOT (_col0 = -1000.5)
	final OrcRowInputFormat.Equals onDecimal = new OrcRowInputFormat.Equals(
		"_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5));
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Not(onDecimal));

	final FileInputSplit[] fileSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// stub getOptions on a spy so the options passed to the ORC reader can be examined
	final OrcRowInputFormat formatSpy = spy(rowOrcInputFormat);
	final Reader.Options capturedOptions = new Reader.Options();
	doReturn(capturedOptions).when(formatSpy).getOptions(any());

	formatSpy.openInputFormat();
	formatSpy.open(fileSplits[0]);

	// check the predicate configuration
	final SearchArgument pushedDown = capturedOptions.getSearchArgument();
	assertNotNull(pushedDown);
	assertEquals("(not leaf-0)", pushedDown.getExpression().toString());
	final List<PredicateLeaf> leafList = pushedDown.getLeaves();
	assertEquals(1, leafList.size());
	assertEquals("(EQUALS _col0 -1000.5)", leafList.get(0).toString());
}
 
Example 17
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adding a predicate on the column name "unknown" is expected to fail with an
 * {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testPredicateWithInvalidColumn() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	final OrcRowInputFormat.Equals invalidColumnPredicate =
		new OrcRowInputFormat.Equals("unknown", PredicateLeaf.Type.LONG, 42);
	rowOrcInputFormat.addPredicate(invalidColumnPredicate);
}
 
Example 18
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Applies a range predicate and an equality predicate to the flat test file and checks how
 * many rows are returned.
 */
@Test
public void testReadFileWithFilter() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
	rowOrcInputFormat.selectFields(0, 1);

	// read head and tail of file
	final OrcRowInputFormat.LessThan below10 =
		new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L);
	final OrcRowInputFormat.Not above1920000 = new OrcRowInputFormat.Not(
		new OrcRowInputFormat.LessThanEquals("_col0", PredicateLeaf.Type.LONG, 1920000L));
	rowOrcInputFormat.addPredicate(new OrcRowInputFormat.Or(below10, above1920000));
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

	final FileInputSplit[] fileSplits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, fileSplits.length);
	rowOrcInputFormat.openInputFormat();

	// open split
	rowOrcInputFormat.open(fileSplits[0]);

	// read all rows and keep a running total
	long total = 0;
	for (; !rowOrcInputFormat.reachedEnd(); total++) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
	}
	// check that only the first and last stripes of the file have been read.
	// Each stripe has 5000 rows, except the last which has 800 rows.
	assertEquals(5800, total);
}
 
Example 19
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens the flat test file with a previous version of its schema, checks the first row and
 * then the overall number of rows read.
 */
@Test
public void testReadFileWithEvolvedSchema() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(
		getPath(TEST_FILE_FLAT),
		"struct<_col0:int,_col1:string,_col4:string,_col3:string>", // previous version of schema
		new Configuration());
	rowOrcInputFormat.selectFields(3, 0, 2);

	final OrcRowInputFormat.LessThan below10 =
		new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L);
	rowOrcInputFormat.addPredicate(below10);

	final FileInputSplit[] fileSplits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, fileSplits.length);
	rowOrcInputFormat.openInputFormat();

	// open split
	rowOrcInputFormat.open(fileSplits[0]);

	// validate the first row
	assertFalse(rowOrcInputFormat.reachedEnd());
	final Row head = rowOrcInputFormat.nextRecord(null);
	assertNotNull(head);
	assertEquals(3, head.getArity());
	assertEquals("Primary", head.getField(0));
	assertEquals(1, head.getField(1));
	assertEquals("M", head.getField(2));

	// count the rest, starting at 1 for the row already read
	long total = 1;
	for (; !rowOrcInputFormat.reachedEnd(); total++) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
	}
	// the expected total of 5000 rows matches a single full stripe
	// (presumably only the first stripe passes the _col0 < 10 predicate)
	assertEquals(5000, total);
}
 
Example 20
Source Project: pxf   Source File: HiveORCSearchArgumentBuilder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Determines the {@link PredicateLeaf.Type} that corresponds to a literal value.
 *
 * <p>For a non-empty {@link List}, the type is derived from the list's first element.
 *
 * @param literal the literal value, or a list of literal values
 * @return the predicate leaf type for the literal; never {@code null}
 * @throws IllegalArgumentException if the literal's type is not recognized; this includes
 *         a {@code null} literal and an empty list
 */
private PredicateLeaf.Type getType(Object literal) {
    if (literal instanceof Byte ||
            literal instanceof Short ||
            literal instanceof Integer ||
            literal instanceof Long) {
        return PredicateLeaf.Type.LONG;
    } else if (literal instanceof HiveChar ||
            literal instanceof HiveVarchar ||
            literal instanceof String) {
        return PredicateLeaf.Type.STRING;
    } else if (literal instanceof Float ||
            literal instanceof Double) {
        return PredicateLeaf.Type.FLOAT;
    } else if (literal instanceof Date) {
        return PredicateLeaf.Type.DATE;
    } else if (literal instanceof Timestamp) {
        return PredicateLeaf.Type.TIMESTAMP;
    } else if (literal instanceof HiveDecimal ||
            literal instanceof BigDecimal) {
        return PredicateLeaf.Type.DECIMAL;
    } else if (literal instanceof Boolean) {
        return PredicateLeaf.Type.BOOLEAN;
    } else if (literal instanceof List) {
        // a wildcard list needs no unchecked cast; only the first element is inspected
        List<?> values = (List<?>) literal;
        if (!values.isEmpty()) {
            return getType(values.get(0));
        }
        // an empty list carries no type information and falls through to the failure below
    }
    throw new IllegalArgumentException(String.format("Unknown type for literal %s", literal));
}
 
Example 21
Source Project: pxf   Source File: HiveORCAccessorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the filter string "a1o8" results in the same serialized search argument as an
 * IS NULL condition built directly with the search argument builder.
 */
@Test
public void parseFilterWithISNULL() {
    SearchArgument isNullArgument = SearchArgumentFactory
            .newBuilder()
            .startAnd()
            .isNull("FOO", PredicateLeaf.Type.STRING)
            .end()
            .build();
    String expectedKryo = toKryo(isNullArgument);

    context.setFilterString("a1o8");
    try {
        accessor.openForRead();
    } catch (Exception ignored) {
        // openForRead complains that file foo cannot be found; that failure is irrelevant here
    }

    assertEquals(expectedKryo, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example 22
Source Project: pxf   Source File: HiveORCAccessorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the filter string "a1o9" results in the same serialized search argument as a
 * negated IS NULL condition built directly with the search argument builder.
 */
@Test
public void parseFilterWithISNOTNULL() {
    SearchArgument isNotNullArgument = SearchArgumentFactory
            .newBuilder()
            .startAnd()
            .startNot()
            .isNull("FOO", PredicateLeaf.Type.STRING)
            .end()
            .end()
            .build();
    String expectedKryo = toKryo(isNotNullArgument);

    context.setFilterString("a1o9");
    try {
        accessor.openForRead();
    } catch (Exception ignored) {
        // openForRead complains that file foo cannot be found; that failure is irrelevant here
    }

    assertEquals(expectedKryo, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example 23
Source Project: flink   Source File: OrcTableSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the predicate leaf type of the literal contained in a binary comparison.
 *
 * @param comp the binary comparison holding a literal on one side
 * @return the {@code toOrcType} mapping of that literal's result type
 */
private PredicateLeaf.Type getLiteralType(BinaryComparison comp) {
	final Literal literalOperand;
	if (literalOnRight(comp)) {
		literalOperand = (Literal) comp.right();
	} else {
		literalOperand = (Literal) comp.left();
	}
	return toOrcType(literalOperand.resultType());
}
 
Example 24
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds an OR of a timestamp predicate and a date predicate and verifies the search argument
 * obtained from the opened record reader.
 */
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	// timestamp predicate
	OrcSplitReader.Equals timeEquals = new OrcSplitReader.Equals(
		"time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100"));
	// date predicate
	OrcSplitReader.Equals dateEquals = new OrcSplitReader.Equals(
		"date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"));
	// combine both with OR
	rowOrcInputFormat.addPredicate(new OrcSplitReader.Or(timeEquals, dateEquals));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// wrap the format in a spy and open it on the first split
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// inspect the search argument taken from the record reader
	SearchArgument searchArgument = getSearchArgument(spiedFormat.getReader().getRecordReader());
	assertNotNull(searchArgument);
	assertEquals("(or leaf-0 leaf-1)", searchArgument.getExpression().toString());
	List<PredicateLeaf> predicateLeaves = searchArgument.getLeaves();
	assertEquals(2, predicateLeaves.size());
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", predicateLeaves.get(0).toString());
	assertEquals("(EQUALS date 1900-12-25)", predicateLeaves.get(1).toString());
}
 
Example 25
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds a negated decimal equality predicate and verifies the search argument obtained from
 * the opened record reader.
 */
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	// decimal predicate, wrapped in NOT
	OrcSplitReader.Equals decimalEquals = new OrcSplitReader.Equals(
		"_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5));
	rowOrcInputFormat.addPredicate(new OrcSplitReader.Not(decimalEquals));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// wrap the format in a spy and open it on the first split
	OrcRowInputFormat spiedFormat = spy(rowOrcInputFormat);

	spiedFormat.openInputFormat();
	spiedFormat.open(inputSplits[0]);

	// inspect the search argument taken from the record reader
	SearchArgument searchArgument = getSearchArgument(spiedFormat.getReader().getRecordReader());
	assertNotNull(searchArgument);
	assertEquals("(not leaf-0)", searchArgument.getExpression().toString());
	List<PredicateLeaf> predicateLeaves = searchArgument.getLeaves();
	assertEquals(1, predicateLeaves.size());
	assertEquals("(EQUALS _col0 -1000.5)", predicateLeaves.get(0).toString());
}
 
Example 26
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalArgumentException} when an {@code OrcSplitReader.Equals}
 * predicate naming the column "unknown" is added to the input format.
 */
@Test(expected = IllegalArgumentException.class)
public void testPredicateWithInvalidColumn() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	OrcSplitReader.Equals unknownColumnPredicate =
		new OrcSplitReader.Equals("unknown", PredicateLeaf.Type.LONG, 42);
	rowOrcInputFormat.addPredicate(unknownColumnPredicate);
}
 
Example 27
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the flat test file with two {@code OrcSplitReader} predicates applied and checks
 * the number of rows returned.
 */
@Test
public void testReadFileWithFilter() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
	rowOrcInputFormat.selectFields(0, 1);

	// read head and tail of file: _col0 < 10 OR NOT (_col0 <= 1920000)
	OrcSplitReader.LessThan headOfFile =
		new OrcSplitReader.LessThan("_col0", PredicateLeaf.Type.LONG, 10L);
	OrcSplitReader.Not tailOfFile = new OrcSplitReader.Not(
		new OrcSplitReader.LessThanEquals("_col0", PredicateLeaf.Type.LONG, 1920000L));
	rowOrcInputFormat.addPredicate(new OrcSplitReader.Or(headOfFile, tailOfFile));
	rowOrcInputFormat.addPredicate(
		new OrcSplitReader.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, inputSplits.length);
	rowOrcInputFormat.openInputFormat();

	// open the only split
	rowOrcInputFormat.open(inputSplits[0]);

	// consume every record, counting as we go
	long rowCount = 0;
	for (; !rowOrcInputFormat.reachedEnd(); rowCount++) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
	}
	// check that only the first and last stripes of the file have been read.
	// Each stripe has 5000 rows, except the last which has 800 rows.
	assertEquals(5800, rowCount);
}
 
Example 28
Source Project: flink   Source File: OrcRowInputFormatTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the flat test file using a previous version of its schema together with an
 * {@code OrcSplitReader.LessThan} predicate, validates the first row and checks the total
 * number of rows returned.
 */
@Test
public void testReadFileWithEvolvedSchema() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(
		getPath(TEST_FILE_FLAT),
		"struct<_col0:int,_col1:string,_col4:string,_col3:string>", // previous version of schema
		new Configuration());
	rowOrcInputFormat.selectFields(3, 0, 2);

	OrcSplitReader.LessThan col0Below10 =
		new OrcSplitReader.LessThan("_col0", PredicateLeaf.Type.LONG, 10L);
	rowOrcInputFormat.addPredicate(col0Below10);

	FileInputSplit[] inputSplits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, inputSplits.length);
	rowOrcInputFormat.openInputFormat();

	// open the only split
	rowOrcInputFormat.open(inputSplits[0]);

	// the first row is validated field by field
	assertFalse(rowOrcInputFormat.reachedEnd());
	Row firstRow = rowOrcInputFormat.nextRecord(null);
	assertNotNull(firstRow);
	assertEquals(3, firstRow.getArity());
	assertEquals("Primary", firstRow.getField(0));
	assertEquals(1, firstRow.getField(1));
	assertEquals("M", firstRow.getField(2));

	// count the remaining rows; the first row has already been consumed
	long rowCount = 1;
	for (; !rowOrcInputFormat.reachedEnd(); rowCount++) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
	}
	// the expected total of 5000 rows matches a single full stripe
	// (presumably only the first stripe passes the _col0 < 10 predicate)
	assertEquals(5000, rowCount);
}
 
Example 29
Source Project: Flink-CEPplus   Source File: OrcRowInputFormat.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a predicate bound to a column name and a literal type.
 *
 * @param columnName the column name stored in this predicate
 * @param literalType the {@link PredicateLeaf.Type} stored in this predicate
 */
ColumnPredicate(String columnName, PredicateLeaf.Type literalType) {
	// the two assignments are independent of each other
	this.literalType = literalType;
	this.columnName = columnName;
}
 
Example 30
Source Project: Flink-CEPplus   Source File: OrcRowInputFormat.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a predicate over a column that additionally carries a literal value.
 *
 * @param columnName the column name, forwarded to the superclass constructor
 * @param literalType the {@link PredicateLeaf.Type}, forwarded to the superclass constructor
 * @param literal the serializable literal value stored in this predicate
 */
BinaryPredicate(String columnName, PredicateLeaf.Type literalType, Serializable literal) {
	super(columnName, literalType);
	this.literal = literal;
}