org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf. Each example comes from an open-source project (flink, Flink-CEPplus, or pxf); the source file and license are noted above each snippet.
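Before the examples, here is a minimal, self-contained sketch of the typical usage pattern: PredicateLeaf.Type declares the literal type of each predicate leaf, and the SearchArgumentFactory builder assembles the leaves into a SearchArgument for ORC/Hive predicate pushdown. The column names and literal values below are illustrative only, and the builder calls assume the typed sarg builder API used by the examples on this page.

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class PredicateLeafSketch {

	public static void main(String[] args) {
		// Build the predicate: _col0 < 10 AND _col1 = 'M'
		// (hypothetical column names; literal types are declared via PredicateLeaf.Type)
		SearchArgument sarg = SearchArgumentFactory.newBuilder()
			.startAnd()
				.lessThan("_col0", PredicateLeaf.Type.LONG, 10L)
				.equals("_col1", PredicateLeaf.Type.STRING, "M")
			.end()
			.build();

		// Each leaf exposes its column name, operator, and declared literal type.
		for (PredicateLeaf leaf : sarg.getLeaves()) {
			System.out.println(leaf.getColumnName() + " " + leaf.getOperator() + " " + leaf.getType());
		}
		System.out.println(sarg.getExpression()); // e.g. (and leaf-0 leaf-1)
	}
}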
Example #1
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testSerialization() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());

	rowOrcInputFormat.selectFields(0, 4, 1);
	rowOrcInputFormat.addPredicate(
		new OrcSplitReader.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

	byte[] bytes = InstantiationUtil.serializeObject(rowOrcInputFormat);
	OrcRowInputFormat copy = InstantiationUtil.deserializeObject(bytes, getClass().getClassLoader());

	FileInputSplit[] splits = copy.createInputSplits(1);
	copy.openInputFormat();
	copy.open(splits[0]);
	assertFalse(copy.reachedEnd());
	Row row = copy.nextRecord(null);

	assertNotNull(row);
	assertEquals(3, row.getArity());
	// check first row
	assertEquals(1, row.getField(0));
	assertEquals(500, row.getField(1));
	assertEquals("M", row.getField(2));
}
 
Example #2
Source File: OrcTableSource.java    From flink with Apache License 2.0
private PredicateLeaf.Type toOrcType(TypeInformation<?> type) {
	if (type == BasicTypeInfo.BYTE_TYPE_INFO ||
		type == BasicTypeInfo.SHORT_TYPE_INFO ||
		type == BasicTypeInfo.INT_TYPE_INFO ||
		type == BasicTypeInfo.LONG_TYPE_INFO) {
		return PredicateLeaf.Type.LONG;
	} else if (type == BasicTypeInfo.FLOAT_TYPE_INFO ||
		type == BasicTypeInfo.DOUBLE_TYPE_INFO) {
		return PredicateLeaf.Type.FLOAT;
	} else if (type == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
		return PredicateLeaf.Type.BOOLEAN;
	} else if (type == BasicTypeInfo.STRING_TYPE_INFO) {
		return PredicateLeaf.Type.STRING;
	} else if (type == SqlTimeTypeInfo.TIMESTAMP) {
		return PredicateLeaf.Type.TIMESTAMP;
	} else if (type == SqlTimeTypeInfo.DATE) {
		return PredicateLeaf.Type.DATE;
	} else if (type == BasicTypeInfo.BIG_DEC_TYPE_INFO) {
		return PredicateLeaf.Type.DECIMAL;
	} else {
		// unsupported type
		return null;
	}
}
 
Example #3
Source File: HiveORCAccessorTest.java    From pxf with Apache License 2.0
@Test
public void parseFilterWithIn() {

    SearchArgument sarg = SearchArgumentFactory.
            newBuilder().
            startAnd().
            in("FOO", PredicateLeaf.Type.LONG, 1L, 2L, 3L).
            end().
            build();
    String expected = toKryo(sarg);

    // _1_ IN (1,2,3)
    context.setFilterString("a1m1007s1d1s1d2s1d3o10");
    try {
        accessor.openForRead();
    } catch (Exception e) {
        // Ignore exception thrown by openForRead complaining about file foo not found
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example #4
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testSerialization() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());

	rowOrcInputFormat.selectFields(0, 4, 1);
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

	byte[] bytes = InstantiationUtil.serializeObject(rowOrcInputFormat);
	OrcRowInputFormat copy = InstantiationUtil.deserializeObject(bytes, getClass().getClassLoader());

	FileInputSplit[] splits = copy.createInputSplits(1);
	copy.openInputFormat();
	copy.open(splits[0]);
	assertFalse(copy.reachedEnd());
	Row row = copy.nextRecord(null);

	assertNotNull(row);
	assertEquals(3, row.getArity());
	// check first row
	assertEquals(1, row.getField(0));
	assertEquals(500, row.getField(1));
	assertEquals("M", row.getField(2));
}
 
Example #5
Source File: OrcTableSource.java    From flink with Apache License 2.0
private PredicateLeaf.Type toOrcType(TypeInformation<?> type) {
	if (type == BasicTypeInfo.BYTE_TYPE_INFO ||
		type == BasicTypeInfo.SHORT_TYPE_INFO ||
		type == BasicTypeInfo.INT_TYPE_INFO ||
		type == BasicTypeInfo.LONG_TYPE_INFO) {
		return PredicateLeaf.Type.LONG;
	} else if (type == BasicTypeInfo.FLOAT_TYPE_INFO ||
		type == BasicTypeInfo.DOUBLE_TYPE_INFO) {
		return PredicateLeaf.Type.FLOAT;
	} else if (type == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
		return PredicateLeaf.Type.BOOLEAN;
	} else if (type == BasicTypeInfo.STRING_TYPE_INFO) {
		return PredicateLeaf.Type.STRING;
	} else if (type == SqlTimeTypeInfo.TIMESTAMP) {
		return PredicateLeaf.Type.TIMESTAMP;
	} else if (type == SqlTimeTypeInfo.DATE) {
		return PredicateLeaf.Type.DATE;
	} else if (type == BasicTypeInfo.BIG_DEC_TYPE_INFO) {
		return PredicateLeaf.Type.DECIMAL;
	} else {
		// unsupported type
		return null;
	}
}
 
Example #6
Source File: OrcRowInputFormatTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSerialization() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());

	rowOrcInputFormat.selectFields(0, 4, 1);
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

	byte[] bytes = InstantiationUtil.serializeObject(rowOrcInputFormat);
	OrcRowInputFormat copy = InstantiationUtil.deserializeObject(bytes, getClass().getClassLoader());

	FileInputSplit[] splits = copy.createInputSplits(1);
	copy.openInputFormat();
	copy.open(splits[0]);
	assertFalse(copy.reachedEnd());
	Row row = copy.nextRecord(null);

	assertNotNull(row);
	assertEquals(3, row.getArity());
	// check first row
	assertEquals(1, row.getField(0));
	assertEquals(500, row.getField(1));
	assertEquals("M", row.getField(2));
}
 
Example #7
Source File: OrcTableSource.java    From Flink-CEPplus with Apache License 2.0
private PredicateLeaf.Type toOrcType(TypeInformation<?> type) {
	if (type == BasicTypeInfo.BYTE_TYPE_INFO ||
		type == BasicTypeInfo.SHORT_TYPE_INFO ||
		type == BasicTypeInfo.INT_TYPE_INFO ||
		type == BasicTypeInfo.LONG_TYPE_INFO) {
		return PredicateLeaf.Type.LONG;
	} else if (type == BasicTypeInfo.FLOAT_TYPE_INFO ||
		type == BasicTypeInfo.DOUBLE_TYPE_INFO) {
		return PredicateLeaf.Type.FLOAT;
	} else if (type == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
		return PredicateLeaf.Type.BOOLEAN;
	} else if (type == BasicTypeInfo.STRING_TYPE_INFO) {
		return PredicateLeaf.Type.STRING;
	} else if (type == SqlTimeTypeInfo.TIMESTAMP) {
		return PredicateLeaf.Type.TIMESTAMP;
	} else if (type == SqlTimeTypeInfo.DATE) {
		return PredicateLeaf.Type.DATE;
	} else if (type == BasicTypeInfo.BIG_DEC_TYPE_INFO) {
		return PredicateLeaf.Type.DECIMAL;
	} else {
		// unsupported type
		return null;
	}
}
 
Example #8
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testReadFileWithEvolvedSchema() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(
		getPath(TEST_FILE_FLAT),
		"struct<_col0:int,_col1:string,_col4:string,_col3:string>", // previous version of schema
		new Configuration());
	rowOrcInputFormat.selectFields(3, 0, 2);

	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();

	// open split
	rowOrcInputFormat.open(splits[0]);

	// read and validate first row
	assertFalse(rowOrcInputFormat.reachedEnd());
	Row row = rowOrcInputFormat.nextRecord(null);
	assertNotNull(row);
	assertEquals(3, row.getArity());
	assertEquals("Primary", row.getField(0));
	assertEquals(1, row.getField(1));
	assertEquals("M", row.getField(2));

	// read and count remaining rows
	long cnt = 1;
	while (!rowOrcInputFormat.reachedEnd()) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
		cnt++;
	}
	// check that only the first and last stripes of the file have been read.
	// Each stripe has 5000 rows, except the last which has 800 rows.
	assertEquals(5000, cnt);
}
 
Example #9
Source File: HiveORCAccessorTest.java    From pxf with Apache License 2.0
@Test
public void parseFilterWithISNOTNULL() {

    SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd().startNot().isNull("FOO", PredicateLeaf.Type.STRING).end().end().build();
    String expected = toKryo(sarg);

    context.setFilterString("a1o9");
    try {
        accessor.openForRead();
    } catch (Exception e) {
        // Ignore exception thrown by openForRead complaining about file foo not found
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example #10
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testReadFileWithFilter() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
	rowOrcInputFormat.selectFields(0, 1);

	// read head and tail of file
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Or(
			new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L),
			new OrcRowInputFormat.Not(
				new OrcRowInputFormat.LessThanEquals("_col0", PredicateLeaf.Type.LONG, 1920000L))
		));
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();

	// open split
	rowOrcInputFormat.open(splits[0]);

	// read and count all rows
	long cnt = 0;
	while (!rowOrcInputFormat.reachedEnd()) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
		cnt++;
	}
	// check that only the first and last stripes of the file have been read.
	// Each stripe has 5000 rows, except the last which has 800 rows.
	assertEquals(5800, cnt);
}
 
Example #11
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testPredicateWithInvalidColumn() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("unknown", PredicateLeaf.Type.LONG, 42));
}
 
Example #12
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	rowOrcInputFormat.addPredicate(
		// OR
		new OrcRowInputFormat.Or(
			// timestamp pred
			new OrcRowInputFormat.Equals("time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100")),
			// date pred
			new OrcRowInputFormat.Equals("date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25")))
		);

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	assertEquals("(or leaf-0 leaf-1)", sarg.getExpression().toString());
	assertEquals(2, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString());
	assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString());
}
 
Example #13
Source File: OrcRowInputFormatTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	rowOrcInputFormat.addPredicate(
		// OR
		new OrcRowInputFormat.Or(
			// timestamp pred
			new OrcRowInputFormat.Equals("time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100")),
			// date pred
			new OrcRowInputFormat.Equals("date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25")))
		);

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	assertEquals("(or leaf-0 leaf-1)", sarg.getExpression().toString());
	assertEquals(2, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString());
	assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString());
}
 
Example #14
Source File: OrcTableSource.java    From flink with Apache License 2.0
private PredicateLeaf.Type getLiteralType(BinaryComparison comp) {
	if (literalOnRight(comp)) {
		return toOrcType(((Literal) comp.right()).resultType());
	} else {
		return toOrcType(((Literal) comp.left()).resultType());
	}
}
 
Example #15
Source File: OrcRowInputFormatTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testReadFileWithEvolvedSchema() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(
		getPath(TEST_FILE_FLAT),
		"struct<_col0:int,_col1:string,_col4:string,_col3:string>", // previous version of schema
		new Configuration());
	rowOrcInputFormat.selectFields(3, 0, 2);

	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();

	// open split
	rowOrcInputFormat.open(splits[0]);

	// read and validate first row
	assertFalse(rowOrcInputFormat.reachedEnd());
	Row row = rowOrcInputFormat.nextRecord(null);
	assertNotNull(row);
	assertEquals(3, row.getArity());
	assertEquals("Primary", row.getField(0));
	assertEquals(1, row.getField(1));
	assertEquals("M", row.getField(2));

	// read and count remaining rows
	long cnt = 1;
	while (!rowOrcInputFormat.reachedEnd()) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
		cnt++;
	}
	// check that only the first and last stripes of the file have been read.
	// Each stripe has 5000 rows, except the last which has 800 rows.
	assertEquals(5000, cnt);
}
 
Example #16
Source File: OrcRowInputFormatTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testReadFileWithFilter() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
	rowOrcInputFormat.selectFields(0, 1);

	// read head and tail of file
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Or(
			new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L),
			new OrcRowInputFormat.Not(
				new OrcRowInputFormat.LessThanEquals("_col0", PredicateLeaf.Type.LONG, 1920000L))
		));
	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();

	// open split
	rowOrcInputFormat.open(splits[0]);

	// read and count all rows
	long cnt = 0;
	while (!rowOrcInputFormat.reachedEnd()) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
		cnt++;
	}
	// check that only the first and last stripes of the file have been read.
	// Each stripe has 5000 rows, except the last which has 800 rows.
	assertEquals(5800, cnt);
}
 
Example #17
Source File: OrcRowInputFormatTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Not(
			// decimal pred
			new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5))));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	assertEquals("(not leaf-0)", sarg.getExpression().toString());
	assertEquals(1, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}
 
Example #18
Source File: OrcRowInputFormatTest.java    From Flink-CEPplus with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testPredicateWithInvalidColumn() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Equals("unknown", PredicateLeaf.Type.LONG, 42));
}
 
Example #19
Source File: HiveORCSearchArgumentBuilder.java    From pxf with Apache License 2.0
/**
 * Get the PredicateLeaf type for the given literal.
 *
 * @param literal the literal object, or a non-empty list of literals of the same type
 * @return the matching PredicateLeaf.Type (LONG, STRING, FLOAT, DATE, TIMESTAMP, DECIMAL, or BOOLEAN)
 * @throws IllegalArgumentException if the literal's type is not recognized
 */
private PredicateLeaf.Type getType(Object literal) {
    if (literal instanceof Byte ||
            literal instanceof Short ||
            literal instanceof Integer ||
            literal instanceof Long) {
        return PredicateLeaf.Type.LONG;
    } else if (literal instanceof HiveChar ||
            literal instanceof HiveVarchar ||
            literal instanceof String) {
        return PredicateLeaf.Type.STRING;
    } else if (literal instanceof Float ||
            literal instanceof Double) {
        return PredicateLeaf.Type.FLOAT;
    } else if (literal instanceof Date) {
        return PredicateLeaf.Type.DATE;
    } else if (literal instanceof Timestamp) {
        return PredicateLeaf.Type.TIMESTAMP;
    } else if (literal instanceof HiveDecimal ||
            literal instanceof BigDecimal) {
        return PredicateLeaf.Type.DECIMAL;
    } else if (literal instanceof Boolean) {
        return PredicateLeaf.Type.BOOLEAN;
    } else if (literal instanceof List) {
        @SuppressWarnings("unchecked")
        List<Object> l = (List<Object>) literal;
        if (l.size() > 0)
            return getType(l.get(0));
    }
    throw new IllegalArgumentException(String.format("Unknown type for literal %s", literal));
}
 
Example #20
Source File: HiveORCAccessorTest.java    From pxf with Apache License 2.0
@Test
public void parseFilterWithISNULL() {
    SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd().isNull("FOO", PredicateLeaf.Type.STRING).end().build();
    String expected = toKryo(sarg);

    context.setFilterString("a1o8");
    try {
        accessor.openForRead();
    } catch (Exception e) {
        // Ignore exception thrown by openForRead complaining about file foo not found
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
 
Example #21
Source File: OrcTableSource.java    From flink with Apache License 2.0
private PredicateLeaf.Type getLiteralType(BinaryComparison comp) {
	if (literalOnRight(comp)) {
		return toOrcType(((Literal) comp.right()).resultType());
	} else {
		return toOrcType(((Literal) comp.left()).resultType());
	}
}
 
Example #22
Source File: OrcTableSource.java    From Flink-CEPplus with Apache License 2.0
private PredicateLeaf.Type getLiteralType(BinaryComparison comp) {
	if (literalOnRight(comp)) {
		return toOrcType(((Literal) comp.right()).resultType());
	} else {
		return toOrcType(((Literal) comp.left()).resultType());
	}
}
 
Example #23
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	rowOrcInputFormat.addPredicate(
		// OR
		new OrcSplitReader.Or(
			// timestamp pred
			new OrcSplitReader.Equals("time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100")),
			// date pred
			new OrcSplitReader.Equals("date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25")))
		);

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// spy on the input format to inspect the ORC reader configuration
	OrcRowInputFormat spy = spy(rowOrcInputFormat);

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = getSearchArgument(spy.getReader().getRecordReader());
	assertNotNull(sarg);
	assertEquals("(or leaf-0 leaf-1)", sarg.getExpression().toString());
	assertEquals(2, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString());
	assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString());
}
 
Example #24
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	rowOrcInputFormat.addPredicate(
		new OrcSplitReader.Not(
			// decimal pred
			new OrcSplitReader.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5))));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// spy on the input format to inspect the ORC reader configuration
	OrcRowInputFormat spy = spy(rowOrcInputFormat);

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = getSearchArgument(spy.getReader().getRecordReader());
	assertNotNull(sarg);
	assertEquals("(not leaf-0)", sarg.getExpression().toString());
	assertEquals(1, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}
 
Example #25
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testPredicateWithInvalidColumn() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	rowOrcInputFormat.addPredicate(
		new OrcSplitReader.Equals("unknown", PredicateLeaf.Type.LONG, 42));
}
 
Example #26
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testReadFileWithFilter() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
	rowOrcInputFormat.selectFields(0, 1);

	// read head and tail of file
	rowOrcInputFormat.addPredicate(
		new OrcSplitReader.Or(
			new OrcSplitReader.LessThan("_col0", PredicateLeaf.Type.LONG, 10L),
			new OrcSplitReader.Not(
				new OrcSplitReader.LessThanEquals("_col0", PredicateLeaf.Type.LONG, 1920000L))
		));
	rowOrcInputFormat.addPredicate(
		new OrcSplitReader.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();

	// open split
	rowOrcInputFormat.open(splits[0]);

	// read and count all rows
	long cnt = 0;
	while (!rowOrcInputFormat.reachedEnd()) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
		cnt++;
	}
	// check that only the first and last stripes of the file have been read.
	// Each stripe has 5000 rows, except the last which has 800 rows.
	assertEquals(5800, cnt);
}
 
Example #27
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0
@Test
public void testReadFileWithEvolvedSchema() throws IOException {

	rowOrcInputFormat = new OrcRowInputFormat(
		getPath(TEST_FILE_FLAT),
		"struct<_col0:int,_col1:string,_col4:string,_col3:string>", // previous version of schema
		new Configuration());
	rowOrcInputFormat.selectFields(3, 0, 2);

	rowOrcInputFormat.addPredicate(
		new OrcSplitReader.LessThan("_col0", PredicateLeaf.Type.LONG, 10L));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	assertEquals(1, splits.length);
	rowOrcInputFormat.openInputFormat();

	// open split
	rowOrcInputFormat.open(splits[0]);

	// read and validate first row
	assertFalse(rowOrcInputFormat.reachedEnd());
	Row row = rowOrcInputFormat.nextRecord(null);
	assertNotNull(row);
	assertEquals(3, row.getArity());
	assertEquals("Primary", row.getField(0));
	assertEquals(1, row.getField(1));
	assertEquals("M", row.getField(2));

	// read and count remaining rows
	long cnt = 1;
	while (!rowOrcInputFormat.reachedEnd()) {
		assertNotNull(rowOrcInputFormat.nextRecord(null));
		cnt++;
	}
	// check that only the first and last stripes of the file have been read.
	// Each stripe has 5000 rows, except the last which has 800 rows.
	assertEquals(5000, cnt);
}
 
Example #28
Source File: OrcRowInputFormatTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Not(
			// decimal pred
			new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5))));

	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	assertEquals("(not leaf-0)", sarg.getExpression().toString());
	assertEquals(1, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}
 
Example #29
Source File: OrcSplitReader.java    From flink with Apache License 2.0
ColumnPredicate(String columnName, PredicateLeaf.Type literalType) {
	this.columnName = columnName;
	this.literalType = literalType;
}
 
Example #30
Source File: OrcRowInputFormat.java    From Flink-CEPplus with Apache License 2.0
ColumnPredicate(String columnName, PredicateLeaf.Type literalType) {
	this.columnName = columnName;
	this.literalType = literalType;
}