org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf Java Examples
The following examples show how to use org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.
Each example is taken from an open-source project; the source file, project, and license are noted above the example.
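Before diving into the examples, here is a minimal, self-contained sketch of where a PredicateLeaf usually comes from: it is rarely constructed directly, but produced by building a SearchArgument through SearchArgumentFactory, where each comparison becomes one PredicateLeaf. The sketch assumes the newer builder methods that take an explicit PredicateLeaf.Type (the same style used by the examples below); the class name, column names, and literal values are made up for illustration.

import java.util.List;

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class PredicateLeafSketch {

    public static void main(String[] args) {
        // Build a SearchArgument equivalent to: _col0 < 10 AND _col1 = 'M'.
        // Each comparison becomes one PredicateLeaf in the resulting argument.
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
                .startAnd()
                .lessThan("_col0", PredicateLeaf.Type.LONG, 10L)
                .equals("_col1", PredicateLeaf.Type.STRING, "M")
                .end()
                .build();

        // Inspect the leaves: operator, column name, and literal type of each predicate.
        List<PredicateLeaf> leaves = sarg.getLeaves();
        for (PredicateLeaf leaf : leaves) {
            System.out.println(leaf.getOperator() + " " + leaf.getColumnName() + " " + leaf.getType());
        }
    }
}

A reader such as ORC can then evaluate these leaves against stripe statistics and skip data that cannot match, which is the predicate pushdown that the Flink and PXF examples below rely on.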
Example #1
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testSerialization() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
    rowOrcInputFormat.selectFields(0, 4, 1);
    rowOrcInputFormat.addPredicate(
        new OrcSplitReader.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

    byte[] bytes = InstantiationUtil.serializeObject(rowOrcInputFormat);
    OrcRowInputFormat copy = InstantiationUtil.deserializeObject(bytes, getClass().getClassLoader());

    FileInputSplit[] splits = copy.createInputSplits(1);
    copy.openInputFormat();
    copy.open(splits[0]);

    assertFalse(copy.reachedEnd());
    Row row = copy.nextRecord(null);
    assertNotNull(row);
    assertEquals(3, row.getArity());

    // check first row
    assertEquals(1, row.getField(0));
    assertEquals(500, row.getField(1));
    assertEquals("M", row.getField(2));
}
Example #2
Source File: OrcTableSource.java From flink with Apache License 2.0
private PredicateLeaf.Type toOrcType(TypeInformation<?> type) {
    if (type == BasicTypeInfo.BYTE_TYPE_INFO ||
            type == BasicTypeInfo.SHORT_TYPE_INFO ||
            type == BasicTypeInfo.INT_TYPE_INFO ||
            type == BasicTypeInfo.LONG_TYPE_INFO) {
        return PredicateLeaf.Type.LONG;
    } else if (type == BasicTypeInfo.FLOAT_TYPE_INFO ||
            type == BasicTypeInfo.DOUBLE_TYPE_INFO) {
        return PredicateLeaf.Type.FLOAT;
    } else if (type == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
        return PredicateLeaf.Type.BOOLEAN;
    } else if (type == BasicTypeInfo.STRING_TYPE_INFO) {
        return PredicateLeaf.Type.STRING;
    } else if (type == SqlTimeTypeInfo.TIMESTAMP) {
        return PredicateLeaf.Type.TIMESTAMP;
    } else if (type == SqlTimeTypeInfo.DATE) {
        return PredicateLeaf.Type.DATE;
    } else if (type == BasicTypeInfo.BIG_DEC_TYPE_INFO) {
        return PredicateLeaf.Type.DECIMAL;
    } else {
        // unsupported type
        return null;
    }
}
Example #3
Source File: HiveORCAccessorTest.java From pxf with Apache License 2.0
@Test
public void parseFilterWithIn() {
    SearchArgument sarg = SearchArgumentFactory
            .newBuilder()
            .startAnd()
            .in("FOO", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
            .end()
            .build();
    String expected = toKryo(sarg);

    // _1_ IN (1,2,3)
    context.setFilterString("a1m1007s1d1s1d2s1d3o10");

    try {
        accessor.openForRead();
    } catch (Exception e) {
        // Ignore exception thrown by openForRead complaining about file foo not found
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
Example #4
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testSerialization() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
    rowOrcInputFormat.selectFields(0, 4, 1);
    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

    byte[] bytes = InstantiationUtil.serializeObject(rowOrcInputFormat);
    OrcRowInputFormat copy = InstantiationUtil.deserializeObject(bytes, getClass().getClassLoader());

    FileInputSplit[] splits = copy.createInputSplits(1);
    copy.openInputFormat();
    copy.open(splits[0]);

    assertFalse(copy.reachedEnd());
    Row row = copy.nextRecord(null);
    assertNotNull(row);
    assertEquals(3, row.getArity());

    // check first row
    assertEquals(1, row.getField(0));
    assertEquals(500, row.getField(1));
    assertEquals("M", row.getField(2));
}
Example #5
Source File: OrcTableSource.java From flink with Apache License 2.0
private PredicateLeaf.Type toOrcType(TypeInformation<?> type) {
    if (type == BasicTypeInfo.BYTE_TYPE_INFO ||
            type == BasicTypeInfo.SHORT_TYPE_INFO ||
            type == BasicTypeInfo.INT_TYPE_INFO ||
            type == BasicTypeInfo.LONG_TYPE_INFO) {
        return PredicateLeaf.Type.LONG;
    } else if (type == BasicTypeInfo.FLOAT_TYPE_INFO ||
            type == BasicTypeInfo.DOUBLE_TYPE_INFO) {
        return PredicateLeaf.Type.FLOAT;
    } else if (type == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
        return PredicateLeaf.Type.BOOLEAN;
    } else if (type == BasicTypeInfo.STRING_TYPE_INFO) {
        return PredicateLeaf.Type.STRING;
    } else if (type == SqlTimeTypeInfo.TIMESTAMP) {
        return PredicateLeaf.Type.TIMESTAMP;
    } else if (type == SqlTimeTypeInfo.DATE) {
        return PredicateLeaf.Type.DATE;
    } else if (type == BasicTypeInfo.BIG_DEC_TYPE_INFO) {
        return PredicateLeaf.Type.DECIMAL;
    } else {
        // unsupported type
        return null;
    }
}
Example #6
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testSerialization() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
    rowOrcInputFormat.selectFields(0, 4, 1);
    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

    byte[] bytes = InstantiationUtil.serializeObject(rowOrcInputFormat);
    OrcRowInputFormat copy = InstantiationUtil.deserializeObject(bytes, getClass().getClassLoader());

    FileInputSplit[] splits = copy.createInputSplits(1);
    copy.openInputFormat();
    copy.open(splits[0]);

    assertFalse(copy.reachedEnd());
    Row row = copy.nextRecord(null);
    assertNotNull(row);
    assertEquals(3, row.getArity());

    // check first row
    assertEquals(1, row.getField(0));
    assertEquals(500, row.getField(1));
    assertEquals("M", row.getField(2));
}
Example #7
Source File: OrcTableSource.java From Flink-CEPplus with Apache License 2.0
private PredicateLeaf.Type toOrcType(TypeInformation<?> type) {
    if (type == BasicTypeInfo.BYTE_TYPE_INFO ||
            type == BasicTypeInfo.SHORT_TYPE_INFO ||
            type == BasicTypeInfo.INT_TYPE_INFO ||
            type == BasicTypeInfo.LONG_TYPE_INFO) {
        return PredicateLeaf.Type.LONG;
    } else if (type == BasicTypeInfo.FLOAT_TYPE_INFO ||
            type == BasicTypeInfo.DOUBLE_TYPE_INFO) {
        return PredicateLeaf.Type.FLOAT;
    } else if (type == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
        return PredicateLeaf.Type.BOOLEAN;
    } else if (type == BasicTypeInfo.STRING_TYPE_INFO) {
        return PredicateLeaf.Type.STRING;
    } else if (type == SqlTimeTypeInfo.TIMESTAMP) {
        return PredicateLeaf.Type.TIMESTAMP;
    } else if (type == SqlTimeTypeInfo.DATE) {
        return PredicateLeaf.Type.DATE;
    } else if (type == BasicTypeInfo.BIG_DEC_TYPE_INFO) {
        return PredicateLeaf.Type.DECIMAL;
    } else {
        // unsupported type
        return null;
    }
}
Example #8
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testReadFileWithEvolvedSchema() throws IOException {
    rowOrcInputFormat = new OrcRowInputFormat(
        getPath(TEST_FILE_FLAT),
        "struct<_col0:int,_col1:string,_col4:string,_col3:string>", // previous version of schema
        new Configuration());
    rowOrcInputFormat.selectFields(3, 0, 2);
    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    assertEquals(1, splits.length);
    rowOrcInputFormat.openInputFormat();

    // open split
    rowOrcInputFormat.open(splits[0]);

    // read and validate first row
    assertFalse(rowOrcInputFormat.reachedEnd());
    Row row = rowOrcInputFormat.nextRecord(null);
    assertNotNull(row);
    assertEquals(3, row.getArity());
    assertEquals("Primary", row.getField(0));
    assertEquals(1, row.getField(1));
    assertEquals("M", row.getField(2));

    // read and count remaining rows
    long cnt = 1;
    while (!rowOrcInputFormat.reachedEnd()) {
        assertNotNull(rowOrcInputFormat.nextRecord(null));
        cnt++;
    }
    // check that only the first stripe of the file has been read.
    // Each stripe has 5000 rows, except the last which has 800 rows.
    assertEquals(5000, cnt);
}
Example #9
Source File: HiveORCAccessorTest.java From pxf with Apache License 2.0
@Test
public void parseFilterWithISNOTNULL() {
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .startNot()
            .isNull("FOO", PredicateLeaf.Type.STRING)
            .end()
            .end()
            .build();
    String expected = toKryo(sarg);

    context.setFilterString("a1o9");

    try {
        accessor.openForRead();
    } catch (Exception e) {
        // Ignore exception thrown by openForRead complaining about file foo not found
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
Example #10
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testReadFileWithFilter() throws IOException {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
    rowOrcInputFormat.selectFields(0, 1);

    // read head and tail of file
    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.Or(
            new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L),
            new OrcRowInputFormat.Not(
                new OrcRowInputFormat.LessThanEquals("_col0", PredicateLeaf.Type.LONG, 1920000L))
        ));
    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    assertEquals(1, splits.length);
    rowOrcInputFormat.openInputFormat();

    // open split
    rowOrcInputFormat.open(splits[0]);

    // read and count all rows
    long cnt = 0;
    while (!rowOrcInputFormat.reachedEnd()) {
        assertNotNull(rowOrcInputFormat.nextRecord(null));
        cnt++;
    }
    // check that only the first and last stripes of the file have been read.
    // Each stripe has 5000 rows, except the last which has 800 rows.
    assertEquals(5800, cnt);
}
Example #11
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testPredicateWithInvalidColumn() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());
    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.Equals("unknown", PredicateLeaf.Type.LONG, 42));
}
Example #12
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testTimePredicates() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

    rowOrcInputFormat.addPredicate(
        // OR
        new OrcRowInputFormat.Or(
            // timestamp pred
            new OrcRowInputFormat.Equals("time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100")),
            // date pred
            new OrcRowInputFormat.Equals("date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"))));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    rowOrcInputFormat.openInputFormat();

    // mock options to check configuration of ORC reader
    OrcRowInputFormat spy = spy(rowOrcInputFormat);
    Reader.Options options = new Reader.Options();
    doReturn(options).when(spy).getOptions(any());

    spy.openInputFormat();
    spy.open(splits[0]);

    // verify predicate configuration
    SearchArgument sarg = options.getSearchArgument();
    assertNotNull(sarg);
    assertEquals("(or leaf-0 leaf-1)", sarg.getExpression().toString());
    assertEquals(2, sarg.getLeaves().size());
    List<PredicateLeaf> leaves = sarg.getLeaves();
    assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString());
    assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString());
}
Example #13
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testTimePredicates() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

    rowOrcInputFormat.addPredicate(
        // OR
        new OrcRowInputFormat.Or(
            // timestamp pred
            new OrcRowInputFormat.Equals("time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100")),
            // date pred
            new OrcRowInputFormat.Equals("date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"))));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    rowOrcInputFormat.openInputFormat();

    // mock options to check configuration of ORC reader
    OrcRowInputFormat spy = spy(rowOrcInputFormat);
    Reader.Options options = new Reader.Options();
    doReturn(options).when(spy).getOptions(any());

    spy.openInputFormat();
    spy.open(splits[0]);

    // verify predicate configuration
    SearchArgument sarg = options.getSearchArgument();
    assertNotNull(sarg);
    assertEquals("(or leaf-0 leaf-1)", sarg.getExpression().toString());
    assertEquals(2, sarg.getLeaves().size());
    List<PredicateLeaf> leaves = sarg.getLeaves();
    assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString());
    assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString());
}
Example #14
Source File: OrcTableSource.java From flink with Apache License 2.0
private PredicateLeaf.Type getLiteralType(BinaryComparison comp) {
    if (literalOnRight(comp)) {
        return toOrcType(((Literal) comp.right()).resultType());
    } else {
        return toOrcType(((Literal) comp.left()).resultType());
    }
}
Example #15
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testReadFileWithEvolvedSchema() throws IOException {
    rowOrcInputFormat = new OrcRowInputFormat(
        getPath(TEST_FILE_FLAT),
        "struct<_col0:int,_col1:string,_col4:string,_col3:string>", // previous version of schema
        new Configuration());
    rowOrcInputFormat.selectFields(3, 0, 2);
    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    assertEquals(1, splits.length);
    rowOrcInputFormat.openInputFormat();

    // open split
    rowOrcInputFormat.open(splits[0]);

    // read and validate first row
    assertFalse(rowOrcInputFormat.reachedEnd());
    Row row = rowOrcInputFormat.nextRecord(null);
    assertNotNull(row);
    assertEquals(3, row.getArity());
    assertEquals("Primary", row.getField(0));
    assertEquals(1, row.getField(1));
    assertEquals("M", row.getField(2));

    // read and count remaining rows
    long cnt = 1;
    while (!rowOrcInputFormat.reachedEnd()) {
        assertNotNull(rowOrcInputFormat.nextRecord(null));
        cnt++;
    }
    // check that only the first stripe of the file has been read.
    // Each stripe has 5000 rows, except the last which has 800 rows.
    assertEquals(5000, cnt);
}
Example #16
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testReadFileWithFilter() throws IOException {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
    rowOrcInputFormat.selectFields(0, 1);

    // read head and tail of file
    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.Or(
            new OrcRowInputFormat.LessThan("_col0", PredicateLeaf.Type.LONG, 10L),
            new OrcRowInputFormat.Not(
                new OrcRowInputFormat.LessThanEquals("_col0", PredicateLeaf.Type.LONG, 1920000L))
        ));
    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    assertEquals(1, splits.length);
    rowOrcInputFormat.openInputFormat();

    // open split
    rowOrcInputFormat.open(splits[0]);

    // read and count all rows
    long cnt = 0;
    while (!rowOrcInputFormat.reachedEnd()) {
        assertNotNull(rowOrcInputFormat.nextRecord(null));
        cnt++;
    }
    // check that only the first and last stripes of the file have been read.
    // Each stripe has 5000 rows, except the last which has 800 rows.
    assertEquals(5800, cnt);
}
Example #17
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testDecimalPredicate() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.Not(
            // decimal pred
            new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5))));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    rowOrcInputFormat.openInputFormat();

    // mock options to check configuration of ORC reader
    OrcRowInputFormat spy = spy(rowOrcInputFormat);
    Reader.Options options = new Reader.Options();
    doReturn(options).when(spy).getOptions(any());

    spy.openInputFormat();
    spy.open(splits[0]);

    // verify predicate configuration
    SearchArgument sarg = options.getSearchArgument();
    assertNotNull(sarg);
    assertEquals("(not leaf-0)", sarg.getExpression().toString());
    assertEquals(1, sarg.getLeaves().size());
    List<PredicateLeaf> leaves = sarg.getLeaves();
    assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}
Example #18
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testPredicateWithInvalidColumn() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());
    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.Equals("unknown", PredicateLeaf.Type.LONG, 42));
}
Example #19
Source File: HiveORCSearchArgumentBuilder.java From pxf with Apache License 2.0
/**
 * Get the type of the given expression node.
 *
 * @param literal the object
 * @return int, string, or float or null if we don't know the type
 */
private PredicateLeaf.Type getType(Object literal) {
    if (literal instanceof Byte ||
            literal instanceof Short ||
            literal instanceof Integer ||
            literal instanceof Long) {
        return PredicateLeaf.Type.LONG;
    } else if (literal instanceof HiveChar ||
            literal instanceof HiveVarchar ||
            literal instanceof String) {
        return PredicateLeaf.Type.STRING;
    } else if (literal instanceof Float ||
            literal instanceof Double) {
        return PredicateLeaf.Type.FLOAT;
    } else if (literal instanceof Date) {
        return PredicateLeaf.Type.DATE;
    } else if (literal instanceof Timestamp) {
        return PredicateLeaf.Type.TIMESTAMP;
    } else if (literal instanceof HiveDecimal ||
            literal instanceof BigDecimal) {
        return PredicateLeaf.Type.DECIMAL;
    } else if (literal instanceof Boolean) {
        return PredicateLeaf.Type.BOOLEAN;
    } else if (literal instanceof List) {
        @SuppressWarnings("unchecked")
        List<Object> l = (List<Object>) literal;
        if (l.size() > 0) {
            return getType(l.get(0));
        }
    }
    throw new IllegalArgumentException(String.format("Unknown type for literal %s", literal));
}
Example #20
Source File: HiveORCAccessorTest.java From pxf with Apache License 2.0
@Test
public void parseFilterWithISNULL() {
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .isNull("FOO", PredicateLeaf.Type.STRING)
            .end()
            .build();
    String expected = toKryo(sarg);

    context.setFilterString("a1o8");

    try {
        accessor.openForRead();
    } catch (Exception e) {
        // Ignore exception thrown by openForRead complaining about file foo not found
    }

    assertEquals(expected, accessor.getJobConf().get(SARG_PUSHDOWN));
}
Example #21
Source File: OrcTableSource.java From flink with Apache License 2.0
private PredicateLeaf.Type getLiteralType(BinaryComparison comp) {
    if (literalOnRight(comp)) {
        return toOrcType(((Literal) comp.right()).resultType());
    } else {
        return toOrcType(((Literal) comp.left()).resultType());
    }
}
Example #22
Source File: OrcTableSource.java From Flink-CEPplus with Apache License 2.0
private PredicateLeaf.Type getLiteralType(BinaryComparison comp) {
    if (literalOnRight(comp)) {
        return toOrcType(((Literal) comp.right()).resultType());
    } else {
        return toOrcType(((Literal) comp.left()).resultType());
    }
}
Example #23
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testTimePredicates() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

    rowOrcInputFormat.addPredicate(
        // OR
        new OrcSplitReader.Or(
            // timestamp pred
            new OrcSplitReader.Equals("time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100")),
            // date pred
            new OrcSplitReader.Equals("date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"))));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    rowOrcInputFormat.openInputFormat();

    // spy on the input format to check the configuration of the ORC reader
    OrcRowInputFormat spy = spy(rowOrcInputFormat);

    spy.openInputFormat();
    spy.open(splits[0]);

    // verify predicate configuration
    SearchArgument sarg = getSearchArgument(spy.getReader().getRecordReader());
    assertNotNull(sarg);
    assertEquals("(or leaf-0 leaf-1)", sarg.getExpression().toString());
    assertEquals(2, sarg.getLeaves().size());
    List<PredicateLeaf> leaves = sarg.getLeaves();
    assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString());
    assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString());
}
Example #24
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testDecimalPredicate() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

    rowOrcInputFormat.addPredicate(
        new OrcSplitReader.Not(
            // decimal pred
            new OrcSplitReader.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5))));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    rowOrcInputFormat.openInputFormat();

    // spy on the input format to check the configuration of the ORC reader
    OrcRowInputFormat spy = spy(rowOrcInputFormat);

    spy.openInputFormat();
    spy.open(splits[0]);

    // verify predicate configuration
    SearchArgument sarg = getSearchArgument(spy.getReader().getRecordReader());
    assertNotNull(sarg);
    assertEquals("(not leaf-0)", sarg.getExpression().toString());
    assertEquals(1, sarg.getLeaves().size());
    List<PredicateLeaf> leaves = sarg.getLeaves();
    assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}
Example #25
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testPredicateWithInvalidColumn() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());
    rowOrcInputFormat.addPredicate(
        new OrcSplitReader.Equals("unknown", PredicateLeaf.Type.LONG, 42));
}
Example #26
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testReadFileWithFilter() throws IOException {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());
    rowOrcInputFormat.selectFields(0, 1);

    // read head and tail of file
    rowOrcInputFormat.addPredicate(
        new OrcSplitReader.Or(
            new OrcSplitReader.LessThan("_col0", PredicateLeaf.Type.LONG, 10L),
            new OrcSplitReader.Not(
                new OrcSplitReader.LessThanEquals("_col0", PredicateLeaf.Type.LONG, 1920000L))
        ));
    rowOrcInputFormat.addPredicate(
        new OrcSplitReader.Equals("_col1", PredicateLeaf.Type.STRING, "M"));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    assertEquals(1, splits.length);
    rowOrcInputFormat.openInputFormat();

    // open split
    rowOrcInputFormat.open(splits[0]);

    // read and count all rows
    long cnt = 0;
    while (!rowOrcInputFormat.reachedEnd()) {
        assertNotNull(rowOrcInputFormat.nextRecord(null));
        cnt++;
    }
    // check that only the first and last stripes of the file have been read.
    // Each stripe has 5000 rows, except the last which has 800 rows.
    assertEquals(5800, cnt);
}
Example #27
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testReadFileWithEvolvedSchema() throws IOException {
    rowOrcInputFormat = new OrcRowInputFormat(
        getPath(TEST_FILE_FLAT),
        "struct<_col0:int,_col1:string,_col4:string,_col3:string>", // previous version of schema
        new Configuration());
    rowOrcInputFormat.selectFields(3, 0, 2);
    rowOrcInputFormat.addPredicate(
        new OrcSplitReader.LessThan("_col0", PredicateLeaf.Type.LONG, 10L));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    assertEquals(1, splits.length);
    rowOrcInputFormat.openInputFormat();

    // open split
    rowOrcInputFormat.open(splits[0]);

    // read and validate first row
    assertFalse(rowOrcInputFormat.reachedEnd());
    Row row = rowOrcInputFormat.nextRecord(null);
    assertNotNull(row);
    assertEquals(3, row.getArity());
    assertEquals("Primary", row.getField(0));
    assertEquals(1, row.getField(1));
    assertEquals("M", row.getField(2));

    // read and count remaining rows
    long cnt = 1;
    while (!rowOrcInputFormat.reachedEnd()) {
        assertNotNull(rowOrcInputFormat.nextRecord(null));
        cnt++;
    }
    // check that only the first stripe of the file has been read.
    // Each stripe has 5000 rows, except the last which has 800 rows.
    assertEquals(5000, cnt);
}
Example #28
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0
@Test
public void testDecimalPredicate() throws Exception {
    rowOrcInputFormat =
        new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

    rowOrcInputFormat.addPredicate(
        new OrcRowInputFormat.Not(
            // decimal pred
            new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5))));

    FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
    rowOrcInputFormat.openInputFormat();

    // mock options to check configuration of ORC reader
    OrcRowInputFormat spy = spy(rowOrcInputFormat);
    Reader.Options options = new Reader.Options();
    doReturn(options).when(spy).getOptions(any());

    spy.openInputFormat();
    spy.open(splits[0]);

    // verify predicate configuration
    SearchArgument sarg = options.getSearchArgument();
    assertNotNull(sarg);
    assertEquals("(not leaf-0)", sarg.getExpression().toString());
    assertEquals(1, sarg.getLeaves().size());
    List<PredicateLeaf> leaves = sarg.getLeaves();
    assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}
Example #29
Source File: OrcSplitReader.java From flink with Apache License 2.0
ColumnPredicate(String columnName, PredicateLeaf.Type literalType) {
    this.columnName = columnName;
    this.literalType = literalType;
}
Example #30
Source File: OrcRowInputFormat.java From Flink-CEPplus with Apache License 2.0
ColumnPredicate(String columnName, PredicateLeaf.Type literalType) {
    this.columnName = columnName;
    this.literalType = literalType;
}