org.apache.parquet.filter2.predicate.Operators.BinaryColumn Java Examples

The following examples show how to use org.apache.parquet.filter2.predicate.Operators.BinaryColumn. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises dictionary-based block dropping for {@code eq} predicates on the
 * "fixed_field" column, including the {@code null} comparison value.
 */
@Test
public void testEqFixed() throws Exception {
  final BinaryColumn fixedField = binaryColumn("fixed_field");

  // FIXED_LEN_BYTE_ARRAY values are dictionary-encoded only by V2, so the
  // "can drop" expectation is checked for that version alone.
  if (version == PARQUET_2_0) {
    final boolean dropsMinusTwo =
        canDrop(eq(fixedField, toBinary("-2", 17)), ccmd, dictionaries);
    assertTrue("Should drop block for -2", dropsMinusTwo);
  }

  final boolean dropsMinusOne =
      canDrop(eq(fixedField, toBinary("-1", 17)), ccmd, dictionaries);
  assertFalse("Should not drop block for -1", dropsMinusOne);

  final boolean dropsNull = canDrop(eq(fixedField, null), ccmd, dictionaries);
  assertFalse("Should not drop block for null", dropsNull);
}
 
Example #2
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks {@code and} combinations of equality predicates on "binary_field":
 * the block may be dropped as soon as one operand can never match.
 */
@Test
public void testAnd() throws Exception {
  final BinaryColumn binaryField = binaryColumn("binary_field");

  // No upper-case letters are in the dictionary, so these evaluate to false.
  final FilterPredicate upperB = eq(binaryField, Binary.fromString("B"));
  final FilterPredicate upperC = eq(binaryField, Binary.fromString("C"));

  // All lower-case letters are in the dictionary, so these evaluate to true.
  final FilterPredicate lowerX = eq(binaryField, Binary.fromString("x"));
  final FilterPredicate lowerY = eq(binaryField, Binary.fromString("y"));

  final String mustDrop = "Should drop when either predicate must be false";

  assertTrue(mustDrop, canDrop(and(upperB, lowerY), ccmd, dictionaries));
  assertTrue(mustDrop, canDrop(and(lowerX, upperC), ccmd, dictionaries));
  assertTrue(mustDrop, canDrop(and(upperB, upperC), ccmd, dictionaries));

  final boolean dropsWhenBothMayMatch = canDrop(and(lowerX, lowerY), ccmd, dictionaries);
  assertFalse("Should not drop when either predicate could be true", dropsWhenBothMayMatch);
}
 
Example #3
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks {@code or} combinations of equality predicates on "binary_field":
 * the block may be dropped only when neither operand can match.
 */
@Test
public void testOr() throws Exception {
  final BinaryColumn binaryField = binaryColumn("binary_field");

  // No upper-case letters are in the dictionary, so these evaluate to false.
  final FilterPredicate upperB = eq(binaryField, Binary.fromString("B"));
  final FilterPredicate upperC = eq(binaryField, Binary.fromString("C"));

  // All lower-case letters are in the dictionary, so these evaluate to true.
  final FilterPredicate lowerX = eq(binaryField, Binary.fromString("x"));
  final FilterPredicate lowerY = eq(binaryField, Binary.fromString("y"));

  final String mustKeep = "Should not drop when one predicate could be true";

  assertFalse(mustKeep, canDrop(or(upperB, lowerY), ccmd, dictionaries));
  assertFalse(mustKeep, canDrop(or(lowerX, upperC), ccmd, dictionaries));

  final boolean dropsWhenNeitherMatches = canDrop(or(upperB, upperC), ccmd, dictionaries);
  assertTrue("Should drop when both predicates must be false", dropsWhenNeitherMatches);

  assertFalse(mustKeep, canDrop(or(lowerX, lowerY), ccmd, dictionaries));
}
 
Example #4
Source File: TestRecordLevelFilters.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the phone book file through a negated {@code StartWithP} user-defined
 * predicate on "name" and compares the result with an equivalent in-memory filter.
 */
@Test
public void testNameNotStartWithP() throws Exception {
  final BinaryColumn nameColumn = binaryColumn("name");
  final FilterPredicate notStartingWithP = not(userDefined(nameColumn, StartWithP.class));

  final List<Group> found =
      PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(notStartingWithP));

  // Reference filter: users without a name are kept, as are names not starting with "p".
  final UserFilter expectedUsers = new UserFilter() {
    @Override
    public boolean keep(User u) {
      if (u.getName() == null) {
        return true;
      }
      return !u.getName().startsWith("p");
    }
  };

  assertFilter(found, expectedUsers);
}
 
Example #5
Source File: TestRecordLevelFilters.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the phone book file with {@code notEq(name, null)} and compares the
 * result with an in-memory filter that keeps users whose name is set.
 */
@Test
public void testNameNotNull() throws Exception {
  final BinaryColumn nameColumn = binaryColumn("name");
  final FilterPredicate nameIsSet = notEq(nameColumn, null);

  final List<Group> found =
      PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(nameIsSet));

  // Reference filter: keep exactly the users that have a name.
  final UserFilter expectedUsers = new UserFilter() {
    @Override
    public boolean keep(User u) {
      return !(u.getName() == null);
    }
  };

  assertFilter(found, expectedUsers);
}
 
Example #6
Source File: TestRecordLevelFilters.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the phone book file with an equality predicate on the value
 * "no matches" and expects an empty result list.
 */
@Test
public void testAllFilter() throws Exception {
  final BinaryColumn nameColumn = binaryColumn("name");
  final FilterPredicate matchesNothing = eq(nameColumn, Binary.fromString("no matches"));

  final List<Group> found =
      PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(matchesNothing));

  final List<Group> noGroups = new ArrayList<Group>();
  assertEquals(noGroups, found);
}
 
Example #7
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@code gtEq} predicate with a non-null value on the
 * "missing_column" column allows the block to be dropped.
 */
@Test
public void testGtEqMissingColumn() throws Exception {
  final BinaryColumn missing = binaryColumn("missing_column");

  final boolean dropped = canDrop(gtEq(missing, Binary.fromString("any")), ccmd, dictionaries);

  assertTrue("Should drop block for any non-null query", dropped);
}
 
Example #8
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@code gt} predicate with a non-null value on the
 * "missing_column" column allows the block to be dropped.
 */
@Test
public void testGtMissingColumn() throws Exception {
  final BinaryColumn missing = binaryColumn("missing_column");

  final boolean dropped = canDrop(gt(missing, Binary.fromString("any")), ccmd, dictionaries);

  assertTrue("Should drop block for any non-null query", dropped);
}
 
Example #9
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an {@code ltEq} predicate with a non-null value on the
 * "missing_column" column allows the block to be dropped.
 */
@Test
public void testLtEqMissingColumn() throws Exception {
  final BinaryColumn missing = binaryColumn("missing_column");

  final boolean dropped = canDrop(ltEq(missing, Binary.fromString("any")), ccmd, dictionaries);

  assertTrue("Should drop block for any non-null query", dropped);
}
 
Example #10
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an {@code lt} predicate with a non-null value on the
 * "missing_column" column allows the block to be dropped.
 */
@Test
public void testLtMissingColumn() throws Exception {
  final BinaryColumn missing = binaryColumn("missing_column");

  final boolean dropped = canDrop(lt(missing, Binary.fromString("any")), ccmd, dictionaries);

  assertTrue("Should drop block for any non-null query", dropped);
}
 
Example #11
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks {@code notEq} predicates on the "missing_column" column: a non-null
 * comparison value must keep the block, while {@code notEq(column, null)}
 * must allow the block to be dropped.
 */
@Test
public void testNotEqMissingColumn() throws Exception {
  BinaryColumn b = binaryColumn("missing_column");

  assertFalse("Should not drop block for non-null query",
      canDrop(notEq(b, Binary.fromString("any")), ccmd, dictionaries));

  // The assertion expects canDrop to return true, so the failure message has to
  // say "should drop"; the previous text ("Should not drop ...") contradicted it.
  assertTrue("Should drop block for null query",
      canDrop(notEq(b, null), ccmd, dictionaries));
}
 
Example #12
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks {@code eq} predicates on the "missing_column" column: a non-null
 * comparison value allows the block to be dropped, a {@code null} one does not.
 */
@Test
public void testEqMissingColumn() throws Exception {
  final BinaryColumn missing = binaryColumn("missing_column");

  final boolean dropsNonNull = canDrop(eq(missing, Binary.fromString("any")), ccmd, dictionaries);
  assertTrue("Should drop block for non-null query", dropsNonNull);

  final boolean dropsNull = canDrop(eq(missing, null), ccmd, dictionaries);
  assertFalse("Should not drop block null query", dropsNull);
}
 
Example #13
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises dictionary-based block dropping for {@code lt} predicates on the
 * "fixed_field" column using the first two entries of {@code DECIMAL_VALUES}.
 */
@Test
public void testLtFixed() throws Exception {
  final BinaryColumn fixedField = binaryColumn("fixed_field");

  // FIXED_LEN_BYTE_ARRAY values are dictionary-encoded only by V2, so the
  // "can drop" expectation is checked for that version alone.
  if (version == PARQUET_2_0) {
    final boolean dropsBelowLowest =
        canDrop(lt(fixedField, DECIMAL_VALUES[0]), ccmd, dictionaries);
    assertTrue("Should drop: < lowest value", dropsBelowLowest);
  }

  final boolean dropsBelowSecondLowest =
      canDrop(lt(fixedField, DECIMAL_VALUES[1]), ccmd, dictionaries);
  assertFalse("Should not drop: < 2nd lowest value", dropsBelowSecondLowest);
}
 
Example #14
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises dictionary-based block dropping for {@code notEq} predicates on
 * three binary columns: "single_value_field", "optional_single_value_field"
 * and "binary_field".
 */
@Test
public void testNotEqBinary() throws Exception {
  final BinaryColumn singleValue = binaryColumn("single_value_field");
  final BinaryColumn optionalSingleValue = binaryColumn("optional_single_value_field");
  final BinaryColumn binaryField = binaryColumn("binary_field");

  final String otherValueMessage = "Should not drop block with any other value";
  final String knownValueMessage = "Should not drop block with a known value";

  // Column expected to hold only the excluded value.
  final boolean dropsOnlyExcluded =
      canDrop(notEq(singleValue, Binary.fromString("sharp")), ccmd, dictionaries);
  assertTrue("Should drop block with only the excluded value", dropsOnlyExcluded);

  final boolean dropsOtherValue =
      canDrop(notEq(singleValue, Binary.fromString("applause")), ccmd, dictionaries);
  assertFalse(otherValueMessage, dropsOtherValue);

  // Column expected to hold the excluded value together with nulls.
  final boolean dropsExcludedAndNull =
      canDrop(notEq(optionalSingleValue, Binary.fromString("sharp")), ccmd, dictionaries);
  assertFalse("Should not drop block with only the excluded value and null", dropsExcludedAndNull);

  final boolean dropsOptionalOtherValue =
      canDrop(notEq(optionalSingleValue, Binary.fromString("applause")), ccmd, dictionaries);
  assertFalse(otherValueMessage, dropsOptionalOtherValue);

  // General binary column.
  final boolean dropsLowerX =
      canDrop(notEq(binaryField, Binary.fromString("x")), ccmd, dictionaries);
  assertFalse(knownValueMessage, dropsLowerX);

  final boolean dropsUpperB =
      canDrop(notEq(binaryField, Binary.fromString("B")), ccmd, dictionaries);
  assertFalse(knownValueMessage, dropsUpperB);

  final boolean dropsNull = canDrop(notEq(binaryField, null), ccmd, dictionaries);
  assertFalse("Should not drop block for null", dropsNull);
}
 
Example #15
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code eq} predicates on the "int96_field" column never allow
 * the block to be dropped.
 */
@Test
public void testEqInt96() throws Exception {
  final BinaryColumn int96Field = binaryColumn("int96_field");

  // INT96 ordering is undefined, so no filtering shall be done for any value.
  final boolean dropsMinusTwo =
      canDrop(eq(int96Field, toBinary("-2", 12)), ccmd, dictionaries);
  assertFalse("Should not drop block for -2", dropsMinusTwo);

  final boolean dropsMinusOne =
      canDrop(eq(int96Field, toBinary("-1", 12)), ccmd, dictionaries);
  assertFalse("Should not drop block for -1", dropsMinusOne);

  final boolean dropsNull = canDrop(eq(int96Field, null), ccmd, dictionaries);
  assertFalse("Should not drop block for null", dropsNull);
}
 
Example #16
Source File: DictionaryFilterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises dictionary-based block dropping for {@code eq} predicates on the
 * "binary_field" column with a lower-case value, an upper-case value and null.
 */
@Test
public void testEqBinary() throws Exception {
  final BinaryColumn binaryField = binaryColumn("binary_field");

  final FilterPredicate equalsLowerC = eq(binaryField, Binary.fromString("c"));
  final boolean dropsLowerCase = canDrop(equalsLowerC, ccmd, dictionaries);
  assertFalse("Should not drop block for lower case letters", dropsLowerCase);

  final boolean dropsUpperCase =
      canDrop(eq(binaryField, Binary.fromString("A")), ccmd, dictionaries);
  assertTrue("Should drop block for upper case letters", dropsUpperCase);

  final boolean dropsNull = canDrop(eq(binaryField, null), ccmd, dictionaries);
  assertFalse("Should not drop block for null", dropsNull);
}
 
Example #17
Source File: TestFilterApiMethods.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a composite predicate through Java serialization and checks
 * that the deserialized predicate equals the original.
 */
@Test
public void testSerializable() throws Exception {
  BinaryColumn binary = binaryColumn("foo");
  FilterPredicate p = and(
      or(
          and(userDefined(intColumn, DummyUdp.class), predicate),
          eq(binary, Binary.fromString("hi"))),
      userDefined(longColumn, new IsMultipleOf(7)));

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  // Closing the object stream (even when writeObject throws) flushes everything
  // into baos before its bytes are read back below.
  try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
    oos.writeObject(p);
  }

  FilterPredicate read;
  // The input stream was previously never closed; try-with-resources closes it
  // on both the normal and the exceptional path.
  try (ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
    read = (FilterPredicate) is.readObject();
  }

  assertEquals(p, read);
}
 
Example #18
Source File: TestColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
// Builds a ColumnIndex for eight pages of a required UTF8 binary column, passing
// null as the fourth build() argument, then verifies the stored boundary order,
// null pages, min/max values and the pages selected by several predicates.
@Test
public void testFilterWithoutNullCounts() {
  ColumnIndex columnIndex = ColumnIndexBuilder.build(
      Types.required(BINARY).as(UTF8).named("test_binary_utf8"),
      BoundaryOrder.ASCENDING,
      // Per-page flags; the same sequence is checked by assertCorrectNullPages below.
      asList(true, true, false, false, true, false, true, false),
      // NOTE(review): presumably the null-counts argument, since getNullCounts()
      // is asserted to be null below -- confirm against ColumnIndexBuilder.build.
      null,
      // First value list: read back through getMinValues() further down.
      toBBList(
          null,
          null,
          stringBinary("Beeblebrox"),
          stringBinary("Dent"),
          null,
          stringBinary("Jeltz"),
          null,
          stringBinary("Slartibartfast")),
      // Second value list: read back through getMaxValues() further down.
      toBBList(
          null,
          null,
          stringBinary("Dent"),
          stringBinary("Dent"),
          null,
          stringBinary("Prefect"),
          null,
          stringBinary("Slartibartfast")));
  // The built index must echo the inputs and expose no null counts.
  assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
  assertNull(columnIndex.getNullCounts());
  assertCorrectNullPages(columnIndex, true, true, false, false, true, false, true, false);
  assertCorrectValues(columnIndex.getMaxValues(),
      null,
      null,
      stringBinary("Dent"),
      stringBinary("Dent"),
      null,
      stringBinary("Prefect"),
      null,
      stringBinary("Slartibartfast"));
  assertCorrectValues(columnIndex.getMinValues(),
      null,
      null,
      stringBinary("Beeblebrox"),
      stringBinary("Dent"),
      null,
      stringBinary("Jeltz"),
      null,
      stringBinary("Slartibartfast"));

  // NOTE(review): the trailing ints look like the expected matching page indexes
  // (0-7 for the eight pages above) -- confirm against assertCorrectFiltering.
  BinaryColumn col = binaryColumn("test_col");
  assertCorrectFiltering(columnIndex, eq(col, stringBinary("Dent")), 2, 3);
  assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 5, 6, 7);
  assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Dent")), 0, 1, 2, 3, 4, 5, 6, 7);
  assertCorrectFiltering(columnIndex, notEq(col, null), 2, 3, 5, 7);
  assertCorrectFiltering(columnIndex, userDefined(col, BinaryDecimalIsNullOrZeroUdp.class), 0, 1, 2, 3, 4, 5, 6, 7);
  assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryDecimalIsNullOrZeroUdp.class)), 2, 3, 5, 7);
}
 
Example #19
Source File: FilterApi.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link BinaryColumn} for the given column path.
 *
 * @param columnPath path string, parsed with {@code ColumnPath.fromDotString}
 * @return a new {@code BinaryColumn} wrapping the parsed path
 */
public static BinaryColumn binaryColumn(String columnPath) {
  final ColumnPath parsedPath = ColumnPath.fromDotString(columnPath);
  return new BinaryColumn(parsedPath);
}