Java Code Examples for org.apache.parquet.hadoop.ParquetReader#close()

The following examples show how to use org.apache.parquet.hadoop.ParquetReader#close(). Each example is drawn from an open-source project; the source file and project are noted above each snippet.
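Because ParquetReader implements java.io.Closeable, the explicit finally blocks seen in several of the examples below can be replaced with try-with-resources on Java 7 and later. A minimal sketch, assuming a file readable with the example GroupReadSupport (the class and method names here are placeholders, not from any of the projects below):

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

public class ReaderCloseSketch {
  // Counts rows; the reader is closed automatically when the try block
  // exits, even if read() throws.
  static long countRows(Path path) throws IOException {
    long count = 0;
    try (ParquetReader<Group> reader =
        ParquetReader.builder(new GroupReadSupport(), path).build()) {
      while (reader.read() != null) {
        count++;
      }
    }
    return count;
  }
}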
Example 1
Source File: ParquetHdfsDataWriterTest.java    From incubator-gobblin with Apache License 2.0
private List<TestRecord> readParquetFilesAvro(File outputFile)
    throws IOException {
  ParquetReader<org.apache.gobblin.test.avro.TestRecord> reader = null;
  List<TestRecord> records = new ArrayList<>();
  try {
    reader = new AvroParquetReader<>(new Path(outputFile.toString()));
    for (org.apache.gobblin.test.avro.TestRecord value = reader.read(); value != null; value = reader.read()) {
      records.add(new TestRecord(value.getPartition(),
          value.getSequence(),
          value.getPayload()));
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        System.out.println(ex.getMessage());
      }
    }
  }
  return records;
}
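Note that the single-argument AvroParquetReader constructor used above is deprecated in recent parquet-mr releases in favor of the builder API. An equivalent builder form (a sketch, using the same reader type and path as above) would be:

  reader = AvroParquetReader.<org.apache.gobblin.test.avro.TestRecord>builder(new Path(outputFile.toString())).build();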
 
Example 2
Source File: ParquetHdfsDataWriterTest.java    From incubator-gobblin with Apache License 2.0
protected List<TestRecord> readParquetFilesProto(File outputFile)
    throws IOException {
  ParquetReader<TestRecordProtos.TestRecordOrBuilder> reader = null;
  List<TestRecord> records = new ArrayList<>();
  try {
    reader = new ProtoParquetReader<>(new Path(outputFile.toString()));
    for (TestRecordProtos.TestRecordOrBuilder value = reader.read(); value != null; value = reader.read()) {
      records.add(new TestRecord(value.getPartition(),
          value.getSequence(),
          value.getPayload()));
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        System.out.println(ex.getMessage());
      }
    }
  }
  return records;
}
 
Example 3
Source File: ParquetHdfsDataWriterTest.java    From incubator-gobblin with Apache License 2.0
protected List<TestRecord> readParquetFilesGroup(File outputFile)
    throws IOException {
  ParquetReader<Group> reader = null;
  List<Group> records = new ArrayList<>();
  try {
    reader = new ParquetReader<>(new Path(outputFile.toString()), new SimpleReadSupport());
    for (Group value = reader.read(); value != null; value = reader.read()) {
      records.add(value);
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        System.out.println(ex.getMessage());
      }
    }
  }
  return records.stream().map(value -> new TestRecord(
      value.getInteger(TestConstants.PARTITION_FIELD_NAME, 0),
      value.getLong(TestConstants.SEQUENCE_FIELD_NAME, 0),
      value.getString(TestConstants.PAYLOAD_FIELD_NAME, 0)
  )).collect(Collectors.toList());
}
 
Example 4
Source File: TestPruneColumnsCommand.java    From parquet-mr with Apache License 2.0
private void validateColumns(String inputFile, List<String> prunePaths) throws IOException {
  ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), new Path(inputFile)).withConf(conf).build();
  for (int i = 0; i < numRecord; i++) {
    Group group = reader.read();
    if (!prunePaths.contains("DocId")) {
      assertEquals(1L, group.getLong("DocId", 0));
    }
    if (!prunePaths.contains("Name")) {
      assertEquals("foo", group.getBinary("Name", 0).toStringUsingUTF8());
    }
    if (!prunePaths.contains("Gender")) {
      assertEquals("male", group.getBinary("Gender", 0).toStringUsingUTF8());
    }
    if (!prunePaths.contains("Links")) {
      Group subGroup = group.getGroup("Links", 0);
      if (!prunePaths.contains("Links.Backward")) {
        assertEquals(2L, subGroup.getLong("Backward", 0));
      }
      if (!prunePaths.contains("Links.Forward")) {
        assertEquals(3L, subGroup.getLong("Forward", 0));
      }
    }
  }
  reader.close();
}
 
Example 5
Source File: ReadBenchmarks.java    From parquet-mr with Apache License 2.0
private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException
{
  ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), parquetFile).withConf(configuration).build();
  for (int i = 0; i < nRows; i++) {
    Group group = reader.read();
    blackhole.consume(group.getBinary("binary_field", 0));
    blackhole.consume(group.getInteger("int32_field", 0));
    blackhole.consume(group.getLong("int64_field", 0));
    blackhole.consume(group.getBoolean("boolean_field", 0));
    blackhole.consume(group.getFloat("float_field", 0));
    blackhole.consume(group.getDouble("double_field", 0));
    blackhole.consume(group.getBinary("flba_field", 0));
    blackhole.consume(group.getInt96("int96_field", 0));
  }
  reader.close();
}
 
Example 6
Source File: TestFiltersWithMissingColumns.java    From parquet-mr with Apache License 2.0
public static long countFilteredRecords(Path path, FilterPredicate pred) throws IOException {
  ParquetReader<Group> reader = ParquetReader
      .builder(new GroupReadSupport(), path)
      .withFilter(FilterCompat.get(pred))
      .build();

  long count = 0;
  try {
    while (reader.read() != null) {
      count += 1;
    }
  } finally {
    reader.close();
  }
  return count;
}
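A FilterPredicate for the method above can be built with org.apache.parquet.filter2.predicate.FilterApi. A short usage sketch (the column name "DocId" is only an assumption for illustration):

import static org.apache.parquet.filter2.predicate.FilterApi.eq;
import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;

// count only the rows whose DocId column equals 1
long matches = countFilteredRecords(path, eq(longColumn("DocId"), 1L));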
 
Example 7
Source File: ParquetFileReader.java    From streamx with Apache License 2.0
@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
  AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<>();
  ParquetReader.Builder<GenericRecord> builder = ParquetReader.builder(readSupport, path);
  ParquetReader<GenericRecord> parquetReader = builder.withConf(conf).build();
  GenericRecord record;
  Schema schema = null;
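  // reads every record; the Connect schema derived from the last record read is returned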
  while ((record = parquetReader.read()) != null) {
    schema = avroData.toConnectSchema(record.getSchema());
  }
  parquetReader.close();
  return schema;
}
 
Example 8
Source File: ParquetFileReader.java    From streamx with Apache License 2.0
@Override
public Collection<Object> readData(Configuration conf, Path path) throws IOException {
  Collection<Object> result = new ArrayList<>();
  AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<>();
  ParquetReader.Builder<GenericRecord> builder = ParquetReader.builder(readSupport, path);
  ParquetReader<GenericRecord> parquetReader = builder.withConf(conf).build();
  GenericRecord record;
  while ((record = parquetReader.read()) != null) {
    result.add(record);
  }
  parquetReader.close();
  return result;
}
 
Example 9
Source File: TestParquetInLining.java    From hudi with Apache License 2.0
@Test
public void testSimpleInlineFileSystem() throws IOException {
  Path outerInMemFSPath = getRandomOuterInMemPath();
  Path outerPath = new Path(FILE_SCHEME + outerInMemFSPath.toString().substring(outerInMemFSPath.toString().indexOf(':')));
  generatedPath = outerPath;
  ParquetWriter inlineWriter = new AvroParquetWriter(outerInMemFSPath, HoodieTestDataGenerator.AVRO_SCHEMA,
      CompressionCodecName.GZIP, 100 * 1024 * 1024, 1024 * 1024, true, inMemoryConf);
  // write few records
  List<GenericRecord> recordsToWrite = getParquetHoodieRecords();
  for (GenericRecord rec : recordsToWrite) {
    inlineWriter.write(rec);
  }
  inlineWriter.close();
  byte[] inlineBytes = getBytesToInline(outerInMemFSPath);
  long startOffset = generateOuterFile(outerPath, inlineBytes);

  long inlineLength = inlineBytes.length;

  // Generate phantom inline file
  Path inlinePath = getPhantomFile(outerPath, startOffset, inlineLength);

  // instantiate Parquet reader
  ParquetReader inLineReader = AvroParquetReader.builder(inlinePath).withConf(inlineConf).build();
  List<GenericRecord> records = readParquetGenericRecords(inLineReader);
  assertArrayEquals(recordsToWrite.toArray(), records.toArray());
  inLineReader.close();
}
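The readParquetGenericRecords helper is not shown in this excerpt; a minimal sketch of what it could look like (an assumption, not the actual Hudi source) simply drains the reader into a list:

static List<GenericRecord> readParquetGenericRecords(ParquetReader reader) throws IOException {
  List<GenericRecord> records = new ArrayList<>();
  Object record;
  while ((record = reader.read()) != null) {
    records.add((GenericRecord) record); // the reader is raw, so each record is cast
  }
  return records;
}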
 
Example 10
Source File: HeadCommand.java    From parquet-mr with Apache License 2.0
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  long num = DEFAULT;
  if (options.hasOption('n')) {
    num = Long.parseLong(options.getOptionValue('n'));
  }

  String[] args = options.getArgs();
  String input = args[0];

  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
    for (SimpleRecord value = reader.read(); value != null && num-- > 0; value = reader.read()) {
      value.prettyPrint(writer);
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
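        // exceptions thrown while closing the reader are deliberately ignored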
      }
    }
  }
}
 
Example 11
Source File: CatCommand.java    From parquet-mr with Apache License 2.0
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  String[] args = options.getArgs();
  String input = args[0];

  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
    ParquetMetadata metadata = ParquetFileReader.readFooter(new Configuration(), new Path(input));
    JsonRecordFormatter.JsonGroupFormatter formatter = JsonRecordFormatter.fromSchema(metadata.getFileMetaData().getSchema());

    for (SimpleRecord value = reader.read(); value != null; value = reader.read()) {
      if (options.hasOption('j')) {
        writer.write(formatter.formatRecord(value));
      } else {
        value.prettyPrint(writer);
      }
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
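        // exceptions thrown while closing the reader are deliberately ignored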
      }
    }
  }
}
 
Example 12
Source File: CompressionConveterTest.java    From parquet-mr with Apache License 2.0
private void validateColumns(String file, int numRecord, TestDocs testDocs) throws IOException {
  ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), new Path(file)).withConf(conf).build();
  for (int i = 0; i < numRecord; i++) {
    Group group = reader.read();
    assertTrue(group.getLong("DocId", 0) == testDocs.docId[i]);
    assertArrayEquals(group.getBinary("Name", 0).getBytes(), testDocs.name[i].getBytes());
    assertArrayEquals(group.getBinary("Gender", 0).getBytes(), testDocs.gender[i].getBytes());
    Group subGroup = group.getGroup("Links", 0);
    assertArrayEquals(subGroup.getBinary("Backward", 0).getBytes(), testDocs.linkBackward[i].getBytes());
    assertArrayEquals(subGroup.getBinary("Forward", 0).getBytes(), testDocs.linkForward[i].getBytes());
  }
  reader.close();
}
 
Example 13
Source File: ScroogeBinaryTest.java    From parquet-mr with Apache License 2.0
@Test
public void testScroogeBinaryEncoding() throws Exception {
  StringAndBinary expected = new StringAndBinary.Immutable("test",
      ByteBuffer.wrap(new byte[] {-123, 20, 33}));

  File temp = tempDir.newFile(UUID.randomUUID().toString());
  temp.deleteOnExit();
  temp.delete();

  Path path = new Path(temp.getPath());

  ParquetWriter<StringAndBinary> writer = new ParquetWriter<StringAndBinary>(
      path, new Configuration(), new ScroogeWriteSupport<StringAndBinary>(StringAndBinary.class));
  writer.write(expected);
  writer.close();

  // read using the parquet-thrift version to isolate the write path
  ParquetReader<org.apache.parquet.thrift.test.binary.StringAndBinary> reader =
      ThriftParquetReader.<org.apache.parquet.thrift.test.binary.StringAndBinary>build(path)
          .withThriftClass(org.apache.parquet.thrift.test.binary.StringAndBinary.class)
          .build();
  org.apache.parquet.thrift.test.binary.StringAndBinary record = reader.read();
  reader.close();

  Assert.assertEquals("String should match after serialization round trip",
      "test", record.s);
  Assert.assertEquals("ByteBuffer should match after serialization round trip",
      ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b);
}
 
Example 14
Source File: ScroogeBinaryTest.java    From parquet-mr with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testScroogeBinaryDecoding() throws Exception {
  StringAndBinary expected = new StringAndBinary.Immutable("test",
      ByteBuffer.wrap(new byte[] {-123, 20, 33}));

  File temp = tempDir.newFile(UUID.randomUUID().toString());
  temp.deleteOnExit();
  temp.delete();

  Path path = new Path(temp.getPath());

  ParquetWriter<StringAndBinary> writer = new ParquetWriter<StringAndBinary>(
      path, new Configuration(), new ScroogeWriteSupport<StringAndBinary>(StringAndBinary.class));
  writer.write(expected);
  writer.close();

  Configuration conf = new Configuration();
  conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName());
  ParquetReader<StringAndBinary> reader =
      ParquetReader.<StringAndBinary>builder(new ScroogeReadSupport(), path)
          .withConf(conf)
          .build();
  StringAndBinary record = reader.read();
  reader.close();

  Assert.assertEquals("String should match after serialization round trip",
      "test", record.s());
  Assert.assertEquals("ByteBuffer should match after serialization round trip",
      ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b());
}
 
Example 15
Source File: TestBinary.java    From parquet-mr with Apache License 2.0
@Test
public void testBinary() throws IOException {
  StringAndBinary expected = new StringAndBinary("test",
      ByteBuffer.wrap(new byte[] { -123, 20, 33 }));
  File temp = tempDir.newFile(UUID.randomUUID().toString());
  temp.deleteOnExit();
  temp.delete();

  Path path = new Path(temp.getPath());

  ThriftParquetWriter<StringAndBinary> writer =
      new ThriftParquetWriter<StringAndBinary>(
          path, StringAndBinary.class, CompressionCodecName.SNAPPY);
  writer.write(expected);
  writer.close();

  ParquetReader<StringAndBinary> reader =
      ThriftParquetReader.<StringAndBinary>build(path)
          .withThriftClass(StringAndBinary.class)
          .build();

  StringAndBinary record = reader.read();
  reader.close();

  assertSchema(ParquetFileReader.readFooter(new Configuration(), path));
  assertEquals("Should match after serialization round trip",
      expected, record);
}