parquet.hadoop.ParquetReader Java Examples

The following examples show how to use parquet.hadoop.ParquetReader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetHdfsDataWriterTest.java, from the incubator-gobblin project (Apache License 2.0). 6 votes.
/**
 * Reads every Avro record from the given Parquet file and converts each one into a
 * {@code TestRecord} built from its partition, sequence and payload.
 *
 * @param outputFile the Parquet file to read
 * @return the converted records, in the order they were read
 * @throws IOException declared for failures while opening or reading the file
 */
private List<TestRecord> readParquetFilesAvro(File outputFile)
    throws IOException {
  List<TestRecord> converted = new ArrayList<>();
  ParquetReader<org.apache.gobblin.test.avro.TestRecord> avroReader = null;
  try {
    avroReader = new AvroParquetReader<>(new Path(outputFile.toString()));
    // read() returning null marks the end of the file.
    org.apache.gobblin.test.avro.TestRecord avroRecord = avroReader.read();
    while (avroRecord != null) {
      TestRecord copy = new TestRecord(
          avroRecord.getPartition(),
          avroRecord.getSequence(),
          avroRecord.getPayload());
      converted.add(copy);
      avroRecord = avroReader.read();
    }
  } finally {
    if (avroReader != null) {
      try {
        avroReader.close();
      } catch (Exception closeFailure) {
        // Best effort: a failed close is only reported, never rethrown.
        System.out.println(closeFailure.getMessage());
      }
    }
  }
  return converted;
}
 
Example #2
Source File: ParquetHdfsDataWriterTest.java, from the incubator-gobblin project (Apache License 2.0). 6 votes.
/**
 * Reads every protobuf record from the given Parquet file and converts each one into a
 * {@code TestRecord} built from its partition, sequence and payload.
 *
 * @param outputFile the Parquet file to read
 * @return the converted records, in the order they were read
 * @throws IOException declared for failures while opening or reading the file
 */
protected List<TestRecord> readParquetFilesProto(File outputFile)
    throws IOException {
  List<TestRecord> converted = new ArrayList<>();
  ParquetReader<TestRecordProtos.TestRecordOrBuilder> protoReader = null;
  try {
    protoReader = new ProtoParquetReader<>(new Path(outputFile.toString()));
    // read() returning null marks the end of the file.
    for (TestRecordProtos.TestRecordOrBuilder message = protoReader.read();
        message != null;
        message = protoReader.read()) {
      TestRecord copy = new TestRecord(
          message.getPartition(),
          message.getSequence(),
          message.getPayload());
      converted.add(copy);
    }
  } finally {
    if (protoReader != null) {
      try {
        protoReader.close();
      } catch (Exception closeFailure) {
        // Best effort: a failed close is only reported, never rethrown.
        System.out.println(closeFailure.getMessage());
      }
    }
  }
  return converted;
}
 
Example #3
Source File: ParquetHdfsDataWriterTest.java, from the incubator-gobblin project (Apache License 2.0). 6 votes.
/**
 * Reads every {@code Group} from the given Parquet file and, once the reader has been
 * closed, converts each group into a {@code TestRecord} using the partition, sequence and
 * payload fields named in {@code TestConstants}.
 *
 * @param outputFile the Parquet file to read
 * @return the converted records, in the order they were read
 * @throws IOException declared for failures while opening or reading the file
 */
protected List<TestRecord> readParquetFilesGroup(File outputFile)
    throws IOException {
  List<Group> groups = new ArrayList<>();
  ParquetReader<Group> groupReader = null;
  try {
    groupReader = new ParquetReader<>(new Path(outputFile.toString()), new SimpleReadSupport());
    // read() returning null marks the end of the file.
    Group current = groupReader.read();
    while (current != null) {
      groups.add(current);
      current = groupReader.read();
    }
  } finally {
    if (groupReader != null) {
      try {
        groupReader.close();
      } catch (Exception closeFailure) {
        // Best effort: a failed close is only reported, never rethrown.
        System.out.println(closeFailure.getMessage());
      }
    }
  }

  // Conversion is deliberately done after the reader is closed, on the collected groups.
  List<TestRecord> converted = new ArrayList<>(groups.size());
  for (Group group : groups) {
    int partition = group.getInteger(TestConstants.PARTITION_FIELD_NAME, 0);
    int sequence = group.getInteger(TestConstants.SEQUENCE_FIELD_NAME, 0);
    String payload = group.getString(TestConstants.PAYLOAD_FIELD_NAME, 0);
    converted.add(new TestRecord(partition, sequence, payload));
  }
  return converted;
}
 
Example #4
Source File: ParquetAvroStockReader.java, from the hiped2 project (Apache License 2.0). 6 votes.
/**
 * Reads the Parquet file named by the {@code INPUT} command-line option and prints every
 * {@code Stock} record it contains to standard output.
 *
 * @param args the command-line arguments
 * @return the non-zero result of command-line parsing if it fails, otherwise {@code 0}
 * @throws Exception if something goes wrong while opening, reading or closing the file
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputFile = new Path(cli.getArgValueAsString(CliCommonOpts.MrIOpts.INPUT));

  // try-with-resources closes the reader even when read() or printing throws;
  // previously an exception in the loop skipped close() and leaked the reader.
  try (ParquetReader<Stock> reader = new AvroParquetReader<Stock>(inputFile)) {
    Stock stock;
    // read() returning null marks the end of the file.
    while ((stock = reader.read()) != null) {
      System.out.println(ToStringBuilder.reflectionToString(stock,
          ToStringStyle.SIMPLE_STYLE
      ));
    }
  }

  return 0;
}
 
Example #5
Source File: CatCommand.java, from the parquet-tools project (Apache License 2.0). 6 votes.
/**
 * Pretty-prints every record of the Parquet file named by the first positional argument
 * to {@code Main.out}, writing an empty line after each record.
 *
 * @param options the parsed command line; its first argument is the input path
 * @throws Exception if the superclass step, opening the file or reading a record fails
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  String inputPath = options.getArgs()[0];

  ParquetReader<SimpleRecord> recordReader = null;
  try {
    PrintWriter out = new PrintWriter(Main.out, true);
    recordReader = new ParquetReader<SimpleRecord>(new Path(inputPath), new SimpleReadSupport());
    // read() returning null marks the end of the file.
    SimpleRecord current = recordReader.read();
    while (current != null) {
      current.prettyPrint(out);
      out.println();
      current = recordReader.read();
    }
  } finally {
    if (recordReader != null) {
      try {
        recordReader.close();
      } catch (Exception ignored) {
        // Best effort: a failure while closing is intentionally ignored.
      }
    }
  }
}
 
Example #6
Source File: HeadCommand.java, from the parquet-tools project (Apache License 2.0). 5 votes.
/**
 * Pretty-prints the first records of the Parquet file named by the first positional
 * argument to {@code Main.out}, writing an empty line after each record.
 *
 * <p>The number of records is taken from the {@code -n} option when present and from
 * {@code DEFAULT} otherwise. A limit of zero or less prints nothing.
 *
 * @param options the parsed command line; its first argument is the input path
 * @throws Exception if the superclass step, parsing {@code -n}, opening the file or
 *     reading a record fails
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  long remaining = DEFAULT;
  if (options.hasOption('n')) {
    remaining = Long.parseLong(options.getOptionValue('n'));
  }

  String[] args = options.getArgs();
  String input = args[0];

  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = new ParquetReader<SimpleRecord>(new Path(input), new SimpleReadSupport());
    // Check the limit BEFORE reading: the previous loop fetched one record beyond the
    // limit (and one record even for a limit of 0) only to discard it.
    while (remaining > 0) {
      SimpleRecord value = reader.read();
      if (value == null) {
        // End of file reached before the limit.
        break;
      }
      value.prettyPrint(writer);
      writer.println();
      remaining--;
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ignored) {
        // Best effort: a failure while closing is intentionally ignored.
      }
    }
  }
}