Java Code Examples for org.apache.parquet.column.impl.ColumnWriteStoreV1#flush()

The following examples show how to use org.apache.parquet.column.impl.ColumnWriteStoreV1#flush(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestMemColumn.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one int64 value to the {@code foo.bar} column through a
 * {@code ColumnWriteStoreV1} backed by a {@code MemPageStore}, flushes the
 * store, and checks that every value read back has levels 0/0 and equals 42.
 */
@Test
public void testMemColumn() throws Exception {
  MessageType schema = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  ColumnDescriptor path = schema.getColumnDescription(new String[] {"foo", "bar"});
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  // Upper-case suffix: "42l" is easily misread as the number 421.
  columnWriter.write(42L, 0, 0);
  memColumnsStore.endRecord();
  // Flush before a reader is opened on the same page store.
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, schema);
  // NOTE(review): if getTotalValueCount() were 0 this loop would pass vacuously;
  // consider asserting the expected count once it is confirmed.
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    // JUnit's contract is assertEquals(expected, actual); keeping that order
    // makes failure messages read correctly.
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42L, columnReader.getLong());
    columnReader.consume();
  }
}
 
Example 2
Source File: TestMemColumn.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one binary value ("42") to the {@code foo.bar} column through a
 * {@code ColumnWriteStoreV1} backed by a {@code MemPageStore}, flushes the
 * store, and checks that every value read back has levels 0/0 and decodes
 * to the same UTF-8 string.
 */
@Test
public void testMemColumnBinary() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required binary bar; } }");
  String[] col = new String[]{"foo", "bar"};
  MemPageStore memPageStore = new MemPageStore(10);

  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  // Single descriptor variable; the former path1/path pair was a pure alias.
  ColumnDescriptor path = mt.getColumnDescription(col);

  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  columnWriter.write(Binary.fromString("42"), 0, 0);
  memColumnsStore.endRecord();
  // Flush before a reader is opened on the same page store.
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  // NOTE(review): if getTotalValueCount() were 0 this loop would pass vacuously;
  // consider asserting the expected count once it is confirmed.
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    // JUnit's contract is assertEquals(expected, actual); keeping that order
    // makes failure messages read correctly.
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals("42", columnReader.getBinary().toStringUsingUTF8());
    columnReader.consume();
  }
}
 
Example 3
Source File: TestMemColumn.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes 2000 int64 records (each the value 42) to the {@code foo.bar} column
 * through a {@code ColumnWriteStoreV1} backed by a {@code MemPageStore},
 * flushes the store, and checks that every value read back has levels 0/0
 * and equals 42.
 */
@Test
public void testMemColumnSeveralPages() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  String[] col = new String[]{"foo", "bar"};
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  // Single descriptor variable; the former path1/path pair was a pure alias.
  ColumnDescriptor path = mt.getColumnDescription(col);

  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  for (int i = 0; i < 2000; i++) {
    // Upper-case suffix: "42l" is easily misread as the number 421.
    columnWriter.write(42L, 0, 0);
    memColumnsStore.endRecord();
  }
  // Flush before a reader is opened on the same page store.
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  // NOTE(review): if getTotalValueCount() were 0 this loop would pass vacuously;
  // consider asserting the expected count once it is confirmed.
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    // JUnit's contract is assertEquals(expected, actual); keeping that order
    // makes failure messages read correctly.
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42L, columnReader.getLong());
    columnReader.consume();
  }
}
 
Example 4
Source File: PerfTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code ColumnWriteStoreV1} over the given page store (50 MiB page
 * size, dictionary encoding disabled), writes r1 and r2 once, runs the
 * batched {@code write} overload with increasing record counts, flushes the
 * store, and prints the buffered size and maximum column memory size.
 *
 * @param memPageStore page store that receives the written pages
 */
private static void write(MemPageStore memPageStore) {
  ParquetProperties writeProps = ParquetProperties.builder()
      .withPageSize(50*1024*1024)
      .withDictionaryEncoding(false)
      .build();
  ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(memPageStore, writeProps);
  MessageColumnIO columnIO = newColumnFactory(schema);

  GroupWriter groupWriter = new GroupWriter(columnIO.getRecordWriter(columns), schema);
  groupWriter.write(r1);
  groupWriter.write(r2);

  // Five runs of 10k, then one run each of 100k and 1M, in this exact order.
  final int[] batchSizes = {10000, 10000, 10000, 10000, 10000, 100000, 1000000};
  for (int batchSize : batchSizes) {
    write(memPageStore, groupWriter, batchSize);
  }
  columns.flush();

  System.out.println();
  System.out.println(columns.getBufferedSize() + " bytes used total");
  System.out.println("max col size: " + columns.maxColMemSize() + " bytes");
}
 
Example 5
Source File: TestFiltered.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code number} copies of the r1/r2 record pair into a fresh
 * {@code MemPageStore} using a {@code ColumnWriteStoreV1} configured with an
 * 800-byte page size and dictionary encoding disabled.
 *
 * @param columnIO column IO used to obtain the record writer
 * @param number how many times the r1/r2 pair is written
 * @return the page store holding the flushed records
 */
private MemPageStore writeTestRecords(MessageColumnIO columnIO, int number) {
  // The store is constructed with twice the pair count: two records per iteration.
  MemPageStore pageStore = new MemPageStore(number * 2);
  ParquetProperties writeProps = ParquetProperties.builder()
      .withPageSize(800)
      .withDictionaryEncoding(false)
      .build();
  ColumnWriteStoreV1 writeStore = new ColumnWriteStoreV1(pageStore, writeProps);

  RecordConsumer consumer = columnIO.getRecordWriter(writeStore);
  GroupWriter writer = new GroupWriter(consumer, schema);
  int written = 0;
  while (written < number) {
    writer.write(r1);
    writer.write(r2);
    written++;
  }

  // Flush the record consumer first, then the column store.
  consumer.flush();
  writeStore.flush();
  return pageStore;
}
 
Example 6
Source File: TupleConsumerPerfTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point of the perf test: derives three Pig schema variants, converts
 * the first one to a Parquet schema, writes data into an in-memory page
 * store, reads it back with the schema variants, and prints memory usage.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if any step of the write/read cycle fails
 */
public static void main(String[] args) throws Exception {
  final String fullPigSchema = pigSchema(false, false);
  final String projectedPigSchema = pigSchema(true, false);
  final String noStringPigSchema = pigSchema(true, true);
  final MessageType schema =
      new PigSchemaConverter().convert(Utils.getSchemaFromString(fullPigSchema));

  final MemPageStore pageStore = new MemPageStore(0);
  final ParquetProperties writeProps = ParquetProperties.builder()
      .withPageSize(50*1024*1024)
      .withDictionaryEncoding(false)
      .build();
  final ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(pageStore, writeProps);

  write(pageStore, columns, schema, fullPigSchema);
  // Flush before the read phase starts on the same page store.
  columns.flush();
  read(pageStore, fullPigSchema, projectedPigSchema, noStringPigSchema);

  System.out.println(columns.getBufferedSize() + " bytes used total");
  System.out.println("max col size: " + columns.maxColMemSize() + " bytes");
}
 
Example 7
Source File: TestColumnIO.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given groups into {@code memPageStore} using a column IO built
 * for {@code writtenSchema}, then flushes the record writer and column store.
 *
 * @param writtenSchema schema the groups are written with
 * @param memPageStore page store that receives the written pages
 * @param groups records to write, in order
 */
private void writeGroups(MessageType writtenSchema, MemPageStore memPageStore, Group... groups) {
  ColumnIOFactory ioFactory = new ColumnIOFactory(true);
  ColumnWriteStoreV1 writeStore = newColumnWriteStore(memPageStore);
  MessageColumnIO messageIO = ioFactory.getColumnIO(writtenSchema);
  RecordConsumer consumer = messageIO.getRecordWriter(writeStore);
  GroupWriter writer = new GroupWriter(consumer, writtenSchema);

  for (int idx = 0; idx < groups.length; idx++) {
    writer.write(groups[idx]);
  }

  // Flush the record consumer first, then the column store.
  consumer.flush();
  writeStore.flush();
}
 
Example 8
Source File: TestColumnIO.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes r1 into an in-memory page store, then reads it back through a
 * record reader whose converter is an {@code ExpectationValidatingConverter}
 * built from {@code expectedEventsForR1}.
 */
@Test
public void testPushParser() {
  MemPageStore pageStore = new MemPageStore(1);
  ColumnWriteStoreV1 writeStore = newColumnWriteStore(pageStore);
  MessageColumnIO messageIO = new ColumnIOFactory().getColumnIO(schema);
  RecordConsumer consumer = messageIO.getRecordWriter(writeStore);

  GroupWriter writer = new GroupWriter(consumer, schema);
  writer.write(r1);
  // Flush the record consumer first, then the column store.
  consumer.flush();
  writeStore.flush();

  ExpectationValidatingConverter validatingConverter =
      new ExpectationValidatingConverter(expectedEventsForR1, schema);
  RecordReader<Void> reader = messageIO.getRecordReader(pageStore, validatingConverter);
  // The returned record is not used here; the read is driven through the converter.
  reader.read();
}
 
Example 9
Source File: TestParquetReadProtocol.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a Thrift object through Parquet: converts its class to a
 * Parquet schema, writes {@code expected} via a {@code ParquetWriteProtocol}
 * into an in-memory page store, reads one record back with a
 * {@code TBaseRecordConverter}, and asserts it equals the input.
 *
 * @param expected the Thrift object to write and compare against
 * @param <T> concrete Thrift type of {@code expected}
 * @throws TException if writing {@code expected} to the protocol fails
 */
private <T extends TBase<?,?>> void validate(T expected) throws TException {
  @SuppressWarnings("unchecked")
  final Class<T> recordClass = (Class<T>)expected.getClass();
  final MemPageStore pageStore = new MemPageStore(1);
  final ThriftSchemaConverter converterForSchema = new ThriftSchemaConverter();
  final MessageType schema = converterForSchema.convert(recordClass);
  LOG.info("{}", schema);

  final ColumnIOFactory ioFactory = new ColumnIOFactory(true);
  final MessageColumnIO messageIO = ioFactory.getColumnIO(schema);
  final ParquetProperties writeProps = ParquetProperties.builder()
      .withPageSize(10000)
      .withDictionaryEncoding(false)
      .build();
  final ColumnWriteStoreV1 writeStore = new ColumnWriteStoreV1(pageStore, writeProps);
  final RecordConsumer consumer = messageIO.getRecordWriter(writeStore);
  final StructType structType = converterForSchema.toStructType(recordClass);
  final ParquetWriteProtocol writeProtocol =
      new ParquetWriteProtocol(consumer, messageIO, structType);

  expected.write(writeProtocol);
  // Flush the record consumer first, then the column store.
  consumer.flush();
  writeStore.flush();

  final ThriftRecordConverter<T> recordConverter =
      new TBaseRecordConverter<T>(recordClass, schema, structType);
  final RecordReader<T> reader = messageIO.getRecordReader(pageStore, recordConverter);

  final T actual = reader.read();

  assertEquals(expected, actual);
}