Java Code Examples for org.apache.parquet.column.ParquetProperties#WriterVersion

The following examples show how to use org.apache.parquet.column.ParquetProperties#WriterVersion. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: SqlInterpreterTest.java    From zeppelin with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the given values to a temporary, uncompressed Parquet file whose schema has a
 * single required {@code int32_field} column.
 *
 * @param values the values to write, one record per element
 * @param version the Parquet writer version to write with
 * @return the temporary file that was written
 * @throws IOException if the temporary file cannot be created or writing fails
 */
public File createParquetFile(int[] values,
                              ParquetProperties.WriterVersion version) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".par");
  // Only the unique name is kept; the empty placeholder is removed before the writer is
  // opened (presumably because the writer expects to create the file itself -- confirm).
  file.delete();
  Path path = new Path(file.getAbsolutePath());
  Configuration conf = new Configuration();

  MessageType schema = MessageTypeParser.parseMessageType(
          "message test { "
                  + "required int32 int32_field; "
                  + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);

  // try-with-resources closes the writer even when a write fails part-way through.
  try (ParquetWriter<Group> writer = new ParquetWriter<Group>(
          path,
          new GroupWriteSupport(),
          CompressionCodecName.UNCOMPRESSED, 1024, 1024, 512, true, false, version, conf)) {
    for (int value : values) {
      writer.write(f.newGroup()
              .append("int32_field", value));
    }
  }
  return file;
}
 
Example 2
Source File: TestStatistics.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the data described by {@code context} to {@code context.fsPath}, runs the
 * context's checks, and then deletes {@code context.path}.
 *
 * @param context supplies the schema, writer settings, data to write and the checks to run
 * @throws IOException if creating, writing or closing the Parquet writer fails
 */
public static void writeAndTest(WriteContext context) throws IOException {
  // Create the configuration, and then apply the schema to our configuration.
  Configuration configuration = new Configuration();
  GroupWriteSupport.setSchema(context.schema, configuration);
  GroupWriteSupport groupWriteSupport = new GroupWriteSupport();

  // Create the writer properties
  final int blockSize = context.blockSize;
  final int pageSize = context.pageSize;
  // The dictionary page size is deliberately the same as the data page size.
  final int dictionaryPageSize = pageSize;
  final boolean enableDictionary = context.enableDictionary;
  final boolean enableValidation = context.enableValidation;
  ParquetProperties.WriterVersion writerVersion = context.version;
  CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;

  // try-with-resources closes the writer even if context.write(...) throws; on success
  // the writer is closed before the checks below run, exactly as before.
  try (ParquetWriter<Group> writer = new ParquetWriter<Group>(context.fsPath,
      groupWriteSupport, codec, blockSize, pageSize, dictionaryPageSize,
      enableDictionary, enableValidation, writerVersion, configuration)) {
    context.write(writer);
  }

  context.test();

  context.path.delete();
}
 
Example 3
Source File: CompressionConveterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a 1000-record input file with {@code srcCodec}, converts it to {@code destCodec},
 * and then runs the column, metadata and column-index validations on the result.
 *
 * @param srcCodec codec name used to write the input file
 * @param destCodec codec name passed to the conversion
 * @param writerVersion Parquet writer version used for the input file
 * @param pageSize page size used for the input file
 * @throws Exception if file creation, conversion or any validation fails
 */
private void testInternal(String srcCodec, String destCodec, ParquetProperties.WriterVersion writerVersion, int pageSize) throws Exception {
  final int recordCount = 1000;
  final TestDocs docs = new TestDocs(recordCount);

  // Source file in the original codec, plus a fresh temp path for the converted output.
  final String source =
      createParquetFile(conf, extraMeta, recordCount, "input", srcCodec, writerVersion, pageSize, docs);
  final String target = createTempFile("output_trans");

  convertCompression(conf, source, target, destCodec);

  // Check the converted file against the generated documents and against the source file.
  validateColumns(target, recordCount, docs);
  validMeta(source, target);
  validColumnIndex(source, target);
}
 
Example 4
Source File: CompressionConveterTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code numRecord} records taken from {@code testDocs} to a new temporary Parquet
 * file and returns its path.
 *
 * <p>The schema has required {@code DocId}, {@code Name} and {@code Gender} fields and an
 * optional {@code Links} group with repeated {@code Backward} and {@code Forward} fields.
 * The schema string is also stored in {@code conf}.
 *
 * @param conf configuration handed to the writer; mutated to carry the schema
 * @param extraMeta extra key/value metadata passed to the writer builder
 * @param numRecord number of records to write; the first {@code numRecord} entries of each
 *     {@code testDocs} array are read
 * @param prefix prefix passed to {@code createTempFile}
 * @param codec name of a {@link CompressionCodecName} constant
 * @param writerVersion Parquet writer version to write with
 * @param pageSize page size passed to the writer builder
 * @param testDocs source of the field values
 * @return the path of the file that was written
 * @throws IOException if the file cannot be created or written
 */
private String createParquetFile(Configuration conf, Map<String, String> extraMeta, int numRecord, String prefix, String codec,
                                       ParquetProperties.WriterVersion writerVersion, int pageSize, TestDocs testDocs) throws IOException {
  MessageType schema = new MessageType("schema",
    new PrimitiveType(REQUIRED, INT64, "DocId"),
    new PrimitiveType(REQUIRED, BINARY, "Name"),
    new PrimitiveType(REQUIRED, BINARY, "Gender"),
    new GroupType(OPTIONAL, "Links",
      new PrimitiveType(REPEATED, BINARY, "Backward"),
      new PrimitiveType(REPEATED, BINARY, "Forward")));

  conf.set(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA, schema.toString());

  String file = createTempFile(prefix);
  ExampleParquetWriter.Builder builder = ExampleParquetWriter.builder(new Path(file))
    .withConf(conf)
    .withWriterVersion(writerVersion)
    .withExtraMetaData(extraMeta)
    .withDictionaryEncoding("DocId", true)
    .withValidation(true)
    .enablePageWriteChecksum()
    .withPageSize(pageSize)
    .withCompressionCodec(CompressionCodecName.valueOf(codec));
  // Parameterized writer type instead of the raw ParquetWriter, so write(...) is type-checked.
  try (ParquetWriter<Group> writer = builder.build()) {
    for (int i = 0; i < numRecord; i++) {
      SimpleGroup g = new SimpleGroup(schema);
      g.add("DocId", testDocs.docId[i]);
      g.add("Name", testDocs.name[i]);
      g.add("Gender", testDocs.gender[i]);
      Group links = g.addGroup("Links");
      // Field indexes within the Links group: 0 = Backward, 1 = Forward (schema order above).
      links.add(0, testDocs.linkBackward[i]);
      links.add(1, testDocs.linkForward[i]);
      writer.write(g);
    }
  }

  return file;
}
 
Example 5
Source File: TestStatistics.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Captures the settings for one write: the output location, the schema and the writer options.
 *
 * @param path output file; also stored as a {@code Path} built from its string form
 * @param schema schema to store
 * @param blockSize block size to store
 * @param pageSize page size to store
 * @param enableDictionary dictionary flag to store
 * @param enableValidation validation flag to store
 * @param version Parquet writer version to store
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public WriteContext(File path, MessageType schema, int blockSize, int pageSize, boolean enableDictionary, boolean enableValidation, ParquetProperties.WriterVersion version) throws IOException {
  // What to write and with which format version.
  this.schema = schema;
  this.version = version;

  // Where to write: the same location as a File and as a Path.
  this.path = path;
  this.fsPath = new Path(path.toString());

  // Writer sizes and flags.
  this.blockSize = blockSize;
  this.pageSize = pageSize;
  this.enableDictionary = enableDictionary;
  this.enableValidation = enableValidation;
}
 
Example 6
Source File: ParquetFactory.java    From sylph with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the target directory, table name, schema and writer version, then submits the
 * consumer loop and the monitor task to the executor pool and registers a JVM shutdown hook.
 *
 * @param writeTableDir directory to write under; a trailing "/" is appended if missing
 * @param table table name; must not be null
 * @param parquetVersion Parquet writer version; must not be null
 * @param schema Parquet schema; must not be null
 */
public ParquetFactory(
        final String writeTableDir,
        final String table,
        ParquetProperties.WriterVersion parquetVersion,
        MessageType schema)
{
    requireNonNull(writeTableDir, "writeTableDir is null");
    // Normalize so the stored directory always ends with a slash.
    this.writeTableDir = writeTableDir.endsWith("/") ? writeTableDir : writeTableDir + "/";

    this.table = requireNonNull(table, "table is null");
    this.schema = requireNonNull(schema, "schema is null");
    this.parquetVersion = requireNonNull(parquetVersion, "parquetVersion is null");

    /**
     * Consumer
     * */
    final Callable<Void> consumer = () -> {
        Thread.currentThread().setName("Parquet_Factory_Consumer");
        try {
            // Poll both queues until the factory is marked closed.
            while (!closed) {
                Runnable value = streamData.poll();
                //Event 1
                if (value != null) {
                    value.run(); //put data line
                }
                //Event 2: read the instruction sequence
                Runnable event = monitorEvent.poll();
                if (event != null) {
                    event.run();
                }
                //Event 3
                // Neither queue had work: back off for 1 ms before polling again.
                if (value == null && event == null) {
                    TimeUnit.MILLISECONDS.sleep(1);
                }
            }
        }
        // NOTE(review): this also catches InterruptedException from sleep(); any failure
        // in the loop is logged and then terminates the whole JVM.
        catch (Exception e) {
            logger.error("Parquet_Factory_Consumer error", e);
            System.exit(-1);
        }
        return null;
    };

    //register consumer
    executorPool.submit(consumer);
    //register monitor
    executorPool.submit(monitor);

    Runtime.getRuntime().addShutdownHook(new Thread(shutdownHook));
}
 
Example 7
Source File: DataGenerator.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Generates {@code nRows} rows of sample data covering the Parquet primitive types and
 * writes them to {@code outFile}. Does nothing except print a message if the file
 * already exists.
 *
 * @param outFile destination file
 * @param configuration configuration used for the existence check and the writer; the
 *     schema is set on it
 * @param version Parquet writer version to write with
 * @param blockSize block size passed to the writer
 * @param pageSize page size passed to the writer
 * @param fixedLenByteArraySize length of the {@code flba_field} column and of its value
 * @param codec compression codec passed to the writer
 * @param nRows number of rows to write
 * @throws IOException if the existence check or writing fails
 */
public void generateData(Path outFile, Configuration configuration, ParquetProperties.WriterVersion version,
                         int blockSize, int pageSize, int fixedLenByteArraySize, CompressionCodecName codec, int nRows)
        throws IOException
{
  if (exists(configuration, outFile)) {
    System.out.println("File already exists " + outFile);
    return;
  }

  System.out.println("Generating data @ " + outFile);

  MessageType schema = parseMessageType(
          "message test { "
                  + "required binary binary_field; "
                  + "required int32 int32_field; "
                  + "required int64 int64_field; "
                  + "required boolean boolean_field; "
                  + "required float float_field; "
                  + "required double double_field; "
                  + "required fixed_len_byte_array(" + fixedLenByteArraySize +") flba_field; "
                  + "required int96 int96_field; "
                  + "} ");

  GroupWriteSupport.setSchema(schema, configuration);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);

  //generate some data for the fixed len byte array field
  char[] chars = new char[fixedLenByteArraySize];
  Arrays.fill(chars, '*');
  // The value is the same for every row, so build the String once.
  final String flbaValue = new String(chars);

  // try-with-resources closes the writer even when a write fails part-way through.
  try (ParquetWriter<Group> writer = new ParquetWriter<Group>(outFile, new GroupWriteSupport(), codec, blockSize,
                                                              pageSize, DICT_PAGE_SIZE, true, false, version, configuration)) {
    for (int i = 0; i < nRows; i++) {
      writer.write(
        f.newGroup()
          .append("binary_field", randomUUID().toString())
          .append("int32_field", i)
          .append("int64_field", 64L)
          .append("boolean_field", true)
          .append("float_field", 1.0f)
          .append("double_field", 2.0d)
          .append("flba_field", flbaValue)
          .append("int96_field", Binary.fromConstantByteArray(new byte[12]))
      );
    }
  }
}
 
Example 8
Source File: TestStatistics.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a context whose schema, record count and value generators are all derived from
 * {@code seed}.
 *
 * @param seed seed for the schema and for the {@link Random} that drives everything else
 * @param path output file, forwarded to the superclass constructor
 * @param blockSize block size, forwarded to the superclass constructor
 * @param pageSize page size, forwarded to the superclass constructor
 * @param enableDictionary dictionary flag, forwarded to the superclass constructor
 * @param version Parquet writer version, forwarded to the superclass constructor
 * @throws IOException declared by the signature; propagated from the superclass constructor
 */
public DataContext(long seed, File path, int blockSize, int pageSize, boolean enableDictionary, ParquetProperties.WriterVersion version) throws IOException {
  // The literal `true` is presumably the superclass's enableValidation flag -- confirm
  // against the superclass constructor signature.
  super(path, buildSchema(seed), blockSize, pageSize, enableDictionary, true, version);

  this.random = new Random(seed);
  // Drawn before any generator seed, so it consumes the first value from `random`.
  this.recordCount = random.nextInt(MAX_TOTAL_ROWS);

  int fixedLength = schema.getType("fixed-binary").asPrimitiveType().getTypeLength();

  // Each generator takes its own seed from `random` in list order, so reordering,
  // adding or removing entries changes the data produced for a given seed.
  // NOTE(review): the list order presumably has to line up with the field order
  // produced by buildSchema(seed) -- confirm before editing.
  randomGenerators = Arrays.<RandomValueGenerator<?>>asList(
      new RandomValues.IntGenerator(random.nextLong()),
      new RandomValues.LongGenerator(random.nextLong()),
      new RandomValues.Int96Generator(random.nextLong()),
      new RandomValues.FloatGenerator(random.nextLong()),
      new RandomValues.DoubleGenerator(random.nextLong()),
      new RandomValues.StringGenerator(random.nextLong()),
      new RandomValues.BinaryGenerator(random.nextLong()),
      new RandomValues.FixedGenerator(random.nextLong(), fixedLength),
      new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
      new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
      new RandomValues.UnconstrainedFloatGenerator(random.nextLong()),
      new RandomValues.UnconstrainedDoubleGenerator(random.nextLong()),
      new RandomValues.IntGenerator(random.nextLong(), Byte.MIN_VALUE, Byte.MAX_VALUE),
      new RandomValues.UIntGenerator(random.nextLong(), Byte.MIN_VALUE, Byte.MAX_VALUE),
      new RandomValues.IntGenerator(random.nextLong(), Short.MIN_VALUE, Short.MAX_VALUE),
      new RandomValues.UIntGenerator(random.nextLong(), Short.MIN_VALUE, Short.MAX_VALUE),
      new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
      new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
      new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
      new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
      new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
      new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
      new RandomValues.FixedGenerator(random.nextLong(), fixedLength),
      new RandomValues.BinaryGenerator(random.nextLong()),
      new RandomValues.StringGenerator(random.nextLong()),
      new RandomValues.StringGenerator(random.nextLong()),
      new RandomValues.StringGenerator(random.nextLong()),
      new RandomValues.BinaryGenerator(random.nextLong()),
      new RandomValues.IntGenerator(random.nextLong()),
      new RandomValues.IntGenerator(random.nextLong()),
      new RandomValues.LongGenerator(random.nextLong()),
      new RandomValues.LongGenerator(random.nextLong()),
      new RandomValues.LongGenerator(random.nextLong()),
      // Fixed length 12 rather than fixedLength.
      new RandomValues.FixedGenerator(random.nextLong(), 12)
  );
}
 
Example 9
Source File: ParquetConfig.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the Parquet writer version held by this configuration.
 *
 * @return the stored writer version
 */
public ParquetProperties.WriterVersion getWriterVersion() {
    return this.writerVersion;
}
 
Example 10
Source File: ParquetConfig.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the Parquet writer version in this configuration.
 *
 * @param writerVersion the writer version to store
 */
public void setWriterVersion(final ParquetProperties.WriterVersion writerVersion) {
    this.writerVersion = writerVersion;
}
 
Example 11
Source File: ExampleParquetWriter.java    From parquet-mr with Apache License 2.0 3 votes vote down vote up
/**
 * Creates a new {@link ExampleParquetWriter} by delegating to the superclass constructor.
 *
 * <p>{@code pageSize} is passed to the superclass twice in a row; the second occurrence
 * presumably sets the dictionary page size -- confirm against the superclass signature.
 *
 * @param file the file to write to
 * @param writeSupport the write support used to write {@code Group} records
 * @param compressionCodecName compression codec to use, or CompressionCodecName.UNCOMPRESSED
 * @param blockSize the block size threshold
 * @param pageSize the page size; see the Parquet write-up. Blocks are subdivided into
 *     pages for alignment and other purposes.
 * @param enableDictionary whether to use a dictionary to compress columns
 * @param enableValidation validation flag forwarded to the superclass
 * @param writerVersion Parquet writer version forwarded to the superclass
 * @param conf the Configuration to use
 * @throws IOException propagated from the superclass constructor
 */
ExampleParquetWriter(Path file, WriteSupport<Group> writeSupport,
                     CompressionCodecName compressionCodecName,
                     int blockSize, int pageSize, boolean enableDictionary,
                     boolean enableValidation,
                     ParquetProperties.WriterVersion writerVersion,
                     Configuration conf)
    throws IOException {
  super(
      file,
      writeSupport,
      compressionCodecName,
      blockSize,
      pageSize,
      pageSize,
      enableDictionary,
      enableValidation,
      writerVersion,
      conf);
}