Java Code Examples for parquet.hadoop.ParquetOutputFormat

The following examples show how to use parquet.hadoop.ParquetOutputFormat. They are extracted from open source projects; where available, the project, source file, and license are noted above each example.
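As a primer, here is a minimal sketch of how these pieces typically fit together on a plain Hadoop Job. The class name ParquetJobSetup and the Snappy/dictionary settings are illustrative choices, not taken from any of the projects below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import parquet.hadoop.ParquetOutputFormat;
import parquet.hadoop.metadata.CompressionCodecName;

public class ParquetJobSetup {
	public static Job newParquetJob(String outputPath) throws Exception {
		Job job = Job.getInstance();
		// A concrete subclass (Avro, Thrift, Protobuf) supplies the record
		// write support, as the examples below show; the base class carries
		// the tuning knobs.
		FileOutputFormat.setOutputPath(job, new Path(outputPath));
		ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
		ParquetOutputFormat.setEnableDictionary(job, true);
		return job;
	}
}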
Example 1
Source Project: parquet-flinktacular   Source File: ParquetAvroExample.java   License: Apache License 2.0
public static void writeAvro(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
	// Set up the Hadoop job
	Job job = Job.getInstance();

	// Set up Hadoop Output Format
	HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new AvroParquetOutputFormat(), job);

	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	AvroParquetOutputFormat.setSchema(job, Person.getClassSchema());
	ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
	ParquetOutputFormat.setEnableDictionary(job, true);

	// Output & Execute
	data.output(hadoopOutputFormat);
}
 
Example 2
Source Project: parquet-flinktacular   Source File: ParquetThriftExample.java   License: Apache License 2.0
public static void writeThrift(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
	// Set up the Hadoop job
	Job job = Job.getInstance();

	// Set up Hadoop Output Format
	HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new ParquetThriftOutputFormat(), job);

	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
	ParquetOutputFormat.setEnableDictionary(job, true);

	ParquetThriftOutputFormat.setThriftClass(job, Person.class);

	// Output & Execute
	data.output(hadoopOutputFormat);
}
 
Example 3
@Override
public void setOutputFile( String file, boolean override ) throws Exception {
  inClassloader( () -> {
    // Resolve S3 credentials and normalize the path before touching the file system
    S3NCredentialUtils.applyS3CredentialsToHadoopConfigurationIfNecessary( file, job.getConfiguration() );
    outputFile = new Path( S3NCredentialUtils.scrubFilePathIfNecessary( file ) );
    FileSystem fs = FileSystem.get( outputFile.toUri(), job.getConfiguration() );
    // Either overwrite an existing output file or fail fast
    if ( fs.exists( outputFile ) ) {
      if ( override ) {
        fs.delete( outputFile, true );
      } else {
        throw new FileAlreadyExistsException( file );
      }
    }
    // The output path is a directory, so point it at the file's parent
    ParquetOutputFormat.setOutputPath( job, outputFile.getParent() );
  } );
}
 
Example 4
@Override
public void setVersion( VERSION version ) throws Exception {
  inClassloader( () -> {
    ParquetProperties.WriterVersion writerVersion;
    switch ( version ) {
      case VERSION_1_0:
        writerVersion = ParquetProperties.WriterVersion.PARQUET_1_0;
        break;
      case VERSION_2_0:
      default:
        writerVersion = ParquetProperties.WriterVersion.PARQUET_2_0;
        break;
    }
    job.getConfiguration().set( ParquetOutputFormat.WRITER_VERSION, writerVersion.toString() );
  } );
}
 
Example 5
@Override
public void setCompression( COMPRESSION comp ) throws Exception {
  inClassloader( () -> {
    CompressionCodecName codec;
    switch ( comp ) {
      case SNAPPY:
        codec = CompressionCodecName.SNAPPY;
        break;
      case GZIP:
        codec = CompressionCodecName.GZIP;
        break;
      case LZO:
        codec = CompressionCodecName.LZO;
        break;
      default:
        codec = CompressionCodecName.UNCOMPRESSED;
        break;
    }
    ParquetOutputFormat.setCompression( job, codec );
  } );
}
 
Example 6
public static void writeProtobuf(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
	Job job = Job.getInstance();

	// Set up Hadoop Output Format
	HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new ProtoParquetOutputFormat(), job);

	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	ProtoParquetOutputFormat.setProtobufClass(job, Person.class);
	ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
	ParquetOutputFormat.setEnableDictionary(job, true);

	// Output & Execute
	data.output(hadoopOutputFormat);
}
 
Example 7
public PentahoTwitterOutputFormat()  {
  logger.info( "We are initializing parquet output format" );

  inClassloader( () -> {
    ConfigurationProxy conf = new ConfigurationProxy();

    job = Job.getInstance( conf );

    // Skip writing the _metadata summary file when the job commits
    job.getConfiguration().set( ParquetOutputFormat.ENABLE_JOB_SUMMARY, "false" );
    ParquetOutputFormat.setEnableDictionary( job, false );
  } );
}
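The ConfigurationProxy and inClassloader wrappers above are Pentaho-specific plumbing. A standalone sketch of the same two settings, assuming a vanilla Hadoop Job (the class name NoSummaryJobSetup is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import parquet.hadoop.ParquetOutputFormat;

public class NoSummaryJobSetup {
	public static Job newJob() throws Exception {
		Job job = Job.getInstance( new Configuration() );
		// Skip the _metadata summary file at job commit time
		job.getConfiguration().set( ParquetOutputFormat.ENABLE_JOB_SUMMARY, "false" );
		ParquetOutputFormat.setEnableDictionary( job, false );
		return job;
	}
}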
 
Example 8
@Override
public void enableDictionary( boolean useDictionary ) throws Exception {
  inClassloader( () -> ParquetOutputFormat.setEnableDictionary( job, useDictionary ) );
}
 
Example 9
@Override
public void setRowGroupSize( int size ) throws Exception {
  // Parquet's "block size" is the row group size, in bytes
  inClassloader( () -> ParquetOutputFormat.setBlockSize( job, size ) );
}
 
Example 10
@Override
public void setDataPageSize( int size ) throws Exception {
  inClassloader( () -> ParquetOutputFormat.setPageSize( job, size ) );
}
 
Example 11
@Override
public void setDictionaryPageSize( int size ) throws Exception {
  inClassloader( () -> ParquetOutputFormat.setDictionaryPageSize( job, size ) );
}
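The three size setters in Examples 9 through 11 map onto Parquet's file layout. A standalone sketch, again assuming a plain Hadoop Job rather than the Pentaho inClassloader wrapper; the byte values are illustrative, not recommendations:

import org.apache.hadoop.mapreduce.Job;
import parquet.hadoop.ParquetOutputFormat;

public class ParquetSizeTuning {
	public static void tune(Job job) {
		// Row group ("block") size: the unit of horizontal partitioning, in bytes
		ParquetOutputFormat.setBlockSize(job, 128 * 1024 * 1024);
		// Data page size: the unit of encoding and compression within a column chunk
		ParquetOutputFormat.setPageSize(job, 1024 * 1024);
		// Dictionary page size: cap on a column's dictionary before the writer
		// falls back to plain encoding
		ParquetOutputFormat.setDictionaryPageSize(job, 1024 * 1024);
	}
}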