Example 1
Source Project: spork   Source File:    License: Apache License 2.0 6 votes vote down vote up
public void testMultiStore() throws Exception {
    pigServer.registerQuery("A = load '" + INPUT1 + "' as (a0:int, a1:chararray);");
    pigServer.registerQuery("B = order A by a0;");
    pigServer.registerQuery("store B into '" + OUTPUT2 + "' using OrcStorage();");
    pigServer.registerQuery("store B into '" + OUTPUT3 +"' using OrcStorage('-c SNAPPY');");

    Path outputFilePath = new Path(new Path(OUTPUT2), "part-r-00000");
    Reader reader = OrcFile.createReader(fs, outputFilePath);
    assertEquals(reader.getNumberOfRows(), 2);
    assertEquals(reader.getCompression(), CompressionKind.ZLIB);

    Path outputFilePath2 = new Path(new Path(OUTPUT3), "part-r-00000");
    reader = OrcFile.createReader(fs, outputFilePath2);
    assertEquals(reader.getNumberOfRows(), 2);
    assertEquals(reader.getCompression(), CompressionKind.SNAPPY);

    verifyData(outputFilePath, outputFilePath2, fs, 2);
Example 2
Source Project: nifi   Source File:    License: Apache License 2.0 6 votes vote down vote up
public HDFSRecordWriter createHDFSRecordWriter(final ProcessContext context, final FlowFile flowFile, final Configuration conf, final Path path, final RecordSchema schema)
        throws IOException, SchemaNotFoundException {

    final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue();
    final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
    final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue());
    final boolean normalizeForHive = context.getProperty(HIVE_FIELD_NAMES).asBoolean();
    TypeInfo orcSchema = NiFiOrcUtils.getOrcSchema(schema, normalizeForHive);
    final Writer orcWriter = NiFiOrcUtils.createWriter(path, conf, orcSchema, stripeSize, compressionType, bufferSize);
    final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet()
            ? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue()
            : NiFiOrcUtils.normalizeHiveTableName(schema.getIdentifier().getName().orElse("unknown"));
    final boolean hiveFieldNames = context.getProperty(HIVE_FIELD_NAMES).asBoolean();

    return new ORCHDFSRecordWriter(orcWriter, schema, hiveTableName, hiveFieldNames);
Example 3
Source Project: spork   Source File:    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates an {@code OrcStorage} configured from a space-separated option string.
 *
 * <p>The string is split on single spaces and parsed against {@code validOptions}.
 * Recognized options:
 * <ul>
 *   <li>{@code s} - stripe size, parsed as a {@code long}</li>
 *   <li>{@code r} - row index stride, parsed as an {@code int}</li>
 *   <li>{@code b} - buffer size, parsed as an {@code int}</li>
 *   <li>{@code p} - flag; block padding is enabled exactly when it is present</li>
 *   <li>{@code c} - compression kind, resolved by name via {@code CompressionKind.valueOf}</li>
 *   <li>{@code v} - version, resolved via {@code Version.byName}</li>
 * </ul>
 *
 * @param options the option string, e.g. {@code "-c SNAPPY"}
 * @throws RuntimeException      wrapping the {@code ParseException} when the options cannot
 *                               be parsed; the error and usage help are logged/printed first
 * @throws NumberFormatException if a numeric option value is not a valid number
 */
public OrcStorage(String options) {
    String[] optsArr = options.split(" ");
    try {
        CommandLine configuredOptions = parser.parse(validOptions, optsArr);
        // Each option is independent: an absent option leaves its field untouched.
        if (configuredOptions.hasOption('s')) {
            stripeSize = Long.parseLong(configuredOptions.getOptionValue('s'));
        }
        if (configuredOptions.hasOption('r')) {
            rowIndexStride = Integer.parseInt(configuredOptions.getOptionValue('r'));
        }
        if (configuredOptions.hasOption('b')) {
            bufferSize = Integer.parseInt(configuredOptions.getOptionValue('b'));
        }
        // Unlike the others, block padding is always assigned (true or false).
        blockPadding = configuredOptions.hasOption('p');
        if (configuredOptions.hasOption('c')) {
            compress = CompressionKind.valueOf(configuredOptions.getOptionValue('c'));
        }
        if (configuredOptions.hasOption('v')) {
            version = Version.byName(configuredOptions.getOptionValue('v'));
        }
    } catch (ParseException e) {
        log.error("Exception in OrcStorage", e);
        log.error("OrcStorage called with arguments " + options);
        warn("ParseException in OrcStorage", PigWarning.UDF_WARNING_1);
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("OrcStorage(',', '[options]')", validOptions);
        throw new RuntimeException(e);
    }
}