org.apache.hadoop.hive.ql.io.RCFileOutputFormat Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.RCFileOutputFormat. Each example is drawn from an open-source project; the source file, project, and license are noted above each snippet.
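Before diving in, it helps to know the two entry points the examples below lean on: the static RCFileOutputFormat.setColumnNumber(Configuration, int), which records the column count in the configuration before any rows are written (Examples #4 through #7), and getHiveRecordWriter(...), which hands back the writer Hive uses to produce RCFile output (Example #2). The sketch below is a minimal, uncompressed combination of the two; the output path, column names, and types are placeholder values chosen for illustration, not taken from any of the projects featured here.

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class RcFileWriteSketch {

    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf(false);

        // Table properties would normally come from the Hive metastore;
        // the column names and types here are placeholders.
        Properties tableProperties = new Properties();
        tableProperties.setProperty("columns", "a,b,c");
        tableProperties.setProperty("columns.types", "string:string:string");

        // Obtain a Hive record writer for a placeholder local path,
        // uncompressed, with a no-op progress callback (see Example #2).
        RecordWriter writer = new RCFileOutputFormat().getHiveRecordWriter(
                jobConf,
                new Path("/tmp/rcfile-sketch"),  // placeholder output path
                Text.class,
                false,                           // not compressed
                tableProperties,
                () -> {});
        writer.close(false);
    }
}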
Example #1
Source File: HCatalogUtil.java    From incubator-tajo with Apache License 2.0
public static String getStoreType(String fileFormat) {
  Preconditions.checkNotNull(fileFormat);

  // The format is given as a fully qualified class name; keep only the simple name.
  String[] fileFormatArray = fileFormat.split("\\.");
  if (fileFormatArray.length < 1) {
    throw new CatalogException("Wrong Hive file output format: " + fileFormat);
  }

  String outputFormatClass = fileFormatArray[fileFormatArray.length - 1];
  if (outputFormatClass.equals(HiveIgnoreKeyTextOutputFormat.class.getSimpleName())) {
    return CatalogProtos.StoreType.CSV.name();
  } else if (outputFormatClass.equals(RCFileOutputFormat.class.getSimpleName())) {
    return CatalogProtos.StoreType.RCFILE.name();
  } else {
    throw new CatalogException("Unsupported file output format: " + fileFormat);
  }
}
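In short, this helper maps the simple class name of a Hive output format onto Tajo's catalog store types: HiveIgnoreKeyTextOutputFormat is treated as CSV-style text storage and RCFileOutputFormat as RCFILE; any other format is rejected.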
 
Example #2
Source File: RcFileTester.java    From presto with Apache License 2.0
private static RecordWriter createRcFileWriterOld(File outputFile, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf(false);
    Optional<String> codecName = compression.getCodecName();
    codecName.ifPresent(s -> jobConf.set(COMPRESS_CODEC, s));

    return new RCFileOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            codecName.isPresent(),
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});
}
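Note that createTableProperties is a helper elsewhere in the test class (not shown here) that supplies the table's column names and types. The optional codec name does double duty: when present it is set as COMPRESS_CODEC on the JobConf, and its presence also drives the isCompressed flag passed to getHiveRecordWriter.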
 
Example #3
Source File: HiveDialectITCase.java    From flink with Apache License 2.0
@Test
public void testAlterPartition() throws Exception {
	tableEnv.executeSql("create table tbl (x tinyint,y string) partitioned by (p1 bigint,p2 date)");
	tableEnv.executeSql("alter table tbl add partition (p1=1000,p2='2020-05-01') partition (p1=2000,p2='2020-01-01')");
	CatalogPartitionSpec spec1 = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {{
		put("p1", "1000");
		put("p2", "2020-05-01");
	}});
	CatalogPartitionSpec spec2 = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {{
		put("p1", "2000");
		put("p2", "2020-01-01");
	}});
	ObjectPath tablePath = new ObjectPath("default", "tbl");

	Table hiveTable = hiveCatalog.getHiveTable(tablePath);

	// change location
	String location = warehouse + "/new_part_location";
	tableEnv.executeSql(String.format("alter table tbl partition (p1=1000,p2='2020-05-01') set location '%s'", location));
	Partition partition = hiveCatalog.getHivePartition(hiveTable, spec1);
	assertEquals(location, locationPath(partition.getSd().getLocation()));

	// change file format
	tableEnv.executeSql("alter table tbl partition (p1=2000,p2='2020-01-01') set fileformat rcfile");
	partition = hiveCatalog.getHivePartition(hiveTable, spec2);
	assertEquals(LazyBinaryColumnarSerDe.class.getName(), partition.getSd().getSerdeInfo().getSerializationLib());
	assertEquals(RCFileInputFormat.class.getName(), partition.getSd().getInputFormat());
	assertEquals(RCFileOutputFormat.class.getName(), partition.getSd().getOutputFormat());

	// change serde
	tableEnv.executeSql(String.format("alter table tbl partition (p1=1000,p2='2020-05-01') set serde '%s' with serdeproperties('%s'='%s')",
			LazyBinarySerDe.class.getName(), serdeConstants.LINE_DELIM, "\n"));
	partition = hiveCatalog.getHivePartition(hiveTable, spec1);
	assertEquals(LazyBinarySerDe.class.getName(), partition.getSd().getSerdeInfo().getSerializationLib());
	assertEquals("\n", partition.getSd().getSerdeInfo().getParameters().get(serdeConstants.LINE_DELIM));
}
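The test verifies that a single "alter table ... set fileformat rcfile" statement rewires all three parts of the partition's storage descriptor at once: the serialization library, the input format, and the output format.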
 
Example #4
Source File: HiveColumnarStorage.java    From spork with Apache License 2.0
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    super.setStoreLocation(location, job);
    // set the number of columns if one was recorded in the UDF context.
    Properties p = getUDFProperties();
    if (p != null) {
        numColumns = Integer.parseInt(p.getProperty("numColumns", "-1"));
    }

    if (numColumns > 0) {
        RCFileOutputFormat.setColumnNumber(job.getConfiguration(), numColumns);
    }
}
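setColumnNumber records the column count in the job configuration, and RCFile needs it before a writer is created, so the call is skipped when no positive numColumns value was recorded in the UDF properties.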
 
Example #5
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;

    resetRandomGenerators();

    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

    byte[][] columnRandom;

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }

    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes, columnCount);
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();

    return rowsWritten;
}
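resetRandomGenerators and nextRandomRow are helpers elsewhere in the same test class (not shown here); each loop iteration refills the shared BytesRefArrayWritable row with fresh random column data before appending it to the writer.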
 
Example #6
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;

    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

    byte[][] columnRandom;

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }

    for (int i = 0; i < rowCount; i++) {

        bytes.resetValid(columnRandom.length);
        for (int j = 0; j < columnRandom.length; j++) {
            columnRandom[j] = "Sample value".getBytes();
            bytes.get(j).set(columnRandom[j], 0, columnRandom[j].length);
        }
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();

    return rowsWritten;
}
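This variant differs from the previous example in that every column of every row is filled with the fixed bytes of "Sample value" rather than random data, and resetValid re-marks the row's valid column count before each append so the same BytesRefArrayWritable can be reused.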
 
Example #7
Source File: TestAllLoader.java    From spork with Apache License 2.0
@Override
public void writeTestData(File file, int recordCounts, int columnCount,
        String colSeparator) throws IOException {

    // write random test data
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);

    RCFileOutputFormat.setColumnNumber(conf, columnCount);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, new Path(
            file.getAbsolutePath()));

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnCount);

    for (int c = 0; c < columnCount; c++) {
        bytes.set(c, new BytesRefWritable());
    }

    try {
        for (int r = 0; r < recordCounts; r++) {
            // for each row, write n columns
            for (int c = 0; c < columnCount; c++) {
                byte[] stringbytes = String.valueOf(Math.random()).getBytes();
                bytes.get(c).set(stringbytes, 0, stringbytes.length);
            }
            writer.append(bytes);
        }
    } finally {
        writer.close();
    }

}
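Unlike Examples #5 and #6, this variant uses the three-argument RCFile.Writer constructor, which takes no compression codec, so the file is written uncompressed; closing the writer in a finally block ensures the file is flushed even if an append fails.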