org.apache.hadoop.hive.ql.io.RCFileInputFormat Java Examples
The following examples show how to use org.apache.hadoop.hive.ql.io.RCFileInputFormat. They are drawn from open-source projects; the originating source file and license are noted above each example.
Example #1
Source File: ProfileFactory.java from pxf, Apache License 2.0
/**
 * Returns the optimal profile for the given input format.
 *
 * @param inputFormat input format of table/partition
 * @param hasComplexTypes whether the record has complex types, see @EnumHiveToGpdbType
 * @param userProfileName profile name provided by the user
 * @return name of the optimal profile
 */
public static String get(InputFormat inputFormat, boolean hasComplexTypes, String userProfileName) {
    String profileName = null;
    if (HIVE_ORC_VECTORIZED_PROFILE.equals(userProfileName)) {
        return userProfileName;
    }
    if (inputFormat instanceof TextInputFormat && !hasComplexTypes) {
        profileName = HIVE_TEXT_PROFILE;
    } else if (inputFormat instanceof RCFileInputFormat) {
        profileName = HIVE_RC_PROFILE;
    } else if (inputFormat instanceof OrcInputFormat) {
        profileName = HIVE_ORC_PROFILE;
    } else {
        // Default case
        profileName = HIVE_PROFILE;
    }
    return profileName;
}
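For orientation, a minimal sketch of how a caller might invoke this factory. The demo class is hypothetical; the expected profile-name value ("HiveRC") is taken from the assertions in Example #3.

// Hypothetical caller; assumes pxf's ProfileFactory and Hive's
// RCFileInputFormat are on the classpath.
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.mapred.InputFormat;

public class ProfileFactoryDemo {
    public static void main(String[] args) {
        InputFormat inputFormat = new RCFileInputFormat();
        // No user-supplied profile, no complex types in the record
        String profile = ProfileFactory.get(inputFormat, false, null);
        System.out.println(profile); // prints "HiveRC" per Example #3
    }
}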
Example #2
Source File: RcFileTester.java from presto, Apache License 2.0
private static Properties createTableProperties(String name, String type)
{
    Properties orderTableProperties = new Properties();
    orderTableProperties.setProperty("columns", name);
    orderTableProperties.setProperty("columns.types", type);
    orderTableProperties.setProperty("file.inputformat", RCFileInputFormat.class.getName());
    return orderTableProperties;
}
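A quick illustration of what the returned Properties would contain; the column name and type below are made-up values.

// Hypothetical invocation; "order_id"/"bigint" are illustrative.
Properties props = createTableProperties("order_id", "bigint");
// props now holds:
//   columns          = order_id
//   columns.types    = bigint
//   file.inputformat = org.apache.hadoop.hive.ql.io.RCFileInputFormat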
Example #3
Source File: ProfileFactoryTest.java from pxf, Apache License 2.0
@Test
public void get() throws Exception {
    // For TextInputFormat when table has no complex types, HiveText profile should be used
    String profileName = ProfileFactory.get(new TextInputFormat(), false);
    assertEquals("HiveText", profileName);

    // For TextInputFormat when table has complex types, Hive profile should be used,
    // HiveText doesn't support complex types yet
    profileName = ProfileFactory.get(new TextInputFormat(), true);
    assertEquals("Hive", profileName);

    // For RCFileInputFormat when table has complex types, HiveRC profile should be used
    profileName = ProfileFactory.get(new RCFileInputFormat(), true);
    assertEquals("HiveRC", profileName);

    // For RCFileInputFormat when table has no complex types, HiveRC profile should be used
    profileName = ProfileFactory.get(new RCFileInputFormat(), false);
    assertEquals("HiveRC", profileName);

    // For OrcInputFormat when table has complex types, HiveORC profile should be used
    profileName = ProfileFactory.get(new OrcInputFormat(), true);
    assertEquals("HiveORC", profileName);

    // For OrcInputFormat when table has no complex types, HiveORC profile should be used
    profileName = ProfileFactory.get(new OrcInputFormat(), false);
    assertEquals("HiveORC", profileName);

    // For other formats Hive profile should be used
    profileName = ProfileFactory.get(new SequenceFileInputFilter(), false);
    assertEquals("Hive", profileName);
}
Example #4
Source File: HiveMetadataUtils.java from dremio-oss, Apache License 2.0
/**
 * Find the row count based on stats in the Hive metastore, or estimate it using
 * file size, file type, record size and split size.
 *
 * @param statsParams parameters controlling the stats calculations
 * @param statsFromMetastore
 * @param sizeRatio Ratio of this split contributing to all stats in given <i>statsFromMetastore</i>
 * @param splitSizeInBytes
 * @param format
 * @param estimatedRecordSize
 * @return
 */
public static long findRowCountInSplit(StatsEstimationParameters statsParams, HiveDatasetStats statsFromMetastore,
    final double sizeRatio, final long splitSizeInBytes, InputFormat<?, ?> format, final int estimatedRecordSize) {

  final Class<? extends InputFormat> inputFormat =
      format == null ? null : ((Class<? extends InputFormat>) format.getClass());

  double compressionFactor = 1.0;
  if (MapredParquetInputFormat.class.equals(inputFormat)) {
    compressionFactor = 30;
  } else if (OrcInputFormat.class.equals(inputFormat)) {
    compressionFactor = 30f;
  } else if (AvroContainerInputFormat.class.equals(inputFormat)) {
    compressionFactor = 10f;
  } else if (RCFileInputFormat.class.equals(inputFormat)) {
    compressionFactor = 10f;
  }

  final long estimatedRowCount = (long) Math.ceil(splitSizeInBytes * compressionFactor / estimatedRecordSize);

  // Metastore stats are for the complete partition. Multiply by the size ratio of this split
  final long metastoreRowCount = (long) Math.ceil(sizeRatio * statsFromMetastore.getRecordCount());

  logger.trace("Hive stats estimation: compression factor '{}', recordSize '{}', estimated '{}', from metastore '{}'",
      compressionFactor, estimatedRecordSize, estimatedRowCount, metastoreRowCount);

  if (statsParams.useMetastoreStats() && statsFromMetastore.hasContent()) {
    return metastoreRowCount;
  }

  // return the maximum of the estimate and the metastore count
  return Math.max(estimatedRowCount, metastoreRowCount);
}
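To make the estimation path concrete, here is a worked example with assumed numbers (a 128 MB RCFile split and a 100-byte estimated record; neither figure comes from the source):

// Assumed inputs, not taken from the source: 128 MB split, 100-byte records.
long splitSizeInBytes = 128L * 1024 * 1024;  // 134217728
double compressionFactor = 10f;              // the RCFileInputFormat branch above
int estimatedRecordSize = 100;
long estimatedRowCount =
    (long) Math.ceil(splitSizeInBytes * compressionFactor / estimatedRecordSize);
// estimatedRowCount == 13421773; the method returns the larger of this value
// and the ratio-scaled metastore count (unless metastore stats are preferred).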
Example #5
Source File: HiveCatalogUtil.java from tajo, Apache License 2.0
public static String getDataFormat(StorageDescriptor descriptor) {
  Preconditions.checkNotNull(descriptor);

  String serde = descriptor.getSerdeInfo().getSerializationLib();
  String inputFormat = descriptor.getInputFormat();

  if (LazySimpleSerDe.class.getName().equals(serde)) {
    if (TextInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.TEXT;
    } else if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinarySerDe.class.getName().equals(serde)) {
    if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinaryColumnarSerDe.class.getName().equals(serde) || ColumnarSerDe.class.getName().equals(serde)) {
    if (RCFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.RCFILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (ParquetHiveSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.PARQUET;
  } else if (AvroSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.AVRO;
  } else if (OrcSerde.class.getName().equals(serde)) {
    return BuiltinStorages.ORC;
  } else if (RegexSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.REGEX;
  } else {
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }
}
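As a minimal sketch, assuming the Hive metastore API classes StorageDescriptor and SerDeInfo are available, here is a descriptor that this method would map to BuiltinStorages.RCFILE:

import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;

// Build a descriptor for an RCFile table stored with ColumnarSerDe.
StorageDescriptor sd = new StorageDescriptor();
SerDeInfo serdeInfo = new SerDeInfo();
serdeInfo.setSerializationLib(ColumnarSerDe.class.getName());
sd.setSerdeInfo(serdeInfo);
sd.setInputFormat(RCFileInputFormat.class.getName());

String format = HiveCatalogUtil.getDataFormat(sd); // BuiltinStorages.RCFILE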
Example #6
Source File: HiveDialectITCase.java from flink, Apache License 2.0
@Test
public void testAlterPartition() throws Exception {
    tableEnv.executeSql("create table tbl (x tinyint,y string) partitioned by (p1 bigint,p2 date)");
    tableEnv.executeSql("alter table tbl add partition (p1=1000,p2='2020-05-01') partition (p1=2000,p2='2020-01-01')");
    CatalogPartitionSpec spec1 = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {{
        put("p1", "1000");
        put("p2", "2020-05-01");
    }});
    CatalogPartitionSpec spec2 = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {{
        put("p1", "2000");
        put("p2", "2020-01-01");
    }});
    ObjectPath tablePath = new ObjectPath("default", "tbl");
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);

    // change location
    String location = warehouse + "/new_part_location";
    tableEnv.executeSql(String.format("alter table tbl partition (p1=1000,p2='2020-05-01') set location '%s'", location));
    Partition partition = hiveCatalog.getHivePartition(hiveTable, spec1);
    assertEquals(location, locationPath(partition.getSd().getLocation()));

    // change file format
    tableEnv.executeSql("alter table tbl partition (p1=2000,p2='2020-01-01') set fileformat rcfile");
    partition = hiveCatalog.getHivePartition(hiveTable, spec2);
    assertEquals(LazyBinaryColumnarSerDe.class.getName(), partition.getSd().getSerdeInfo().getSerializationLib());
    assertEquals(RCFileInputFormat.class.getName(), partition.getSd().getInputFormat());
    assertEquals(RCFileOutputFormat.class.getName(), partition.getSd().getOutputFormat());

    // change serde
    tableEnv.executeSql(String.format("alter table tbl partition (p1=1000,p2='2020-05-01') set serde '%s' with serdeproperties('%s'='%s')",
        LazyBinarySerDe.class.getName(), serdeConstants.LINE_DELIM, "\n"));
    partition = hiveCatalog.getHivePartition(hiveTable, spec1);
    assertEquals(LazyBinarySerDe.class.getName(), partition.getSd().getSerdeInfo().getSerializationLib());
    assertEquals("\n", partition.getSd().getSerdeInfo().getParameters().get(serdeConstants.LINE_DELIM));
}
Example #7
Source File: HiveRCFileAccessor.java from pxf, Apache License 2.0
/**
 * Constructs a HiveRCFileAccessor.
 */
public HiveRCFileAccessor() {
    super(new RCFileInputFormat());
}
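Since this accessor simply delegates split generation and record reading to the wrapped input format, a minimal stand-alone sketch of reading an RCFile through RCFileInputFormat directly is a useful summary of what all the examples above build on. The input path is hypothetical, and this is a sketch rather than production code:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class RCFileReadSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        // Hypothetical input path.
        FileInputFormat.setInputPaths(conf, new Path("/tmp/example.rc"));

        RCFileInputFormat<LongWritable, BytesRefArrayWritable> format =
                new RCFileInputFormat<>();
        InputSplit[] splits = format.getSplits(conf, 1);

        RecordReader<LongWritable, BytesRefArrayWritable> reader =
                format.getRecordReader(splits[0], conf, Reporter.NULL);
        LongWritable key = reader.createKey();
        BytesRefArrayWritable value = reader.createValue();
        while (reader.next(key, value)) {
            // Each value holds one row's columns as byte ranges.
        }
        reader.close();
    }
}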