org.apache.hadoop.hive.ql.io.RCFileInputFormat Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.RCFileInputFormat. They are drawn from open-source projects; the source file and project license are listed above each example.
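As context for the examples below, here is a minimal sketch of how RCFileInputFormat is typically registered on a mapred job. The class name, method name, and input directory are hypothetical placeholders, not part of any of the projects quoted here.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class RcFileJobSetup {
    /**
     * Builds a JobConf that reads RCFile data from the given directory.
     * A minimal sketch; inputDir is supplied by the caller.
     */
    public static JobConf newRcFileJobConf(String inputDir) {
        JobConf conf = new JobConf();
        // RCFileInputFormat implements the old mapred InputFormat interface,
        // yielding <LongWritable, BytesRefArrayWritable> pairs per row.
        conf.setInputFormat(RCFileInputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(inputDir));
        return conf;
    }
}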
Example #1
Source File: ProfileFactory.java    From pxf with Apache License 2.0
/**
 * Returns the name of the optimal profile for the given input format.
 *
 * @param inputFormat input format of the table/partition
 * @param hasComplexTypes whether the record has complex types, see @EnumHiveToGpdbType
 * @param userProfileName profile name provided by the user
 * @return name of the optimal profile
 */
public static String get(InputFormat inputFormat, boolean hasComplexTypes, String userProfileName) {
    String profileName = null;
    if (HIVE_ORC_VECTORIZED_PROFILE.equals(userProfileName))
        return userProfileName;
    if (inputFormat instanceof TextInputFormat && !hasComplexTypes) {
        profileName = HIVE_TEXT_PROFILE;
    } else if (inputFormat instanceof RCFileInputFormat) {
        profileName = HIVE_RC_PROFILE;
    } else if (inputFormat instanceof OrcInputFormat) {
        profileName = HIVE_ORC_PROFILE;
    } else {
        //Default case
        profileName = HIVE_PROFILE;
    }
    return profileName;
}
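
A hypothetical call, assuming the profile-name constants resolve to the values asserted in Example #3 below:

// Hypothetical usage: no user-supplied profile, no complex types.
// HIVE_RC_PROFILE is assumed to be "HiveRC", as ProfileFactoryTest asserts.
String profile = ProfileFactory.get(new RCFileInputFormat(), false, null);
// profile == "HiveRC" (the RCFileInputFormat branch above)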
 
Example #2
Source File: RcFileTester.java    From presto with Apache License 2.0
private static Properties createTableProperties(String name, String type)
{
    Properties orderTableProperties = new Properties();
    orderTableProperties.setProperty("columns", name);
    orderTableProperties.setProperty("columns.types", type);
    orderTableProperties.setProperty("file.inputformat", RCFileInputFormat.class.getName());
    return orderTableProperties;
}
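
Per Hive's serde property conventions, "columns" holds a comma-separated list of column names and "columns.types" a colon-separated list of types, so a hypothetical two-column call looks like this:

// Hypothetical call: comma-separated names, colon-separated types.
Properties props = createTableProperties("id,name", "bigint:string");
// props now holds:
//   columns          = id,name
//   columns.types    = bigint:string
//   file.inputformat = org.apache.hadoop.hive.ql.io.RCFileInputFormat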
 
Example #3
Source File: ProfileFactoryTest.java    From pxf with Apache License 2.0
@Test
public void get() throws Exception {

    // For TextInputFormat when table has no complex types, HiveText profile should be used
    String profileName = ProfileFactory.get(new TextInputFormat(), false);
    assertEquals("HiveText", profileName);

    // For TextInputFormat when table has complex types, Hive profile should be used; HiveText doesn't support complex types yet
    profileName = ProfileFactory.get(new TextInputFormat(), true);
    assertEquals("Hive", profileName);

    // For RCFileInputFormat when table has complex types, HiveRC profile should be used
    profileName = ProfileFactory.get(new RCFileInputFormat(), true);
    assertEquals("HiveRC", profileName);

    // For RCFileInputFormat when table has no complex types, HiveRC profile should be used
    profileName = ProfileFactory.get(new RCFileInputFormat(), false);
    assertEquals("HiveRC", profileName);

    // For OrcInputFormat when table has complex types, HiveORC profile should be used
    profileName = ProfileFactory.get(new OrcInputFormat(), true);
    assertEquals("HiveORC", profileName);

    // For OrcInputFormat when table has no complex types, HiveORC profile should be used
    profileName = ProfileFactory.get(new OrcInputFormat(), false);
    assertEquals("HiveORC", profileName);

    // For other formats Hive profile should be used
    profileName = ProfileFactory.get(new SequenceFileInputFilter(), false);
    assertEquals("Hive", profileName);
}
 
Example #4
Source File: HiveMetadataUtils.java    From dremio-oss with Apache License 2.0
/**
 * Finds the row count based on stats in the Hive metastore, or estimates it
 * from the split size, file format, and estimated record size.
 *
 * @param statsParams         parameters controlling the stats calculation
 * @param statsFromMetastore  statistics retrieved from the Hive metastore
 * @param sizeRatio           ratio of this split's size to the total size covered by <i>statsFromMetastore</i>
 * @param splitSizeInBytes    size of this split in bytes
 * @param format              input format of the split's files
 * @param estimatedRecordSize estimated size of a single record in bytes
 * @return estimated row count for this split
 */
public static long findRowCountInSplit(StatsEstimationParameters statsParams, HiveDatasetStats statsFromMetastore,
                                       final double sizeRatio, final long splitSizeInBytes, InputFormat<?, ?> format,
                                       final int estimatedRecordSize) {

  final Class<? extends InputFormat> inputFormat =
    format == null ? null : ((Class<? extends InputFormat>) format.getClass());

  double compressionFactor = 1.0;
  if (MapredParquetInputFormat.class.equals(inputFormat)) {
    compressionFactor = 30;
  } else if (OrcInputFormat.class.equals(inputFormat)) {
    compressionFactor = 30f;
  } else if (AvroContainerInputFormat.class.equals(inputFormat)) {
    compressionFactor = 10f;
  } else if (RCFileInputFormat.class.equals(inputFormat)) {
    compressionFactor = 10f;
  }

  final long estimatedRowCount = (long) Math.ceil(splitSizeInBytes * compressionFactor / estimatedRecordSize);

  // Metastore stats are for complete partition. Multiply it by the size ratio of this split
  final long metastoreRowCount = (long) Math.ceil(sizeRatio * statsFromMetastore.getRecordCount());

  logger.trace("Hive stats estimation: compression factor '{}', recordSize '{}', estimated '{}', from metastore '{}'",
    compressionFactor, estimatedRecordSize, estimatedRowCount, metastoreRowCount);

  if (statsParams.useMetastoreStats() && statsFromMetastore.hasContent()) {
    return metastoreRowCount;
  }

  // return the maximum of estimate and metastore count
  return Math.max(estimatedRowCount, metastoreRowCount);
}
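
To make the estimate concrete, here is a worked example with hypothetical numbers: a 128 MiB RCFile split (so compressionFactor is 10, per the branch above) and 100-byte records.

// Hypothetical inputs, for illustration only.
long splitSizeInBytes = 128L * 1024 * 1024;    // 134217728 bytes
double compressionFactor = 10f;                // RCFileInputFormat branch
int estimatedRecordSize = 100;                 // bytes per record

long estimatedRowCount =
    (long) Math.ceil(splitSizeInBytes * compressionFactor / estimatedRecordSize);
// estimatedRowCount == 13421773 (~13.4 million rows)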
 
Example #5
Source File: HiveCatalogUtil.java    From tajo with Apache License 2.0
public static String getDataFormat(StorageDescriptor descriptor) {
  Preconditions.checkNotNull(descriptor);

  String serde = descriptor.getSerdeInfo().getSerializationLib();
  String inputFormat = descriptor.getInputFormat();

  if (LazySimpleSerDe.class.getName().equals(serde)) {
    if (TextInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.TEXT;
    } else if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinarySerDe.class.getName().equals(serde)) {
    if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinaryColumnarSerDe.class.getName().equals(serde) || ColumnarSerDe.class.getName().equals(serde)) {
    if (RCFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.RCFILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (ParquetHiveSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.PARQUET;
  } else if (AvroSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.AVRO;
  } else if (OrcSerde.class.getName().equals(serde)) {
    return BuiltinStorages.ORC;
  } else if (RegexSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.REGEX;
  } else {
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }
}
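
A hypothetical invocation, building a metastore descriptor for an RCFile table backed by ColumnarSerDe:

import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

// Hypothetical descriptor: an RCFile table using ColumnarSerDe.
StorageDescriptor sd = new StorageDescriptor();
SerDeInfo serdeInfo = new SerDeInfo();
serdeInfo.setSerializationLib("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe");
sd.setSerdeInfo(serdeInfo);
sd.setInputFormat("org.apache.hadoop.hive.ql.io.RCFileInputFormat");

String format = HiveCatalogUtil.getDataFormat(sd);  // returns BuiltinStorages.RCFILE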
 
Example #6
Source File: HiveDialectITCase.java    From flink with Apache License 2.0
@Test
public void testAlterPartition() throws Exception {
	tableEnv.executeSql("create table tbl (x tinyint,y string) partitioned by (p1 bigint,p2 date)");
	tableEnv.executeSql("alter table tbl add partition (p1=1000,p2='2020-05-01') partition (p1=2000,p2='2020-01-01')");
	CatalogPartitionSpec spec1 = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {{
		put("p1", "1000");
		put("p2", "2020-05-01");
	}});
	CatalogPartitionSpec spec2 = new CatalogPartitionSpec(new LinkedHashMap<String, String>() {{
		put("p1", "2000");
		put("p2", "2020-01-01");
	}});
	ObjectPath tablePath = new ObjectPath("default", "tbl");

	Table hiveTable = hiveCatalog.getHiveTable(tablePath);

	// change location
	String location = warehouse + "/new_part_location";
	tableEnv.executeSql(String.format("alter table tbl partition (p1=1000,p2='2020-05-01') set location '%s'", location));
	Partition partition = hiveCatalog.getHivePartition(hiveTable, spec1);
	assertEquals(location, locationPath(partition.getSd().getLocation()));

	// change file format
	tableEnv.executeSql("alter table tbl partition (p1=2000,p2='2020-01-01') set fileformat rcfile");
	partition = hiveCatalog.getHivePartition(hiveTable, spec2);
	assertEquals(LazyBinaryColumnarSerDe.class.getName(), partition.getSd().getSerdeInfo().getSerializationLib());
	assertEquals(RCFileInputFormat.class.getName(), partition.getSd().getInputFormat());
	assertEquals(RCFileOutputFormat.class.getName(), partition.getSd().getOutputFormat());

	// change serde
	tableEnv.executeSql(String.format("alter table tbl partition (p1=1000,p2='2020-05-01') set serde '%s' with serdeproperties('%s'='%s')",
			LazyBinarySerDe.class.getName(), serdeConstants.LINE_DELIM, "\n"));
	partition = hiveCatalog.getHivePartition(hiveTable, spec1);
	assertEquals(LazyBinarySerDe.class.getName(), partition.getSd().getSerdeInfo().getSerializationLib());
	assertEquals("\n", partition.getSd().getSerdeInfo().getParameters().get(serdeConstants.LINE_DELIM));
}
 
Example #7
Source File: HiveRCFileAccessor.java    From pxf with Apache License 2.0
/**
 * Constructs a HiveRCFileAccessor.
 */
public HiveRCFileAccessor() {
    super(new RCFileInputFormat());
}