org.apache.hadoop.hive.ql.io.IOConstants Java Examples
The following examples show how to use org.apache.hadoop.hive.ql.io.IOConstants. They are drawn from open-source projects including Presto, Flink, Dremio, Parquet, NiFi, Gobblin, IndexR, and Tajo; the source file and license are noted above each example.
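Most of the examples revolve around two keys, IOConstants.COLUMNS and IOConstants.COLUMNS_TYPES, which carry a table's column names and Hive type string through a Properties or JobConf object: writers publish the schema under these keys, and SerDes or readers parse it back out. Below is a minimal sketch of that pattern; the column names, types, and class name (IOConstantsSketch) are made up for illustration.

import java.util.Properties;

import org.apache.hadoop.hive.ql.io.IOConstants;

public class IOConstantsSketch {
    public static void main(String[] args) {
        Properties tableProperties = new Properties();
        // Column names are comma-separated; type names are usually ':'-separated (some integrations use ',').
        tableProperties.setProperty(IOConstants.COLUMNS, "id,name");
        tableProperties.setProperty(IOConstants.COLUMNS_TYPES, "bigint:string");

        // A SerDe or record writer initialized with these properties can recover the schema later.
        System.out.println(tableProperties.getProperty(IOConstants.COLUMNS));
        System.out.println(tableProperties.getProperty(IOConstants.COLUMNS_TYPES));
    }
}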
Example #1
Source File: OrcFileWriter.java From presto with Apache License 2.0 | 6 votes |
@VisibleForTesting
OrcFileWriter(List<Long> columnIds, List<Type> columnTypes, File target, boolean writeMetadata)
{
    this.columnTypes = ImmutableList.copyOf(requireNonNull(columnTypes, "columnTypes is null"));
    checkArgument(columnIds.size() == columnTypes.size(), "ids and types mismatch");
    checkArgument(isUnique(columnIds), "ids must be unique");

    List<StorageType> storageTypes = ImmutableList.copyOf(toStorageTypes(columnTypes));
    Iterable<String> hiveTypeNames = storageTypes.stream().map(StorageType::getHiveTypeName).collect(toList());
    List<String> columnNames = columnIds.stream()
            .map(Objects::toString)
            .collect(toImmutableList());

    Properties properties = new Properties();
    properties.setProperty(IOConstants.COLUMNS, Joiner.on(',').join(columnNames));
    properties.setProperty(IOConstants.COLUMNS_TYPES, Joiner.on(':').join(hiveTypeNames));

    serializer = createSerializer(properties);
    recordWriter = createRecordWriter(new Path(target.toURI()), columnIds, columnTypes, writeMetadata);

    tableInspector = getStandardStructObjectInspector(columnNames, getJavaObjectInspectors(storageTypes));
    structFields = ImmutableList.copyOf(tableInspector.getAllStructFieldRefs());
    orcRow = tableInspector.create();
}
Example #2
Source File: HiveTableInputFormat.java From flink with Apache License 2.0 | 6 votes |
private void addSchemaToConf(JobConf jobConf) {
    // set columns/types -- including partition cols
    List<String> typeStrs = Arrays.stream(fieldTypes)
            .map(t -> HiveTypeUtil.toHiveTypeInfo(t, true).toString())
            .collect(Collectors.toList());
    jobConf.set(IOConstants.COLUMNS, String.join(",", fieldNames));
    jobConf.set(IOConstants.COLUMNS_TYPES, String.join(",", typeStrs));
    // set schema evolution -- excluding partition cols
    int numNonPartCol = fieldNames.length - partitionKeys.size();
    jobConf.set(SCHEMA_EVOLUTION_COLUMNS, String.join(",", Arrays.copyOfRange(fieldNames, 0, numNonPartCol)));
    jobConf.set(SCHEMA_EVOLUTION_COLUMNS_TYPES, String.join(",", typeStrs.subList(0, numNonPartCol)));

    // in older versions, parquet reader also expects the selected col indices in conf, excluding part cols
    String readColIDs = Arrays.stream(selectedFields)
            .filter(i -> i < numNonPartCol)
            .mapToObj(String::valueOf)
            .collect(Collectors.joining(","));
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIDs);
}
Example #3
Source File: HiveUtilities.java From dremio-oss with Apache License 2.0 | 6 votes |
/**
 * Helper method which sets config to read transactional (ACID) tables. Prerequisite is <i>job</i>
 * contains the table properties.
 * @param job
 */
public static void addACIDPropertiesIfNeeded(final JobConf job) {
    if (!AcidUtils.isTablePropertyTransactional(job)) {
        return;
    }

    AcidUtils.setAcidOperationalProperties(job, true, null);

    // Add ACID related properties
    if (Utilities.isSchemaEvolutionEnabled(job, true) &&
        job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS) != null &&
        job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES) != null) {
        // If the schema evolution columns and types are already set, then there is no additional conf to set.
        return;
    }

    // Get them from table properties and set them as schema evolution properties
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, job.get(serdeConstants.LIST_COLUMNS));
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, job.get(serdeConstants.LIST_COLUMN_TYPES));
}
Example #4
Source File: HiveUtilities.java From dremio-oss with Apache License 2.0 | 6 votes |
/**
 * Helper method which sets config to read transactional (ACID) tables. Prerequisite is <i>job</i>
 * contains the table properties.
 * @param job
 */
public static void addACIDPropertiesIfNeeded(final JobConf job) {
    if (!AcidUtils.isTablePropertyTransactional(job)) {
        return;
    }

    AcidUtils.setTransactionalTableScan(job, true);

    // Add ACID related properties
    if (Utilities.isSchemaEvolutionEnabled(job, true) &&
        job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS) != null &&
        job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES) != null) {
        // If the schema evolution columns and types are already set, then there is no additional conf to set.
        return;
    }

    // Get them from table properties and set them as schema evolution properties
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, job.get(serdeConstants.LIST_COLUMNS));
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, job.get(serdeConstants.LIST_COLUMN_TYPES));
}
Example #5
Source File: TestCachingOrcDataSource.java From presto with Apache License 2.0 | 6 votes |
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
    OrcConf.COMPRESS.setString(jobConf, compression.name());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(IOConstants.COLUMNS, "test");
    tableProperties.setProperty(IOConstants.COLUMNS_TYPES, columnObjectInspector.getTypeName());
    tableProperties.setProperty(OrcConf.STRIPE_SIZE.getAttribute(), "120000");

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            tableProperties,
            () -> {});
}
Example #6
Source File: ParquetRecordWriterUtil.java From presto with Apache License 2.0 | 5 votes |
private static RecordWriter createParquetWriter(Path target, JobConf conf, Properties properties)
        throws IOException
{
    if (conf.get(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA) == null) {
        List<String> columnNames = Splitter.on(',').splitToList(properties.getProperty(IOConstants.COLUMNS));
        List<TypeInfo> columnTypes = getTypeInfosFromTypeString(properties.getProperty(IOConstants.COLUMNS_TYPES));

        MessageType schema = HiveSchemaConverter.convert(columnNames, columnTypes);
        setParquetSchema(conf, schema);
    }

    ParquetOutputFormat<ParquetHiveRecord> outputFormat = new ParquetOutputFormat<>(new DataWritableWriteSupport());

    return new ParquetRecordWriterWrapper(outputFormat, conf, target.toString(), Reporter.NULL, properties);
}
Example #7
Source File: DataWritableReadSupport.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * It creates the readContext for Parquet side with the requested schema during the init phase.
 *
 * @param configuration needed to get the wanted columns
 * @param keyValueMetaData // unused
 * @param fileSchema parquet file schema
 * @return the parquet ReadContext
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(final Configuration configuration,
        final Map<String, String> keyValueMetaData, final MessageType fileSchema) {
    final String columns = configuration.get(IOConstants.COLUMNS);
    final Map<String, String> contextMetadata = new HashMap<String, String>();
    if (columns != null) {
        final List<String> listColumns = getColumns(columns);

        final List<Type> typeListTable = new ArrayList<Type>();
        for (final String col : listColumns) {
            // listColumns contains partition columns which are metadata only
            if (fileSchema.containsField(col)) {
                typeListTable.add(fileSchema.getType(col));
            } else {
                // below allows schema evolution
                typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col));
            }
        }
        MessageType tableSchema = new MessageType(TABLE_SCHEMA, typeListTable);
        contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString());

        MessageType requestedSchemaByUser = tableSchema;
        final List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);

        final List<Type> typeListWanted = new ArrayList<Type>();
        for (final Integer idx : indexColumnsWanted) {
            typeListWanted.add(tableSchema.getType(listColumns.get(idx)));
        }
        requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(), typeListWanted),
                fileSchema, configuration);

        return new ReadContext(requestedSchemaByUser, contextMetadata);
    } else {
        contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString());
        return new ReadContext(fileSchema, contextMetadata);
    }
}
Example #8
Source File: ParquetHiveSerDe.java From parquet-mr with Apache License 2.0 | 5 votes |
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;
    // Get column names and sort order
    final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " +
                "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    // Stats part
    stats = new SerDeStats();
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
}
Example #9
Source File: IcebergFileWriterFactory.java From presto with Apache License 2.0 | 5 votes |
private IcebergFileWriter createParquetWriter(
        Path outputPath,
        Schema icebergSchema,
        List<IcebergColumnHandle> columns,
        JobConf jobConf,
        ConnectorSession session)
{
    Properties properties = new Properties();
    properties.setProperty(IOConstants.COLUMNS, columns.stream()
            .map(IcebergColumnHandle::getName)
            .collect(joining(",")));
    properties.setProperty(IOConstants.COLUMNS_TYPES, columns.stream()
            .map(column -> toHiveType(column.getType()).getHiveTypeName().toString())
            .collect(joining(":")));

    setParquetSchema(jobConf, convert(icebergSchema, "table"));
    jobConf.set(ParquetOutputFormat.COMPRESSION, getCompressionCodec(session).getParquetCompressionCodec().name());

    return new IcebergRecordFileWriter(
            outputPath,
            columns.stream()
                    .map(IcebergColumnHandle::getName)
                    .collect(toImmutableList()),
            fromHiveStorageFormat(HiveStorageFormat.PARQUET),
            properties,
            HiveStorageFormat.PARQUET.getEstimatedWriterSystemMemoryUsage(),
            jobConf,
            typeManager,
            session);
}
Example #10
Source File: IndexRSerde.java From indexr with Apache License 2.0 | 5 votes |
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

    if (Strings.isEmpty(columnNameProperty)) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (Strings.isEmpty(columnTypeProperty)) {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(StringUtils.repeat("string", ":", columnNames.size()));
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("IndexRHiveSerde initialization failed. Number of column " +
                "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
    }

    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    stats = new SerDeStats();
    serdeSize = 0;
}
Example #11
Source File: MapredParquetOutputFormat.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Create the parquet schema from the hive schema, and return the RecordWriterWrapper which
 * contains the real output format
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
        final JobConf jobConf,
        final Path finalOutPath,
        final Class<? extends Writable> valueClass,
        final boolean isCompressed,
        final Properties tableProperties,
        final Progressable progress) throws IOException {

    LOG.info("creating new record writer...{}", this);

    final String columnNameProperty = tableProperties.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = tableProperties.getProperty(IOConstants.COLUMNS_TYPES);
    List<String> columnNames;
    List<TypeInfo> columnTypes;

    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }

    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    DataWritableWriteSupport.setSchema(HiveSchemaConverter.convert(columnNames, columnTypes), jobConf);

    return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress);
}
Example #12
Source File: HiveSerDeConverter.java From incubator-gobblin with Apache License 2.0 | 5 votes |
private void setColumnsIfPossible(WorkUnitState state) throws SerDeException {
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(
            AvroSerdeUtils.determineSchemaOrReturnErrorSchema(state.getProperties()));
    List<String> columnNames = aoig.getColumnNames();
    List<TypeInfo> columnTypes = aoig.getColumnTypes();

    state.setProp(IOConstants.COLUMNS, StringUtils.join(columnNames, ","));
    state.setProp(IOConstants.COLUMNS_TYPES, StringUtils.join(columnTypes, ","));
}
Example #13
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0 | 5 votes |
@Test
public void testTableUsingParquet() throws Exception {
    TableMeta meta = new TableMeta("PARQUET", new KeyValueSet());

    org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
        .add("c_custkey", TajoDataTypes.Type.INT4)
        .add("c_name", TajoDataTypes.Type.TEXT)
        .add("c_address", TajoDataTypes.Type.TEXT)
        .add("c_nationkey", TajoDataTypes.Type.INT4)
        .add("c_phone", TajoDataTypes.Type.TEXT)
        .add("c_acctbal", TajoDataTypes.Type.FLOAT8)
        .add("c_mktsegment", TajoDataTypes.Type.TEXT)
        .add("c_comment", TajoDataTypes.Type.TEXT)
        .build();

    TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, CUSTOMER), schema, meta,
        new Path(warehousePath, new Path(DB_NAME, CUSTOMER)).toUri());
    store.createTable(table.getProto());
    assertTrue(store.existTable(DB_NAME, CUSTOMER));

    StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.PARQUET);
    org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, CUSTOMER);
    assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
    assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

    TableDesc table1 = new TableDesc(store.getTable(DB_NAME, CUSTOMER));
    assertEquals(table.getName(), table1.getName());
    assertEquals(table.getUri(), table1.getUri());
    assertEquals(table.getSchema().size(), table1.getSchema().size());
    for (int i = 0; i < table.getSchema().size(); i++) {
        assertEquals(table.getSchema().getColumn(i).getSimpleName(),
            table1.getSchema().getColumn(i).getSimpleName());
    }

    store.dropTable(DB_NAME, CUSTOMER);
}
Example #14
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0 | 5 votes |
@Test
public void testTableUsingSequenceFileWithBinarySerde() throws Exception {
    KeyValueSet options = new KeyValueSet();
    options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
    TableMeta meta = new TableMeta(BuiltinStorages.SEQUENCE_FILE, options);

    org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
        .add("r_regionkey", TajoDataTypes.Type.INT4)
        .add("r_name", TajoDataTypes.Type.TEXT)
        .add("r_comment", TajoDataTypes.Type.TEXT)
        .build();

    TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
        new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
    store.createTable(table.getProto());
    assertTrue(store.existTable(DB_NAME, REGION));

    StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.SEQUENCEFILE);
    org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
    assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
    assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

    TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
    assertEquals(table.getName(), table1.getName());
    assertEquals(table.getUri(), table1.getUri());
    assertEquals(table.getSchema().size(), table1.getSchema().size());
    for (int i = 0; i < table.getSchema().size(); i++) {
        assertEquals(table.getSchema().getColumn(i).getSimpleName(),
            table1.getSchema().getColumn(i).getSimpleName());
    }

    assertEquals(StorageConstants.DEFAULT_BINARY_SERDE,
        table1.getMeta().getProperty(StorageConstants.SEQUENCEFILE_SERDE));

    store.dropTable(DB_NAME, REGION);
}
Example #15
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0 | 5 votes |
@Test
public void testTableUsingSequenceFileWithTextSerde() throws Exception {
    KeyValueSet options = new KeyValueSet();
    options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
    options.set(StorageConstants.TEXT_DELIMITER, "\u0001");
    options.set(StorageConstants.TEXT_NULL, NullDatum.DEFAULT_TEXT);
    TableMeta meta = new TableMeta(BuiltinStorages.SEQUENCE_FILE, options);

    org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
        .add("r_regionkey", TajoDataTypes.Type.INT4)
        .add("r_name", TajoDataTypes.Type.TEXT)
        .add("r_comment", TajoDataTypes.Type.TEXT)
        .build();

    TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
        new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
    store.createTable(table.getProto());
    assertTrue(store.existTable(DB_NAME, REGION));

    StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.SEQUENCEFILE);
    org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
    assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
    assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

    TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
    assertEquals(table.getName(), table1.getName());
    assertEquals(table.getUri(), table1.getUri());
    assertEquals(table.getSchema().size(), table1.getSchema().size());
    for (int i = 0; i < table.getSchema().size(); i++) {
        assertEquals(table.getSchema().getColumn(i).getSimpleName(),
            table1.getSchema().getColumn(i).getSimpleName());
    }

    assertEquals(StorageConstants.DEFAULT_TEXT_SERDE,
        table1.getMeta().getProperty(StorageConstants.SEQUENCEFILE_SERDE));
    assertEquals("\u0001",
        StringEscapeUtils.unescapeJava(table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER)));
    assertEquals(NullDatum.DEFAULT_TEXT, table1.getMeta().getProperty(StorageConstants.TEXT_NULL));

    store.dropTable(DB_NAME, REGION);
}
Example #16
Source File: OrcFlowFileWriter.java From nifi with Apache License 2.0 | 4 votes |
public OrcFlowFileWriter(OutputStream flowFileOutputStream,
                         Path path,
                         Configuration conf,
                         ObjectInspector inspector,
                         long stripeSize,
                         CompressionKind compress,
                         int bufferSize,
                         int rowIndexStride,
                         MemoryManager memoryManager,
                         boolean addBlockPadding,
                         OrcFile.Version version,
                         OrcFile.WriterCallback callback,
                         EncodingStrategy encodingStrategy,
                         CompressionStrategy compressionStrategy,
                         float paddingTolerance,
                         long blockSizeValue,
                         String bloomFilterColumnNames,
                         double bloomFilterFpp) throws IOException {
    this.flowFileOutputStream = flowFileOutputStream;
    this.path = path;
    this.conf = conf;
    this.callback = callback;
    callbackContext = (callback != null) ? () -> OrcFlowFileWriter.this : null;
    this.adjustedStripeSize = stripeSize;
    this.defaultStripeSize = stripeSize;
    this.version = version;
    this.encodingStrategy = encodingStrategy;
    this.compressionStrategy = compressionStrategy;
    this.addBlockPadding = addBlockPadding;
    this.blockSize = blockSizeValue;
    this.paddingTolerance = paddingTolerance;
    this.compress = compress;
    this.rowIndexStride = rowIndexStride;
    this.memoryManager = memoryManager;
    buildIndex = rowIndexStride > 0;
    codec = createCodec(compress);
    String allColumns = conf.get(IOConstants.COLUMNS);
    if (allColumns == null) {
        allColumns = getColumnNamesFromInspector(inspector);
    }
    this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize);
    if (version == OrcFile.Version.V_0_11) {
        /* do not write bloom filters for ORC v11 */
        this.bloomFilterColumns = OrcUtils.includeColumns(null, allColumns, inspector);
    } else {
        this.bloomFilterColumns = OrcUtils.includeColumns(bloomFilterColumnNames, allColumns, inspector);
    }
    this.bloomFilterFpp = bloomFilterFpp;
    treeWriter = createTreeWriter(inspector, streamFactory, false);
    if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
        throw new IllegalArgumentException("Row stride must be at least " + MIN_ROW_INDEX_STRIDE);
    }
    // ensure that we are able to handle callbacks before we register ourselves
    memoryManager.addWriter(path, stripeSize, this);
}
Example #17
Source File: OrcFlowFileWriter.java From nifi with Apache License 2.0 | 4 votes |
@VisibleForTesting
int getEstimatedBufferSize(int bs) {
    return getEstimatedBufferSize(conf.get(IOConstants.COLUMNS), bs);
}
Example #18
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0 | 4 votes |
@Test
public void testTableWithNullValue() throws Exception {
    KeyValueSet options = new KeyValueSet();
    options.set(StorageConstants.TEXT_DELIMITER, StringEscapeUtils.escapeJava("\u0002"));
    options.set(StorageConstants.TEXT_NULL, StringEscapeUtils.escapeJava("\u0003"));
    TableMeta meta = new TableMeta(BuiltinStorages.TEXT, options);

    org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
        .add("s_suppkey", TajoDataTypes.Type.INT4)
        .add("s_name", TajoDataTypes.Type.TEXT)
        .add("s_address", TajoDataTypes.Type.TEXT)
        .add("s_nationkey", TajoDataTypes.Type.INT4)
        .add("s_phone", TajoDataTypes.Type.TEXT)
        .add("s_acctbal", TajoDataTypes.Type.FLOAT8)
        .add("s_comment", TajoDataTypes.Type.TEXT)
        .build();

    TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, SUPPLIER), schema, meta,
        new Path(warehousePath, new Path(DB_NAME, SUPPLIER)).toUri());
    store.createTable(table.getProto());
    assertTrue(store.existTable(DB_NAME, SUPPLIER));

    StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.TEXTFILE);
    org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, SUPPLIER);
    assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
    //IgnoreKeyTextOutputFormat was deprecated
    assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

    TableDesc table1 = new TableDesc(store.getTable(DB_NAME, SUPPLIER));
    assertEquals(table.getName(), table1.getName());
    assertEquals(table.getUri(), table1.getUri());
    assertEquals(table.getSchema().size(), table1.getSchema().size());
    for (int i = 0; i < table.getSchema().size(); i++) {
        assertEquals(table.getSchema().getColumn(i).getSimpleName(),
            table1.getSchema().getColumn(i).getSimpleName());
    }

    assertEquals(table.getMeta().getProperty(StorageConstants.TEXT_DELIMITER),
        table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER));
    assertEquals(table.getMeta().getProperty(StorageConstants.TEXT_NULL),
        table1.getMeta().getProperty(StorageConstants.TEXT_NULL));
    assertEquals(table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER),
        StringEscapeUtils.escapeJava("\u0002"));
    assertEquals(table1.getMeta().getProperty(StorageConstants.TEXT_NULL),
        StringEscapeUtils.escapeJava("\u0003"));

    Map<String, String> expected = getProperties(DB_NAME, SUPPLIER);
    Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
        .put("key1", "value1")
        .put("key2", "value2")
        .build();
    expected.putAll(toSet);

    setProperty(DB_NAME, SUPPLIER, toSet);
    Map<String, String> actual = getProperties(DB_NAME, SUPPLIER);
    assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
    assertEquals(actual.get("key1"), expected.get("key1"));
    assertEquals(actual.get("key2"), expected.get("key2"));

    Set<String> toUnset = Sets.newHashSet("key2", "key3");
    for (String key : toUnset) {
        expected.remove(key);
    }
    unSetProperty(DB_NAME, SUPPLIER, toUnset);
    actual = getProperties(DB_NAME, SUPPLIER);
    assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
    assertEquals(actual.get("key1"), expected.get("key1"));
    assertNull(actual.get("key2"));

    store.dropTable(DB_NAME, SUPPLIER);
}
Example #19
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0 | 4 votes |
@Test
public void testTableUsingRCFileWithTextSerde() throws Exception {
    KeyValueSet options = new KeyValueSet();
    options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
    TableMeta meta = new TableMeta(BuiltinStorages.RCFILE, options);

    org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
        .add("r_regionkey", TajoDataTypes.Type.INT4)
        .add("r_name", TajoDataTypes.Type.TEXT)
        .add("r_comment", TajoDataTypes.Type.TEXT)
        .build();

    TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
        new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
    store.createTable(table.getProto());
    assertTrue(store.existTable(DB_NAME, REGION));

    StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.RCFILE);
    org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
    assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
    assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

    TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
    assertEquals(table.getName(), table1.getName());
    assertEquals(table.getUri(), table1.getUri());
    assertEquals(table.getSchema().size(), table1.getSchema().size());
    for (int i = 0; i < table.getSchema().size(); i++) {
        assertEquals(table.getSchema().getColumn(i).getSimpleName(),
            table1.getSchema().getColumn(i).getSimpleName());
    }

    assertEquals(StorageConstants.DEFAULT_TEXT_SERDE, table1.getMeta().getProperty(StorageConstants.RCFILE_SERDE));

    Map<String, String> expected = getProperties(DB_NAME, REGION);
    Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
        .put("key1", "value1")
        .put("key2", "value2")
        .build();
    expected.putAll(toSet);

    setProperty(DB_NAME, REGION, toSet);
    Map<String, String> actual = getProperties(DB_NAME, REGION);
    assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
    assertEquals(actual.get("key1"), expected.get("key1"));
    assertEquals(actual.get("key2"), expected.get("key2"));

    Set<String> toUnset = Sets.newHashSet("key2", "key3");
    for (String key : toUnset) {
        expected.remove(key);
    }
    unSetProperty(DB_NAME, REGION, toUnset);
    actual = getProperties(DB_NAME, REGION);
    assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
    assertEquals(actual.get("key1"), expected.get("key1"));
    assertNull(actual.get("key2"));

    store.dropTable(DB_NAME, REGION);
}
Example #20
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0 | 4 votes |
@Test
public void testTableUsingRCFileWithBinarySerde() throws Exception {
    KeyValueSet options = new KeyValueSet();
    options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
    TableMeta meta = new TableMeta(BuiltinStorages.RCFILE, options);

    org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
        .add("r_regionkey", TajoDataTypes.Type.INT4)
        .add("r_name", TajoDataTypes.Type.TEXT)
        .add("r_comment", TajoDataTypes.Type.TEXT)
        .build();

    TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, REGION), schema, meta,
        new Path(warehousePath, new Path(DB_NAME, REGION)).toUri());
    store.createTable(table.getProto());
    assertTrue(store.existTable(DB_NAME, REGION));

    StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.RCFILE);
    org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, REGION);
    assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
    assertEquals(descriptor.getOutputFormat(), hiveTable.getSd().getOutputFormat());

    TableDesc table1 = new TableDesc(store.getTable(DB_NAME, REGION));
    assertEquals(table.getName(), table1.getName());
    assertEquals(table.getUri(), table1.getUri());
    assertEquals(table.getSchema().size(), table1.getSchema().size());
    for (int i = 0; i < table.getSchema().size(); i++) {
        assertEquals(table.getSchema().getColumn(i).getSimpleName(),
            table1.getSchema().getColumn(i).getSimpleName());
    }

    assertEquals(StorageConstants.DEFAULT_BINARY_SERDE, table1.getMeta().getProperty(StorageConstants.RCFILE_SERDE));

    Map<String, String> expected = getProperties(DB_NAME, REGION);
    Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
        .put("key1", "value1")
        .put("key2", "value2")
        .build();
    expected.putAll(toSet);

    setProperty(DB_NAME, REGION, toSet);
    Map<String, String> actual = getProperties(DB_NAME, REGION);
    assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
    assertEquals(actual.get("key1"), expected.get("key1"));
    assertEquals(actual.get("key2"), expected.get("key2"));

    Set<String> toUnset = Sets.newHashSet("key2", "key3");
    for (String key : toUnset) {
        expected.remove(key);
    }
    unSetProperty(DB_NAME, REGION, toUnset);
    actual = getProperties(DB_NAME, REGION);
    assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
    assertEquals(actual.get("key1"), expected.get("key1"));
    assertNull(actual.get("key2"));

    store.dropTable(DB_NAME, REGION);
}
Example #21
Source File: TestHiveCatalogStore.java From tajo with Apache License 2.0 | 4 votes |
@Test
public void testTableUsingTextFile() throws Exception {
    TableMeta meta = new TableMeta(BuiltinStorages.TEXT, new KeyValueSet());

    org.apache.tajo.catalog.Schema schema = SchemaBuilder.builder()
        .add("c_custkey", TajoDataTypes.Type.INT4)
        .add("c_name", TajoDataTypes.Type.TEXT)
        .add("c_address", TajoDataTypes.Type.TEXT)
        .add("c_nationkey", TajoDataTypes.Type.INT4)
        .add("c_phone", TajoDataTypes.Type.TEXT)
        .add("c_acctbal", TajoDataTypes.Type.FLOAT8)
        .add("c_mktsegment", TajoDataTypes.Type.TEXT)
        .add("c_comment", TajoDataTypes.Type.TEXT)
        .build();

    TableDesc table = new TableDesc(IdentifierUtil.buildFQName(DB_NAME, CUSTOMER), schema, meta,
        new Path(warehousePath, new Path(DB_NAME, CUSTOMER)).toUri());
    store.createTable(table.getProto());
    assertTrue(store.existTable(DB_NAME, CUSTOMER));

    StorageFormatDescriptor descriptor = formatFactory.get(IOConstants.TEXTFILE);
    org.apache.hadoop.hive.ql.metadata.Table hiveTable = store.getHiveTable(DB_NAME, CUSTOMER);
    assertEquals(descriptor.getInputFormat(), hiveTable.getSd().getInputFormat());
    //IgnoreKeyTextOutputFormat was deprecated
    assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), hiveTable.getSd().getOutputFormat());

    TableDesc table1 = new TableDesc(store.getTable(DB_NAME, CUSTOMER));
    assertEquals(table.getName(), table1.getName());
    assertEquals(table.getUri(), table1.getUri());
    assertEquals(table.getSchema().size(), table1.getSchema().size());
    for (int i = 0; i < table.getSchema().size(); i++) {
        assertEquals(table.getSchema().getColumn(i).getSimpleName(),
            table1.getSchema().getColumn(i).getSimpleName());
    }

    assertEquals(StringEscapeUtils.escapeJava(StorageConstants.DEFAULT_FIELD_DELIMITER),
        table1.getMeta().getProperty(StorageConstants.TEXT_DELIMITER));

    Map<String, String> expected = getProperties(DB_NAME, CUSTOMER);
    Map<String, String> toSet = new ImmutableMap.Builder<String, String>()
        .put("key1", "value1")
        .put("key2", "value2")
        .build();
    expected.putAll(toSet);

    setProperty(DB_NAME, CUSTOMER, toSet);
    Map<String, String> actual = getProperties(DB_NAME, CUSTOMER);
    assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
    assertEquals(actual.get("key1"), expected.get("key1"));
    assertEquals(actual.get("key2"), expected.get("key2"));

    Set<String> toUnset = Sets.newHashSet("key2", "key3");
    for (String key : toUnset) {
        expected.remove(key);
    }
    unSetProperty(DB_NAME, CUSTOMER, toUnset);
    actual = getProperties(DB_NAME, CUSTOMER);
    assertEquals(actual.get(StorageConstants.TEXT_DELIMITER), expected.get(StorageConstants.TEXT_DELIMITER));
    assertEquals(actual.get("key1"), expected.get("key1"));
    assertNull(actual.get("key2"));

    store.dropTable(DB_NAME, CUSTOMER);
}
Example #22
Source File: OrcFlowFileWriter.java From localization_nifi with Apache License 2.0 | 4 votes |
@VisibleForTesting
int getEstimatedBufferSize(int bs) {
    return getEstimatedBufferSize(conf.get(IOConstants.COLUMNS), bs);
}
Example #23
Source File: OrcFlowFileWriter.java From localization_nifi with Apache License 2.0 | 4 votes |
public OrcFlowFileWriter(OutputStream flowFileOutputStream,
                         Path path,
                         Configuration conf,
                         ObjectInspector inspector,
                         long stripeSize,
                         CompressionKind compress,
                         int bufferSize,
                         int rowIndexStride,
                         MemoryManager memoryManager,
                         boolean addBlockPadding,
                         OrcFile.Version version,
                         OrcFile.WriterCallback callback,
                         EncodingStrategy encodingStrategy,
                         CompressionStrategy compressionStrategy,
                         float paddingTolerance,
                         long blockSizeValue,
                         String bloomFilterColumnNames,
                         double bloomFilterFpp) throws IOException {
    this.flowFileOutputStream = flowFileOutputStream;
    this.path = path;
    this.conf = conf;
    this.callback = callback;
    callbackContext = (callback != null) ? () -> OrcFlowFileWriter.this : null;
    this.adjustedStripeSize = stripeSize;
    this.defaultStripeSize = stripeSize;
    this.version = version;
    this.encodingStrategy = encodingStrategy;
    this.compressionStrategy = compressionStrategy;
    this.addBlockPadding = addBlockPadding;
    this.blockSize = blockSizeValue;
    this.paddingTolerance = paddingTolerance;
    this.compress = compress;
    this.rowIndexStride = rowIndexStride;
    this.memoryManager = memoryManager;
    buildIndex = rowIndexStride > 0;
    codec = createCodec(compress);
    String allColumns = conf.get(IOConstants.COLUMNS);
    if (allColumns == null) {
        allColumns = getColumnNamesFromInspector(inspector);
    }
    this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize);
    if (version == OrcFile.Version.V_0_11) {
        /* do not write bloom filters for ORC v11 */
        this.bloomFilterColumns = OrcUtils.includeColumns(null, allColumns, inspector);
    } else {
        this.bloomFilterColumns = OrcUtils.includeColumns(bloomFilterColumnNames, allColumns, inspector);
    }
    this.bloomFilterFpp = bloomFilterFpp;
    treeWriter = createTreeWriter(inspector, streamFactory, false);
    if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
        throw new IllegalArgumentException("Row stride must be at least " + MIN_ROW_INDEX_STRIDE);
    }
    // ensure that we are able to handle callbacks before we register ourselves
    memoryManager.addWriter(path, stripeSize, this);
}
Example #24
Source File: HiveUtil.java From presto with Apache License 2.0 | 4 votes |
public static List<HiveType> getColumnTypes(Properties schema)
{
    return toHiveTypes(schema.getProperty(IOConstants.COLUMNS_TYPES, ""));
}
Example #25
Source File: HiveUtil.java From presto with Apache License 2.0 | 4 votes |
public static List<String> getColumnNames(Properties schema)
{
    return COLUMN_NAMES_SPLITTER.splitToList(schema.getProperty(IOConstants.COLUMNS, ""));
}
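As a companion to Examples #24 and #25, here is a minimal sketch of turning the two properties back into a usable schema with TypeInfoUtils, as several of the SerDe examples above do. The property values and the class name SchemaFromProperties are illustrative only.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class SchemaFromProperties {
    public static void main(String[] args) {
        Properties schema = new Properties();
        schema.setProperty(IOConstants.COLUMNS, "id,name");
        schema.setProperty(IOConstants.COLUMNS_TYPES, "bigint:string");

        // Split the comma-separated column names.
        List<String> columnNames = Arrays.asList(schema.getProperty(IOConstants.COLUMNS, "").split(","));
        // Parse the Hive type string into TypeInfo objects.
        List<TypeInfo> columnTypes =
                TypeInfoUtils.getTypeInfosFromTypeString(schema.getProperty(IOConstants.COLUMNS_TYPES, ""));

        System.out.println(columnNames);
        System.out.println(columnTypes);
    }
}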