org.apache.hadoop.hive.serde2.SerDeUtils Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.SerDeUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveTableUtil.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Create properties info to initialize a SerDe.
 * @param storageDescriptor
 * @return
 */
public static Properties createPropertiesFromStorageDescriptor(StorageDescriptor storageDescriptor) {
	SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
	Map<String, String> parameters = serDeInfo.getParameters();
	Properties properties = new Properties();
	properties.setProperty(
			serdeConstants.SERIALIZATION_FORMAT,
			parameters.get(serdeConstants.SERIALIZATION_FORMAT));
	List<String> colTypes = new ArrayList<>();
	List<String> colNames = new ArrayList<>();
	List<FieldSchema> cols = storageDescriptor.getCols();
	for (FieldSchema col: cols){
		colTypes.add(col.getType());
		colNames.add(col.getName());
	}
	properties.setProperty(serdeConstants.LIST_COLUMNS, StringUtils.join(colNames, String.valueOf(SerDeUtils.COMMA)));
	// Note: serdeConstants.COLUMN_NAME_DELIMITER is not defined in previous Hive. We use a literal to save on shim
	properties.setProperty("column.name.delimite", String.valueOf(SerDeUtils.COMMA));
	properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, StringUtils.join(colTypes, DEFAULT_LIST_COLUMN_TYPES_SEPARATOR));
	properties.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
	properties.putAll(parameters);
	return properties;
}
 
Example #2
Source File: JsonSerdeUtils.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
@Nonnull
private static void serializeList(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nullable final ListObjectInspector loi) throws SerDeException {
    ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
    List<?> olist = loi.getList(obj);

    if (olist == null) {
        sb.append("null");
    } else {
        sb.append(SerDeUtils.LBRACKET);
        for (int i = 0; i < olist.size(); i++) {
            if (i > 0) {
                sb.append(SerDeUtils.COMMA);
            }
            buildJSONString(sb, olist.get(i), listElementObjectInspector);
        }
        sb.append(SerDeUtils.RBRACKET);
    }
}
 
Example #3
Source File: TableMapping.java    From Hive-Cassandra with Apache License 2.0 6 votes vote down vote up
/**
 * Serialize a object into bytes.
 * @param foi object inspector
 * @param decalred output object inspector
 * @param obj object to be serialized
 * @param useJsonSerialize true to use json serialization
 * @return object in serialized bytes
 * @throws IOException when error happens
 */
protected byte[] serializeToBytes(ObjectInspector foi, ObjectInspector doi, Object obj, boolean useJsonSerialize) throws IOException {
  serializeStream.reset();
  boolean isNotNull;
  if (!foi.getCategory().equals(Category.PRIMITIVE)
              && useJsonSerialize) {
    isNotNull = serialize(SerDeUtils.getJSONString(obj, foi),
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, doi, 1);
  } else {
    isNotNull = serialize(obj, foi, doi, 1);
  }
  if (!isNotNull) {
    return null;
  }
  byte[] key = new byte[serializeStream.getCount()];
  System.arraycopy(serializeStream.getData(), 0, key, 0, serializeStream.getCount());

  return key;
}
 
Example #4
Source File: TsFileSerDe.java    From incubator-iotdb with Apache License 2.0 5 votes vote down vote up
@Override
public void initialize(@Nullable Configuration conf, Properties tbl) throws SerDeException {

  final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl
          .getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);


  deviceId = tbl.getProperty(DEVICE_ID);


  if (columnNameProperty == null || columnNameProperty.isEmpty()
  || columnTypeProperty == null || columnTypeProperty.isEmpty()) {
    columnNames = Collections.emptyList();
    columnTypes = Collections.emptyList();
  }
  else {
    columnNames = StringInternUtils.internStringsInList(
            Arrays.asList(columnNameProperty.split(columnNameDelimiter)));
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }

  // Check column and types equals
  if (columnTypes.size() != columnNames.size()) {
    throw new TsFileSerDeException("len(columnNames) != len(columnTypes)");
  }

  oi = createObjectInspector();
}
 
Example #5
Source File: JsonSerdeUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
private static void serializeMap(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nonnull final MapObjectInspector moi) throws SerDeException {
    ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
    ObjectInspector mapValueObjectInspector = moi.getMapValueObjectInspector();
    Map<?, ?> omap = moi.getMap(obj);
    if (omap == null) {
        sb.append("null");
    } else {
        sb.append(SerDeUtils.LBRACE);
        boolean first = true;
        for (Object entry : omap.entrySet()) {
            if (first) {
                first = false;
            } else {
                sb.append(SerDeUtils.COMMA);
            }
            Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
            StringBuilder keyBuilder = new StringBuilder();
            buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector);
            String keyString = keyBuilder.toString().trim();
            if ((!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE)) {
                appendWithQuotes(sb, keyString);
            } else {
                sb.append(keyString);
            }
            sb.append(SerDeUtils.COLON);
            buildJSONString(sb, e.getValue(), mapValueObjectInspector);
        }
        sb.append(SerDeUtils.RBRACE);
    }
}
 
Example #6
Source File: HiveWriterFactory.java    From flink with Apache License 2.0 5 votes vote down vote up
private void checkInitialize() throws Exception {
	if (initialized) {
		return;
	}

	JobConf jobConf = confWrapper.conf();
	Object serdeLib = Class.forName(serDeInfo.getSerializationLib()).newInstance();
	Preconditions.checkArgument(serdeLib instanceof Serializer && serdeLib instanceof Deserializer,
			"Expect a SerDe lib implementing both Serializer and Deserializer, but actually got "
					+ serdeLib.getClass().getName());
	this.recordSerDe = (Serializer) serdeLib;
	ReflectionUtils.setConf(recordSerDe, jobConf);

	// TODO: support partition properties, for now assume they're same as table properties
	SerDeUtils.initializeSerDe((Deserializer) recordSerDe, jobConf, tableProperties, null);

	this.formatFields = allColumns.length - partitionColumns.length;
	this.hiveConversions = new HiveObjectConversion[formatFields];
	this.converters = new DataFormatConverter[formatFields];
	List<ObjectInspector> objectInspectors = new ArrayList<>(hiveConversions.length);
	for (int i = 0; i < formatFields; i++) {
		DataType type = allTypes[i];
		ObjectInspector objectInspector = HiveInspectors.getObjectInspector(type);
		objectInspectors.add(objectInspector);
		hiveConversions[i] = HiveInspectors.getConversion(
				objectInspector, type.getLogicalType(), hiveShim);
		converters[i] = DataFormatConverters.getConverterForDataType(type);
	}

	this.formatInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
			Arrays.asList(allColumns).subList(0, formatFields),
			objectInspectors);
	this.initialized = true;
}
 
Example #7
Source File: HiveRecordWriter.java    From nifi with Apache License 2.0 5 votes vote down vote up
@Override
public AbstractSerDe createSerde() throws SerializationError {
    try {
        Properties tableProps = table.getMetadata();
        tableProps.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(",").join(inputColumns));
        tableProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(":").join(inputTypes));
        NiFiRecordSerDe serde = new NiFiRecordSerDe(recordReader, log);
        SerDeUtils.initializeSerDe(serde, conf, tableProps, null);
        this.serde = serde;
        return serde;
    } catch (SerDeException e) {
        throw new SerializationError("Error initializing serde " + NiFiRecordSerDe.class.getName(), e);
    }
}
 
Example #8
Source File: HiveTableOutputFormat.java    From flink with Apache License 2.0 4 votes vote down vote up
@Override
public void open(int taskNumber, int numTasks) throws IOException {
	try {
		StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
		Object serdeLib = Class.forName(sd.getSerdeInfo().getSerializationLib()).newInstance();
		Preconditions.checkArgument(serdeLib instanceof Serializer && serdeLib instanceof Deserializer,
				"Expect a SerDe lib implementing both Serializer and Deserializer, but actually got " + serdeLib.getClass().getName());
		recordSerDe = (Serializer) serdeLib;
		ReflectionUtils.setConf(recordSerDe, jobConf);
		// TODO: support partition properties, for now assume they're same as table properties
		SerDeUtils.initializeSerDe((Deserializer) recordSerDe, jobConf, tableProperties, null);
		outputClass = recordSerDe.getSerializedClass();
	} catch (IllegalAccessException | SerDeException | InstantiationException | ClassNotFoundException e) {
		throw new FlinkRuntimeException("Error initializing Hive serializer", e);
	}

	TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
		+ String.format("%" + (6 - Integer.toString(taskNumber).length()) + "s", " ").replace(" ", "0")
		+ taskNumber + "_0");

	this.jobConf.set("mapred.task.id", taskAttemptID.toString());
	this.jobConf.setInt("mapred.task.partition", taskNumber);
	// for hadoop 2.2
	this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
	this.jobConf.setInt("mapreduce.task.partition", taskNumber);

	this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

	if (!isDynamicPartition) {
		staticWriter = writerForLocation(hiveTablePartition.getStorageDescriptor().getLocation());
	} else {
		dynamicPartitionOffset = fieldNames.length - partitionColumns.size() + hiveTablePartition.getPartitionSpec().size();
	}

	numNonPartitionColumns = isPartitioned ? fieldNames.length - partitionColumns.size() : fieldNames.length;
	hiveConversions = new HiveObjectConversion[numNonPartitionColumns];
	List<ObjectInspector> objectInspectors = new ArrayList<>(hiveConversions.length);
	for (int i = 0; i < numNonPartitionColumns; i++) {
		ObjectInspector objectInspector = HiveInspectors.getObjectInspector(fieldTypes[i]);
		objectInspectors.add(objectInspector);
		hiveConversions[i] = HiveInspectors.getConversion(objectInspector, fieldTypes[i].getLogicalType());
	}

	if (!isPartitioned) {
		rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
			Arrays.asList(fieldNames),
			objectInspectors);
	} else {
		rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
			Arrays.asList(fieldNames).subList(0, fieldNames.length - partitionColumns.size()),
			objectInspectors);
		defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
				HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
	}
}
 
Example #9
Source File: JsonSerdeUtils.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
private static void buildJSONString(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nonnull final ObjectInspector oi) throws SerDeException {
    switch (oi.getCategory()) {
        case PRIMITIVE: {
            PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
            serializePrimitive(sb, obj, poi);
            break;
        }
        case LIST: {
            ListObjectInspector loi = (ListObjectInspector) oi;
            serializeList(sb, obj, loi);
            break;
        }
        case MAP: {
            MapObjectInspector moi = (MapObjectInspector) oi;
            serializeMap(sb, obj, moi);
            break;
        }
        case STRUCT: {
            StructObjectInspector soi = (StructObjectInspector) oi;
            serializeStruct(sb, obj, soi, null);
            break;
        }
        case UNION: {
            UnionObjectInspector uoi = (UnionObjectInspector) oi;
            if (obj == null) {
                sb.append("null");
            } else {
                sb.append(SerDeUtils.LBRACE);
                sb.append(uoi.getTag(obj));
                sb.append(SerDeUtils.COLON);
                buildJSONString(sb, uoi.getField(obj),
                    uoi.getObjectInspectors().get(uoi.getTag(obj)));
                sb.append(SerDeUtils.RBRACE);
            }
            break;
        }
        default:
            throw new SerDeException("Unknown type in ObjectInspector: " + oi.getCategory());
    }
}
 
Example #10
Source File: JsonSerdeUtils.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
@Nonnull
private static StringBuilder appendWithQuotes(@Nonnull final StringBuilder sb,
        @Nonnull final String value) {
    return sb.append(SerDeUtils.QUOTE).append(value).append(SerDeUtils.QUOTE);
}