org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestObjectInspector.java    From hive-dwrf with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the OrcStructObjectInspector resolves struct fields regardless
 * of the case used in the requested field name: the stored value must be
 * readable through the exact name, its upper-case form and its lower-case form.
 * @throws Exception
 */
@Test
public void testCaseInsensitiveFieldsStruct() throws Exception {
  OrcStruct struct = new OrcStruct(Lists.newArrayList(FIELD_0));
  struct.setFieldValue(0, new Text("a"));

  // The exact name is the control case. Even if someone changes the value of FIELD_0 in the
  // future, either the upper or the lower case variant should differ from the actual case.
  String[] lookupNames = {FIELD_0, FIELD_0.toUpperCase(), FIELD_0.toLowerCase()};
  for (String lookupName : lookupNames) {
    StructField field = NON_LAZY_STRUCT_OI.getStructFieldRef(lookupName);
    StringObjectInspector fieldOI = (StringObjectInspector) field.getFieldObjectInspector();
    Object fieldData = NON_LAZY_STRUCT_OI.getStructFieldData(struct, field);
    Assert.assertEquals("a", fieldOI.getPrimitiveJavaObject(fieldData));
  }
}
 
Example #2
Source File: TestHiveGenericUDF.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the UDF arguments and returns the inspector of the first argument as the
 * output inspector.
 *
 * Exactly two arguments are required (checked with checkArgument): the second must be a
 * constant inspector whose writable value is an IntWritable equal to 1, and the first must be
 * an IntObjectInspector or a StringObjectInspector.
 *
 * @param arguments inspectors of the call arguments
 * @return the inspector of the first argument
 * @throws RuntimeException if the first argument is neither an int nor a string inspector
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
	checkArgument(arguments.length == 2);

	// TEST for constant arguments
	final ObjectInspector constantOI = arguments[1];
	checkArgument(constantOI instanceof ConstantObjectInspector);
	final Object constantValue = ((ConstantObjectInspector) constantOI).getWritableConstantValue();
	checkArgument(constantValue instanceof IntWritable);
	checkArgument(((IntWritable) constantValue).get() == 1);

	final ObjectInspector inputOI = arguments[0];
	final boolean supported =
			inputOI instanceof IntObjectInspector || inputOI instanceof StringObjectInspector;
	if (!supported) {
		throw new RuntimeException("Not support argument: " + inputOI);
	}
	return inputOI;
}
 
Example #3
Source File: WriterImpl.java    From hive-dwrf with Apache License 2.0 6 votes vote down vote up
/**
 * Buffers one string value (or a null marker) and flushes once the buffer is full.
 *
 * @param obj the value to buffer, read through the StringObjectInspector; may be null
 * @throws IOException as declared; may be propagated from the calls made here (e.g. flush)
 */
@Override
void write(Object obj) throws IOException {
  if (obj == null) {
    buffer[bufferIndex++] = null;
    setRawDataSize(RawDatasizeConst.NULL_SIZE);
  } else {
    Text text = ((StringObjectInspector) inspector).getPrimitiveWritableObject(obj);
    // Buffer a copy instead of the Text instance handed back by the inspector
    buffer[bufferIndex++] = new Text(text);
    int byteLength = text.getLength();
    setRawDataSize(byteLength);
    // Increment the memory estimate by the buffered bytes
    memoryEstimate.incrementTotalMemory(byteLength);
    bufferedBytes += byteLength;
  }

  boolean bufferFull = bufferIndex == buffer.length;
  if (bufferFull) {
    flush();
  }
}
 
Example #4
Source File: TestHiveGenericUDF.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the two UDF arguments and hands back the first argument's inspector as the result
 * inspector.
 *
 * The second argument has to be a constant whose writable value is an IntWritable holding 1;
 * the first argument has to be inspected by an IntObjectInspector or a StringObjectInspector.
 *
 * @param arguments inspectors of the call arguments
 * @return the inspector of the first argument
 * @throws RuntimeException if the first argument has any other inspector type
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
	checkArgument(arguments.length == 2);

	// TEST for constant arguments
	final ObjectInspector second = arguments[1];
	checkArgument(second instanceof ConstantObjectInspector);
	final Object writableConstant = ((ConstantObjectInspector) second).getWritableConstantValue();
	checkArgument(writableConstant instanceof IntWritable);
	checkArgument(((IntWritable) writableConstant).get() == 1);

	final ObjectInspector first = arguments[0];
	if (first instanceof IntObjectInspector) {
		return first;
	}
	if (first instanceof StringObjectInspector) {
		return first;
	}
	throw new RuntimeException("Not support argument: " + first);
}
 
Example #5
Source File: IndexRSerde.java    From indexr with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a primitive value into a Writable selected by the inspector's primitive category.
 *
 * DOUBLE, FLOAT, INT, LONG and STRING values are wrapped in a newly created writable; DATE and
 * TIMESTAMP return the writable object supplied by the inspector.
 *
 * @param obj the value to convert; {@code null} yields {@code null}
 * @param inspector inspector describing {@code obj}
 * @return the writable form of {@code obj}, or {@code null} when {@code obj} is {@code null}
 * @throws SerDeException if the primitive category is not one of the handled ones
 */
private static Writable createPrimitive(Object obj, PrimitiveObjectInspector inspector)
        throws SerDeException {
    if (obj == null) {
        return null;
    }

    final Writable converted;
    switch (inspector.getPrimitiveCategory()) {
        case DOUBLE: {
            DoubleObjectInspector doubleOI = (DoubleObjectInspector) inspector;
            converted = new DoubleWritable(doubleOI.get(obj));
            break;
        }
        case FLOAT: {
            FloatObjectInspector floatOI = (FloatObjectInspector) inspector;
            converted = new FloatWritable(floatOI.get(obj));
            break;
        }
        case INT: {
            IntObjectInspector intOI = (IntObjectInspector) inspector;
            converted = new IntWritable(intOI.get(obj));
            break;
        }
        case LONG: {
            LongObjectInspector longOI = (LongObjectInspector) inspector;
            converted = new LongWritable(longOI.get(obj));
            break;
        }
        case STRING: {
            StringObjectInspector stringOI = (StringObjectInspector) inspector;
            converted = new Text(stringOI.getPrimitiveJavaObject(obj));
            break;
        }
        case DATE:
            converted = ((DateObjectInspector) inspector).getPrimitiveWritableObject(obj);
            break;
        case TIMESTAMP:
            converted = ((TimestampObjectInspector) inspector).getPrimitiveWritableObject(obj);
            break;
        default:
            throw new SerDeException("Can't serialize primitive : " + inspector.getPrimitiveCategory());
    }
    return converted;
}
 
Example #6
Source File: GeneralLearnerBaseUDTF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Maps the element inspector of a feature list to the matching {@code FeatureType}.
 *
 * @param featureListOI inspector of the feature list
 * @return STRING, INT or LONG depending on the list element inspector
 * @throws UDFArgumentException if the element inspector is not a string, int or long inspector
 */
@Nonnull
private static FeatureType getFeatureType(@Nonnull ListObjectInspector featureListOI)
        throws UDFArgumentException {
    final ObjectInspector elementOI = featureListOI.getListElementObjectInspector();

    if (elementOI instanceof StringObjectInspector) {
        return FeatureType.STRING;
    }
    if (elementOI instanceof IntObjectInspector) {
        return FeatureType.INT;
    }
    if (elementOI instanceof LongObjectInspector) {
        return FeatureType.LONG;
    }

    final String message = "Feature object inspector must be one of "
            + "[StringObjectInspector, IntObjectInspector, LongObjectInspector]: "
            + elementOI.toString();
    throw new UDFArgumentException(message);
}
 
Example #7
Source File: ParseUserAgent.java    From yauaa with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the single string argument, builds the user agent analyzer and declares the
 * output as a struct with one writable-string field per field name reported by the analyzer.
 *
 * @param args inspectors of the call arguments; must contain exactly one string inspector
 * @return a standard struct inspector over the analyzer's field names
 * @throws UDFArgumentException if the argument count or the argument type is wrong
 */
@Override
public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
    // --- Input check: exactly one argument, and it must be a String ---
    if (args.length != 1) {
        throw new UDFArgumentException("The argument list must be exactly 1 element");
    }
    if (!(args[0] instanceof StringObjectInspector)) {
        throw new UDFArgumentException("The argument must be a string");
    }
    useragentOI = (StringObjectInspector) args[0];

    // --- Parser setup ---
    userAgentAnalyzer = UserAgentAnalyzer.newBuilder()
        .hideMatcherLoadStats()
        .delayInitialization()
        .build();
    fieldNames = userAgentAnalyzer.getAllPossibleFieldNamesSorted();

    // --- Output definition: a struct<> in which every field is a writable string ---
    // https://stackoverflow.com/questions/26026027/how-to-return-struct-from-hive-udf
    final int fieldCount = fieldNames.size();
    final List<ObjectInspector> fieldObjectInspectors = new ArrayList<>(fieldCount);
    for (int i = 0; i < fieldCount; i++) {
        fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors);
}
 
Example #8
Source File: ParquetHiveSerDe.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a primitive value in the Writable that corresponds to the inspector's primitive
 * category. Strings are wrapped as a BinaryWritable built with Binary.fromString.
 *
 * @param obj the value to wrap; {@code null} yields {@code null}
 * @param inspector inspector describing {@code obj}
 * @return the writable form of {@code obj}; {@code null} for a null value or the VOID category
 * @throws SerDeException if the primitive category is not handled
 */
private Writable createPrimitive(final Object obj, final PrimitiveObjectInspector inspector)
    throws SerDeException {
  if (obj == null) {
    return null;
  }
  switch (inspector.getPrimitiveCategory()) {
  case VOID:
    return null;
  case BOOLEAN: {
    final boolean flag = ((BooleanObjectInspector) inspector).get(obj);
    return new BooleanWritable(flag);
  }
  case BYTE: {
    final byte byteValue = (byte) ((ByteObjectInspector) inspector).get(obj);
    return new ByteWritable(byteValue);
  }
  case DOUBLE: {
    final DoubleObjectInspector doubleOI = (DoubleObjectInspector) inspector;
    return new DoubleWritable(doubleOI.get(obj));
  }
  case FLOAT: {
    final FloatObjectInspector floatOI = (FloatObjectInspector) inspector;
    return new FloatWritable(floatOI.get(obj));
  }
  case INT: {
    final IntObjectInspector intOI = (IntObjectInspector) inspector;
    return new IntWritable(intOI.get(obj));
  }
  case LONG: {
    final LongObjectInspector longOI = (LongObjectInspector) inspector;
    return new LongWritable(longOI.get(obj));
  }
  case SHORT: {
    final short shortValue = (short) ((ShortObjectInspector) inspector).get(obj);
    return new ShortWritable(shortValue);
  }
  case STRING: {
    final String text = ((StringObjectInspector) inspector).getPrimitiveJavaObject(obj);
    return new BinaryWritable(Binary.fromString(text));
  }
  default:
    throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
  }
}
 
Example #9
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the inspector at {@code argIndex} as a {@code StringObjectInspector}.
 *
 * @param argOIs argument inspectors
 * @param argIndex position of the inspector to fetch (resolved through getObjectInspector)
 * @return the inspector cast to {@code StringObjectInspector}
 * @throws UDFArgumentException if isStringOI rejects the inspector
 */
@Nonnull
public static StringObjectInspector asStringOI(@Nonnull final ObjectInspector[] argOIs,
        final int argIndex) throws UDFArgumentException {
    final ObjectInspector candidate = getObjectInspector(argOIs, argIndex);
    if (isStringOI(candidate)) {
        return (StringObjectInspector) candidate;
    }
    throw new UDFArgumentException(
        "argOIs[" + argIndex + "] type must be String: " + candidate.getTypeName());
}
 
Example #10
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the given inspector as a {@code StringObjectInspector}.
 *
 * @param argOI inspector to check
 * @return {@code argOI} cast to {@code StringObjectInspector}
 * @throws UDFArgumentException if isStringOI rejects the inspector
 */
@Nonnull
public static StringObjectInspector asStringOI(@Nonnull final ObjectInspector argOI)
        throws UDFArgumentException {
    if (isStringOI(argOI)) {
        return (StringObjectInspector) argOI;
    }
    throw new UDFArgumentException("Argument type must be String: " + argOI.getTypeName());
}
 
Example #11
Source File: BitcoinUDFTest.java    From hadoopcryptoledger with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that BitcoinTransactionHashSegwitUDF.initialize rejects a null argument array, an
 * argument array of length 2, and a single string inspector argument.
 */
@Test
public void BitcoinTransactionHashSegwitUDFInvalidArguments() throws HiveException {
	final BitcoinTransactionHashSegwitUDF udf = new BitcoinTransactionHashSegwitUDF();

	// Wrong number of arguments: none at all, then too many
	assertThrows(UDFArgumentLengthException.class,
			() -> udf.initialize(null),
			"Exception is thrown in case of null parameter");
	assertThrows(UDFArgumentLengthException.class,
			() -> udf.initialize(new ObjectInspector[2]),
			"Exception is thrown in case of invalid length parameter");

	// Right number of arguments, but of a type the UDF does not accept
	final StringObjectInspector[] stringOnlyArgs = new StringObjectInspector[1];
	stringOnlyArgs[0] = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
	assertThrows(UDFArgumentException.class,
			() -> udf.initialize(stringOnlyArgs),
			"Exception is thrown in case of invalid type of parameter");
}
 
Example #12
Source File: BitcoinUDFTest.java    From hadoopcryptoledger with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that BitcoinTransactionHashUDF.initialize rejects a null argument array, an argument
 * array of length 2, and a single string inspector argument.
 */
@Test
public void BitcoinTransactionHashUDFInvalidArguments() throws HiveException {
	final BitcoinTransactionHashUDF udf = new BitcoinTransactionHashUDF();

	// Wrong number of arguments: none at all, then too many
	assertThrows(UDFArgumentLengthException.class,
			() -> udf.initialize(null),
			"Exception is thrown in case of null parameter");
	assertThrows(UDFArgumentLengthException.class,
			() -> udf.initialize(new ObjectInspector[2]),
			"Exception is thrown in case of invalid length parameter");

	// Right number of arguments, but of a type the UDF does not accept
	final StringObjectInspector[] stringOnlyArgs = new StringObjectInspector[1];
	stringOnlyArgs[0] = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
	assertThrows(UDFArgumentException.class,
			() -> udf.initialize(stringOnlyArgs),
			"Exception is thrown in case of invalid type of parameter");
}
 
Example #13
Source File: HiveFieldConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Copies a Hive string value into the output VarCharVector at the given index.
 *
 * @param oi inspector for the value; cast to StringObjectInspector
 * @param hiveFieldValue the Hive value to copy
 * @param outputVV destination vector; cast to VarCharVector
 * @param outputIndex position in the destination vector
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final StringObjectInspector stringOI = (StringObjectInspector) oi;
  final Text text = stringOI.getPrimitiveWritableObject(hiveFieldValue);
  final int byteCount = text.getLength();
  // The size check runs before anything is written to the vector
  checkSizeLimit(byteCount);
  final byte[] backingBytes = text.getBytes();
  // Only the first byteCount bytes of the array are handed to the vector
  final VarCharVector target = (VarCharVector) outputVV;
  target.setSafe(outputIndex, backingBytes, 0, byteCount);
}
 
Example #14
Source File: HiveFieldConverter.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the bytes of a Hive string value into {@code outputVV} at {@code outputIndex}.
 *
 * @param oi inspector for the value; cast to StringObjectInspector
 * @param hiveFieldValue the Hive value to write
 * @param outputVV destination vector; cast to VarCharVector
 * @param outputIndex position in the destination vector
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final Text writable = ((StringObjectInspector) oi).getPrimitiveWritableObject(hiveFieldValue);
  final int length = writable.getLength();
  // Validate the length first; the vector is only touched afterwards
  checkSizeLimit(length);
  final byte[] bytes = writable.getBytes();
  final VarCharVector varCharVector = (VarCharVector) outputVV;
  varCharVector.setSafe(outputIndex, bytes, 0, length);
}
 
Example #15
Source File: FlexibleOdpsDataTransferUDTF.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 5 votes vote down vote up
/**
 * Consumes the five leading connection arguments and forwards the remaining arguments to
 * {@code super.process}.
 *
 * On the first call (while {@code odps} is still null) the leading arguments are read as
 * trimmed strings and used to create the Odps client and the table tunnel. On every call the
 * arguments from index 5 onwards are passed on to the superclass.
 *
 * @param args access_id, access_key, end_point, tunnel_endpoint and project_name, followed by
 *             the arguments for the superclass
 * @throws HiveException as declared by the overridden method
 */
@Override
public void process(Object[] args) throws HiveException {
  final int confArgCount = 5;

  // First 5 args are access_id, access_key, end_point, tunnel_endpoint and project_name
  if (this.odps == null) {
    StringObjectInspector accessIdOI = (StringObjectInspector) confObjectInspectors[0];
    StringObjectInspector accessKeyOI = (StringObjectInspector) confObjectInspectors[1];
    StringObjectInspector endpointOI = (StringObjectInspector) confObjectInspectors[2];
    StringObjectInspector tunnelEndpointOI = (StringObjectInspector) confObjectInspectors[3];
    StringObjectInspector projectNameOI = (StringObjectInspector) confObjectInspectors[4];

    String accessId = accessIdOI.getPrimitiveJavaObject(args[0]).trim();
    String accessKey = accessKeyOI.getPrimitiveJavaObject(args[1]).trim();
    String endpoint = endpointOI.getPrimitiveJavaObject(args[2]).trim();
    String tunnelEndpoint = tunnelEndpointOI.getPrimitiveJavaObject(args[3]).trim();
    String projectName = projectNameOI.getPrimitiveJavaObject(args[4]).trim();

    this.odps = new Odps(new AliyunAccount(accessId, accessKey));
    this.odps.setDefaultProject(projectName);
    this.odps.setEndpoint(endpoint);
    this.tunnel = new TableTunnel(odps);
    // An empty tunnel endpoint means no explicit endpoint is set on the tunnel
    if (!tunnelEndpoint.isEmpty()) {
      this.tunnel.setEndpoint(tunnelEndpoint);
    }
  }

  // Strip the connection arguments before delegating
  Object[] remaining = new Object[args.length - confArgCount];
  System.arraycopy(args, confArgCount, remaining, 0, remaining.length);
  super.process(remaining);
}
 
Example #16
Source File: TestDataWritableWriter.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Emits a single primitive value to the Parquet RecordConsumer, choosing the consumer call
 * from the inspector's primitive category. Null values and the VOID category emit nothing.
 *
 * @param value The object that contains the primitive value.
 * @param inspector The object inspector used to get the correct value type.
 * @throws IllegalArgumentException if the primitive category is not handled
 */
private void writePrimitive(Object value, PrimitiveObjectInspector inspector)
{
    if (value == null) {
        return;
    }

    switch (inspector.getPrimitiveCategory()) {
        case VOID:
            return;
        case DOUBLE: {
            DoubleObjectInspector doubleOI = (DoubleObjectInspector) inspector;
            recordConsumer.addDouble(doubleOI.get(value));
            break;
        }
        case BOOLEAN: {
            BooleanObjectInspector booleanOI = (BooleanObjectInspector) inspector;
            recordConsumer.addBoolean(booleanOI.get(value));
            break;
        }
        case FLOAT: {
            FloatObjectInspector floatOI = (FloatObjectInspector) inspector;
            recordConsumer.addFloat(floatOI.get(value));
            break;
        }
        // BYTE, SHORT and INT are all emitted through addInteger
        case BYTE: {
            ByteObjectInspector byteOI = (ByteObjectInspector) inspector;
            recordConsumer.addInteger(byteOI.get(value));
            break;
        }
        case INT: {
            IntObjectInspector intOI = (IntObjectInspector) inspector;
            recordConsumer.addInteger(intOI.get(value));
            break;
        }
        case LONG: {
            LongObjectInspector longOI = (LongObjectInspector) inspector;
            recordConsumer.addLong(longOI.get(value));
            break;
        }
        case SHORT: {
            ShortObjectInspector shortOI = (ShortObjectInspector) inspector;
            recordConsumer.addInteger(shortOI.get(value));
            break;
        }
        // STRING, CHAR and VARCHAR are emitted as binaries built from their string form
        case STRING: {
            StringObjectInspector stringOI = (StringObjectInspector) inspector;
            recordConsumer.addBinary(Binary.fromString(stringOI.getPrimitiveJavaObject(value)));
            break;
        }
        case CHAR: {
            HiveCharObjectInspector charOI = (HiveCharObjectInspector) inspector;
            recordConsumer.addBinary(
                    Binary.fromString(charOI.getPrimitiveJavaObject(value).getStrippedValue()));
            break;
        }
        case VARCHAR: {
            HiveVarcharObjectInspector varcharOI = (HiveVarcharObjectInspector) inspector;
            recordConsumer.addBinary(
                    Binary.fromString(varcharOI.getPrimitiveJavaObject(value).getValue()));
            break;
        }
        case BINARY: {
            byte[] rawBytes = ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addBinary(Binary.fromByteArray(rawBytes));
            break;
        }
        case TIMESTAMP: {
            Timestamp timestamp = ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addBinary(NanoTimeUtils.getNanoTime(timestamp, false).toBinary());
            break;
        }
        case DECIMAL: {
            HiveDecimal decimal = ((HiveDecimal) inspector.getPrimitiveJavaObject(value));
            DecimalTypeInfo decimalType = (DecimalTypeInfo) inspector.getTypeInfo();
            recordConsumer.addBinary(decimalToBinary(decimal, decimalType));
            break;
        }
        case DATE: {
            Date date = ((DateObjectInspector) inspector).getPrimitiveJavaObject(value);
            recordConsumer.addInteger(DateWritable.dateToDays(date));
            break;
        }
        default:
            throw new IllegalArgumentException("Unsupported primitive data type: " + inspector.getPrimitiveCategory());
    }
}
 
Example #17
Source File: HiveTestUDFImpls.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the first argument into a Java object chosen by {@code inputType}.
 *
 * Numeric and boolean inputs are returned as their boxed Java values, obtained through the
 * {@code valueOf} factories rather than the deprecated boxed-type constructors. BINARY is
 * returned as a byte array, DECIMAL as a HiveVarchar of the decimal's string form, and VARCHAR
 * as a HiveChar when {@code outputType} is CHAR (otherwise as a HiveVarchar).
 *
 * @param arguments deferred call arguments; only the first one is read
 * @return the converted value, or {@code null} when the first argument or its value is null
 * @throws HiveException as declared by the overridden method
 * @throws UnsupportedOperationException if {@code inputType} is not one of the handled types
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  if (arguments[0] == null || arguments[0].get() == null) {
    return null;
  }

  Object input = arguments[0].get();
  switch(inputType) {
    case BOOLEAN:
      return Boolean.valueOf(((BooleanObjectInspector)argumentOI).get(input));
    case BYTE:
      return Byte.valueOf(((ByteObjectInspector)argumentOI).get(input));
    case SHORT:
      return Short.valueOf(((ShortObjectInspector)argumentOI).get(input));
    case INT:
      return Integer.valueOf(((IntObjectInspector)argumentOI).get(input));
    case LONG:
      return Long.valueOf(((LongObjectInspector)argumentOI).get(input));
    case FLOAT:
      return Float.valueOf(((FloatObjectInspector)argumentOI).get(input));
    case DOUBLE:
      return Double.valueOf(((DoubleObjectInspector)argumentOI).get(input));
    case STRING:
      return PrimitiveObjectInspectorUtils.getString(input, (StringObjectInspector)argumentOI);
    case BINARY:
      return PrimitiveObjectInspectorUtils.getBinary(input, (BinaryObjectInspector) argumentOI).getBytes();
    case VARCHAR:
      if (outputType == PrimitiveCategory.CHAR) {
        // A varchar input with a char output is re-wrapped as a HiveChar of maximum length
        HiveVarchar hiveVarchar = PrimitiveObjectInspectorUtils.getHiveVarchar(input, (HiveVarcharObjectInspector) argumentOI);
        return new HiveChar(hiveVarchar.getValue(), HiveChar.MAX_CHAR_LENGTH);
      } else {
        return PrimitiveObjectInspectorUtils.getHiveVarchar(input, (HiveVarcharObjectInspector)argumentOI);
      }
    case CHAR:
      return PrimitiveObjectInspectorUtils.getHiveChar(input, (HiveCharObjectInspector) argumentOI);
    case DATE:
      return PrimitiveObjectInspectorUtils.getDate(input, (DateObjectInspector) argumentOI);
    case TIMESTAMP:
      return PrimitiveObjectInspectorUtils.getTimestamp(input, (TimestampObjectInspector) argumentOI);
    case DECIMAL:
      // return type is a HiveVarchar
      HiveDecimal decimalValue =
          PrimitiveObjectInspectorUtils.getHiveDecimal(input, (HiveDecimalObjectInspector) argumentOI);
      return new HiveVarchar(decimalValue.toString(), HiveVarchar.MAX_VARCHAR_LENGTH);
  }

  // Reached only when no case above matched inputType
  throw new UnsupportedOperationException(String.format("Unexpected input type '%s' in Test UDF", inputType));
}
 
Example #18
Source File: HiveTypeSystem.java    From transport with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Reports whether the given data type is a string type, i.e. whether it is a
 * {@code StringObjectInspector}.
 *
 * @param dataType inspector to classify
 * @return {@code true} if {@code dataType} is a StringObjectInspector
 */
@Override
protected boolean isStringType(ObjectInspector dataType) {
  final boolean inspectsString = dataType instanceof StringObjectInspector;
  return inspectsString;
}
 
Example #19
Source File: HiveString.java    From transport with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Creates a HiveString that keeps a Hive object together with the inspector given for it.
 *
 * @param object the Hive object to keep
 * @param stringObjectInspector inspector stored alongside {@code object}
 * @param stdFactory factory passed to the superclass constructor
 */
public HiveString(Object object, StringObjectInspector stringObjectInspector, StdFactory stdFactory) {
  super(stdFactory);
  this._stringObjectInspector = stringObjectInspector;
  this._object = object;
}
 
Example #20
Source File: HiveStringType.java    From transport with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Creates a HiveStringType backed by the given inspector.
 *
 * @param stringObjectInspector inspector stored by this type
 */
public HiveStringType(StringObjectInspector stringObjectInspector) {
  this._stringObjectInspector = stringObjectInspector;
}
 
Example #21
Source File: CacheablePrimitiveObjectInspectorConverter.java    From transport with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Converts a primitive Hive value into a Text holding its textual form, dispatching on the
 * primitive category reported by {@code inputOI}.
 *
 * A new Text is allocated on every call with non-null input. The integer categories (BYTE,
 * SHORT, INT, LONG) are written as UTF-8 through the {@code out} buffer, which is reset
 * before each use.
 *
 * @param input the value to convert
 * @return the Text form of {@code input}; {@code null} when {@code input} is null or the
 *         category is VOID
 * @throws RuntimeException if the primitive category is not handled
 */
public Text convert(Object input) {
  if (input == null) {
    return null;
  }
  Text t = new Text();

  switch (inputOI.getPrimitiveCategory()) {
    case VOID:
      return null;
    case BOOLEAN:
      // trueBytes / falseBytes are defined outside this method
      t.set(((BooleanObjectInspector) inputOI).get(input) ? trueBytes
          : falseBytes);
      return t;
    case BYTE:
      out.reset();
      LazyInteger.writeUTF8NoException(out, ((ByteObjectInspector) inputOI).get(input));
      // copy only the bytes written so far, not the whole backing array
      t.set(out.getData(), 0, out.getLength());
      return t;
    case SHORT:
      out.reset();
      LazyInteger.writeUTF8NoException(out, ((ShortObjectInspector) inputOI).get(input));
      t.set(out.getData(), 0, out.getLength());
      return t;
    case INT:
      out.reset();
      LazyInteger.writeUTF8NoException(out, ((IntObjectInspector) inputOI).get(input));
      t.set(out.getData(), 0, out.getLength());
      return t;
    case LONG:
      out.reset();
      LazyLong.writeUTF8NoException(out, ((LongObjectInspector) inputOI).get(input));
      t.set(out.getData(), 0, out.getLength());
      return t;
    case FLOAT:
      t.set(String.valueOf(((FloatObjectInspector) inputOI).get(input)));
      return t;
    case DOUBLE:
      t.set(String.valueOf(((DoubleObjectInspector) inputOI).get(input)));
      return t;
    case STRING:
      // read through whichever representation the inspector prefers
      if (inputOI.preferWritable()) {
        t.set(((StringObjectInspector) inputOI).getPrimitiveWritableObject(input));
      } else {
        t.set(((StringObjectInspector) inputOI).getPrimitiveJavaObject(input));
      }
      return t;
    case CHAR:
      // when converting from char, the value should be stripped of any trailing spaces.
      if (inputOI.preferWritable()) {
        // char text value is already stripped of trailing space
        t.set(((HiveCharObjectInspector) inputOI).getPrimitiveWritableObject(input)
            .getStrippedValue());
      } else {
        t.set(((HiveCharObjectInspector) inputOI).getPrimitiveJavaObject(input).getStrippedValue());
      }
      return t;
    case VARCHAR:
      if (inputOI.preferWritable()) {
        t.set(((HiveVarcharObjectInspector) inputOI).getPrimitiveWritableObject(input)
            .toString());
      } else {
        t.set(((HiveVarcharObjectInspector) inputOI).getPrimitiveJavaObject(input).toString());
      }
      return t;
    case DATE:
      t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
      return t;
    case TIMESTAMP:
      t.set(((TimestampObjectInspector) inputOI)
          .getPrimitiveWritableObject(input).toString());
      return t;
    case BINARY:
      BinaryObjectInspector binaryOI = (BinaryObjectInspector) inputOI;
      if (binaryOI.preferWritable()) {
        BytesWritable bytes = binaryOI.getPrimitiveWritableObject(input);
        // copy only the first getLength() bytes of the writable's array
        t.set(bytes.getBytes(), 0, bytes.getLength());
      } else {
        t.set(binaryOI.getPrimitiveJavaObject(input));
      }
      return t;
    case DECIMAL:
      t.set(((HiveDecimalObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
      return t;
    default:
      throw new RuntimeException("Hive 2 Internal error: type = " + inputOI.getTypeName());
  }
}
 
Example #22
Source File: HiveColumnarSerdeResolver.java    From pxf with Apache License 2.0 4 votes vote down vote up
/**
 * Appends the textual form of one primitive column value to {@code builder}.
 *
 * Every column except the first is preceded by {@code delimiter}; a null value is written as
 * {@code nullChar}. Binary values are appended through Utilities.byteArrayToOctalString.
 *
 * @param o the column value; may be null
 * @param oi inspector describing {@code o}
 * @throws IOException as declared by the signature
 * @throws UnsupportedTypeException if the primitive category is not handled
 */
private void resolvePrimitive(Object o, PrimitiveObjectInspector oi) throws IOException {
    final boolean needsDelimiter = !firstColumn;
    if (needsDelimiter) {
        builder.append(delimiter);
    }

    if (o != null) {
        switch (oi.getPrimitiveCategory()) {
            case BOOLEAN: {
                BooleanObjectInspector booleanOI = (BooleanObjectInspector) oi;
                builder.append(booleanOI.get(o));
                break;
            }
            case SHORT: {
                ShortObjectInspector shortOI = (ShortObjectInspector) oi;
                builder.append(shortOI.get(o));
                break;
            }
            case INT: {
                IntObjectInspector intOI = (IntObjectInspector) oi;
                builder.append(intOI.get(o));
                break;
            }
            case LONG: {
                LongObjectInspector longOI = (LongObjectInspector) oi;
                builder.append(longOI.get(o));
                break;
            }
            case FLOAT: {
                FloatObjectInspector floatOI = (FloatObjectInspector) oi;
                builder.append(floatOI.get(o));
                break;
            }
            case DOUBLE: {
                DoubleObjectInspector doubleOI = (DoubleObjectInspector) oi;
                builder.append(doubleOI.get(o));
                break;
            }
            case DECIMAL: {
                HiveDecimalObjectInspector decimalOI = (HiveDecimalObjectInspector) oi;
                builder.append(decimalOI.getPrimitiveJavaObject(o).bigDecimalValue());
                break;
            }
            case STRING: {
                StringObjectInspector stringOI = (StringObjectInspector) oi;
                builder.append(stringOI.getPrimitiveJavaObject(o));
                break;
            }
            case BINARY: {
                byte[] rawBytes = ((BinaryObjectInspector) oi).getPrimitiveJavaObject(o);
                Utilities.byteArrayToOctalString(rawBytes, builder);
                break;
            }
            case TIMESTAMP: {
                TimestampObjectInspector timestampOI = (TimestampObjectInspector) oi;
                builder.append(timestampOI.getPrimitiveJavaObject(o));
                break;
            }
            case BYTE: { /* TINYINT */
                ByteObjectInspector byteOI = (ByteObjectInspector) oi;
                builder.append(Short.valueOf(byteOI.get(o)));
                break;
            }
            default:
                throw new UnsupportedTypeException(oi.getTypeName()
                        + " conversion is not supported by HiveColumnarSerdeResolver");
        }
    } else {
        builder.append(nullChar);
    }

    // Any later column must be preceded by the delimiter
    firstColumn = false;
}
 
Example #23
Source File: HiveStringObjectConverter.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 4 votes vote down vote up
/**
 * Reads a Hive string value as a Java String.
 *
 * @param objectInspector inspector for the value; cast to StringObjectInspector
 * @param o the Hive value
 * @param odpsTypeInfo not consulted by this converter
 * @return the result of getPrimitiveJavaObject for {@code o}
 */
@Override
public Object convert(ObjectInspector objectInspector, Object o, TypeInfo odpsTypeInfo) {
  return ((StringObjectInspector) objectInspector).getPrimitiveJavaObject(o);
}
 
Example #24
Source File: HiveDynamoDBItemType.java    From emr-dynamodb-connector with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a DynamoDB item from a Hive column of type {@code Map&lt;String,String&gt;}.
 *
 * Each key of the Hive map becomes a DynamoDB attribute name. Each value is passed to
 * deserializeAttributeValue and is expected to be a JSON serialized AttributeValue.
 *
 * @param data                 Data from Hive
 * @param fieldObjectInspector The object inspector for the Hive data. Must have TypeName
 *                             Map&lt;String,String&gt;.
 *
 * @return DynamoDB item representation of the provided Hive data as a
 *         Map&lt;String,AttributeValue&gt;.
 *
 * @throws SerDeException if the inspector is not the expected map type, or if the Hive map is
 *                        null or empty
 */
public Map<String, AttributeValue> parseDynamoDBData(Object data, ObjectInspector
    fieldObjectInspector) throws SerDeException {

  final boolean isItemMapType =
      HiveDynamoDBTypeFactory.isHiveDynamoDBItemMapType(fieldObjectInspector);
  if (!isItemMapType) {
    throw new SerDeException(getClass().toString() + " Expecting a MapObjectInspector of type "
        + "map<string,string> for a column which maps DynamoDB item. But we got: "
        + fieldObjectInspector.getTypeName());
  }

  /* The map is of type <String, String>, so both sides are read with string inspectors */
  final MapObjectInspector mapInspector = (MapObjectInspector) fieldObjectInspector;
  final StringObjectInspector keyInspector =
      (StringObjectInspector) mapInspector.getMapKeyObjectInspector();
  final StringObjectInspector valueInspector =
      (StringObjectInspector) mapInspector.getMapValueObjectInspector();

  /* Underlying map object, expected to be of type <String,String> */
  final Map<?, ?> hiveMap = mapInspector.getMap(data);
  if (hiveMap == null || hiveMap.isEmpty()) {
    throw new SerDeException("Hive data cannot be null.");
  }

  /* Rebuild the item one attribute at a time */
  final Map<String, AttributeValue> item = new HashMap<>();
  for (Entry<?, ?> attribute : hiveMap.entrySet()) {
    final String attributeName = keyInspector.getPrimitiveJavaObject(attribute.getKey());
    final String serializedValue = valueInspector.getPrimitiveJavaObject(attribute.getValue());
    item.put(attributeName, deserializeAttributeValue(serializedValue));
  }
  return item;
}
 
Example #25
Source File: DynamoDBDataParser.java    From emr-dynamodb-connector with Apache License 2.0 4 votes vote down vote up
/**
 * Reads {@code data} as a Java String through the given inspector.
 *
 * @param data the Hive value
 * @param objectInspector inspector for {@code data}; cast to StringObjectInspector
 * @return the result of getPrimitiveJavaObject for {@code data}
 */
public static String getString(Object data, ObjectInspector objectInspector) {
  final StringObjectInspector stringInspector = (StringObjectInspector) objectInspector;
  return stringInspector.getPrimitiveJavaObject(data);
}
 
Example #26
Source File: OdpsDataTransferUDTF.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 4 votes vote down vote up
/**
 * Writes one Hive row to the current ODPS upload session.
 *
 * On the first call the Odps client and table tunnel are created from
 * {@code res/console/conf/odps_config.ini}, and the target table name, column names and
 * partition column names are read from the first three arguments (the name lists are
 * comma-separated). The remaining arguments are the column values followed by the partition
 * column values. The upload session is reset whenever the partition spec differs from the
 * current one. Null column values are not set on the record.
 *
 * Any failure is rethrown as a HiveException carrying the original exception as its cause;
 * the stack trace is no longer printed to stderr in addition to that.
 *
 * @param args table name, column names, partition column names, then the column values and
 *             the partition column values
 * @throws HiveException wrapping any exception raised while processing the row
 */
@Override
public void process(Object[] args) throws HiveException {
  try {
    // Lazily create the ODPS client and tunnel on the first row
    if(this.odps == null) {
      OdpsConfig odpsConfig = new OdpsConfig("res/console/conf/odps_config.ini");
      AliyunAccount account = new AliyunAccount(odpsConfig.getAccessId(), odpsConfig.getAccessKey());
      this.odps = new Odps(account);
      this.odps.setDefaultProject(odpsConfig.getProjectName());
      this.odps.setEndpoint(odpsConfig.getOdpsEndpoint());
      this.tunnel = new TableTunnel(odps);
      if (odpsConfig.getTunnelEndpoint() != null) {
        this.tunnel.setEndpoint(odpsConfig.getTunnelEndpoint());
      }
    }

    // The table layout is read once, from the first three arguments of the first row
    if (currentOdpsTableName == null) {
      StringObjectInspector soi0 = (StringObjectInspector) objectInspectors[0];
      StringObjectInspector soi1 = (StringObjectInspector) objectInspectors[1];
      StringObjectInspector soi2 = (StringObjectInspector) objectInspectors[2];

      currentOdpsTableName = soi0.getPrimitiveJavaObject(args[0]).trim();

      String odpsColumnNameString = soi1.getPrimitiveJavaObject(args[1]).trim();
      odpsColumnNames = new ArrayList<>();
      if (!odpsColumnNameString.isEmpty()) {
        odpsColumnNames.addAll(Arrays.asList(trimAll(odpsColumnNameString.split(","))));
      }

      String odpsPartitionColumnNameString = soi2.getPrimitiveJavaObject(args[2]).trim();
      odpsPartitionColumnNames = new ArrayList<>();
      if (!odpsPartitionColumnNameString.isEmpty()) {
        odpsPartitionColumnNames.addAll(
            Arrays.asList(trimAll(odpsPartitionColumnNameString.split(","))));
      }
    }

    // Column values start at index 3; partition column values follow them
    List<Object> hiveColumnValues = new ArrayList<>();
    List<Object> hivePartitionColumnValues = new ArrayList<>();
    for (int i = 0; i < odpsColumnNames.size(); i++) {
      hiveColumnValues.add(args[i + 3]);
    }
    for (int i = 0; i < odpsPartitionColumnNames.size(); i++) {
      hivePartitionColumnValues.add(args[i + 3 + odpsColumnNames.size()]);
    }

    // Get partition spec
    String partitionSpec = getPartitionSpec(hivePartitionColumnValues);

    // Create new tunnel upload session & record writer or reuse the current ones
    if (currentOdpsPartitionSpec == null || !currentOdpsPartitionSpec.equals(partitionSpec)) {
      resetUploadSession(partitionSpec);
    }

    Record record = uploadSession.newRecord();
    for (int i = 0; i < odpsColumnNames.size(); i++) {
      String odpsColumnName = odpsColumnNames.get(i);
      Object value = hiveColumnValues.get(i);
      // Null values are skipped, so the column is never set on the record
      if (value == null) {
        continue;
      }

      // Handle data types
      ObjectInspector objectInspector = objectInspectors[i + 3];
      TypeInfo typeInfo = odps.tables()
          .get(currentOdpsTableName)
          .getSchema()
          .getColumn(odpsColumnName)
          .getTypeInfo();

      record.set(odpsColumnName, HiveObjectConverter.convert(objectInspector, value, typeInfo));
    }

    recordWriter.write(record);
  } catch (Exception e) {
    // The cause is preserved on the HiveException, so no separate printStackTrace is needed
    throw new HiveException(e);
  }
}
 
Example #27
Source File: ExcelSerde.java    From hadoopoffice with Apache License 2.0 4 votes vote down vote up
/**
 * Initializes the SerDe.
 * <p>
 * In addition to the standard Hive properties, the following options can be defined in the table properties:
 * <ul>
 * <li>office.hive.write.defaultSheetName: the sheet name to which data should be written (note: as an input any sheets can be read or selected sheets according to HadoopOffice configuration values)</li>
 * <li>any of the HadoopOffice options (hadoopoffice.*), such as encryption, signing, low footprint mode, linked workbooks; see <a href="https://github.com/ZuInnoTe/hadoopoffice/wiki/Hadoop-File-Format">HadoopOffice configuration</a></li>
 * </ul>
 * Options whose value is "true" or "false" (case-insensitive) are copied into the configuration as booleans, all others as plain strings.
 *
 * @param conf Hadoop Configuration; the HadoopOffice options found in the table properties are copied into it
 * @param prop table properties.
 * @param partitionProperties ignored. Partitions are not supported.
 * @throws SerDeException if the column names or column types are missing from the table properties, or if they do not describe the same number of columns
 */

@Override
public void initialize(Configuration conf, Properties prop, Properties partitionProperties) throws SerDeException {
	LOG.debug("Initializing Excel Hive Serde");
	LOG.debug("Configuring Hive-only options");
	// configure hadoopoffice specific hive options

	String defaultSheetNameStr = prop.getProperty(ExcelSerde.CONF_DEFAULTSHEETNAME);
	if (defaultSheetNameStr != null) {
		this.defaultSheetName = defaultSheetNameStr;
	}
	// copy hadoopoffice options
	LOG.debug("Configuring HadoopOffice Format");
	for (Entry<Object, Object> entry : prop.entrySet()) {
		if (!(entry.getKey() instanceof String)) {
			continue;
		}
		String key = (String) entry.getKey();
		if (!key.startsWith(ExcelSerde.HOSUFFIX)) {
			continue;
		}
		// Properties is an Object-to-Object table, so the value must be checked as well before casting
		if (!(entry.getValue() instanceof String)) {
			LOG.warn("Ignoring option " + key + " because its value is not a String");
			continue;
		}
		String value = (String) entry.getValue();
		if ("TRUE".equalsIgnoreCase(value) || "FALSE".equalsIgnoreCase(value)) {
			conf.setBoolean(key, Boolean.parseBoolean(value));
		} else {
			conf.set(key, value);
		}
	}

	// create object inspector (always a struct = row)
	LOG.debug("Creating object inspector");
	String columnNameProperty = prop.getProperty(serdeConstants.LIST_COLUMNS);
	String columnTypeProperty = prop.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	// fail with a descriptive SerDeException instead of a NullPointerException further down
	if ((columnNameProperty == null) || (columnTypeProperty == null)) {
		throw new SerDeException("Table properties must define both " + serdeConstants.LIST_COLUMNS + " and "
				+ serdeConstants.LIST_COLUMN_TYPES);
	}
	this.columnNames = Arrays.asList(columnNameProperty.split(","));
	this.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
	// every array and list created below is indexed per column, so names and types have to line up
	if (this.columnNames.size() != this.columnTypes.size()) {
		throw new SerDeException("Number of column names (" + this.columnNames.size()
				+ ") does not match number of column types (" + this.columnTypes.size() + ")");
	}
	final List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
	for (TypeInfo currentColumnType : columnTypes) {
		columnOIs.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(currentColumnType));
	}
	this.oi = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
	// create converter
	LOG.debug("Creating converter");
	HadoopOfficeReadConfiguration hocr = new HadoopOfficeReadConfiguration(conf);
	this.readConverter = new ExcelConverterSimpleSpreadSheetCellDAO(hocr.getSimpleDateFormat(), hocr.getSimpleDecimalFormat(), hocr.getSimpleDateTimeFormat());
	HadoopOfficeWriteConfiguration howc = new HadoopOfficeWriteConfiguration(conf,"");
	this.writeConverter = new ExcelConverterSimpleSpreadSheetCellDAO(howc.getSimpleDateFormat(), howc.getSimpleDecimalFormat(), howc.getSimpleDateTimeFormat());
	// configure writing of header
	this.writeHeader=howc.getWriteHeader();
	// derive the converter schema: one generic data type per column, chosen by the column's object inspector
	GenericDataType[] columnsGD = new GenericDataType[columnOIs.size()];
	for (int i = 0; i < columnOIs.size(); i++) {
		ObjectInspector currentOI = columnOIs.get(i);
		if (currentOI instanceof BooleanObjectInspector) {
			columnsGD[i] = new GenericBooleanDataType();
		} else if (currentOI instanceof DateObjectInspector) {
			columnsGD[i] = new GenericDateDataType();
		} else if (currentOI instanceof TimestampObjectInspector) {
			columnsGD[i] = new GenericTimestampDataType();
		} else if (currentOI instanceof ByteObjectInspector) {
			columnsGD[i] = new GenericByteDataType();
		} else if (currentOI instanceof ShortObjectInspector) {
			columnsGD[i] = new GenericShortDataType();
		} else if (currentOI instanceof IntObjectInspector) {
			columnsGD[i] = new GenericIntegerDataType();
		} else if (currentOI instanceof LongObjectInspector) {
			columnsGD[i] = new GenericLongDataType();
		} else if (currentOI instanceof DoubleObjectInspector) {
			columnsGD[i] = new GenericDoubleDataType();
		} else if (currentOI instanceof FloatObjectInspector) {
			columnsGD[i] = new GenericFloatDataType();
		} else if (currentOI instanceof HiveDecimalObjectInspector) {
			// decimals carry their precision and scale over to the generic type
			HiveDecimalObjectInspector currentOIHiveDecimalOI = (HiveDecimalObjectInspector) currentOI;
			columnsGD[i] = new GenericBigDecimalDataType(currentOIHiveDecimalOI.precision(),
					currentOIHiveDecimalOI.scale());
		} else if (currentOI instanceof StringObjectInspector) {
			columnsGD[i] = new GenericStringDataType();
		} else {
			// any other inspector falls back to String
			LOG.warn("Could not detect desired datatype for column " + i + ". Type " + currentOI.getTypeName()
					+ ". Using String");
			columnsGD[i] = new GenericStringDataType();
		}
	}
	this.readConverter.setSchemaRow(columnsGD);
	this.writeConverter.setSchemaRow(columnsGD);
	// create nullrow
	this.nullRow = new Object[this.columnNames.size()];
	// set writerow
	this.currentWriteRow = 0;
	// set outputrow
	this.outputRow = new Object[this.columnNames.size()];
	LOG.debug("Finished Initialization");
}
 
Example #28
Source File: SerDeUtils.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Appends a single primitive Hive value to the given block builder.
 * A null {@code object} is appended as a null entry; otherwise the value is read
 * through {@code inspector} and written according to the inspector's primitive category.
 *
 * @param type target type; used directly for STRING, VARCHAR and CHAR values and cast to
 *        {@code CharType} / {@code DecimalType} for CHAR / DECIMAL values
 * @param builder builder receiving the value; must not be null
 * @param object the Hive value to serialize, may be null
 * @param inspector inspector used to read {@code object}
 * @throws RuntimeException if the inspector reports a primitive category that is not handled here
 */
private static void serializePrimitive(Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector)
{
    requireNonNull(builder, "parent builder is null");

    if (object == null) {
        builder.appendNull();
        return;
    }

    switch (inspector.getPrimitiveCategory()) {
        case BOOLEAN: {
            boolean flag = ((BooleanObjectInspector) inspector).get(object);
            BooleanType.BOOLEAN.writeBoolean(builder, flag);
            break;
        }
        case BYTE: {
            byte tiny = ((ByteObjectInspector) inspector).get(object);
            TinyintType.TINYINT.writeLong(builder, tiny);
            break;
        }
        case SHORT: {
            short small = ((ShortObjectInspector) inspector).get(object);
            SmallintType.SMALLINT.writeLong(builder, small);
            break;
        }
        case INT: {
            int integer = ((IntObjectInspector) inspector).get(object);
            IntegerType.INTEGER.writeLong(builder, integer);
            break;
        }
        case LONG: {
            long big = ((LongObjectInspector) inspector).get(object);
            BigintType.BIGINT.writeLong(builder, big);
            break;
        }
        case FLOAT: {
            // REAL values are written as the raw int bits of the float
            int rawBits = floatToRawIntBits(((FloatObjectInspector) inspector).get(object));
            RealType.REAL.writeLong(builder, rawBits);
            break;
        }
        case DOUBLE: {
            double real = ((DoubleObjectInspector) inspector).get(object);
            DoubleType.DOUBLE.writeDouble(builder, real);
            break;
        }
        case STRING: {
            String text = ((StringObjectInspector) inspector).getPrimitiveJavaObject(object);
            type.writeSlice(builder, Slices.utf8Slice(text));
            break;
        }
        case VARCHAR: {
            String text = ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(object).getValue();
            type.writeSlice(builder, Slices.utf8Slice(text));
            break;
        }
        case CHAR: {
            CharType fixedLengthType = (CharType) type;
            HiveChar charValue = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(object);
            type.writeSlice(
                    builder,
                    truncateToLengthAndTrimSpaces(Slices.utf8Slice(charValue.getValue()), fixedLengthType.getLength()));
            break;
        }
        case DATE: {
            long date = formatDateAsLong(object, (DateObjectInspector) inspector);
            DateType.DATE.writeLong(builder, date);
            break;
        }
        case TIMESTAMP: {
            long timestamp = formatTimestampAsLong(object, (TimestampObjectInspector) inspector);
            TimestampType.TIMESTAMP.writeLong(builder, timestamp);
            break;
        }
        case BINARY: {
            byte[] bytes = ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(object);
            VARBINARY.writeSlice(builder, Slices.wrappedBuffer(bytes));
            break;
        }
        case DECIMAL: {
            DecimalType targetDecimal = (DecimalType) type;
            HiveDecimalWritable writable = ((HiveDecimalObjectInspector) inspector).getPrimitiveWritableObject(object);
            // short decimals are written as a long, all others as a slice
            if (!targetDecimal.isShort()) {
                targetDecimal.writeSlice(builder, DecimalUtils.getLongDecimalValue(writable, targetDecimal.getScale()));
            }
            else {
                targetDecimal.writeLong(builder, DecimalUtils.getShortDecimalValue(writable, targetDecimal.getScale()));
            }
            break;
        }
        default:
            throw new RuntimeException("Unknown primitive type: " + inspector.getPrimitiveCategory());
    }
}
 
Example #29
Source File: OrcFlowFileWriter.java    From localization_nifi with Apache License 2.0 2 votes vote down vote up
/**
 * Extracts the text held by a value object. Subclasses can override this
 * method to obtain the text differently.
 *
 * @param obj value object to read
 * @return the writable text the string inspector returns for {@code obj}
 */
Text getTextValue(Object obj) {
    // the inspector field is read as a string inspector
    final StringObjectInspector stringInspector = (StringObjectInspector) inspector;
    return stringInspector.getPrimitiveWritableObject(obj);
}
 
Example #30
Source File: OrcFlowFileWriter.java    From nifi with Apache License 2.0 2 votes vote down vote up
/**
 * Reads the text value out of the given value object. This is an extension
 * point: subclasses can override it.
 *
 * @param obj value to convert
 * @return Text obtained from {@code obj} via the string object inspector
 */
Text getTextValue(Object obj) {
    // view the inspector field as a string inspector, then ask it for the writable form
    StringObjectInspector textInspector = (StringObjectInspector) inspector;
    Text textValue = textInspector.getPrimitiveWritableObject(obj);
    return textValue;
}