org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JsonSerdeUtils.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Appends the JSON form of a list to {@code sb}.
 *
 * <p>The list is obtained from {@code obj} through {@code loi}. A {@code null} list is written
 * as the literal {@code null}; otherwise the elements are written between
 * {@code SerDeUtils.LBRACKET} and {@code SerDeUtils.RBRACKET}, separated by
 * {@code SerDeUtils.COMMA}, each one rendered by {@code buildJSONString}.
 *
 * @param sb buffer receiving the output
 * @param obj object holding the list
 * @param loi inspector used to read the list and to obtain the element inspector
 * @throws SerDeException declared for failures raised while serializing an element
 */
@Nonnull
private static void serializeList(@Nonnull final StringBuilder sb, @Nullable final Object obj,
        @Nullable final ListObjectInspector loi) throws SerDeException {
    final ObjectInspector elementOI = loi.getListElementObjectInspector();
    final List<?> elements = loi.getList(obj);

    // An absent list is emitted as the bare literal.
    if (elements == null) {
        sb.append("null");
        return;
    }

    sb.append(SerDeUtils.LBRACKET);
    boolean first = true;
    for (final Object element : elements) {
        // Separator goes before every element except the first one.
        if (first) {
            first = false;
        } else {
            sb.append(SerDeUtils.COMMA);
        }
        buildJSONString(sb, element, elementOI);
    }
    sb.append(SerDeUtils.RBRACKET);
}
 
Example #2
Source File: PassiveAggressiveUDTFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a PA1 model once, initialized without an options argument, and checks the three
 * resulting feature weights.
 */
@Test
public void testPA1TrainWithoutParameter() throws UDFArgumentException {
    final ObjectInspector labelOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    final ListObjectInspector featuresOI =
            ObjectInspectorFactory.getStandardListObjectInspector(labelOI);

    // Only the feature list and the label inspectors are passed: no options argument.
    final PassiveAggressiveUDTF udtf = new PassiveAggressiveUDTF.PA1();
    udtf.initialize(new ObjectInspector[] {featuresOI, labelOI});

    // One training step on features {1, 2, 3} with label 1.
    final List<?> features = featuresOI.getList(new Object[] {1, 2, 3});
    udtf.train(features, 1);

    // Every trained feature is expected to carry the same weight.
    for (int feature = 1; feature <= 3; feature++) {
        assertEquals(0.3333333f, udtf.model.get(feature).get(), 1e-5f);
    }
}
 
Example #3
Source File: BitcoinTransactionHashSegwitUDF.java    From hadoopcryptoledger with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a list of Bitcoin transaction outputs from a table in Hive in any format (e.g. ORC, Parquet).
 *
 * <p>Each list element is read as a struct with the fields {@code value},
 * {@code txoutscriptlength} and {@code txoutscript}. If any of these fields cannot be resolved,
 * a warning is logged and an empty list is returned.
 *
 * @param loi ObjectInspector for processing the Object containing a list; its element inspector
 *        must be a StructObjectInspector
 * @param listOfOutputsObject object containing the list of outputs to a Bitcoin Transaction
 *
 * @return a list of BitcoinTransactionOutputs; empty if the list has no elements (including a
 *         non-positive reported length) or the struct layout is invalid
 */
private List<BitcoinTransactionOutput> readListOfOutputsFromTable(ListObjectInspector loi, Object listOfOutputsObject) {
	int listLength = loi.getListLength(listOfOutputsObject);
	if (listLength <= 0) {
		// A ListObjectInspector may report a negative length (documented as -1 for null data);
		// passing that to the ArrayList constructor would throw IllegalArgumentException.
		return new ArrayList<>();
	}
	List<BitcoinTransactionOutput> result = new ArrayList<>(listLength);
	StructObjectInspector listOfOutputsElementObjectInspector = (StructObjectInspector) loi.getListElementObjectInspector();
	// The field references depend only on the element inspector, so resolve them once
	// instead of once per element.
	StructField valueSF = listOfOutputsElementObjectInspector.getStructFieldRef("value");
	StructField txoutscriptlengthSF = listOfOutputsElementObjectInspector.getStructFieldRef("txoutscriptlength");
	StructField txoutscriptSF = listOfOutputsElementObjectInspector.getStructFieldRef("txoutscript");
	if ((valueSF == null) || (txoutscriptlengthSF == null) || (txoutscriptSF == null)) {
		// Same outcome as detecting the problem while visiting the first element.
		LOG.warn("Invalid BitcoinTransactionOutput detected at position 0");
		return new ArrayList<>();
	}
	for (int i = 0; i < listLength; i++) {
		Object currentListOfOutputsObject = loi.getListElement(listOfOutputsObject, i);
		HiveDecimal currentValue = hdoi.getPrimitiveJavaObject(listOfOutputsElementObjectInspector.getStructFieldData(currentListOfOutputsObject, valueSF));
		byte[] currentTxOutScriptLength = wboi.getPrimitiveJavaObject(listOfOutputsElementObjectInspector.getStructFieldData(currentListOfOutputsObject, txoutscriptlengthSF));
		byte[] currentTxOutScript = wboi.getPrimitiveJavaObject(listOfOutputsElementObjectInspector.getStructFieldData(currentListOfOutputsObject, txoutscriptSF));
		// toBigIntegerExact throws ArithmeticException if the decimal has a fractional part
		BitcoinTransactionOutput currentBitcoinTransactionOutput = new BitcoinTransactionOutput(currentValue.bigDecimalValue().toBigIntegerExact(), currentTxOutScriptLength, currentTxOutScript);
		result.add(currentBitcoinTransactionOutput);
	}
	return result;
}
 
Example #4
Source File: TestDataWritableWriter.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Writes an array as a group whose single field (index 0) repeats once per element.
 * An empty array produces an empty group; a null element is rejected.
 *
 * @param value object holding the array
 * @param inspector inspector used to read the array and its elements
 * @param type group type whose first child describes the element
 * @throws IllegalArgumentException if the array contains a null element
 */
private void writeSingleLevelArray(Object value, ListObjectInspector inspector, GroupType type)
{
    // The first child of the group type is the element type
    Type elementType = type.getType(0);

    recordConsumer.startGroup();

    List<?> elements = inspector.getList(value);
    if (elements.isEmpty()) {
        // Nothing to emit between the group markers
        recordConsumer.endGroup();
        return;
    }

    recordConsumer.startField(elementType.getName(), 0);
    ObjectInspector elementInspector = inspector.getListElementObjectInspector();
    for (Object item : elements) {
        if (item == null) {
            throw new IllegalArgumentException("Array elements are requires in given schema definition");
        }
        writeValue(item, elementInspector, elementType);
    }
    recordConsumer.endField(elementType.getName(), 0);

    recordConsumer.endGroup();
}
 
Example #5
Source File: WriterImpl.java    From hive-dwrf with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a writer for a list column: one child writer for the list elements plus a
 * run-length integer writer on the column's LENGTH stream.
 *
 * @throws IOException declared for failures while setting up the child writer or streams
 */
ListTreeWriter(int columnId,
               ObjectInspector inspector,
               StreamFactory writer,
               boolean nullable, Configuration conf,
               boolean useVInts, boolean lowMemoryMode,
               MemoryEstimate memoryEstimate) throws IOException {
  super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);

  // Single child: the writer for the element type of the list.
  final ObjectInspector elementInspector =
      ((ListObjectInspector) inspector).getListElementObjectInspector();
  final TreeWriter elementWriter = createTreeWriter(elementInspector, writer, true, conf,
      useVInts, lowMemoryMode, memoryEstimate);
  childrenWriters = new TreeWriter[] {elementWriter};

  lengths = new RunLengthIntegerWriter(
      writer.createStream(columnId, OrcProto.Stream.Kind.LENGTH),
      false, INT_BYTE_SIZE, useVInts);
  recordPosition(rowIndexPosition);
}
 
Example #6
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Copies the elements of a list into {@code out} as doubles, substituting {@code nullValue}
 * for null elements. Does nothing when {@code argObj} is null.
 *
 * @param argObj object holding the list, may be null
 * @param listOI inspector used to read the list
 * @param elemOI inspector used to convert each element to a double
 * @param out destination array; its length must equal the list length
 * @param nullValue value stored for null elements
 * @throws UDFArgumentException if the list length differs from {@code out.length}
 */
@Nonnull
public static void toDoubleArray(@Nullable final Object argObj,
        @Nonnull final ListObjectInspector listOI,
        @Nonnull final PrimitiveObjectInspector elemOI, @Nonnull final double[] out,
        final double nullValue) throws UDFArgumentException {
    if (argObj == null) {
        return;
    }

    final int size = listOI.getListLength(argObj);
    if (size != out.length) {
        throw new UDFArgumentException(
            "Dimension mismatched. Expected: " + out.length + ", Actual: " + size);
    }

    for (int pos = 0; pos < size; pos++) {
        final Object element = listOI.getListElement(argObj, pos);
        out[pos] = (element == null) ? nullValue
                : PrimitiveObjectInspectorUtils.getDouble(element, elemOI);
    }
}
 
Example #7
Source File: ParquetHiveSerDe.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a Hive list into a nested {@code ArrayWritable}: an outer array holding one inner
 * array with the converted elements. Elements that convert to null are dropped.
 *
 * @param obj object holding the list
 * @param inspector inspector used to read the list
 * @return the wrapped array, or null when no element was converted
 * @throws SerDeException declared for failures raised while converting an element
 */
private ArrayWritable createArray(final Object obj, final ListObjectInspector inspector)
    throws SerDeException {
  final List<?> elements = inspector.getList(obj);
  final ObjectInspector elementInspector = inspector.getListElementObjectInspector();
  if (elements == null) {
    return null;
  }

  final List<Writable> converted = new ArrayList<Writable>();
  for (final Object element : elements) {
    final Writable writable = createObject(element, elementInspector);
    if (writable == null) {
      continue;
    }
    converted.add(writable);
  }
  if (converted.isEmpty()) {
    return null;
  }

  // The inner array is typed after the class of its first element.
  final Writable[] items = converted.toArray(new Writable[converted.size()]);
  final ArrayWritable inner = new ArrayWritable(converted.get(0).getClass(), items);
  return new ArrayWritable(Writable.class, new Writable[] {inner});
}
 
Example #8
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Copies the elements of a list into {@code out} as doubles. Does nothing when
 * {@code argObj} is null.
 *
 * @param argObj object holding the list, may be null
 * @param listOI inspector used to read the list
 * @param elemOI inspector used to convert each element to a double
 * @param out destination array; its length must equal the list length
 * @param avoidNull when true, null elements are skipped and the matching slot of {@code out}
 *        is left untouched; when false, a null element is an error
 * @throws UDFArgumentException if the lengths differ, or a null element is found while
 *         {@code avoidNull} is false
 */
@Nonnull
public static void toDoubleArray(@Nullable final Object argObj,
        @Nonnull final ListObjectInspector listOI,
        @Nonnull final PrimitiveObjectInspector elemOI, @Nonnull final double[] out,
        final boolean avoidNull) throws UDFArgumentException {
    if (argObj == null) {
        return;
    }

    final int size = listOI.getListLength(argObj);
    if (size != out.length) {
        throw new UDFArgumentException(
            "Dimension mismatched. Expected: " + out.length + ", Actual: " + size);
    }

    for (int pos = 0; pos < size; pos++) {
        final Object element = listOI.getListElement(argObj, pos);
        if (element != null) {
            out[pos] = PrimitiveObjectInspectorUtils.getDouble(element, elemOI);
        } else if (!avoidNull) {
            throw new UDFArgumentException("Found null at index " + pos);
        }
    }
}
 
Example #9
Source File: OnehotEncodingUDAF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a struct inspector with one list-typed field per input inspector. Fields are named
 * {@code f0}, {@code f1}, ... in input order, and each list's element inspector is the
 * WRITABLE standard inspector derived from the corresponding input.
 *
 * @param inputOIs input inspectors; must not be null
 * @return struct inspector describing the fields
 */
@Nonnull
private static StructObjectInspector internalMergeOutputOI(
        @CheckForNull PrimitiveObjectInspector[] inputOIs) throws UDFArgumentException {
    Preconditions.checkNotNull(inputOIs);

    final List<String> names = new ArrayList<String>(inputOIs.length);
    final List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(inputOIs.length);
    int ordinal = 0;
    for (final PrimitiveObjectInspector inputOI : inputOIs) {
        names.add("f" + ordinal);
        ordinal++;

        final ObjectInspector elemOI = ObjectInspectorUtils.getStandardObjectInspector(
            inputOI, ObjectInspectorCopyOption.WRITABLE);
        inspectors.add(ObjectInspectorFactory.getStandardListObjectInspector(elemOI));
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}
 
Example #10
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a list into a newly allocated {@code double[]} of the same length.
 *
 * @param argObj object holding the list, may be null
 * @param listOI inspector used to read the list
 * @param elemOI inspector used to convert each element to a double
 * @param avoidNull when true, null elements are skipped, leaving 0.0 in their slot;
 *        when false, a null element is an error
 * @return the converted array, or null when {@code argObj} is null
 * @throws UDFArgumentException if a null element is found while {@code avoidNull} is false
 */
@Nullable
public static double[] asDoubleArray(@Nullable final Object argObj,
        @Nonnull final ListObjectInspector listOI,
        @Nonnull final PrimitiveObjectInspector elemOI, final boolean avoidNull)
        throws UDFArgumentException {
    if (argObj == null) {
        return null;
    }

    final int size = listOI.getListLength(argObj);
    final double[] result = new double[size];
    for (int pos = 0; pos < size; pos++) {
        final Object element = listOI.getListElement(argObj, pos);
        if (element != null) {
            result[pos] = PrimitiveObjectInspectorUtils.getDouble(element, elemOI);
        } else if (!avoidNull) {
            throw new UDFArgumentException("Found null at index " + pos);
        }
    }
    return result;
}
 
Example #11
Source File: VectorDotUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the two arguments and selects the evaluator.
 *
 * <p>The first argument must be a list of numbers. If the second argument is also a list of
 * numbers, a {@code Dot2DVectors} evaluator is used and the result type is double; if it is a
 * single number, a {@code Multiply2D1D} evaluator is used and the result type is a list of
 * doubles.
 *
 * @param argOIs inspectors of the call arguments
 * @return inspector describing the result type
 * @throws UDFArgumentException if the argument count is not 2 or an argument has an
 *         unsupported type
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    if (argOIs.length != 2) {
        throw new UDFArgumentLengthException("Expected 2 arguments, but got " + argOIs.length);
    }

    ObjectInspector argOI0 = argOIs[0];
    if (!HiveUtils.isNumberListOI(argOI0)) {
        throw new UDFArgumentException(
            "Expected array<number> for the first argument: " + argOI0.getTypeName());
    }
    ListObjectInspector xListOI = HiveUtils.asListOI(argOI0);

    ObjectInspector argOI1 = argOIs[1];
    if (HiveUtils.isNumberListOI(argOI1)) {
        // vector x vector
        this.evaluator = new Dot2DVectors(xListOI, HiveUtils.asListOI(argOI1));
        return PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    } else if (HiveUtils.isNumberOI(argOI1)) {
        // vector x scalar
        this.evaluator = new Multiply2D1D(xListOI, argOI1);
        return ObjectInspectorFactory.getStandardListObjectInspector(
            PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
    } else {
        throw new UDFArgumentException(
            "Expected array<number> or number for the second argument: "
                    + argOI1.getTypeName());
    }
}
 
Example #12
Source File: SerDeUtils.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Dispatches serialization of {@code object} according to the category reported by
 * {@code inspector}. Primitives are written through {@code serializePrimitive} and yield
 * {@code null}; every other category returns the result of its dedicated serializer.
 *
 * @throws RuntimeException if the category is not one of the handled ones
 */
@VisibleForTesting
public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector, boolean filterNullMapKeys)
{
    switch (inspector.getCategory()) {
        case LIST:
            return serializeList(type, builder, object, (ListObjectInspector) inspector);
        case MAP:
            return serializeMap(type, builder, object, (MapObjectInspector) inspector, filterNullMapKeys);
        case STRUCT:
            return serializeStruct(type, builder, object, (StructObjectInspector) inspector);
        case UNION:
            return serializeUnion(type, builder, object, (UnionObjectInspector) inspector);
        case PRIMITIVE:
            serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector);
            return null;
        default:
            throw new RuntimeException("Unknown object inspector category: " + inspector.getCategory());
    }
}
 
Example #13
Source File: JSONCDHSerDe.java    From bigdata-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a Hive object into a Jackson-serializable object, choosing the conversion by the
 * category reported by the ObjectInspector.
 *
 * @param obj - Hive object to deparse
 * @param oi  - ObjectInspector for the object
 * @return - the deparsed object, or null for UNION and any other unhandled category
 */
private Object deparseObject(Object obj, ObjectInspector oi) {
	Object deparsed = null;
	switch (oi.getCategory()) {
		case PRIMITIVE:
			deparsed = deparsePrimitive(obj, (PrimitiveObjectInspector) oi);
			break;
		case LIST:
			deparsed = deparseList(obj, (ListObjectInspector) oi);
			break;
		case MAP:
			deparsed = deparseMap(obj, (MapObjectInspector) oi);
			break;
		case STRUCT:
			deparsed = deparseStruct(obj, (StructObjectInspector) oi, false);
			break;
		default:
			// UNION is unsupported by JSON; it and anything else stay null
			break;
	}
	return deparsed;
}
 
Example #14
Source File: XGBoostBatchPredictUDTF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a dense labeled point from a list of feature values. Null elements become
 * {@code Float.NaN}; the label is a dummy {@code 0.f} and no indices are supplied.
 *
 * @param rowId identifier attached to the resulting point
 * @param argObj object holding the feature list
 * @param featureListOI inspector used to read the list
 * @param featureElemOI inspector used to convert each element to a float
 * @return the labeled point carrying {@code rowId} and the dense values
 */
@Nonnull
private static LabeledPointWithRowId parseDenseFeatures(@Nonnull final Writable rowId,
        @Nonnull final Object argObj, @Nonnull final ListObjectInspector featureListOI,
        @Nonnull final PrimitiveObjectInspector featureElemOI) throws UDFArgumentException {
    final int numFeatures = featureListOI.getListLength(argObj);
    final float[] dense = new float[numFeatures];

    for (int pos = 0; pos < numFeatures; pos++) {
        final Object element = featureListOI.getListElement(argObj, pos);
        dense[pos] = (element == null) ? Float.NaN
                : PrimitiveObjectInspectorUtils.getFloat(element, featureElemOI);
    }

    return new LabeledPointWithRowId(rowId, /* dummy label */ 0.f, null, dense);
}
 
Example #15
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Copies every element of a deferred list argument into a new {@code ArrayList}, converting
 * each element with {@code ObjectInspectorUtils.copyToStandardObject}.
 *
 * @param argument deferred argument holding the list
 * @param loi inspector used to read the list and its elements
 * @param objectInspectorOption copy option passed to the element conversion
 * @return the copied list, or null when the argument evaluates to null
 * @throws HiveException declared for failures while evaluating the deferred argument
 */
@Nullable
public static ArrayList<Object> copyListObject(@Nonnull final DeferredObject argument,
        @Nonnull final ListObjectInspector loi,
        @Nonnull final ObjectInspectorCopyOption objectInspectorOption) throws HiveException {
    final Object source = argument.get();
    if (source == null) {
        return null;
    }

    final int size = loi.getListLength(source);
    final ArrayList<Object> copied = new ArrayList<Object>(size);
    for (int pos = 0; pos < size; pos++) {
        final Object element = loi.getListElement(source, pos);
        final ObjectInspector elementOI = loi.getListElementObjectInspector();
        copied.add(
            ObjectInspectorUtils.copyToStandardObject(element, elementOI, objectInspectorOption));
    }
    return copied;
}
 
Example #16
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the column vector matching the category of the given inspector.
 *
 * @param oi inspector describing the column type
 * @return column vector built by the category-specific factory method
 * @throws UserException (unsupported error) for a category that has no factory method here
 */
private ColumnVector getColumnVector(ObjectInspector oi) {
  final Category category = oi.getCategory();
  final ColumnVector vector;
  switch (category) {
    case PRIMITIVE:
      vector = getPrimitiveColumnVector((PrimitiveObjectInspector)oi);
      break;
    case LIST:
      vector = getListColumnVector((ListObjectInspector)oi);
      break;
    case STRUCT:
      vector = getStructColumnVector((StructObjectInspector)oi);
      break;
    case MAP:
      vector = getMapColumnVector((MapObjectInspector)oi);
      break;
    case UNION:
      vector = getUnionColumnVector((UnionObjectInspector)oi);
      break;
    default:
      throw UserException.unsupportedError()
        .message("Vectorized ORC reader is not supported for datatype: %s", category)
        .build(logger);
  }
  return vector;
}
 
Example #17
Source File: BitcoinTransactionHashUDF.java    From hadoopcryptoledger with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a list of Bitcoin transaction outputs from a table in Hive in any format (e.g. ORC, Parquet).
 *
 * <p>Each list element is read as a struct with the fields {@code value},
 * {@code txoutscriptlength} and {@code txoutscript}. If any of these fields cannot be resolved,
 * a warning is logged and an empty list is returned.
 *
 * @param loi ObjectInspector for processing the Object containing a list; its element inspector
 *        must be a StructObjectInspector
 * @param listOfOutputsObject object containing the list of outputs to a Bitcoin Transaction
 *
 * @return a list of BitcoinTransactionOutputs; empty if the list has no elements (including a
 *         non-positive reported length) or the struct layout is invalid
 */
private List<BitcoinTransactionOutput> readListOfOutputsFromTable(ListObjectInspector loi, Object listOfOutputsObject) {
	int listLength = loi.getListLength(listOfOutputsObject);
	if (listLength <= 0) {
		// A ListObjectInspector may report a negative length (documented as -1 for null data);
		// passing that to the ArrayList constructor would throw IllegalArgumentException.
		return new ArrayList<>();
	}
	List<BitcoinTransactionOutput> result = new ArrayList<>(listLength);
	StructObjectInspector listOfOutputsElementObjectInspector = (StructObjectInspector) loi.getListElementObjectInspector();
	// The field references depend only on the element inspector, so resolve them once
	// instead of once per element.
	StructField valueSF = listOfOutputsElementObjectInspector.getStructFieldRef("value");
	StructField txoutscriptlengthSF = listOfOutputsElementObjectInspector.getStructFieldRef("txoutscriptlength");
	StructField txoutscriptSF = listOfOutputsElementObjectInspector.getStructFieldRef("txoutscript");
	if ((valueSF == null) || (txoutscriptlengthSF == null) || (txoutscriptSF == null)) {
		// Same outcome as detecting the problem while visiting the first element.
		LOG.warn("Invalid BitcoinTransactionOutput detected at position 0");
		return new ArrayList<>();
	}
	for (int i = 0; i < listLength; i++) {
		Object currentListOfOutputsObject = loi.getListElement(listOfOutputsObject, i);
		HiveDecimal currentValue = hdoi.getPrimitiveJavaObject(listOfOutputsElementObjectInspector.getStructFieldData(currentListOfOutputsObject, valueSF));
		byte[] currentTxOutScriptLength = wboi.getPrimitiveJavaObject(listOfOutputsElementObjectInspector.getStructFieldData(currentListOfOutputsObject, txoutscriptlengthSF));
		byte[] currentTxOutScript = wboi.getPrimitiveJavaObject(listOfOutputsElementObjectInspector.getStructFieldData(currentListOfOutputsObject, txoutscriptSF));
		// toBigIntegerExact throws ArithmeticException if the decimal has a fractional part
		BitcoinTransactionOutput currentBitcoinTransactionOutput = new BitcoinTransactionOutput(currentValue.bigDecimalValue().toBigIntegerExact(), currentTxOutScriptLength, currentTxOutScript);
		result.add(currentBitcoinTransactionOutput);
	}
	return result;
}
 
Example #18
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the column vector matching the category of the given inspector.
 *
 * @param oi inspector describing the column type
 * @return column vector built by the category-specific factory method
 * @throws UserException (unsupported error) for a category that has no factory method here
 */
private ColumnVector getColumnVector(ObjectInspector oi) {
  final Category category = oi.getCategory();
  final ColumnVector vector;
  switch (category) {
    case PRIMITIVE:
      vector = getPrimitiveColumnVector((PrimitiveObjectInspector)oi);
      break;
    case LIST:
      vector = getListColumnVector((ListObjectInspector)oi);
      break;
    case STRUCT:
      vector = getStructColumnVector((StructObjectInspector)oi);
      break;
    case MAP:
      vector = getMapColumnVector((MapObjectInspector)oi);
      break;
    case UNION:
      vector = getUnionColumnVector((UnionObjectInspector)oi);
      break;
    default:
      throw UserException.unsupportedError()
        .message("Vectorized ORC reader is not supported for datatype: %s", category)
        .build(logger);
  }
  return vector;
}
 
Example #19
Source File: GeneralClassifierUDTFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Initializing with the {@code -inspect_opts} option is expected to fail with a
 * {@code UDFArgumentException} whose message mentions the selected optimizer ("adam").
 */
@Test
public void testInspectOptimizerOptions() throws Exception {
    final ListObjectInspector featuresOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    final ObjectInspector labelOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    final ObjectInspector optionsOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        "-opt adam -reg l1 -inspect_opts");
    final ObjectInspector[] argOIs = new ObjectInspector[] {featuresOI, labelOI, optionsOI};

    final GeneralClassifierUDTF udtf = new GeneralClassifierUDTF();
    try {
        udtf.initialize(argOIs);
        Assert.fail("should not come here");
    } catch (UDFArgumentException expected) {
        Assert.assertTrue(expected.getMessage().contains("adam"));
    }
}
 
Example #20
Source File: XGBoostOnlinePredictUDTF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the arguments and records their inspectors.
 *
 * <p>Argument 0 is the row id, argument 1 the feature list (numeric elements select dense
 * features, string elements select sparse features), arguments 2 and 3 are strings stored as
 * the model id and model inspectors.
 *
 * @param argOIs inspectors of the call arguments; 4 or 5 are accepted
 * @return the return inspector derived from the row id inspector
 * @throws UDFArgumentException if the feature list elements are neither numbers nor strings
 */
@Override
public StructObjectInspector initialize(@Nonnull ObjectInspector[] argOIs)
        throws UDFArgumentException {
    final int numArgs = argOIs.length;
    if (!(numArgs == 4 || numArgs == 5)) {
        showHelp("Invalid argment length=" + numArgs);
    }
    processOptions(argOIs);

    this.rowIdOI = HiveUtils.asPrimitiveObjectInspector(argOIs, 0);

    final ListObjectInspector featuresOI = HiveUtils.asListOI(argOIs, 1);
    this.featureListOI = featuresOI;
    final ObjectInspector elementOI = featuresOI.getListElementObjectInspector();
    final boolean dense = HiveUtils.isNumberOI(elementOI);
    if (dense) {
        this.featureElemOI = HiveUtils.asDoubleCompatibleOI(elementOI);
    } else if (!HiveUtils.isStringOI(elementOI)) {
        throw new UDFArgumentException(
            "Expected array<string|double> for the 2nd argment but got an unexpected features type: "
                    + featuresOI.getTypeName());
    }
    this.denseFeatures = dense;

    this.modelIdOI = HiveUtils.asStringOI(argOIs, 2);
    this.modelOI = HiveUtils.asStringOI(argOIs, 3);
    return getReturnOI(rowIdOI);
}
 
Example #21
Source File: JSONSerDe.java    From searchanalytics-bigdata with MIT License 6 votes vote down vote up
/**
 * Converts a Hive object into a Jackson-serializable object. The conversion
 * is selected by the category that the ObjectInspector reports.
 *
 * @param obj
 *            - Hive object to deparse
 * @param oi
 *            - ObjectInspector for the object
 * @return - the deparsed object, or null for UNION and any other unhandled
 *         category
 */
private Object deparseObject(final Object obj, final ObjectInspector oi) {
	switch (oi.getCategory()) {
	case STRUCT:
		return deparseStruct(obj, (StructObjectInspector) oi, false);
	case MAP:
		return deparseMap(obj, (MapObjectInspector) oi);
	case LIST:
		return deparseList(obj, (ListObjectInspector) oi);
	case PRIMITIVE:
		return deparsePrimitive(obj, (PrimitiveObjectInspector) oi);
	case UNION:
		// Unsupported by JSON
		return null;
	default:
		return null;
	}
}
 
Example #22
Source File: Fallout.java    From hive-funnel-udf with Apache License 2.0 6 votes vote down vote up
/**
 * Validates that the single argument is a list of longs and remembers its inspector.
 *
 * @param arguments inspectors of the call arguments; exactly one is accepted
 * @return inspector for the result, a list of doubles
 * @throws UDFArgumentLengthException if the number of arguments is not 1
 * @throws UDFArgumentTypeException if the argument is not a list, or its elements are not
 *         of primitive type long
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 1) {
        throw new UDFArgumentLengthException("The operator 'fallout' accepts 1 argument.");
    }

    // Check that the argument is a list type.
    // The only argument sits at position 0, which is the index reported in type errors.
    final ObjectInspector argument = arguments[0];
    if (argument.getCategory() != ObjectInspector.Category.LIST) {
        throw new UDFArgumentTypeException(0, "Only list type arguments are accepted, but " + argument.getTypeName() + " was passed.");
    }
    final ListObjectInspector listInspector = (ListObjectInspector) argument;

    // Check that the list is of type long
    // May want to add support for int/double/float later
    // The category is tested before casting so that nested types (e.g. a list of lists)
    // are rejected with a type error rather than a ClassCastException.
    final ObjectInspector elementInspector = listInspector.getListElementObjectInspector();
    final boolean isLongElement =
            elementInspector.getCategory() == ObjectInspector.Category.PRIMITIVE
            && ((PrimitiveObjectInspector) elementInspector).getPrimitiveCategory()
                    == PrimitiveObjectInspector.PrimitiveCategory.LONG;
    if (!isLongElement) {
        throw new UDFArgumentTypeException(0, "A long array argument should be passed, but " + argument.getTypeName() + " was passed instead.");
    }

    // Keep the list object inspector for later use
    listInputObjectInspector = listInspector;

    // This UDF will return a list of doubles
    return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
}
 
Example #23
Source File: PassiveAggressiveUDTFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a PA1 model once, initialized with the option string {@code "-c 0.1"}, and checks
 * the three resulting feature weights.
 */
@Test
public void testPA1TrainWithParameter() throws UDFArgumentException {
    final ObjectInspector labelOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    final ListObjectInspector featuresOI =
            ObjectInspectorFactory.getStandardListObjectInspector(labelOI);
    // Options are passed as a constant string argument.
    final ObjectInspector optionsOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-c 0.1");

    final PassiveAggressiveUDTF udtf = new PassiveAggressiveUDTF.PA1();
    udtf.initialize(new ObjectInspector[] {featuresOI, labelOI, optionsOI});

    // One training step on features {1, 2, 3} with label 1.
    final List<?> features = featuresOI.getList(new Object[] {1, 2, 3});
    udtf.train(features, 1);

    // Every trained feature is expected to carry the same weight.
    for (int feature = 1; feature <= 3; feature++) {
        assertEquals(0.1000000f, udtf.model.get(feature).get(), 1e-5f);
    }
}
 
Example #24
Source File: GeneralClassifierUDTFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Trains a general classifier with the Eve optimizer on {@code adam_test_10000.tsv.gz} and
 * checks that the cumulative loss stays below 800.
 *
 * <p>Each input line is split on a single space into a comma-separated feature list and an
 * integer label.
 */
@Test
public void testEve() throws IOException, HiveException {
    String filePath = "adam_test_10000.tsv.gz";
    String options =
            "-loss logloss -opt Eve -reg l1 -lambda 0.0001 -iter 10 -mini_batch 1 -cv_rate 0.00005";

    GeneralClassifierUDTF udtf = new GeneralClassifierUDTF();

    ListObjectInspector stringListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector params = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, options);

    udtf.initialize(new ObjectInspector[] {stringListOI,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector, params});

    BufferedReader reader = readFile(filePath);
    try {
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            StringTokenizer tokenizer = new StringTokenizer(line, " ");

            String featureLine = tokenizer.nextToken();
            List<String> X = Arrays.asList(featureLine.split(","));

            String labelLine = tokenizer.nextToken();
            Integer y = Integer.valueOf(labelLine);

            udtf.process(new Object[] {X, y});
        }
    } finally {
        // Release the reader even when processing a line fails.
        reader.close();
    }

    udtf.finalizeTraining();

    Assert.assertTrue(
        "CumulativeLoss is expected to be less than 800: " + udtf.getCumulativeLoss(),
        udtf.getCumulativeLoss() < 800);
}
 
Example #25
Source File: XGBoostBatchPredictUDTF.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a sparse labeled point from a list of {@code index:value} features. Null elements
 * are skipped; the label is a dummy {@code 0.f}.
 *
 * @param rowId identifier attached to the resulting point
 * @param argObj object holding the feature list
 * @param featureListOI inspector used to read the list
 * @return the labeled point carrying {@code rowId}, the parsed indices and values
 * @throws UDFArgumentException if a feature has no {@code ':'} after its first character,
 *         or its index or value cannot be parsed as a number
 */
@Nonnull
private static LabeledPointWithRowId parseSparseFeatures(@Nonnull final Writable rowId,
        @Nonnull final Object argObj, @Nonnull final ListObjectInspector featureListOI)
        throws UDFArgumentException {
    final int numFeatures = featureListOI.getListLength(argObj);
    final IntArrayList indices = new IntArrayList(numFeatures);
    final FloatArrayList values = new FloatArrayList(numFeatures);

    for (int i = 0; i < numFeatures; i++) {
        final Object feature = featureListOI.getListElement(argObj, i);
        if (feature != null) {
            final String text = feature.toString();
            // The separator must exist and must not be the first character.
            final int separator = text.indexOf(':');
            if (separator < 1) {
                throw new UDFArgumentException("Invalid feature format: " + text);
            }
            try {
                final int index = Integer.parseInt(text.substring(0, separator));
                final float value = Float.parseFloat(text.substring(separator + 1));
                indices.add(index);
                values.add(value);
            } catch (NumberFormatException e) {
                throw new UDFArgumentException("Failed to parse a feature value: " + text);
            }
        }
    }

    return new LabeledPointWithRowId(rowId, /* dummy label */ 0.f, indices.toArray(),
        values.toArray());
}
 
Example #26
Source File: DynamoDBDataParser.java    From emr-dynamodb-connector with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a list of DynamoDB attribute values into a list of Hive values.
 *
 * @param data attribute values to convert
 * @param objectInspector inspector of the target list; cast to {@code ListObjectInspector}
 * @return a new list with one converted value per input attribute, in input order
 */
public static Object getListObject(List<AttributeValue> data, ObjectInspector objectInspector) {
  ObjectInspector elementOI =
      ((ListObjectInspector) objectInspector).getListElementObjectInspector();
  // The converter is chosen once, from the element type of the list.
  HiveDynamoDBType converter = HiveDynamoDBTypeFactory.getTypeObjectFromHiveType(elementOI);

  List<Object> hiveValues = new ArrayList<>();
  for (AttributeValue attribute : data) {
    Object hiveValue = converter.getHiveData(attribute, elementOI);
    hiveValues.add(hiveValue);
  }
  return hiveValues;
}
 
Example #27
Source File: ChangeFinder2DTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
@Test
public void testPoissonDist() throws HiveException {
    final int examples = 10000;
    final int dims = 3;
    final PoissonDistribution[] poisson =
            new PoissonDistribution[] {new PoissonDistribution(10.d),
                    new PoissonDistribution(5.d), new PoissonDistribution(20.d)};
    final Random rand = new Random(42);
    final Double[] x = new Double[dims];
    final List<Double> xList = Arrays.asList(x);

    Parameters params = new Parameters();
    params.set(LossFunction.logloss);
    params.r1 = 0.01d;
    params.k = 6;
    params.T1 = 10;
    params.T2 = 5;
    PrimitiveObjectInspector oi = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    ListObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(oi);
    final ChangeFinder2D cf = new ChangeFinder2D(params, listOI);
    final double[] outScores = new double[2];

    println("# time x0 x1 x2 outlier change");
    for (int i = 0; i < examples; i++) {
        double r = rand.nextDouble();
        x[0] = r * poisson[0].sample();
        x[1] = r * poisson[1].sample();
        x[2] = r * poisson[2].sample();

        cf.update(xList, outScores);
        printf("%d %f %f %f %f %f%n", i, x[0], x[1], x[2], outScores[0], outScores[1]);
    }
}
 
Example #28
Source File: PerceptronUDTFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the output struct type reported by {@code initialize} for int, string and bigint
 * feature lists, each paired with an int label.
 */
@Test
public void testInitialize() throws UDFArgumentException {
    final PerceptronUDTF udtf = new PerceptronUDTF();
    final ObjectInspector labelOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;

    // int features
    final ListObjectInspector intFeaturesOI =
            ObjectInspectorFactory.getStandardListObjectInspector(labelOI);
    final StructObjectInspector intResultOI =
            udtf.initialize(new ObjectInspector[] {intFeaturesOI, labelOI});
    assertEquals("struct<feature:int,weight:float>", intResultOI.getTypeName());

    // string features
    final ListObjectInspector stringFeaturesOI =
            ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    final StructObjectInspector stringResultOI =
            udtf.initialize(new ObjectInspector[] {stringFeaturesOI, labelOI});
    assertEquals("struct<feature:string,weight:float>", stringResultOI.getTypeName());

    // bigint features
    final ListObjectInspector longFeaturesOI =
            ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaLongObjectInspector);
    final StructObjectInspector longResultOI =
            udtf.initialize(new ObjectInspector[] {longFeaturesOI, labelOI});
    assertEquals("struct<feature:bigint,weight:float>", longResultOI.getTypeName());
}
 
Example #29
Source File: JSONCDHSerDe.java    From bigdata-tutorial with Apache License 2.0 5 votes vote down vote up
/**
 * Deparses a list and its elements.
 *
 * @param obj - Hive object to deparse
 * @param listOI  - ObjectInspector for the object
 * @return - A list holding the deparsed elements, or null when the inspector
 *           yields no list for the object
 */
private Object deparseList(Object obj, ListObjectInspector listOI) {
	List<?> field = listOI.getList(obj);
	if (field == null) {
		// ListObjectInspector.getList returns null for null data; iterating over it
		// would throw a NullPointerException, so pass the null through instead.
		return null;
	}
	ObjectInspector elemOI = listOI.getListElementObjectInspector();
	List<Object> list = new ArrayList<Object>(field.size());
	for (Object elem : field) {
		list.add(deparseObject(elem, elemOI));
	}
	return list;
}
 
Example #30
Source File: Feature.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the non-null elements of a list into FFM features.
 *
 * <p>When {@code probes} has exactly the list's length it is used as the working buffer and
 * its non-null entries are re-populated in place; otherwise a fresh buffer is allocated.
 *
 * @param arg object holding the feature list
 * @param listOI inspector used to read the list
 * @param probes optional reusable buffer
 * @param numFeatures forwarded to {@code parseFFMFeature}
 * @param numFields forwarded to {@code parseFFMFeature}
 * @return null when {@code arg} is null; the working buffer when no element was null;
 *         otherwise a new, shorter array holding only the parsed features
 * @throws HiveException declared for failures raised while parsing a feature
 */
@Nullable
public static Feature[] parseFFMFeatures(@Nonnull final Object arg,
        @Nonnull final ListObjectInspector listOI, @Nullable final Feature[] probes,
        final int numFeatures, final int numFields) throws HiveException {
    if (arg == null) {
        return null;
    }

    final int length = listOI.getListLength(arg);
    final boolean reusable = probes != null && probes.length == length;
    final Feature[] buffer = reusable ? probes : new Feature[length];

    // Parsed features are packed at the front of the buffer.
    int filled = 0;
    for (int i = 0; i < length; i++) {
        final Object element = listOI.getListElement(arg, i);
        if (element != null) {
            final String text = element.toString();
            final Feature existing = buffer[filled];
            if (existing == null) {
                buffer[filled] = parseFFMFeature(text, numFeatures, numFields);
            } else {
                parseFFMFeature(text, existing, numFeatures, numFields);
            }
            filled++;
        }
    }

    if (filled == length) {
        return buffer;
    }
    final Feature[] trimmed = new Feature[filled];
    System.arraycopy(buffer, 0, trimmed, 0, filled);
    return trimmed;
}