org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector. They are drawn from open source projects; each example notes its original project and source file.
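Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the basic lifecycle of a StandardStructObjectInspector: build it from parallel lists of field names and field inspectors, create a row, set field data, and read it back. The field names and values are made up for illustration.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StandardStructObjectInspectorDemo {
    public static void main(String[] args) {
        // Build a struct<name:string, age:int> inspector from parallel name/inspector lists.
        List<String> fieldNames = Arrays.asList("name", "age");
        List<ObjectInspector> fieldInspectors = Arrays.asList(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        StandardStructObjectInspector inspector =
            ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);

        // Create an empty row and populate its fields through the inspector.
        Object row = inspector.create();
        StructField nameField = inspector.getStructFieldRef("name");
        StructField ageField = inspector.getStructFieldRef("age");
        inspector.setStructFieldData(row, nameField, "alice");
        inspector.setStructFieldData(row, ageField, 42);

        // Read the data back through the same inspector.
        System.out.println(inspector.getStructFieldData(row, nameField)); // alice
        System.out.println(inspector.getStructFieldData(row, ageField));  // 42
    }
}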
Example #1
Source File: TestParseUserAgent.java    From yauaa with Apache License 2.0
@Test
public void testBasic() throws HiveException {
    // This is an edge case where the webview fields are calculated AND wiped again.
    String userAgent = "Mozilla/5.0 (Linux; Android 5.1.1; KFFOWI Build/LMY47O) AppleWebKit/537.36 (KHTML, like Gecko) " +
        "Version/4.0 Chrome/41.51020.2250.0246 Mobile Safari/537.36 cordova-amazon-fireos/3.4.0 AmazonWebAppPlatform/3.4.0;2.0";

    ParseUserAgent parseUserAgent = new ParseUserAgent();

    StandardStructObjectInspector resultInspector = (StandardStructObjectInspector) parseUserAgent
        .initialize(new ObjectInspector[]{
            PrimitiveObjectInspectorFactory.javaStringObjectInspector
        });

    for (int i = 0; i < 100000; i++) {
        Object row = parseUserAgent.evaluate(new DeferredObject[]{new DeferredJavaObject(userAgent)});
        checkField(resultInspector, row, "DeviceClass", "Tablet");
        checkField(resultInspector, row, "OperatingSystemNameVersion", "FireOS 3.4.0");
        checkField(resultInspector, row, "WebviewAppName", "Unknown");
    }
}
 
Example #2
Source File: HiveTypeConverter.java    From metacat with Apache License 2.0
@Override
public Type toMetacatType(final String type) {
    // Hack to fix the Presto "varchar" type coming in with no length, which Hive requires.
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
        "varchar".equals(type.toLowerCase()) ? serdeConstants.STRING_TYPE_NAME : type);
    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    // The standard struct object inspector forces field names to lower case. In Metacat we need to preserve
    // the original case of the struct fields, so we wrap it with our wrapper to keep the field names as given.
    if (typeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
        final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        final StandardStructObjectInspector objectInspector = (StandardStructObjectInspector) oi;
        oi = new HiveTypeConverter.SameCaseStandardStructObjectInspector(
            structTypeInfo.getAllStructFieldNames(), objectInspector);
    }
    return getCanonicalType(oi);
}
 
Example #3
Source File: HiveUtils.java    From elasticsearch-hadoop with Apache License 2.0
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // extract column info - don't use Hive constants as they were renamed in 0.9, breaking compatibility
    // the column names are saved since the inspector given to #serialize doesn't preserve them (maybe because it's an external table)
    // use the class since StructType requires it ...
    List<String> columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
    List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

    // create a standard writable Object Inspector - used later on by serialization/deserialization
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();

    for (TypeInfo typeInfo : colTypes) {
        inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
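For context, the helper above builds the Writable flavor of the standard inspectors, which is the variant used later for serialization/deserialization. A minimal sketch of the difference between the Java-object and Writable variants (the "string" type name is just for illustration):

// Java-object inspector: field data are plain Java objects (String, Integer, ...).
ObjectInspector javaOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
        TypeInfoUtils.getTypeInfoFromTypeString("string"));
// Writable inspector: field data are Hadoop Writables (Text, IntWritable, ...);
// this is what structObjectInspector(...) above builds for each column.
ObjectInspector writableOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
        TypeInfoUtils.getTypeInfoFromTypeString("string"));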
 
Example #4
Source File: TestParseUserAgent.java    From yauaa with Apache License 2.0
private void checkField(StandardStructObjectInspector resultInspector, Object row, String fieldName, String expectedValue) {
    final Object result = resultInspector.getStructFieldData(row, resultInspector.getStructFieldRef(fieldName));

    if (result == null) {
        assertNull(expectedValue);
    } else {
        assertEquals(expectedValue, result.toString());
    }
}
 
Example #5
Source File: UnionSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0
/**
 * Receives the passed-in argument object inspectors and returns the desired
 * return type's object inspector, to inform Hive of the UDAF's return type.
 *
 * @param mode
 *          Mode (i.e. PARTIAL1, COMPLETE, ...) for determining the input and output
 *          object inspector types.
 * @param parameters
 *          List of object inspectors for the input arguments.
 * @return The object inspector indicating the UDAF return type (i.e. the
 *         type returned by terminate(...)).
 */
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
  super.init(mode, parameters);

  if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
    inputInspector_ = (PrimitiveObjectInspector) parameters[0];
    if (parameters.length > 1) {
      lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
    }
    if (parameters.length > 2) {
      hllTypeInspector_ = (PrimitiveObjectInspector) parameters[2];
    }
  } else {
    // mode = partial2 || final
    intermediateInspector_ = (StandardStructObjectInspector) parameters[0];
  }

  if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
    // intermediate results need to include the lgK and the target HLL type
    return ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList(LG_K_FIELD, HLL_TYPE_FIELD, SKETCH_FIELD),
      Arrays.asList(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.STRING),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY)
      )
    );
  }
  // final results include just the sketch
  return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY);
}
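For context, a hedged sketch of how a struct inspector like the one returned in PARTIAL1 mode is typically consumed again on the merge side: the same StandardStructObjectInspector (captured above as intermediateInspector_) unpacks each field of the partial row with getStructFieldData. The field names and values below are assumptions for illustration, not the project's constants.

// Rebuild the same shape of intermediate inspector and round-trip a partial row through it.
StandardStructObjectInspector intermediate = ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList("lgk", "type", "sketch"),   // assumed field names
    Arrays.asList(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.STRING),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY)));

Object partial = intermediate.create();
intermediate.setStructFieldData(partial, intermediate.getStructFieldRef("lgk"), new IntWritable(12));
intermediate.setStructFieldData(partial, intermediate.getStructFieldRef("type"), new Text("HLL_4"));
intermediate.setStructFieldData(partial, intermediate.getStructFieldRef("sketch"), new BytesWritable(new byte[0]));

// On the merge side, the captured intermediate inspector reads the fields back out like this.
IntWritable lgK = (IntWritable) intermediate.getStructFieldData(
    partial, intermediate.getStructFieldRef("lgk"));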
 
Example #6
Source File: UnionSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0
/**
 * Receives the passed-in argument object inspectors and returns the desired
 * return type's object inspector, to inform Hive of the UDAF's return type.
 *
 * @param mode
 *          Mode (i.e. PARTIAL1, COMPLETE, ...) for determining the input and output
 *          object inspector types.
 * @param parameters
 *          List of object inspectors for the input arguments.
 * @return The object inspector indicating the UDAF return type (i.e. the
 *         type returned by terminate(...)).
 */
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
  super.init(mode, parameters);

  if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
    inputInspector_ = (PrimitiveObjectInspector) parameters[0];
    if (parameters.length > 1) {
      lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
    }
    if (parameters.length > 2) {
      seedInspector_ = (PrimitiveObjectInspector) parameters[2];
    }
  } else {
    // mode = partial2 || final
    intermediateInspector_ = (StandardStructObjectInspector) parameters[0];
  }

  if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
    // intermediate results need to include the lgK and the seed
    return ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList(LG_K_FIELD, SEED_FIELD, SKETCH_FIELD),
      Arrays.asList(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.LONG),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY)
      )
    );
  }
  // final results include just the sketch
  return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY);
}
 
Example #7
Source File: IntersectSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
  super.init(mode, parameters);
  if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
    inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
    if (parameters.length > 1) {
      seedObjectInspector = (PrimitiveObjectInspector) parameters[1];
    }
  } else {
    intermediateObjectInspector = (StandardStructObjectInspector) parameters[0];
  }

  if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
    // intermediate results need to include the seed
    return ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList(SEED_FIELD, SKETCH_FIELD),
      Arrays.asList(
        PrimitiveObjectInspectorFactory
          .getPrimitiveWritableObjectInspector(PrimitiveCategory.LONG),
        PrimitiveObjectInspectorFactory
          .getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY)
      )
    );
  }
  // final results include just the sketch
  return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY);
}
 
Example #8
Source File: UnionSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0
/**
 * Receives the passed-in argument object inspectors and returns the desired
 * return type's object inspector, to inform Hive of the UDAF's return type.
 *
 * @param mode
 *          Mode (i.e. PARTIAL1, COMPLETE, ...) for determining the input and output
 *          object inspector types.
 * @param parameters
 *          List of object inspectors for the input arguments.
 * @return The object inspector indicating the UDAF return type (i.e. the
 *         type returned by terminate(...)).
 */
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
  super.init(mode, parameters);

  if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
    inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
    if (parameters.length > 1) {
      nominalEntriesObjectInspector = (PrimitiveObjectInspector) parameters[1];
    }
    if (parameters.length > 2) {
      seedObjectInspector = (PrimitiveObjectInspector) parameters[2];
    }
  } else {
    // mode = partial2 || final
    intermediateObjectInspector = (StandardStructObjectInspector) parameters[0];
  }

  if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
    // intermediate results need to include the nominal number of entries and the seed
    return ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList(NOMINAL_ENTRIES_FIELD, SEED_FIELD, SKETCH_FIELD),
      Arrays.asList(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.LONG),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY)
      )
    );
  }
  // final results include just the sketch
  return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY);
}
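As a counterpart to the merge-side sketch above, the row handed from PARTIAL1 to merge is commonly a plain Object[] (or List) whose slots line up positionally with the struct inspector's fields; StandardStructObjectInspector accepts either form. A hedged illustration with placeholder values, not the project's actual terminatePartial():

// Hypothetical partial row matching (NOMINAL_ENTRIES_FIELD, SEED_FIELD, SKETCH_FIELD).
Object[] partialRow = new Object[] {
    new IntWritable(4096),           // nominal entries (placeholder)
    new LongWritable(9001L),         // update seed (placeholder)
    new BytesWritable(new byte[0])   // serialized sketch bytes (placeholder)
};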
 
Example #9
Source File: TestParquetDecimalScaling.java    From presto with Apache License 2.0
private static void createParquetFile(
        Path path,
        StandardStructObjectInspector inspector,
        Iterator<?>[] iterators,
        MessageType parquetSchema,
        List<String> columnNames)
{
    Properties tableProperties = createTableProperties(columnNames, Collections.singletonList(inspector));

    JobConf jobConf = new JobConf();
    jobConf.setEnum(COMPRESSION, UNCOMPRESSED);
    jobConf.setBoolean(ENABLE_DICTIONARY, false);
    jobConf.setEnum(WRITER_VERSION, PARQUET_2_0);

    try {
        FileSinkOperator.RecordWriter recordWriter = new TestMapredParquetOutputFormat(Optional.of(parquetSchema), true)
                .getHiveRecordWriter(
                        jobConf,
                        path,
                        Text.class,
                        false,
                        tableProperties,
                        () -> {});

        Object row = inspector.create();
        List<StructField> fields = ImmutableList.copyOf(inspector.getAllStructFieldRefs());

        // The serde only needs to be initialized once; reuse it for every row.
        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        serde.initialize(jobConf, tableProperties, null);

        while (stream(iterators).allMatch(Iterator::hasNext)) {
            for (int i = 0; i < fields.size(); i++) {
                Object value = iterators[i].next();
                inspector.setStructFieldData(row, fields.get(i), value);
            }

            Writable record = serde.serialize(row, inspector);
            recordWriter.write(record);
        }

        recordWriter.close(false);
    }
    catch (IOException | SerDeException e) {
        throw new RuntimeException(e);
    }
}
 
Example #10
Source File: HiveTypeConverter.java    From metacat with Apache License 2.0
public SameCaseStandardStructObjectInspector(final List<String> realFieldNames,
                                             final StandardStructObjectInspector structObjectInspector) {
    this.realFieldNames = realFieldNames;
    this.structObjectInspector = structObjectInspector;
}
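Only the constructor of the wrapper is shown above. The idea (hedged; not Metacat's exact implementation) is that the wrapper keeps the caller-supplied realFieldNames so that each lower-cased field ref reported by the delegate inspector can be paired positionally with its original name when the type is converted:

// Illustration: pair each (lower-cased) struct field ref with its original name by position.
List<? extends StructField> refs = structObjectInspector.getAllStructFieldRefs();
for (int i = 0; i < refs.size(); i++) {
    System.out.println(realFieldNames.get(i) + " (reported as " + refs.get(i).getFieldName() + ")");
}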