org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe. The examples are drawn from open source projects; the source file, project, and license are noted above each one.
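Before the project examples, here is a minimal, self-contained sketch of the usage pattern most of them share (it is not taken from any of the projects below): the SerDe is configured through the "columns" and "columns.types" table properties, initialized, and then used to deserialize a delimited Text line whose fields are read back through the StructObjectInspector. The column names, types, and sample input are illustrative; the two-argument initialize(conf, tbl) overload shown is the one used in the examples below, while newer Hive releases also offer an overload that takes partition properties.

import java.util.List;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Text;

public class LazySimpleSerDeSketch {

    public static void main(String[] args) throws SerDeException {
        LazySimpleSerDe serde = new LazySimpleSerDe();
        Configuration conf = new Configuration();

        // Declare the schema: column "c1" of type string and column "c2" of type int.
        Properties tbl = new Properties();
        tbl.setProperty("columns", "c1,c2");
        tbl.setProperty("columns.types", "string,int");
        serde.initialize(conf, tbl);

        // Deserialize one line; \u0001 (Ctrl-A) is LazySimpleSerDe's default field delimiter.
        Text line = new Text("hello" + "\u0001" + "42");
        Object row = serde.deserialize(line);

        // Read the fields back through the struct object inspector.
        StructObjectInspector rowOI = (StructObjectInspector) serde.getObjectInspector();
        List<Object> fields = rowOI.getStructFieldsDataAsList(row);

        // Convert the first field to a plain Java object via its primitive object inspector.
        StructField c1ref = rowOI.getStructFieldRef("c1");
        PrimitiveObjectInspector c1OI = (PrimitiveObjectInspector) c1ref.getFieldObjectInspector();
        Object c1 = c1OI.getPrimitiveJavaObject(fields.get(0));
        System.out.println(c1); // hello
    }
}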
Example #1
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0
public static LazySimpleSerDe getLineSerde(@Nonnull final PrimitiveObjectInspector... OIs)
        throws SerDeException {
    if (OIs.length == 0) {
        throw new IllegalArgumentException("OIs must be specified");
    }
    LazySimpleSerDe serde = new LazySimpleSerDe();
    Configuration conf = new Configuration();
    Properties tbl = new Properties();

    StringBuilder columnNames = new StringBuilder();
    StringBuilder columnTypes = new StringBuilder();
    for (int i = 0; i < OIs.length; i++) {
        columnNames.append('c').append(i + 1).append(',');
        columnTypes.append(OIs[i].getTypeName()).append(',');
    }
    columnNames.deleteCharAt(columnNames.length() - 1);
    columnTypes.deleteCharAt(columnTypes.length() - 1);

    tbl.setProperty("columns", columnNames.toString());
    tbl.setProperty("columns.types", columnTypes.toString());
    serde.initialize(conf, tbl);
    return serde;
}
 
Example #2
Source File: HiveOutputFormatFactoryTest.java    From flink with Apache License 2.0
@Test
public void testCreateOutputFormat() {
	TableSchema schema = TableSchema.builder().field("x", DataTypes.INT()).build();
	SerDeInfo serDeInfo = new SerDeInfo("name", LazySimpleSerDe.class.getName(), Collections.emptyMap());
	HiveWriterFactory writerFactory = new HiveWriterFactory(
			new JobConf(),
			VerifyURIOutputFormat.class,
			serDeInfo, schema,
			new String[0],
			new Properties(),
			HiveShimLoader.loadHiveShim(HiveShimLoader.getHiveVersion()),
			false);
	HiveOutputFormatFactory factory = new HiveOutputFormatFactory(writerFactory);
	org.apache.flink.core.fs.Path path = new org.apache.flink.core.fs.Path(TEST_URI_SCHEME, TEST_URI_AUTHORITY, "/foo/path");
	factory.createOutputFormat(path);
}
 
Example #3
Source File: HiveCatalog.java    From flink with Apache License 2.0
private static void setStorageFormat(StorageDescriptor sd, Map<String, String> properties) {
	// TODO: allow user to specify storage format. Simply use text format for now
	String storageFormatName = DEFAULT_HIVE_TABLE_STORAGE_FORMAT;
	StorageFormatDescriptor storageFormatDescriptor = storageFormatFactory.get(storageFormatName);
	checkArgument(storageFormatDescriptor != null, "Unknown storage format " + storageFormatName);
	sd.setInputFormat(storageFormatDescriptor.getInputFormat());
	sd.setOutputFormat(storageFormatDescriptor.getOutputFormat());
	String serdeLib = storageFormatDescriptor.getSerde();
	sd.getSerdeInfo().setSerializationLib(serdeLib != null ? serdeLib : LazySimpleSerDe.class.getName());
}
 
Example #4
Source File: HiveUtils.java    From incubator-hivemall with Apache License 2.0
public static LazySimpleSerDe getKeyValueLineSerde(
        @Nonnull final PrimitiveObjectInspector keyOI,
        @Nonnull final PrimitiveObjectInspector valueOI) throws SerDeException {
    LazySimpleSerDe serde = new LazySimpleSerDe();
    Configuration conf = new Configuration();
    Properties tbl = new Properties();
    tbl.setProperty("columns", "key,value");
    tbl.setProperty("columns.types", keyOI.getTypeName() + "," + valueOI.getTypeName());
    serde.initialize(conf, tbl);
    return serde;
}
 
Example #5
Source File: HiveCatalogUtil.java    From tajo with Apache License 2.0
public static String getDataFormat(StorageDescriptor descriptor) {
  Preconditions.checkNotNull(descriptor);

  String serde = descriptor.getSerdeInfo().getSerializationLib();
  String inputFormat = descriptor.getInputFormat();

  if (LazySimpleSerDe.class.getName().equals(serde)) {
    if (TextInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.TEXT;
    } else if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinarySerDe.class.getName().equals(serde)) {
    if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinaryColumnarSerDe.class.getName().equals(serde) || ColumnarSerDe.class.getName().equals(serde)) {
    if (RCFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.RCFILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (ParquetHiveSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.PARQUET;
  } else if (AvroSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.AVRO;
  } else if (OrcSerde.class.getName().equals(serde)) {
    return BuiltinStorages.ORC;
  } else if (RegexSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.REGEX;
  } else {
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }
}
 
Example #6
Source File: AccumuloSerde.java    From accumulo-hive-storage-manager with Apache License 2.0
private void initAccumuloSerdeParameters(Configuration conf, Properties properties)
        throws SerDeException{
    String colMapping = properties.getProperty(COLUMN_MAPPINGS);
    String colTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    String name = getClass().getName();
    fetchCols = AccumuloHiveUtils.parseColumnMapping(colMapping);
    if (colTypeProperty == null) {
        StringBuilder builder = new StringBuilder();
        for (String fetchCol : fetchCols) { //default to all string if no column type property.
            builder.append(serdeConstants.STRING_TYPE_NAME + ":");
        }
        int indexOfLastColon = builder.lastIndexOf(":");
        builder.replace(indexOfLastColon, indexOfLastColon+1, "");
        properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, builder.toString());
    }

    serDeParameters = LazySimpleSerDe.initSerdeParams(conf, properties, name);
    if (fetchCols.size() != serDeParameters.getColumnNames().size()) {
        throw new SerDeException(name + ": Hive table definition has "
                + serDeParameters.getColumnNames().size() +
                " elements while " + COLUMN_MAPPINGS + " has " +
                fetchCols.size() + " elements. " + printColumnMismatchTip(fetchCols.size(),
                serDeParameters.getColumnNames().size()));
    }

    if(log.isInfoEnabled())
        log.info("Serde initialized successfully for column mapping: " + colMapping);
}
 
Example #7
Source File: Hive1SerDeParametersShim.java    From emr-dynamodb-connector with Apache License 2.0
Hive1SerDeParametersShim(Configuration configuration, Properties properties, String serDeName)
    throws SerDeException {
  this.realSerDeParameters = LazySimpleSerDe.initSerdeParams(configuration, properties,
      serDeName);
}
 
Example #8
Source File: MulticlassOnlineClassifierUDTF.java    From incubator-hivemall with Apache License 2.0
private long loadPredictionModel(Map<Object, PredictionModel> label2model, File file,
        PrimitiveObjectInspector labelOI, PrimitiveObjectInspector featureOI,
        WritableFloatObjectInspector weightOI) throws IOException, SerDeException {
    long count = 0L;
    if (!file.exists()) {
        return count;
    }
    if (!file.getName().endsWith(".crc")) {
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                count += loadPredictionModel(label2model, f, labelOI, featureOI, weightOI);
            }
        } else {
            LazySimpleSerDe serde = HiveUtils.getLineSerde(labelOI, featureOI, weightOI);
            StructObjectInspector lineOI = (StructObjectInspector) serde.getObjectInspector();
            StructField c1ref = lineOI.getStructFieldRef("c1");
            StructField c2ref = lineOI.getStructFieldRef("c2");
            StructField c3ref = lineOI.getStructFieldRef("c3");
            PrimitiveObjectInspector c1refOI =
                    (PrimitiveObjectInspector) c1ref.getFieldObjectInspector();
            PrimitiveObjectInspector c2refOI =
                    (PrimitiveObjectInspector) c2ref.getFieldObjectInspector();
            FloatObjectInspector c3refOI =
                    (FloatObjectInspector) c3ref.getFieldObjectInspector();

            BufferedReader reader = null;
            try {
                reader = HadoopUtils.getBufferedReader(file);
                String line;
                while ((line = reader.readLine()) != null) {
                    count++;
                    Text lineText = new Text(line);
                    Object lineObj = serde.deserialize(lineText);
                    List<Object> fields = lineOI.getStructFieldsDataAsList(lineObj);
                    Object f0 = fields.get(0);
                    Object f1 = fields.get(1);
                    Object f2 = fields.get(2);
                    if (f0 == null || f1 == null || f2 == null) {
                        continue; // avoid the case that key or value is null
                    }
                    Object label = c1refOI.getPrimitiveWritableObject(c1refOI.copyObject(f0));
                    PredictionModel model = label2model.get(label);
                    if (model == null) {
                        model = createModel();
                        label2model.put(label, model);
                    }
                    Object k = c2refOI.getPrimitiveWritableObject(c2refOI.copyObject(f1));
                    float v = c3refOI.get(f2);
                    model.set(k, new WeightValue(v, false));
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
        }
    }
    return count;
}
 
Example #9
Source File: MulticlassOnlineClassifierUDTF.java    From incubator-hivemall with Apache License 2.0
private long loadPredictionModel(Map<Object, PredictionModel> label2model, File file,
        PrimitiveObjectInspector labelOI, PrimitiveObjectInspector featureOI,
        WritableFloatObjectInspector weightOI, WritableFloatObjectInspector covarOI)
        throws IOException, SerDeException {
    long count = 0L;
    if (!file.exists()) {
        return count;
    }
    if (!file.getName().endsWith(".crc")) {
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                count += loadPredictionModel(label2model, f, labelOI, featureOI, weightOI,
                    covarOI);
            }
        } else {
            LazySimpleSerDe serde =
                    HiveUtils.getLineSerde(labelOI, featureOI, weightOI, covarOI);
            StructObjectInspector lineOI = (StructObjectInspector) serde.getObjectInspector();
            StructField c1ref = lineOI.getStructFieldRef("c1");
            StructField c2ref = lineOI.getStructFieldRef("c2");
            StructField c3ref = lineOI.getStructFieldRef("c3");
            StructField c4ref = lineOI.getStructFieldRef("c4");
            PrimitiveObjectInspector c1refOI =
                    (PrimitiveObjectInspector) c1ref.getFieldObjectInspector();
            PrimitiveObjectInspector c2refOI =
                    (PrimitiveObjectInspector) c2ref.getFieldObjectInspector();
            FloatObjectInspector c3refOI =
                    (FloatObjectInspector) c3ref.getFieldObjectInspector();
            FloatObjectInspector c4refOI =
                    (FloatObjectInspector) c4ref.getFieldObjectInspector();

            BufferedReader reader = null;
            try {
                reader = HadoopUtils.getBufferedReader(file);
                String line;
                while ((line = reader.readLine()) != null) {
                    count++;
                    Text lineText = new Text(line);
                    Object lineObj = serde.deserialize(lineText);
                    List<Object> fields = lineOI.getStructFieldsDataAsList(lineObj);
                    Object f0 = fields.get(0);
                    Object f1 = fields.get(1);
                    Object f2 = fields.get(2);
                    Object f3 = fields.get(3);
                    if (f0 == null || f1 == null || f2 == null) {
                        continue; // avoid unexpected case
                    }
                    Object label = c1refOI.getPrimitiveWritableObject(c1refOI.copyObject(f0));
                    PredictionModel model = label2model.get(label);
                    if (model == null) {
                        model = createModel();
                        label2model.put(label, model);
                    }
                    Object k = c2refOI.getPrimitiveWritableObject(c2refOI.copyObject(f1));
                    float v = c3refOI.get(f2);
                    float cov =
                            (f3 == null) ? WeightValueWithCovar.DEFAULT_COVAR : c4refOI.get(f3);
                    model.set(k, new WeightValueWithCovar(v, cov, false));
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
        }
    }
    return count;
}
 
Example #10
Source File: DistributedCacheLookupUDF.java    From incubator-hivemall with Apache License 2.0
private static void loadValues(Object2ObjectMap<Object, Object> map, File file,
        PrimitiveObjectInspector keyOI, PrimitiveObjectInspector valueOI)
        throws IOException, SerDeException {
    if (!file.exists()) {
        return;
    }
    if (!file.getName().endsWith(".crc")) {
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                loadValues(map, f, keyOI, valueOI);
            }
        } else {
            LazySimpleSerDe serde = HiveUtils.getKeyValueLineSerde(keyOI, valueOI);
            StructObjectInspector lineOI = (StructObjectInspector) serde.getObjectInspector();
            StructField keyRef = lineOI.getStructFieldRef("key");
            StructField valueRef = lineOI.getStructFieldRef("value");
            PrimitiveObjectInspector keyRefOI =
                    (PrimitiveObjectInspector) keyRef.getFieldObjectInspector();
            PrimitiveObjectInspector valueRefOI =
                    (PrimitiveObjectInspector) valueRef.getFieldObjectInspector();

            BufferedReader reader = null;
            try {
                reader = HadoopUtils.getBufferedReader(file);
                String line;
                while ((line = reader.readLine()) != null) {
                    Text lineText = new Text(line);
                    Object lineObj = serde.deserialize(lineText);
                    List<Object> fields = lineOI.getStructFieldsDataAsList(lineObj);
                    Object f0 = fields.get(0);
                    Object f1 = fields.get(1);
                    Object k = keyRefOI.getPrimitiveJavaObject(f0);
                    Object v = valueRefOI.getPrimitiveWritableObject(valueRefOI.copyObject(f1));
                    map.put(k, v);
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
        }
    }
}
 
Example #11
Source File: CassandraColumnSerDe.java    From Hive-Cassandra with Apache License 2.0
/**
 * Initialize the cassandra serialization and deserialization parameters from table properties and configuration.
 *
 * @param job the Hadoop configuration
 * @param tbl the table properties
 * @param serdeName the name of this SerDe, used in error messages
 * @throws SerDeException if the SerDe parameters cannot be initialized
 */
@Override
protected void initCassandraSerDeParameters(Configuration job, Properties tbl, String serdeName)
    throws SerDeException {
  cassandraColumnFamily = getCassandraColumnFamily(tbl);
  cassandraColumnNames = parseOrCreateColumnMapping(tbl);

  cassandraColumnNamesBytes = new ArrayList<BytesWritable>();
  for (String columnName : cassandraColumnNames) {
    cassandraColumnNamesBytes.add(new BytesWritable(columnName.getBytes()));
  }

  iKey = cassandraColumnNames.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);

  serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);

  validatorType = parseOrCreateValidatorType(tbl);

  setTableMapping();

  if (cassandraColumnNames.size() != serdeParams.getColumnNames().size()) {
    throw new SerDeException(serdeName + ": columns has " +
        serdeParams.getColumnNames().size() +
        " elements while cassandra.columns.mapping has " +
        cassandraColumnNames.size() + " elements" +
        " (counting the key if implicit)");
  }

  // we can only verify that "StandardColumn:" column families are mapped to MAP<String,?>
  for (int i = 0; i < cassandraColumnNames.size(); i++) {
    String cassandraColName = cassandraColumnNames.get(i);
    if (cassandraColName.endsWith(":")) {
      TypeInfo typeInfo = serdeParams.getColumnTypes().get(i);
      if ((typeInfo.getCategory() != Category.MAP) ||
          (((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getTypeName()
              != Constants.STRING_TYPE_NAME)) {

        throw new SerDeException(
            serdeName + ": Cassandra column family '"
                + cassandraColName
                + "' should be mapped to map<string,?> but is mapped to "
                + typeInfo.getTypeName());
      }
    }
  }
}