Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils#getTypeInfosFromTypeString()

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils#getTypeInfosFromTypeString(). They are drawn from several open-source projects; each example notes the project and source file it comes from.
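As a quick orientation: getTypeInfosFromTypeString() parses a Hive type string into a list of TypeInfo objects, one per top-level type. A minimal self-contained sketch (the type string here is an arbitrary illustration, not taken from any example below):

import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringDemo {
    public static void main(String[] args) {
        // Parse a comma-separated Hive type string into TypeInfo objects.
        List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(
                "int,string,array<double>,map<string,int>");
        for (TypeInfo typeInfo : types) {
            System.out.println(typeInfo.getTypeName());
        }
    }
}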
Example 1
Source File: HiveUtils.java    From elasticsearch-hadoop with Apache License 2.0
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // extract column info - don't use Hive constants as they were renamed in 0.9, breaking compatibility
    // the column names are saved because the inspector passed to #serialize doesn't preserve them (maybe because it's an external table)
    // use the class since StructType requires it ...
    List<String> columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
    List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

    // create a standard writable Object Inspector - used later on by serialization/deserialization
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();

    for (TypeInfo typeInfo : colTypes) {
        inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
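Example 1 builds writable object inspectors, whereas several later examples (3, 5, 11, 19) build standard Java object inspectors. Both factories start from the same parsed TypeInfo; the difference is the object model the resulting inspector expects at runtime. A minimal sketch contrasting the two calls (the bigint column is illustrative):

TypeInfo info = TypeInfoUtils.getTypeInfosFromTypeString("bigint").get(0);

// Writable model: the inspector expects Hadoop Writable values (e.g. LongWritable).
ObjectInspector writableOI =
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(info);

// Plain Java model: the inspector expects java.lang.Long values.
ObjectInspector javaOI =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(info);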
 
Example 2
Source File: MDSSerde.java    From multiple-dimension-spread with Apache License 2.0
private StructTypeInfo getColumnProjectionTypeInfo( final String columnNameProperty , final String columnTypeProperty , final String projectionColumnNames ){
  Set<String> columnNameSet = new HashSet<String>();
  for( String columnName : projectionColumnNames.split(",") ){
    columnNameSet.add( columnName );
  }

  ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
  String[] splitNames = columnNameProperty.split(",");

  ArrayList<String> projectionColumnNameList = new ArrayList<String>();
  ArrayList<TypeInfo> projectionFieldTypeList = new ArrayList<TypeInfo>();
  for( int i = 0 ; i < fieldTypes.size() ; i++ ){
    if( columnNameSet.contains( splitNames[i] ) ){
      projectionColumnNameList.add( splitNames[i] );
      projectionFieldTypeList.add( fieldTypes.get(i) );
    }
    filedIndexMap.put( splitNames[i] , i );
  }
  StructTypeInfo rootType = new StructTypeInfo();

  rootType.setAllStructFieldNames( projectionColumnNameList );
  rootType.setAllStructFieldTypeInfos( projectionFieldTypeList );

  return rootType;
}
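Note that Example 2 (and Example 6 below) assemble the StructTypeInfo by hand via its setters, while most other examples obtain it from TypeInfoFactory.getStructTypeInfo(names, types), which caches instances. A minimal sketch of the factory variant (column names and types are illustrative):

List<String> names = Arrays.asList("id", "payload");
List<TypeInfo> types =
        TypeInfoUtils.getTypeInfosFromTypeString("bigint,map<string,string>");
StructTypeInfo structType =
        (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types);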
 
Example 3
Source File: JSONSerDe.java    From searchanalytics-bigdata with MIT License
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help
 * perform actual serialization and deserialization of data.
 */
@Override
public void initialize(final Configuration conf, final Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	final String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	// Change column names to lower case.
	colNames = Arrays.asList(colNamesStr.toLowerCase().split(","));
	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	final String colTypesStr = tbl
			.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	final List<TypeInfo> colTypes = TypeInfoUtils
			.getTypeInfosFromTypeString(colTypesStr);
	rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
			colNames, colTypes);
	rowOI = TypeInfoUtils
			.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
 
Example 4
Source File: JsonSerdeUtilsTest.java    From incubator-hivemall with Apache License 2.0
@Test
public void testMapValues() throws SerDeException {
    List<String> columnNames = Arrays.asList("a,b".split(","));
    List<TypeInfo> columnTypes =
            TypeInfoUtils.getTypeInfosFromTypeString("array<string>,map<string,int>");

    Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} ");
    Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}");
    Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");

    List<Object> expected1 = Arrays.<Object>asList(Arrays.<String>asList("aaa"),
        createHashMapStringInteger("bbb", 1));
    List<Object> expected2 = Arrays.<Object>asList(Arrays.<String>asList("yyy"),
        createHashMapStringInteger("zzz", 123));
    List<Object> expected3 = Arrays.<Object>asList(Arrays.<String>asList("a"),
        createHashMapStringInteger("x", 11, "y", 22, "z", null));

    List<Object> result1 = JsonSerdeUtils.deserialize(text1, columnNames, columnTypes);
    List<Object> result2 = JsonSerdeUtils.deserialize(text2, columnNames, columnTypes);
    List<Object> result3 = JsonSerdeUtils.deserialize(text3, columnNames, columnTypes);

    Assert.assertEquals(expected1, result1);
    Assert.assertEquals(expected2, result2);
    Assert.assertEquals(expected3, result3);
}
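The createHashMapStringInteger helper is not shown in the snippet above; a plausible reconstruction (an assumption inferred from the call sites, which pass alternating key/value arguments) would be:

// Hypothetical reconstruction of the test helper used above.
private static Map<String, Integer> createHashMapStringInteger(Object... keyValuePairs) {
    Map<String, Integer> map = new HashMap<>();
    for (int i = 0; i < keyValuePairs.length; i += 2) {
        map.put((String) keyValuePairs[i], (Integer) keyValuePairs[i + 1]);
    }
    return map;
}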
 
Example 5
Source File: JSONCDHSerDe.java    From bigdata-tutorial with Apache License 2.0
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help perform
 * actual serialization and deserialization of data.
 */
@Override
public void initialize(Configuration conf, Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	colNames = Arrays.asList(colNamesStr.split(","));

	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	List<TypeInfo> colTypes =
			TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

	rowTypeInfo =
			(StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
	rowOI =
			TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
 
Example 6
Source File: OrcSerde.java    From hive-dwrf with Apache License 2.0
@Override
public void initialize(Configuration conf, Properties table) {
  // Read the configuration parameters
  String columnNameProperty = table.getProperty("columns");
  // NOTE: if "columns.types" is missing, all columns will be of String type
  String columnTypeProperty = table.getProperty("columns.types");

  // Parse the configuration parameters
  ArrayList<String> columnNames = EMPTY_STRING_ARRAYLIST;
  if (columnNameProperty != null && columnNameProperty.length() > 0) {
    String[] splits = columnNameProperty.split(",");
    columnNames = new ArrayList<String>(splits.length);

    for(String name: splits) {
      columnNames.add(name);
    }
  }
  if (columnTypeProperty == null) {
    // Default type: all string
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < columnNames.size(); i++) {
      if (i > 0) {
        sb.append(":");
      }
      sb.append("string");
    }
    columnTypeProperty = sb.toString();
  }

  ArrayList<TypeInfo> fieldTypes =
    TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  StructTypeInfo rootType = new StructTypeInfo();
  rootType.setAllStructFieldNames(columnNames);
  rootType.setAllStructFieldTypeInfos(fieldTypes);
  inspector = new OrcLazyRowObjectInspector(rootType);
}
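Example 6 joins the default types with ':' while other examples (e.g. 4 and 10) pass comma-separated strings; as the examples collectively suggest, the parser behind getTypeInfosFromTypeString() accepts either delimiter between top-level types. A small illustration (type strings chosen for this sketch):

// Both delimiters parse to the same three primitive types.
List<TypeInfo> colonSeparated = TypeInfoUtils.getTypeInfosFromTypeString("string:int:double");
List<TypeInfo> commaSeparated = TypeInfoUtils.getTypeInfosFromTypeString("string,int,double");
// colonSeparated.equals(commaSeparated) evaluates to true.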
 
Example 7
Source File: TestHiveSchemaConverter.java    From parquet-mr with Apache License 2.0
private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) {
  List<TypeInfo> columnTypes;

  if (columnsTypeStr.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr);
  }

  return columnTypes;
}
 
Example 8
Source File: ParquetHiveSerDe.java    From parquet-mr with Apache License 2.0
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {

  final TypeInfo rowTypeInfo;
  final List<String> columnNames;
  final List<TypeInfo> columnTypes;
  // Get column names and sort order
  final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
  final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  if (columnNames.size() != columnTypes.size()) {
    throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " +
      "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
      columnTypes);
  }
  // Create row related objects
  rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

  // Stats part
  stats = new SerDeStats();
  serializedSize = 0;
  deserializedSize = 0;
  status = LAST_OPERATION.UNKNOWN;
}
 
Example 9
Source File: MapredParquetOutputFormat.java    From parquet-mr with Apache License 2.0
/**
 *
 * Create the parquet schema from the hive schema, and return the RecordWriterWrapper which
 * contains the real output format
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
    final JobConf jobConf,
    final Path finalOutPath,
    final Class<? extends Writable> valueClass,
    final boolean isCompressed,
    final Properties tableProperties,
    final Progressable progress) throws IOException {

  LOG.info("creating new record writer...{}", this);

  final String columnNameProperty = tableProperties.getProperty(IOConstants.COLUMNS);
  final String columnTypeProperty = tableProperties.getProperty(IOConstants.COLUMNS_TYPES);
  List<String> columnNames;
  List<TypeInfo> columnTypes;

  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }

  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }

  DataWritableWriteSupport.setSchema(HiveSchemaConverter.convert(columnNames, columnTypes), jobConf);
  return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress);
}
 
Example 10
Source File: AvroSchemaGenerator.java    From HiveKa with Apache License 2.0
public Schema getSchema(String columnNamesStr, String columnTypesStr,
                        String columnCommentsStr, String namespace, String name,
                        String doc) {
  List<String> columnNames = Arrays.asList(columnNamesStr.split(","));
  List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesStr);
  List<String> columnComments;
  if (columnCommentsStr.isEmpty()) {
    columnComments = new ArrayList<String>();
  } else {
    columnComments = Arrays.asList(columnCommentsStr.split(","));
  }

  return typeInfoToSchema.convert(columnNames, columnTypes, columnComments, namespace, name, doc);
}
 
Example 11
Source File: LWSerDe.java    From hive-solr with Apache License 2.0
@Override
public void initialize(Configuration conf, Properties tblProperties) throws SerDeException {
  colNames = Arrays.asList(tblProperties.getProperty(serdeConstants.LIST_COLUMNS).split(","));
  colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tblProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES));
  typeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
  inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
  row = new ArrayList<>();
  enableFieldMapping = Boolean.valueOf(tblProperties.getProperty(ENABLE_FIELD_MAPPING, "false"));
}
 
Example 12
Source File: IndexRSerde.java    From indexr with Apache License 2.0
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

    if (Strings.isEmpty(columnNameProperty)) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (Strings.isEmpty(columnTypeProperty)) {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(StringUtils.repeat("string", ":", columnNames.size()));
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("IndexRHiveSerde initialization failed. Number of column " +
                "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
                columnTypes);
    }

    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    stats = new SerDeStats();
    serdeSize = 0;
}
 
Example 13
Source File: FromJsonUDF.java    From incubator-hivemall with Apache License 2.0
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    if (argOIs.length != 2 && argOIs.length != 3) {
        throw new UDFArgumentException(
            "from_json takes two or three arguments: " + argOIs.length);
    }

    this.jsonOI = HiveUtils.asStringOI(argOIs[0]);

    String typeString = HiveUtils.getConstString(argOIs[1]);
    this.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(typeString);

    if (argOIs.length == 3) {
        final ObjectInspector argOI2 = argOIs[2];
        if (HiveUtils.isConstString(argOI2)) {
            String names = HiveUtils.getConstString(argOI2);
            this.columnNames = ArrayUtils.asKryoSerializableList(names.split(","));
        } else if (HiveUtils.isConstStringListOI(argOI2)) {
            this.columnNames =
                    ArrayUtils.asKryoSerializableList(HiveUtils.getConstStringArray(argOI2));
        } else {
            throw new UDFArgumentException("Expected `const array<string>` or `const string`"
                    + " but got an unexpected OI type for the third argument: " + argOI2);
        }
    }

    return getObjectInspector(columnTypes, columnNames);
}
 
Example 14
Source File: JsonSerdeUtilsTest.java    From incubator-hivemall with Apache License 2.0
@Test
public void testRWNull() throws Exception {
    List<Object> nlist = new ArrayList<Object>(13);
    {
        nlist.add(null); // tinyint
        nlist.add(null); // smallint
        nlist.add(null); // int
        nlist.add(null); // bigint
        nlist.add(null); // double
        nlist.add(null); // float
        nlist.add(null); // string
        nlist.add(null); // string
        nlist.add(null); // struct
        nlist.add(null); // array
        nlist.add(null); // map
        nlist.add(null); // bool
        nlist.add(null); // complex
        nlist.add(null); //decimal(5,2)
        nlist.add(null); //char(10)
        nlist.add(null); //varchar(20)
        nlist.add(null); //date
        nlist.add(null); //timestamp
        nlist.add(null); //binary
    }

    DefaultHCatRecord r = new DefaultHCatRecord(nlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized =
            JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(nlist, deserialized);
}
 
Example 15
Source File: JsonSerdeUtilsTest.java    From incubator-hivemall with Apache License 2.0
@Test
public void testRW() throws Exception {
    List<Object> rlist = new ArrayList<Object>(13);
    {
        rlist.add(new Byte("123"));
        rlist.add(new Short("456"));
        rlist.add(new Integer(789));
        rlist.add(new Long(1000L));
        rlist.add(new Double(5.3D));
        rlist.add(new Float(2.39F));
        rlist.add(new String("hcat\nand\nhadoop"));
        rlist.add(null);

        List<Object> innerStruct = new ArrayList<Object>(2);
        innerStruct.add(new String("abc"));
        innerStruct.add(new String("def"));
        rlist.add(innerStruct);

        List<Integer> innerList = new ArrayList<Integer>();
        innerList.add(314);
        innerList.add(007);
        rlist.add(innerList);

        Map<Short, String> map = new HashMap<Short, String>(3);
        map.put(new Short("2"), "hcat is cool");
        map.put(new Short("3"), "is it?");
        map.put(new Short("4"), "or is it not?");
        rlist.add(map);

        rlist.add(new Boolean(true));

        List<Object> c1 = new ArrayList<Object>();
        List<Object> c1_1 = new ArrayList<Object>();
        c1_1.add(new Integer(12));
        List<Object> i2 = new ArrayList<Object>();
        List<Integer> ii1 = new ArrayList<Integer>();
        ii1.add(new Integer(13));
        ii1.add(new Integer(14));
        i2.add(ii1);
        Map<String, List<?>> ii2 = new HashMap<String, List<?>>();
        List<Integer> iii1 = new ArrayList<Integer>();
        iii1.add(new Integer(15));
        ii2.put("phew", iii1);
        i2.add(ii2);
        c1_1.add(i2);
        c1.add(c1_1);
        rlist.add(c1);
        rlist.add(HiveDecimal.create(new BigDecimal("123.45")));//prec 5, scale 2
        rlist.add(new HiveChar("hive\nchar", 10));
        rlist.add(new HiveVarchar("hive\nvarchar", 20));
        rlist.add(Date.valueOf("2014-01-07"));
        rlist.add(new Timestamp(System.currentTimeMillis()));
        rlist.add("hive\nbinary".getBytes("UTF-8"));
    }

    DefaultHCatRecord r = new DefaultHCatRecord(rlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized =
            JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(rlist, deserialized);
}
 
Example 16
Source File: JsonSerdeUtilsTest.java    From incubator-hivemall with Apache License 2.0
/**
 * This test checks that our JSON deserialization is not too strict, as per HIVE-6166
 *
 * i.e., if our schema is "s:struct<a:int,b:string>,k:int", and we pass in data that looks like:
 * 
 * <pre>
 *                        {
 *                            "x" : "abc" ,
 *                            "t" : {
 *                                "a" : "1",
 *                                "b" : "2",
 *                                "c" : [
 *                                    { "x" : 2 , "y" : 3 } ,
 *                                    { "x" : 3 , "y" : 2 }
 *                                ]
 *                            } ,
 *                            "s" : {
 *                                "a" : 2 ,
 *                                "b" : "blah",
 *                                "c": "woo"
 *                            }
 *                        }
 * </pre>
 *
 * Then it should still work, and ignore the "x" and "t" field and "c" subfield of "s", and it
 * should read k as null.
 */
@Test
public void testLooseJsonReadability() throws Exception {
    List<String> columnNames = Arrays.asList("s,k".split(","));
    List<TypeInfo> columnTypes =
            TypeInfoUtils.getTypeInfosFromTypeString("struct<a:int,b:string>,int");

    Text jsonText1 = new Text("{ \"x\" : \"abc\" , "
            + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
            + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } }");

    Text jsonText2 = new Text("{ \"x\" : \"abc\" , "
            + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
            + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } , " + "\"k\" : 113 "
            + "}");

    List<Object> expected1 = Arrays.<Object>asList(Arrays.asList(2, "blah"), null);
    List<Object> expected2 = Arrays.<Object>asList(Arrays.asList(2, "blah"), 113);
    List<Object> result1 = JsonSerdeUtils.deserialize(jsonText1, columnNames, columnTypes);
    List<Object> result2 = JsonSerdeUtils.deserialize(jsonText2, columnNames, columnTypes);

    Assert.assertEquals(expected1, result1);
    Assert.assertEquals(expected2, result2);
}
 
Example 17
Source File: EmoSerDe.java    From emodb with Apache License 2.0
@Override
public void initialize(Configuration config, Properties properties)
        throws SerDeException {
    // Get the column names and types from the configuration properties
    String columnNamesProperty = properties.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypesProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);

    List<String> columnNames;
    List<TypeInfo> columnTypes;
    List<ObjectInspector> columnInspectors;

    if (columnNamesProperty.isEmpty()) {
        columnNames = ImmutableList.of();
    } else {
        columnNames = Arrays.asList(columnNamesProperty.split(","));
    }

    if (columnTypesProperty.isEmpty()) {
        columnTypes = ImmutableList.of();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesProperty);
    }

    int numColumns = columnNames.size();
    checkArgument(columnTypes.size() == numColumns);

    _columns = Lists.newArrayListWithCapacity(numColumns);
    _values = Lists.newArrayListWithCapacity(numColumns);
    columnInspectors = Lists.newArrayListWithCapacity(numColumns);

    // Initialize the types and inspectors for each column
    for (int i=0; i < numColumns; i++) {
        TypeInfo type = columnTypes.get(i);

        ObjectInspector columnInspector = getObjectInspectorForType(type);

        _columns.add(Maps.immutableEntry(columnNames.get(i), type));
        _values.add(null);

        columnInspectors.add(columnInspector);
    }

    _inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnInspectors);
}
 
Example 18
Source File: ExcelSerde.java    From hadoopoffice with Apache License 2.0
/**
 * Initializes the SerDe \n
 * You can define in the table properties (additionally to the standard Hive properties) the following options \n
 * office.hive.write.defaultSheetName: The sheetname to which data should be written (note: as an input any sheets can be read or selected sheets according to HadoopOffice configuration values) \n
 * Any of the HadoopOffice options (hadoopoffice.*), such as encryption, signing, low footprint mode, linked workbooks, can be defined in the table properties @see <a href="https://github.com/ZuInnoTe/hadoopoffice/wiki/Hadoop-File-Format">HadoopOffice configuration</a>\n
 * @param conf Hadoop Configuration
 * @param prop table properties. 
 * @param partitionProperties ignored. Partitions are not supported.
 */

@Override
public void initialize(Configuration conf, Properties prop, Properties partitionProperties) throws SerDeException {
	LOG.debug("Initializing Excel Hive Serde");
	LOG.debug("Configuring Hive-only options");
	// configure hadoopoffice specific hive options

	String defaultSheetNameStr = prop.getProperty(ExcelSerde.CONF_DEFAULTSHEETNAME);
	if (defaultSheetNameStr != null) {
		this.defaultSheetName = defaultSheetNameStr;
	}
	// copy hadoopoffice options
	LOG.debug("Configuring HadoopOffice Format");
	Set<Entry<Object, Object>> entries = prop.entrySet();
	for (Entry<Object, Object> entry : entries) {
		if ((entry.getKey() instanceof String) && ((String) entry.getKey()).startsWith(ExcelSerde.HOSUFFIX)) {
			if (("TRUE".equalsIgnoreCase((String) entry.getValue()))
					|| ("FALSE".equalsIgnoreCase(((String) entry.getValue())))) {
				conf.setBoolean((String) entry.getKey(), Boolean.valueOf((String) entry.getValue()));
			} else {
				conf.set((String) entry.getKey(), (String) entry.getValue());
			}
		}
	}

	// create object inspector (always a struct = row)
	LOG.debug("Creating object inspector");
	this.columnNames = Arrays.asList(prop.getProperty(serdeConstants.LIST_COLUMNS).split(","));
	this.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(prop.getProperty(serdeConstants.LIST_COLUMN_TYPES));
	final List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
	for (TypeInfo currentColumnType : columnTypes) {
		columnOIs.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(currentColumnType));
	}
	this.oi = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
	// create converter
	LOG.debug("Creating converter");
	HadoopOfficeReadConfiguration hocr = new HadoopOfficeReadConfiguration(conf);
	this.readConverter = new ExcelConverterSimpleSpreadSheetCellDAO(hocr.getSimpleDateFormat(), hocr.getSimpleDecimalFormat(), hocr.getSimpleDateTimeFormat());
	HadoopOfficeWriteConfiguration howc = new HadoopOfficeWriteConfiguration(conf,"");
	this.writeConverter = new ExcelConverterSimpleSpreadSheetCellDAO(howc.getSimpleDateFormat(), howc.getSimpleDecimalFormat(), howc.getSimpleDateTimeFormat());
	// configure writing of header
	this.writeHeader=howc.getWriteHeader();
	GenericDataType[] columnsGD = new GenericDataType[columnNames.size()];
	for (int i = 0; i < columnOIs.size(); i++) {
		ObjectInspector currentOI = columnOIs.get(i);
		if (currentOI instanceof BooleanObjectInspector) {
			columnsGD[i] = new GenericBooleanDataType();
		} else if (currentOI instanceof DateObjectInspector) {
			columnsGD[i] = new GenericDateDataType();
		} else if (currentOI instanceof TimestampObjectInspector) {
			columnsGD[i] = new GenericTimestampDataType();
		}
		else if (currentOI instanceof ByteObjectInspector) {
			columnsGD[i] = new GenericByteDataType();
		} else if (currentOI instanceof ShortObjectInspector) {
			columnsGD[i] = new GenericShortDataType();
		} else if (currentOI instanceof IntObjectInspector) {
			columnsGD[i] = new GenericIntegerDataType();
		} else if (currentOI instanceof LongObjectInspector) {
			columnsGD[i] = new GenericLongDataType();
		} else if (currentOI instanceof DoubleObjectInspector) {
			columnsGD[i] = new GenericDoubleDataType();
		} else if (currentOI instanceof FloatObjectInspector) {
			columnsGD[i] = new GenericFloatDataType();
		} else if (currentOI instanceof HiveDecimalObjectInspector) {
			HiveDecimalObjectInspector currentOIHiveDecimalOI = (HiveDecimalObjectInspector) currentOI;
			columnsGD[i] = new GenericBigDecimalDataType(currentOIHiveDecimalOI.precision(),
					currentOIHiveDecimalOI.scale());
		} else if (currentOI instanceof StringObjectInspector) {
			columnsGD[i] = new GenericStringDataType();
		} else {
			LOG.warn("Could not detect desired datatype for column " + i + ". Type " + currentOI.getTypeName()
					+ ". Using String");
			columnsGD[i] = new GenericStringDataType();
		}
	}
	this.readConverter.setSchemaRow(columnsGD);
	this.writeConverter.setSchemaRow(columnsGD);
	// create nullrow
	this.nullRow = new Object[this.columnNames.size()];
	// set writerow
	this.currentWriteRow = 0;
	// set outputrow
	this.outputRow = new Object[this.columnNames.size()];
	LOG.debug("Finished Initialization");
}
 
Example 19
Source File: SolrSerde.java    From hive-solr with MIT License
@Override
public void initialize(@Nullable Configuration configuration, Properties tbl) throws SerDeException {

    row=new ArrayList<Object>();

    // Read Column Names
    String columnNameProp = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    if (columnNameProp != null && columnNameProp.length() > 0) {
        columnNames = Arrays.asList(columnNameProp.split(","));
    } else {
        columnNames = new ArrayList<String>();
    }

    // Read Column Types
    String columnTypeProp = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    // default all string
    if (columnTypeProp == null) {
        String[] types = new String[columnNames.size()];
        Arrays.fill(types, 0, types.length, serdeConstants.STRING_TYPE_NAME);
        columnTypeProp = StringUtils.join(types, ":");
    }
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProp);

    // Check column and types equals
    if (columnTypes.size() != columnNames.size()) {
        throw new SerDeException("len(columnNames) != len(columntTypes)");
    }

    // Create ObjectInspectors from the type information for each column
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>();
    ObjectInspector oi;
    for (int c = 0; c < columnNames.size(); c++) {
        oi = TypeInfoUtils
                .getStandardJavaObjectInspectorFromTypeInfo(columnTypes
                        .get(c));
        columnOIs.add(oi);
    }
    objectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
}
 
Example 20
Source File: SMSerDe.java    From spliceengine with GNU Affero General Public License v3.0
/**
  * An initialization function used to gather information about the table.
  * Typically, a SerDe implementation will be interested in the list of
  * column names and their types. That information will be used to help
  * perform actual serialization and deserialization of data.
  */
 //@Override
 public void initialize(Configuration conf, Properties tbl) throws SerDeException {
 	if (Log.isDebugEnabled())
 		SpliceLogUtils.debug(Log, "initialize with conf=%s, tbl=%s",conf,tbl);
     // Get a list of the table's column names.
     tableName = tbl.getProperty(MRConstants.SPLICE_TABLE_NAME);
     String hbaseDir = null;
     if (conf != null) {
         hbaseDir = conf.get(HConstants.HBASE_DIR);
     }
     if (hbaseDir == null)
     	hbaseDir = System.getProperty(HConstants.HBASE_DIR);
     if (hbaseDir == null)
     	throw new SerDeException("hbase root directory not set, please include hbase.rootdir in config or via -D system property ...");
     if (conf != null) {
         conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName);
         conf.set(MRConstants.SPLICE_JDBC_STR, tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
         conf.set(HConstants.HBASE_DIR, hbaseDir);
         if (conf.get(HiveConf.ConfVars.POSTEXECHOOKS.varname) == null) {
             conf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "com.splicemachine.mrio.api.hive.PostExecHook");
         }
         if (conf.get(HiveConf.ConfVars.ONFAILUREHOOKS.varname) == null) {
             conf.set(HiveConf.ConfVars.ONFAILUREHOOKS.varname, "com.splicemachine.mrio.api.hive.FailureExecHook");
         }
     }

     if (sqlUtil == null)
         sqlUtil = SMSQLUtil.getInstance(tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
     String colNamesStr = tbl.getProperty(Constants.LIST_COLUMNS);
     colNames.clear();
     for (String split: colNamesStr.split(","))
     	colNames.add(split.toUpperCase());
     String colTypesStr = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
     colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
     objectCache = new ArrayList<Object>(colTypes.size());
     if (tableName != null) {
         tableName = tableName.trim().toUpperCase();
         try {
             if (!sqlUtil.checkTableExists(tableName))
             	throw new SerDeException(String.format("table %s does not exist...",tableName));
             if (conf != null) {
                 ScanSetBuilder tableScannerBuilder = sqlUtil.getTableScannerBuilder(tableName, colNames);
                 conf.set(MRConstants.SPLICE_SCAN_INFO, tableScannerBuilder.base64Encode());

               //  TableContext tableContext = sqlUtil.createTableContext(tableName, tableScannerBuilder);
               //  conf.set(MRConstants.SPLICE_TBLE_CONTEXT, tableContext.getTableContextBase64String());
             }
         } catch (Exception e) {
             throw new SerDeException(e);
         }
     }
      
 	if (Log.isDebugEnabled())
 		SpliceLogUtils.debug(Log, "generating hive info colNames=%s, colTypes=%s",colNames,colTypes);

     
     rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
     rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
     //serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, getClass().getName());
     Log.info("--------Finished initialize");
 }