Java Code Examples for org.apache.spark.sql.types.StructType#apply()

The following examples show how to use org.apache.spark.sql.types.StructType#apply(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FrameRDDConverterUtils.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Locates the first vector-typed column in a DataFrame schema.
 *
 * @param dfschema schema as StructType
 * @param containsID if true, the leading field is treated as an ID column and skipped
 * @return 0-based position of the first VectorUDT column, counted after the
 *         skipped ID column when one is present; -1 if the schema has no such column
 */
private static int getColVectFromDFSchema(StructType dfschema, boolean containsID) {
	// Number of leading fields to skip (the ID column, when present).
	final int skip;
	if( containsID ) {
		skip = 1;
	}
	else {
		skip = 0;
	}

	final int numFields = dfschema.fields().length;
	int pos = skip;
	while( pos < numFields ) {
		StructField candidate = dfschema.apply(pos);
		boolean isVector = candidate.dataType() instanceof VectorUDT;
		if( isVector ) {
			// Report the index relative to the first non-ID column.
			return pos - skip;
		}
		pos++;
	}

	return -1;
}
 
Example 2
Source File: SparkMLEncoder.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a data field for the named schema column, choosing the operational
 * type and data type from the column's Spark data type.
 *
 * <p>Mapping: string to categorical/string, integral to continuous/integer,
 * double to continuous/double, boolean to categorical/boolean.
 *
 * @param name the field name; its value is used to look up the column in the schema
 * @return the data field created for the column
 * @throws IllegalArgumentException if the column's Spark data type is none of
 *         string, integral, double or boolean
 */
public DataField createDataField(FieldName name){
	StructField field = getSchema().apply(name.getValue());
	org.apache.spark.sql.types.DataType sparkDataType = field.dataType();

	// Each supported type returns immediately; anything left over is rejected below.
	if(sparkDataType instanceof StringType){
		return createDataField(name, OpType.CATEGORICAL, DataType.STRING);
	}

	if(sparkDataType instanceof IntegralType){
		return createDataField(name, OpType.CONTINUOUS, DataType.INTEGER);
	}

	if(sparkDataType instanceof DoubleType){
		return createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);
	}

	if(sparkDataType instanceof BooleanType){
		return createDataField(name, OpType.CATEGORICAL, DataType.BOOLEAN);
	}

	throw new IllegalArgumentException("Expected string, integral, double or boolean data type, got " + sparkDataType.typeName() + " data type");
}
 
Example 3
Source File: FrameRDDConverterUtils.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Locates the first vector-typed column in a DataFrame schema.
 *
 * @param dfschema schema as StructType
 * @param containsID if true, the leading field is treated as an ID column and skipped
 * @return 0-based position of the first VectorUDT column, counted after the
 *         skipped ID column when one is present; -1 if the schema has no such column
 */
private static int getColVectFromDFSchema(StructType dfschema, boolean containsID) {
	// Number of leading fields to skip (the ID column, when present).
	final int skip;
	if( containsID ) {
		skip = 1;
	}
	else {
		skip = 0;
	}

	final int numFields = dfschema.fields().length;
	int pos = skip;
	while( pos < numFields ) {
		StructField candidate = dfschema.apply(pos);
		boolean isVector = candidate.dataType() instanceof VectorUDT;
		if( isVector ) {
			// Report the index relative to the first non-ID column.
			return pos - skip;
		}
		pos++;
	}

	return -1;
}
 
Example 4
Source File: ColumnExploder.java    From jpmml-evaluator-spark with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Looks up the struct column in the given schema and returns that column's
 * data type as a StructType.
 *
 * @param schema the schema containing the struct column
 * @return the data type of the struct column, cast to StructType
 */
private StructType getStructSchema(StructType schema){
	return (StructType)schema.apply(getStructCol()).dataType();
}
 
Example 5
Source File: SqlResultsWriter.java    From geowave with Apache License 2.0 4 votes vote down vote up
/**
 * Writes every row of the SQL results to the output data store as a simple feature.
 *
 * <p>A feature type is derived from the results schema and registered with the
 * data store together with an index created by SpatialDimensionalityTypeProvider.
 * All rows are then collected and converted one by one: null column values are
 * skipped, the "geom" column is set as a Geometry, timestamp columns are converted
 * to Date, and all other values are set unchanged. Each feature ID is "result-"
 * followed by the formatted row number.
 *
 * @param typeName name for the generated feature type; when null, DEFAULT_TYPE_NAME
 *        is used instead and a warning is logged
 */
public void writeResults(String typeName) {
  final String resolvedTypeName;
  if (typeName != null) {
    resolvedTypeName = typeName;
  } else {
    resolvedTypeName = DEFAULT_TYPE_NAME;
    LOGGER.warn(
        "Using default type name (adapter id): '" + DEFAULT_TYPE_NAME + "' for SQL output");
  }

  final StructType schema = results.schema();
  final SimpleFeatureType featureType =
      SchemaConverter.schemaToFeatureType(schema, resolvedTypeName);
  final SimpleFeatureBuilder featureBuilder = new SimpleFeatureBuilder(featureType);
  final FeatureDataAdapter adapter = new FeatureDataAdapter(featureType);

  final DataStore store = outputDataStore.createDataStore();
  final Index spatialIndex =
      new SpatialDimensionalityTypeProvider().createIndex(new SpatialOptions());
  store.addType(adapter, spatialIndex);

  try (Writer writer = store.createWriter(adapter.getTypeName())) {
    final List<Row> rows = results.collectAsList();

    int rowNumber = 0;
    for (final Row row : rows) {
      final int fieldCount = schema.fields().length;

      for (int col = 0; col < fieldCount; col++) {
        final StructField field = schema.apply(col);
        final Object value = row.apply(col);
        if (value == null) {
          // Null cells leave the corresponding attribute unset.
          continue;
        }

        final String columnName = field.name();
        if (columnName.equals("geom")) {
          featureBuilder.set("geom", (Geometry) value);
        } else if (field.dataType() == DataTypes.TimestampType) {
          // Timestamps are handed to the builder as plain Date instances.
          final Timestamp timestamp = (Timestamp) value;
          featureBuilder.set(columnName, new Date(timestamp.getTime()));
        } else {
          featureBuilder.set(columnName, value);
        }
      }

      final SimpleFeature feature = featureBuilder.buildFeature("result-" + nf.format(rowNumber));
      writer.write(feature);
      rowNumber++;
    }
  }
}