Java Code Examples for org.apache.spark.sql.types.DataTypes#createStructField()

The following examples show how to use org.apache.spark.sql.types.DataTypes#createStructField(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TranslateFunction.java    From envelope with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a copy of the given schema in which every field name is prefixed with an underscore.
 * Each field keeps its data type, nullability and metadata.
 *
 * @param without the schema whose field names do not carry the underscore prefix
 * @return a new schema with the same fields, each renamed to {@code "_" + name}
 * @throws RuntimeException if a prefixed name is already a field name of {@code without}
 */
private StructType addFieldNameUnderscores(StructType without) {
  // Resolve the existing names once rather than rebuilding the list for every field.
  List<String> existingNames = Arrays.asList(without.fieldNames());
  List<StructField> withFields = Lists.newArrayList();

  for (StructField withoutField : without.fields()) {
    String withName = "_" + withoutField.name();
    if (existingNames.contains(withName)) {
      throw new RuntimeException("Can not append raw field '" + withName + "' because that " +
          "field already exists as a result of the translation");
    }

    withFields.add(DataTypes.createStructField(
        withName, withoutField.dataType(), withoutField.nullable(), withoutField.metadata()));
  }

  return DataTypes.createStructType(withFields);
}
 
Example 2
Source File: TestRowUtils.java    From envelope with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that appending two fields, one at a time, to a three-field row yields a five-field
 * row whose original and appended values can all be read back by field name.
 */
@Test
public void testAppendWithSchema() {
  StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType schema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));

  Row row = new RowWithSchema(schema, "hello", 1, 2.0);
  Row appendRow = RowUtils.append(row, "field4", DataTypes.BooleanType, false, true);
  appendRow = RowUtils.append(appendRow, "field5", DataTypes.StringType, false, "world");

  // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  assertEquals(5, appendRow.length());
  assertEquals("hello", appendRow.getAs("field1"));
  assertEquals(1, appendRow.getAs("field2"));
  assertEquals(2.0, appendRow.getAs("field3"));
  assertEquals(true, appendRow.getAs("field4"));
  assertEquals("world", appendRow.getAs("field5"));
}
 
Example 3
Source File: TestTimestampTimeModel.java    From envelope with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the fixture: a schema with a single nullable timestamp field named "time", a
 * time model configured for that field, and three rows with strictly increasing timestamps.
 */
@Before
public void before() {
  field = DataTypes.createStructField("time", DataTypes.TimestampType, true);
  schema = DataTypes.createStructType(Lists.newArrayList(field));

  tm = new TimestampTimeModel();
  tm.configure(ConfigFactory.empty());
  tm.configureFieldNames(Lists.newArrayList(field.name()));

  // One second plus 1000 nanoseconds.
  Timestamp earliest = new Timestamp(1000L);
  earliest.setNanos(1000);
  first = new RowWithSchema(schema, earliest);

  // Two seconds plus 100 nanoseconds.
  Timestamp middle = new Timestamp(2000L);
  middle.setNanos(100);
  second = new RowWithSchema(schema, middle);

  // Two seconds plus 101 nanoseconds: differs from the previous value by one nanosecond.
  Timestamp latest = new Timestamp(2000L);
  latest.setNanos(101);
  third = new RowWithSchema(schema, latest);
}
 
Example 4
Source File: TestRowUtils.java    From envelope with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that appending one three-field row to another produces a row equal to a six-field
 * row holding the fields and values of both, base row first.
 */
@Test
public void testAppendRow() {
  StructField f1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField f2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField f3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructField f4 = DataTypes.createStructField("field4", DataTypes.StringType, true);
  StructField f5 = DataTypes.createStructField("field5", DataTypes.IntegerType, true);
  StructField f6 = DataTypes.createStructField("field6", DataTypes.FloatType, true);

  StructType leftSchema = DataTypes.createStructType(Lists.newArrayList(f1, f2, f3));
  StructType rightSchema = DataTypes.createStructType(Lists.newArrayList(f4, f5, f6));
  StructType combinedSchema =
      DataTypes.createStructType(Lists.newArrayList(f1, f2, f3, f4, f5, f6));

  Row left = new RowWithSchema(leftSchema, "hello", 1, 1.0);
  Row right = new RowWithSchema(rightSchema, "world", -1, -1.0);
  Row expected = new RowWithSchema(combinedSchema, "hello", 1, 1.0, "world", -1, -1.0);

  assertEquals(expected, RowUtils.append(left, right));
}
 
Example 5
Source File: CustomDataFrame.java    From sparkResearch with Apache License 2.0 5 votes vote down vote up
/**
 * Reads comma-separated text lines, applies a programmatically built schema of two nullable
 * string columns ("name" and "age") to them, registers the result as the temporary view
 * "user" and prints the rows selected from that view.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    SparkSession sparkSession = SparkSession.builder()
            .master("local")
            .appName("spark app")
            .getOrCreate();

    try {
        // Create a plain JavaRDD of text lines
        JavaRDD<String> javaRDD = sparkSession.sparkContext().textFile("URL", 1).toJavaRDD();
        // The schema, encoded as a space-separated string of field names
        String schema = "name age";

        // Generate the schema from the schema string
        List<StructField> structFieldList = new ArrayList<>();
        for (String fieldName : schema.split(" ")) {
            StructField structField = DataTypes.createStructField(fieldName, DataTypes.StringType, true);
            structFieldList.add(structField);
        }
        StructType structType = DataTypes.createStructType(structFieldList);

        JavaRDD<Row> rowJavaRDD = javaRDD.map(new Function<String, Row>() {
            @Override
            public Row call(String v1) {
                String[] attributes = v1.split(",");
                // Fail with a descriptive message instead of a bare index error on short lines
                if (attributes.length < 2) {
                    throw new IllegalArgumentException(
                            "Expected at least 2 comma-separated values but got line: " + v1);
                }
                return RowFactory.create(attributes[0], attributes[1].trim());
            }
        });

        // Apply the schema to the RDD
        Dataset<Row> dataset = sparkSession.createDataFrame(rowJavaRDD, structType);

        // Create a temporary view
        dataset.createOrReplaceTempView("user");
        Dataset<Row> result = sparkSession.sql("select * from user");
        result.show();
    } finally {
        // Stop the session even when the job fails
        sparkSession.stop();
    }
}
 
Example 6
Source File: TestSchemaUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that subsetting a three-field schema to "field1" and "field3" yields exactly those
 * two fields, in that order.
 */
@Test
public void testSubsetSchemaSomeFields() {
  StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType schema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));

  StructType subset = SchemaUtils.subsetSchema(schema, Lists.newArrayList("field1", "field3"));

  // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  assertEquals(2, subset.fields().length);
  assertEquals("field1", subset.fields()[0].name());
  assertEquals("field3", subset.fields()[1].name());
}
 
Example 7
Source File: JavaRDDToDataset.java    From mmtf-spark with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a {@code JavaRDD<Row>} to a {@code Dataset<Row>}. The schema is
 * derived from the runtime types of the values in the first row, so this
 * method only supports simple data types (String, Integer, Long, Float,
 * Double, Boolean) and all data need to be not null.
 * 
 * @param data JavaRDD of Row objects
 * @param colNames names of the columns in a row
 * @return dataset with one non-nullable column per entry in colNames
 * @throws IllegalArgumentException if the number of column names does not
 *         match the row length, or if a value in the first row is null or
 *         of an unsupported type
 */
public static Dataset<Row> getDataset(JavaRDD<Row> data, String...colNames) {
	// create the schema for the dataset
	Row row = data.first();
	int length = row.length();
	
	if (length != colNames.length) {
		throw new IllegalArgumentException("colNames length does not match row length");
	}
	
	StructField[] sf = new StructField[length];
	
	for (int i = 0; i < length; i++) {
		Object o = row.get(i);

		// TODO add more types
		if (o instanceof String) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.StringType, false);
		} else if (o instanceof Integer) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.IntegerType, false);
		} else if (o instanceof Long) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.LongType, false);
		} else if (o instanceof Float) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.FloatType, false);
		} else if (o instanceof Double) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.DoubleType, false);
		} else if (o instanceof Boolean) {
			sf[i] = DataTypes.createStructField(colNames[i], DataTypes.BooleanType, false);
		} else {
			// Fail here instead of leaving a null field in the schema, which
			// would only surface later as an opaque error.
			String typeName = (o == null) ? "null" : o.getClass().getName();
			throw new IllegalArgumentException("Data type not implemented yet for column '"
					+ colNames[i] + "': " + typeName);
		}
	}
	StructType schema = new StructType(sf);

	// convert JavaRDD to Dataset
	SparkSession spark = SparkSession.builder().getOrCreate();
	return spark.createDataFrame(data, schema);
}
 
Example 8
Source File: TestRowUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that subsetting a row with an empty schema yields a row of length zero.
 */
@Test
public void testSubsetRowNoFields() {
  StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType schema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));
  StructType subsetSchema = DataTypes.createStructType(Lists.<StructField>newArrayList());

  Row row = new RowWithSchema(schema, "hello", 1, 2.0);
  Row subsetRow = RowUtils.subsetRow(row, subsetSchema);

  // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  assertEquals(0, subsetRow.length());
}
 
Example 9
Source File: TestSchemaUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that subtracting an empty list of field names from a three-field schema leaves all
 * three fields in their original order.
 */
@Test
public void testSubtractSchemaNoFields() {
  StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType schema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));

  StructType subset = SchemaUtils.subtractSchema(schema, Lists.<String>newArrayList());

  // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  assertEquals(3, subset.fields().length);
  assertEquals("field1", subset.fields()[0].name());
  assertEquals("field2", subset.fields()[1].name());
  assertEquals("field3", subset.fields()[2].name());
}
 
Example 10
Source File: InteractionCenter.java    From mmtf-spark with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the fields of a schema used to create Spark Datasets. The field
 * order must match the order in which the data are returned by the
 * {@code getAsObject()} method. Every field is nullable and its name ends
 * with the given index.
 * 
 * @param index
 *            an integer index to label an interaction center
 * @return schema to represent an interaction center in a Spark Dataset.
 */
public static StructField[] getStructFields(int index) {
    StructField atom = DataTypes.createStructField("atom" + index, DataTypes.StringType, true);
    StructField element = DataTypes.createStructField("element" + index, DataTypes.StringType, true);
    StructField group = DataTypes.createStructField("group" + index, DataTypes.StringType, true);
    StructField groupNum = DataTypes.createStructField("groupNum" + index, DataTypes.StringType, true);
    StructField type = DataTypes.createStructField("type" + index, DataTypes.StringType, true);
    StructField chain = DataTypes.createStructField("chain" + index, DataTypes.StringType, true);
    StructField nbFactor = DataTypes.createStructField("nbFactor" + index, DataTypes.FloatType, true);

    return new StructField[] { atom, element, group, groupNum, type, chain, nbFactor };
}
 
Example 11
Source File: TestRowUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that subsetting a row with its own full schema yields a row equal to the original.
 */
@Test
public void testSubsetRowAllFields() {
  StructType fullSchema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, true),
      DataTypes.createStructField("field2", DataTypes.IntegerType, true),
      DataTypes.createStructField("field3", DataTypes.FloatType, true)));

  Row original = new RowWithSchema(fullSchema, "hello", 1, 2.0);
  Row subset = RowUtils.subsetRow(original, fullSchema);

  assertEquals(original, subset);
}
 
Example 12
Source File: BulkDataSetWriter.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Builds the three-column schema: "conglomerateId" (long), "key" (string) and "value"
 * (binary). All three columns are nullable.
 *
 * @return the schema with the columns in the order listed above
 */
private StructType createSchema() {
    List<StructField> columns = new ArrayList<>();
    columns.add(DataTypes.createStructField("conglomerateId", DataTypes.LongType, true));
    columns.add(DataTypes.createStructField("key", DataTypes.StringType, true));
    columns.add(DataTypes.createStructField("value", DataTypes.BinaryType, true));

    return DataTypes.createStructType(columns);
}
 
Example 13
Source File: TargetColumnProducer.java    From jpmml-evaluator-spark with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates the non-nullable struct field for this producer's column. The field is named
 * by {@code getColumnName()} and typed by translating the data type of {@code getField()}.
 *
 * @param evaluator not used by this implementation
 * @return the struct field describing the column
 */
@Override
public StructField init(Evaluator evaluator){
	DataType fieldDataType = getField().getDataType();

	return DataTypes.createStructField(
		getColumnName(),
		SchemaUtil.translateDataType(fieldDataType),
		false);
}
 
Example 14
Source File: StringDatetimeTimeModel.java    From envelope with Apache License 2.0 4 votes vote down vote up
/**
 * Sets this model's field to a nullable string field named after the first supplied name.
 *
 * @param fieldNames the field names; only the first entry is used
 */
@Override
public void configureFieldNames(List<String> fieldNames) {
  String fieldName = fieldNames.get(0);
  this.field = DataTypes.createStructField(fieldName, DataTypes.StringType, true);
}
 
Example 15
Source File: LongMillisTimeModel.java    From envelope with Apache License 2.0 4 votes vote down vote up
/**
 * Sets this model's field to a nullable long field named after the first supplied name.
 *
 * @param fieldNames the field names; only the first entry is used
 */
@Override
public void configureFieldNames(List<String> fieldNames) {
  String fieldName = fieldNames.get(0);
  this.field = DataTypes.createStructField(fieldName, DataTypes.LongType, true);
}
 
Example 16
Source File: StringDateTimeModel.java    From envelope with Apache License 2.0 4 votes vote down vote up
/**
 * Sets this model's field to a nullable string field named after the first supplied name.
 *
 * @param fieldNames the field names; only the first entry is used
 */
@Override
public void configureFieldNames(List<String> fieldNames) {
  String fieldName = fieldNames.get(0);
  this.field = DataTypes.createStructField(fieldName, DataTypes.StringType, true);
}
 
Example 17
Source File: SQLBoolean.java    From spliceengine with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Creates a nullable boolean struct field with the given column name.
 *
 * @param columnName name to give the field
 * @return the struct field
 */
@Override
public StructField getStructField(String columnName) {
	final boolean nullable = true;
	return DataTypes.createStructField(columnName, DataTypes.BooleanType, nullable);
}
 
Example 18
Source File: SQLTinyint.java    From spliceengine with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Creates a nullable byte struct field with the given column name.
 *
 * @param columnName name to give the field
 * @return the struct field
 */
@Override
public StructField getStructField(String columnName) {
	final boolean nullable = true;
	return DataTypes.createStructField(columnName, DataTypes.ByteType, nullable);
}
 
Example 19
Source File: SQLTimestamp.java    From spliceengine with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Creates a nullable timestamp struct field with the given column name.
 *
 * @param columnName name to give the field
 * @return the struct field
 */
@Override
public StructField getStructField(String columnName) {
	final boolean nullable = true;
	return DataTypes.createStructField(columnName, DataTypes.TimestampType, nullable);
}
 
Example 20
Source File: SQLInteger.java    From spliceengine with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Creates a nullable integer struct field with the given column name.
 *
 * @param columnName name to give the field
 * @return the struct field
 */
@Override
public StructField getStructField(String columnName) {
	final boolean nullable = true;
	return DataTypes.createStructField(columnName, DataTypes.IntegerType, nullable);
}