Java Code Examples for org.apache.spark.sql.types.StringType

The following examples show how to use org.apache.spark.sql.types.StringType. They are extracted from open source projects; the originating project, source file, and license are noted above each example.
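Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the two ways StringType usually appears in Java code: as the DataTypes.StringType singleton when a schema is built, and as the target of an instanceof check when a schema is inspected. The class and column names are illustrative only.

import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StringType;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class StringTypeSketch {

  public static void main(String[] args) {
    // Build a schema with one string column and one integer column.
    StructType schema = DataTypes.createStructType(new StructField[] {
        DataTypes.createStructField("name", DataTypes.StringType, false),
        DataTypes.createStructField("count", DataTypes.IntegerType, false) });

    // Inspect the schema: string columns report a dataType that is an instance of StringType.
    for (StructField field : schema.fields()) {
      DataType dataType = field.dataType();
      if (dataType instanceof StringType) {
        System.out.println(field.name() + " is a string column");
      }
    }
  }
}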
Example 1
Source Project: iceberg   Source File: AvroWithSparkSchemaVisitor.java    License: Apache License 2.0
public static <T> T visit(DataType type, Schema schema, AvroWithSparkSchemaVisitor<T> visitor) {
  switch (schema.getType()) {
    case RECORD:
      Preconditions.checkArgument(type instanceof StructType, "Invalid struct: %s is not a struct", type);
      return visitRecord((StructType) type, schema, visitor);

    case UNION:
      return visitUnion(type, schema, visitor);

    case ARRAY:
      return visitArray(type, schema, visitor);

    case MAP:
      Preconditions.checkArgument(type instanceof MapType, "Invalid map: %s is not a map", type);
      MapType map = (MapType) type;
      Preconditions.checkArgument(map.keyType() instanceof StringType,
          "Invalid map: %s is not a string", map.keyType());
      return visitor.map(map, schema, visit(map.valueType(), schema.getValueType(), visitor));

    default:
      return visitor.primitive(type, schema);
  }
}
 
Example 2
static
public DataType translateDataType(org.apache.spark.sql.types.DataType sparkDataType){

	if(sparkDataType instanceof StringType){
		return DataType.STRING;
	} else

	if(sparkDataType instanceof IntegralType){
		return DataType.INTEGER;
	} else

	if(sparkDataType instanceof DoubleType){
		return DataType.DOUBLE;
	} else

	if(sparkDataType instanceof BooleanType){
		return DataType.BOOLEAN;
	} else

	{
		throw new IllegalArgumentException("Expected string, integral, double or boolean data type, got " + sparkDataType.typeName() + " data type");
	}
}
 
Example 3
Source Project: indexr   Source File: IndexRUtil.java    License: Apache License 2.0
public static SegmentSchema sparkSchemaToIndexRSchema(List<StructField> sparkSchema, IsIndexed isIndexed) {
    List<ColumnSchema> columns = new ArrayList<>();
    for (StructField f : sparkSchema) {
        SQLType type;
        if (f.dataType() instanceof IntegerType) {
            type = SQLType.INT;
        } else if (f.dataType() instanceof LongType) {
            type = SQLType.BIGINT;
        } else if (f.dataType() instanceof FloatType) {
            type = SQLType.FLOAT;
        } else if (f.dataType() instanceof DoubleType) {
            type = SQLType.DOUBLE;
        } else if (f.dataType() instanceof StringType) {
            type = SQLType.VARCHAR;
        } else if (f.dataType() instanceof DateType) {
            type = SQLType.DATE;
        } else if (f.dataType() instanceof TimestampType) {
            type = SQLType.DATETIME;
        } else {
            throw new IllegalStateException("Unsupported type: " + f.dataType());
        }
        columns.add(new ColumnSchema(f.name(), type, isIndexed.apply(f.name())));
    }
    return new SegmentSchema(columns);
}
 
Example 4
Source Project: stocator   Source File: TestSuite.java    License: Apache License 2.0
public void test16(SparkSession spark, Dataset<Row> schemaFlights, String containerOut, String type)
    throws Exception {
  System.out.println("*********************************");
  System.out.println("T16: Non overwrite mode " + containerOut);
  String o1 = containerOut + "myData/123";
  StructType schema = DataTypes
      .createStructType(new StructField[] { DataTypes.createStructField("NAME", DataTypes.StringType, false),
          DataTypes.createStructField("STRING_VALUE", DataTypes.StringType, false),
          DataTypes.createStructField("NUM_VALUE", DataTypes.IntegerType, false), });
  Row r1 = RowFactory.create("name1", "value1", 1);
  Row r2 = RowFactory.create("name2", "value2", 2);
  List<Row> rowList = ImmutableList.of(r1, r2);
  Dataset<Row> rows = spark.createDataFrame(rowList, schema);
  try {
    if (type.equals(Constants.PARQUET_TYPE)) {
      rows.write().mode(SaveMode.Overwrite).parquet(o1);
    } else if (type.equals(Constants.JSON_TYPE)) {
      rows.write().mode(SaveMode.Overwrite).json(o1);
    }
  } catch (Exception e) {
    deleteData(o1, spark.sparkContext().hadoopConfiguration(), dataCreate);
    throw e;
  } finally {
    deleteData(o1, spark.sparkContext().hadoopConfiguration(), dataCreate);
  }
}
 
Example 5
Source Project: iceberg   Source File: StructInternalRow.java    License: Apache License 2.0
@Override
@SuppressWarnings("checkstyle:CyclomaticComplexity")
public Object get(int ordinal, DataType dataType) {
  if (dataType instanceof IntegerType) {
    return getInt(ordinal);
  } else if (dataType instanceof LongType) {
    return getLong(ordinal);
  } else if (dataType instanceof StringType) {
    return getUTF8String(ordinal);
  } else if (dataType instanceof FloatType) {
    return getFloat(ordinal);
  } else if (dataType instanceof DoubleType) {
    return getDouble(ordinal);
  } else if (dataType instanceof DecimalType) {
    DecimalType decimalType = (DecimalType) dataType;
    return getDecimal(ordinal, decimalType.precision(), decimalType.scale());
  } else if (dataType instanceof BinaryType) {
    return getBinary(ordinal);
  } else if (dataType instanceof StructType) {
    return getStruct(ordinal, ((StructType) dataType).size());
  } else if (dataType instanceof ArrayType) {
    return getArray(ordinal);
  } else if (dataType instanceof MapType) {
    return getMap(ordinal);
  } else if (dataType instanceof BooleanType) {
    return getBoolean(ordinal);
  } else if (dataType instanceof ByteType) {
    return getByte(ordinal);
  } else if (dataType instanceof ShortType) {
    return getShort(ordinal);
  }
  return null;
}
 
Example 6
Source Project: iceberg   Source File: Reader.java    License: Apache License 2.0
/**
 * Converts the objects into instances used by Spark's InternalRow.
 *
 * @param value a data value
 * @param type the Spark data type
 * @return the value converted to the representation expected by Spark's InternalRow.
 */
private static Object convert(Object value, DataType type) {
  if (type instanceof StringType) {
    return UTF8String.fromString(value.toString());
  } else if (type instanceof BinaryType) {
    ByteBuffer buffer = (ByteBuffer) value;
    byte[] bytes = new byte[buffer.remaining()];
    buffer.get(bytes);
    return bytes;
  } else if (type instanceof DecimalType) {
    return Decimal.fromDecimal(value);
  }
  return value;
}
 
Example 7
Source Project: bunsen   Source File: SchemaConverterTest.java    License: Apache License 2.0
@Test
public void codingToStruct() {

  DataType codingType = getField(conditionSchema, true, "severity", "coding");

  Assert.assertTrue(getField(codingType, true, "system") instanceof StringType);
  Assert.assertTrue(getField(codingType, true, "version") instanceof StringType);
  Assert.assertTrue(getField(codingType, true, "code") instanceof StringType);
  Assert.assertTrue(getField(codingType, true, "display") instanceof StringType);
  Assert.assertTrue(getField(codingType, true, "userSelected") instanceof BooleanType);
}
 
Example 8
Source Project: bunsen   Source File: SchemaConverterTest.java    License: Apache License 2.0
@Test
public void codeableConceptToStruct() {

  DataType codeableType = getField(conditionSchema, true, "severity");

  Assert.assertTrue(codeableType instanceof StructType);
  Assert.assertTrue(getField(codeableType, true, "coding") instanceof ArrayType);
  Assert.assertTrue(getField(codeableType, true, "text") instanceof StringType);
}
 
Example 9
Source Project: bunsen   Source File: SchemaConverterTest.java    License: Apache License 2.0
@Test
public void expandChoiceFields() {
  Assert.assertTrue(getField(conditionSchema, true, "onsetPeriod") instanceof StructType);
  Assert.assertTrue(getField(conditionSchema, true, "onsetRange") instanceof StructType);
  Assert.assertTrue(getField(conditionSchema, true, "onsetDateTime") instanceof StringType);
  Assert.assertTrue(getField(conditionSchema, true, "onsetString") instanceof StringType);
  Assert.assertTrue(getField(conditionSchema, true, "onsetAge") instanceof StructType);
}
 
Example 10
Source Project: bunsen   Source File: SchemaConverterTest.java    License: Apache License 2.0
@Test
public void reference() {
  Assert.assertTrue(
      getField(observationSchema, true, "subject", "reference") instanceof StringType);
  Assert
      .assertTrue(getField(observationSchema, true, "subject", "display") instanceof StringType);
}
 
Example 11
public DataField createDataField(FieldName name){
	StructType schema = getSchema();

	StructField field = schema.apply(name.getValue());

	org.apache.spark.sql.types.DataType sparkDataType = field.dataType();

	if(sparkDataType instanceof StringType){
		return createDataField(name, OpType.CATEGORICAL, DataType.STRING);
	} else

	if(sparkDataType instanceof IntegralType){
		return createDataField(name, OpType.CONTINUOUS, DataType.INTEGER);
	} else

	if(sparkDataType instanceof DoubleType){
		return createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);
	} else

	if(sparkDataType instanceof BooleanType){
		return createDataField(name, OpType.CATEGORICAL, DataType.BOOLEAN);
	} else

	{
		throw new IllegalArgumentException("Expected string, integral, double or boolean data type, got " + sparkDataType.typeName() + " data type");
	}
}
 
Example 12
Source Project: indexr   Source File: IndexRUtil.java    License: Apache License 2.0
public static List<StructField> indexrSchemaToSparkSchema(SegmentSchema schema) {
    List<StructField> fields = new ArrayList<>();
    for (ColumnSchema cs : schema.getColumns()) {
        DataType dataType;
        switch (cs.getSqlType()) {
            case INT:
                dataType = DataTypes.IntegerType;
                break;
            case BIGINT:
                dataType = DataTypes.LongType;
                break;
            case FLOAT:
                dataType = DataTypes.FloatType;
                break;
            case DOUBLE:
                dataType = DataTypes.DoubleType;
                break;
            case VARCHAR:
                dataType = DataTypes.StringType;
                break;
            case DATE:
                dataType = DataTypes.DateType;
                break;
            case DATETIME:
                dataType = DataTypes.TimestampType;
                break;
            default:
                throw new IllegalStateException("Unsupported type: " + cs.getSqlType());
        }
        fields.add(new StructField(cs.getName(), dataType, scala.Boolean.box(false), Metadata.empty()));
    }
    return fields;
}
 
Example 13
Source Project: bunsen   Source File: SchemaConverterTest.java    License: Apache License 2.0
@Test
public void resourceHasId() {

  Assert.assertTrue(getField(conditionSchema, true, "id") instanceof StringType);
}
 
Example 14
Source Project: bunsen   Source File: SchemaConverterTest.java    License: Apache License 2.0
@Test
public void boundCodeToString() {

  Assert.assertTrue(getField(conditionSchema, true, "verificationStatus") instanceof StringType);
}
 
Example 15
Source Project: bunsen   Source File: SchemaConverterTest.java    License: Apache License 2.0
@Test
public void idToString() {
  Assert.assertTrue(getField(conditionSchema, true, "id") instanceof StringType);
}
 
Example 16
Source Project: bunsen   Source File: SchemaConverterTest.java    License: Apache License 2.0
@Test
public void narrativeToStruct() {

  Assert.assertTrue(getField(conditionSchema, true, "text", "status") instanceof StringType);
  Assert.assertTrue(getField(conditionSchema, true, "text", "div") instanceof StringType);
}
 
Example 17
Source Project: bunsen   Source File: SchemaConverterTest.java    License: Apache License 2.0
@Test
public void timeToString() {
  Assert.assertTrue((getField(observationSchema, true, "valueTime") instanceof StringType));
}