Java Code Examples for org.apache.spark.sql.types.StructType#add()

The following examples show how to use org.apache.spark.sql.types.StructType#add(). Each example is taken from an open-source project; the source file, project, and license are noted above the code.
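Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below; the class name is made up and the column names are borrowed from Example 2) showing the three add() overloads that recur in the examples: name plus DataType plus a nullability flag, name plus a type-name string, and a prebuilt StructField.

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class StructTypeAddSketch {
    public static void main(String[] args) {
        // StructType is immutable: add() returns a new schema, so the result must be reassigned.
        StructType schema = new StructType();

        // Overload 1: column name + DataType + nullable flag (used in Examples 1 and 2).
        schema = schema.add("TRANS_ID", DataTypes.LongType, false);

        // Overload 2: column name + type name given as a string (used in the Zeppelin shims, Examples 3-5).
        schema = schema.add("LSTG_FORMAT_NAME", "string");

        // Overload 3: a prebuilt StructField (used in Examples 6-8).
        StructField price = DataTypes.createStructField("PRICE", DataTypes.createDecimalType(19, 4), true);
        schema = schema.add(price);

        schema.printTreeString();
    }
}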
Example 1
Source File: LocalWithSparkSessionTest.java    From kylin-on-parquet-v2 with Apache License 2.0
protected static void populateSSWithCSVData(KylinConfig kylinConfig, String project, SparkSession sparkSession) {
    ProjectInstance projectInstance = ProjectManager.getInstance(kylinConfig).getProject(project);
    Preconditions.checkArgument(projectInstance != null);
    for (String table : projectInstance.getTables()) {
        if ("DEFAULT.STREAMING_TABLE".equals(table)) {
            continue;
        }
        if (!new File(String.format(Locale.ROOT, CSV_TABLE_DIR, table)).exists()) {
            continue;
        }
        TableDesc tableDesc = TableMetadataManager.getInstance(kylinConfig).getTableDesc(table, project);
        ColumnDesc[] columns = tableDesc.getColumns();
        // Build the schema column by column; add() returns a new StructType each time.
        StructType schema = new StructType();
        for (ColumnDesc column : columns) {
            schema = schema.add(column.getName(), convertType(column.getType()), false);
        }
        Dataset<Row> ret = sparkSession.read().schema(schema).csv(String.format(Locale.ROOT, CSV_TABLE_DIR, table));
        ret.createOrReplaceTempView(tableDesc.getName());
    }
}
 
Example 2
Source File: PushDownRunnerSparkImplTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Before
public void setUp() throws Exception {
    createTestMetadata();
    ss = SparkSession.builder().appName("local").master("local[1]").getOrCreate();
    SparderContext.setSparkSession(ss);
    StructType schema = new StructType();
    schema = schema.add("TRANS_ID", DataTypes.LongType, false);
    schema = schema.add("ORDER_ID", DataTypes.LongType, false);
    schema = schema.add("CAL_DT", DataTypes.DateType, false);
    schema = schema.add("LSTG_FORMAT_NAME", DataTypes.StringType, false);
    schema = schema.add("LEAF_CATEG_ID", DataTypes.LongType, false);
    schema = schema.add("LSTG_SITE_ID", DataTypes.IntegerType, false);
    schema = schema.add("SLR_SEGMENT_CD", DataTypes.FloatType, false);
    schema = schema.add("SELLER_ID", DataTypes.LongType, false);
    schema = schema.add("PRICE", DataTypes.createDecimalType(19, 4), false);
    schema = schema.add("ITEM_COUNT", DataTypes.DoubleType, false);
    schema = schema.add("TEST_COUNT_DISTINCT_BITMAP", DataTypes.StringType, false);
    ss.read().schema(schema).csv("../../examples/test_case_data/parquet_test/data/DEFAULT.TEST_KYLIN_FACT.csv")
            .createOrReplaceTempView("TEST_KYLIN_FACT");
}
 
Example 3
Source File: Spark1Shims.java    From zeppelin with Apache License 2.0
@Override
public DataFrame getAsDataFrame(String value) {
  String[] lines = value.split("\\n");
  String head = lines[0];
  String[] columns = head.split("\t");
  StructType schema = new StructType();
  for (String column : columns) {
    schema = schema.add(column, "String");
  }

  List<Row> rows = new ArrayList<>();
  for (int i = 1; i < lines.length; ++i) {
    String[] tokens = lines[i].split("\t");
    Row row = new GenericRow(tokens);
    rows.add(row);
  }
  return SQLContext.getOrCreate(sc)
          .createDataFrame(rows, schema);
}
 
Example 4
Source File: Spark3Shims.java    From zeppelin with Apache License 2.0
@Override
public Dataset<Row> getAsDataFrame(String value) {
  String[] lines = value.split("\\n");
  String head = lines[0];
  String[] columns = head.split("\t");
  StructType schema = new StructType();
  for (String column : columns) {
    schema = schema.add(column, "String");
  }

  List<Row> rows = new ArrayList<>();
  for (int i = 1; i < lines.length; ++i) {
    String[] tokens = lines[i].split("\t");
    Row row = new GenericRow(tokens);
    rows.add(row);
  }
  return sparkSession.createDataFrame(rows, schema);
}
 
Example 5
Source File: Spark2Shims.java    From zeppelin with Apache License 2.0
@Override
public Dataset<Row> getAsDataFrame(String value) {
  String[] lines = value.split("\\n");
  String head = lines[0];
  String[] columns = head.split("\t");
  StructType schema = new StructType();
  for (String column : columns) {
    schema = schema.add(column, "String");
  }

  List<Row> rows = new ArrayList<>();
  for (int i = 1; i < lines.length; ++i) {
    String[] tokens = lines[i].split("\t");
    Row row = new GenericRow(tokens);
    rows.add(row);
  }
  return sparkSession.createDataFrame(rows, schema);
}
 
Example 6
Source File: SparkTempViewProvider.java    From hudi with Apache License 2.0
@Override
public void createOrReplace(String tableName, List<String> headers, List<List<Comparable>> rows) {
  try {
    if (headers.isEmpty() || rows.isEmpty()) {
      return;
    }

    if (rows.stream().filter(row -> row.size() != headers.size()).count() > 0) {
      throw new HoodieException("Invalid row, does not match headers " + headers.size() + " " + rows.size());
    }

    // replace all whitespaces in headers to make it easy to write sql queries
    List<String> headersNoSpaces = headers.stream().map(title -> title.replaceAll("\\s+",""))
            .collect(Collectors.toList());

    // generate schema for table
    StructType structType = new StructType();
    for (int i = 0; i < headersNoSpaces.size(); i++) {
      // try guessing data type from column data.
      DataType headerDataType = getDataType(rows.get(0).get(i));
      structType = structType.add(DataTypes.createStructField(headersNoSpaces.get(i), headerDataType, true));
    }
    List<Row> records = rows.stream().map(row -> RowFactory.create(row.toArray(new Comparable[row.size()])))
            .collect(Collectors.toList());
    Dataset<Row> dataset = this.sqlContext.createDataFrame(records, structType);
    dataset.createOrReplaceTempView(tableName);
    System.out.println("Wrote table view: " + tableName);
  } catch (Throwable ex) {
    // log full stack trace and rethrow. Without this it's difficult to debug failures, if any
    LOG.error("unable to write ", ex);
    throw new HoodieException(ex);
  }
}
 
Example 7
Source File: SchemaUtils.java    From envelope with Apache License 2.0
public static StructType appendFields(StructType from, List<StructField> fields) {
  StructType to = DataTypes.createStructType(from.fields());

  for (StructField field : fields) {
    to = to.add(field);
  }

  return to;
}
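
As a quick illustration of how the helper above might be called (this snippet is not part of the envelope project; the class name and column names are made up, and it assumes SchemaUtils is on the classpath):

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class AppendFieldsUsage {
    public static void main(String[] args) {
        // Base schema with two columns.
        StructType base = new StructType()
                .add("id", DataTypes.LongType, false)
                .add("name", DataTypes.StringType, true);

        // Hypothetical extra columns to append to the base schema.
        List<StructField> extras = Arrays.asList(
                DataTypes.createStructField("ingest_ts", DataTypes.TimestampType, true),
                DataTypes.createStructField("source_file", DataTypes.StringType, true));

        StructType extended = SchemaUtils.appendFields(base, extras);
        extended.printTreeString(); // id, name, ingest_ts, source_file
    }
}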
 
Example 8
Source File: ColumnExploder.java    From jpmml-evaluator-spark with GNU Affero General Public License v3.0
@Override
public StructType transformSchema(StructType schema){
	StructType structSchema = getStructSchema(schema);

	StructType result = schema;

	StructField[] fields = structSchema.fields();
	for(StructField field : fields){
		result = result.add(field);
	}

	return result;
}