Java Code Examples for org.apache.spark.sql.types.StructType#add()
The following examples show how to use
org.apache.spark.sql.types.StructType#add().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LocalWithSparkSessionTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
protected static void populateSSWithCSVData(KylinConfig kylinConfig, String project, SparkSession sparkSession) { ProjectInstance projectInstance = ProjectManager.getInstance(kylinConfig).getProject(project); Preconditions.checkArgument(projectInstance != null); for (String table : projectInstance.getTables()) { if ("DEFAULT.STREAMING_TABLE".equals(table)) { continue; } if (!new File(String.format(Locale.ROOT, CSV_TABLE_DIR, table)).exists()) { continue; } TableDesc tableDesc = TableMetadataManager.getInstance(kylinConfig).getTableDesc(table, project); ColumnDesc[] columns = tableDesc.getColumns(); StructType schema = new StructType(); for (ColumnDesc column : columns) { schema = schema.add(column.getName(), convertType(column.getType()), false); } Dataset<Row> ret = sparkSession.read().schema(schema).csv(String.format(Locale.ROOT, CSV_TABLE_DIR, table)); ret.createOrReplaceTempView(tableDesc.getName()); } }
Example 2
Source File: PushDownRunnerSparkImplTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
@Before public void setUp() throws Exception { createTestMetadata(); ss = SparkSession.builder().appName("local").master("local[1]").getOrCreate(); SparderContext.setSparkSession(ss); StructType schema = new StructType(); schema = schema.add("TRANS_ID", DataTypes.LongType, false); schema = schema.add("ORDER_ID", DataTypes.LongType, false); schema = schema.add("CAL_DT", DataTypes.DateType, false); schema = schema.add("LSTG_FORMAT_NAME", DataTypes.StringType, false); schema = schema.add("LEAF_CATEG_ID", DataTypes.LongType, false); schema = schema.add("LSTG_SITE_ID", DataTypes.IntegerType, false); schema = schema.add("SLR_SEGMENT_CD", DataTypes.FloatType, false); schema = schema.add("SELLER_ID", DataTypes.LongType, false); schema = schema.add("PRICE", DataTypes.createDecimalType(19, 4), false); schema = schema.add("ITEM_COUNT", DataTypes.DoubleType, false); schema = schema.add("TEST_COUNT_DISTINCT_BITMAP", DataTypes.StringType, false); ss.read().schema(schema).csv("../../examples/test_case_data/parquet_test/data/DEFAULT.TEST_KYLIN_FACT.csv") .createOrReplaceTempView("TEST_KYLIN_FACT"); }
Example 3
Source File: Spark1Shims.java From zeppelin with Apache License 2.0 | 6 votes |
@Override public DataFrame getAsDataFrame(String value) { String[] lines = value.split("\\n"); String head = lines[0]; String[] columns = head.split("\t"); StructType schema = new StructType(); for (String column : columns) { schema = schema.add(column, "String"); } List<Row> rows = new ArrayList<>(); for (int i = 1; i < lines.length; ++i) { String[] tokens = lines[i].split("\t"); Row row = new GenericRow(tokens); rows.add(row); } return SQLContext.getOrCreate(sc) .createDataFrame(rows, schema); }
Example 4
Source File: Spark3Shims.java From zeppelin with Apache License 2.0 | 6 votes |
@Override public Dataset<Row> getAsDataFrame(String value) { String[] lines = value.split("\\n"); String head = lines[0]; String[] columns = head.split("\t"); StructType schema = new StructType(); for (String column : columns) { schema = schema.add(column, "String"); } List<Row> rows = new ArrayList<>(); for (int i = 1; i < lines.length; ++i) { String[] tokens = lines[i].split("\t"); Row row = new GenericRow(tokens); rows.add(row); } return sparkSession.createDataFrame(rows, schema); }
Example 5
Source File: Spark2Shims.java From zeppelin with Apache License 2.0 | 6 votes |
@Override public Dataset<Row> getAsDataFrame(String value) { String[] lines = value.split("\\n"); String head = lines[0]; String[] columns = head.split("\t"); StructType schema = new StructType(); for (String column : columns) { schema = schema.add(column, "String"); } List<Row> rows = new ArrayList<>(); for (int i = 1; i < lines.length; ++i) { String[] tokens = lines[i].split("\t"); Row row = new GenericRow(tokens); rows.add(row); } return sparkSession.createDataFrame(rows, schema); }
Example 6
Source File: SparkTempViewProvider.java From hudi with Apache License 2.0 | 5 votes |
@Override public void createOrReplace(String tableName, List<String> headers, List<List<Comparable>> rows) { try { if (headers.isEmpty() || rows.isEmpty()) { return; } if (rows.stream().filter(row -> row.size() != headers.size()).count() > 0) { throw new HoodieException("Invalid row, does not match headers " + headers.size() + " " + rows.size()); } // replace all whitespaces in headers to make it easy to write sql queries List<String> headersNoSpaces = headers.stream().map(title -> title.replaceAll("\\s+","")) .collect(Collectors.toList()); // generate schema for table StructType structType = new StructType(); for (int i = 0; i < headersNoSpaces.size(); i++) { // try guessing data type from column data. DataType headerDataType = getDataType(rows.get(0).get(i)); structType = structType.add(DataTypes.createStructField(headersNoSpaces.get(i), headerDataType, true)); } List<Row> records = rows.stream().map(row -> RowFactory.create(row.toArray(new Comparable[row.size()]))) .collect(Collectors.toList()); Dataset<Row> dataset = this.sqlContext.createDataFrame(records, structType); dataset.createOrReplaceTempView(tableName); System.out.println("Wrote table view: " + tableName); } catch (Throwable ex) { // log full stack trace and rethrow. Without this its difficult to debug failures, if any LOG.error("unable to write ", ex); throw new HoodieException(ex); } }
Example 7
Source File: SchemaUtils.java From envelope with Apache License 2.0 | 5 votes |
public static StructType appendFields(StructType from, List<StructField> fields) { StructType to = DataTypes.createStructType(from.fields()); for (StructField field : fields) { to = to.add(field); } return to; }
Example 8
Source File: ColumnExploder.java From jpmml-evaluator-spark with GNU Affero General Public License v3.0 | 5 votes |
@Override public StructType transformSchema(StructType schema){ StructType structSchema = getStructSchema(schema); StructType result = schema; StructField[] fields = structSchema.fields(); for(StructField field : fields){ result = result.add(field); } return result; }