Java Code Examples for org.apache.spark.sql.types.StructType#add()
The following examples show how to use
org.apache.spark.sql.types.StructType#add().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LocalWithSparkSessionTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
protected static void populateSSWithCSVData(KylinConfig kylinConfig, String project, SparkSession sparkSession) { ProjectInstance projectInstance = ProjectManager.getInstance(kylinConfig).getProject(project); Preconditions.checkArgument(projectInstance != null); for (String table : projectInstance.getTables()) { if ("DEFAULT.STREAMING_TABLE".equals(table)) { continue; } if (!new File(String.format(Locale.ROOT, CSV_TABLE_DIR, table)).exists()) { continue; } TableDesc tableDesc = TableMetadataManager.getInstance(kylinConfig).getTableDesc(table, project); ColumnDesc[] columns = tableDesc.getColumns(); StructType schema = new StructType(); for (ColumnDesc column : columns) { schema = schema.add(column.getName(), convertType(column.getType()), false); } Dataset<Row> ret = sparkSession.read().schema(schema).csv(String.format(Locale.ROOT, CSV_TABLE_DIR, table)); ret.createOrReplaceTempView(tableDesc.getName()); } }
Example 2
Source File: PushDownRunnerSparkImplTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
@Before public void setUp() throws Exception { createTestMetadata(); ss = SparkSession.builder().appName("local").master("local[1]").getOrCreate(); SparderContext.setSparkSession(ss); StructType schema = new StructType(); schema = schema.add("TRANS_ID", DataTypes.LongType, false); schema = schema.add("ORDER_ID", DataTypes.LongType, false); schema = schema.add("CAL_DT", DataTypes.DateType, false); schema = schema.add("LSTG_FORMAT_NAME", DataTypes.StringType, false); schema = schema.add("LEAF_CATEG_ID", DataTypes.LongType, false); schema = schema.add("LSTG_SITE_ID", DataTypes.IntegerType, false); schema = schema.add("SLR_SEGMENT_CD", DataTypes.FloatType, false); schema = schema.add("SELLER_ID", DataTypes.LongType, false); schema = schema.add("PRICE", DataTypes.createDecimalType(19, 4), false); schema = schema.add("ITEM_COUNT", DataTypes.DoubleType, false); schema = schema.add("TEST_COUNT_DISTINCT_BITMAP", DataTypes.StringType, false); ss.read().schema(schema).csv("../../examples/test_case_data/parquet_test/data/DEFAULT.TEST_KYLIN_FACT.csv") .createOrReplaceTempView("TEST_KYLIN_FACT"); }
Example 3
Source File: Spark1Shims.java From zeppelin with Apache License 2.0 | 6 votes |
@Override public DataFrame getAsDataFrame(String value) { String[] lines = value.split("\\n"); String head = lines[0]; String[] columns = head.split("\t"); StructType schema = new StructType(); for (String column : columns) { schema = schema.add(column, "String"); } List<Row> rows = new ArrayList<>(); for (int i = 1; i < lines.length; ++i) { String[] tokens = lines[i].split("\t"); Row row = new GenericRow(tokens); rows.add(row); } return SQLContext.getOrCreate(sc) .createDataFrame(rows, schema); }
Example 4
Source File: Spark3Shims.java From zeppelin with Apache License 2.0 | 6 votes |
@Override public Dataset<Row> getAsDataFrame(String value) { String[] lines = value.split("\\n"); String head = lines[0]; String[] columns = head.split("\t"); StructType schema = new StructType(); for (String column : columns) { schema = schema.add(column, "String"); } List<Row> rows = new ArrayList<>(); for (int i = 1; i < lines.length; ++i) { String[] tokens = lines[i].split("\t"); Row row = new GenericRow(tokens); rows.add(row); } return sparkSession.createDataFrame(rows, schema); }
Example 5
Source File: Spark2Shims.java From zeppelin with Apache License 2.0 | 6 votes |
@Override public Dataset<Row> getAsDataFrame(String value) { String[] lines = value.split("\\n"); String head = lines[0]; String[] columns = head.split("\t"); StructType schema = new StructType(); for (String column : columns) { schema = schema.add(column, "String"); } List<Row> rows = new ArrayList<>(); for (int i = 1; i < lines.length; ++i) { String[] tokens = lines[i].split("\t"); Row row = new GenericRow(tokens); rows.add(row); } return sparkSession.createDataFrame(rows, schema); }
Example 6
Source File: SparkTempViewProvider.java From hudi with Apache License 2.0 | 5 votes |
@Override public void createOrReplace(String tableName, List<String> headers, List<List<Comparable>> rows) { try { if (headers.isEmpty() || rows.isEmpty()) { return; } if (rows.stream().filter(row -> row.size() != headers.size()).count() > 0) { throw new HoodieException("Invalid row, does not match headers " + headers.size() + " " + rows.size()); } // replace all whitespaces in headers to make it easy to write sql queries List<String> headersNoSpaces = headers.stream().map(title -> title.replaceAll("\\s+","")) .collect(Collectors.toList()); // generate schema for table StructType structType = new StructType(); for (int i = 0; i < headersNoSpaces.size(); i++) { // try guessing data type from column data. DataType headerDataType = getDataType(rows.get(0).get(i)); structType = structType.add(DataTypes.createStructField(headersNoSpaces.get(i), headerDataType, true)); } List<Row> records = rows.stream().map(row -> RowFactory.create(row.toArray(new Comparable[row.size()]))) .collect(Collectors.toList()); Dataset<Row> dataset = this.sqlContext.createDataFrame(records, structType); dataset.createOrReplaceTempView(tableName); System.out.println("Wrote table view: " + tableName); } catch (Throwable ex) { // log full stack trace and rethrow. Without this its difficult to debug failures, if any LOG.error("unable to write ", ex); throw new HoodieException(ex); } }
Example 7
Source File: SchemaUtils.java From envelope with Apache License 2.0 | 5 votes |
public static StructType appendFields(StructType from, List<StructField> fields) { StructType to = DataTypes.createStructType(from.fields()); for (StructField field : fields) { to = to.add(field); } return to; }
Example 8
Source File: ColumnExploder.java From jpmml-evaluator-spark with GNU Affero General Public License v3.0 | 5 votes |
@Override public StructType transformSchema(StructType schema){ StructType structSchema = getStructSchema(schema); StructType result = schema; StructField[] fields = structSchema.fields(); for(StructField field : fields){ result = result.add(field); } return result; }