Java Code Examples for org.apache.spark.sql.types.DataTypes#createStructType()

The following examples show how to use org.apache.spark.sql.types.DataTypes#createStructType(). Each example is drawn from an open source project; the source file and license are noted above each example.
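For orientation, here is a minimal, self-contained sketch of the two overloads these examples rely on. The field names and types are illustrative only:

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class CreateStructTypeSketch {
  public static void main(String[] args) {
    // Overload 1: build a StructType from a List<StructField>
    List<StructField> fields = Arrays.asList(
        DataTypes.createStructField("id", DataTypes.LongType, false),
        DataTypes.createStructField("name", DataTypes.StringType, true));
    StructType fromList = DataTypes.createStructType(fields);

    // Overload 2: build the same StructType from a StructField[]
    StructType fromArray = DataTypes.createStructType(
        fields.toArray(new StructField[0]));

    fromList.printTreeString();
    System.out.println(fromArray.equals(fromList)); // true: same fields, same order
  }
}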
Example 1
Source File: ProtobufUtils.java    From envelope with Apache License 2.0
/**
 * Construct a {@code Dataset} schema from a {@code Descriptor}
 * <p>
 * This iterates and recurses through a {@link com.google.protobuf.Descriptors.Descriptor} and produces a
 * {@link StructType} for {@link org.apache.spark.sql.Dataset<Row>}.
 * Protobuf {@code oneof} fields are flattened into discrete {@link StructField} instances.
 * <p>
 * This will pass the value of {@link Descriptors.FieldDescriptor#isRequired()} to the associated {@link StructField}.
 *
 * @param descriptor the Descriptor to convert
 * @return the converted StructType
 */
public static StructType buildSchema(Descriptors.Descriptor descriptor) {
  List<StructField> members = new ArrayList<>();
  List<Descriptors.FieldDescriptor> protoFields = descriptor.getFields();

  for (Descriptors.FieldDescriptor fieldDescriptor : protoFields) {
    DataType fieldType = convertType(fieldDescriptor);
    StructField structField = DataTypes.createStructField(fieldDescriptor.getName(), fieldType,
        !fieldDescriptor.isRequired());
    members.add(structField);
    LOG.debug("FieldDescriptor[{}] => StructField[{}] ", fieldDescriptor.getFullName(), structField);
  }

  if (members.isEmpty()) {
    throw new RuntimeException("No FieldDescriptors found");
  }

  return DataTypes.createStructType(members.toArray(new StructField[0]));
}
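A hypothetical invocation of the method above, assuming MyEvent stands in for any protobuf-generated message class (generated classes expose a static getDescriptor()):

// Hypothetical usage: MyEvent is a placeholder for a protobuf-generated class.
StructType schema = ProtobufUtils.buildSchema(MyEvent.getDescriptor());
schema.printTreeString(); // prints the derived Dataset schema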
 
Example 2
Source File: TranslateFunction.java    From envelope with Apache License 2.0
private StructType addFieldNameUnderscores(StructType without) {
  List<StructField> withFields = Lists.newArrayList();

  for (StructField withoutField : without.fields()) {
    String withName = "_" + withoutField.name();
    if (Arrays.asList(without.fieldNames()).contains(withName)) {
      throw new RuntimeException("Can not append raw field '" + withName + "' because that " +
          "field already exists as a result of the translation");
    }

    StructField withField = DataTypes.createStructField(
        withName, withoutField.dataType(), withoutField.nullable(), withoutField.metadata());

    withFields.add(withField);
  }

  return DataTypes.createStructType(withFields);
}
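For a concrete picture of what this helper does, a quick sketch (field names are illustrative): given a schema with fields id and name, it yields _id and _name, preserving each field's data type, nullability, and metadata.

StructType without = DataTypes.createStructType(Lists.newArrayList(
    DataTypes.createStructField("id", DataTypes.LongType, false),
    DataTypes.createStructField("name", DataTypes.StringType, true)));
// addFieldNameUnderscores(without) returns a schema with fields "_id" and "_name"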
 
Example 3
Source File: JavaStocks.java    From spark-ts-examples with Apache License 2.0
private static DataFrame loadObservations(JavaSparkContext sparkContext, SQLContext sqlContext,
    String path) {
  JavaRDD<Row> rowRdd = sparkContext.textFile(path).map((String line) -> {
      String[] tokens = line.split("\t");
      // year, month, day come from the first three tab-separated tokens
      ZonedDateTime dt = ZonedDateTime.of(Integer.parseInt(tokens[0]),
          Integer.parseInt(tokens[1]), Integer.parseInt(tokens[2]), 0, 0, 0, 0,
          ZoneId.systemDefault());
      String symbol = tokens[3];
      double price = Double.parseDouble(tokens[5]);
      return RowFactory.create(Timestamp.from(dt.toInstant()), symbol, price);
  });
  List<StructField> fields = new ArrayList<>();
  fields.add(DataTypes.createStructField("timestamp", DataTypes.TimestampType, true));
  fields.add(DataTypes.createStructField("symbol", DataTypes.StringType, true));
  fields.add(DataTypes.createStructField("price", DataTypes.DoubleType, true));
  StructType schema = DataTypes.createStructType(fields);
  return sqlContext.createDataFrame(rowRdd, schema);
}
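Note that this example targets the Spark 1.x API (SQLContext, DataFrame). The schema-building code is unchanged on Spark 2.x and later; only the entry point differs, roughly:

// Spark 2.x+ equivalent of the final line, assuming a SparkSession named spark
Dataset<Row> df = spark.createDataFrame(rowRdd, schema);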
 
Example 4
Source File: TestAvroUtils.java    From envelope with Apache License 2.0
@Test
public void toSchemaArraysNested() throws Exception {

  StructType input = DataTypes.createStructType(Lists.newArrayList(
      // Outer
      DataTypes.createStructField("Outer", DataTypes.createArrayType(
          // Inner
          DataTypes.createArrayType(DataTypes.IntegerType, false),
          false), false)
  ));

  Schema schema = AvroUtils.schemaFor(input);

  assertEquals("Invalid field count", 1, schema.getFields().size());
  assertEquals("Invalid field name", "Outer", schema.getFields().get(0).name());
  assertEquals("Invalid field type", Schema.Type.ARRAY, schema.getFields().get(0).schema().getType());
  assertEquals("Invalid outer element type, i.e the inner type", Schema.Type.ARRAY, schema.getFields().get(0).schema().getElementType().getType());
  assertEquals("Invalid inner element type", Schema.Type.INT, schema.getFields().get(0).schema().getElementType().getElementType().getType());

  //System.out.println(schema.toString(true));
}
 
Example 5
Source File: IfZeroVectorBridgeTest.java    From spark-transformers with Apache License 2.0
public DataFrame createDF(JavaRDD<Tuple2<Vector, String>> rdd) {

    // Build the schema explicitly
    List<StructField> fields = new ArrayList<StructField>();
    fields.add(DataTypes.createStructField("vectorized_count", new VectorUDT(), true));
    fields.add(DataTypes.createStructField("product_title", DataTypes.StringType, true));

    StructType schema = DataTypes.createStructType(fields);
    // Convert records of the RDD to Rows
    JavaRDD<Row> rowRDD = rdd.map(
            new Function<Tuple2<Vector, String>, Row>() {
                public Row call(Tuple2<Vector, String> record) {
                    return RowFactory.create(record._1(), record._2());
                }
            });

    return sqlContext.createDataFrame(rowRDD, schema);
}
 
Example 6
Source File: MLContextTest.java    From systemds with Apache License 2.0
@Test
public void testDataFrameGoodMetadataDML() {
	System.out.println("MLContextTest - DataFrame good metadata DML");

	List<String> list = new ArrayList<>();
	list.add("10,20,30");
	list.add("40,50,60");
	list.add("70,80,90");
	JavaRDD<String> javaRddString = sc.parallelize(list);

	JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToDoubleArrayRow());
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField("C1", DataTypes.DoubleType, true));
	fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true));
	fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true));
	StructType schema = DataTypes.createStructType(fields);
	Dataset<Row> dataFrame = spark.createDataFrame(javaRddRow, schema);

	MatrixMetadata mm = new MatrixMetadata(3, 3, 9);

	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
	setExpectedStdOut("sum: 450.0");
	ml.execute(script);
}
 
Example 7
Source File: TestSchemaUtils.java    From envelope with Apache License 2.0
@Test
public void testAppendFields() {
  StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType schema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));
  StructField field4 = DataTypes.createStructField("field4", DataTypes.BooleanType, true);
  StructField field5 = DataTypes.createStructField("field5", DataTypes.StringType, true);

  StructType appendSchema = SchemaUtils.appendFields(schema, Lists.newArrayList(field4, field5));

  assertEquals(appendSchema.length(), 5);
  assertEquals(appendSchema.fields()[0], field1);
  assertEquals(appendSchema.fields()[1], field2);
  assertEquals(appendSchema.fields()[2], field3);
  assertEquals(appendSchema.fields()[3], field4);
  assertEquals(appendSchema.fields()[4], field5);
}
 
Example 8
Source File: TestNanosWithSeqNumTimeModel.java    From envelope with Apache License 2.0
@Test
public void testAppendFields() {
  StructType withoutSchema = DataTypes.createStructType(
      Lists.newArrayList(
          DataTypes.createStructField("other", DataTypes.StringType, true)));
  
  Row without = new RowWithSchema(withoutSchema, "hello");
  Row with = tm.appendFields(without);
  
  assertEquals(with.schema(), withoutSchema.add(nanoField).add(seqNumField));
}
 
Example 9
Source File: TestRowUtils.java    From envelope with Apache License 2.0
@Test
public void testDifferent() {
  StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType schema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));

  Row row1 = new RowWithSchema(schema, "hello", 1, 2.0);
  Row row2 = new RowWithSchema(schema, "hello", 10, -2.0);

  assertTrue(RowUtils.different(row1, row2, Lists.newArrayList("field1", "field2", "field3")));
  assertTrue(!RowUtils.different(row1, row2, Lists.newArrayList("field1")));
}
 
Example 10
Source File: TestStringDateTimeModel.java    From envelope with Apache License 2.0
@Test
public void testAppendFields() {
  StructType withoutSchema = DataTypes.createStructType(
      Lists.newArrayList(
          DataTypes.createStructField("other", DataTypes.StringType, true)));
  
  Row without = new RowWithSchema(withoutSchema, "hello");
  Row with = tm.appendFields(without);
  
  assertEquals(with.schema(), withoutSchema.add(field));
}
 
Example 11
Source File: SchemaUtils.java    From envelope with Apache License 2.0
public static StructType appendFields(StructType from, List<StructField> fields) {
  StructType to = DataTypes.createStructType(from.fields());

  for (StructField field : fields) {
    to = to.add(field);
  }

  return to;
}
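Because StructType is immutable, add returns a new schema on each call; the reassignment inside the loop is what accumulates the appended fields, and the input schema is never modified. A quick sketch of that behavior, using the same helpers as the tests above:

StructType base = DataTypes.createStructType(Lists.newArrayList(
    DataTypes.createStructField("a", DataTypes.StringType, true)));
StructType extended = SchemaUtils.appendFields(base,
    Lists.newArrayList(DataTypes.createStructField("b", DataTypes.IntegerType, true)));

// The original schema is untouched; only the result carries both fields.
assert base.length() == 1;
assert extended.length() == 2;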
 
Example 12
Source File: TestAvroUtils.java    From envelope with Apache License 2.0
@Test
public void toSchemaNullable() throws Exception {

  StructType input = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.BooleanType, false),
      DataTypes.createStructField("field2", DataTypes.StringType, true),
      DataTypes.createStructField("field3", DataTypes.DateType, false),
      DataTypes.createStructField("field4", DataTypes.TimestampType, false)
  ));

  Schema schema = AvroUtils.schemaFor(input);

  assertEquals("Invalid field count", 4, schema.getFields().size());

  // Not nullable
  assertEquals("Invalid field name", "field1", schema.getFields().get(0).name());
  assertEquals("Invalid field type", Schema.Type.BOOLEAN, schema.getFields().get(0).schema().getType());
  assertEquals("Invalid field default", null, schema.getFields().get(0).defaultVal());

  // Nullable: represented as an Avro union with null (not an Avro "optional"), and with no default
  assertEquals("Invalid nullable (union) type", Schema.Type.UNION, schema.getFields().get(1).schema().getType());
  assertEquals("Invalid nullable (union) type count", 2, schema.getFields().get(1).schema().getTypes().size());
  assertEquals("Invalid field type", Schema.Type.STRING, schema.getFields().get(1).schema().getTypes().get(0).getType());
  assertEquals("Invalid union default", null, schema.getFields().get(1).defaultVal());

  //System.out.println(schema.toString(true));
}
 
Example 13
Source File: TestPivotDeriver.java    From envelope with Apache License 2.0
@Test
public void testStaticPivot() throws Exception {
  List<Row> sourceList = Lists.newArrayList(
      RowFactory.create("A", "hello", "1"),
      RowFactory.create("A", "world", "2"),
      RowFactory.create("B", "hello", "3"),
      RowFactory.create("C", "world", "4"),
      RowFactory.create("D", "dummy", "5"));
  StructType schema = DataTypes.createStructType(Lists.newArrayList(
    DataTypes.createStructField("entity_id", DataTypes.StringType, true),
    DataTypes.createStructField("key", DataTypes.StringType, true),
    DataTypes.createStructField("value", DataTypes.StringType, true)
  ));
  Dataset<Row> source = Contexts.getSparkSession().createDataFrame(sourceList, schema);

  Map<String, Dataset<Row>> dependencies = Maps.newHashMap();
  dependencies.put("source", source);
  
  Config config = ConfigFactory.empty()
      .withValue(PivotDeriver.STEP_NAME_CONFIG, ConfigValueFactory.fromAnyRef("source"))
      .withValue(PivotDeriver.ENTITY_KEY_FIELD_NAMES_CONFIG, ConfigValueFactory.fromAnyRef(Lists.newArrayList("entity_id")))
      .withValue(PivotDeriver.PIVOT_KEY_FIELD_NAME_CONFIG, ConfigValueFactory.fromAnyRef("key"))
      .withValue(PivotDeriver.PIVOT_VALUE_FIELD_NAME_CONFIG, ConfigValueFactory.fromAnyRef("value"))
      .withValue(PivotDeriver.PIVOT_KEYS_SOURCE_CONFIG, ConfigValueFactory.fromAnyRef(PivotDeriver.PIVOT_KEYS_SOURCE_STATIC))
      .withValue(PivotDeriver.PIVOT_KEYS_LIST_CONFIG, ConfigValueFactory.fromAnyRef(Lists.newArrayList("hello", "world")));

  PivotDeriver d = new PivotDeriver();
  assertNoValidationFailures(d, config);
  d.configure(config);
  
  List<Row> results = d.derive(dependencies).collectAsList();
  
  assertEquals(results.size(), 4);
  assertTrue(results.contains(RowFactory.create("A", "1", "2")));
  assertTrue(results.contains(RowFactory.create("B", "3", null)));
  assertTrue(results.contains(RowFactory.create("C", null, "4")));
  assertTrue(results.contains(RowFactory.create("D", null, null)));
}
 
Example 14
Source File: ValueRow.java    From spliceengine with GNU Affero General Public License v3.0
@Override
public StructType schema() {
	StructField[] fields = new StructField[ncols];
	for (int i = 0; i < ncols; i++)
		fields[i] = column[i].getStructField(getNamedColumn(i));
	return DataTypes.createStructType(fields);
}
 
Example 15
Source File: ControlDataSet.java    From spliceengine with GNU Affero General Public License v3.0
/**
 * Writes this dataset to a Parquet file at the given location, building the
 * table schema from the operation's column definitions.
 *
 * @param dsp the data set processor
 * @param partitionBy the columns to partition by
 * @param location the target location for the Parquet file
 * @param compression the compression codec to use
 * @param context the operation context
 * @return a dataset containing a single row with the number of records written
 */
@Override
public DataSet<ExecRow> writeParquetFile(DataSetProcessor dsp, int[] partitionBy, String location, String compression, OperationContext context) {

    try {
        //Generate Table Schema
        String[] colNames;
        DataValueDescriptor[] dvds;
        if (context.getOperation() instanceof DMLWriteOperation) {
            dvds  = context.getOperation().getExecRowDefinition().getRowArray();
            colNames = ((DMLWriteOperation) context.getOperation()).getColumnNames();
        } else if (context.getOperation() instanceof ExportOperation) {
            dvds = context.getOperation().getLeftOperation().getLeftOperation().getExecRowDefinition().getRowArray();
            ExportOperation export = (ExportOperation) context.getOperation();
            ResultColumnDescriptor[] descriptors = export.getSourceResultColumnDescriptors();
            colNames = new String[descriptors.length];
            int i = 0;
            for (ResultColumnDescriptor rcd : export.getSourceResultColumnDescriptors()) {
                colNames[i++] = rcd.getName();
            }
        } else {
            throw new IllegalArgumentException("Unsupported operation type: " + context.getOperation());
        }
        StructField[] fields = new StructField[colNames.length];
        for (int i=0 ; i<colNames.length ; i++){
            fields[i] = dvds[i].getStructField(colNames[i]);
        }
        StructType tableSchema = DataTypes.createStructType(fields);
        RecordWriter<Void, Object> rw = ParquetWriterService.getFactory().getParquetRecordWriter(location, compression, tableSchema);

        try {
            ExpressionEncoder<Row> encoder = RowEncoder.apply(tableSchema);
            while (iterator.hasNext()) {
                ValueRow vr = (ValueRow) iterator.next();
                context.recordWrite();

                rw.write(null, encoder.toRow(vr));
            }
        } finally {
            rw.close(null);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    ValueRow valueRow=new ValueRow(1);
    valueRow.setColumn(1,new SQLLongint(context.getRecordsWritten()));
    return new ControlDataSet(Collections.singletonList(valueRow).iterator());
}
 
Example 16
Source File: TestEventTimeHistoryPlanner.java    From envelope with Apache License 2.0
@Test
public void testCarryForwardMultipleWhenNullOutOfOrderMultipleValued() {
  p = new EventTimeHistoryPlanner();
  config = config.
      withValue(EventTimeHistoryPlanner.CARRY_FORWARD_CONFIG_NAME, ConfigValueFactory.fromAnyRef(true)).
      withValue(EventTimeHistoryPlanner.VALUE_FIELD_NAMES_CONFIG_NAME, ConfigValueFactory.fromAnyRef(Lists.newArrayList("value1","value2")));
  assertNoValidationFailures(p, config);
  p.configure(config);

  arrivingSchema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("key", DataTypes.StringType, false),
      DataTypes.createStructField("value1", DataTypes.StringType, true),
      DataTypes.createStructField("value2", DataTypes.StringType, true),
      DataTypes.createStructField("timestamp", DataTypes.LongType, false)));
  existingSchema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("key", DataTypes.StringType, false),
      DataTypes.createStructField("value1", DataTypes.StringType, false),
      DataTypes.createStructField("value2", DataTypes.StringType, false),
      DataTypes.createStructField("timestamp", DataTypes.LongType, false),
      DataTypes.createStructField("startdate", DataTypes.LongType, false),
      DataTypes.createStructField("enddate", DataTypes.LongType, false),
      DataTypes.createStructField("currentflag", DataTypes.StringType, false),
      DataTypes.createStructField("lastupdated", DataTypes.StringType, false)));

  existing.add(new RowWithSchema(existingSchema, "a", "hello1:100", "hello2:100", 100L, 100L, 253402214400000L, EventTimeHistoryPlanner.CURRENT_FLAG_DEFAULT_YES, ""));
  arriving.add(new RowWithSchema(arrivingSchema, "a", null, "hello2:200", 200L));
  arriving.add(new RowWithSchema(arrivingSchema, "a", "hello1:150", null, 150L));
  key = new RowWithSchema(keySchema, "a");

  List<Row> planned = p.planMutationsForKey(key, arriving, existing);

  assertEquals(planned.size(), 3);
  assertEquals(PlannerUtils.getMutationType(planned.get(0)), MutationType.UPDATE);
  assertEquals(planned.get(0).getAs("value1"), "hello1:100");
  assertEquals(planned.get(0).getAs("value2"), "hello2:100");
  assertEquals(planned.get(0).getAs("startdate"), 100L);
  assertEquals(planned.get(0).getAs("enddate"), 149L);
  assertEquals(planned.get(0).getAs("currentflag"), EventTimeHistoryPlanner.CURRENT_FLAG_DEFAULT_NO);
  assertEquals(PlannerUtils.getMutationType(planned.get(1)), MutationType.INSERT);
  assertEquals(planned.get(1).getAs("value1"), "hello1:150");
  assertEquals(planned.get(1).getAs("value2"), "hello2:100");
  assertEquals(planned.get(1).getAs("startdate"), 150L);
  assertEquals(planned.get(1).getAs("enddate"), 199L);
  assertEquals(planned.get(1).getAs("currentflag"), EventTimeHistoryPlanner.CURRENT_FLAG_DEFAULT_NO);
  assertEquals(PlannerUtils.getMutationType(planned.get(2)), MutationType.INSERT);
  assertEquals(planned.get(2).getAs("value1"), "hello1:150");
  assertEquals(planned.get(2).getAs("value2"), "hello2:200");
  assertEquals(planned.get(2).getAs("startdate"), 200L);
  assertEquals(planned.get(2).getAs("enddate"), 253402214400000L);
  assertEquals(planned.get(2).getAs("currentflag"), EventTimeHistoryPlanner.CURRENT_FLAG_DEFAULT_YES);
}
 
Example 17
Source File: VideoStreamProcessor.java    From video-stream-classification with Apache License 2.0
public static void main(String[] args) throws Exception {
  //Read properties
  Properties prop = PropertyFileReader.readPropertyFile();

  //SparkSession
  SparkSession spark = SparkSession
      .builder()
      .appName("VideoStreamProcessor")
      .master(prop.getProperty("spark.master.url"))
      .getOrCreate();

  //directory to save image files with motion detected
  final String processedImageDir = prop.getProperty("processed.output.dir");
  logger.warn("Output directory for saving processed images is set to " + processedImageDir
      + ". This is configured in the processed.output.dir key of the property file.");

  //create schema for json message
  StructType schema = DataTypes.createStructType(new StructField[] {
      DataTypes.createStructField("cameraId", DataTypes.StringType, true),
      DataTypes.createStructField("timestamp", DataTypes.TimestampType, true),
      DataTypes.createStructField("rows", DataTypes.IntegerType, true),
      DataTypes.createStructField("cols", DataTypes.IntegerType, true),
      DataTypes.createStructField("type", DataTypes.IntegerType, true),
      DataTypes.createStructField("data", DataTypes.StringType, true)
  });

  //create Dataset from stream messages from Kafka
  Dataset<VideoEventData> ds = spark
      .readStream()
      .format("kafka")
      .option("kafka.bootstrap.servers", prop.getProperty("kafka.bootstrap.servers"))
      .option("subscribe", prop.getProperty("kafka.topic"))
      .option("kafka.max.partition.fetch.bytes", prop.getProperty("kafka.max.partition.fetch.bytes"))
      .option("kafka.max.poll.records", prop.getProperty("kafka.max.poll.records"))
      .load()
      .selectExpr("CAST(value AS STRING) as message")
      .select(functions.from_json(functions.col("message"), schema).as("json"))
      .select("json.*")
      .as(Encoders.bean(VideoEventData.class));

  //key-value pair of cameraId-VideoEventData
  KeyValueGroupedDataset<String, VideoEventData> kvDataset = ds.groupByKey(
      new MapFunction<VideoEventData, String>() {
        @Override
        public String call(VideoEventData value) throws Exception {
          return value.getCameraId();
        }
      }, Encoders.STRING());

  //process each camera's events, carrying state between micro-batches
  Dataset<VideoEventData> processedDataset = kvDataset.mapGroupsWithState(
      new MapGroupsWithStateFunction<String, VideoEventData, VideoEventData, VideoEventData>() {
        @Override
        public VideoEventData call(String key, Iterator<VideoEventData> values,
            GroupState<VideoEventData> state) throws Exception {
          logger.warn("CameraId=" + key + " PartitionId=" + TaskContext.getPartitionId());
          VideoEventData existing = null;
          //check previous state
          if (state.exists()) {
            existing = state.get();
          }
          //classify image
          VideoEventData processed = ImageProcessor.process(key, values, processedImageDir, existing);

          //update last processed
          if (processed != null) {
            state.update(processed);
          }
          return processed;
        }
      }, Encoders.bean(VideoEventData.class), Encoders.bean(VideoEventData.class));

  //start
  StreamingQuery query = processedDataset.writeStream()
      .outputMode("update")
      .format("console")
      .start();

  //await termination
  query.awaitTermination();
}
 
Example 18
Source File: AllButEmptyStringAggregationFunction.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
public AllButEmptyStringAggregationFunction() {
    inputSchema = DataTypes.createStructType(new StructField[]{DataTypes.createStructField("value", DataTypes.StringType, true)});
    bufferSchema = DataTypes.createStructType(new StructField[]{DataTypes.createStructField("currentSelection", DataTypes.StringType, true)});
}
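For context, these two schemas are the ones Spark's UserDefinedAggregateFunction contract (deprecated since Spark 3.0 in favor of Aggregator) asks an aggregation function to expose. A minimal sketch of how the fields built above would typically surface, assuming this class extends that base type:

// Sketch only: how the schemas built in the constructor are usually exposed.
@Override
public StructType inputSchema() {
    return inputSchema;   // a single StringType column named "value"
}

@Override
public StructType bufferSchema() {
    return bufferSchema;  // a single StringType column named "currentSelection"
}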
 
Example 19
Source File: StringDateTimeModel.java    From envelope with Apache License 2.0
@Override
public StructType getSchema() {
  return DataTypes.createStructType(Lists.newArrayList(field));
}
 
Example 20
Source File: SparkDataSetTest.java    From spliceengine with GNU Affero General Public License v3.0
@Test
public void testFoobar() {
    List<Row> foo = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        ValueRow row = new ValueRow(1);
        row.setColumn(1, new SQLInteger(i));
        foo.add(row);
    }

    StructType schema = DataTypes.createStructType(new StructField[]{
            DataTypes.createStructField("col1", DataTypes.IntegerType, true)});

//        ValueRow row = new ValueRow(2);
//        row.setColumn(1,new SQLDouble());
//        row.setColumn(2,new SQLInteger());

/*
        SpliceSpark.getSession().read().parquet("/Users/jleach/Documents/workspace/spliceengine/hbase_sql/target/external/simple_parquet")
                .select(new Column("0"),new Column("1"))
                .filter(col("0").gt(1).or(col("0").lt(4))).explain(true);
*/
    SpliceSpark.getSessionUnsafe().createDataFrame(foo, schema).write().format("orc").mode(SaveMode.Append)
            .orc("/Users/jleach/Documents/workspace/spliceengine/hbase_sql/target/external/orc_it");

    Column filter = (new Column("col1")).gt(1L).and(new Column("col1").lt(1L));

    SpliceSpark.getSessionUnsafe().read().schema(schema)
            .orc("/Users/jleach/Documents/workspace/spliceengine/hbase_sql/target/external/orc_it")
            .filter(filter).show();
//                .select(new Column("0"),new Column("1")).show();

/*
        Dataset<Row> leftSide = SpliceSpark.getSession().createDataFrame(foo,foo.get(0).schema());
        Dataset<Row> rightSide = SpliceSpark.getSession().createDataFrame(foo.subList(0,8),foo.get(0).schema());

        Column col =
                (leftSide.col("0").equalTo(rightSide.col("0"))).
                and((leftSide.col("1")).equalTo(rightSide.col("1")));
        leftSide.join(rightSide,col,"inner").explain(true);
        leftSide.join(rightSide,col,"inner").show(10);
        leftSide.join(broadcast(rightSide),col,"leftouter").explain(true);
        leftSide.join(broadcast(rightSide),col,"leftouter").show(10);
        leftSide.join(broadcast(rightSide),col,"leftanti").show(10);
        */
}