Java Code Examples for org.apache.spark.sql.types.DataTypes#createStructType()

The following examples show how to use org.apache.spark.sql.types.DataTypes#createStructType(). Each example is drawn from an open source project; the source file and license are noted above each example.
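For orientation, here is a minimal, self-contained sketch of the two overloads these examples rely on. The field names and types are illustrative only:

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class CreateStructTypeSketch {
  public static void main(String[] args) {
    // Overload 1: build a StructType from a List<StructField>
    List<StructField> fields = Arrays.asList(
        DataTypes.createStructField("id", DataTypes.LongType, false),
        DataTypes.createStructField("name", DataTypes.StringType, true));
    StructType fromList = DataTypes.createStructType(fields);

    // Overload 2: build the same StructType from a StructField[]
    StructType fromArray = DataTypes.createStructType(
        fields.toArray(new StructField[0]));

    fromList.printTreeString();
    System.out.println(fromArray.equals(fromList)); // true: same fields, same order
  }
}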
Example 1
Source File: ProtobufUtils.java    From envelope with Apache License 2.0
/**
 * Construct a {@code Dataset} schema from a {@code Descriptor}
 * <p>
 * This iterates and recurses through a {@link com.google.protobuf.Descriptors.Descriptor} and produces a
 * {@link StructType} for {@link org.apache.spark.sql.Dataset<Row>}.
 * Protobuf {@code oneof} fields are flattened into discrete {@link StructField} instances.
 * <p>
 * This will pass the value of {@link Descriptors.FieldDescriptor#isRequired()} to the associated {@link StructField}.
 *
 * @param descriptor the Descriptor to convert
 * @return the converted StructType
 */
public static StructType buildSchema(Descriptors.Descriptor descriptor) {
  List<StructField> members = new ArrayList<>();
  List<Descriptors.FieldDescriptor> protoFields = descriptor.getFields();

  for (Descriptors.FieldDescriptor fieldDescriptor : protoFields) {
    DataType fieldType = convertType(fieldDescriptor);
    StructField structField = DataTypes.createStructField(fieldDescriptor.getName(), fieldType,
        !fieldDescriptor.isRequired());
    members.add(structField);
    LOG.debug("FieldDescriptor[{}] => StructField[{}] ", fieldDescriptor.getFullName(), structField);
  }

  if (members.isEmpty()) {
    throw new RuntimeException("No FieldDescriptors found");
  }

  return DataTypes.createStructType(members.toArray(new StructField[0]));
}
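A hypothetical invocation of the method above, assuming MyEvent stands in for any protobuf-generated message class (generated classes expose a static getDescriptor()):

// Hypothetical usage: MyEvent is a placeholder for a protobuf-generated class.
StructType schema = ProtobufUtils.buildSchema(MyEvent.getDescriptor());
schema.printTreeString(); // prints the derived Dataset schema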
 
Example 2
Source File: TranslateFunction.java    From envelope with Apache License 2.0
private StructType addFieldNameUnderscores(StructType without) {
  List<StructField> withFields = Lists.newArrayList();

  for (StructField withoutField : without.fields()) {
    String withName = "_" + withoutField.name();
    if (Arrays.asList(without.fieldNames()).contains(withName)) {
      throw new RuntimeException("Can not append raw field '" + withName + "' because that " +
          "field already exists as a result of the translation");
    }

    StructField withField = DataTypes.createStructField(
        withName, withoutField.dataType(), withoutField.nullable(), withoutField.metadata());

    withFields.add(withField);
  }

  return DataTypes.createStructType(withFields);
}
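For a concrete picture of what this helper does, a quick sketch (field names are illustrative): given a schema with fields id and name, it yields _id and _name, preserving each field's data type, nullability, and metadata.

StructType without = DataTypes.createStructType(Lists.newArrayList(
    DataTypes.createStructField("id", DataTypes.LongType, false),
    DataTypes.createStructField("name", DataTypes.StringType, true)));
// addFieldNameUnderscores(without) returns a schema with fields "_id" and "_name"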
 
Example 3
Source File: JavaStocks.java    From spark-ts-examples with Apache License 2.0
private static DataFrame loadObservations(JavaSparkContext sparkContext, SQLContext sqlContext,
    String path) {
  JavaRDD<Row> rowRdd = sparkContext.textFile(path).map((String line) -> {
      String[] tokens = line.split("\t");
      // year, month, day come from the first three tab-separated tokens
      ZonedDateTime dt = ZonedDateTime.of(Integer.parseInt(tokens[0]),
          Integer.parseInt(tokens[1]), Integer.parseInt(tokens[2]), 0, 0, 0, 0,
          ZoneId.systemDefault());
      String symbol = tokens[3];
      double price = Double.parseDouble(tokens[5]);
      return RowFactory.create(Timestamp.from(dt.toInstant()), symbol, price);
  });
  List<StructField> fields = new ArrayList<>();
  fields.add(DataTypes.createStructField("timestamp", DataTypes.TimestampType, true));
  fields.add(DataTypes.createStructField("symbol", DataTypes.StringType, true));
  fields.add(DataTypes.createStructField("price", DataTypes.DoubleType, true));
  StructType schema = DataTypes.createStructType(fields);
  return sqlContext.createDataFrame(rowRdd, schema);
}
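Note that this example targets the Spark 1.x API (SQLContext, DataFrame). The schema-building code is unchanged on Spark 2.x and later; only the entry point differs, roughly:

// Spark 2.x+ equivalent of the final line, assuming a SparkSession named spark
Dataset<Row> df = spark.createDataFrame(rowRdd, schema);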
 
Example 4
Source File: TestAvroUtils.java    From envelope with Apache License 2.0
@Test
public void toSchemaArraysNested() throws Exception {

  StructType input = DataTypes.createStructType(Lists.newArrayList(
      // Outer
      DataTypes.createStructField("Outer", DataTypes.createArrayType(
          // Inner
          DataTypes.createArrayType(DataTypes.IntegerType, false),
          false), false)
  ));

  Schema schema = AvroUtils.schemaFor(input);

  assertEquals("Invalid field count", 1, schema.getFields().size());
  assertEquals("Invalid field name", "Outer", schema.getFields().get(0).name());
  assertEquals("Invalid field type", Schema.Type.ARRAY, schema.getFields().get(0).schema().getType());
  assertEquals("Invalid outer element type, i.e the inner type", Schema.Type.ARRAY, schema.getFields().get(0).schema().getElementType().getType());
  assertEquals("Invalid inner element type", Schema.Type.INT, schema.getFields().get(0).schema().getElementType().getElementType().getType());

  //System.out.println(schema.toString(true));
}
 
Example 5
Source File: IfZeroVectorBridgeTest.java    From spark-transformers with Apache License 2.0
public DataFrame createDF(JavaRDD<Tuple2<Vector, String>> rdd) {

    // Build the schema explicitly
    List<StructField> fields = new ArrayList<StructField>();
    fields.add(DataTypes.createStructField("vectorized_count", new VectorUDT(), true));
    fields.add(DataTypes.createStructField("product_title", DataTypes.StringType, true));

    StructType schema = DataTypes.createStructType(fields);
    // Convert records of the RDD to Rows
    JavaRDD<Row> rowRDD = rdd.map(
            new Function<Tuple2<Vector, String>, Row>() {
                public Row call(Tuple2<Vector, String> record) {
                    return RowFactory.create(record._1(), record._2());
                }
            });

    return sqlContext.createDataFrame(rowRDD, schema);
}
 
Example 6
Source File: MLContextTest.java    From systemds with Apache License 2.0
@Test
public void testDataFrameGoodMetadataDML() {
	System.out.println("MLContextTest - DataFrame good metadata DML");

	List<String> list = new ArrayList<>();
	list.add("10,20,30");
	list.add("40,50,60");
	list.add("70,80,90");
	JavaRDD<String> javaRddString = sc.parallelize(list);

	JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToDoubleArrayRow());
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField("C1", DataTypes.DoubleType, true));
	fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true));
	fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true));
	StructType schema = DataTypes.createStructType(fields);
	Dataset<Row> dataFrame = spark.createDataFrame(javaRddRow, schema);

	MatrixMetadata mm = new MatrixMetadata(3, 3, 9);

	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
	setExpectedStdOut("sum: 450.0");
	ml.execute(script);
}
 
Example 7
Source File: TestSchemaUtils.java    From envelope with Apache License 2.0
@Test
public void testAppendFields() {
  StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType schema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));
  StructField field4 = DataTypes.createStructField("field4", DataTypes.BooleanType, true);
  StructField field5 = DataTypes.createStructField("field5", DataTypes.StringType, true);

  StructType appendSchema = SchemaUtils.appendFields(schema, Lists.newArrayList(field4, field5));

  assertEquals(appendSchema.length(), 5);
  assertEquals(appendSchema.fields()[0], field1);
  assertEquals(appendSchema.fields()[1], field2);
  assertEquals(appendSchema.fields()[2], field3);
  assertEquals(appendSchema.fields()[3], field4);
  assertEquals(appendSchema.fields()[4], field5);
}
 
Example 8
Source File: TestNanosWithSeqNumTimeModel.java    From envelope with Apache License 2.0
@Test
public void testAppendFields() {
  StructType withoutSchema = DataTypes.createStructType(
      Lists.newArrayList(
          DataTypes.createStructField("other", DataTypes.StringType, true)));
  
  Row without = new RowWithSchema(withoutSchema, "hello");
  Row with = tm.appendFields(without);
  
  assertEquals(with.schema(), withoutSchema.add(nanoField).add(seqNumField));
}
 
Example 9
Source File: TestRowUtils.java    From envelope with Apache License 2.0
@Test
public void testDifferent() {
  StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType schema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));

  Row row1 = new RowWithSchema(schema, "hello", 1, 2.0);
  Row row2 = new RowWithSchema(schema, "hello", 10, -2.0);

  assertTrue(RowUtils.different(row1, row2, Lists.newArrayList("field1", "field2", "field3")));
  assertTrue(!RowUtils.different(row1, row2, Lists.newArrayList("field1")));
}
 
Example 10
Source File: TestStringDateTimeModel.java    From envelope with Apache License 2.0
@Test
public void testAppendFields() {
  StructType withoutSchema = DataTypes.createStructType(
      Lists.newArrayList(
          DataTypes.createStructField("other", DataTypes.StringType, true)));
  
  Row without = new RowWithSchema(withoutSchema, "hello");
  Row with = tm.appendFields(without);
  
  assertEquals(with.schema(), withoutSchema.add(field));
}
 
Example 11
Source File: SchemaUtils.java    From envelope with Apache License 2.0
public static StructType appendFields(StructType from, List<StructField> fields) {
  StructType to = DataTypes.createStructType(from.fields());

  for (StructField field : fields) {
    to = to.add(field);
  }

  return to;
}
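Because StructType is immutable, add returns a new schema on each call; the reassignment inside the loop is what accumulates the appended fields, and the input schema is never modified. A quick sketch of that behavior, using the same helpers as the tests above:

StructType base = DataTypes.createStructType(Lists.newArrayList(
    DataTypes.createStructField("a", DataTypes.StringType, true)));
StructType extended = SchemaUtils.appendFields(base,
    Lists.newArrayList(DataTypes.createStructField("b", DataTypes.IntegerType, true)));

// The original schema is untouched; only the result carries both fields.
assert base.length() == 1;
assert extended.length() == 2;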
 
Example 12
Source File: TestAvroUtils.java    From envelope with Apache License 2.0
@Test
public void toSchemaNullable() throws Exception {

  StructType input = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.BooleanType, false),
      DataTypes.createStructField("field2", DataTypes.StringType, true),
      DataTypes.createStructField("field3", DataTypes.DateType, false),
      DataTypes.createStructField("field4", DataTypes.TimestampType, false)
  ));

  Schema schema = AvroUtils.schemaFor(input);

  assertEquals("Invalid field count", 4, schema.getFields().size());

  // Not nullable
  assertEquals("Invalid field name", "field1", schema.getFields().get(0).name());
  assertEquals("Invalid field type", Schema.Type.BOOLEAN, schema.getFields().get(0).schema().getType());
  assertEquals("Invalid field default", null, schema.getFields().get(0).defaultVal());

  // Nullable: represented as an Avro union with null (not an Avro "optional"), and with no default
  assertEquals("Invalid nullable (union) type", Schema.Type.UNION, schema.getFields().get(1).schema().getType());
  assertEquals("Invalid nullable (union) type count", 2, schema.getFields().get(1).schema().getTypes().size());
  assertEquals("Invalid field type", Schema.Type.STRING, schema.getFields().get(1).schema().getTypes().get(0).getType());
  assertEquals("Invalid union default", null, schema.getFields().get(1).defaultVal());

  //System.out.println(schema.toString(true));
}
 
Example 13
Source File: TestPivotDeriver.java    From envelope with Apache License 2.0
@Test
public void testStaticPivot() throws Exception {
  List<Row> sourceList = Lists.newArrayList(
      RowFactory.create("A", "hello", "1"),
      RowFactory.create("A", "world", "2"),
      RowFactory.create("B", "hello", "3"),
      RowFactory.create("C", "world", "4"),
      RowFactory.create("D", "dummy", "5"));
  StructType schema = DataTypes.createStructType(Lists.newArrayList(
    DataTypes.createStructField("entity_id", DataTypes.StringType, true),
    DataTypes.createStructField("key", DataTypes.StringType, true),
    DataTypes.createStructField("value", DataTypes.StringType, true)
  ));
  Dataset<Row> source = Contexts.getSparkSession().createDataFrame(sourceList, schema);

  Map<String, Dataset<Row>> dependencies = Maps.newHashMap();
  dependencies.put("source", source);
  
  Config config = ConfigFactory.empty()
      .withValue(PivotDeriver.STEP_NAME_CONFIG, ConfigValueFactory.fromAnyRef("source"))
      .withValue(PivotDeriver.ENTITY_KEY_FIELD_NAMES_CONFIG, ConfigValueFactory.fromAnyRef(Lists.newArrayList("entity_id")))
      .withValue(PivotDeriver.PIVOT_KEY_FIELD_NAME_CONFIG, ConfigValueFactory.fromAnyRef("key"))
      .withValue(PivotDeriver.PIVOT_VALUE_FIELD_NAME_CONFIG, ConfigValueFactory.fromAnyRef("value"))
      .withValue(PivotDeriver.PIVOT_KEYS_SOURCE_CONFIG, ConfigValueFactory.fromAnyRef(PivotDeriver.PIVOT_KEYS_SOURCE_STATIC))
      .withValue(PivotDeriver.PIVOT_KEYS_LIST_CONFIG, ConfigValueFactory.fromAnyRef(Lists.newArrayList("hello", "world")));

  PivotDeriver d = new PivotDeriver();
  assertNoValidationFailures(d, config);
  d.configure(config);
  
  List<Row> results = d.derive(dependencies).collectAsList();
  
  assertEquals(results.size(), 4);
  assertTrue(results.contains(RowFactory.create("A", "1", "2")));
  assertTrue(results.contains(RowFactory.create("B", "3", null)));
  assertTrue(results.contains(RowFactory.create("C", null, "4")));
  assertTrue(results.contains(RowFactory.create("D", null, null)));
}
 
Example 14
Source File: ValueRow.java    From spliceengine with GNU Affero General Public License v3.0
@Override
public StructType schema() {
	StructField[] fields = new StructField[ncols];
	for (int i = 0; i < ncols; i++)
		fields[i] = column[i].getStructField(getNamedColumn(i));
	return DataTypes.createStructType(fields);
}
 
Example 15
Source File: ControlDataSet.java    From spliceengine with GNU Affero General Public License v3.0
/**
 * Writes this dataset to a Parquet file at the given location, building the
 * table schema from the operation's column definitions.
 *
 * @param dsp the data set processor
 * @param partitionBy the columns to partition by
 * @param location the target location for the Parquet file
 * @param compression the compression codec to use
 * @param context the operation context
 * @return a dataset containing a single row with the number of records written
 */
@Override
public DataSet<ExecRow> writeParquetFile(DataSetProcessor dsp, int[] partitionBy, String location, String compression, OperationContext context) {

    try {
        //Generate Table Schema
        String[] colNames;
        DataValueDescriptor[] dvds;
        if (context.getOperation() instanceof DMLWriteOperation) {
            dvds  = context.getOperation().getExecRowDefinition().getRowArray();
            colNames = ((DMLWriteOperation) context.getOperation()).getColumnNames();
        } else if (context.getOperation() instanceof ExportOperation) {
            dvds = context.getOperation().getLeftOperation().getLeftOperation().getExecRowDefinition().getRowArray();
            ExportOperation export = (ExportOperation) context.getOperation();
            ResultColumnDescriptor[] descriptors = export.getSourceResultColumnDescriptors();
            colNames = new String[descriptors.length];
            int i = 0;
            for (ResultColumnDescriptor rcd : export.getSourceResultColumnDescriptors()) {
                colNames[i++] = rcd.getName();
            }
        } else {
            throw new IllegalArgumentException("Unsupported operation type: " + context.getOperation());
        }
        StructField[] fields = new StructField[colNames.length];
        for (int i=0 ; i<colNames.length ; i++){
            fields[i] = dvds[i].getStructField(colNames[i]);
        }
        StructType tableSchema = DataTypes.createStructType(fields);
        RecordWriter<Void, Object> rw = ParquetWriterService.getFactory().getParquetRecordWriter(location, compression, tableSchema);

        try {
            ExpressionEncoder<Row> encoder = RowEncoder.apply(tableSchema);
            while (iterator.hasNext()) {
                ValueRow vr = (ValueRow) iterator.next();
                context.recordWrite();

                rw.write(null, encoder.toRow(vr));
            }
        } finally {
            rw.close(null);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    ValueRow valueRow=new ValueRow(1);
    valueRow.setColumn(1,new SQLLongint(context.getRecordsWritten()));
    return new ControlDataSet(Collections.singletonList(valueRow).iterator());
}
 
Example 16
Source File: TestEventTimeHistoryPlanner.java    From envelope with Apache License 2.0
@Test
public void testCarryForwardMultipleWhenNullOutOfOrderMultipleValued() {
  p = new EventTimeHistoryPlanner();
  config = config.
      withValue(EventTimeHistoryPlanner.CARRY_FORWARD_CONFIG_NAME, ConfigValueFactory.fromAnyRef(true)).
      withValue(EventTimeHistoryPlanner.VALUE_FIELD_NAMES_CONFIG_NAME, ConfigValueFactory.fromAnyRef(Lists.newArrayList("value1","value2")));
  assertNoValidationFailures(p, config);
  p.configure(config);

  arrivingSchema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("key", DataTypes.StringType, false),
      DataTypes.createStructField("value1", DataTypes.StringType, true),
      DataTypes.createStructField("value2", DataTypes.StringType, true),
      DataTypes.createStructField("timestamp", DataTypes.LongType, false)));
  existingSchema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("key", DataTypes.StringType, false),
      DataTypes.createStructField("value1", DataTypes.StringType, false),
      DataTypes.createStructField("value2", DataTypes.StringType, false),
      DataTypes.createStructField("timestamp", DataTypes.LongType, false),
      DataTypes.createStructField("startdate", DataTypes.LongType, false),
      DataTypes.createStructField("enddate", DataTypes.LongType, false),
      DataTypes.createStructField("currentflag", DataTypes.StringType, false),
      DataTypes.createStructField("lastupdated", DataTypes.StringType, false)));

  existing.add(new RowWithSchema(existingSchema, "a", "hello1:100", "hello2:100", 100L, 100L, 253402214400000L, EventTimeHistoryPlanner.CURRENT_FLAG_DEFAULT_YES, ""));
  arriving.add(new RowWithSchema(arrivingSchema, "a", null, "hello2:200", 200L));
  arriving.add(new RowWithSchema(arrivingSchema, "a", "hello1:150", null, 150L));
  key = new RowWithSchema(keySchema, "a");

  List<Row> planned = p.planMutationsForKey(key, arriving, existing);

  assertEquals(planned.size(), 3);
  assertEquals(PlannerUtils.getMutationType(planned.get(0)), MutationType.UPDATE);
  assertEquals(planned.get(0).getAs("value1"), "hello1:100");
  assertEquals(planned.get(0).getAs("value2"), "hello2:100");
  assertEquals(planned.get(0).getAs("startdate"), 100L);
  assertEquals(planned.get(0).getAs("enddate"), 149L);
  assertEquals(planned.get(0).getAs("currentflag"), EventTimeHistoryPlanner.CURRENT_FLAG_DEFAULT_NO);
  assertEquals(PlannerUtils.getMutationType(planned.get(1)), MutationType.INSERT);
  assertEquals(planned.get(1).getAs("value1"), "hello1:150");
  assertEquals(planned.get(1).getAs("value2"), "hello2:100");
  assertEquals(planned.get(1).getAs("startdate"), 150L);
  assertEquals(planned.get(1).getAs("enddate"), 199L);
  assertEquals(planned.get(1).getAs("currentflag"), EventTimeHistoryPlanner.CURRENT_FLAG_DEFAULT_NO);
  assertEquals(PlannerUtils.getMutationType(planned.get(2)), MutationType.INSERT);
  assertEquals(planned.get(2).getAs("value1"), "hello1:150");
  assertEquals(planned.get(2).getAs("value2"), "hello2:200");
  assertEquals(planned.get(2).getAs("startdate"), 200L);
  assertEquals(planned.get(2).getAs("enddate"), 253402214400000L);
  assertEquals(planned.get(2).getAs("currentflag"), EventTimeHistoryPlanner.CURRENT_FLAG_DEFAULT_YES);
}
 
Example 17
Source File: VideoStreamProcessor.java    From video-stream-classification with Apache License 2.0
public static void main(String[] args) throws Exception {
  //Read properties
  Properties prop = PropertyFileReader.readPropertyFile();

  //SparkSession
  SparkSession spark = SparkSession
      .builder()
      .appName("VideoStreamProcessor")
      .master(prop.getProperty("spark.master.url"))
      .getOrCreate();

  //directory to save image files with motion detected
  final String processedImageDir = prop.getProperty("processed.output.dir");
  logger.warn("Output directory for saving processed images is set to " + processedImageDir
      + ". This is configured in the processed.output.dir key of the property file.");

  //create schema for json message
  StructType schema = DataTypes.createStructType(new StructField[] {
      DataTypes.createStructField("cameraId", DataTypes.StringType, true),
      DataTypes.createStructField("timestamp", DataTypes.TimestampType, true),
      DataTypes.createStructField("rows", DataTypes.IntegerType, true),
      DataTypes.createStructField("cols", DataTypes.IntegerType, true),
      DataTypes.createStructField("type", DataTypes.IntegerType, true),
      DataTypes.createStructField("data", DataTypes.StringType, true)
  });

  //create Dataset from stream messages from Kafka
  Dataset<VideoEventData> ds = spark
      .readStream()
      .format("kafka")
      .option("kafka.bootstrap.servers", prop.getProperty("kafka.bootstrap.servers"))
      .option("subscribe", prop.getProperty("kafka.topic"))
      .option("kafka.max.partition.fetch.bytes", prop.getProperty("kafka.max.partition.fetch.bytes"))
      .option("kafka.max.poll.records", prop.getProperty("kafka.max.poll.records"))
      .load()
      .selectExpr("CAST(value AS STRING) as message")
      .select(functions.from_json(functions.col("message"), schema).as("json"))
      .select("json.*")
      .as(Encoders.bean(VideoEventData.class));

  //key-value pair of cameraId-VideoEventData
  KeyValueGroupedDataset<String, VideoEventData> kvDataset = ds.groupByKey(
      new MapFunction<VideoEventData, String>() {
        @Override
        public String call(VideoEventData value) throws Exception {
          return value.getCameraId();
        }
      }, Encoders.STRING());

  //process each camera's events, carrying state between micro-batches
  Dataset<VideoEventData> processedDataset = kvDataset.mapGroupsWithState(
      new MapGroupsWithStateFunction<String, VideoEventData, VideoEventData, VideoEventData>() {
        @Override
        public VideoEventData call(String key, Iterator<VideoEventData> values,
            GroupState<VideoEventData> state) throws Exception {
          logger.warn("CameraId=" + key + " PartitionId=" + TaskContext.getPartitionId());
          VideoEventData existing = null;
          //check previous state
          if (state.exists()) {
            existing = state.get();
          }
          //classify image
          VideoEventData processed = ImageProcessor.process(key, values, processedImageDir, existing);

          //update last processed
          if (processed != null) {
            state.update(processed);
          }
          return processed;
        }
      }, Encoders.bean(VideoEventData.class), Encoders.bean(VideoEventData.class));

  //start
  StreamingQuery query = processedDataset.writeStream()
      .outputMode("update")
      .format("console")
      .start();

  //await termination
  query.awaitTermination();
}
 
Example 18
Source File: AllButEmptyStringAggregationFunction.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
public AllButEmptyStringAggregationFunction() {
    inputSchema = DataTypes.createStructType(new StructField[]{DataTypes.createStructField("value", DataTypes.StringType, true)});
    bufferSchema = DataTypes.createStructType(new StructField[]{DataTypes.createStructField("currentSelection", DataTypes.StringType, true)});
}
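For context, these two schemas are the ones Spark's UserDefinedAggregateFunction contract (deprecated since Spark 3.0 in favor of Aggregator) asks an aggregation function to expose. A minimal sketch of how the fields built above would typically surface, assuming this class extends that base type:

// Sketch only: how the schemas built in the constructor are usually exposed.
@Override
public StructType inputSchema() {
    return inputSchema;   // a single StringType column named "value"
}

@Override
public StructType bufferSchema() {
    return bufferSchema;  // a single StringType column named "currentSelection"
}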
 
Example 19
Source File: StringDateTimeModel.java    From envelope with Apache License 2.0
@Override
public StructType getSchema() {
  return DataTypes.createStructType(Lists.newArrayList(field));
}
 
Example 20
Source File: SparkDataSetTest.java    From spliceengine with GNU Affero General Public License v3.0
@Test
public void testFoobar() {
    List<Row> foo = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        ValueRow row = new ValueRow(1);
        row.setColumn(1, new SQLInteger(i));
        foo.add(row);
    }

    StructType schema = DataTypes.createStructType(new StructField[]{
            DataTypes.createStructField("col1", DataTypes.IntegerType, true)});

//        ValueRow row = new ValueRow(2);
//        row.setColumn(1,new SQLDouble());
//        row.setColumn(2,new SQLInteger());

/*
        SpliceSpark.getSession().read().parquet("/Users/jleach/Documents/workspace/spliceengine/hbase_sql/target/external/simple_parquet")
                .select(new Column("0"),new Column("1"))
                .filter(col("0").gt(1).or(col("0").lt(4))).explain(true);
*/
    SpliceSpark.getSessionUnsafe().createDataFrame(foo, schema).write().format("orc").mode(SaveMode.Append)
            .orc("/Users/jleach/Documents/workspace/spliceengine/hbase_sql/target/external/orc_it");

    Column filter = (new Column("col1")).gt(1L).and(new Column("col1").lt(1L));

    SpliceSpark.getSessionUnsafe().read().schema(schema)
            .orc("/Users/jleach/Documents/workspace/spliceengine/hbase_sql/target/external/orc_it")
            .filter(filter).show();
//                .select(new Column("0"),new Column("1")).show();

/*
        Dataset<Row> leftSide = SpliceSpark.getSession().createDataFrame(foo,foo.get(0).schema());
        Dataset<Row> rightSide = SpliceSpark.getSession().createDataFrame(foo.subList(0,8),foo.get(0).schema());

        Column col =
                (leftSide.col("0").equalTo(rightSide.col("0"))).
                and((leftSide.col("1")).equalTo(rightSide.col("1")));
        leftSide.join(rightSide,col,"inner").explain(true);
        leftSide.join(rightSide,col,"inner").show(10);
        leftSide.join(broadcast(rightSide),col,"leftouter").explain(true);
        leftSide.join(broadcast(rightSide),col,"leftouter").show(10);
        leftSide.join(broadcast(rightSide),col,"leftanti").show(10);
        */
}