Java Code Examples for org.apache.spark.sql.Row#get()

The following examples show how to use org.apache.spark.sql.Row#get(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RowUtils.java    From envelope with Apache License 2.0 6 votes vote down vote up
/**
 * Reports whether two rows disagree on any of the named fields.
 *
 * <p>Each field is resolved by name in each row independently. Two {@code null} values count
 * as equal; a {@code null} paired with a non-null value counts as different.
 *
 * @param first the first row to compare
 * @param second the second row to compare
 * @param valueFieldNames names of the fields to compare
 * @return {@code true} as soon as one named field differs, otherwise {@code false}
 */
public static boolean different(Row first, Row second, List<String> valueFieldNames) {
  for (String name : valueFieldNames) {
    Object left = first.get(first.fieldIndex(name));
    Object right = second.get(second.fieldIndex(name));

    boolean leftMissing = left == null;
    boolean rightMissing = right == null;

    // Exactly one side is null.
    if (leftMissing != rightMissing) {
      return true;
    }

    // Both sides are present here, so equals() is only ever given a non-null argument.
    if (!leftMissing && !left.equals(right)) {
      return true;
    }
  }

  return false;
}
 
Example 2
Source File: TestAppendPlanner.java    From envelope with Apache License 2.0 6 votes vote down vote up
/**
 * Plans mutations with an empty configuration and then looks up the "lastupdated" field on
 * the single planned row. The test passes only if an {@link IllegalArgumentException} is
 * raised somewhere in the method body.
 */
@Test(expected=IllegalArgumentException.class)
public void testNoLastUpdated() {
  Config config = ConfigFactory.empty();
  AppendPlanner ap = new AppendPlanner();
  assertNoValidationFailures(ap, config);
  ap.configure(config);

  List<Tuple2<MutationType, Dataset<Row>>> planned = ap.planMutationsForSet(dataFrame);

  // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
  assertEquals(1, planned.size());

  Dataset<Row> plannedDF = planned.get(0)._2();

  assertEquals(MutationType.INSERT, planned.get(0)._1());
  assertEquals(1, plannedDF.count());

  Row plannedRow = plannedDF.collectAsList().get(0);
  // NOTE(review): presumably this field lookup is what raises the expected
  // IllegalArgumentException, since the configuration is empty -- confirm.
  plannedRow.get(plannedRow.fieldIndex("lastupdated"));
}
 
Example 3
Source File: NestDeriver.java    From envelope with Apache License 2.0 6 votes vote down vote up
/**
 * Nests the 'from' rows of a cogrouped pair inside the 'into' row.
 *
 * <p>The result holds every value of the first 'into' row followed by one extra trailing
 * value: an array containing all of the 'from' rows.
 *
 * @param cogrouped the 'into' rows (first element) and the 'from' rows (second element)
 * @return a new row with the 'into' values plus the array of 'from' rows
 */
@Override
public Row call(Tuple2<Iterable<Row>, Iterable<Row>> cogrouped) throws Exception {
  // Only the first 'into' record is read; a single one per key is expected
  Row into = cogrouped._1().iterator().next();
  Row[] from = Iterables.toArray(cogrouped._2(), Row.class);

  int width = into.size();
  Object[] combined = new Object[width + 1];

  int position = 0;
  while (position < width) {
    combined[position] = into.get(position);
    position++;
  }
  // position now equals width: the extra slot reserved for the nested rows
  combined[position] = from;

  return RowFactory.create(combined);
}
 
Example 4
Source File: RowUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a row containing only the fields named by a subset schema.
 *
 * <p>Values are copied from {@code row} by field name, in the order the names appear in
 * {@code subsetSchema}.
 *
 * @param row the row to take values from
 * @param subsetSchema the schema naming the fields to keep
 * @return a new {@code RowWithSchema} carrying {@code subsetSchema} and the selected values
 */
public static Row subsetRow(Row row, StructType subsetSchema) {
  Object[] selected = new Object[subsetSchema.length()];
  String[] names = subsetSchema.fieldNames();

  for (int pos = 0; pos < names.length; pos++) {
    int sourceIndex = row.fieldIndex(names[pos]);
    selected[pos] = row.get(sourceIndex);
  }

  return new RowWithSchema(subsetSchema, selected);
}
 
Example 5
Source File: MLContextTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a DML script that builds a 2x2 matrix, fetches it as a DataFrame of vectors, and
 * checks the column types and the id/vector contents of both rows.
 */
@Test
public void testOutputDataFrameOfVectorsDML() {
	System.out.println("MLContextTest - output DataFrame of vectors DML");

	String dmlSource = "m=matrix('1 2 3 4',rows=2,cols=2);";
	Script script = dml(dmlSource).out("m");
	MLResults results = ml.execute(script);
	Dataset<Row> sortedDF = results.getDataFrame("m", true).sort(RDDConverterUtils.DF_ID_COLUMN);

	// the first column must be a double id, the second a vector
	StructField[] columns = sortedDF.schema().fields();
	Assert.assertTrue(columns[0].dataType() instanceof DoubleType);
	Assert.assertTrue(columns[1].dataType() instanceof VectorUDT);

	List<Row> rows = sortedDF.collectAsList();

	// expected vector contents per row; the expected id of row i is i + 1
	double[][] expectedVectors = { { 1.0, 2.0 }, { 3.0, 4.0 } };
	for (int i = 0; i < expectedVectors.length; i++) {
		Row current = rows.get(i);
		Assert.assertEquals(i + 1.0, current.getDouble(0), 0.0);
		Vector vec = (DenseVector) current.get(1);
		double[] contents = vec.toArray();
		Assert.assertArrayEquals(expectedVectors[i], contents, 0.0);
	}
}
 
Example 6
Source File: HashDeriver.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Concatenates the selected field values of a row and appends the resulting bytes to it.
 *
 * <p>The buffer {@code sb} is cleared first. Every field for which
 * {@code includeInConcatenation} returns true contributes its value (or {@code nullString}
 * when the value is null) followed by {@code delimiter}. The concatenated text is encoded as
 * UTF-8 so the appended bytes do not depend on the JVM's default charset.
 *
 * @param toHash the row whose fields are concatenated
 * @return the result of {@code RowUtils.append} applied to the row and the encoded bytes
 */
@Override
public Row call(Row toHash) {
  sb.setLength(0);

  // The field count does not change while iterating, so read it once.
  int numFields = toHash.schema().size();
  for (int fieldNum = 0; fieldNum < numFields; fieldNum++) {
    if (includeInConcatenation(toHash, fieldNum)) {
      Object value = toHash.get(fieldNum);
      sb.append(value != null ? value : nullString);
      sb.append(delimiter);
    }
  }

  // An explicit charset keeps the output identical across differently configured JVMs.
  return RowUtils.append(toHash, sb.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
}
 
Example 7
Source File: ToRecord.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Spark row into a list of writables, one per row item, choosing the writable
 * type from the column types of {@code schema}.
 *
 * @param v1 the row to convert
 * @return one writable per row item, in item order
 * @throws IllegalArgumentException if the row size differs from the schema's column count
 * @throws IllegalStateException if a row item is null, or its schema type is not one of
 *         Double, Float, Integer or Long
 */
@Override
public List<Writable> call(Row v1) throws Exception {
    final int rowSize = v1.size();
    if (rowSize != schema.numColumns()) {
        throw new IllegalArgumentException("Invalid number of columns for row " + rowSize
                        + " should have matched schema columns " + schema.numColumns());
    }

    List<Writable> writables = new ArrayList<>();
    for (int col = 0; col < rowSize; col++) {
        if (v1.get(col) == null) {
            throw new IllegalStateException("Row item " + col + " is null");
        }

        final Writable converted;
        switch (schema.getType(col)) {
            case Double:
                converted = new DoubleWritable(v1.getDouble(col));
                break;
            case Float:
                converted = new FloatWritable(v1.getFloat(col));
                break;
            case Integer:
                converted = new IntWritable(v1.getInt(col));
                break;
            case Long:
                converted = new LongWritable(v1.getLong(col));
                break;
            default:
                throw new IllegalStateException("Illegal type");
        }
        writables.add(converted);
    }
    return writables;
}
 
Example 8
Source File: RandomForestRegressionModelInfoAdapterBridgeTest.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a random-forest regression model placed in a pipeline gives the same
 * predictions after an export/import round trip as the Spark pipeline it was exported from.
 */
@Test
public void testRandomForestRegressionWithPipeline() {
    // Read the LIBSVM-formatted regression fixture into a DataFrame.
    DataFrame dataset = sqlContext.read().format("libsvm").load("src/test/resources/regression_test.libsvm");

    // 70% of the rows are used for training, 30% are held out for the comparison.
    DataFrame[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    DataFrame train = parts[0];
    DataFrame heldOut = parts[1];

    // Fit the random forest on the training rows.
    RandomForestRegressionModel forestModel = new RandomForestRegressor()
            .setFeaturesCol("features").fit(train);

    // Wrap the fitted model as the only pipeline stage and fit the pipeline.
    Pipeline singleStage = new Pipeline()
            .setStages(new PipelineStage[]{forestModel});
    PipelineModel sparkPipeline = singleStage.fit(train);

    // Round trip: export the pipeline to bytes, then import it as a Transformer.
    byte[] serialized = ModelExporter.export(sparkPipeline, null);
    Transformer transformer = ModelImporter.importAndGetTransformer(serialized);

    Row[] sparkRows = sparkPipeline.transform(heldOut).select("features", "prediction").collect();

    // Each Spark prediction must match the imported transformer's prediction.
    for (Row sparkRow : sparkRows) {
        Vector features = (Vector) sparkRow.get(0);
        double sparkPrediction = sparkRow.getDouble(1);

        Map<String, Object> payload = new HashMap<String, Object>();
        String inputKey = transformer.getInputKeys().iterator().next();
        payload.put(inputKey, features.toArray());
        transformer.transform(payload);
        double importedPrediction = (double) payload.get(transformer.getOutputKeys().iterator().next());

        assertEquals(sparkPrediction, importedPrediction, EPSILON);
    }
}
 
Example 9
Source File: RowUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a copy of a row in which one named field holds a replacement value.
 *
 * <p>The input row is not modified; the result is a new {@code RowWithSchema} that reuses
 * the input row's schema.
 *
 * @param row the row to copy
 * @param fieldName the name of the field whose value is replaced
 * @param replacement the value to store in that field
 * @return a new row equal to {@code row} except for the replaced field
 */
public static Row set(Row row, String fieldName, Object replacement) {
  // Resolved once up front instead of on every iteration. Whatever row.fieldIndex raises
  // for a name it cannot resolve propagates from here, whatever the width of the row.
  int targetIndex = row.fieldIndex(fieldName);
  int numFields = row.schema().fields().length;

  Object[] values = new Object[row.length()];
  for (int i = 0; i < numFields; i++) {
    values[i] = (i == targetIndex) ? replacement : row.get(i);
  }

  return new RowWithSchema(row.schema(), values);
}
 
Example 10
Source File: SQLRowId.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Loads this value from one position of a row.
 *
 * <p>When the row is null at {@code ordinal} this value is set to null; otherwise it is
 * marked non-null and the row's item is stored in {@code bytes} as a byte array.
 *
 * @param row the row to read from
 * @param ordinal the position within the row
 */
@Override
public void read(Row row, int ordinal) throws StandardException {
    if (!row.isNullAt(ordinal)) {
        isNull = false;
        bytes = (byte[]) row.get(ordinal);
        return;
    }
    setToNull();
}
 
Example 11
Source File: TestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one item of a row using the typed accessor that matches the given type.
 *
 * @param row the row to read from
 * @param ord the position within the row
 * @param type the type whose {@code typeId()} selects the accessor
 * @return {@code null} when the row is null at {@code ord}, otherwise the value read
 * @throws IllegalArgumentException if the type id is not one of the handled cases
 */
private static Object getPrimitiveValue(Row row, int ord, Type type) {
  if (row.isNullAt(ord)) {
    return null;
  }

  final Object value;
  switch (type.typeId()) {
    case BOOLEAN:
      value = row.getBoolean(ord);
      break;
    case INTEGER:
      value = row.getInt(ord);
      break;
    case LONG:
      value = row.getLong(ord);
      break;
    case FLOAT:
      value = row.getFloat(ord);
      break;
    case DOUBLE:
      value = row.getDouble(ord);
      break;
    case STRING:
      value = row.getString(ord);
      break;
    case BINARY:
    case FIXED:
    case UUID:
      // these three are read through the untyped accessor
      value = row.get(ord);
      break;
    case DATE:
      value = row.getDate(ord);
      break;
    case TIMESTAMP:
      value = row.getTimestamp(ord);
      break;
    case DECIMAL:
      value = row.getDecimal(ord);
      break;
    default:
      throw new IllegalArgumentException("Unhandled type " + type);
  }
  return value;
}
 
Example 12
Source File: TestHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Compares a record with a row field by field.
 *
 * <p>For each field of {@code struct}, the values at the same position in {@code rec} and
 * {@code row} are handed to the type-aware {@code assertEqualsSafe} overload.
 *
 * @param struct the struct type whose fields drive the comparison
 * @param rec the record holding the expected values
 * @param row the row holding the actual values
 */
public static void assertEqualsSafe(Types.StructType struct, Record rec, Row row) {
  int position = 0;
  for (Types.NestedField field : struct.fields()) {
    Type fieldType = field.type();
    Object want = rec.get(position);
    Object got = row.get(position);

    assertEqualsSafe(fieldType, want, got);
    position++;
  }
}
 
Example 13
Source File: DecisionTreeRegressionModelBridgeTest.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a decision-tree regression model gives the same predictions after an
 * export/import round trip as the Spark model it was exported from.
 */
@Test
public void testDecisionTreeRegression() {
    // Read the LIBSVM-formatted regression fixture into a DataFrame.
    DataFrame dataset = sqlContext.read().format("libsvm").load("src/test/resources/regression_test.libsvm");

    // 70% of the rows are used for training, 30% are held out for the comparison.
    DataFrame[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    DataFrame train = parts[0];
    DataFrame heldOut = parts[1];

    // Fit the decision tree on the training rows.
    DecisionTreeRegressionModel treeModel = new DecisionTreeRegressor()
            .setFeaturesCol("features").fit(train);

    // Round trip: export the model to bytes, then import it as a Transformer.
    byte[] serialized = ModelExporter.export(treeModel, null);
    Transformer transformer = ModelImporter.importAndGetTransformer(serialized);

    Row[] sparkRows = treeModel.transform(heldOut).select("features", "prediction").collect();

    // Each Spark prediction must match the imported transformer's prediction.
    for (Row sparkRow : sparkRows) {
        Vector features = (Vector) sparkRow.get(0);
        double sparkPrediction = sparkRow.getDouble(1);

        Map<String, Object> payload = new HashMap<String, Object>();
        String inputKey = transformer.getInputKeys().iterator().next();
        payload.put(inputKey, features.toArray());
        transformer.transform(payload);
        double importedPrediction = (double) payload.get(transformer.getOutputKeys().iterator().next());

        System.out.println(sparkPrediction + ", " + importedPrediction);
        assertEquals(sparkPrediction, importedPrediction, EPSILON);
    }
}
 
Example 14
Source File: UserType.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Loads this value from one position of a row.
 *
 * <p>When the row is null at {@code ordinal} this value is set to null. Otherwise it is
 * marked non-null; a byte-array item is passed through
 * {@code SerializationUtils.deserialize} and any other item is stored as is.
 *
 * @param row the row to read from
 * @param ordinal the position within the row
 */
@Override
public void read(Row row, int ordinal) throws StandardException {
	if (row.isNullAt(ordinal)) {
		setToNull();
		return;
	}

	isNull = false;
	Object raw = row.get(ordinal);
	if (!(raw instanceof byte[])) {
		// not a byte array: keep the item unchanged
		value = raw;
		return;
	}
	value = SerializationUtils.deserialize((byte[]) raw);
}
 
Example 15
Source File: SQLBinary.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Loads this value from one position of a row.
 *
 * <p>When the row is null at {@code ordinal} this value is set to null; otherwise it is
 * marked non-null and the row's item is stored in {@code dataValue} as a byte array.
 *
 * @param row the row to read from
 * @param ordinal the position within the row
 */
@Override
public void read(Row row, int ordinal) throws StandardException {
    if (!row.isNullAt(ordinal)) {
        isNull = false;
        dataValue = (byte[]) row.get(ordinal);
        return;
    }
    setToNull();
}
 
Example 16
Source File: GenericsHelpers.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Compares a record with a row field by field.
 *
 * <p>For each field of {@code struct}, the values at the same position in {@code expected}
 * and {@code actual} are handed to the type-aware {@code assertEqualsSafe} overload.
 *
 * @param struct the struct type whose fields drive the comparison
 * @param expected the record holding the expected values
 * @param actual the row holding the actual values
 */
public static void assertEqualsSafe(Types.StructType struct, Record expected, Row actual) {
  int position = 0;
  for (Types.NestedField field : struct.fields()) {
    Type fieldType = field.type();
    Object want = expected.get(position);
    Object got = actual.get(position);

    assertEqualsSafe(fieldType, want, got);
    position++;
  }
}
 
Example 17
Source File: MLContextTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a DML script that builds a 2x2 matrix, fetches it as a DataFrame of vectors, and
 * checks the column types and the id/vector contents of both rows.
 */
@Test
public void testOutputDataFrameOfVectorsDML() {
	System.out.println("MLContextTest - output DataFrame of vectors DML");

	String dmlSource = "m=matrix('1 2 3 4',rows=2,cols=2);";
	Script script = dml(dmlSource).out("m");
	MLResults results = ml.execute(script);
	Dataset<Row> sortedDF = results.getDataFrame("m", true).sort(RDDConverterUtils.DF_ID_COLUMN);

	// the first column must be a double id, the second a vector
	StructField[] columns = sortedDF.schema().fields();
	Assert.assertTrue(columns[0].dataType() instanceof DoubleType);
	Assert.assertTrue(columns[1].dataType() instanceof VectorUDT);

	List<Row> rows = sortedDF.collectAsList();

	// expected vector contents per row; the expected id of row i is i + 1
	double[][] expectedVectors = { { 1.0, 2.0 }, { 3.0, 4.0 } };
	for (int i = 0; i < expectedVectors.length; i++) {
		Row current = rows.get(i);
		Assert.assertEquals(i + 1.0, current.getDouble(0), 0.0);
		Vector vec = (DenseVector) current.get(1);
		double[] contents = vec.toArray();
		Assert.assertArrayEquals(expectedVectors[i], contents, 0.0);
	}
}
 
Example 18
Source File: DecisionTreeRegressionModelBridgeTest.java    From spark-transformers with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a pipeline containing a decision-tree regressor gives the same predictions
 * after an export/import round trip as the fitted Spark pipeline it was exported from.
 */
@Test
public void testDecisionTreeRegressionWithPipeline() {
    // Read the LIBSVM-formatted regression fixture into a DataFrame.
    DataFrame dataset = sqlContext.read().format("libsvm").load("src/test/resources/regression_test.libsvm");

    // 70% of the rows are used for training, 30% are held out for the comparison.
    DataFrame[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    DataFrame train = parts[0];
    DataFrame heldOut = parts[1];

    // Configure the (not yet fitted) decision-tree regressor.
    DecisionTreeRegressor regressor = new DecisionTreeRegressor()
            .setFeaturesCol("features");

    // The regressor is the only pipeline stage; fitting the pipeline trains it.
    Pipeline singleStage = new Pipeline()
            .setStages(new PipelineStage[]{regressor});
    PipelineModel sparkPipeline = singleStage.fit(train);

    // Round trip: export the pipeline to bytes, then import it as a Transformer.
    byte[] serialized = ModelExporter.export(sparkPipeline, null);
    Transformer transformer = ModelImporter.importAndGetTransformer(serialized);

    Row[] sparkRows = sparkPipeline.transform(heldOut).select("features", "prediction").collect();

    // Each Spark prediction must match the imported transformer's prediction.
    for (Row sparkRow : sparkRows) {
        Vector features = (Vector) sparkRow.get(0);
        double sparkPrediction = sparkRow.getDouble(1);

        Map<String, Object> payload = new HashMap<String, Object>();
        String inputKey = transformer.getInputKeys().iterator().next();
        payload.put(inputKey, features.toArray());
        transformer.transform(payload);
        double importedPrediction = (double) payload.get(transformer.getOutputKeys().iterator().next());

        assertEquals(sparkPrediction, importedPrediction, EPSILON);
    }
}
 
Example 19
Source File: DecisionTreeClassificationModelBridgeTest.java    From spark-transformers with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a decision-tree classification model gives the same prediction and raw
 * prediction after an export/import round trip as the Spark model it was exported from.
 */
@Test
public void testDecisionTreeClassificationRawPrediction() {
    // Read the LIBSVM-formatted classification fixture into a DataFrame.
    DataFrame dataset = sqlContext.read().format("libsvm").load("src/test/resources/classification_test.libsvm");

    // Index the "label" column into "labelIndex" and apply that to the whole dataset.
    StringIndexerModel labelIndexer = new StringIndexer()
            .setInputCol("label")
            .setOutputCol("labelIndex")
            .fit(dataset);
    dataset = labelIndexer.transform(dataset);

    // 70% of the rows are used for training, 30% are held out for the comparison.
    DataFrame[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    DataFrame train = parts[0];
    DataFrame heldOut = parts[1];

    // Fit the decision tree on the training rows.
    DecisionTreeClassificationModel treeModel = new DecisionTreeClassifier()
            .setLabelCol("labelIndex")
            .setFeaturesCol("features")
            .setRawPredictionCol("rawPrediction")
            .setPredictionCol("prediction")
            .fit(train);

    // Round trip: export the model to bytes, then import it as a decision-tree transformer.
    byte[] serialized = ModelExporter.export(treeModel, null);
    Transformer transformer = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(serialized);

    Row[] sparkRows = treeModel.transform(heldOut).select("features", "prediction", "rawPrediction").collect();

    // Both the prediction and the raw prediction must match for every held-out row.
    for (Row sparkRow : sparkRows) {
        Vector features = (Vector) sparkRow.get(0);
        double sparkPrediction = sparkRow.getDouble(1);
        Vector sparkRawVector = (Vector) sparkRow.get(2);
        double[] sparkRaw = sparkRawVector.toArray();

        Map<String, Object> payload = new HashMap<>();
        String inputKey = transformer.getInputKeys().iterator().next();
        payload.put(inputKey, features.toArray());
        transformer.transform(payload);
        double importedPrediction = (double) payload.get(transformer.getOutputKeys().iterator().next());
        double[] importedRaw = (double[]) payload.get("rawPrediction");

        assertEquals(sparkPrediction, importedPrediction, EPSILON);
        assertArrayEquals(sparkRaw, importedRaw, EPSILON);
    }
}
 
Example 20
Source File: DecisionTreeClassificationModelBridgeTest.java    From spark-transformers with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a pipeline of a string indexer followed by a decision-tree classifier gives
 * the same predictions after an export/import round trip as the fitted Spark pipeline.
 */
@Test
public void testDecisionTreeClassificationWithPipeline() {
    // Read the LIBSVM-formatted classification fixture into a DataFrame.
    DataFrame dataset = sqlContext.read().format("libsvm").load("src/test/resources/classification_test.libsvm");

    // 70% of the rows are used for training, 30% are held out for the comparison.
    DataFrame[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    DataFrame train = parts[0];
    DataFrame heldOut = parts[1];

    // Stage 1: index the "label" column into "labelIndex".
    StringIndexer labelIndexer = new StringIndexer()
            .setInputCol("label")
            .setOutputCol("labelIndex");

    // Stage 2: the (not yet fitted) decision-tree classifier reading the indexed label.
    DecisionTreeClassifier classifier = new DecisionTreeClassifier()
            .setLabelCol("labelIndex")
            .setFeaturesCol("features");

    // Fitting the pipeline fits the indexer and then trains the classifier.
    Pipeline twoStage = new Pipeline()
            .setStages(new PipelineStage[]{labelIndexer, classifier});
    PipelineModel sparkPipeline = twoStage.fit(train);

    // Round trip: export the pipeline to bytes, then import it as a Transformer.
    byte[] serialized = ModelExporter.export(sparkPipeline, null);
    Transformer transformer = ModelImporter.importAndGetTransformer(serialized);

    Row[] sparkRows = sparkPipeline.transform(heldOut).select("label", "features", "prediction").collect();

    // Each Spark prediction must match the imported transformer's prediction.
    for (Row sparkRow : sparkRows) {
        Vector features = (Vector) sparkRow.get(1);
        double sparkPrediction = sparkRow.getDouble(2);

        Map<String, Object> payload = new HashMap<String, Object>();
        payload.put("features", features.toArray());
        // the label is passed to the transformer in its string form
        String labelText = sparkRow.get(0).toString();
        payload.put("label", labelText);
        transformer.transform(payload);
        double importedPrediction = (double) payload.get("prediction");

        assertEquals(sparkPrediction, importedPrediction, EPSILON);
    }
}