Java Code Examples for org.apache.spark.sql.RowFactory#create()

The following examples show how to use org.apache.spark.sql.RowFactory#create(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: RDDConverterUtilsExt.java From systemds with Apache License 2.0

5 votes

/**
 * Copies every field of the incoming row and appends one extra column holding
 * the paired {@code Long} value plus one, stored as a {@code Double}.
 *
 * @param arg0 pair of the source row and the number associated with it
 * @return a new row with the original fields followed by the appended value
 */
@Override
public Row call(Tuple2<Row, Long> arg0) throws Exception {
	Row original = arg0._1;
	int oldNumCols = original.length();
	Object[] fields = new Object[oldNumCols + 1];
	for (int i = 0; i < oldNumCols; i++) {
		fields[i] = original.get(i);
	}
	// Double.valueOf replaces the boxing constructor new Double(double),
	// which has been deprecated since Java 9.
	fields[oldNumCols] = Double.valueOf((double) (arg0._2 + 1));
	return RowFactory.create(fields);
}

Example 2

Source File: JavaVectorAssemblerExample.java From SparkDemo with MIT License

5 votes

/**
 * Builds a one-row dataset, combines the "hour", "mobile" and "userFeatures"
 * columns into a single "features" vector column, and prints the outcome.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
  SparkSession spark = SparkSession.builder().appName("JavaVectorAssemblerExample").getOrCreate();

  // $example on$
  StructField[] columns = {
    createStructField("id", IntegerType, false),
    createStructField("hour", IntegerType, false),
    createStructField("mobile", DoubleType, false),
    createStructField("userFeatures", new VectorUDT(), false),
    createStructField("clicked", DoubleType, false)
  };
  StructType schema = createStructType(columns);

  // a single sample record matching the schema above
  Row sample = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0);
  Dataset<Row> dataset = spark.createDataFrame(Arrays.asList(sample), schema);

  String[] inputColumns = {"hour", "mobile", "userFeatures"};
  VectorAssembler assembler = new VectorAssembler()
    .setInputCols(inputColumns)
    .setOutputCol("features");

  Dataset<Row> assembled = assembler.transform(dataset);
  System.out.println(
      "Assembled columns 'hour', 'mobile', 'userFeatures' to vector column 'features'");
  assembled.select("features", "clicked").show(false);
  // $example off$

  spark.stop();
}

Example 3

Source File: StructureToInteractingResidues.java From mmtf-spark with Apache License 2.0

5 votes

/**
 * Finds, for the group at {@code index}, the closest atom-atom contact to each
 * other group listed in {@code matches}, keeping only contacts whose squared
 * distance does not exceed the squared cutoff distance.
 *
 * @param structureId identifier written to the first column of every row
 * @param matches indices of the candidate groups; an entry equal to {@code index} is skipped
 * @param index index of the query group
 * @param groupIndices atom offsets per group; group {@code g} covers atoms
 *        {@code [groupIndices.get(g), groupIndices.get(g + 1))}
 * @param groupNames group names, looked up by group index
 * @param structure source of the x, y and z atom coordinates
 * @return one row per matched group that has a contact within the cutoff: structure id,
 *         query group name, query index, matched group name, matched index, and the
 *         minimum distance as a float
 */
private List<Row> getDistanceProfile(String structureId, List<Integer> matches, int index, List<Integer> groupIndices, List<String> groupNames, StructureDataInterface structure) {
	// compare squared distances so the square root is taken only once per reported row
	double cutoffDistanceSq = cutoffDistance * cutoffDistance;

	float[] x = structure.getxCoords();
	float[] y = structure.getyCoords();
	float[] z = structure.getzCoords();

	// atom range of the query group
	int first = groupIndices.get(index);
	int last = groupIndices.get(index + 1);

	List<Row> rows = new ArrayList<>();
	for (int i : matches) {
		// exclude self interactions
		if (i == index) {
			continue;
		}

		// atom range of the candidate group, looked up once instead of on every iteration
		int start = groupIndices.get(i);
		int end = groupIndices.get(i + 1);

		double minDSq = Double.MAX_VALUE;
		int minIndex = -1;
		for (int j = start; j < end; j++) {
			for (int k = first; k < last; k++) {
				double dx = (x[j] - x[k]);
				double dy = (y[j] - y[k]);
				double dz = (z[j] - z[k]);
				double dSq = dx * dx + dy * dy + dz * dz;
				if (dSq <= cutoffDistanceSq && dSq < minDSq) {
					// the guard already guarantees dSq is the new minimum
					minDSq = dSq;
					minIndex = i;
				}
			}
		}
		if (minIndex >= 0) {
			// TODO add unique group (and atom?) for each group?
			Row row = RowFactory.create(structureId, groupNames.get(index), index, groupNames.get(minIndex), minIndex, (float) Math.sqrt(minDSq));
			rows.add(row);
		}
	}
	return rows;
}

Example 4

Source File: BiojavaAligner.java From mmtf-spark with Apache License 2.0

5 votes

/**
	 * Runs a structural alignment of two chains and reports its metrics.
	 * 
	 * @param alignmentAlgorithm name of the algorithm
	 * @param key unique identifier for protein chain pair
	 * @param points1 C-alpha positions of chain 1
	 * @param points2 C-alpha positions of chain 2
	 * @return a single-element list whose row holds the key, optimal length, coverage of
	 *         chain 1, coverage of chain 2, optimal total RMSD and TM score; an empty list
	 *         if a {@code StructureException} is thrown while aligning
	 */
	public static List<Row> getAlignment(String alignmentAlgorithm, String key, Point3d[] points1, Point3d[] points2) {
		// convert the coordinates into the atom arrays the alignment method takes
		Atom[] ca1 = getCAAtoms(points1);
		Atom[] ca2 = getCAAtoms(points2);

		// align the chains and attach the TM score to the result
		AFPChain afp;
		try {
			afp = StructureAlignmentFactory.getAlgorithm(alignmentAlgorithm).align(ca1, ca2);
			afp.setTMScore(AFPChainScorer.getTMScore(afp, ca1, ca2));
		} catch (StructureException e) {
			e.printStackTrace();
			return Collections.emptyList();
		}

		// TODO add alignments as arrays to results
//		int[][] alignment = afp.getAfpIndex();
//		for (int i = 0; i < alignment.length; i++) {
//			System.out.println(alignment[i][0] + " - " + alignment[i][1]);
//		}

		// collect the alignment metrics in column order
		Object[] metrics = {
				key,
				afp.getOptLength(),
				afp.getCoverage1(),
				afp.getCoverage2(),
				(float) afp.getTotalRmsdOpt(),
				(float) afp.getTMScore()
		};

		return Collections.singletonList(RowFactory.create(metrics));
	}

Example 5

Source File: AtomInteraction.java From mmtf-spark with Apache License 2.0

5 votes

/**
 * Returns interactions and geometric information in a single row.
 * <p>
 * Column order: structure id, number of polymer chains, q3, q4, q5, q6, the
 * data of the central atom, then for each neighbor its data followed by its
 * distance, and finally the angles.
 * <p>
 * Note that this method appends empty interaction centers to the neighbor
 * list until it holds {@code maxInteractions} entries.
 * 
 * @param maxInteractions number of interactions the row is padded to
 * @return row of interactions and geometric information
 */
public Row getMultipleInteractionsAsRow(int maxInteractions) {
	// pad interaction centers and distances with nulls, if necessary,
	// since each row must be of fixed length
	while (getNumInteractions() < maxInteractions) {
		neighbors.add(new InteractionCenter());
	}

	// number of columns occupied by one interaction center
	int length = InteractionCenter.getLength();

	Object[] data = new Object[getNumColumns(maxInteractions)];

	int index = 0;
	data[index++] = structureId;
	data[index++] = getNumberOfPolymerChains();

	// the q values are read right after this call
	calcCoordinationGeometry(maxInteractions);
	data[index++] = q3;
	data[index++] = q4;
	data[index++] = q5;
	data[index++] = q6;

	// copy data for query atom
	System.arraycopy(center.getAsObject(), 0, data, index, length);
	index += length;

	// copy data for interacting atoms
	for (int i = 0; i < neighbors.size(); i++) {
		System.arraycopy(neighbors.get(i).getAsObject(), 0, data, index, length);
		index += length;
		data[index++] = distances[i];
	}

	// copy angles
	System.arraycopy(angles, 0, data, index, angles.length);
	// advance by the number of angles copied (previously advanced by the
	// interaction-center length), so the cursor stays correct if more
	// columns are ever appended after the angles
	index += angles.length;

	return RowFactory.create(data);
}

Example 6

Source File: MLContextTest.java From systemds with Apache License 2.0

5 votes

/**
 * Converts a comma-separated line of numbers into a row with one
 * {@code Double} column per token.
 *
 * @param str comma-separated numeric text
 * @return row holding the parsed values in their original order
 */
@Override
public Row call(String str) throws Exception {
	String[] tokens = str.split(",");
	Double[] values = new Double[tokens.length];
	int position = 0;
	for (String token : tokens) {
		values[position++] = Double.valueOf(token);
	}
	// the cast makes each value its own column instead of one array-valued column
	return RowFactory.create((Object[]) values);
}

Example 7

Source File: InstanceRelationWriter.java From rdf2x with Apache License 2.0

5 votes

/**
 * Builds the row for one attribute value of an instance.
 *
 * @param instance instance whose id goes into the first column
 * @param predicate supplies the predicate index, literal type and language columns
 * @param value attribute value, stored through its {@code toString()} form
 * @return row of instance id, predicate index, literal type name, language and value text
 */
private static Row getAttributeRow(Instance instance, Predicate predicate, Object value) {
    final Object[] columns = {
            instance.getId(),
            predicate.getPredicateIndex(),
            LiteralType.toString(predicate.getLiteralType()),
            predicate.getLanguage(),
            value.toString()
    };
    return RowFactory.create(columns);
}

Example 8

Source File: MLContextFrameTest.java From systemds with Apache License 2.0

5 votes

/**
 * Feeds a two-row frame with quote-heavy feature names through
 * {@code transformencode} and checks one cell of the encoded matrix, then
 * prints the resulting matrix and metadata frame.
 */
@Test
public void testTransform() {
	System.out.println("MLContextFrameTest - transform");

	// input rows: feature name, feature value, document id
	Row firstRow = RowFactory.create("\"`@(\"(!&", 2, "20news-bydate-train/comp.os.ms-windows.misc/9979");
	Row secondRow = RowFactory.create("\"`@(\"\"(!&\"", 3, "20news-bydate-train/comp.os.ms-windows.misc/9979");
	JavaRDD<Row> inputRdd = sc.parallelize(Arrays.asList(firstRow, secondRow));

	// schema of the input frame
	List<StructField> columns = new ArrayList<>();
	columns.add(DataTypes.createStructField("featureName", DataTypes.StringType, true));
	columns.add(DataTypes.createStructField("featureValue", DataTypes.IntegerType, true));
	columns.add(DataTypes.createStructField("id", DataTypes.StringType, true));
	Dataset<Row> dataFrameA = spark.createDataFrame(inputRdd, DataTypes.createStructType(columns));

	String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ featureName, id ]}\");";

	// bind the frame as input A and request both outputs
	Script script = dml(dmlString);
	FrameMetadata metadata =
			new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length);
	script = script.in("A", dataFrameA, metadata).out("tA").out("tAM");

	ml.setExplain(true);
	ml.setExplainLevel(ExplainLevel.RECOMPILE_HOPS);
	MLResults results = ml.execute(script);

	double[][] encoded = results.getMatrixAs2DDoubleArray("tA");
	Assert.assertEquals(1.0, encoded[0][2], 0.0);

	// print the encoded matrix
	Dataset<Row> encodedFrame = results.getMatrix("tA").toDF();
	System.out.println("Number of matrix tA rows = " + encodedFrame.count());
	encodedFrame.printSchema();
	encodedFrame.show();

	// print the transform metadata frame
	Dataset<Row> metaFrame = results.getFrame("tAM").toDF();
	System.out.println("Number of frame tAM rows = " + metaFrame.count());
	metaFrame.printSchema();
	metaFrame.show();
}

Example 9

Source File: MLContextFrameTest.java From systemds with Apache License 2.0

5 votes

/**
 * Runs {@code transformencode} on a three-row frame and checks that the
 * numeric third column of the encoded matrix keeps its values, then prints the
 * resulting matrix and metadata frame.
 */
@Test
public void testInputFrameAndMatrixOutputMatrixAndFrame() {
	System.out.println("MLContextFrameTest - input frame and matrix, output matrix and frame");

	// input rows: document id, feature name, feature value
	Row row1 = RowFactory.create("Doc1", "Feat1", 10);
	Row row2 = RowFactory.create("Doc1", "Feat2", 20);
	Row row3 = RowFactory.create("Doc2", "Feat1", 31);
	JavaRDD<Row> inputRdd = sc.parallelize(Arrays.asList(row1, row2, row3));

	// schema of the input frame
	List<StructField> columns = new ArrayList<>();
	columns.add(DataTypes.createStructField("myID", DataTypes.StringType, true));
	columns.add(DataTypes.createStructField("FeatureName", DataTypes.StringType, true));
	columns.add(DataTypes.createStructField("FeatureValue", DataTypes.IntegerType, true));
	Dataset<Row> dataFrameA = spark.createDataFrame(inputRdd, DataTypes.createStructType(columns));

	String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ myID, FeatureName ]}\");";

	// bind the frame as input A and request both outputs
	Script script = dml(dmlString);
	FrameMetadata metadata =
			new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length);
	script = script.in("A", dataFrameA, metadata).out("tA").out("tAM");
	MLResults results = ml.execute(script);

	// the third column is not recoded, so it must carry the input values
	double[][] encoded = results.getMatrixAs2DDoubleArray("tA");
	double[] expectedValues = {10.0, 20.0, 31.0};
	for (int r = 0; r < expectedValues.length; r++) {
		Assert.assertEquals(expectedValues[r], encoded[r][2], 0.0);
	}

	// print the encoded matrix
	Dataset<Row> encodedFrame = results.getMatrix("tA").toDF();
	System.out.println("Number of matrix tA rows = " + encodedFrame.count());
	encodedFrame.printSchema();
	encodedFrame.show();

	// print the transform metadata frame
	Dataset<Row> metaFrame = results.getFrame("tAM").toDF();
	System.out.println("Number of frame tAM rows = " + metaFrame.count());
	metaFrame.printSchema();
	metaFrame.show();
}

Example 10

Source File: FrameRDDConverterUtils.java From systemds with Apache License 2.0

5 votes

/**
 * Splits one delimited record and converts each token to an object using the
 * schema entry at the same position.
 *
 * @param record one line of delimited text
 * @return row holding the converted values in column order
 */
@Override
public Row call(String record) throws Exception {
	final String[] tokens = IOUtilFunctions.splitCSV(record, _delim);
	final int numCols = tokens.length;
	final Object[] converted = new Object[numCols];
	int col = 0;
	while (col < numCols) {
		converted[col] = UtilFunctions.stringToObject(_schema[col], tokens[col]);
		col++;
	}
	return RowFactory.create(converted);
}

Example 11

Source File: MLContextTest.java From systemds with Apache License 2.0

4 votes

/**
 * Turns a (double, vector) pair into a two-column row.
 *
 * @param tup pair whose first element becomes column 0 and second element column 1
 * @return row holding both elements of the pair
 */
@Override
public Row call(Tuple2<Double, org.apache.spark.mllib.linalg.Vector> tup) throws Exception {
	return RowFactory.create(tup._1(), tup._2());
}

Example 12

Source File: MLContextTest.java From systemds with Apache License 2.0

4 votes

/**
 * Wraps a vector into a row with a single column.
 *
 * @param vect vector to wrap
 * @return one-column row containing the vector
 */
@Override
public Row call(Vector vect) throws Exception {
	Object[] singleColumn = { vect };
	return RowFactory.create(singleColumn);
}

Example 13

Source File: MLContextTest.java From systemds with Apache License 2.0

4 votes

/**
 * Wraps a vector into a row with a single column.
 *
 * @param vect vector to wrap
 * @return one-column row containing the vector
 */
@Override
public Row call(org.apache.spark.mllib.linalg.Vector vect) throws Exception {
	Row wrapped = RowFactory.create(vect);
	return wrapped;
}

Example 14

Source File: MLContextTest.java From systemds with Apache License 2.0

4 votes

/**
 * Turns a (double, vector) pair into a two-column row.
 *
 * @param tup pair whose first element becomes column 0 and second element column 1
 * @return row holding both elements of the pair
 */
@Override
public Row call(Tuple2<Double, Vector> tup) throws Exception {
	return RowFactory.create(tup._1(), tup._2());
}

Example 15

Source File: StructureToAllInteractions.java From mmtf-spark with Apache License 2.0

4 votes

/**
 * Lists every atom pair between the group at {@code index} and each other
 * group in {@code matches} whose squared distance is below the squared cutoff.
 *
 * @param structureId identifier written to the first column of every row
 * @param matches indices of the candidate groups; an entry equal to {@code index} is skipped
 * @param index index of the query group
 * @param groupIndices atom offsets per group; group {@code g} covers atoms
 *        {@code [groupIndices.get(g), groupIndices.get(g + 1))}
 * @param groupNames group names, looked up by group index
 * @param structure source of coordinates, group type indices, atom names and element names
 * @return one row per atom pair: structure id, then name, atom name, element and index of
 *         the query group, the same four values for the matched group, and the distance
 *         as a float
 */
private List<Row> getDistanceProfile(String structureId, List<Integer> matches, int index, List<Integer> groupIndices, List<String> groupNames, StructureDataInterface structure) {
	final double cutoffDistanceSq = cutoffDistance * cutoffDistance;

	final float[] xs = structure.getxCoords();
	final float[] ys = structure.getyCoords();
	final float[] zs = structure.getzCoords();

	// atom range and group type of the query group
	final int queryStart = groupIndices.get(index);
	final int queryEnd = groupIndices.get(index + 1);
	final int queryType = structure.getGroupTypeIndices()[index];

	List<Row> result = new ArrayList<>();
	for (int other : matches) {
		// exclude self interactions
		if (other == index) {
			continue;
		}

		// atom range of the candidate group
		final int otherStart = groupIndices.get(other);
		final int otherEnd = groupIndices.get(other + 1);

		for (int a = otherStart; a < otherEnd; a++) {
			for (int b = queryStart; b < queryEnd; b++) {
				double dx = xs[a] - xs[b];
				double dy = ys[a] - ys[b];
				double dz = zs[a] - zs[b];
				double dSq = dx * dx + dy * dy + dz * dz;

				// negated form keeps the original comparison semantics for every value
				if (!(dSq < cutoffDistanceSq)) {
					continue;
				}

				// atom of the query group, addressed relative to the group start
				int queryAtom = b - queryStart;
				String atomName1 = structure.getGroupAtomNames(queryType)[queryAtom];
				String element1 = structure.getGroupElementNames(queryType)[queryAtom];

				// atom of the candidate group, addressed relative to the group start
				int otherType = structure.getGroupTypeIndices()[other];
				int otherAtom = a - otherStart;
				String atomName2 = structure.getGroupAtomNames(otherType)[otherAtom];
				String element2 = structure.getGroupElementNames(otherType)[otherAtom];

				float distance = (float) Math.sqrt(dSq);
				result.add(RowFactory.create(structureId, groupNames.get(index), atomName1, element1, index, groupNames.get(other), atomName2, element2, other, distance));
			}
		}
	}
	return result;
}

Example 16

Source File: ExhaustiveAligner.java From mmtf-spark with Apache License 2.0

4 votes

/**
	 * Returns one or more structure alignments and their alignment scores.
	 * <p>
	 * The current implementation superposes the first {@code min(points1.length,
	 * points2.length)} positions of both chains and reports a single row.
	 * 
	 * @param alignmentAlgorithm
	 *            name of the algorithm (not used by the current implementation)
	 * @param key
	 *            unique identifier for protein chain pair
	 * @param points1
	 *            C-alpha positions of chain 1
	 * @param points2
	 *            C-alpha positions of chain 2
	 * @return list of alignment metrics: key, aligned length, coverage of chain 1
	 *         and of chain 2 in percent, RMSD and TM score (both as floats)
	 */
	public List<Row> getAlignments(String alignmentAlgorithm, String key, Point3d[] points1, Point3d[] points2) {
		List<Row> rows = new ArrayList<>();

		// TODO implement exhaustive alignments here ...

		int length = Math.min(points1.length, points2.length);

		// Only a chain longer than the common length is truncated (copied); a chain
		// that already has that length is used as is. This also covers chains of
		// equal length, for which both arrays were previously left null.
		Point3d[] x = (points1.length == length) ? points1 : Arrays.copyOfRange(points1, 0, length);
		Point3d[] y = (points2.length == length) ? points2 : Arrays.copyOfRange(points2, 0, length);

		// Coverage is the aligned fraction of each ORIGINAL chain. Dividing by the
		// truncated array length, as before, always produced 100.
		int coverage1 = (int) Math.rint(100.0 * length / points1.length);
		int coverage2 = (int) Math.rint(100.0 * length / points2.length);

		SuperPositionQCP qcp = new SuperPositionQCP(false);
		double rmsd = qcp.getRmsd(x, y);
		double tm = 0.0;
//		if (rmsd >= maxRmsd) {
			qcp.superposeAndTransform(x, y);
			tm = TMScore(x, y);
//		}
		System.out.println("l: " + length + " c1: " + coverage1 + " c2: " + coverage2 + " rmsd: " + rmsd + " tm: " + tm);

//		int maxCoverage = Math.max(coverage1, coverage2);

		// store solutions that satisfy minimal criteria
//		if (length >= minLength && maxCoverage >= minCoverage && tm >= minTm) {
			// create a row of alignment metrics
			Row row = RowFactory.create(key, length, coverage1, coverage2, (float)rmsd, (float)tm);
			rows.add(row);
//		}

		return rows;
	}

Example 17

Source File: RDDConverterUtilsExtTest.java From systemds with Apache License 2.0

4 votes

/**
 * Wraps a string into a row with a single column.
 *
 * @param str value to wrap
 * @return one-column row containing the string
 */
@Override
public Row call(String str) throws Exception {
	Object[] singleColumn = new Object[] { str };
	return RowFactory.create(singleColumn);
}

Example 18

Source File: SecondaryStructureExtractor.java From mmtf-spark with Apache License 2.0

4 votes

/**
 * Builds per-residue secondary structure strings and the helix, sheet and coil
 * fractions for a single-chain structure.
 * <p>
 * Sequence positions not covered by a secondary structure code are written as
 * "X" in both strings.
 *
 * @param t pair of the row key and the structure to analyze
 * @return row of key, entity sequence, helix fraction, sheet fraction, coil fraction,
 *         the string of one-letter DSSP codes and the string of H/E/C codes
 * @throws IllegalArgumentException if the structure does not have exactly one chain
 */
private static Row getSecStructFractions(Tuple2<String, StructureDataInterface> t) throws Exception {
	StructureDataInterface structure = t._2;
	if (structure.getNumChains() != 1) {
		throw new IllegalArgumentException("This method can only be applied to single polymer chain.");
	}
	String key = t._1;

	String sequence = structure.getEntitySequence(0);
	StringBuilder dsspQ8 = new StringBuilder(sequence.length());
	StringBuilder dsspQ3 = new StringBuilder(sequence.length());

	float helix = 0;
	float sheet = 0;
	float coil = 0;

	int position = 0;     // next sequence position to be written
	int groupCursor = 0;  // next entry of the group sequence indices to read

	for (int code : structure.getSecStructList()) {
		int seqIndex = structure.getGroupSequenceIndices()[groupCursor++];

		// fill the gap before this group's sequence position
		for (; position < seqIndex; position++) {
			dsspQ8.append("X");
			dsspQ3.append("X");
		}

		dsspQ8.append(DsspSecondaryStructure.getDsspCode(code).getOneLetterCode());
		position++;

		switch (DsspSecondaryStructure.getQ3Code(code)) {
		case ALPHA_HELIX:
			dsspQ3.append("H");
			helix++;
			break;
		case EXTENDED:
			dsspQ3.append("E");
			sheet++;
			break;
		case COIL:
			dsspQ3.append("C");
			coil++;
			break;
		default:
			break;
		}
	}

	// fill any remaining positions up to the end of the sequence
	for (; position < sequence.length(); position++) {
		dsspQ8.append("X");
		dsspQ3.append("X");
	}

	// turn the counts into fractions of all secondary structure entries
	int n = structure.getSecStructList().length;
	helix = helix / n;
	sheet = sheet / n;
	coil = coil / n;

	return RowFactory.create(key, sequence, helix, sheet, coil, dsspQ8.toString(),
			dsspQ3.toString());
}

Example 19

Source File: SparkJdbcGenerator.java From Quicksql with MIT License

4 votes

/**
 * Splits a delimited string into a row with one string column per token.
 *
 * @param ob the record; must be a {@code String}
 * @return row holding the tokens in order
 */
@Override
public Row call(Object ob) throws Exception {
    String line = (String) ob;
    Object[] tokens = line.split("\u0006");
    return RowFactory.create(tokens);
}

Example 20

Source File: DefinitionToSparkVisitor.java From bunsen with Apache License 2.0

4 votes

/**
 * Creates a row whose columns are the given child values.
 *
 * @param children values of the composite, in column order
 * @return the row built from the children
 */
@Override
protected Object createComposite(Object[] children) {
  final Object composite = RowFactory.create(children);
  return composite;
}