Java Code Examples for org.apache.spark.sql.RowFactory#create()

The following examples show how to use org.apache.spark.sql.RowFactory#create(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RDDConverterUtilsExt.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Copies every field of the incoming row and appends one extra trailing
 * column holding the paired index plus one, stored as a {@code Double}.
 *
 * @param arg0 pair of the source row and the index associated with it
 * @return a new row with one more column than the source row
 */
@Override
public Row call(Tuple2<Row, Long> arg0) throws Exception {
	Row source = arg0._1;
	int oldNumCols = source.length();
	Object[] fields = new Object[oldNumCols + 1];
	for (int i = 0; i < oldNumCols; i++) {
		fields[i] = source.get(i);
	}
	// Double.valueOf replaces the deprecated new Double(...) boxing constructor.
	fields[oldNumCols] = Double.valueOf((double) (arg0._2 + 1));
	return RowFactory.create(fields);
}
 
Example 2
Source File: JavaVectorAssemblerExample.java    From SparkDemo with MIT License 5 votes vote down vote up
/**
 * Builds a one-row DataFrame, assembles the "hour", "mobile" and
 * "userFeatures" columns into a single "features" vector column, and prints
 * the assembled column next to "clicked".
 */
public static void main(String[] args) {
  SparkSession session =
      SparkSession.builder().appName("JavaVectorAssemblerExample").getOrCreate();

  // $example on$
  StructField[] columns = {
    createStructField("id", IntegerType, false),
    createStructField("hour", IntegerType, false),
    createStructField("mobile", DoubleType, false),
    createStructField("userFeatures", new VectorUDT(), false),
    createStructField("clicked", DoubleType, false)
  };
  StructType schema = createStructType(columns);

  // single sample record matching the schema above
  Row sample = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0);
  Dataset<Row> input = session.createDataFrame(Arrays.asList(sample), schema);

  String[] inputColumns = {"hour", "mobile", "userFeatures"};
  VectorAssembler assembler = new VectorAssembler()
    .setInputCols(inputColumns)
    .setOutputCol("features");

  Dataset<Row> assembled = assembler.transform(input);
  String banner = "Assembled columns 'hour', 'mobile', 'userFeatures' to vector column " +
      "'features'";
  System.out.println(banner);
  assembled.select("features", "clicked").show(false);
  // $example off$

  session.stop();
}
 
Example 3
Source File: StructureToInteractingResidues.java    From mmtf-spark with Apache License 2.0 5 votes vote down vote up
/**
 * For the group at {@code index}, finds the closest atom-atom approach to
 * each other group listed in {@code matches} and emits one row per group
 * whose closest approach lies within the cutoff distance.
 *
 * @param structureId identifier written into the first column of every row
 * @param matches indices of the candidate groups
 * @param index index of the query group (skipped when found in {@code matches})
 * @param groupIndices atom offsets; group g spans atoms [get(g), get(g+1))
 * @param groupNames group names, looked up by group index
 * @param structure source of the x/y/z atom coordinates
 * @return rows of (structureId, query name, query index, partner name, partner index, distance)
 */
private List<Row> getDistanceProfile(String structureId, List<Integer> matches, int index, List<Integer> groupIndices, List<String> groupNames, StructureDataInterface structure) {
	double cutoffDistanceSq = cutoffDistance * cutoffDistance;

	float[] xs = structure.getxCoords();
	float[] ys = structure.getyCoords();
	float[] zs = structure.getzCoords();

	int queryStart = groupIndices.get(index);
	int queryEnd = groupIndices.get(index+1);

	List<Row> result = new ArrayList<>();
	for (int other: matches) {
		if (other != index) {
			double closestSq = Double.MAX_VALUE;
			boolean withinCutoff = false;

			int otherStart = groupIndices.get(other);
			int otherEnd = groupIndices.get(other+1);
			for (int a = otherStart; a < otherEnd; a++) {
				for (int b = queryStart; b < queryEnd; b++) {
					// differences are taken in float precision, then widened
					double dx = xs[a] - xs[b];
					double dy = ys[a] - ys[b];
					double dz = zs[a] - zs[b];
					double distSq = dx*dx + dy*dy + dz*dz;
					if (distSq <= cutoffDistanceSq && distSq < closestSq) {
						closestSq = distSq;
						withinCutoff = true;
					}
				}
			}

			if (withinCutoff) {
				// TODO add unique group (and atom?) for each group?
				float distance = (float)Math.sqrt(closestSq);
				result.add(RowFactory.create(structureId, groupNames.get(index), index, groupNames.get(other), other, distance));
			}
		}
	}
	return result;
}
 
Example 4
Source File: BiojavaAligner.java    From mmtf-spark with Apache License 2.0 5 votes vote down vote up
/**
	 * Runs the named structural alignment algorithm on two chains and
	 * reports the resulting alignment metrics.
	 * 
	 * @param alignmentAlgorithm name of the algorithm
	 * @param key unique identifier for protein chain pair
	 * @param points1 C-alpha positions of chain 1
	 * @param points2 C-alpha positions of chain 2
	 * @return a single-element list holding (key, optimal length, coverage 1,
	 *         coverage 2, RMSD, TM score), or an empty list if the alignment
	 *         throws a {@code StructureException}
	 */
	public static List<Row> getAlignment(String alignmentAlgorithm, String key, Point3d[] points1, Point3d[] points2) {
		// convert the raw coordinates into the atom arrays BioJava expects
		Atom[] chain1 = getCAAtoms(points1);
		Atom[] chain2 = getCAAtoms(points2);
		
		// align and attach the TM score to the result
		AFPChain afp;
		try {
			StructureAlignment aligner = StructureAlignmentFactory.getAlgorithm(alignmentAlgorithm);
			afp = aligner.align(chain1, chain2);
			afp.setTMScore(AFPChainScorer.getTMScore(afp, chain1, chain2));
		} catch (StructureException e) {
			e.printStackTrace();
			return Collections.emptyList();
		}
		
		// TODO add alignments as arrays to results
//		int[][] alignment = afp.getAfpIndex();
//		for (int i = 0; i < alignment.length; i++) {
//			System.out.println(alignment[i][0] + " - " + alignment[i][1]);
//		}

		// collect the alignment metrics into one row
		float rmsd = (float) afp.getTotalRmsdOpt();
		float tmScore = (float) afp.getTMScore();
		return Collections.singletonList(
				RowFactory.create(key, afp.getOptLength(), afp.getCoverage1(), afp.getCoverage2(), rmsd, tmScore));
	}
 
Example 5
Source File: AtomInteraction.java    From mmtf-spark with Apache License 2.0 5 votes vote down vote up
/**
 * Returns interactions and geometric information in a single row.
 *
 * <p>Note: this method appends placeholder {@code InteractionCenter}
 * instances to {@code neighbors} until it holds at least
 * {@code maxInteractions} entries, so the neighbor list is modified as a
 * side effect.
 *
 * @param maxInteractions number of interaction slots the row must provide;
 *        missing interactions are filled with placeholder centers
 * @return row of interactions and geometric information
 */
public Row getMultipleInteractionsAsRow(int maxInteractions) {
	// pad interaction centers with placeholders, if necessary,
	// since each row must be of fixed length
	while (getNumInteractions() < maxInteractions) {
		neighbors.add(new InteractionCenter());
	}

	// number of columns contributed by a single interaction center
	int length = InteractionCenter.getLength();

	Object[] data = new Object[getNumColumns(maxInteractions)];

	int index = 0;
	data[index++] = structureId;
	data[index++] = getNumberOfPolymerChains();
	
	// q3..q6 are read right after this call, so it must run first
	calcCoordinationGeometry(maxInteractions);
	data[index++] = q3;
	data[index++] = q4;
	data[index++] = q5;
	data[index++] = q6;

	// copy data for query atom
	System.arraycopy(center.getAsObject(), 0, data, index, length);
	index += length;

	// copy data for interacting atoms, each followed by its distance
	for (int i = 0; i < neighbors.size(); i++) {
		System.arraycopy(neighbors.get(i).getAsObject(), 0, data, index, length);
		index += length;
		data[index++] = distances[i];
	}

	// copy angles; the cursor advances by the number of angles copied
	// (the original advanced by the interaction-center width instead)
	System.arraycopy(angles, 0, data, index, angles.length);
	index += angles.length;

	return RowFactory.create(data);
}
 
Example 6
Source File: MLContextTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Splits a comma-separated line and converts every token to a
 * {@code Double}, producing a row with one column per token.
 *
 * @param str comma-separated numeric values
 * @return row holding the parsed values in their original order
 */
@Override
public Row call(String str) throws Exception {
	String[] tokens = str.split(",");
	Double[] values = new Double[tokens.length];
	int pos = 0;
	for (String token : tokens) {
		values[pos++] = Double.valueOf(token);
	}
	// the cast spreads the array over the varargs parameter
	return RowFactory.create((Object[]) values);
}
 
Example 7
Source File: InstanceRelationWriter.java    From rdf2x with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a five-column row from an instance, a predicate and a value:
 * instance id, predicate index, literal type name, language, and the
 * value's string form.
 */
private static Row getAttributeRow(Instance instance, Predicate predicate, Object value) {
    Object[] columns = {
            instance.getId(),
            predicate.getPredicateIndex(),
            LiteralType.toString(predicate.getLiteralType()),
            predicate.getLanguage(),
            value.toString()
    };
    return RowFactory.create(columns);
}
 
Example 8
Source File: MLContextFrameTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Runs transformencode with recoding of "featureName" and "id" on a
 * two-row frame whose feature names contain quote characters, then checks
 * one cell of the encoded matrix and prints both outputs.
 */
@Test
public void testTransform() {
	System.out.println("MLContextFrameTest - transform");

	// both rows share the same document id
	String docId = "20news-bydate-train/comp.os.ms-windows.misc/9979";
	List<Row> inputRows = Arrays.asList(
			RowFactory.create("\"`@(\"(!&", 2, docId),
			RowFactory.create("\"`@(\"\"(!&\"", 3, docId));
	JavaRDD<Row> inputRdd = sc.parallelize(inputRows);

	List<StructField> columns = Arrays.asList(
			DataTypes.createStructField("featureName", DataTypes.StringType, true),
			DataTypes.createStructField("featureValue", DataTypes.IntegerType, true),
			DataTypes.createStructField("id", DataTypes.StringType, true));
	Dataset<Row> inputFrame = spark.createDataFrame(inputRdd, DataTypes.createStructType(columns));

	String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ featureName, id ]}\");";

	FrameMetadata metadata =
			new FrameMetadata(FrameFormat.CSV, inputFrame.count(), (long) inputFrame.columns().length);
	Script script = dml(dmlString).in("A", inputFrame, metadata).out("tA").out("tAM");
	ml.setExplain(true);
	ml.setExplainLevel(ExplainLevel.RECOMPILE_HOPS);
	MLResults results = ml.execute(script);

	double[][] encoded = results.getMatrixAs2DDoubleArray("tA");
	Assert.assertEquals(1.0, encoded[0][2], 0.0);

	// dump the encoded matrix
	Dataset<Row> encodedFrame = results.getMatrix("tA").toDF();
	System.out.println("Number of matrix tA rows = " + encodedFrame.count());
	encodedFrame.printSchema();
	encodedFrame.show();

	// dump the transform metadata frame
	Dataset<Row> metaFrame = results.getFrame("tAM").toDF();
	System.out.println("Number of frame tAM rows = " + metaFrame.count());
	metaFrame.printSchema();
	metaFrame.show();
}
 
Example 9
Source File: MLContextFrameTest.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Runs transformencode with recoding of "myID" and "FeatureName" on a
 * three-row frame, checks that the numeric third column passes through
 * unchanged, and prints both outputs.
 */
@Test
public void testInputFrameAndMatrixOutputMatrixAndFrame() {
	System.out.println("MLContextFrameTest - input frame and matrix, output matrix and frame");

	List<Row> inputRows = Arrays.asList(
			RowFactory.create("Doc1", "Feat1", 10),
			RowFactory.create("Doc1", "Feat2", 20),
			RowFactory.create("Doc2", "Feat1", 31));
	JavaRDD<Row> inputRdd = sc.parallelize(inputRows);

	List<StructField> columns = Arrays.asList(
			DataTypes.createStructField("myID", DataTypes.StringType, true),
			DataTypes.createStructField("FeatureName", DataTypes.StringType, true),
			DataTypes.createStructField("FeatureValue", DataTypes.IntegerType, true));
	Dataset<Row> inputFrame = spark.createDataFrame(inputRdd, DataTypes.createStructType(columns));

	String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ myID, FeatureName ]}\");";

	FrameMetadata metadata =
			new FrameMetadata(FrameFormat.CSV, inputFrame.count(), (long) inputFrame.columns().length);
	Script script = dml(dmlString).in("A", inputFrame, metadata).out("tA").out("tAM");
	MLResults results = ml.execute(script);

	// the third column is not recoded, so it must keep the input values
	double[][] encoded = results.getMatrixAs2DDoubleArray("tA");
	double[] expectedValues = {10.0, 20.0, 31.0};
	for (int r = 0; r < expectedValues.length; r++) {
		Assert.assertEquals(expectedValues[r], encoded[r][2], 0.0);
	}

	// dump the encoded matrix
	Dataset<Row> encodedFrame = results.getMatrix("tA").toDF();
	System.out.println("Number of matrix tA rows = " + encodedFrame.count());
	encodedFrame.printSchema();
	encodedFrame.show();

	// dump the transform metadata frame
	Dataset<Row> metaFrame = results.getFrame("tAM").toDF();
	System.out.println("Number of frame tAM rows = " + metaFrame.count());
	metaFrame.printSchema();
	metaFrame.show();
}
 
Example 10
Source File: FrameRDDConverterUtils.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Splits a delimited record and converts each field to an object
 * according to the schema entry at the same position.
 *
 * @param record one delimited text line
 * @return row with one converted value per field
 */
@Override
public Row call(String record) throws Exception {
	String[] tokens = IOUtilFunctions.splitCSV(record, _delim);
	Object[] converted = new Object[tokens.length];
	int col = 0;
	for (String token : tokens) {
		// the schema entry at the same position drives the conversion
		converted[col] = UtilFunctions.stringToObject(_schema[col], token);
		col++;
	}
	return RowFactory.create(converted);
}
 
Example 11
Source File: MLContextTest.java    From systemds with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a (double, mllib vector) pair into a two-column row.
 *
 * @param tup pair whose first element becomes column 0 and second column 1
 * @return row of (double, vector)
 */
@Override
public Row call(Tuple2<Double, org.apache.spark.mllib.linalg.Vector> tup) throws Exception {
	return RowFactory.create(tup._1(), tup._2());
}
 
Example 12
Source File: MLContextTest.java    From systemds with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps a vector in a single-column row.
 *
 * @param vect vector to wrap
 * @return row whose only column is {@code vect}
 */
@Override
public Row call(Vector vect) throws Exception {
	Object[] cells = {vect};
	return RowFactory.create(cells);
}
 
Example 13
Source File: MLContextTest.java    From systemds with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps an mllib vector in a single-column row.
 *
 * @param vect vector to wrap
 * @return row whose only column is {@code vect}
 */
@Override
public Row call(org.apache.spark.mllib.linalg.Vector vect) throws Exception {
	Object[] cells = {vect};
	return RowFactory.create(cells);
}
 
Example 14
Source File: MLContextTest.java    From systemds with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a (double, vector) pair into a two-column row.
 *
 * @param tup pair whose first element becomes column 0 and second column 1
 * @return row of (double, vector)
 */
@Override
public Row call(Tuple2<Double, Vector> tup) throws Exception {
	return RowFactory.create(tup._1(), tup._2());
}
 
Example 15
Source File: StructureToAllInteractions.java    From mmtf-spark with Apache License 2.0 4 votes vote down vote up
/**
 * Lists every atom pair between the group at {@code index} and each other
 * group in {@code matches} whose distance is strictly below the cutoff.
 *
 * @param structureId identifier written into the first column of every row
 * @param matches indices of the candidate groups
 * @param index index of the query group (skipped when found in {@code matches})
 * @param groupIndices atom offsets; group g spans atoms [get(g), get(g+1))
 * @param groupNames group names, looked up by group index
 * @param structure source of coordinates, atom names and element names
 * @return one row per atom pair: (structureId, name 1, atom 1, element 1,
 *         index 1, name 2, atom 2, element 2, index 2, distance)
 */
private List<Row> getDistanceProfile(String structureId, List<Integer> matches, int index, List<Integer> groupIndices, List<String> groupNames, StructureDataInterface structure) {
	double cutoffDistanceSq = cutoffDistance * cutoffDistance;

	float[] xs = structure.getxCoords();
	float[] ys = structure.getyCoords();
	float[] zs = structure.getzCoords();

	int queryStart = groupIndices.get(index);
	int queryEnd = groupIndices.get(index+1);

	int queryType = structure.getGroupTypeIndices()[index];

	List<Row> result = new ArrayList<>();
	for (int other: matches) {
		// exclude self interactions
		if (other == index) {
			continue;
		}

		int otherStart = groupIndices.get(other);
		int otherEnd = groupIndices.get(other+1);
		for (int a = otherStart; a < otherEnd; a++) {
			for (int b = queryStart; b < queryEnd; b++) {
				// differences are taken in float precision, then widened
				double dx = xs[a] - xs[b];
				double dy = ys[a] - ys[b];
				double dz = zs[a] - zs[b];
				double distSq = dx*dx + dy*dy + dz*dz;

				// negated "<" so a NaN distance is skipped, as before
				if (!(distSq < cutoffDistanceSq)) {
					continue;
				}

				// atom of the query group, addressed relative to the group start
				int queryAtom = b - queryStart;
				String atomName1 = structure.getGroupAtomNames(queryType)[queryAtom];
				String element1 = structure.getGroupElementNames(queryType)[queryAtom];

				// atom of the partner group
				int otherType = structure.getGroupTypeIndices()[other];
				int otherAtom = a - otherStart;
				String atomName2 = structure.getGroupAtomNames(otherType)[otherAtom];
				String element2 = structure.getGroupElementNames(otherType)[otherAtom];

				float distance = (float)Math.sqrt(distSq);
				result.add(RowFactory.create(structureId, groupNames.get(index), atomName1, element1, index, groupNames.get(other), atomName2, element2, other, distance));
			}
		}
	}
	return result;
}
 
Example 16
Source File: ExhaustiveAligner.java    From mmtf-spark with Apache License 2.0 4 votes vote down vote up
/**
	 * Returns one or more structure alignments and their alignment scores.
	 * 
	 * <p>Currently a single alignment is produced: the longer chain is
	 * truncated to the length of the shorter one and the two are superposed.
	 * Coverage is reported as the percentage of each original chain that
	 * takes part in the alignment.
	 * 
	 * @param alignmentAlgorithm
	 *            name of the algorithm
	 * @param key
	 *            unique identifier for protein chain pair
	 * @param points1
	 *            C-alpha positions of chain 1
	 * @param points2
	 *            C-alpha positions of chain 2
	 * @return list of alignment metrics: (key, length, coverage 1, coverage 2,
	 *         RMSD, TM score)
	 */
	public List<Row> getAlignments(String alignmentAlgorithm, String key, Point3d[] points1, Point3d[] points2) {
		List<Row> rows = new ArrayList<>();

		// TODO implement exhaustive alignments here ...

		int length = Math.min(points1.length, points2.length);

		// Only the longer chain is truncated. A chain that already has the
		// common length is used as-is, which also covers the equal-length
		// case that previously left both arrays null.
		Point3d[] x = points1.length == length ? points1 : Arrays.copyOfRange(points1, 0, length);
		Point3d[] y = points2.length == length ? points2 : Arrays.copyOfRange(points2, 0, length);

		// Coverage is measured against the ORIGINAL chain length; measuring
		// against the truncated copy would always yield 100.
		int coverage1 = points1.length == length ? 100
				: (int) Math.rint(100.0 * length / points1.length);
		int coverage2 = points2.length == length ? 100
				: (int) Math.rint(100.0 * length / points2.length);

		SuperPositionQCP qcp = new SuperPositionQCP(false);
		double rmsd = qcp.getRmsd(x, y);
		double tm = 0.0;
//		if (rmsd >= maxRmsd) {
			qcp.superposeAndTransform(x, y);
			tm = TMScore(x, y);
//		}
		System.out.println("l: " + length + " c1: " + coverage1 + " c2: " + coverage2 + " rmsd: " + rmsd + " tm: " + tm);

//		int maxCoverage = Math.max(coverage1, coverage2);

		// store solutions that satisfy minimal criteria
//		if (length >= minLength && maxCoverage >= minCoverage && tm >= minTm) {
			// create a row of alignment metrics
			Row row = RowFactory.create(key, length, coverage1, coverage2, (float)rmsd, (float)tm);
			rows.add(row);
//		}

		return rows;
	}
 
Example 17
Source File: RDDConverterUtilsExtTest.java    From systemds with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps a string in a single-column row.
 *
 * @param str value for the only column
 * @return row whose only column is {@code str}
 */
@Override
public Row call(String str) throws Exception {
	Object[] cells = {str};
	return RowFactory.create(cells);
}
 
Example 18
Source File: SecondaryStructureExtractor.java    From mmtf-spark with Apache License 2.0 4 votes vote down vote up
/**
 * Builds DSSP Q8 and Q3 strings for a single-chain structure and computes
 * the fraction of helix, sheet and coil assignments.
 *
 * <p>Sequence positions not covered by a secondary structure entry are
 * written as "X" in both strings. The fractions are the per-class counts
 * divided by the number of secondary structure entries.
 *
 * @param t pair of a key and the structure to analyse
 * @return row of (key, entity sequence, helix, sheet, coil, Q8 string, Q3 string)
 * @throws IllegalArgumentException if the structure does not have exactly one chain
 */
private static Row getSecStructFractions(Tuple2<String, StructureDataInterface> t) throws Exception {
	String key = t._1;
	StructureDataInterface structure = t._2;
	if (t._2.getNumChains() != 1) {
		throw new IllegalArgumentException("This method can only be applied to single polymer chain.");
	}

	int sequenceLength = structure.getEntitySequence(0).length();
	StringBuilder q8 = new StringBuilder(sequenceLength);
	StringBuilder q3 = new StringBuilder(sequenceLength);

	float helixCount = 0;
	float sheetCount = 0;
	float coilCount = 0;

	int written = 0; // number of sequence positions emitted so far
	int group = 0;   // cursor into the group sequence indices

	for (int code : structure.getSecStructList()) {
		int target = structure.getGroupSequenceIndices()[group++];
		// fill positions skipped between consecutive groups
		for (; written < target; written++) {
			q8.append("X");
			q3.append("X");
		}
		q8.append(DsspSecondaryStructure.getDsspCode(code).getOneLetterCode());
		written++;

		switch (DsspSecondaryStructure.getQ3Code(code)) {
		case ALPHA_HELIX:
			helixCount += 1;
			q3.append("H");
			break;
		case EXTENDED:
			sheetCount += 1;
			q3.append("E");
			break;
		case COIL:
			coilCount += 1;
			q3.append("C");
			break;
		default:
			// other classes are neither counted nor written to the Q3 string
			break;
		}
	}

	// fill the tail of the sequence that has no entry
	for (; written < structure.getEntitySequence(0).length(); written++) {
		q8.append("X");
		q3.append("X");
	}

	int total = structure.getSecStructList().length;
	float helix = helixCount / total;
	float sheet = sheetCount / total;
	float coil = coilCount / total;

	return RowFactory.create(key, structure.getEntitySequence(0), helix, sheet, coil, q8.toString(),
			q3.toString());
}
 
Example 19
Source File: SparkJdbcGenerator.java    From Quicksql with MIT License 4 votes vote down vote up
/**
 * Splits a string on the U+0006 control character and returns the pieces
 * as the columns of a row.
 *
 * @param ob the record; cast to {@code String} before splitting
 * @return row with one string column per piece
 */
@Override
public Row call(Object ob) throws Exception {
    String line = (String) ob;
    Object[] cells = line.split("\u0006");
    return RowFactory.create(cells);
}
 
Example 20
Source File: DefinitionToSparkVisitor.java    From bunsen with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the composite value for the given children by packing them, in
 * order, into a row.
 *
 * @param children values that become the row's columns
 * @return the row built from {@code children}
 */
@Override
protected Object createComposite(Object[] children) {
  Object composite = RowFactory.create(children);
  return composite;
}