org.apache.spark.ml.linalg.SparseVector Java Examples

Source File: RemoteDPParForSpark.java From systemds with Apache License 2.0

5 votes

/**
 * Converts one indexed input row into a single-row matrix block, keyed by the
 * paired index shifted by one.
 *
 * @param arg0 pair of the input row and the index associated with it
 * @return pair of the shifted row index and the row data wrapped in a writable block
 * @throws Exception as declared by the overridden method
 */
@Override
public Tuple2<Long, Writable> call(Tuple2<Row, Long> arg0) 
	throws Exception 
{
	//output row index is the paired index plus one
	long rowix = arg0._2() + 1;
	
	//process row data (skip the leading ID column if present)
	int off = _containsID ? 1: 0;
	Object obj = _isVector ? arg0._1().get(off) : arg0._1();
	boolean sparse = (obj instanceof SparseVector);
	MatrixBlock mb = new MatrixBlock(1, (int)_clen, sparse);
	
	if( _isVector ) {
		Vector vect = (Vector) obj;
		if( vect instanceof SparseVector ) {
			SparseVector svect = (SparseVector) vect;
			//iterate over all stored entries: numNonzeros() does not count
			//explicitly stored zeros and hence is no valid bound for the
			//indices/values arrays (trailing entries would be dropped)
			int[] six = svect.indices();
			double[] svals = svect.values();
			for( int k=0; k<six.length; k++ )
				mb.appendValue(0, six[k], svals[k]);
		}
		else { //dense
			for( int j=0; j<_clen; j++ )
				mb.appendValue(0, j, vect.apply(j));
		}
	}
	else { //row
		Row row = (Row) obj;
		for( int j=off; j<off+_clen; j++ )
			mb.appendValue(0, j-off, UtilFunctions.getDouble(row.get(j)));
	}
	mb.examSparsity();
	return new Tuple2<>(rowix, new PairWritableBlock(new MatrixIndexes(1,1),mb));
}

Source File: GradientBoostClassificationModelTest.java From spark-transformers with Apache License 2.0

5 votes

/**
 * Checks that a GBT classification model, after export and re-import as a
 * transformer, produces the same predictions as the original Spark model on
 * the held-out rows.
 */
@Test
public void testGradientBoostClassification() {
	// Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/binary_classification_test.libsvm";

	Dataset<Row> data = spark.read().format("libsvm").load(datapath);

	// Split the data into training and test sets (30% held out for testing)
	Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
	Dataset<Row> trainingData = splits[0];
	Dataset<Row> testData = splits[1];

	// Train a gradient-boosted tree classification model.
	GBTClassificationModel classificationModel = new GBTClassifier().fit(trainingData);

	byte[] exportedModel = ModelExporter.export(classificationModel);

	Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

	List<Row> sparkOutput =
			classificationModel.transform(testData).select("features", "prediction","label").collectAsList();

	// compare predictions; the Spark output is the expected value
	for (Row row : sparkOutput) {
		Map<String, Object> data_ = new HashMap<>();
		data_.put("features", ((SparseVector) row.get(0)).toArray());
		data_.put("label", (row.get(2)).toString());
		transformer.transform(data_);
		System.out.println(data_);
		System.out.println(data_.get("prediction")+" ,"+row.get(1));
		// (expected, actual) order so that failure messages read correctly
		assertEquals((double) row.get(1), (double) data_.get("prediction"), EPSILON);
	}

}

Source File: DecisionTreeRegressionModelBridgeTest.java From spark-transformers with Apache License 2.0

5 votes

/**
 * Checks that a decision tree regression model, after export and re-import as
 * a transformer, produces the same predictions as the original Spark model on
 * the held-out rows.
 */
@Test
public void testDecisionTreeRegressionPrediction() {
	// Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/regression_test.libsvm";

	Dataset<Row> data = spark.read().format("libsvm").load(datapath);

	// Split the data into training and test sets (30% held out for testing)
	Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
	Dataset<Row> trainingData = splits[0];
	Dataset<Row> testData = splits[1];

	// Train a DecisionTree model.
	DecisionTreeRegressionModel regressionModel = new DecisionTreeRegressor().fit(trainingData);
	trainingData.printSchema();

	List<Row> output = regressionModel.transform(testData).select("features", "prediction").collectAsList();
	byte[] exportedModel = ModelExporter.export(regressionModel);

	DecisionTreeTransformer transformer = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(exportedModel);

	System.out.println(transformer);
	// compare predictions; the Spark output is the expected value
	for (Row row : output) {
		Map<String, Object> data_ = new HashMap<>();
		data_.put("features", ((SparseVector) row.get(0)).toArray());
		transformer.transform(data_);
		System.out.println(data_);
		System.out.println(data_.get("prediction"));
		// (expected, actual) order so that failure messages read correctly
		assertEquals((double) row.get(1), (double) data_.get("prediction"), EPSILON);
	}
}

Source File: DecisionTreeClassificationModelBridgeTest.java From spark-transformers with Apache License 2.0

5 votes

/**
 * Checks that a decision tree classification model, after export and
 * re-import as a transformer, produces the same predictions and raw
 * predictions as the original Spark model on the held-out rows.
 */
@Test
public void testDecisionTreeClassificationPrediction() {
	// Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/classification_test.libsvm";
	Dataset<Row> data = spark.read().format("libsvm").load(datapath);

	// Split the data into training and test sets (30% held out for testing)
	Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
	Dataset<Row> trainingData = splits[0];
	Dataset<Row> testData = splits[1];

	// Train a DecisionTree model.
	DecisionTreeClassificationModel classifierModel = new DecisionTreeClassifier().fit(trainingData);
	trainingData.printSchema();

	List<Row> output = classifierModel.transform(testData).select("features", "prediction","rawPrediction").collectAsList();
	byte[] exportedModel = ModelExporter.export(classifierModel);

	DecisionTreeTransformer transformer = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(exportedModel);

	// compare predictions; the Spark output is the expected value
	for (Row row : output) {
		Map<String, Object> data_ = new HashMap<>();
		double[] expectedRawPrediction = ((DenseVector) row.get(2)).toArray();
		data_.put("features", ((SparseVector) row.get(0)).toArray());
		transformer.transform(data_);
		System.out.println(data_);
		System.out.println(data_.get("prediction"));
		// (expected, actual) order so that failure messages read correctly
		assertEquals((double) row.get(1), (double) data_.get("prediction"), EPSILON);
		assertArrayEquals(expectedRawPrediction, (double[]) data_.get("rawPrediction"), EPSILON);
	}
}

Source File: RemoteDPParForSpark.java From systemds with Apache License 2.0

5 votes

/**
 * Converts one indexed input row into a single-row matrix block, keyed by the
 * paired index shifted by one.
 *
 * @param arg0 pair of the input row and the index associated with it
 * @return pair of the shifted row index and the row data wrapped in a writable block
 * @throws Exception as declared by the overridden method
 */
@Override
public Tuple2<Long, Writable> call(Tuple2<Row, Long> arg0) 
	throws Exception 
{
	//output row index is the paired index plus one
	long rowix = arg0._2() + 1;
	
	//process row data (skip the leading ID column if present)
	int off = _containsID ? 1: 0;
	Object obj = _isVector ? arg0._1().get(off) : arg0._1();
	boolean sparse = (obj instanceof SparseVector);
	MatrixBlock mb = new MatrixBlock(1, (int)_clen, sparse);
	
	if( _isVector ) {
		Vector vect = (Vector) obj;
		if( vect instanceof SparseVector ) {
			SparseVector svect = (SparseVector) vect;
			//iterate over all stored entries: numNonzeros() does not count
			//explicitly stored zeros and hence is no valid bound for the
			//indices/values arrays (trailing entries would be dropped)
			int[] six = svect.indices();
			double[] svals = svect.values();
			for( int k=0; k<six.length; k++ )
				mb.appendValue(0, six[k], svals[k]);
		}
		else { //dense
			for( int j=0; j<_clen; j++ )
				mb.appendValue(0, j, vect.apply(j));
		}
	}
	else { //row
		Row row = (Row) obj;
		for( int j=off; j<off+_clen; j++ )
			mb.appendValue(0, j-off, UtilFunctions.getDouble(row.get(j)));
	}
	mb.examSparsity();
	return new Tuple2<>(rowix, new PairWritableBlock(new MatrixIndexes(1,1),mb));
}

Source File: RDDConverterUtils.java From systemds with Apache License 2.0

4 votes

/**
 * Converts a partition of indexed labeled points into matrix blocks. Depending
 * on {@code _labels}, either the label (as a single column) or the feature
 * vector of every point is written into the blocks of the current row block.
 *
 * @param arg0 iterator over pairs of a labeled point and the index associated with it
 * @return iterator over the collected (block index, block) pairs
 * @throws Exception as declared by the overridden method
 */
@Override
public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<org.apache.spark.mllib.regression.LabeledPoint,Long>> arg0) 
	throws Exception 
{
	ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<>();

	//one index/block slot per column block of the output
	int ncblks = (int)Math.ceil((double)_clen/_blen);
	MatrixIndexes[] ix = new MatrixIndexes[ncblks];
	MatrixBlock[] mb = new MatrixBlock[ncblks];
	
	while( arg0.hasNext() )
	{
		Tuple2<org.apache.spark.mllib.regression.LabeledPoint,Long> tmp = arg0.next();
		org.apache.spark.mllib.regression.LabeledPoint row = tmp._1();
		boolean lsparse = _sparseX || (!_labels && 
				row.features() instanceof org.apache.spark.mllib.linalg.SparseVector);
		long rowix = tmp._2() + 1;
		
		long rix = UtilFunctions.computeBlockIndex(rowix, _blen);
		int pos = UtilFunctions.computeCellInBlock(rowix, _blen);
	
		//create new blocks for entire row
		if( ix[0] == null || ix[0].getRowIndex() != rix ) {
			if( ix[0] !=null )
				flushBlocksToList(ix, mb, ret);
			long len = UtilFunctions.computeBlockSize(_rlen, rix, _blen);
			createBlocks(rowix, (int)len, ix, mb, lsparse);
		}
		
		//process row data
		if( _labels ) {
			double val = row.label();
			mb[0].appendValue(pos, 0, val);
			_aNnz.add((val != 0) ? 1 : 0);
		}
		else { //features
			int lnnz = row.features().numNonzeros();
			if( row.features() instanceof org.apache.spark.mllib.linalg.SparseVector )
			{
				org.apache.spark.mllib.linalg.SparseVector srow = 
						(org.apache.spark.mllib.linalg.SparseVector) row.features();
				//iterate over all stored entries: numNonzeros() does not count
				//explicitly stored zeros and hence is no valid bound for the
				//indices/values arrays (trailing entries would be dropped)
				int[] six = srow.indices();
				double[] svals = srow.values();
				for( int k=0; k<six.length; k++ ) {
					int gix = six[k]+1;
					int cix = (int)UtilFunctions.computeBlockIndex(gix, _blen);
					int j = UtilFunctions.computeCellInBlock(gix, _blen);
					mb[cix-1].appendValue(pos, j, svals[k]);
				}
			}
			else { //dense
				for( int cix=1, pix=0; cix<=ncblks; cix++ ) {
					int lclen = UtilFunctions.computeBlockSize(_clen, cix, _blen);
					for( int j=0; j<lclen; j++ )
						mb[cix-1].appendValue(pos, j, row.features().apply(pix++));
				}
			}
			//the nnz accumulator still receives the count of true non-zeros
			_aNnz.add(lnnz);
		}
	}

	//flush last blocks
	flushBlocksToList(ix, mb, ret);

	return ret.iterator();
}

Source File: RDDConverterUtils.java From systemds with Apache License 2.0

4 votes

/**
 * Converts a partition of indexed rows into matrix blocks. Each input row is
 * either a plain row of values or a row holding a vector column
 * ({@code _isVector}), optionally preceded by an ID column ({@code _containsID}).
 *
 * @param arg0 iterator over pairs of an input row and the index associated with it
 * @return iterator over the collected (block index, block) pairs
 * @throws Exception as declared by the overridden method
 */
@Override
public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Row, Long>> arg0) 
	throws Exception 
{
	ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<>();
	
	//one index/block slot per column block of the output
	int ncblks = (int)Math.ceil((double)_clen/_blen);
	MatrixIndexes[] ix = new MatrixIndexes[ncblks];
	MatrixBlock[] mb = new MatrixBlock[ncblks];
	
	while( arg0.hasNext() )
	{
		Tuple2<Row,Long> tmp = arg0.next();
		//output row index is the paired index plus one
		long rowix = tmp._2() + 1;
		
		//row-block index and row position inside that block
		long rix = UtilFunctions.computeBlockIndex(rowix, _blen);
		int pos = UtilFunctions.computeCellInBlock(rowix, _blen);
	
		//create new blocks for entire row
		//NOTE(review): blocks are flushed whenever the row-block index changes,
		//which presumably relies on rows of one row block arriving consecutively
		//-- confirm against the caller
		if( ix[0] == null || ix[0].getRowIndex() != rix ) {
			if( ix[0] !=null )
				flushBlocksToList(ix, mb, ret);
			long len = UtilFunctions.computeBlockSize(_rlen, rix, _blen);
			createBlocks(rowix, (int)len, ix, mb);
		}
		
		//process row data
		int off = _containsID ? 1 : 0;
		Object obj = _isVector ? tmp._1().get(off) : tmp._1();
		//pix is a cursor carried across all column blocks of this row; its meaning
		//depends on the branch below: position in the sparse vector's stored
		//entries, position in the dense vector, or field position in the row
		//(the latter starting at off to skip the ID column)
		for( int cix=1, pix=_isVector?0:off; cix<=ncblks; cix++ ) {
			int lclen = UtilFunctions.computeBlockSize(_clen, cix, _blen);
			//exclusive upper column bound of this column block (shifted by off
			//for plain rows); limits the sparse scan and is passed to countNnz
			int cu = (int) Math.min(_clen, cix*_blen) + (_isVector?0:off);
			//allocate sparse row once (avoid re-allocations)
			if( mb[cix-1].isInSparseFormat() ) {
				int lnnz = countNnz(obj, _isVector, pix, cu);
				mb[cix-1].getSparseBlock().allocate(pos, lnnz);
			}
			//append data to matrix blocks
			if( _isVector ) {
				Vector vect = (Vector) obj;
				if( vect instanceof SparseVector ) {
					SparseVector svect = (SparseVector) vect;
					int[] svectIx = svect.indices();
					//consume stored entries whose column index falls below cu;
					//pix stops at the first entry of the next column block
					while( pix<svectIx.length && svectIx[pix]<cu ) {
						int j = UtilFunctions.computeCellInBlock(svectIx[pix]+1, _blen);
						mb[cix-1].appendValue(pos, j, svect.values()[pix++]);
					}
				}
				else { //dense
					for( int j=0; j<lclen; j++ )
						mb[cix-1].appendValue(pos, j, vect.apply(pix++));
				}
			}
			else { //row
				Row row = (Row) obj;
				for( int j=0; j<lclen; j++ )
					mb[cix-1].appendValue(pos, j, UtilFunctions.getDouble(row.get(pix++)));
			}
		}
	}

	//flush last blocks
	//NOTE(review): also reached for an empty input iterator, in which case the
	//block arrays still hold nulls -- presumably tolerated by flushBlocksToList
	flushBlocksToList(ix, mb, ret);

	return ret.iterator();
}

Source File: DatasetClassifier.java From mmtf-spark with Apache License 2.0

4 votes

/**
 * Fits a decision tree, a random forest, a logistic regression and a
 * multilayer perceptron classifier on a down-sampled parquet dataset and
 * prints the metrics returned for each, followed by the total run time.
 *
 * @param args args[0] path to parquet file, args[1] name of classification column
 * @throws IOException declared by the signature; presumably raised by the called helpers
 */
public static void main(String[] args) throws IOException {

	if (args.length != 2) {
		System.err.println("Usage: " + DatasetClassifier.class.getSimpleName() + " <parquet file> <classification column name>");
		System.exit(1);
	}

	// name of the class label
	String label = args[1];
	
	long start = System.nanoTime();

	SparkSession spark = SparkSession
			.builder()
			.master("local[*]")
			.appName(DatasetClassifier.class.getSimpleName())
			.getOrCreate();

	Dataset<Row> data = spark.read().parquet(args[0]).cache();
	
	// Dimension of the feature vector. size() is used instead of numActives():
	// a sparse vector stores only a subset of its entries, so its number of
	// active entries is not the input dimension required by the network below.
	int featureCount = 0;
	Object vector = data.first().getAs("features");
	if (vector instanceof DenseVector) {
		featureCount = ((DenseVector)vector).size();
	} else if (vector instanceof SparseVector) {
		featureCount = ((SparseVector)vector).size();
	}
	
	System.out.println("Feature count            : "  + featureCount);
	
	int classCount = (int)data.select(label).distinct().count();
	System.out.println("Class count              : " + classCount);

	System.out.println("Dataset size (unbalanced): " + data.count());
	data.groupBy(label).count().show(classCount);

	data = DatasetBalancer.downsample(data, label, 1);
	
	System.out.println("Dataset size (balanced)  : " + data.count());
	data.groupBy(label).count().show(classCount);

	double testFraction = 0.3;
	long seed = 123;

	SparkMultiClassClassifier mcc;
	Map<String, String> metrics;

	DecisionTreeClassifier dtc = new DecisionTreeClassifier();
	mcc = new SparkMultiClassClassifier(dtc, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	RandomForestClassifier rfc = new RandomForestClassifier();
	mcc = new SparkMultiClassClassifier(rfc, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	LogisticRegression lr = new LogisticRegression();
	mcc = new SparkMultiClassClassifier(lr, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	// specify layers for the neural network
	//    input layer: dimension of feature vector
	//    output layer: number of classes
	int[] layers = new int[] {featureCount, 10, classCount};
	MultilayerPerceptronClassifier mpc = new MultilayerPerceptronClassifier()
			.setLayers(layers)
			.setBlockSize(128)
			.setSeed(1234L)
			.setMaxIter(200);

	mcc = new SparkMultiClassClassifier(mpc, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	long end = System.nanoTime();

	System.out.println((end-start)/1E9 + " sec");
}

Source File: DecisionTreeRegressionModelBridgePipelineTest.java From spark-transformers with Apache License 2.0

4 votes

/**
 * Checks that a pipeline of a string indexer and a decision tree regressor,
 * after export and re-import as a transformer, produces the same predictions
 * as the fitted Spark pipeline on the held-out rows.
 */
@Test
public void testDecisionTreeRegressionPrediction() {
	// Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/regression_test.libsvm";

	Dataset<Row> data = spark.read().format("libsvm").load(datapath);

	// Split the data into training and test sets (30% held out for testing)
	Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
	Dataset<Row> trainingData = splits[0];
	Dataset<Row> testData = splits[1];

	StringIndexer indexer = new StringIndexer()
			.setInputCol("label")
			.setOutputCol("labelIndex").setHandleInvalid("skip");

	// Unfitted estimator; it is trained as part of the pipeline below.
	DecisionTreeRegressor regressor =
			new DecisionTreeRegressor().setLabelCol("labelIndex").setFeaturesCol("features");

	Pipeline pipeline = new Pipeline()
			.setStages(new PipelineStage[]{indexer, regressor});

	PipelineModel sparkPipeline = pipeline.fit(trainingData);

	byte[] exportedModel = ModelExporter.export(sparkPipeline);

	Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);
	List<Row> output = sparkPipeline.transform(testData).select("features", "prediction", "label").collectAsList();

	// compare predictions; the Spark output is the expected value
	for (Row row : output) {
		Map<String, Object> data_ = new HashMap<>();
		data_.put("features", ((SparseVector) row.get(0)).toArray());
		data_.put("label", (row.get(2)).toString());
		transformer.transform(data_);
		System.out.println(data_);
		System.out.println(data_.get("prediction"));
		// (expected, actual) order so that failure messages read correctly
		assertEquals((double) row.get(1), (double) data_.get("prediction"), EPSILON);
	}
}

Source File: GradientBoostClassificationModelPipelineTest.java From spark-transformers with Apache License 2.0

4 votes

/**
 * Checks that a pipeline of a string indexer and a GBT classifier, after
 * export and re-import as a transformer, produces the same predictions as the
 * fitted Spark pipeline on the held-out rows.
 */
@Test
public void testGradientBoostClassification() {
	// Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/binary_classification_test.libsvm";

	Dataset<Row> data = spark.read().format("libsvm").load(datapath);
	StringIndexer indexer = new StringIndexer()
			.setInputCol("label")
			.setOutputCol("labelIndex");
	// Split the data into training and test sets (30% held out for testing)
	Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
	Dataset<Row> trainingData = splits[0];
	Dataset<Row> testData = splits[1];

	// Configure a gradient-boosted tree classifier; it is trained as part of the pipeline below.
	GBTClassifier classificationModel = new GBTClassifier().setLabelCol("labelIndex")
			.setFeaturesCol("features");

	Pipeline pipeline = new Pipeline()
			.setStages(new PipelineStage[]{indexer, classificationModel});

	PipelineModel sparkPipeline = pipeline.fit(trainingData);

	// Export this model
	byte[] exportedModel = ModelExporter.export(sparkPipeline);

	// Import and get Transformer
	Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

	List<Row> sparkOutput = sparkPipeline.transform(testData).select("features", "prediction", "label").collectAsList();

	// compare predictions; the Spark output is the expected value
	for (Row row : sparkOutput) {
		Map<String, Object> data_ = new HashMap<>();
		data_.put("features", ((SparseVector) row.get(0)).toArray());
		data_.put("label", (row.get(2)).toString());
		transformer.transform(data_);
		System.out.println(data_);
		System.out.println(data_.get("prediction")+" ,"+row.get(1));
		// (expected, actual) order so that failure messages read correctly
		assertEquals((double) row.get(1), (double) data_.get("prediction"), EPSILON);
	}

}

Source File: DecisionTreeClassificationModelBridgePipelineTest.java From spark-transformers with Apache License 2.0

4 votes

/**
 * Checks that a pipeline of a string indexer and a decision tree classifier,
 * after export and re-import as a transformer, produces the same predictions
 * and raw predictions as the fitted Spark pipeline on the held-out rows.
 */
@Test
public void testDecisionTreeClassificationWithPipeline() {

	// Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/classification_test.libsvm";
	Dataset<Row> data = spark.read().format("libsvm").load(datapath);

	// Split the data into training and test sets (30% held out for testing)
	Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});

	Dataset<Row> trainingData = splits[0];
	Dataset<Row> testData = splits[1];

	StringIndexer indexer = new StringIndexer()
			.setInputCol("label")
			.setOutputCol("labelIndex");

	// Configure a DecisionTree classifier; it is trained as part of the pipeline below.
	DecisionTreeClassifier classificationModel = new DecisionTreeClassifier()
			.setLabelCol("labelIndex")
			.setFeaturesCol("features");

	Pipeline pipeline = new Pipeline()
			.setStages(new PipelineStage[]{indexer, classificationModel});

	// Train model.  This also runs the indexer.
	PipelineModel sparkPipeline = pipeline.fit(trainingData);

	//Export this model
	byte[] exportedModel = ModelExporter.export(sparkPipeline);

	//Import and get Transformer
	Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

	List<Row> output = sparkPipeline.transform(testData).select("features", "label","prediction","rawPrediction").collectAsList();

	// compare predictions; the Spark output is the expected value
	for (Row row : output) {
		Map<String, Object> data_ = new HashMap<>();
		double[] expectedRawPrediction = ((DenseVector) row.get(3)).toArray();
		data_.put("features", ((SparseVector) row.get(0)).toArray());
		data_.put("label", (row.get(1)).toString());
		transformer.transform(data_);
		System.out.println(data_);
		System.out.println(data_.get("prediction"));
		// (expected, actual) order so that failure messages read correctly
		assertEquals((double) row.get(2), (double) data_.get("prediction"), EPSILON);
		assertArrayEquals(expectedRawPrediction, (double[]) data_.get("rawPrediction"), EPSILON);
	}
}

Source File: RDDConverterUtils.java From systemds with Apache License 2.0

4 votes

/**
 * Converts a partition of indexed labeled points into matrix blocks. Depending
 * on {@code _labels}, either the label (as a single column) or the feature
 * vector of every point is written into the blocks of the current row block.
 *
 * @param arg0 iterator over pairs of a labeled point and the index associated with it
 * @return iterator over the collected (block index, block) pairs
 * @throws Exception as declared by the overridden method
 */
@Override
public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<org.apache.spark.mllib.regression.LabeledPoint,Long>> arg0) 
	throws Exception 
{
	ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<>();

	//one index/block slot per column block of the output
	int ncblks = (int)Math.ceil((double)_clen/_blen);
	MatrixIndexes[] ix = new MatrixIndexes[ncblks];
	MatrixBlock[] mb = new MatrixBlock[ncblks];
	
	while( arg0.hasNext() )
	{
		Tuple2<org.apache.spark.mllib.regression.LabeledPoint,Long> tmp = arg0.next();
		org.apache.spark.mllib.regression.LabeledPoint row = tmp._1();
		boolean lsparse = _sparseX || (!_labels && 
				row.features() instanceof org.apache.spark.mllib.linalg.SparseVector);
		long rowix = tmp._2() + 1;
		
		long rix = UtilFunctions.computeBlockIndex(rowix, _blen);
		int pos = UtilFunctions.computeCellInBlock(rowix, _blen);
	
		//create new blocks for entire row
		if( ix[0] == null || ix[0].getRowIndex() != rix ) {
			if( ix[0] !=null )
				flushBlocksToList(ix, mb, ret);
			long len = UtilFunctions.computeBlockSize(_rlen, rix, _blen);
			createBlocks(rowix, (int)len, ix, mb, lsparse);
		}
		
		//process row data
		if( _labels ) {
			double val = row.label();
			mb[0].appendValue(pos, 0, val);
			_aNnz.add((val != 0) ? 1 : 0);
		}
		else { //features
			int lnnz = row.features().numNonzeros();
			if( row.features() instanceof org.apache.spark.mllib.linalg.SparseVector )
			{
				org.apache.spark.mllib.linalg.SparseVector srow = 
						(org.apache.spark.mllib.linalg.SparseVector) row.features();
				//iterate over all stored entries: numNonzeros() does not count
				//explicitly stored zeros and hence is no valid bound for the
				//indices/values arrays (trailing entries would be dropped)
				int[] six = srow.indices();
				double[] svals = srow.values();
				for( int k=0; k<six.length; k++ ) {
					int gix = six[k]+1;
					int cix = (int)UtilFunctions.computeBlockIndex(gix, _blen);
					int j = UtilFunctions.computeCellInBlock(gix, _blen);
					mb[cix-1].appendValue(pos, j, svals[k]);
				}
			}
			else { //dense
				for( int cix=1, pix=0; cix<=ncblks; cix++ ) {
					int lclen = UtilFunctions.computeBlockSize(_clen, cix, _blen);
					for( int j=0; j<lclen; j++ )
						mb[cix-1].appendValue(pos, j, row.features().apply(pix++));
				}
			}
			//the nnz accumulator still receives the count of true non-zeros
			_aNnz.add(lnnz);
		}
	}

	//flush last blocks
	flushBlocksToList(ix, mb, ret);

	return ret.iterator();
}

Source File: RDDConverterUtils.java From systemds with Apache License 2.0

4 votes

/**
 * Converts a partition of indexed rows into matrix blocks. Each input row is
 * either a plain row of values or a row holding a vector column
 * ({@code _isVector}), optionally preceded by an ID column ({@code _containsID}).
 *
 * @param arg0 iterator over pairs of an input row and the index associated with it
 * @return iterator over the collected (block index, block) pairs
 * @throws Exception as declared by the overridden method
 */
@Override
public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Row, Long>> arg0) 
	throws Exception 
{
	ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<>();
	
	//one index/block slot per column block of the output
	int ncblks = (int)Math.ceil((double)_clen/_blen);
	MatrixIndexes[] ix = new MatrixIndexes[ncblks];
	MatrixBlock[] mb = new MatrixBlock[ncblks];
	
	while( arg0.hasNext() )
	{
		Tuple2<Row,Long> tmp = arg0.next();
		//output row index is the paired index plus one
		long rowix = tmp._2() + 1;
		
		//row-block index and row position inside that block
		long rix = UtilFunctions.computeBlockIndex(rowix, _blen);
		int pos = UtilFunctions.computeCellInBlock(rowix, _blen);
	
		//create new blocks for entire row
		//NOTE(review): blocks are flushed whenever the row-block index changes,
		//which presumably relies on rows of one row block arriving consecutively
		//-- confirm against the caller
		if( ix[0] == null || ix[0].getRowIndex() != rix ) {
			if( ix[0] !=null )
				flushBlocksToList(ix, mb, ret);
			long len = UtilFunctions.computeBlockSize(_rlen, rix, _blen);
			createBlocks(rowix, (int)len, ix, mb);
		}
		
		//process row data
		int off = _containsID ? 1 : 0;
		Object obj = _isVector ? tmp._1().get(off) : tmp._1();
		//pix is a cursor carried across all column blocks of this row; its meaning
		//depends on the branch below: position in the sparse vector's stored
		//entries, position in the dense vector, or field position in the row
		//(the latter starting at off to skip the ID column)
		for( int cix=1, pix=_isVector?0:off; cix<=ncblks; cix++ ) {
			int lclen = UtilFunctions.computeBlockSize(_clen, cix, _blen);
			//exclusive upper column bound of this column block (shifted by off
			//for plain rows); limits the sparse scan and is passed to countNnz
			int cu = (int) Math.min(_clen, cix*_blen) + (_isVector?0:off);
			//allocate sparse row once (avoid re-allocations)
			if( mb[cix-1].isInSparseFormat() ) {
				int lnnz = countNnz(obj, _isVector, pix, cu);
				mb[cix-1].getSparseBlock().allocate(pos, lnnz);
			}
			//append data to matrix blocks
			if( _isVector ) {
				Vector vect = (Vector) obj;
				if( vect instanceof SparseVector ) {
					SparseVector svect = (SparseVector) vect;
					int[] svectIx = svect.indices();
					//consume stored entries whose column index falls below cu;
					//pix stops at the first entry of the next column block
					while( pix<svectIx.length && svectIx[pix]<cu ) {
						int j = UtilFunctions.computeCellInBlock(svectIx[pix]+1, _blen);
						mb[cix-1].appendValue(pos, j, svect.values()[pix++]);
					}
				}
				else { //dense
					for( int j=0; j<lclen; j++ )
						mb[cix-1].appendValue(pos, j, vect.apply(pix++));
				}
			}
			else { //row
				Row row = (Row) obj;
				for( int j=0; j<lclen; j++ )
					mb[cix-1].appendValue(pos, j, UtilFunctions.getDouble(row.get(pix++)));
			}
		}
	}

	//flush last blocks
	//NOTE(review): also reached for an empty input iterator, in which case the
	//block arrays still hold nulls -- presumably tolerated by flushBlocksToList
	flushBlocksToList(ix, mb, ret);

	return ret.iterator();
}

org.apache.spark.ml.linalg.SparseVector Java Examples