org.apache.spark.ml.linalg.SparseVector Java Examples

The following examples show how to use org.apache.spark.ml.linalg.SparseVector. They are drawn from open source projects; the originating project and source file are noted above each example.
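Before the project examples, here is a minimal standalone sketch of how a SparseVector is typically created and inspected with the standard Spark ML vector API (Vectors.sparse, indices(), values(), numNonzeros(), toArray()); the class name SparseVectorBasics is illustrative only and not taken from any of the projects below.

import java.util.Arrays;

import org.apache.spark.ml.linalg.SparseVector;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.Vectors;

public class SparseVectorBasics {
	public static void main(String[] args) {
		// A vector of length 5 with non-zeros at positions 1 and 3.
		Vector v = Vectors.sparse(5, new int[]{1, 3}, new double[]{2.5, -1.0});

		if (v instanceof SparseVector) {
			SparseVector sv = (SparseVector) v;
			System.out.println(sv.size());                      // 5 (full dimension)
			System.out.println(Arrays.toString(sv.indices()));  // [1, 3]
			System.out.println(Arrays.toString(sv.values()));   // [2.5, -1.0]
			System.out.println(sv.numNonzeros());               // 2
			System.out.println(Arrays.toString(sv.toArray()));  // dense copy: [0.0, 2.5, 0.0, -1.0, 0.0]
		}
	}
}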
Example #1
Source File: RemoteDPParForSpark.java    From systemds with Apache License 2.0
@Override
public Tuple2<Long, Writable> call(Tuple2<Row, Long> arg0) 
	throws Exception 
{
	long rowix = arg0._2() + 1;
	
	//process row data
	int off = _containsID ? 1: 0;
	Object obj = _isVector ? arg0._1().get(off) : arg0._1();
	boolean sparse = (obj instanceof SparseVector);
	MatrixBlock mb = new MatrixBlock(1, (int)_clen, sparse);
	
	if( _isVector ) {
		Vector vect = (Vector) obj;
		if( vect instanceof SparseVector ) {
			SparseVector svect = (SparseVector) vect;
			int lnnz = svect.numNonzeros();
			for( int k=0; k<lnnz; k++ )
				mb.appendValue(0, svect.indices()[k], svect.values()[k]);
		}
		else { //dense
			for( int j=0; j<_clen; j++ )
				mb.appendValue(0, j, vect.apply(j));	
		}
	}
	else { //row
		Row row = (Row) obj;
		for( int j=off; j<off+_clen; j++ )
			mb.appendValue(0, j-off, UtilFunctions.getDouble(row.get(j)));
	}
	mb.examSparsity();
	return new Tuple2<>(rowix, new PairWritableBlock(new MatrixIndexes(1,1),mb));
}
 
Example #2
Source File: GradientBoostClassificationModelTest.java    From spark-transformers with Apache License 2.0
@Test
public void testGradientBoostClassification() {
	// Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/binary_classification_test.libsvm";

	Dataset<Row> data = spark.read().format("libsvm").load(datapath);

	// Split the data into training and test sets (30% held out for testing)
	Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
	Dataset<Row> trainingData = splits[0];
	Dataset<Row> testData = splits[1];

	// Train a gradient-boosted trees (GBT) model.
	GBTClassificationModel classificationModel = new GBTClassifier().fit(trainingData);

	byte[] exportedModel = ModelExporter.export(classificationModel);

	Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

	List<Row> sparkOutput =
	        classificationModel.transform(testData).select("features", "prediction","label").collectAsList();

	// compare predictions
	for (Row row : sparkOutput) {
		Map<String, Object> data_ = new HashMap<>();
		data_.put("features", ((SparseVector) row.get(0)).toArray());
		data_.put("label", (row.get(2)).toString());
		transformer.transform(data_);
		System.out.println(data_);
		System.out.println(data_.get("prediction")+" ,"+row.get(1));
		assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON);
	}

}
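These tests cast row.get(0) directly to SparseVector, which holds here because the libsvm data source materializes the features column as sparse vectors. When the storage format is not guaranteed, a format-agnostic variant can go through the Vector interface instead; the helper below is a sketch with an illustrative class name, not part of the original tests.

import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.sql.Row;

final class VectorRows {
	// toArray() is implemented by both SparseVector and DenseVector,
	// so this avoids a ClassCastException for dense feature columns.
	static double[] featuresAsArray(Row row, int fieldIndex) {
		Vector features = (Vector) row.get(fieldIndex);
		return features.toArray();
	}
}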
 
Example #3
Source File: DecisionTreeRegressionModelBridgeTest.java    From spark-transformers with Apache License 2.0
@Test
public void testDecisionTreeRegressionPrediction() {
    // Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/regression_test.libsvm";
	
	Dataset<Row> data = spark.read().format("libsvm").load(datapath);


    // Split the data into training and test sets (30% held out for testing)
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];

    // Train a DecisionTree model.
    DecisionTreeRegressionModel regressionModel = new DecisionTreeRegressor().fit(trainingData);
    trainingData.printSchema();
    
    List<Row> output = regressionModel.transform(testData).select("features", "prediction").collectAsList();
    byte[] exportedModel = ModelExporter.export(regressionModel);

    DecisionTreeTransformer transformer = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(exportedModel);

    System.out.println(transformer);
    //compare predictions
    for (Row row : output) {
    	Map<String, Object> data_ = new HashMap<>();
        data_.put("features", ((SparseVector) row.get(0)).toArray());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction"));
        assertEquals((double)data_.get("prediction"), (double)row.get(1), EPSILON);
    }
}
 
Example #4
Source File: DecisionTreeClassificationModelBridgeTest.java    From spark-transformers with Apache License 2.0
@Test
public void testDecisionTreeClassificationPrediction() {
    // Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/classification_test.libsvm";
	Dataset<Row> data = spark.read().format("libsvm").load(datapath);


    // Split the data into training and test sets (30% held out for testing)
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];

    // Train a DecisionTree model.
    DecisionTreeClassificationModel classifierModel = new DecisionTreeClassifier().fit(trainingData);
    trainingData.printSchema();
    
    List<Row> output = classifierModel.transform(testData).select("features", "prediction","rawPrediction").collectAsList();
    byte[] exportedModel = ModelExporter.export(classifierModel);

    DecisionTreeTransformer transformer = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(exportedModel);

    //compare predictions
    for (Row row : output) {
    	Map<String, Object> data_ = new HashMap<>();
    	double [] actualRawPrediction = ((DenseVector) row.get(2)).toArray();
        data_.put("features", ((SparseVector) row.get(0)).toArray());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction"));
        assertEquals((double)data_.get("prediction"), (double)row.get(1), EPSILON);
        assertArrayEquals((double[]) data_.get("rawPrediction"), actualRawPrediction, EPSILON);
    }
}
 
Example #5
Source File: RDDConverterUtils.java    From systemds with Apache License 2.0
@Override
public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<org.apache.spark.mllib.regression.LabeledPoint,Long>> arg0) 
	throws Exception 
{
	ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<>();

	int ncblks = (int)Math.ceil((double)_clen/_blen);
	MatrixIndexes[] ix = new MatrixIndexes[ncblks];
	MatrixBlock[] mb = new MatrixBlock[ncblks];
	
	while( arg0.hasNext() )
	{
		Tuple2<org.apache.spark.mllib.regression.LabeledPoint,Long> tmp = arg0.next();
		org.apache.spark.mllib.regression.LabeledPoint row = tmp._1();
		boolean lsparse = _sparseX || (!_labels && 
				row.features() instanceof org.apache.spark.mllib.linalg.SparseVector);
		long rowix = tmp._2() + 1;
		
		long rix = UtilFunctions.computeBlockIndex(rowix, _blen);
		int pos = UtilFunctions.computeCellInBlock(rowix, _blen);
	
		//create new blocks for entire row
		if( ix[0] == null || ix[0].getRowIndex() != rix ) {
			if( ix[0] !=null )
				flushBlocksToList(ix, mb, ret);
			long len = UtilFunctions.computeBlockSize(_rlen, rix, _blen);
			createBlocks(rowix, (int)len, ix, mb, lsparse);
		}
		
		//process row data
		if( _labels ) {
			double val = row.label();
			mb[0].appendValue(pos, 0, val);
			_aNnz.add((val != 0) ? 1 : 0);
		}
		else { //features
			int lnnz = row.features().numNonzeros();
			if( row.features() instanceof org.apache.spark.mllib.linalg.SparseVector )
			{
				org.apache.spark.mllib.linalg.SparseVector srow = 
						(org.apache.spark.mllib.linalg.SparseVector) row.features();
				for( int k=0; k<lnnz; k++ ) {
					int gix = srow.indices()[k]+1;
					int cix = (int)UtilFunctions.computeBlockIndex(gix, _blen);
					int j = UtilFunctions.computeCellInBlock(gix, _blen);
					mb[cix-1].appendValue(pos, j, srow.values()[k]);
				}
			}
			else { //dense
				for( int cix=1, pix=0; cix<=ncblks; cix++ ) {
					int lclen = UtilFunctions.computeBlockSize(_clen, cix, _blen);
					for( int j=0; j<lclen; j++ )
						mb[cix-1].appendValue(pos, j, row.features().apply(pix++));
				}
			}
			_aNnz.add(lnnz);
		}
	}

	//flush last blocks
	flushBlocksToList(ix, mb, ret);

	return ret.iterator();
}
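The converter above maps 1-based global row and column indices to a block index and a cell offset within that block via UtilFunctions.computeBlockIndex and computeCellInBlock. The sketch below restates that arithmetic under the assumption of the usual 1-based block addressing with block size blen; the helper names are illustrative and not the SystemDS implementation.

public class BlockIndexDemo {
	// Assumed semantics: block indices are 1-based, in-block cells are 0-based.
	static long blockIndex(long gix, int blen) { return (gix - 1) / blen + 1; }
	static int cellInBlock(long gix, int blen) { return (int) ((gix - 1) % blen); }

	public static void main(String[] args) {
		int blen = 1000;
		long gix = 2500;                              // global 1-based column index
		System.out.println(blockIndex(gix, blen));    // 3
		System.out.println(cellInBlock(gix, blen));   // 499
	}
}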
 
Example #6
Source File: RDDConverterUtils.java    From systemds with Apache License 2.0
@Override
public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Row, Long>> arg0) 
	throws Exception 
{
	ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<>();
	
	int ncblks = (int)Math.ceil((double)_clen/_blen);
	MatrixIndexes[] ix = new MatrixIndexes[ncblks];
	MatrixBlock[] mb = new MatrixBlock[ncblks];
	
	while( arg0.hasNext() )
	{
		Tuple2<Row,Long> tmp = arg0.next();
		long rowix = tmp._2() + 1;
		
		long rix = UtilFunctions.computeBlockIndex(rowix, _blen);
		int pos = UtilFunctions.computeCellInBlock(rowix, _blen);
	
		//create new blocks for entire row
		if( ix[0] == null || ix[0].getRowIndex() != rix ) {
			if( ix[0] !=null )
				flushBlocksToList(ix, mb, ret);
			long len = UtilFunctions.computeBlockSize(_rlen, rix, _blen);
			createBlocks(rowix, (int)len, ix, mb);
		}
		
		//process row data
		int off = _containsID ? 1 : 0;
		Object obj = _isVector ? tmp._1().get(off) : tmp._1();
		for( int cix=1, pix=_isVector?0:off; cix<=ncblks; cix++ ) {
			int lclen = UtilFunctions.computeBlockSize(_clen, cix, _blen);
			int cu = (int) Math.min(_clen, cix*_blen) + (_isVector?0:off);
			//allocate sparse row once (avoid re-allocations)
			if( mb[cix-1].isInSparseFormat() ) {
				int lnnz = countNnz(obj, _isVector, pix, cu);
				mb[cix-1].getSparseBlock().allocate(pos, lnnz);
			}
			//append data to matrix blocks
			if( _isVector ) {
				Vector vect = (Vector) obj;
				if( vect instanceof SparseVector ) {
					SparseVector svect = (SparseVector) vect;
					int[] svectIx = svect.indices();
					while( pix<svectIx.length && svectIx[pix]<cu ) {
						int j = UtilFunctions.computeCellInBlock(svectIx[pix]+1, _blen);
						mb[cix-1].appendValue(pos, j, svect.values()[pix++]);
					}
				}
				else { //dense
					for( int j=0; j<lclen; j++ )
						mb[cix-1].appendValue(pos, j, vect.apply(pix++));
				}
			}
			else { //row
				Row row = (Row) obj;
				for( int j=0; j<lclen; j++ )
					mb[cix-1].appendValue(pos, j, UtilFunctions.getDouble(row.get(pix++)));
			}
		}
	}

	//flush last blocks
	flushBlocksToList(ix, mb, ret);

	return ret.iterator();
}
 
Example #7
Source File: DatasetClassifier.java    From mmtf-spark with Apache License 2.0
/**
 * @param args args[0] path to parquet file, args[1] name of classification column
 * @throws IOException 
 * @throws StructureException 
 */
public static void main(String[] args) throws IOException {

	if (args.length != 2) {
		System.err.println("Usage: " + DatasetClassifier.class.getSimpleName() + " <parquet file> <classification column name>");
		System.exit(1);
	}

	// name of the class label
	String label = args[1];
	
	long start = System.nanoTime();

	SparkSession spark = SparkSession
			.builder()
			.master("local[*]")
			.appName(DatasetClassifier.class.getSimpleName())
			.getOrCreate();

	Dataset<Row> data = spark.read().parquet(args[0]).cache();
	
	// determine the feature vector length from the first row
	// (numActives() counts explicitly stored entries; for a SparseVector
	// the full dimension would be size())
	int featureCount = 0;
	Object vector = data.first().getAs("features");
	if (vector instanceof DenseVector) {
		featureCount = ((DenseVector) vector).numActives();
	} else if (vector instanceof SparseVector) {
		featureCount = ((SparseVector) vector).numActives();
	}
	
	System.out.println("Feature count            : "  + featureCount);
	
	int classCount = (int)data.select(label).distinct().count();
	System.out.println("Class count              : " + classCount);

	System.out.println("Dataset size (unbalanced): " + data.count());
	data.groupBy(label).count().show(classCount);

	data = DatasetBalancer.downsample(data, label, 1);
	
	System.out.println("Dataset size (balanced)  : " + data.count());
	data.groupBy(label).count().show(classCount);

	double testFraction = 0.3;
	long seed = 123;

	SparkMultiClassClassifier mcc;
	Map<String, String> metrics;

	DecisionTreeClassifier dtc = new DecisionTreeClassifier();
	mcc = new SparkMultiClassClassifier(dtc, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	RandomForestClassifier rfc = new RandomForestClassifier();
	mcc = new SparkMultiClassClassifier(rfc, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	LogisticRegression lr = new LogisticRegression();
	mcc = new SparkMultiClassClassifier(lr, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	// specify layers for the neural network
	//    input layer: dimension of feature vector
	//    output layer: number of classes
	int[] layers = new int[] {featureCount, 10, classCount};
	MultilayerPerceptronClassifier mpc = new MultilayerPerceptronClassifier()
			.setLayers(layers)
			.setBlockSize(128)
			.setSeed(1234L)
			.setMaxIter(200);

	mcc = new SparkMultiClassClassifier(mpc, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	long end = System.nanoTime();

	System.out.println((end-start)/1E9 + " sec");
}
 
Example #8
Source File: DecisionTreeRegressionModelBridgePipelineTest.java    From spark-transformers with Apache License 2.0
@Test
public void testDecisionTreeRegressionPrediction() {
    // Load the data stored in LIBSVM format as a DataFrame.
    String datapath = "src/test/resources/regression_test.libsvm";
    Dataset<Row> data = spark.read().format("libsvm").load(datapath);

    // Split the data into training and test sets (30% held out for testing)
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];

    StringIndexer indexer = new StringIndexer()
            .setInputCol("label")
            .setOutputCol("labelIndex")
            .setHandleInvalid("skip");

    DecisionTreeRegressor regressionModel = new DecisionTreeRegressor()
            .setLabelCol("labelIndex")
            .setFeaturesCol("features");

    Pipeline pipeline = new Pipeline()
            .setStages(new PipelineStage[]{indexer, regressionModel});

    PipelineModel sparkPipeline = pipeline.fit(trainingData);

    byte[] exportedModel = ModelExporter.export(sparkPipeline);

    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);
    List<Row> output = sparkPipeline.transform(testData).select("features", "prediction", "label").collectAsList();

    // compare predictions
    for (Row row : output) {
        Map<String, Object> data_ = new HashMap<>();
        data_.put("features", ((SparseVector) row.get(0)).toArray());
        data_.put("label", (row.get(2)).toString());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction"));
        assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON);
    }
}
 
Example #9
Source File: GradientBoostClassificationModelPipelineTest.java    From spark-transformers with Apache License 2.0
@Test
public void testGradientBoostClassification() {
	// Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/binary_classification_test.libsvm";

	Dataset<Row> data = spark.read().format("libsvm").load(datapath);
	StringIndexer indexer = new StringIndexer()
               .setInputCol("label")
               .setOutputCol("labelIndex");
	// Split the data into training and test sets (30% held out for testing)
	Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
	Dataset<Row> trainingData = splits[0];
	Dataset<Row> testData = splits[1];

	// Train a gradient-boosted trees (GBT) model.
	GBTClassifier classificationModel = new GBTClassifier()
			.setLabelCol("labelIndex")
			.setFeaturesCol("features");

	Pipeline pipeline = new Pipeline()
			.setStages(new PipelineStage[]{indexer, classificationModel});

	PipelineModel sparkPipeline = pipeline.fit(trainingData);

	// Export this model
	byte[] exportedModel = ModelExporter.export(sparkPipeline);

	// Import and get Transformer
	Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

	List<Row> sparkOutput = sparkPipeline.transform(testData).select("features", "prediction", "label").collectAsList();
	
	// compare predictions
	for (Row row : sparkOutput) {
		Map<String, Object> data_ = new HashMap<>();
		data_.put("features", ((SparseVector) row.get(0)).toArray());
		data_.put("label", (row.get(2)).toString());
		transformer.transform(data_);
		System.out.println(data_);
		System.out.println(data_.get("prediction")+" ,"+row.get(1));
		assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON);
	}

}
 
Example #10
Source File: DecisionTreeClassificationModelBridgePipelineTest.java    From spark-transformers with Apache License 2.0
@Test
public void testDecisionTreeClassificationWithPipeline() {

    // Load the data stored in LIBSVM format as a DataFrame.
    String datapath = "src/test/resources/classification_test.libsvm";
    Dataset<Row> data = spark.read().format("libsvm").load(datapath);

    // Split the data into training and test sets (30% held out for testing)
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];

    StringIndexer indexer = new StringIndexer()
            .setInputCol("label")
            .setOutputCol("labelIndex");

    // Train a DecisionTree model.
    DecisionTreeClassifier classificationModel = new DecisionTreeClassifier()
            .setLabelCol("labelIndex")
            .setFeaturesCol("features");

    Pipeline pipeline = new Pipeline()
            .setStages(new PipelineStage[]{indexer, classificationModel});


    // Train model.  This also runs the indexer.
    PipelineModel sparkPipeline = pipeline.fit(trainingData);

    //Export this model
    byte[] exportedModel = ModelExporter.export(sparkPipeline);

    //Import and get Transformer
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    List<Row> output = sparkPipeline.transform(testData).select("features", "label","prediction","rawPrediction").collectAsList();

    //compare predictions
    for (Row row : output) {
    	Map<String, Object> data_ = new HashMap<>();
    	double [] actualRawPrediction = ((DenseVector) row.get(3)).toArray();
        data_.put("features", ((SparseVector) row.get(0)).toArray());
        data_.put("label", (row.get(1)).toString());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction"));
        assertEquals((double)data_.get("prediction"), (double)row.get(2), EPSILON);
        assertArrayEquals((double[]) data_.get("rawPrediction"), actualRawPrediction, EPSILON);
    }
}
 