org.apache.spark.util.LongAccumulator Java Examples

The following examples show how to use org.apache.spark.util.LongAccumulator. They are taken from open-source projects; the source file, project, and license are listed above each example.
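Before the project examples, here is a minimal, self-contained sketch of the typical LongAccumulator lifecycle: create a named accumulator from the SparkContext, add to it inside an action, and read the merged value back on the driver once the action has completed. All names in this sketch (class name, app name, accumulator name) are illustrative and not taken from the projects below.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.util.LongAccumulator;

public class LongAccumulatorSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("LongAccumulatorSketch").setMaster("local[*]");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // Register a named accumulator on the driver; the name also shows up in the Spark UI.
    LongAccumulator evenCount = jsc.sc().longAccumulator("evenCount");

    // Accumulator updates are only guaranteed for actions; foreach is an action here.
    jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6))
       .foreach(x -> { if (x % 2 == 0) evenCount.add(1); });

    // Read the merged value back on the driver after the action has completed.
    System.out.println("even values seen: " + evenCount.value()); // 3

    jsc.stop();
  }
}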
Example #1
Source File: SparkOperationContext.java    From spliceengine with GNU Affero General Public License v3.0
@Override
public void readExternal(ObjectInput in)
        throws IOException, ClassNotFoundException{
    super.readExternal(in);
    rowsRead=(LongAccumulator)in.readObject();
    rowsFiltered=(LongAccumulator)in.readObject();
    retryAttempts =(LongAccumulator)in.readObject();
    regionTooBusyExceptions =(LongAccumulator)in.readObject();
    rowsJoinedLeft=(LongAccumulator)in.readObject();
    rowsJoinedRight=(LongAccumulator)in.readObject();
    rowsProduced=(LongAccumulator)in.readObject();
    thrownErrorsRows=(LongAccumulator)in.readObject();
    retriedRows=(LongAccumulator)in.readObject();
    partialRows=(LongAccumulator)in.readObject();
    partialThrownErrorRows=(LongAccumulator)in.readObject();
    partialRetriedRows=(LongAccumulator)in.readObject();
    partialIgnoredRows=(LongAccumulator)in.readObject();
    partialWrite=(LongAccumulator)in.readObject();
    ignoredRows=(LongAccumulator)in.readObject();
    catchThrownRows=(LongAccumulator)in.readObject();
    catchRetriedRows=(LongAccumulator)in.readObject();
}
 
Example #2
Source File: SparkExecutionContext.java    From systemds with Apache License 2.0
@SuppressWarnings("unchecked")
public static long writeRDDtoHDFS( RDDObject rdd, String path, OutputInfo oinfo )
{
	JavaPairRDD<MatrixIndexes,MatrixBlock> lrdd = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();

	//piggyback nnz maintenance on write
	LongAccumulator aNnz = getSparkContextStatic().sc().longAccumulator("nnz");
	lrdd = lrdd.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));

	//save file is an action which also triggers nnz maintenance
	lrdd.saveAsHadoopFile(path,
			oinfo.outputKeyClass,
			oinfo.outputValueClass,
			oinfo.outputFormatClass);

	//return nnz aggregate of all blocks
	return aNnz.value();
}
 
Example #3
Source File: SparkPSWorker.java    From systemds with Apache License 2.0
public SparkPSWorker(String updFunc, String aggFunc, Statement.PSFrequency freq, int epochs, long batchSize, String program, HashMap<String, byte[]> clsMap, SparkConf conf, int port, LongAccumulator aSetup, LongAccumulator aWorker, LongAccumulator aUpdate, LongAccumulator aIndex, LongAccumulator aGrad, LongAccumulator aRPC, LongAccumulator aBatches, LongAccumulator aEpochs) {
	_updFunc = updFunc;
	_aggFunc = aggFunc;
	_freq = freq;
	_epochs = epochs;
	_batchSize = batchSize;
	_program = program;
	_clsMap = clsMap;
	_conf = conf;
	_port = port;
	_aSetup = aSetup;
	_aWorker = aWorker;
	_aUpdate = aUpdate;
	_aIndex = aIndex;
	_aGrad = aGrad;
	_aRPC = aRPC;
	_nBatches = aBatches;
	_nEpochs = aEpochs;
}
 
Example #4
Source File: RemoteDPParForSparkWorker.java    From systemds with Apache License 2.0
public RemoteDPParForSparkWorker(String program, HashMap<String, byte[]> clsMap, String inputVar, String iterVar,
	boolean cpCaching, DataCharacteristics mc, boolean tSparseCol, PartitionFormat dpf, OutputInfo oinfo,
	LongAccumulator atasks, LongAccumulator aiters)
{
	_prog = program;
	_clsMap = clsMap;
	_caching = cpCaching;
	_inputVar = inputVar;
	_iterVar = iterVar;
	_oinfo = oinfo;
	
	//setup spark accumulators
	_aTasks = atasks;
	_aIters = aiters;
	
	//setup matrix block partition meta data
	_rlen = (int)dpf.getNumRows(mc);
	_clen = (int)dpf.getNumColumns(mc);
	_blen = mc.getBlocksize();
	_tSparseCol = tSparseCol;
	_dpf = dpf._dpf;
}
 
Example #5
Source File: SparkExecutionContext.java    From systemds with Apache License 2.0
@SuppressWarnings("unchecked")
public static long writeMatrixRDDtoHDFS( RDDObject rdd, String path, FileFormat fmt )
{
	JavaPairRDD<MatrixIndexes,MatrixBlock> lrdd = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();
	InputOutputInfo oinfo = InputOutputInfo.get(DataType.MATRIX, fmt);
	
	//piggyback nnz maintenance on write
	LongAccumulator aNnz = getSparkContextStatic().sc().longAccumulator("nnz");
	lrdd = lrdd.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));

	//save file is an action which also triggers nnz maintenance
	lrdd.saveAsHadoopFile(path,
		oinfo.keyClass,
		oinfo.valueClass,
		oinfo.outputFormatClass);

	//return nnz aggregate of all blocks
	return aNnz.value();
}
 
Example #6
Source File: RemoteDPParForSparkWorker.java    From systemds with Apache License 2.0
public RemoteDPParForSparkWorker(String program, HashMap<String, byte[]> clsMap, String inputVar, String iterVar,
	boolean cpCaching, DataCharacteristics mc, boolean tSparseCol, PartitionFormat dpf, FileFormat fmt,
	LongAccumulator atasks, LongAccumulator aiters)
{
	_prog = program;
	_clsMap = clsMap;
	_caching = cpCaching;
	_inputVar = inputVar;
	_iterVar = iterVar;
	_fmt = fmt;
	
	//setup spark accumulators
	_aTasks = atasks;
	_aIters = aiters;
	
	//setup matrix block partition meta data
	_rlen = (int)dpf.getNumRows(mc);
	_clen = (int)dpf.getNumColumns(mc);
	_blen = mc.getBlocksize();
	_tSparseCol = tSparseCol;
	_dpf = dpf._dpf;
}
 
Example #7
Source File: JavaRecoverableNetworkWordCount.java    From SparkDemo with MIT License
public static LongAccumulator getInstance(JavaSparkContext jsc) {
  if (instance == null) {
    synchronized (JavaDroppedWordsCounter.class) {
      if (instance == null) {
        instance = jsc.sc().longAccumulator("WordsInBlacklistCounter");
      }
    }
  }
  return instance;
}
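For context, this lazy singleton is meant to be fetched on the driver inside a streaming output operation, so the accumulator can be re-registered after recovery from a checkpoint. A hedged sketch of that call pattern follows; the wordCounts stream and the blacklist filtering step are assumptions, following the shape of the original JavaRecoverableNetworkWordCount example.

// A sketch only: assumes a JavaPairDStream<String, Integer> of word counts is available.
static void printDroppedWords(JavaPairDStream<String, Integer> wordCounts) {
  wordCounts.foreachRDD((rdd, time) -> {
    // Get or register the singleton accumulator via the RDD's underlying SparkContext.
    LongAccumulator droppedWordsCounter =
        JavaDroppedWordsCounter.getInstance(new JavaSparkContext(rdd.context()));
    // ... filter blacklisted words here, calling droppedWordsCounter.add(count) for dropped entries ...
    System.out.println("Dropped " + droppedWordsCounter.value() + " word(s) in total at time " + time);
  });
}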
 
Example #8
Source File: TestAccumulators.java    From envelope with Apache License 2.0
@Test
public void testRequestOne() {
  AccumulatorRequest request = new AccumulatorRequest("hello", Long.class);
  
  Accumulators accumulators = new Accumulators(Collections.singleton(request));
  
  LongAccumulator accumulator = accumulators.getLongAccumulators().get("hello");
  assertEquals(accumulator.name().get(), "hello");
}
 
Example #9
Source File: StateAccumulation.java    From learning-spark-with-java with MIT License
public static LongAccumulator getInstance(JavaSparkContext jsc) {
  if (instance == null) {
    synchronized (RecordCounter.class) {
      if (instance == null) {
        System.out.println("*** Initializing RecordCounter");
        instance = jsc.sc().longAccumulator("RecordCounter");
      }
    }
  }
  return instance;
}
 
Example #10
Source File: TestAccumulators.java    From envelope with Apache License 2.0
@Test
public void testRequestMany() {
  AccumulatorRequest request1 = new AccumulatorRequest("hello", Long.class);
  AccumulatorRequest request2 = new AccumulatorRequest("world", Double.class);
  
  Accumulators accumulators = new Accumulators(Sets.newHashSet(request1, request2));
  
  LongAccumulator accumulator1 = accumulators.getLongAccumulators().get("hello");
  assertEquals(accumulator1.name().get(), "hello");
  
  DoubleAccumulator accumulator2 = accumulators.getDoubleAccumulators().get("world");
  assertEquals(accumulator2.name().get(), "world");
}
 
Example #11
Source File: SparkFactDistinct.java    From kylin with Apache License 2.0
public FlatOutputFucntion(String cubeName, String segmentId, String metaurl, SerializableConfiguration conf,
        int samplingPercent, LongAccumulator bytesWritten) {
    this.cubeName = cubeName;
    this.segmentId = segmentId;
    this.metaUrl = metaurl;
    this.conf = conf;
    this.samplingPercent = samplingPercent;
    this.dimensionRangeInfoMap = Maps.newHashMap();
    this.bytesWritten = bytesWritten;
}
 
Example #12
Source File: PSRpcFactory.java    From systemds with Apache License 2.0
public static SparkPSProxy createSparkPSProxy(SparkConf conf, int port, LongAccumulator aRPC) throws IOException {
	long rpcTimeout = conf.contains("spark.rpc.askTimeout") ?
		conf.getTimeAsMs("spark.rpc.askTimeout") :
		conf.getTimeAsMs("spark.network.timeout", "120s");
	String host = conf.get("spark.driver.host");
	TransportContext context = createTransportContext(conf, new LocalParamServer());
	return new SparkPSProxy(context.createClientFactory().createClient(host, port), rpcTimeout, aRPC);
}
 
Example #13
Source File: RemoteParForSparkWorker.java    From systemds with Apache License 2.0
public RemoteParForSparkWorker(long jobid, String program, HashMap<String, byte[]> clsMap, boolean cpCaching,
		LongAccumulator atasks, LongAccumulator aiters, Map<String, Broadcast<CacheBlock>> brInputs, 
		boolean cleanCache, Map<String,String> lineage) 
{
	_jobid = jobid;
	_prog = program;
	_clsMap = clsMap;
	_initialized = false;
	_caching = cpCaching;
	_aTasks = atasks;
	_aIters = aiters;
	_brInputs = brInputs;
	_cleanCache = cleanCache;
	_lineage = lineage;
}
 
Example #14
Source File: RDDConverterUtils.java    From systemds with Apache License 2.0
public static JavaPairRDD<MatrixIndexes, MatrixBlock> csvToBinaryBlock(JavaSparkContext sc,
		JavaPairRDD<LongWritable, Text> input, DataCharacteristics mc,
		boolean hasHeader, String delim, boolean fill, double fillValue) {
	//determine unknown dimensions and sparsity if required
	//(w/ robustness for mistakenly counted header in nnz)
	if( !mc.dimsKnown(true) ) {
		LongAccumulator aNnz = sc.sc().longAccumulator("nnz");
		JavaRDD<String> tmp = input.values()
			.map(new CSVAnalysisFunction(aNnz, delim));
		long rlen = tmp.count() - (hasHeader ? 1 : 0);
		long clen = tmp.first().split(delim).length;
		long nnz = Math.min(rlen*clen, UtilFunctions.toLong(aNnz.value()));
		mc.set(rlen, clen, mc.getBlocksize(), nnz);
	}
	
	//prepare csv w/ row indexes (sorted by filenames)
	JavaPairRDD<Text,Long> prepinput = input.values()
		.zipWithIndex(); //zip row index
	
	//convert csv rdd to binary block rdd (w/ partial blocks)
	boolean sparse = requiresSparseAllocation(prepinput, mc);
	JavaPairRDD<MatrixIndexes, MatrixBlock> out = 
		prepinput.mapPartitionsToPair(new CSVToBinaryBlockFunction(
			mc, sparse, hasHeader, delim, fill, fillValue));
	
	//aggregate partial matrix blocks (w/ preferred number of output 
	//partitions as the data is likely smaller in binary block format,
	//but also to bound the size of partitions for compressed inputs)
	int parts = SparkUtils.getNumPreferredPartitions(mc, out);
	return RDDAggregateUtils.mergeByKey(out, parts, false); 
}
 
Example #15
Source File: RDDConverterUtils.java    From systemds with Apache License 2.0
public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToBinaryBlock(JavaSparkContext sc,
	Dataset<Row> df, DataCharacteristics mc, boolean containsID, boolean isVector)
{
	//determine unknown dimensions and sparsity if required
	if( !mc.dimsKnown(true) ) {
		LongAccumulator aNnz = sc.sc().longAccumulator("nnz");
		JavaRDD<Row> tmp = df.javaRDD().map(new DataFrameAnalysisFunction(aNnz, containsID, isVector));
		long rlen = tmp.count();
		long clen = !isVector ? df.columns().length - (containsID?1:0) : 
				((Vector) tmp.first().get(containsID?1:0)).size();
		long nnz = UtilFunctions.toLong(aNnz.value());
		mc.set(rlen, clen, mc.getBlocksize(), nnz);
	}
	
	//ensure valid blocksizes
	if( mc.getBlocksize()<=1 )
		mc.setBlocksize(ConfigurationManager.getBlocksize());
	
	//construct or reuse row ids
	JavaPairRDD<Row, Long> prepinput = containsID ?
			df.javaRDD().mapToPair(new DataFrameExtractIDFunction(
				df.schema().fieldIndex(DF_ID_COLUMN))) :
			df.javaRDD().zipWithIndex(); //zip row index
	
	//convert csv rdd to binary block rdd (w/ partial blocks)
	boolean sparse = requiresSparseAllocation(prepinput, mc);
	JavaPairRDD<MatrixIndexes, MatrixBlock> out = 
			prepinput.mapPartitionsToPair(
				new DataFrameToBinaryBlockFunction(mc, sparse, containsID, isVector));
	
	//aggregate partial matrix blocks (w/ preferred number of output 
	//partitions as the data is likely smaller in binary block format,
	//but also to bound the size of partitions for compressed inputs)
	int parts = SparkUtils.getNumPreferredPartitions(mc, out);
	return RDDAggregateUtils.mergeByKey(out, parts, false); 
}
 
Example #16
Source File: RDDConverterUtils.java    From systemds with Apache License 2.0
public LabeledPointToBinaryBlockFunction(DataCharacteristics mc, boolean labels, LongAccumulator aNnz) {
	_rlen = mc.getRows();
	_clen = mc.getCols();
	_blen = mc.getBlocksize();
	_sparseX = MatrixBlock.evalSparseFormatInMemory(
			mc.getRows(), mc.getCols(), mc.getNonZeros());
	_labels = labels;
	_aNnz = aNnz;
}
 
Example #17
Source File: SparkLeanOperationContext.java    From spliceengine with GNU Affero General Public License v3.0
@Override
@SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", justification = "intended")
public void readExternal(ObjectInput in)
        throws IOException, ClassNotFoundException{
    if (in.readBoolean()) {
        SpliceClient.connectionString = in.readUTF();
        SpliceClient.setClient(HConfiguration.getConfiguration().getAuthenticationTokenEnabled(), SpliceClient.Mode.EXECUTOR);
    }
    badRecordsSeen = in.readLong();
    badRecordThreshold = in.readLong();
    permissive=in.readBoolean();
    SpliceSpark.setupSpliceStaticComponents();
    boolean isOp=in.readBoolean();
    if(isOp){
        broadcastedActivation = (BroadcastedActivation)in.readObject();
        ActivationHolder ah = broadcastedActivation.getActivationHolder();
        op=(Op)ah.getOperationsMap().get(in.readInt());
        activation = ah.getActivation();
        TaskContext taskContext = TaskContext.get();
        if (taskContext != null) {
            taskContext.addTaskCompletionListener((TaskCompletionListener)(ctx) -> ah.close());
        }
    }
    badRecordsAccumulator = (Accumulable<BadRecordsRecorder,String>) in.readObject();
    importFileName= (String) in.readObject();
    rowsWritten=(LongAccumulator)in.readObject();
}
 
Example #18
Source File: SparkFactDistinct.java    From kylin-on-parquet-v2 with Apache License 2.0
public FlatOutputFucntion(String cubeName, String segmentId, String metaurl, SerializableConfiguration conf,
        int samplingPercent, LongAccumulator bytesWritten) {
    this.cubeName = cubeName;
    this.segmentId = segmentId;
    this.metaUrl = metaurl;
    this.conf = conf;
    this.samplingPercent = samplingPercent;
    this.dimensionRangeInfoMap = Maps.newHashMap();
    this.bytesWritten = bytesWritten;
}
 
Example #19
Source File: RemoteDPParForSpark.java    From systemds with Apache License 2.0
public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, HashMap<String, byte[]> clsMap,
		String resultFile, MatrixObject input, ExecutionContext ec, PartitionFormat dpf, OutputInfo oi, 
		boolean tSparseCol, boolean enableCPCaching, int numReducers ) 
{
	String jobname = "ParFor-DPESP";
	long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
	
	SparkExecutionContext sec = (SparkExecutionContext)ec;
	JavaSparkContext sc = sec.getSparkContext();
	
	//prepare input parameters
	MatrixObject mo = sec.getMatrixObject(matrixvar);
	DataCharacteristics mc = mo.getDataCharacteristics();

	//initialize accumulators for tasks/iterations, and inputs
	JavaPairRDD<MatrixIndexes,MatrixBlock> in = sec.getBinaryMatrixBlockRDDHandleForVariable(matrixvar);
	LongAccumulator aTasks = sc.sc().longAccumulator("tasks");
	LongAccumulator aIters = sc.sc().longAccumulator("iterations");

	//compute number of reducers (to avoid OOMs and reduce memory pressure)
	int numParts = SparkUtils.getNumPreferredPartitions(mc, in);
	int numReducers2 = Math.max(numReducers, Math.min(numParts, (int)dpf.getNumParts(mc)));
	
	//core parfor datapartition-execute (w/ or w/o shuffle, depending on data characteristics)
	RemoteDPParForSparkWorker efun = new RemoteDPParForSparkWorker(program, clsMap, 
			matrixvar, itervar, enableCPCaching, mc, tSparseCol, dpf, oi, aTasks, aIters);
	JavaPairRDD<Long,Writable> tmp = getPartitionedInput(sec, matrixvar, oi, dpf);
	List<Tuple2<Long,String>> out = (requiresGrouping(dpf, mo) ?
			tmp.groupByKey(numReducers2) : tmp.map(new PseudoGrouping()) )
			   .mapPartitionsToPair(efun)  //execute parfor tasks, incl cleanup
	           .collect();                 //get output handles 
	
	//de-serialize results
	LocalVariableMap[] results = RemoteParForUtils.getResults(out, LOG);
	int numTasks = aTasks.value().intValue(); //get accumulator value
	int numIters = aIters.value().intValue(); //get accumulator value
	
	//create output symbol table entries
	RemoteParForJobReturn ret = new RemoteParForJobReturn(true, numTasks, numIters, results);
	
	//maintain statistics
	Statistics.incrementNoOfCompiledSPInst();
	Statistics.incrementNoOfExecutedSPInst();
	if( DMLScript.STATISTICS ) {
		Statistics.maintainCPHeavyHitters(jobname, System.nanoTime()-t0);
	}
	
	return ret;
}
 
Example #20
Source File: SparkExportDataSetWriter.java    From spliceengine with GNU Affero General Public License v3.0
public LongAccumulator getCount() {
    return count;
}
 
Example #21
Source File: SparkKafkaDataSetWriter.java    From spliceengine with GNU Affero General Public License v3.0
public LongAccumulator getCount() {
    return count;
}
 
Example #22
Source File: SparkPSProxy.java    From systemds with Apache License 2.0
public SparkPSProxy(TransportClient client, long rpcTimeout, LongAccumulator aRPC) {
	super();
	_client = client;
	_rpcTimeout = rpcTimeout;
	_aRPC = aRPC;
}
 
Example #23
Source File: ComputeBinaryBlockNnzFunction.java    From systemds with Apache License 2.0
public ComputeBinaryBlockNnzFunction( LongAccumulator aNnz ) {
	_aNnz = aNnz;
}
 
Example #24
Source File: RDDConverterUtils.java    From systemds with Apache License 2.0
public DataFrameAnalysisFunction( LongAccumulator aNnz, boolean containsID, boolean isVector) {
	_aNnz = aNnz;
	_containsID = containsID;
	_isVector = isVector;
}