Java Code Examples for org.apache.spark.api.java.JavaFutureAction

The following examples show how to use org.apache.spark.api.java.JavaFutureAction. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out the right sidebar, which shows the related API usage.
Example 1
Source Project: SparkDemo   Source File: JavaStatusTrackerDemo.java    License: MIT License 5 votes vote down vote up
/**
 * Submits a small asynchronous Spark job and, roughly once per second until the job is done,
 * prints task progress for the first stage of the most recently submitted job. The collected
 * results are printed at the end.
 *
 * <p>Status lookups are best-effort: when the tracker has no information for the job or the
 * stage yet (or no longer), that polling round is skipped instead of failing.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the polling sleep is interrupted or the asynchronous job fails
 */
public static void main(String[] args) throws Exception {
  SparkSession spark = SparkSession
    .builder()
    .appName(APP_NAME)
    .getOrCreate();

  // Stop the session on every exit path, including a failed job or an interrupted sleep.
  try {
    final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

    // Example of implementing a progress reporter for a simple job.
    JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5).map(
        new IdentityWithDelay<Integer>());
    JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync();
    while (!jobFuture.isDone()) {
      Thread.sleep(1000);  // 1 second
      List<Integer> jobIds = jobFuture.jobIds();
      if (jobIds.isEmpty()) {
        // No job has been submitted for this action yet.
        continue;
      }
      int currentJobId = jobIds.get(jobIds.size() - 1);
      SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId);
      // The tracker returns null when the job is unknown or its info was garbage collected;
      // a job may also not have any stage registered yet.
      if (jobInfo == null || jobInfo.stageIds().length == 0) {
        continue;
      }
      SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]);
      if (stageInfo == null) {
        // Same best-effort contract as for jobs: no stage info available this round.
        continue;
      }
      System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks() +
          " active, " + stageInfo.numCompletedTasks() + " complete");
    }

    System.out.println("Job results are: " + jobFuture.get());
  } finally {
    spark.stop();
  }
}
 
Example 2
Source Project: incubator-nemo   Source File: SparkJavaRDD.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Asynchronous collect is not available: this method unconditionally throws.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always, carrying the {@code NOT_YET_SUPPORTED} message
 */
@Override
public JavaFutureAction<List<T>> collectAsync() {
  final UnsupportedOperationException unsupported =
      new UnsupportedOperationException(NOT_YET_SUPPORTED);
  throw unsupported;
}
 
Example 3
Source Project: incubator-nemo   Source File: SparkJavaRDD.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Asynchronous count is not available: this method unconditionally throws.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always, carrying the {@code NOT_YET_SUPPORTED} message
 */
@Override
public JavaFutureAction<Long> countAsync() {
  final UnsupportedOperationException unsupported =
      new UnsupportedOperationException(NOT_YET_SUPPORTED);
  throw unsupported;
}
 
Example 4
Source Project: incubator-nemo   Source File: SparkJavaRDD.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Asynchronous foreach is not available: this method unconditionally throws.
 *
 * @param f the function that would be applied to each element; it is never invoked
 * @return never returns normally
 * @throws UnsupportedOperationException always, carrying the {@code NOT_YET_SUPPORTED} message
 */
@Override
public JavaFutureAction<Void> foreachAsync(final VoidFunction<T> f) {
  final UnsupportedOperationException unsupported =
      new UnsupportedOperationException(NOT_YET_SUPPORTED);
  throw unsupported;
}
 
Example 5
Source Project: incubator-nemo   Source File: SparkJavaRDD.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Asynchronous per-partition foreach is not available: this method unconditionally throws.
 *
 * @param f the function that would be applied to each partition iterator; it is never invoked
 * @return never returns normally
 * @throws UnsupportedOperationException always, carrying the {@code NOT_YET_SUPPORTED} message
 */
@Override
public JavaFutureAction<Void> foreachPartitionAsync(final VoidFunction<Iterator<T>> f) {
  final UnsupportedOperationException unsupported =
      new UnsupportedOperationException(NOT_YET_SUPPORTED);
  throw unsupported;
}
 
Example 6
Source Project: incubator-nemo   Source File: SparkJavaRDD.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Asynchronous take is not available: this method unconditionally throws.
 *
 * @param num the number of elements that would be taken; it is ignored
 * @return never returns normally
 * @throws UnsupportedOperationException always, carrying the {@code NOT_YET_SUPPORTED} message
 */
@Override
public JavaFutureAction<List<T>> takeAsync(final int num) {
  final UnsupportedOperationException unsupported =
      new UnsupportedOperationException(NOT_YET_SUPPORTED);
  throw unsupported;
}
 
Example 7
Source Project: nemo   Source File: JavaRDD.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Placeholder for asynchronous collect; the operation is not implemented.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public JavaFutureAction<List<T>> collectAsync() {
  final String reason = "Operation not yet implemented.";
  throw new UnsupportedOperationException(reason);
}
 
Example 8
Source Project: nemo   Source File: JavaRDD.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Placeholder for asynchronous count; the operation is not implemented.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public JavaFutureAction<Long> countAsync() {
  final String reason = "Operation not yet implemented.";
  throw new UnsupportedOperationException(reason);
}
 
Example 9
Source Project: nemo   Source File: JavaRDD.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Placeholder for asynchronous foreach; the operation is not implemented.
 *
 * @param f the function that would be applied to each element; it is never invoked
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public JavaFutureAction<Void> foreachAsync(final VoidFunction<T> f) {
  final String reason = "Operation not yet implemented.";
  throw new UnsupportedOperationException(reason);
}
 
Example 10
Source Project: nemo   Source File: JavaRDD.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Placeholder for asynchronous per-partition foreach; the operation is not implemented.
 *
 * @param f the function that would be applied to each partition iterator; it is never invoked
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public JavaFutureAction<Void> foreachPartitionAsync(final VoidFunction<Iterator<T>> f) {
  final String reason = "Operation not yet implemented.";
  throw new UnsupportedOperationException(reason);
}
 
Example 11
Source Project: nemo   Source File: JavaRDD.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Placeholder for asynchronous take; the operation is not implemented.
 *
 * @param num the number of elements that would be taken; it is ignored
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public JavaFutureAction<List<T>> takeAsync(final int num) {
  final String reason = "Operation not yet implemented.";
  throw new UnsupportedOperationException(reason);
}
 
Example 12
Source Project: sqoop-on-spark   Source File: LocalSparkJobStatus.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a job status holder that stores the given context, job id and future as-is.
 *
 * @param sparkContext the Spark context kept in {@code this.sparkContext}
 * @param jobId the job id kept in {@code this.jobId}
 * @param future the asynchronous action handle kept in {@code this.future}
 */
public LocalSparkJobStatus(
		JavaSparkContext sparkContext,
		int jobId,
		JavaFutureAction<Void> future) {
	// Plain field captures; no validation or copying is performed.
	this.future = future;
	this.jobId = jobId;
	this.sparkContext = sparkContext;
}
 
Example 13
/**
 * Compares an index against its (filtered) base table and returns the messages produced by
 * the individual checks.
 *
 * <p>Row counts for the table and the index are started asynchronously, the missing-index
 * check runs while they are in flight, and the counts are awaited afterwards. The invalid-index
 * and duplicate-index checks only run when the counts call for them.
 *
 * @param index the index data set to verify
 * @param indexName name of the index; stored on this instance and used in the scope label
 * @param leadingIndexColumnInfo passed to the table filter; when non-null the filtered table
 *                               count is used instead of the base table count
 * @param conglomerate stored on this instance
 * @param tentativeIndex stored on this instance
 * @return the messages collected from the missing, invalid and duplicate index checks
 * @throws Exception if waiting on either count fails or one of the checks throws
 */
@Override
public List<String> checkIndex(PairDataSet index,
                               String indexName,
                               LeadingIndexColumnInfo leadingIndexColumnInfo,
                               long conglomerate,
                               DDLMessage.TentativeIndex tentativeIndex) throws Exception {

    this.indexName = indexName;
    this.conglomerate = conglomerate;
    this.tentativeIndex = tentativeIndex;

    final List<String> report = Lists.newLinkedList();
    final boolean hasLeadingColumnInfo = leadingIndexColumnInfo != null;

    filteredTable = baseTable.filter(new IndexFilter<>(leadingIndexColumnInfo)).index(new KeyByRowIdFunction<>());

    // Kick off the table count only when no base count is cached yet or the table is filtered;
    // otherwise the previously stored base table count is reused.
    JavaFutureAction<Long> pendingTableCount = null;
    if (baseTableCount == 0 || hasLeadingColumnInfo) {
        final String tableScope = String.format("Count table %s.%s", schemaName, tableName);
        SpliceSpark.pushScope(tableScope);
        pendingTableCount = ((SparkPairDataSet) filteredTable).rdd.countAsync();
        SpliceSpark.popScope();
    }

    // The index count is always started.
    final String indexScope = String.format("Count index %s.%s", schemaName, indexName);
    SpliceSpark.pushScope(indexScope);
    final JavaFutureAction<Long> pendingIndexCount = ((SparkPairDataSet) index).rdd.countAsync();
    SpliceSpark.popScope();

    report.addAll(checkMissingIndexes(filteredTable, index));

    // Await the table count (if one was started) and store it in the matching field.
    if (pendingTableCount != null) {
        final Long counted = pendingTableCount.get();
        if (hasLeadingColumnInfo) {
            filteredTableCount = counted;
        } else {
            baseTableCount = counted;
        }
    }

    indexCount = pendingIndexCount.get();
    final long expectedCount = hasLeadingColumnInfo ? filteredTableCount : baseTableCount;

    // A count mismatch or any unindexed row triggers the invalid-index check.
    if (indexCount != expectedCount || missingIndexCount != 0) {
        report.addAll(checkInvalidIndexes(filteredTable, index));
    }

    // More valid index rows than indexed table rows triggers the duplicate-index check.
    if (indexCount - invalidIndexCount > expectedCount - missingIndexCount) {
        report.addAll(checkDuplicateIndexes(filteredTable, index));
    }
    return report;
}
 
Example 14
/**
 * Runs the requested compaction as a Spark job and reports its outcome through {@code status}.
 *
 * <p>The job is submitted asynchronously and polled in steps of {@code tickTime} milliseconds.
 * Between steps the method checks whether {@code status} is still running (cancelling the job
 * if not) and, once {@code compactionRequest.maxWait} has elapsed, whether the compaction is
 * actually running according to {@code compactionRunning} (cancelling and marking the status
 * as failed if not).
 *
 * @return always {@code null}; a result or failure, when there is one, is delivered via
 *         {@code status.markCompleted}
 * @throws Exception if job setup fails or waiting on the asynchronous collect throws anything
 *                   other than a {@code TimeoutException}
 */
@Override
public Void call() throws Exception {
    if(!status.markRunning()){
        //the client has already cancelled us or has died before we could get started, so stop now
        return null;
    }
    // Count this invocation; the matching decrement is in the finally block below.
    int order = concurrentCompactions.incrementAndGet();
    try {
        int maxConcurrentCompactions = HConfiguration.getConfiguration().getOlapCompactionMaximumConcurrent();
        // Reject this compaction when the counter (including this call) exceeds the configured maximum.
        if (order > maxConcurrentCompactions) {
            status.markCompleted(new FailedOlapResult(
                    new CancellationException("Maximum number of concurrent compactions already running")));
            return null;
        }
        
        initializeJob();
        // Copy the configuration and add the region location and the files to compact for the input format.
        Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
        if (LOG.isTraceEnabled()) {
            LOG.trace("regionLocation = " + compactionRequest.regionLocation);
        }
        conf.set(MRConstants.REGION_LOCATION, compactionRequest.regionLocation);
        conf.set(MRConstants.COMPACTION_FILES, getCompactionFilesBase64String());

        SpliceSpark.pushScope(compactionRequest.scope + ": Parallelize");
        //JavaRDD rdd1 = SpliceSpark.getContext().parallelize(files, 1);
        //ParallelCollectionRDD rdd1 = getCompactionRDD();

        JavaSparkContext context = SpliceSpark.getContext();
        JavaPairRDD<Integer, Iterator> rdd1 = context.newAPIHadoopRDD(conf,
                CompactionInputFormat.class,
                Integer.class,
                Iterator.class);
        rdd1.setName("Distribute Compaction Load");
        SpliceSpark.popScope();

        compactionRequest.compactionFunction.setContext(new SparkCompactionContext());
        SpliceSpark.pushScope(compactionRequest.scope + ": Compact files");
        JavaRDD<String> rdd2 = rdd1.mapPartitions(new SparkFlatMapFunction<>(compactionRequest.compactionFunction));
        rdd2.setName(compactionRequest.jobDetails);
        SpliceSpark.popScope();

        // NOTE(review): the three early returns between here and the popScope() after
        // markCompleted(...) exit without popping this "Compaction" scope - confirm whether
        // that imbalance is intended.
        SpliceSpark.pushScope("Compaction");
        if (!status.isRunning()) {
            //the client timed out during our setup, so it's time to stop
            return null;
        }
        long startTime = clock.currentTimeMillis();
        JavaFutureAction<List<String>> collectFuture = rdd2.collectAsync();
        // Poll until the job is done; each iteration waits at most tickTime ms on the future.
        while (!collectFuture.isDone()) {
            try {
                collectFuture.get(tickTime, TimeUnit.MILLISECONDS);
            } catch (TimeoutException te) {
                /*
                 * A TimeoutException just means that tickTime expired. That's okay, we just stick our
                 * head up and make sure that the client is still operating
                 */
            }
            if (!status.isRunning()) {
                /*
                 * The client timed out, so cancel the compaction and terminate
                 */
                collectFuture.cancel(true);
                context.cancelJobGroup(compactionRequest.jobGroup);
                return null;
            }
            // Once maxWait has elapsed since submission, this check is repeated on every tick.
            if (clock.currentTimeMillis() - startTime > compactionRequest.maxWait) {
                // Make sure compaction is scheduled in Spark and running, otherwise cancel it and fallback to in-HBase compaction
                if (!compactionRunning(collectFuture.jobIds())) {
                    collectFuture.cancel(true);
                    context.cancelJobGroup(compactionRequest.jobGroup);
                    status.markCompleted(new FailedOlapResult(
                            new RejectedExecutionException("No resources available for running compaction in Spark")));
                    return null;
                }
            }
        }
        //the compaction completed
        List<String> sPaths = collectFuture.get();
        status.markCompleted(new CompactionResult(sPaths));
        SpliceSpark.popScope();

        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG, "Paths Returned: %s", sPaths);
        return null;
    } finally {
        // Runs on every exit path after the increment above, including exceptions.
        concurrentCompactions.decrementAndGet();
    }
}