org.apache.spark.api.java.JavaFutureAction Java Examples

The following examples show how to use org.apache.spark.api.java.JavaFutureAction, drawn from several open-source projects. The source project and license are listed above each example.
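Before the project examples, here is a minimal, self-contained sketch of the core JavaFutureAction pattern: the interface is the standard java.util.concurrent.Future contract plus jobIds() for hooking into Spark's status tracking. The class name and app name below are illustrative only, not taken from any of the projects that follow.

import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public final class FutureActionSketch {
  public static void main(String[] args) throws Exception {
    // Local master only so the sketch can run standalone.
    SparkConf conf = new SparkConf().setAppName("future-action-sketch").setMaster("local[2]");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

    // Kick off the action without blocking the driver thread.
    JavaFutureAction<Long> countFuture = rdd.countAsync();

    // jobIds() exposes the Spark job ids spawned by this action,
    // which can be fed to SparkStatusTracker for progress reporting.
    List<Integer> jobIds = countFuture.jobIds();
    System.out.println("Jobs started for countAsync: " + jobIds);

    // get() blocks until the action finishes; cancel(true) would abort it.
    System.out.println("Count: " + countFuture.get());
    jsc.stop();
  }
}
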
Example #1
Source File: JavaStatusTrackerDemo.java    From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
  SparkSession spark = SparkSession
    .builder()
    .appName(APP_NAME)
    .getOrCreate();

  final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

  // Example of implementing a progress reporter for a simple job.
  JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5).map(
      new IdentityWithDelay<Integer>());
  JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync();
  while (!jobFuture.isDone()) {
    Thread.sleep(1000);  // 1 second
    List<Integer> jobIds = jobFuture.jobIds();
    if (jobIds.isEmpty()) {
      continue;
    }
    int currentJobId = jobIds.get(jobIds.size() - 1);
    SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId);
    SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]);
    System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks() +
        " active, " + stageInfo.numCompletedTasks() + " complete");
  }

  System.out.println("Job results are: " + jobFuture.get());
  spark.stop();
}
 
Example #2
Source File: SparkJavaRDD.java    From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> collectAsync() {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
 
Example #3
Source File: SparkJavaRDD.java    From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<Long> countAsync() {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
 
Example #4
Source File: SparkJavaRDD.java    From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachAsync(final VoidFunction<T> f) {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
 
Example #5
Source File: SparkJavaRDD.java    From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachPartitionAsync(final VoidFunction<Iterator<T>> f) {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
 
Example #6
Source File: SparkJavaRDD.java    From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> takeAsync(final int num) {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
 
Example #7
Source File: JavaRDD.java    From nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> collectAsync()  {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
 
Example #8
Source File: JavaRDD.java    From nemo with Apache License 2.0
@Override
public JavaFutureAction<Long> countAsync()  {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
 
Example #9
Source File: JavaRDD.java    From nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachAsync(final VoidFunction<T> f) {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
 
Example #10
Source File: JavaRDD.java    From nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachPartitionAsync(final VoidFunction<Iterator<T>> f) {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
 
Example #11
Source File: JavaRDD.java    From nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> takeAsync(final int num) {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
 
Example #12
Source File: LocalSparkJobStatus.java    From sqoop-on-spark with Apache License 2.0
public LocalSparkJobStatus(JavaSparkContext sparkContext, int jobId,
                           JavaFutureAction<Void> future) {
  this.sparkContext = sparkContext;
  this.jobId = jobId;
  this.future = future;
}
 
Example #13
Source File: SparkTableChecker.java    From spliceengine with GNU Affero General Public License v3.0
@Override
public List<String> checkIndex(PairDataSet index,
                               String indexName,
                               LeadingIndexColumnInfo leadingIndexColumnInfo,
                               long conglomerate,
                               DDLMessage.TentativeIndex tentativeIndex) throws Exception {

    this.indexName = indexName;
    this.conglomerate = conglomerate;
    this.tentativeIndex = tentativeIndex;

    List<String> messages = Lists.newLinkedList();

    // Count the number of rows in the base table, and reuse the cached count if the index does not exclude default or null keys
    JavaFutureAction<Long> tableCountFuture = null;
    filteredTable = baseTable.filter(new IndexFilter<>(leadingIndexColumnInfo)).index(new KeyByRowIdFunction<>());
    if (baseTableCount == 0 || leadingIndexColumnInfo != null) {
        SpliceSpark.pushScope(String.format("Count table %s.%s", schemaName, tableName));
        tableCountFuture = ((SparkPairDataSet) filteredTable).rdd.countAsync();
        SpliceSpark.popScope();
    }
    // Count the number of rows in the index
    SpliceSpark.pushScope(String.format("Count index %s.%s", schemaName, indexName));
    JavaFutureAction<Long> indexCountFuture = ((SparkPairDataSet)index).rdd.countAsync();
    SpliceSpark.popScope();

    messages.addAll(checkMissingIndexes(filteredTable, index));
    if (tableCountFuture != null) {
        if (leadingIndexColumnInfo == null) {
            baseTableCount = tableCountFuture.get();
        }
        else {
            filteredTableCount = tableCountFuture.get();
        }
    }

    indexCount = indexCountFuture.get();
    long tableCount = leadingIndexColumnInfo != null ? filteredTableCount : baseTableCount;

    // If index and table count do not match, or there are rows not indexed, check for invalid indexes
    if (indexCount != tableCount || missingIndexCount != 0) {
        messages.addAll(checkInvalidIndexes(filteredTable, index));
    }

    if (indexCount - invalidIndexCount > tableCount - missingIndexCount) {
        messages.addAll(checkDuplicateIndexes(filteredTable, index));
    }
    return messages;
}
 
Example #14
Source File: CompactionJob.java    From spliceengine with GNU Affero General Public License v3.0
@Override
public Void call() throws Exception {
    if(!status.markRunning()){
        //the client has already cancelled us or has died before we could get started, so stop now
        return null;
    }
    int order = concurrentCompactions.incrementAndGet();
    try {
        int maxConcurrentCompactions = HConfiguration.getConfiguration().getOlapCompactionMaximumConcurrent();
        if (order > maxConcurrentCompactions) {
            status.markCompleted(new FailedOlapResult(
                    new CancellationException("Maximum number of concurrent compactions already running")));
            return null;
        }
        
        initializeJob();
        Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
        if (LOG.isTraceEnabled()) {
            LOG.trace("regionLocation = " + compactionRequest.regionLocation);
        }
        conf.set(MRConstants.REGION_LOCATION, compactionRequest.regionLocation);
        conf.set(MRConstants.COMPACTION_FILES, getCompactionFilesBase64String());

        SpliceSpark.pushScope(compactionRequest.scope + ": Parallelize");
        //JavaRDD rdd1 = SpliceSpark.getContext().parallelize(files, 1);
        //ParallelCollectionRDD rdd1 = getCompactionRDD();

        JavaSparkContext context = SpliceSpark.getContext();
        JavaPairRDD<Integer, Iterator> rdd1 = context.newAPIHadoopRDD(conf,
                CompactionInputFormat.class,
                Integer.class,
                Iterator.class);
        rdd1.setName("Distribute Compaction Load");
        SpliceSpark.popScope();

        compactionRequest.compactionFunction.setContext(new SparkCompactionContext());
        SpliceSpark.pushScope(compactionRequest.scope + ": Compact files");
        JavaRDD<String> rdd2 = rdd1.mapPartitions(new SparkFlatMapFunction<>(compactionRequest.compactionFunction));
        rdd2.setName(compactionRequest.jobDetails);
        SpliceSpark.popScope();

        SpliceSpark.pushScope("Compaction");
        if (!status.isRunning()) {
            //the client timed out during our setup, so it's time to stop
            return null;
        }
        long startTime = clock.currentTimeMillis();
        JavaFutureAction<List<String>> collectFuture = rdd2.collectAsync();
        while (!collectFuture.isDone()) {
            try {
                collectFuture.get(tickTime, TimeUnit.MILLISECONDS);
            } catch (TimeoutException te) {
                /*
                 * A TimeoutException just means that tickTime expired. That's okay, we just stick our
                 * head up and make sure that the client is still operating
                 */
            }
            if (!status.isRunning()) {
                /*
                 * The client timed out, so cancel the compaction and terminate
                 */
                collectFuture.cancel(true);
                context.cancelJobGroup(compactionRequest.jobGroup);
                return null;
            }
            if (clock.currentTimeMillis() - startTime > compactionRequest.maxWait) {
                // Make sure compaction is scheduled in Spark and running, otherwise cancel it and fallback to in-HBase compaction
                if (!compactionRunning(collectFuture.jobIds())) {
                    collectFuture.cancel(true);
                    context.cancelJobGroup(compactionRequest.jobGroup);
                    status.markCompleted(new FailedOlapResult(
                            new RejectedExecutionException("No resources available for running compaction in Spark")));
                    return null;
                }
            }
        }
        //the compaction completed
        List<String> sPaths = collectFuture.get();
        status.markCompleted(new CompactionResult(sPaths));
        SpliceSpark.popScope();

        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG, "Paths Returned: %s", sPaths);
        return null;
    } finally {
        concurrentCompactions.decrementAndGet();
    }
}