Java Code Examples for org.apache.spark.storage.StorageLevel#NONE

The following examples show how to use org.apache.spark.storage.StorageLevel#NONE. You can go to the original project or source file by following the links above each example.
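Every example below relies on the same idiom: an RDD or Dataset that has never been persisted reports StorageLevel.NONE(), so comparing the current storage level against NONE() reveals whether caching is already in effect. The following minimal, self-contained sketch (the class name and the local master URL are illustrative assumptions, not taken from any of the projects below) demonstrates the check-then-persist pattern:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;

public class StorageLevelNoneCheck {
	public static void main(String[] args) {
		SparkConf conf = new SparkConf()
			.setAppName("StorageLevelNoneCheck") // illustrative app name
			.setMaster("local[2]");              // local master for a runnable demo
		try (JavaSparkContext sc = new JavaSparkContext(conf)) {
			JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

			// A freshly created RDD is not persisted, so its storage level is NONE.
			if (numbers.getStorageLevel() == StorageLevel.NONE()) {
				numbers.persist(StorageLevel.MEMORY_ONLY());
			}

			// The RDD now reports MEMORY_ONLY instead of NONE.
			System.out.println("Storage level: " + numbers.getStorageLevel().description());
		}
	}
}

Reference comparison with == works here, as in the examples below, because StorageLevel.NONE() always returns the same cached instance; StorageLevel also overrides equals for value comparison.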
Example 1
Source File: BaseCommitActionExecutor.java    From hudi with Apache License 2.0
public HoodieWriteMetadata execute(JavaRDD<HoodieRecord<T>> inputRecordsRDD) {
  HoodieWriteMetadata result = new HoodieWriteMetadata();
  // Cache the tagged records so that the workload profile and the write below don't both recompute them
  // TODO: Consistent contract in HoodieWriteClient regarding preppedRecord storage level handling
  if (inputRecordsRDD.getStorageLevel() == StorageLevel.NONE()) {
    inputRecordsRDD.persist(StorageLevel.MEMORY_AND_DISK_SER());
  } else {
    LOG.info("RDD PreppedRecords was persisted at: " + inputRecordsRDD.getStorageLevel());
  }

  WorkloadProfile profile = null;
  if (isWorkloadProfileNeeded()) {
    profile = new WorkloadProfile(inputRecordsRDD);
    LOG.info("Workload profile :" + profile);
    saveWorkloadProfileMetadataToInflight(profile, instantTime);
  }

  // partition using the insert partitioner
  final Partitioner partitioner = getPartitioner(profile);
  JavaRDD<HoodieRecord<T>> partitionedRecords = partition(inputRecordsRDD, partitioner);
  JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> {
    if (WriteOperationType.isChangingRecords(operationType)) {
      return handleUpsertPartition(instantTime, partition, recordItr, partitioner);
    } else {
      return handleInsertPartition(instantTime, partition, recordItr, partitioner);
    }
  }, true).flatMap(List::iterator);

  updateIndexAndCommitIfNeeded(writeStatusRDD, result);
  return result;
}
 
Example 2
Source File: DataStep.java    From envelope with Apache License 2.0
public boolean isCached() {
  if (data == null) {
    return false;
  }

  return data.storageLevel() != StorageLevel.NONE();
}
 
Example 3
Source File: GrepCaching.java    From flink-perf with Apache License 2.0
public static void main(String[] args) {
	String master = args[0];
	String inFile = args[1];
	String outFile = args[2];
	String storageLevel = args[3];

	String[] patterns = new String[args.length - 4];
	System.arraycopy(args, 4, patterns, 0, args.length - 4);
	System.err.println("Starting spark with master="+master+" in="+inFile);
	System.err.println("Using patterns: "+ Arrays.toString(patterns));

	SparkConf conf = new SparkConf().setAppName("Grep job").setMaster(master).set("spark.hadoop.validateOutputSpecs", "false");
	JavaSparkContext sc = new JavaSparkContext(conf);

	StorageLevel sl;
	switch(storageLevel) {
		case "MEMORY_ONLY":
			sl = StorageLevel.MEMORY_ONLY(); break;
		case "MEMORY_AND_DISK":
			sl = StorageLevel.MEMORY_AND_DISK(); break;
		case "MEMORY_ONLY_SER":
			sl = StorageLevel.MEMORY_ONLY_SER(); break;
		case "MEMORY_AND_DISK_SER":
			sl = StorageLevel.MEMORY_AND_DISK_SER(); break;
		case "NONE":
			sl = StorageLevel.NONE(); break;
		default:
			throw new RuntimeException("Unknown storage level "+storageLevel);
	}

	JavaRDD<String> file = sc.textFile(inFile).persist(sl);
	for(int p = 0; p < patterns.length; p++) {
		final String pattern = patterns[p];
		JavaRDD<String> res = file.filter(new Function<String, Boolean>() {
			private static final long serialVersionUID = 1L;
			Pattern p = Pattern.compile(pattern);

			@Override
			public Boolean call(String value) throws Exception {
				if (value == null || value.length() == 0) {
					return false;
				}
				return p.matcher(value).find();
			}
		});
		res.saveAsTextFile(outFile+"_"+pattern);
	}

	// Shut down the Spark context cleanly once all patterns have been processed.
	sc.stop();
}
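Note that the switch in Example 3 hand-rolls a mapping that Spark already provides: StorageLevel.fromString resolves a level from its constant name (e.g. "MEMORY_ONLY", "NONE") and throws IllegalArgumentException for unknown names. A sketch of that alternative, reusing Example 3's variable names:

	// Equivalent to the switch above, using Spark's built-in name lookup.
	StorageLevel sl = StorageLevel.fromString(storageLevel);
	JavaRDD<String> file = sc.textFile(inFile).persist(sl);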
 
Example 4
Source File: SparkExecutionContext.java    From systemds with Apache License 2.0
/**
 * Removes an RDD variable from executor memory and disk if required.
 * Hence, it is intended to be used on rmvar only. Depending on the
 * ASYNCHRONOUS_VAR_DESTROY configuration, the cleanup happens
 * asynchronously or synchronously.
 *
 * @param rvar rdd variable to remove
 */
public static void cleanupRDDVariable(JavaPairRDD<?, ?> rvar)
{
	if (rvar.getStorageLevel() != StorageLevel.NONE()) {
		rvar.unpersist(!ASYNCHRONOUS_VAR_DESTROY);
	}
}
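Example 4 guards unpersist with a StorageLevel.NONE() check and passes !ASYNCHRONOUS_VAR_DESTROY as the blocking flag of unpersist. A minimal standalone sketch of the same pattern (the class and method names here are illustrative, not part of SystemDS):

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.storage.StorageLevel;

public final class RddCleanup {

	/**
	 * Unpersists the given RDD if it is currently cached.
	 *
	 * @param rdd      the RDD to clean up
	 * @param blocking if true, block until all cached blocks are removed;
	 *                 if false, return immediately and remove them asynchronously
	 */
	public static void cleanup(JavaRDD<?> rdd, boolean blocking) {
		// Unpersisting an RDD at StorageLevel.NONE would be a no-op anyway,
		// but the guard mirrors Example 4 and skips the call entirely.
		if (rdd.getStorageLevel() != StorageLevel.NONE()) {
			rdd.unpersist(blocking);
		}
	}
}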
 