org.apache.spark.SparkEnv Java Examples

The following examples show how to use org.apache.spark.SparkEnv. They are drawn from several open-source projects; the source file and originating project are noted above each example.
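All of the examples below share one basic pattern: SparkEnv.get() returns the environment of the running driver or executor, from which the SparkConf, the block manager, and the executor id can be read. A minimal sketch of that pattern (not taken from any of the projects below):

import org.apache.spark.SparkConf;
import org.apache.spark.SparkEnv;

public static void describeEnvironment() {
  // SparkEnv.get() returns null when no Spark environment exists yet (e.g. before the
  // SparkContext is created), so guard against that before dereferencing it.
  SparkEnv env = SparkEnv.get();
  if (env == null) {
    System.out.println("Not running inside a Spark driver or executor");
    return;
  }
  SparkConf conf = env.conf();            // the effective Spark configuration
  String executorId = env.executorId();   // "driver" on the driver, a numeric id on executors
  String appId = conf.getAppId();         // application id, as read in Example #1
  System.out.println(executorId + " / " + appId);
}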
Example #1
Source File: SecurityUtils.java    From envelope with Apache License 2.0
public static String getTokenStoreFilePath(Config config, boolean onDriver) throws IOException {
  String tokenFilePrefix;
  if (config.hasPath(TOKENS_FILE)) {
    tokenFilePrefix = config.getString(TOKENS_FILE);
  } else {
    String userName = UserGroupInformation.getCurrentUser().getShortUserName();
    String appId;
    if (onDriver) {
      appId = Contexts.getSparkSession().sparkContext().applicationId();
    } else {
      appId = SparkEnv.get().conf().getAppId();
    }
    tokenFilePrefix = String.format("/user/%s/.sparkStaging/%s/envelope_tokens", userName, appId);
  }
  return tokenFilePrefix;
}
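A call site for this helper might look roughly like the following; ConfigFactory.load() is only a stand-in for however the Envelope configuration is actually built:

Config config = ConfigFactory.load();                                       // placeholder for the real Envelope config
String driverPath   = SecurityUtils.getTokenStoreFilePath(config, true);    // on the driver
String executorPath = SecurityUtils.getTokenStoreFilePath(config, false);   // on an executor, via SparkEnv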
 
Example #2
Source File: ClusterFunctionProvider.java    From datacollector with Apache License 2.0
public static synchronized ClusterFunction getClusterFunction() throws Exception {
  // Why such a complex name?
  // When an executor dies and a new one takes its place, having just the partition id won't work, because the old
  // file might not have been closed by the namenode, since the old executor handling that partition may have just
  // died. So we must include a truly unique part, which is the executor id.
  // ---- BUT ----
  // Multiple partitions of the same job can run on the same executor, which is especially true now since we allow
  // the user to set fewer executors than partitions, so we also need the partition id.
  // ---- BUT ----
  // Users could end up not making it unique enough, since partition id and executor id are not unique across jobs;
  // if they use ${sdc:id()} in 2 cluster pipelines with the same directory, the names will still collide, so prefix
  // this with the pipeline name.
  // ---- DONE, YAY! ----
  if (clusterFunction == null) {
    clusterFunction =
        (ClusterFunction) BootstrapCluster.getClusterFunction(
            BootstrapCluster.getProperties().getProperty(ClusterModeConstants.CLUSTER_PIPELINE_NAME) +
                "-" +
                TaskContext.get().partitionId() + "-" +
                SparkEnv.get().executorId()
        );
  }
  return clusterFunction;
}
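With a cluster pipeline named, for example, my_pipeline, partition 3 running on executor 12 would pass the name "my_pipeline-3-12" to BootstrapCluster.getClusterFunction(), which is what makes the generated files unique across jobs, partitions, and executors, as the comment above explains.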
 
Example #3
Source File: SparkConfigUtils.java    From hudi with Apache License 2.0
/**
 * Dynamic calculation of the max memory to use for the spillable map:
 *   user.available.memory      = spark.executor.memory * (1 - spark.memory.fraction)
 *   spillable.available.memory = user.available.memory * hoodie.memory.fraction
 * Any time spark.executor.memory or spark.memory.fraction is changed, the memory used for the
 * spillable map changes accordingly.
 */
public static long getMaxMemoryAllowedForMerge(String maxMemoryFraction) {
  final String SPARK_EXECUTOR_MEMORY_PROP = "spark.executor.memory";
  final String SPARK_EXECUTOR_MEMORY_FRACTION_PROP = "spark.memory.fraction";
  // This is hard-coded in spark code {@link
  // https://github.com/apache/spark/blob/576c43fb4226e4efa12189b41c3bc862019862c6/core/src/main/scala/org/apache/
  // spark/memory/UnifiedMemoryManager.scala#L231} so have to re-define this here
  final String DEFAULT_SPARK_EXECUTOR_MEMORY_FRACTION = "0.6";
  // This is hard-coded in spark code {@link
  // https://github.com/apache/spark/blob/576c43fb4226e4efa12189b41c3bc862019862c6/core/src/main/scala/org/apache/
  // spark/SparkContext.scala#L471} so have to re-define this here
  final String DEFAULT_SPARK_EXECUTOR_MEMORY_MB = "1024"; // in MB
  if (SparkEnv.get() != null) {
    // 1 GB is the default conf used by Spark, look at SparkContext.scala
    long executorMemoryInBytes = Utils.memoryStringToMb(
        SparkEnv.get().conf().get(SPARK_EXECUTOR_MEMORY_PROP, DEFAULT_SPARK_EXECUTOR_MEMORY_MB)) * 1024 * 1024L;
    // 0.6 is the default value used by Spark,
    // look at {@link
    // https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/SparkConf.scala#L507}
    double memoryFraction = Double.parseDouble(
        SparkEnv.get().conf().get(SPARK_EXECUTOR_MEMORY_FRACTION_PROP, DEFAULT_SPARK_EXECUTOR_MEMORY_FRACTION));
    double maxMemoryFractionForMerge = Double.parseDouble(maxMemoryFraction);
    double userAvailableMemory = executorMemoryInBytes * (1 - memoryFraction);
    long maxMemoryForMerge = (long) Math.floor(userAvailableMemory * maxMemoryFractionForMerge);
    return Math.max(DEFAULT_MIN_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES, maxMemoryForMerge);
  } else {
    return DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES;
  }
}
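To make the Javadoc formula concrete with illustrative numbers: with spark.executor.memory = 4096m, the default spark.memory.fraction of 0.6, and a maxMemoryFraction of 0.6, the user available memory is 4 GiB * (1 - 0.6) = 1.6 GiB, so the spillable map is capped at 1.6 GiB * 0.6 ≈ 0.96 GiB (subject to the DEFAULT_MIN_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES floor). Outside a running Spark environment, where SparkEnv.get() is null, the fixed default is returned instead.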
 
Example #4
Source File: TokenStoreListener.java    From envelope with Apache License 2.0
public synchronized static TokenStoreListener get() {
  if (INSTANCE == null) {
    LOG.trace("SparkConf: " + SparkEnv.get().conf().toDebugString());
    Config config = ConfigFactory.parseString(SparkEnv.get().conf().get(ENVELOPE_CONFIGURATION_SPARK));
    INSTANCE = new TokenStoreListener(ConfigUtils.getOrElse(config, SECURITY_PREFIX, ConfigFactory.empty()));
  }
  return INSTANCE;
}
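For the executor-side lookup in Example #4 to work, the driver must first have placed the rendered Envelope configuration into the SparkConf under the key that ENVELOPE_CONFIGURATION_SPARK refers to. A rough sketch of that driver-side step, with "spark.envelope.configuration" standing in for whatever the real constant resolves to:

import com.typesafe.config.Config;
import com.typesafe.config.ConfigRenderOptions;
import org.apache.spark.SparkConf;

// Hypothetical key: the real ENVELOPE_CONFIGURATION_SPARK constant is defined elsewhere in Envelope.
static final String ENVELOPE_CONFIGURATION_SPARK = "spark.envelope.configuration";

static void publishEnvelopeConfig(SparkConf sparkConf, Config config) {
  // Render the Typesafe Config back to HOCON so executors can recover it with ConfigFactory.parseString(...)
  sparkConf.set(ENVELOPE_CONFIGURATION_SPARK, config.root().render(ConfigRenderOptions.concise()));
}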
 
Example #5
Source File: GlobalWatermarkHolder.java    From beam with Apache License 2.0
/**
 * Advances the watermarks to the next-in-line watermarks. SparkWatermarks are monotonically
 * increasing.
 */
private static void advance(final String batchId) {
  synchronized (GlobalWatermarkHolder.class) {
    final BlockManager blockManager = SparkEnv.get().blockManager();
    final Map<Integer, SparkWatermarks> newWatermarks = computeNewWatermarks(blockManager);

    if (!newWatermarks.isEmpty()) {
      writeRemoteWatermarkBlock(newWatermarks, blockManager);
      writeLocalWatermarkCopy(newWatermarks);
    } else {
      LOG.info("No new watermarks could be computed upon completion of batch: {}", batchId);
    }
  }
}
 
Example #6
Source File: GlobalWatermarkHolder.java    From beam with Apache License 2.0
@VisibleForTesting
public static synchronized void clear() {
  sourceTimes.clear();
  lastWatermarkedBatchTime = 0;
  writeLocalWatermarkCopy(null);
  final SparkEnv sparkEnv = SparkEnv.get();
  if (sparkEnv != null) {
    final BlockManager blockManager = sparkEnv.blockManager();
    blockManager.removeBlock(WATERMARKS_BLOCK_ID, true);
  }
}
 
Example #7
Source File: SparkInterpreter.java    From Explorer with Apache License 2.0
public synchronized SparkContext getSparkContext() {
    context.loadConfiguration(new PropertiesReader().readConfigFrom("spark_interpreter"));
    SparkContext sc = context.getConnector();
    env = SparkEnv.get();
    sparkListener = new JobProgressListener(context.getConnector().getConf());
    sc.listenerBus().addListener(sparkListener);

    return sc;
}
 
Example #8
Source File: GlobalWatermarkHolder.java    From beam with Apache License 2.0
@Override
public Map<Integer, SparkWatermarks> load(@Nonnull String key) throws Exception {
  final BlockManager blockManager = SparkEnv.get().blockManager();
  final Map<Integer, SparkWatermarks> watermarks = fetchSparkWatermarks(blockManager);
  return watermarks != null ? watermarks : Maps.newHashMap();
}
 
Example #9
Source File: SparkInterpreter.java    From Explorer with Apache License 2.0
public InterpreterResult _interpret(String[] lines) {

    SparkEnv.set(env);

    String[] linesToRun = new String[lines.length + 1];
    for (int i = 0; i < lines.length; i++) {
        linesToRun[i] = lines[i];
    }
    linesToRun[lines.length] = "print(\"\")";

    //  Console.setOut((java.io.PrintStream) binder.get("out"));
    out.reset();
    Code r = null;
    String incomplete = "";
    for (String s : linesToRun) {
        scala.tools.nsc.interpreter.Results.Result res = null;
        try {
            res = intp.interpret(incomplete + s);
        } catch (Exception e) {
            context.getConnector().clearJobGroup();
            logger.info("Interpreter exception", e);
            return new InterpreterResult(Code.ERROR, e.getMessage());
        }

        r = getResultCode(res);

        if (r == Code.ERROR) {
            context.getConnector().clearJobGroup();
            return new InterpreterResult(r, out.toString());
        } else if (r == Code.INCOMPLETE) {
            incomplete += s + "\n";
        } else {
            incomplete = "";
        }
    }

    if (r == Code.INCOMPLETE) {
        return new InterpreterResult(r, "Incomplete expression");
    } else {
        return new InterpreterResult(r, out.toString());
    }
}