Java Code Examples for org.apache.spark.SparkConf#contains()

The following examples show how to use org.apache.spark.SparkConf#contains() . They are taken from open source projects; each example lists its original project and source file.
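SparkConf#contains(String key) returns true only when the given key has been set explicitly (programmatically, via spark-submit, or through spark.* system properties), so the recurring pattern in the examples below is to test contains() before applying a fallback, leaving any value supplied from outside untouched. The sketch below is a minimal, self-contained illustration of that pattern; the class name, key choices and default values are illustrative and not taken from any of the projects listed here.

import org.apache.spark.SparkConf;

public class SparkConfContainsDemo {
    public static void main(String[] args) {
        // new SparkConf() picks up spark.* system properties by default.
        SparkConf conf = new SparkConf();

        // Fall back to a local master and a default app name only when
        // they were not supplied from outside (e.g. via spark-submit).
        if (!conf.contains("spark.master")) {
            conf.setMaster("local[*]");
        }
        if (!conf.contains("spark.app.name")) {
            conf.setAppName("contains-demo");
        }

        // contains() also guards optional settings before reading them,
        // since get() throws NoSuchElementException for unset keys.
        if (conf.contains("spark.executor.memory")) {
            System.out.println("executor memory = " + conf.get("spark.executor.memory"));
        }

        System.out.println(conf.toDebugString());
    }
}
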
Example 1
Source File: SparkContextProvider.java    From rdf2x with Apache License 2.0
/**
 * Provide a {@link JavaSparkContext} based on default settings
 *
 * @return a {@link JavaSparkContext} based on default settings
 */
public static JavaSparkContext provide() {
    SparkConf config = new SparkConf()
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .registerKryoClasses(getSerializableClasses());

    if (!config.contains("spark.app.name")) {
        config.setAppName("RDF2X");
    }
    if (!config.contains("spark.master")) {
        config.setMaster("local");
    }

    // Uncomment to require Kryo registration, which makes serialization fail fast
    // when a class has not been registered; some Spark internal classes would then
    // need to be registered as well.
    // config.set("spark.kryo.registrationRequired", "true");

    log.info("Getting Spark Context for config: \n{}", config.toDebugString());
    return new JavaSparkContext(config);
}
 
Example 2
Source File: Word2VecPerformer.java    From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(Word2VecVariables.ADAGRAD, false);
    negative = conf.getDouble(Word2VecVariables.NEGATIVE, 5);
    numWords = conf.getInt(Word2VecVariables.NUM_WORDS, 1);
    window = conf.getInt(Word2VecVariables.WINDOW, 5);
    alpha = conf.getDouble(Word2VecVariables.ALPHA, 0.025f);
    minAlpha = conf.getDouble(Word2VecVariables.MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(Word2VecVariables.NUM_WORDS, 1);
    vectorLength = conf.getInt(Word2VecVariables.VECTOR_LENGTH, 100);
    initExpTable();

    if (negative > 0 && conf.contains(Word2VecVariables.TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(Word2VecVariables.TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }

}
 
Example 3
Source File: Word2VecPerformerVoid.java    From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(ADAGRAD, false);
    negative = conf.getDouble(NEGATIVE, 5);
    numWords = conf.getInt(NUM_WORDS, 1);
    window = conf.getInt(WINDOW, 5);
    alpha = conf.getDouble(ALPHA, 0.025f);
    minAlpha = conf.getDouble(MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(NUM_WORDS, 1);
    iterations = conf.getInt(ITERATIONS, 5);
    vectorLength = conf.getInt(VECTOR_LENGTH, 100);

    initExpTable();

    if (negative > 0 && conf.contains(TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }
}
 
Example 4
Source File: IPySparkInterpreter.java    From zeppelin with Apache License 2.0
@Override
public synchronized void open() throws InterpreterException {
  // IPySparkInterpreter may already be opened in PySparkInterpreter when ipython is available.
  if (opened) {
    return;
  }
  PySparkInterpreter pySparkInterpreter =
          getInterpreterInTheSameSessionByClassName(PySparkInterpreter.class, false);
  setProperty("zeppelin.python", pySparkInterpreter.getPythonExec());
  sparkInterpreter = getInterpreterInTheSameSessionByClassName(SparkInterpreter.class);
  setProperty("zeppelin.py4j.useAuth",
          sparkInterpreter.getSparkVersion().isSecretSocketSupported() + "");
  SparkConf conf = sparkInterpreter.getSparkContext().getConf();
  // only set PYTHONPATH in embedded, local or yarn-client mode.
  // yarn-cluster will setup PYTHONPATH automatically.
  if (!conf.contains(SparkStringConstants.SUBMIT_DEPLOY_MODE_PROP_NAME) ||
          !conf.get(SparkStringConstants.SUBMIT_DEPLOY_MODE_PROP_NAME).equals("cluster")) {
    setAdditionalPythonPath(PythonUtils.sparkPythonPath());
  }
  setUseBuiltinPy4j(false);
  setAdditionalPythonInitFile("python/zeppelin_ipyspark.py");
  setProperty("zeppelin.py4j.useAuth",
          sparkInterpreter.getSparkVersion().isSecretSocketSupported() + "");
  super.open();
  opened = true;
}
 
Example 5
Source File: PSRpcFactory.java    From systemds with Apache License 2.0
public static SparkPSProxy createSparkPSProxy(SparkConf conf, int port, LongAccumulator aRPC) throws IOException {
	long rpcTimeout = conf.contains("spark.rpc.askTimeout") ?
		conf.getTimeAsMs("spark.rpc.askTimeout") :
		conf.getTimeAsMs("spark.network.timeout", "120s");
	String host = conf.get("spark.driver.host");
	TransportContext context = createTransportContext(conf, new LocalParamServer());
	return new SparkPSProxy(context.createClientFactory().createClient(host, port), rpcTimeout, aRPC);
}
 
Example 6
Source File: SparkEngineBase.java    From beakerx with Apache License 2.0
protected void configureSparkConf(SparkConf sparkConf) {
  if (!sparkConf.contains(SPARK_APP_NAME)) {
    sparkConf.setAppName("beaker_" + UUID.randomUUID().toString());
  }
  if (sparkConf.contains(SPARK_MASTER) && !isLocalSpark(sparkConf)) {
    sparkConf.set(SPARK_REPL_CLASS_OUTPUT_DIR, KernelManager.get().getOutDir());
  }
}
 
Example 7
Source File: SparkEngineNoUIImpl.java    From beakerx with Apache License 2.0
private void configureSparkConfDefaults(SparkConf sparkConf) {
  if (!sparkConf.contains(SPARK_MASTER)) {
    this.conf.getMaster().ifPresent(sparkConf::setMaster);
  }
  if (!sparkConf.contains(SPARK_EXECUTOR_CORES)) {
    this.conf.getExecutorCores().ifPresent(x -> sparkConf.set(SPARK_EXECUTOR_CORES, x));
  }
  if (!sparkConf.contains(SPARK_EXECUTOR_MEMORY)) {
    this.conf.getExecutorMemory().ifPresent(x -> sparkConf.set(SPARK_EXECUTOR_MEMORY, x));
  }
}
 
Example 8
Source File: SparkExecutionContext.java    From systemds with Apache License 2.0
/**
 * Sets up a SystemDS-preferred Spark configuration based on the implicit
 * default configuration (as passed via configurations from outside).
 *
 * @return spark configuration
 */
public static SparkConf createSystemDSSparkConf() {
	SparkConf conf = new SparkConf();

	//always set unlimited result size (required for cp collect)
	conf.set("spark.driver.maxResultSize", "0");

	//always use the fair scheduler (for single jobs, it's equivalent to fifo
	//but for concurrent jobs in parfor it ensures better data locality because
	//round robin assignment mitigates the problem of 'sticky slots')
	if( FAIR_SCHEDULER_MODE ) {
		conf.set("spark.scheduler.mode", "FAIR");
	}

	//increase scheduler delay (usually more robust due to better data locality)
	if( !conf.contains("spark.locality.wait") ) { //default 3s
		conf.set("spark.locality.wait", "5s");
	}
	
	//increase max message size for robustness
	String sparkVersion = org.apache.spark.package$.MODULE$.SPARK_VERSION();
	String msgSizeConf = (UtilFunctions.compareVersion(sparkVersion, "2.0.0") < 0) ?
		"spark.akka.frameSize" : "spark.rpc.message.maxSize";
	if( !conf.contains(msgSizeConf) ) { //default 128MB
		conf.set(msgSizeConf, "512");
	}
	
	return conf;
}
 
Example 9
Source File: IPySparkInterpreter.java    From zeppelin with Apache License 2.0
@Override
protected Map<String, String> setupKernelEnv() throws IOException {
  Map<String, String> env = super.setupKernelEnv();
  // set PYSPARK_PYTHON
  SparkConf conf = sparkInterpreter.getSparkContext().getConf();
  if (conf.contains("spark.pyspark.python")) {
    env.put("PYSPARK_PYTHON", conf.get("spark.pyspark.python"));
  }
  return env;
}
 
Example 10
Source File: SparkContextFactory.java    From beam with Apache License 2.0
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
  if (usesProvidedSparkContext) {
    LOG.info("Using a provided Spark Context");
    JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
    if (jsc == null || jsc.sc().isStopped()) {
      LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
      throw new RuntimeException("The provided Spark context was not created or was stopped");
    }
    return jsc;
  } else {
    LOG.info("Creating a brand new Spark Context.");
    SparkConf conf = new SparkConf();
    if (!conf.contains("spark.master")) {
      // set master if not set.
      conf.setMaster(contextOptions.getSparkMaster());
    }

    if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
      conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
    }

    conf.setAppName(contextOptions.getAppName());
    // register immutable collections serializers because the SDK uses them.
    conf.set("spark.kryo.registrator", SparkRunnerKryoRegistrator.class.getName());
    return new JavaSparkContext(conf);
  }
}
 
Example 11
Source File: KerberosParameterValidations.java    From envelope with Apache License 2.0
@Override
public ValidationResult validate(Config config) {
  SparkConf conf = new SparkConf();
  if (!config.hasPath(USER_PRINC_CONFIG) && !conf.contains("spark.yarn.principal")) {
    return new ValidationResult(this, Validity.INVALID, USAGE);
  }
  return new ValidationResult(this, Validity.VALID,
      "Kerberos principal has been supplied");
}
 
Example 12
Source File: KerberosParameterValidations.java    From envelope with Apache License 2.0
@Override
public ValidationResult validate(Config config) {
  SparkConf conf = new SparkConf();
  if (!config.hasPath(KEYTAB_CONFIG) && !conf.contains("spark.yarn.keytab")) {
    return new ValidationResult(this, Validity.INVALID, USAGE);
  }
  return new ValidationResult(this, Validity.VALID,
      "Kerberos keytab has been supplied");
}
 
Example 13
Source File: SparkInterpreter.java    From zeppelin with Apache License 2.0
@Override
public void open() throws InterpreterException {
  try {
    SparkConf conf = new SparkConf();
    for (Map.Entry<Object, Object> entry : getProperties().entrySet()) {
      if (!StringUtils.isBlank(entry.getValue().toString())) {
        conf.set(entry.getKey().toString(), entry.getValue().toString());
      }
      // zeppelin.spark.useHiveContext & zeppelin.spark.concurrentSQL are legacy zeppelin
      // properties, convert them to spark properties here.
      if (entry.getKey().toString().equals("zeppelin.spark.useHiveContext")) {
        conf.set("spark.useHiveContext", entry.getValue().toString());
      }
      if (entry.getKey().toString().equals("zeppelin.spark.concurrentSQL")
          && entry.getValue().toString().equals("true")) {
        conf.set(SparkStringConstants.SCHEDULER_MODE_PROP_NAME, "FAIR");
      }
    }
    // use local mode for embedded spark mode when spark.master is not found
    if (!conf.contains(SparkStringConstants.MASTER_PROP_NAME)) {
      if (conf.contains("master")) {
        conf.set(SparkStringConstants.MASTER_PROP_NAME, conf.get("master"));
      } else {
        String masterEnv = System.getenv(SparkStringConstants.MASTER_ENV_NAME);
        conf.set(SparkStringConstants.MASTER_PROP_NAME,
                masterEnv == null ? SparkStringConstants.DEFAULT_MASTER_VALUE : masterEnv);
      }
    }
    this.innerInterpreter = loadSparkScalaInterpreter(conf);
    this.innerInterpreter.open();

    sc = this.innerInterpreter.getSparkContext();
    jsc = JavaSparkContext.fromSparkContext(sc);
    sparkVersion = SparkVersion.fromVersionString(sc.version());
    if (enableSupportedVersionCheck && sparkVersion.isUnsupportedVersion()) {
      throw new Exception("This is not officially supported spark version: " + sparkVersion
          + "\nYou can set zeppelin.spark.enableSupportedVersionCheck to false if you really" +
          " want to try this version of spark.");
    }
    sqlContext = this.innerInterpreter.getSqlContext();
    sparkSession = this.innerInterpreter.getSparkSession();

    SESSION_NUM.incrementAndGet();
  } catch (Exception e) {
    LOGGER.error("Fail to open SparkInterpreter", e);
    throw new InterpreterException("Fail to open SparkInterpreter", e);
  }
}
 
Example 14
Source File: SparkEngineBase.java    From beakerx with Apache License 2.0
private static boolean isLocalSpark(SparkConf sparkConf) {
  return sparkConf.contains(SPARK_MASTER) && sparkConf.get(SPARK_MASTER) != null && sparkConf.get("spark.master").startsWith("local");
}
 
Example 15
Source File: SparkEngineWithUIImpl.java    From beakerx with Apache License 2.0
private void configureRuntime(SparkConf sparkConf) {
  if (sparkConf.contains("spark.master") && sparkConf.get("spark.master").contains("yarn")) {
    YarnSparkOptionCommand.runtimeConfiguration(this, sparkConf);
  }
}