Java Code Examples for org.apache.spark.SparkConf#contains()

The following examples show how to use org.apache.spark.SparkConf#contains() . They are taken from open source projects; each example lists its original project and source file.
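SparkConf#contains(String key) returns true only when the given key has been set explicitly (programmatically, via spark-submit, or through spark.* system properties), so the recurring pattern in the examples below is to test contains() before applying a fallback, leaving any value supplied from outside untouched. The sketch below is a minimal, self-contained illustration of that pattern; the class name, key choices and default values are illustrative and not taken from any of the projects listed here.

import org.apache.spark.SparkConf;

public class SparkConfContainsDemo {
    public static void main(String[] args) {
        // new SparkConf() picks up spark.* system properties by default.
        SparkConf conf = new SparkConf();

        // Fall back to a local master and a default app name only when
        // they were not supplied from outside (e.g. via spark-submit).
        if (!conf.contains("spark.master")) {
            conf.setMaster("local[*]");
        }
        if (!conf.contains("spark.app.name")) {
            conf.setAppName("contains-demo");
        }

        // contains() also guards optional settings before reading them,
        // since get() throws NoSuchElementException for unset keys.
        if (conf.contains("spark.executor.memory")) {
            System.out.println("executor memory = " + conf.get("spark.executor.memory"));
        }

        System.out.println(conf.toDebugString());
    }
}
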
Example 1
Source File: SparkContextProvider.java    From rdf2x with Apache License 2.0
/**
 * Provide a {@link JavaSparkContext} based on default settings
 *
 * @return a {@link JavaSparkContext} based on default settings
 */
public static JavaSparkContext provide() {
    SparkConf config = new SparkConf()
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .registerKryoClasses(getSerializableClasses());

    if (!config.contains("spark.app.name")) {
        config.setAppName("RDF2X");
    }
    if (!config.contains("spark.master")) {
        config.setMaster("local");
    }

    // Uncomment to require Kryo registration, which makes serialization fail fast
    // when a class has not been registered; some Spark internal classes would then
    // need to be registered as well.
    // config.set("spark.kryo.registrationRequired", "true");

    log.info("Getting Spark Context for config: \n{}", config.toDebugString());
    return new JavaSparkContext(config);
}
 
Example 2
Source File: Word2VecPerformer.java    From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(Word2VecVariables.ADAGRAD, false);
    negative = conf.getDouble(Word2VecVariables.NEGATIVE, 5);
    numWords = conf.getInt(Word2VecVariables.NUM_WORDS, 1);
    window = conf.getInt(Word2VecVariables.WINDOW, 5);
    alpha = conf.getDouble(Word2VecVariables.ALPHA, 0.025f);
    minAlpha = conf.getDouble(Word2VecVariables.MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(Word2VecVariables.NUM_WORDS, 1);
    vectorLength = conf.getInt(Word2VecVariables.VECTOR_LENGTH, 100);
    initExpTable();

    if (negative > 0 && conf.contains(Word2VecVariables.TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(Word2VecVariables.TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }

}
 
Example 3
Source File: Word2VecPerformerVoid.java    From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(ADAGRAD, false);
    negative = conf.getDouble(NEGATIVE, 5);
    numWords = conf.getInt(NUM_WORDS, 1);
    window = conf.getInt(WINDOW, 5);
    alpha = conf.getDouble(ALPHA, 0.025f);
    minAlpha = conf.getDouble(MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(NUM_WORDS, 1);
    iterations = conf.getInt(ITERATIONS, 5);
    vectorLength = conf.getInt(VECTOR_LENGTH, 100);

    initExpTable();

    if (negative > 0 && conf.contains(TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }
}
 
Example 4
Source File: IPySparkInterpreter.java    From zeppelin with Apache License 2.0
@Override
public synchronized void open() throws InterpreterException {
  // IPySparkInterpreter may already be opened in PySparkInterpreter when ipython is available.
  if (opened) {
    return;
  }
  PySparkInterpreter pySparkInterpreter =
          getInterpreterInTheSameSessionByClassName(PySparkInterpreter.class, false);
  setProperty("zeppelin.python", pySparkInterpreter.getPythonExec());
  sparkInterpreter = getInterpreterInTheSameSessionByClassName(SparkInterpreter.class);
  setProperty("zeppelin.py4j.useAuth",
          sparkInterpreter.getSparkVersion().isSecretSocketSupported() + "");
  SparkConf conf = sparkInterpreter.getSparkContext().getConf();
  // only set PYTHONPATH in embedded, local or yarn-client mode.
  // yarn-cluster will setup PYTHONPATH automatically.
  if (!conf.contains(SparkStringConstants.SUBMIT_DEPLOY_MODE_PROP_NAME) ||
          !conf.get(SparkStringConstants.SUBMIT_DEPLOY_MODE_PROP_NAME).equals("cluster")) {
    setAdditionalPythonPath(PythonUtils.sparkPythonPath());
  }
  setUseBuiltinPy4j(false);
  setAdditionalPythonInitFile("python/zeppelin_ipyspark.py");
  setProperty("zeppelin.py4j.useAuth",
          sparkInterpreter.getSparkVersion().isSecretSocketSupported() + "");
  super.open();
  opened = true;
}
 
Example 5
Source File: PSRpcFactory.java    From systemds with Apache License 2.0
public static SparkPSProxy createSparkPSProxy(SparkConf conf, int port, LongAccumulator aRPC) throws IOException {
	long rpcTimeout = conf.contains("spark.rpc.askTimeout") ?
		conf.getTimeAsMs("spark.rpc.askTimeout") :
		conf.getTimeAsMs("spark.network.timeout", "120s");
	String host = conf.get("spark.driver.host");
	TransportContext context = createTransportContext(conf, new LocalParamServer());
	return new SparkPSProxy(context.createClientFactory().createClient(host, port), rpcTimeout, aRPC);
}
 
Example 6
Source File: SparkEngineBase.java    From beakerx with Apache License 2.0
protected void configureSparkConf(SparkConf sparkConf) {
  if (!sparkConf.contains(SPARK_APP_NAME)) {
    sparkConf.setAppName("beaker_" + UUID.randomUUID().toString());
  }
  if (sparkConf.contains(SPARK_MASTER) && !isLocalSpark(sparkConf)) {
    sparkConf.set(SPARK_REPL_CLASS_OUTPUT_DIR, KernelManager.get().getOutDir());
  }
}
 
Example 7
Source File: SparkEngineNoUIImpl.java    From beakerx with Apache License 2.0
private void configureSparkConfDefaults(SparkConf sparkConf) {
  if (!sparkConf.contains(SPARK_MASTER)) {
    this.conf.getMaster().ifPresent(sparkConf::setMaster);
  }
  if (!sparkConf.contains(SPARK_EXECUTOR_CORES)) {
    this.conf.getExecutorCores().ifPresent(x -> sparkConf.set(SPARK_EXECUTOR_CORES, x));
  }
  if (!sparkConf.contains(SPARK_EXECUTOR_MEMORY)) {
    this.conf.getExecutorMemory().ifPresent(x -> sparkConf.set(SPARK_EXECUTOR_MEMORY, x));
  }
}
 
Example 8
Source File: SparkExecutionContext.java    From systemds with Apache License 2.0
/**
 * Sets up a SystemDS-preferred Spark configuration based on the implicit
 * default configuration (as passed via configurations from outside).
 *
 * @return spark configuration
 */
public static SparkConf createSystemDSSparkConf() {
	SparkConf conf = new SparkConf();

	//always set unlimited result size (required for cp collect)
	conf.set("spark.driver.maxResultSize", "0");

	//always use the fair scheduler (for single jobs, it's equivalent to fifo
	//but for concurrent jobs in parfor it ensures better data locality because
	//round robin assignment mitigates the problem of 'sticky slots')
	if( FAIR_SCHEDULER_MODE ) {
		conf.set("spark.scheduler.mode", "FAIR");
	}

	//increase scheduler delay (usually more robust due to better data locality)
	if( !conf.contains("spark.locality.wait") ) { //default 3s
		conf.set("spark.locality.wait", "5s");
	}
	
	//increase max message size for robustness
	String sparkVersion = org.apache.spark.package$.MODULE$.SPARK_VERSION();
	String msgSizeConf = (UtilFunctions.compareVersion(sparkVersion, "2.0.0") < 0) ?
		"spark.akka.frameSize" : "spark.rpc.message.maxSize";
	if( !conf.contains(msgSizeConf) ) { //default 128MB
		conf.set(msgSizeConf, "512");
	}
	
	return conf;
}
 
Example 9
Source File: IPySparkInterpreter.java    From zeppelin with Apache License 2.0
@Override
protected Map<String, String> setupKernelEnv() throws IOException {
  Map<String, String> env = super.setupKernelEnv();
  // set PYSPARK_PYTHON
  SparkConf conf = sparkInterpreter.getSparkContext().getConf();
  if (conf.contains("spark.pyspark.python")) {
    env.put("PYSPARK_PYTHON", conf.get("spark.pyspark.python"));
  }
  return env;
}
 
Example 10
Source File: SparkContextFactory.java    From beam with Apache License 2.0
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
  if (usesProvidedSparkContext) {
    LOG.info("Using a provided Spark Context");
    JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
    if (jsc == null || jsc.sc().isStopped()) {
      LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
      throw new RuntimeException("The provided Spark context was not created or was stopped");
    }
    return jsc;
  } else {
    LOG.info("Creating a brand new Spark Context.");
    SparkConf conf = new SparkConf();
    if (!conf.contains("spark.master")) {
      // set master if not set.
      conf.setMaster(contextOptions.getSparkMaster());
    }

    if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
      conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
    }

    conf.setAppName(contextOptions.getAppName());
    // register immutable collections serializers because the SDK uses them.
    conf.set("spark.kryo.registrator", SparkRunnerKryoRegistrator.class.getName());
    return new JavaSparkContext(conf);
  }
}
 
Example 11
Source File: KerberosParameterValidations.java    From envelope with Apache License 2.0
@Override
public ValidationResult validate(Config config) {
  SparkConf conf = new SparkConf();
  if (!config.hasPath(USER_PRINC_CONFIG) && !conf.contains("spark.yarn.principal")) {
    return new ValidationResult(this, Validity.INVALID, USAGE);
  }
  return new ValidationResult(this, Validity.VALID,
      "Kerberos principal has been supplied");
}
 
Example 12
Source File: KerberosParameterValidations.java    From envelope with Apache License 2.0
@Override
public ValidationResult validate(Config config) {
  SparkConf conf = new SparkConf();
  if (!config.hasPath(KEYTAB_CONFIG) && !conf.contains("spark.yarn.keytab")) {
    return new ValidationResult(this, Validity.INVALID, USAGE);
  }
  return new ValidationResult(this, Validity.VALID,
      "Kerberos keytab has been supplied");
}
 
Example 13
Source File: SparkInterpreter.java    From zeppelin with Apache License 2.0
@Override
public void open() throws InterpreterException {
  try {
    SparkConf conf = new SparkConf();
    for (Map.Entry<Object, Object> entry : getProperties().entrySet()) {
      if (!StringUtils.isBlank(entry.getValue().toString())) {
        conf.set(entry.getKey().toString(), entry.getValue().toString());
      }
      // zeppelin.spark.useHiveContext & zeppelin.spark.concurrentSQL are legacy zeppelin
      // properties, convert them to spark properties here.
      if (entry.getKey().toString().equals("zeppelin.spark.useHiveContext")) {
        conf.set("spark.useHiveContext", entry.getValue().toString());
      }
      if (entry.getKey().toString().equals("zeppelin.spark.concurrentSQL")
          && entry.getValue().toString().equals("true")) {
        conf.set(SparkStringConstants.SCHEDULER_MODE_PROP_NAME, "FAIR");
      }
    }
    // use local mode for embedded spark mode when spark.master is not found
    if (!conf.contains(SparkStringConstants.MASTER_PROP_NAME)) {
      if (conf.contains("master")) {
        conf.set(SparkStringConstants.MASTER_PROP_NAME, conf.get("master"));
      } else {
        String masterEnv = System.getenv(SparkStringConstants.MASTER_ENV_NAME);
        conf.set(SparkStringConstants.MASTER_PROP_NAME,
                masterEnv == null ? SparkStringConstants.DEFAULT_MASTER_VALUE : masterEnv);
      }
    }
    this.innerInterpreter = loadSparkScalaInterpreter(conf);
    this.innerInterpreter.open();

    sc = this.innerInterpreter.getSparkContext();
    jsc = JavaSparkContext.fromSparkContext(sc);
    sparkVersion = SparkVersion.fromVersionString(sc.version());
    if (enableSupportedVersionCheck && sparkVersion.isUnsupportedVersion()) {
      throw new Exception("This is not officially supported spark version: " + sparkVersion
          + "\nYou can set zeppelin.spark.enableSupportedVersionCheck to false if you really" +
          " want to try this version of spark.");
    }
    sqlContext = this.innerInterpreter.getSqlContext();
    sparkSession = this.innerInterpreter.getSparkSession();

    SESSION_NUM.incrementAndGet();
  } catch (Exception e) {
    LOGGER.error("Fail to open SparkInterpreter", e);
    throw new InterpreterException("Fail to open SparkInterpreter", e);
  }
}
 
Example 14
Source File: SparkEngineBase.java    From beakerx with Apache License 2.0
private static boolean isLocalSpark(SparkConf sparkConf) {
  return sparkConf.contains(SPARK_MASTER) && sparkConf.get(SPARK_MASTER) != null && sparkConf.get("spark.master").startsWith("local");
}
 
Example 15
Source File: SparkEngineWithUIImpl.java    From beakerx with Apache License 2.0
private void configureRuntime(SparkConf sparkConf) {
  if (sparkConf.contains("spark.master") && sparkConf.get("spark.master").contains("yarn")) {
    YarnSparkOptionCommand.runtimeConfiguration(this, sparkConf);
  }
}