Java Code Examples for org.apache.spark.SparkConf#getInt()

The following examples show how to use org.apache.spark.SparkConf#getInt(). They are drawn from open source projects; the originating project, source file, and license are noted above each example.
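Before the project examples, here is a minimal self-contained sketch of the method itself. The property values and defaults are illustrative only: getInt(key, defaultValue) parses the configured value as an int, returns the supplied default when the key is not set, and throws a NumberFormatException if the value cannot be parsed.

import org.apache.spark.SparkConf;

public class SparkConfGetIntDemo {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("getInt-demo")
                .set("spark.executor.cores", "4");

        int cores = conf.getInt("spark.executor.cores", 1);         // 4, parsed from the conf
        int instances = conf.getInt("spark.executor.instances", 2); // 2, key absent so the default is used
        System.out.println(cores + " cores, " + instances + " executor instances");
    }
}
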
Example 1
Source File: SparkExecutionContext.java    From systemds with Apache License 2.0
private void analyzeSparkParallelismConfiguation(SparkConf conf) {
	//ensure allocated spark conf
	SparkConf sconf = (conf == null) ? createSystemDSSparkConf() : conf;
	
	int numExecutors = sconf.getInt("spark.executor.instances", -1);
	int numCoresPerExec = sconf.getInt("spark.executor.cores", -1);
	int defaultPar = sconf.getInt("spark.default.parallelism", -1);

	if( numExecutors > 1 && (defaultPar > 1 || numCoresPerExec > 1) ) {
		_numExecutors = numExecutors;
		_defaultPar = (defaultPar>1) ? defaultPar : numExecutors * numCoresPerExec;
		_confOnly &= true;
	}
	else {
		//get default parallelism (total number of executors and cores)
		//note: spark context provides this information while conf does not
		//(for num executors we need to correct for driver and local mode)
		@SuppressWarnings("resource")
		JavaSparkContext jsc = getSparkContextStatic();
		_numExecutors = Math.max(jsc.sc().getExecutorMemoryStatus().size() - 1, 1);
		_defaultPar = jsc.defaultParallelism();
		_confOnly &= false; //implies env info refresh w/ spark context
	}
}
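
The analysis above only trusts the conf when the relevant keys are actually present; otherwise it falls back to the live SparkContext. A short sketch, with made-up values, of a configuration that satisfies the conf-only branch:

SparkConf conf = new SparkConf()
        .set("spark.executor.instances", "8")
        .set("spark.executor.cores", "4");
// spark.default.parallelism is left unset, so the method derives the default
// parallelism as numExecutors * numCoresPerExec = 8 * 4 = 32 without querying
// the SparkContext.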
 
Example 2
Source File: Word2VecVariables.java    From deeplearning4j with Apache License 2.0
@SuppressWarnings("unchecked")
public static <T> T assignVar(String variableName, SparkConf conf, Class clazz) throws Exception {
    Object ret;
    if (clazz.equals(Integer.class)) {
        ret = conf.getInt(variableName, (Integer) getDefault(variableName));

    } else if (clazz.equals(Double.class)) {
        ret = conf.getDouble(variableName, (Double) getDefault(variableName));

    } else if (clazz.equals(Boolean.class)) {
        ret = conf.getBoolean(variableName, (Boolean) getDefault(variableName));

    } else if (clazz.equals(String.class)) {
        ret = conf.get(variableName, (String) getDefault(variableName));

    } else if (clazz.equals(Long.class)) {
        ret = conf.getLong(variableName, (Long) getDefault(variableName));
    } else {
        throw new Exception("Variable type not supported. Only boolean, int, long, double and String are supported.");
    }
    return (T) ret;
}
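
A hypothetical call site for assignVar, assuming the Word2VecVariables key constants are visible to the caller; note that assignVar declares throws Exception, so the calls must be wrapped or propagated:

SparkConf conf = new SparkConf().set(Word2VecVariables.VECTOR_LENGTH, "300");
Integer vectorLength = Word2VecVariables.assignVar(Word2VecVariables.VECTOR_LENGTH, conf, Integer.class); // 300
Double alpha = Word2VecVariables.assignVar(Word2VecVariables.ALPHA, conf, Double.class); // falls back to getDefault(ALPHA)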
 
Example 3
Source File: Word2VecPerformerVoid.java    From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(ADAGRAD, false);
    negative = conf.getDouble(NEGATIVE, 5);
    numWords = conf.getInt(NUM_WORDS, 1);
    window = conf.getInt(WINDOW, 5);
    alpha = conf.getDouble(ALPHA, 0.025f);
    minAlpha = conf.getDouble(MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(NUM_WORDS, 1);
    iterations = conf.getInt(ITERATIONS, 5);
    vectorLength = conf.getInt(VECTOR_LENGTH, 100);

    initExpTable();

    if (negative > 0 && conf.contains(TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }
}
 
Example 4
Source File: Word2VecPerformer.java    From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(Word2VecVariables.ADAGRAD, false);
    negative = conf.getDouble(Word2VecVariables.NEGATIVE, 5);
    numWords = conf.getInt(Word2VecVariables.NUM_WORDS, 1);
    window = conf.getInt(Word2VecVariables.WINDOW, 5);
    alpha = conf.getDouble(Word2VecVariables.ALPHA, 0.025f);
    minAlpha = conf.getDouble(Word2VecVariables.MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(Word2VecVariables.NUM_WORDS, 1);
    vectorLength = conf.getInt(Word2VecVariables.VECTOR_LENGTH, 100);
    initExpTable();

    if (negative > 0 && conf.contains(Word2VecVariables.TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(Word2VecVariables.TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }

}
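
For either performer, the hyperparameters are expected to already be on the SparkConf as strings before setup(conf) runs. A sketch under the assumption that the Word2VecVariables constants hold the property names and are accessible from the caller:

SparkConf conf = new SparkConf().setAppName("word2vec-setup");
conf.set(Word2VecVariables.VECTOR_LENGTH, String.valueOf(300)); // read back by getInt(..., 100)
conf.set(Word2VecVariables.WINDOW, String.valueOf(8));          // read back by getInt(..., 5)
conf.set(Word2VecVariables.ALPHA, String.valueOf(0.05));        // read back by getDouble(..., 0.025f)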
 
Example 5
Source File: HBaseIndex.java    From hudi with Apache License 2.0
private void setPutBatchSize(JavaRDD<WriteStatus> writeStatusRDD,
    HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator, final JavaSparkContext jsc) {
  if (config.getHbaseIndexPutBatchSizeAutoCompute()) {
    SparkConf conf = jsc.getConf();
    int maxExecutors = conf.getInt(DEFAULT_SPARK_EXECUTOR_INSTANCES_CONFIG_NAME, 1);
    if (conf.getBoolean(DEFAULT_SPARK_DYNAMIC_ALLOCATION_ENABLED_CONFIG_NAME, false)) {
      maxExecutors =
          Math.max(maxExecutors, conf.getInt(DEFAULT_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS_CONFIG_NAME, 1));
    }

    /*
     * Each writeStatus represents status information from a write done in one of the IOHandles. If a writeStatus has
     * any insert, it implies that the corresponding task contacts HBase for doing puts, since we only do puts for
     * inserts from HBaseIndex.
     */
    final Tuple2<Long, Integer> numPutsParallelismTuple = getHBasePutAccessParallelism(writeStatusRDD);
    final long numPuts = numPutsParallelismTuple._1;
    final int hbasePutsParallelism = numPutsParallelismTuple._2;
    this.numRegionServersForTable = getNumRegionServersAliveForTable();
    final float desiredQPSFraction =
        hBaseIndexQPSResourceAllocator.calculateQPSFractionForPutsTime(numPuts, this.numRegionServersForTable);
    LOG.info("Desired QPSFraction :" + desiredQPSFraction);
    LOG.info("Number HBase puts :" + numPuts);
    LOG.info("Hbase Puts Parallelism :" + hbasePutsParallelism);
    final float availableQpsFraction =
        hBaseIndexQPSResourceAllocator.acquireQPSResources(desiredQPSFraction, numPuts);
    LOG.info("Allocated QPS Fraction :" + availableQpsFraction);
    multiPutBatchSize = putBatchSizeCalculator.getBatchSize(numRegionServersForTable, maxQpsPerRegionServer,
        hbasePutsParallelism, maxExecutors, SLEEP_TIME_MILLISECONDS, availableQpsFraction);
    LOG.info("multiPutBatchSize :" + multiPutBatchSize);
  }
}
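
The constant names in the example above are Hudi-side aliases for what are presumably the standard Spark properties; assuming that mapping, the same lookups with literal keys would read:

SparkConf conf = jsc.getConf();
int maxExecutors = conf.getInt("spark.executor.instances", 1);
if (conf.getBoolean("spark.dynamicAllocation.enabled", false)) {
    maxExecutors = Math.max(maxExecutors, conf.getInt("spark.dynamicAllocation.maxExecutors", 1));
}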
 
Example 6
Source File: SqoopSparkDriver.java    From sqoop-on-spark with Apache License 2.0
public static void execute(JobRequest request, SparkConf conf, JavaSparkContext sc)
    throws Exception {

  LOG.info("Executing sqoop spark job");

  long totalTime = System.currentTimeMillis();
  SparkPrefixContext driverContext = new SparkPrefixContext(request.getConf(),
      JobConstants.PREFIX_CONNECTOR_DRIVER_CONTEXT);

  int defaultExtractors = conf.getInt(DEFAULT_EXTRACTORS, 10);
  long numExtractors = (driverContext.getLong(JobConstants.JOB_ETL_EXTRACTOR_NUM,
      defaultExtractors));
  int numLoaders = conf.getInt(NUM_LOADERS, 1);

  List<Partition> sp = getPartitions(request, numExtractors);
  System.out.println(">>> Partition size:" + sp.size());

  JavaRDD<Partition> rdd = sc.parallelize(sp, sp.size());
  JavaRDD<List<IntermediateDataFormat<?>>> mapRDD = rdd.map(new SqoopExtractFunction(
      request));
  // if the configured number of loaders differs from the number of extractors,
  // repartition to adjust the loader parallelism
  if (numLoaders != numExtractors) {
    JavaRDD<List<IntermediateDataFormat<?>>> reParitionedRDD = mapRDD.repartition(numLoaders);
    System.out.println(">>> RePartition RDD size:" + reParitionedRDD.partitions().size());
    reParitionedRDD.mapPartitions(new SqoopLoadFunction(request)).collect();
  } else {
    System.out.println(">>> Mapped RDD size:" + mapRDD.partitions().size());
    mapRDD.mapPartitions(new SqoopLoadFunction(request)).collect();
  }

  System.out.println(">>> TOTAL time ms:" + (System.currentTimeMillis() - totalTime));

  LOG.info("Done EL in sqoop spark job, next call destroy apis");

}
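
A caller would typically set the extractor and loader counts on the SparkConf before invoking execute. The property keys behind DEFAULT_EXTRACTORS and NUM_LOADERS are project-specific, so the key strings below are placeholders only:

SparkConf conf = new SparkConf().setAppName("sqoop-spark-job");
conf.set("sqoop.spark.extractors", "20"); // placeholder key; substitute the value of DEFAULT_EXTRACTORS
conf.set("sqoop.spark.loaders", "4");     // placeholder key; substitute the value of NUM_LOADERS
// With the real keys set, conf.getInt(DEFAULT_EXTRACTORS, 10) and conf.getInt(NUM_LOADERS, 1)
// in execute(...) pick up 20 and 4 instead of the hard-coded defaults.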