Java Code Examples for org.apache.hadoop.mapred.JobConf#get()

The following examples show how to use org.apache.hadoop.mapred.JobConf#get(). Each snippet is taken from an open-source project; the source file and project are noted above each example.
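Before the project examples, here is a minimal, self-contained sketch (the class and property names are made up for illustration) of the two common forms of the call: get(String), which returns null for an unset key, and get(String, String), which falls back to a supplied default.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetDemo {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    conf.set("example.property", "hello");

    // get(String) returns null when the key is unset
    String present = conf.get("example.property");   // "hello"
    String missing = conf.get("no.such.property");   // null

    // get(String, String) substitutes the default for unset keys
    String fallback = conf.get("no.such.property", "default"); // "default"

    System.out.println(present + " / " + missing + " / " + fallback);
  }
}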
Example 1
Source File: StreamUtil.java    From hadoop-gpu with Apache License 2.0
public static TaskId getTaskInfo(JobConf job) {
  TaskId res = new TaskId();

  String id = job.get("mapred.task.id");
  if (isLocalJobTracker(job)) {
    // it uses different naming
    res.mapTask = job.getBoolean("mapred.task.is.map", true);
    res.jobid = "0";
    res.taskid = 0;
    res.execid = 0;
  } else {
    String[] e = id.split("_");
    res.mapTask = e[3].equals("m");
    res.jobid = e[1] + "_" + e[2];
    res.taskid = Integer.parseInt(e[4]);
    res.execid = Integer.parseInt(e[5]);
  }
  return res;
}
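For reference, outside of local mode the value of mapred.task.id is a task attempt ID. Using a hypothetical sample value, the split above decomposes it like this:

// Hypothetical sample value of "mapred.task.id":
//   attempt_200707121733_0003_m_000005_0
// id.split("_") yields {"attempt", "200707121733", "0003", "m", "000005", "0"}, so:
//   e[1] + "_" + e[2] -> "200707121733_0003" (job id)
//   e[3]              -> "m" (map task; "r" for a reduce task)
//   e[4]              -> task number 5
//   e[5]              -> execution attempt 0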
 
Example 2
Source File: DistCpV1.java    From hadoop with Apache License 2.0
static private void finalize(Configuration conf, JobConf jobconf,
    final Path destPath, String preservedAttributes) throws IOException {
  if (preservedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preserved = FileAttribute.parse(preservedAttributes);
  if (!preserved.contains(FileAttribute.USER)
      && !preserved.contains(FileAttribute.GROUP)
      && !preserved.contains(FileAttribute.PERMISSION)) {
    return;
  }

  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  try (SequenceFile.Reader in =
      new SequenceFile.Reader(jobconf, Reader.file(dstdirlist))) {
    Text dsttext = new Text();
    FilePair pair = new FilePair(); 
    for(; in.next(dsttext, pair); ) {
      Path absdst = new Path(destPath, pair.output);
      updateDestStatus(pair.input, dstfs.getFileStatus(absdst),
          preserved, dstfs);
    }
  }
}
 
Example 3
Source File: PipeMapRed.java    From RDFS with Apache License 2.0
void addJobConfToEnvironment(JobConf conf, Properties env) {
  if (debug_) {
    logprintln("addJobConfToEnvironment: begin");
  }
  Iterator it = conf.iterator();
  while (it.hasNext()) {
    Map.Entry en = (Map.Entry) it.next();
    String name = (String) en.getKey();
    //String value = (String)en.getValue(); // does not apply variable expansion
    String value = conf.get(name); // does variable expansion
    name = safeEnvVarName(name);
    envPut(env, name, value);
  }
  if (debug_) {
    logprintln("addJobConfToEnvironment: end");
  }
}
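This is how Hadoop Streaming exposes the job configuration to child processes: safeEnvVarName() rewrites characters that are illegal in environment variable names (such as '.') to underscores, so a key like mapred.task.id is published as mapred_task_id. Note also why the value is fetched with conf.get(name) rather than taken from the iterator entry: as the inline comment says, only the former applies variable expansion.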
 
Example 4
Source File: HadoopUtils.java    From flink with Apache License 2.0
/**
 * Merge HadoopConfiguration into JobConf. This is necessary for the HDFS configuration.
 */
public static void mergeHadoopConf(JobConf jobConf) {
	// we have to load the global configuration here, because the HadoopInputFormatBase does not
	// have access to a Flink configuration object
	org.apache.flink.configuration.Configuration flinkConfiguration = GlobalConfiguration.loadConfiguration();

	Configuration hadoopConf = getHadoopConfiguration(flinkConfiguration);
	for (Map.Entry<String, String> e : hadoopConf) {
		if (jobConf.get(e.getKey()) == null) {
			jobConf.set(e.getKey(), e.getValue());
		}
	}
}
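The jobConf.get(e.getKey()) == null check makes the merge non-destructive: keys already set on the JobConf always win, and only settings the job has not configured explicitly are filled in from the cluster-wide Hadoop configuration.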
 
Example 5
Source File: DistCp.java    From RDFS with Apache License 2.0
static private void finalize(Configuration conf, JobConf jobconf,
    final Path destPath, String preservedAttributes) throws IOException {
  if (preservedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preserved = FileAttribute.parse(preservedAttributes);
  if (!preserved.contains(FileAttribute.USER)
      && !preserved.contains(FileAttribute.GROUP)
      && !preserved.contains(FileAttribute.PERMISSION)) {
    return;
  }

  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  SequenceFile.Reader in = null;
  try {
    in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf),
        dstdirlist, jobconf);
    Text dsttext = new Text();
    FilePairComparable pair = new FilePairComparable(); 
    for(; in.next(dsttext, pair); ) {
      Path absdst = new Path(destPath, pair.output);
      updateDestStatus(pair.input, dstfs.getFileStatus(absdst),
          preserved, dstfs);
    }
  } finally {
    checkAndClose(in);
  }
}
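This is the older variant of the finalize() method shown in Example 2: the same attribute-preservation logic, but opening the SequenceFile through its FileSystem and closing it with checkAndClose() in a finally block, where the newer code uses try-with-resources.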
 
Example 6
Source File: DistCh.java    From big-c with Apache License 2.0
/**
 * Produce splits such that each contains no more files than the quotient
 * of the total file count and the number of splits requested.
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 */
public InputSplit[] getSplits(JobConf job, int numSplits
    ) throws IOException {
  final int srcCount = job.getInt(OP_COUNT_LABEL, -1);
  final int targetcount = srcCount / numSplits;
  String srclist = job.get(OP_LIST_LABEL, "");
  if (srcCount < 0 || "".equals(srclist)) {
    throw new RuntimeException("Invalid metadata: #files(" + srcCount +
                               ") listuri(" + srclist + ")");
  }
  Path srcs = new Path(srclist);
  FileSystem fs = srcs.getFileSystem(job);

  List<FileSplit> splits = new ArrayList<FileSplit>(numSplits);

  Text key = new Text();
  FileOperation value = new FileOperation();
  long prev = 0L;
  int count = 0; //count src
  try (SequenceFile.Reader in = new SequenceFile.Reader(fs, srcs, job)) {
    for ( ; in.next(key, value); ) {
      long curr = in.getPosition();
      long delta = curr - prev;
      if (++count > targetcount) {
        count = 0;
        splits.add(new FileSplit(srcs, prev, delta, (String[])null));
        prev = curr;
      }
    }
  }
  long remaining = fs.getFileStatus(srcs).getLen() - prev;
  if (remaining != 0) {
    splits.add(new FileSplit(srcs, prev, remaining, (String[])null));
  }
  LOG.info("numSplits="  + numSplits + ", splits.size()=" + splits.size());
  return splits.toArray(new FileSplit[splits.size()]);
}
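Because targetcount is the integer quotient srcCount / numSplits and a split is cut only once it exceeds that target, the number of splits actually produced can differ from numSplits; the LOG.info line on exit makes the difference visible.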
 
Example 7
Source File: HBaseInputFormatGranular.java    From SpyGlass with Apache License 2.0
/**
 * Validates the job configuration: expects exactly one table name and a
 * space-delimited list of columns.
 */
public void validateInput(JobConf job) throws IOException {
	// expecting exactly one path
	String tableName = getTableName(job);

	if (tableName == null) {
		throw new IOException("expecting one table name");
	}
	LOG.debug(String.format("Found Table name [%s]", tableName));

	// connected to table?
	if (getHTable() == null) {
		throw new IOException("could not connect to table '" + tableName + "'");
	}
	LOG.debug(String.format("Found Table [%s]", getHTable().getTableName()));

	// expecting at least one column
	String colArg = job.get(COLUMN_LIST);
	if (colArg == null || colArg.length() == 0) {
		throw new IOException("expecting at least one column");
	}
	LOG.debug(String.format("Found Columns [%s]", colArg));

	LOG.debug(String.format("Found Start & STop Key [%s][%s]", startKey,
			stopKey));

	if (sourceMode == SourceMode.EMPTY) {
		throw new IOException("SourceMode should not be EMPTY");
	}

	if (sourceMode == SourceMode.GET_LIST
			&& (keyList == null || keyList.size() == 0)) {
		throw new IOException("Source mode is GET_LIST bu key list is empty");
	}
}
 
Example 8
Source File: UtilsForTests.java    From RDFS with Apache License 2.0
public void configure(JobConf conf) {
  try {
    String taskId = conf.get("mapred.task.id");
    id = Integer.parseInt(taskId.split("_")[4]);
    totalMaps = Integer.parseInt(conf.get("mapred.map.tasks"));
    fs = FileSystem.get(conf);
    signal = new Path(conf.get(getTaskSignalParameter(true)));
  } catch (IOException ioe) {
    System.out.println("Got an exception while obtaining the filesystem");
  }
}
 
Example 9
Source File: PipeMapRed.java    From hadoop with Apache License 2.0
void addJobConfToEnvironment(JobConf jobconf, Properties env) {
  JobConf conf = new JobConf(jobconf);
  conf.setDeprecatedProperties();
  Iterator it = conf.iterator();
  while (it.hasNext()) {
    Map.Entry en = (Map.Entry) it.next();
    String name = (String) en.getKey();
    //String value = (String)en.getValue(); // does not apply variable expansion
    String value = conf.get(name); // does variable expansion 
    name = safeEnvVarName(name);
    envPut(env, name, value);
  }
}
 
Example 10
Source File: MerReduce.java    From emr-sample-apps with Apache License 2.0
public void configure(JobConf conf) 
{
	curfile = conf.get("map.input.file");
	refpath = conf.get("refpath");
	ISREF = (curfile.indexOf(refpath) != -1);
	
	MIN_READ_LEN = Integer.parseInt(conf.get("MIN_READ_LEN"));
	MAX_READ_LEN = Integer.parseInt(conf.get("MAX_READ_LEN"));
	SEED_LEN     = Integer.parseInt(conf.get("SEED_LEN"));
	FLANK_LEN    = Integer.parseInt(conf.get("FLANK_LEN"));
	K            = Integer.parseInt(conf.get("K"));
	REDUNDANCY   = Integer.parseInt(conf.get("REDUNDANCY"));
	
	seedbuffer   = new byte[DNAString.arrToSeedLen(SEED_LEN, REDUNDANCY)];
}
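One caveat with this pattern: Integer.parseInt(conf.get(name)) throws a NumberFormatException when the key is absent, because parseInt rejects null, so every property above must be set on the job. Where a sensible default exists, conf.getInt(name, defaultValue) is the more defensive idiom; a hypothetical variant:

// Hypothetical defensive variant: fall back to a default instead of
// failing when "MIN_READ_LEN" is missing from the configuration.
MIN_READ_LEN = conf.getInt("MIN_READ_LEN", 36);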
 
Example 11
Source File: MapReduceTracer.java    From garmadon with Apache License 2.0
public static void intercept(@Argument(1) JobConf jobConf) {
    String paths = (jobConf.get(FILE_OUTPUT_FORMAT_OUTPUT_DIR) != null) ?
            jobConf.get(FILE_OUTPUT_FORMAT_OUTPUT_DIR) : jobConf.get(DEPRECATED_FILE_OUTPUT_FORMAT_OUTPUT_DIR);
    if (paths != null) {
        DataAccessEventProtos.PathEvent pathEvent = DataAccessEventProtos.PathEvent
                .newBuilder()
                .setPath(paths)
                .setType(PathType.OUTPUT.name())
                .build();
        eventHandler.accept(System.currentTimeMillis(), pathEvent);
    }
}
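The ternary implements a simple fallback read: the current output-directory key is tried first, and the deprecated key is consulted only when the first lookup returns null.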
 
Example 12
Source File: MultipleInputs.java    From hadoop-gpu with Apache License 2.0
/**
 * Add a {@link Path} with a custom {@link InputFormat} to the list of
 * inputs for the map-reduce job.
 * 
 * @param conf The configuration of the job
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this path
 */
public static void addInputPath(JobConf conf, Path path,
    Class<? extends InputFormat> inputFormatClass) {

  String inputFormatMapping = path.toString() + ";"
     + inputFormatClass.getName();
  String inputFormats = conf.get("mapred.input.dir.formats");
  conf.set("mapred.input.dir.formats",
     inputFormats == null ? inputFormatMapping : inputFormats + ","
         + inputFormatMapping);

  conf.setInputFormat(DelegatingInputFormat.class);
}
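As a usage sketch (the paths are hypothetical), two successive calls accumulate path;format pairs into the comma-separated mapred.input.dir.formats property:

JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/data/a"), TextInputFormat.class);
MultipleInputs.addInputPath(conf, new Path("/data/b"), SequenceFileInputFormat.class);
// "mapred.input.dir.formats" now holds:
//   /data/a;org.apache.hadoop.mapred.TextInputFormat,/data/b;org.apache.hadoop.mapred.SequenceFileInputFormat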
 
Example 13
Source File: PipeMapRed.java    From hadoop-gpu with Apache License 2.0
void setStreamJobDetails(JobConf job) {
  jobLog_ = job.get("stream.jobLog_");
  String s = job.get("stream.minRecWrittenToEnableSkip_");
  if (s != null) {
    minRecWrittenToEnableSkip_ = Long.parseLong(s);
    logprintln("JobConf set minRecWrittenToEnableSkip_ =" + minRecWrittenToEnableSkip_);
  }
  taskId_ = StreamUtil.getTaskInfo(job_);
}
 
Example 14
Source File: InfrastructureAnalyzer.java    From systemds with Apache License 2.0
/**
 * Analyzes only properties of the Hadoop configuration in order to avoid
 * an expensive call for cluster status.
 */
private static void analyzeHadoopConfiguration() {
	JobConf job = ConfigurationManager.getCachedJobConf();
	
	//HDFS blocksize
	String blocksize = job.get(HDFSTool.DFS_BLOCKSIZE, "134217728");
	_blocksize = Long.parseLong(blocksize);
	
	//analyze if local mode (internally requires yarn_enabled)
	_localJT = analyzeLocalMode(job);
}
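The fallback value "134217728" is 128 MB (128 * 1024 * 1024 bytes), matching the default HDFS block size, so the analysis still yields a sensible block size when the property is not set.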
 
Example 15
Source File: HadoopV2JobResourceManager.java    From ignite with Apache License 2.0
/**
 * Prepare job resources. Resolve the classpath list and download it if needed.
 *
 * @param download {@code true} if resources need to be downloaded.
 * @param jobLocDir Work directory for the job.
 * @throws IgniteCheckedException If failed.
 */
public void prepareJobEnvironment(boolean download, File jobLocDir) throws IgniteCheckedException {
    try {
        if (jobLocDir.exists())
            throw new IgniteCheckedException("Local job directory already exists: " + jobLocDir.getAbsolutePath());

        JobConf cfg = ctx.getJobConf();

        Collection<URL> clsPathUrls = new ArrayList<>();

        String mrDir = cfg.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (mrDir != null) {
            stagingDir = new Path(new URI(mrDir));

            if (download) {
                FileSystem fs = job.fileSystem(stagingDir.toUri(), cfg);

                if (!fs.exists(stagingDir))
                    throw new IgniteCheckedException("Failed to find map-reduce submission " +
                        "directory (does not exist): " + stagingDir);

                if (!FileUtil.copy(fs, stagingDir, jobLocDir, false, cfg))
                    throw new IgniteCheckedException("Failed to copy job submission directory "
                        + "contents to local file system "
                        + "[path=" + stagingDir + ", locDir=" + jobLocDir.getAbsolutePath()
                        + ", jobId=" + jobId + ']');
            }

            File jarJobFile = new File(jobLocDir, "job.jar");

            clsPathUrls.add(jarJobFile.toURI().toURL());

            rsrcSet.add(jarJobFile);
            rsrcSet.add(new File(jobLocDir, "job.xml"));
        }
        else if (!jobLocDir.mkdirs())
            throw new IgniteCheckedException("Failed to create local job directory: "
                + jobLocDir.getAbsolutePath());

        processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES);
        processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null, MRJobConfig.CACHE_LOCALARCHIVES);
        processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, clsPathUrls, null);
        processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, clsPathUrls, null);

        if (!clsPathUrls.isEmpty())
            clsPath = clsPathUrls.toArray(new URL[clsPathUrls.size()]);

        setLocalFSWorkingDirectory(jobLocDir);
    }
    catch (URISyntaxException | IOException e) {
        throw new IgniteCheckedException(e);
    }
}
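Here JobConf#get(MRJobConfig.MAPREDUCE_JOB_DIR) locates the job's staging directory. When the key is absent, the manager simply creates an empty local job directory and skips the staging-directory download, relying on the cache-file and classpath processing that follows.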
 
Example 16
Source File: DistCpV1.java    From big-c with Apache License 2.0
/**
 * Produce splits such that each is no greater than the quotient of the
 * total size and the number of splits requested.
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 */
public InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {
  int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
  long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
  String srcfilelist = job.get(SRC_LIST_LABEL, "");
  if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) {
    throw new RuntimeException("Invalid metadata: #files(" + cnfiles +
                               ") total_size(" + cbsize + ") listuri(" +
                               srcfilelist + ")");
  }
  Path src = new Path(srcfilelist);
  FileSystem fs = src.getFileSystem(job);
  FileStatus srcst = fs.getFileStatus(src);

  ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
  LongWritable key = new LongWritable();
  FilePair value = new FilePair();
  final long targetsize = cbsize / numSplits;
  long pos = 0L;
  long last = 0L;
  long acc = 0L;
  long cbrem = srcst.getLen();
  try (SequenceFile.Reader sl =
      new SequenceFile.Reader(job, Reader.file(src))) {
    for (; sl.next(key, value); last = sl.getPosition()) {
      // if adding this file would push the split past the target size,
      // cut the split here and start this file in the next split.
      if (acc + key.get() > targetsize && acc != 0) {
        long splitsize = last - pos;
        splits.add(new FileSplit(src, pos, splitsize, (String[])null));
        cbrem -= splitsize;
        pos = last;
        acc = 0L;
      }
      acc += key.get();
    }
  }
  if (cbrem != 0) {
    splits.add(new FileSplit(src, pos, cbrem, (String[])null));
  }

  return splits.toArray(new FileSplit[splits.size()]);
}
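The loop packs whole files greedily: each SequenceFile key carries a file's size, and once the accumulated size would exceed the per-split target, the split is cut at the last record boundary reported by getPosition(). Whatever remains afterwards (cbrem) becomes the final split, so the file list is covered end to end.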
 
Example 17
Source File: HadoopMapFunctionITCase.java    From flink with Apache License 2.0
@Override
public void configure(JobConf c) {
	filterPrefix = c.get("my.filterPrefix");
}
 
Example 18
Source File: TermVectorPerHost.java    From big-c with Apache License 2.0
public void configure(JobConf conf){
  path = conf.get("map.input.file");
}
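In the old mapred API, the framework populates map.input.file with the path of the file the current map task is reading; Example 10 keys its behavior off the same property.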
 
Example 19
Source File: Submitter.java    From big-c with Apache License 2.0
/**
 * Get the URI of the application's executable.
 * @param conf the configuration of the job
 * @return the URI where the application's executable is located
 */
public static String getExecutable(JobConf conf) {
  return conf.get(Submitter.EXECUTABLE);
}
 
Example 20
Source File: Submitter.java    From RDFS with Apache License 2.0
/**
 * Get the URI of the application's executable.
 * @param conf the configuration of the job
 * @return the URI where the application's executable is located
 */
public static String getExecutable(JobConf conf) {
  return conf.get("hadoop.pipes.executable");
}
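Examples 19 and 20 are the same accessor from two codebases: the newer version reads the key through the Submitter.EXECUTABLE constant, while the older RDFS fork hard-codes the legacy literal "hadoop.pipes.executable".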