Java Code Examples for org.apache.hadoop.mapred.JobConf#getUser()

The following examples show how to use org.apache.hadoop.mapred.JobConf#getUser(). Each example notes the project it was taken from and that project's license.
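Before looking at the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of what getUser() reads and returns; the class name GetUserSketch and the user name "alice" are arbitrary placeholders:

import org.apache.hadoop.mapred.JobConf;

public class GetUserSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // getUser() returns the user name recorded in the job configuration.
    // For a freshly created JobConf that has not been through job submission,
    // nothing has been recorded yet, so this is typically null.
    System.out.println("user before setUser(): " + conf.getUser());

    // setUser(String) stores a user name in the configuration; getUser()
    // then returns that value.
    conf.setUser("alice");
    System.out.println("user after setUser():  " + conf.getUser());
  }
}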
Example 1
Source File: HBaseTap.java    From SpyGlass with Apache License 2.0
private void obtainToken(JobConf conf) {
  if (User.isHBaseSecurityEnabled(conf)) {
    String user = conf.getUser();
    LOG.info("obtaining HBase token for: {}", user);
    try {
      UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
      user = currentUser.getUserName();
      Credentials credentials = conf.getCredentials();
      for (Token t : currentUser.getTokens()) {
        LOG.debug("Token {} is available", t);
        if ("HBASE_AUTH_TOKEN".equalsIgnoreCase(t.getKind().toString()))
          credentials.addToken(t.getKind(), t);
      }
    } catch (IOException e) {
      throw new TapException("Unable to obtain HBase auth token for " + user, e);
    }
  }
}
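Note that in this example the value returned by conf.getUser() is only used for the initial log message; the user that ends up in the error message is re-read from UserGroupInformation.getCurrentUser(). Below is a minimal sketch of a more defensive fallback pattern that keeps the JobConf value when it is present; the helper name resolveUser is hypothetical and not part of SpyGlass:

import java.io.IOException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.UserGroupInformation;

public class UserFallbackSketch {
  // Hypothetical helper: prefer the user recorded in the JobConf and fall
  // back to the current Hadoop login user when the configuration has none.
  static String resolveUser(JobConf conf) throws IOException {
    String user = conf.getUser();   // may be null if the job never set it
    if (user == null || user.isEmpty()) {
      user = UserGroupInformation.getCurrentUser().getUserName();
    }
    return user;
  }
}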
 
Example 2
Source File: DistributedCacheEmulator.java    From hadoop with Apache License 2.0
/**
 * For the job to be simulated, identify the needed distributed cache files by
 * mapping the original cluster's distributed cache file paths to the simulated
 * cluster's paths and adding these paths to the map {@code distCacheFiles}.
 *<br>
 * JobStory should contain distributed cache related properties like
 * <li> {@link MRJobConfig#CACHE_FILES}
 * <li> {@link MRJobConfig#CACHE_FILE_VISIBILITIES}
 * <li> {@link MRJobConfig#CACHE_FILES_SIZES}
 * <li> {@link MRJobConfig#CACHE_FILE_TIMESTAMPS}
 * <li> {@link MRJobConfig#CLASSPATH_FILES}
 *
 * <li> {@link MRJobConfig#CACHE_ARCHIVES}
 * <li> {@link MRJobConfig#CACHE_ARCHIVES_VISIBILITIES}
 * <li> {@link MRJobConfig#CACHE_ARCHIVES_SIZES}
 * <li> {@link MRJobConfig#CACHE_ARCHIVES_TIMESTAMPS}
 * <li> {@link MRJobConfig#CLASSPATH_ARCHIVES}
 *
 * <li> {@link MRJobConfig#CACHE_SYMLINK}
 *
 * @param jobdesc JobStory of original job obtained from trace
 * @throws IOException
 */
void updateHDFSDistCacheFilesList(JobStory jobdesc) throws IOException {

  // Map original job's distributed cache file paths to simulated cluster's
  // paths, to be used by this simulated job.
  JobConf jobConf = jobdesc.getJobConf();

  String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
  if (files != null) {

    String[] fileSizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
    String[] visibilities =
      jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
    String[] timeStamps =
      jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);

    FileSystem fs = FileSystem.get(conf);
    String user = jobConf.getUser();
    for (int i = 0; i < files.length; i++) {
      // Check if visibilities are available because older hadoop versions
      // didn't have public, private Distributed Caches separately.
      boolean visibility =
          (visibilities == null) ? true : Boolean.valueOf(visibilities[i]);
      if (isLocalDistCacheFile(files[i], user, visibility)) {
        // local FS based distributed cache file.
        // Create this file on the pseudo local FS on the fly (i.e. when the
        // simulated job is submitted).
        continue;
      }
      // distributed cache file on hdfs
      String mappedPath = mapDistCacheFilePath(files[i], timeStamps[i],
                                               visibility, user);

      // No need to add a distributed cache file path to the list if
      // (1) the mapped path is already there in the list OR
      // (2) the file with the mapped path already exists.
      // In any of the above 2 cases, file paths, timestamps, file sizes and
      // visibilities match. File sizes should match if file paths and
      // timestamps match because single file path with single timestamp
      // should correspond to a single file size.
      if (distCacheFiles.containsKey(mappedPath) ||
          fs.exists(new Path(mappedPath))) {
        continue;
      }
      distCacheFiles.put(mappedPath, Long.valueOf(fileSizes[i]));
    }
  }
}
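The loop above relies on the distributed-cache properties being parallel arrays: the i-th entry of CACHE_FILES corresponds to the i-th entries of CACHE_FILES_SIZES, CACHE_FILE_TIMESTAMPS and CACHE_FILE_VISIBILITIES. The following standalone sketch (the class name and all values are made up for illustration) shows how those arrays line up in a JobConf and where getUser() fits in:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class DistCachePropsSketch {
  public static void main(String[] args) {
    JobConf jobConf = new JobConf();

    // Made-up values: two cache files whose sizes, timestamps and
    // visibilities sit at the same indices in the parallel properties.
    jobConf.setStrings(MRJobConfig.CACHE_FILES,
        "hdfs:///tmp/lookup.dat", "file:///home/user/local.dat");
    jobConf.setStrings(MRJobConfig.CACHE_FILES_SIZES, "1024", "2048");
    jobConf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS,
        "1400000000000", "1400000000001");
    jobConf.setStrings(MRJobConfig.CACHE_FILE_VISIBILITIES, "true", "false");

    String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
    String[] sizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
    String[] stamps = jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
    String[] vis = jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);

    // Null here; the framework records the submitting user at submission time.
    String user = jobConf.getUser();
    for (int i = 0; i < files.length; i++) {
      System.out.printf("user=%s file=%s size=%s timestamp=%s public=%s%n",
          user, files[i], sizes[i], stamps[i], vis[i]);
    }
  }
}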
 
Example 3
Source File: DistributedCacheEmulator.java    From hadoop with Apache License 2.0
/**
 * If gridmix needs to emulate distributed cache load, then configure
 * distributed cache files of a simulated job by mapping the original
 * cluster's distributed cache file paths to the simulated cluster's paths and
 * setting these mapped paths in the job configuration of the simulated job.
 * <br>
 * Configure local FS based distributed cache files through the property
 * "tmpfiles" and hdfs based distributed cache files through the property
 * {@link MRJobConfig#CACHE_FILES}.
 * @param conf configuration for the simulated job to be run
 * @param jobConf job configuration of original cluster's job, obtained from
 *                trace
 * @throws IOException
 */
void configureDistCacheFiles(Configuration conf, JobConf jobConf)
    throws IOException {
  if (shouldEmulateDistCacheLoad()) {

    String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
    if (files != null) {
      // hdfs based distributed cache files to be configured for simulated job
      List<String> cacheFiles = new ArrayList<String>();
      // local FS based distributed cache files to be configured for
      // simulated job
      List<String> localCacheFiles = new ArrayList<String>();

      String[] visibilities =
        jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
      String[] timeStamps =
        jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
      String[] fileSizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);

      String user = jobConf.getUser();
      for (int i = 0; i < files.length; i++) {
        // Check if visibilities are available because older hadoop versions
        // didn't have public, private Distributed Caches separately.
        boolean visibility =
          (visibilities == null) ? true : Boolean.valueOf(visibilities[i]);
        if (isLocalDistCacheFile(files[i], user, visibility)) {
          // local FS based distributed cache file.
          // Create this file on the pseudo local FS.
          String fileId = MD5Hash.digest(files[i] + timeStamps[i]).toString();
          long fileSize = Long.parseLong(fileSizes[i]);
          Path mappedLocalFilePath =
              PseudoLocalFs.generateFilePath(fileId, fileSize)
                  .makeQualified(pseudoLocalFs.getUri(),
                                 pseudoLocalFs.getWorkingDirectory());
          pseudoLocalFs.create(mappedLocalFilePath);
          localCacheFiles.add(mappedLocalFilePath.toUri().toString());
        } else {
          // hdfs based distributed cache file.
          // Get the mapped HDFS path on simulated cluster
          String mappedPath = mapDistCacheFilePath(files[i], timeStamps[i],
                                                   visibility, user);
          cacheFiles.add(mappedPath);
        }
      }
      if (cacheFiles.size() > 0) {
        // configure hdfs based distributed cache files for simulated job
        conf.setStrings(MRJobConfig.CACHE_FILES,
                        cacheFiles.toArray(new String[cacheFiles.size()]));
      }
      if (localCacheFiles.size() > 0) {
        // configure local FS based distributed cache files for simulated job
        conf.setStrings("tmpfiles", localCacheFiles.toArray(
                                      new String[localCacheFiles.size()]));
      }
    }
  }
}
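A note on the two properties used above: MRJobConfig.CACHE_FILES lists distributed cache files that already live on the cluster file system, while "tmpfiles" is the client-side property (the same one the -files generic option populates) for local files that the client uploads at job submission time. Splitting the mapped paths across these two properties lets the simulated job exercise both code paths of the distributed cache.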
 