Java Code Examples for org.apache.hadoop.mapred.JobConf#getBoolean()

The following examples show how to use org.apache.hadoop.mapred.JobConf#getBoolean(). They are drawn from several open source projects; the originating project and source file are noted above each example.
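Before the project examples, here is a minimal, self-contained sketch of the call itself. The property name "example.feature.enabled" is hypothetical and used only for illustration; getBoolean(name, defaultValue) returns the property parsed as a boolean, or defaultValue when the property is unset or not a valid boolean.

import org.apache.hadoop.mapred.JobConf;

public class GetBooleanSketch {
  public static void main(String[] args) {
    // JobConf extends Configuration, so the usual typed accessors are available.
    JobConf conf = new JobConf();
    // Hypothetical flag, set here programmatically; in a real job it would
    // typically come from a configuration file or the command line.
    conf.setBoolean("example.feature.enabled", true);
    // The second argument is the default returned when the property is missing
    // or cannot be parsed as a boolean.
    boolean enabled = conf.getBoolean("example.feature.enabled", false);
    System.out.println("enabled = " + enabled);
  }
}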
Example 1
Source File: SolrIndexWriter.java    From nutch-htmlunit with Apache License 2.0
void init(SolrServer server, JobConf job) throws IOException {
    solr = server;
    batchSize = job.getInt(SolrConstants.COMMIT_SIZE, 1000);
    solrMapping = SolrMappingReader.getInstance(job);
    delete = job.getBoolean(IndexerMapReduce.INDEXER_DELETE, false);
    // parse optional params
    params = new ModifiableSolrParams();
    String paramString = job.get(IndexerMapReduce.INDEXER_PARAMS);
    if (paramString != null) {
        String[] values = paramString.split("&");
        for (String v : values) {
            String[] kv = v.split("=");
            if (kv.length < 2) {
                continue;
            }
            params.add(kv[0], kv[1]);
        }
    }
}
 
Example 2
Source File: StreamUtil.java    From RDFS with Apache License 2.0
public static TaskId getTaskInfo(JobConf job) {
  TaskId res = new TaskId();

  String id = job.get("mapred.task.id");
  if (isLocalJobTracker(job)) {
    // the local job tracker uses different task naming
    res.mapTask = job.getBoolean("mapred.task.is.map", true);
    res.jobid = "0";
    res.taskid = 0;
    res.execid = 0;
  } else {
    String[] e = id.split("_");
    res.mapTask = e[3].equals("m");
    res.jobid = e[1] + "_" + e[2];
    res.taskid = Integer.parseInt(e[4]);
    res.execid = Integer.parseInt(e[5]);
  }
  return res;
}
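To make the ID parsing in the non-local branch concrete, here is a small runnable sketch; the attempt ID value is representative only (in a real task it is read from the "mapred.task.id" property):

public class TaskIdSplitSketch {
  public static void main(String[] args) {
    // Representative attempt ID of the form "attempt_<jobtracker start>_<job>_<m|r>_<task>_<attempt>".
    String id = "attempt_200707121733_0003_m_000005_0";
    String[] e = id.split("_");
    boolean mapTask = e[3].equals("m");   // true -> map task, "r" -> reduce task
    String jobid = e[1] + "_" + e[2];     // "200707121733_0003"
    int taskid = Integer.parseInt(e[4]);  // 5
    int execid = Integer.parseInt(e[5]);  // 0
    System.out.println(mapTask + " " + jobid + " " + taskid + " " + execid);
  }
}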
 
Example 3
Source File: DistCp.java    From hadoop-gpu with Apache License 2.0
/** Mapper configuration.
 * Extracts source and destination file system, as well as
 * top-level paths on source and destination directories.
 * Gets the named file systems, to be used later in map.
 */
public void configure(JobConf job)
{
  destPath = new Path(job.get(DST_DIR_LABEL, "/"));
  try {
    destFileSys = destPath.getFileSystem(job);
  } catch (IOException ex) {
    throw new RuntimeException("Unable to get the named file system.", ex);
  }
  sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
  buffer = new byte[sizeBuf];
  ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
  preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
  if (preserve_status) {
    preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
  }
  update = job.getBoolean(Options.UPDATE.propertyname, false);
  overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
  this.job = job;
}
 
Example 4
Source File: IndexerMapReduce.java    From anthelion with Apache License 2.0
public void configure(JobConf job) {
  setConf(job);
  this.filters = new IndexingFilters(getConf());
  this.scfilters = new ScoringFilters(getConf());
  this.delete = job.getBoolean(INDEXER_DELETE, false);
  this.deleteRobotsNoIndex = job.getBoolean(INDEXER_DELETE_ROBOTS_NOINDEX, false);
  this.skip = job.getBoolean(INDEXER_SKIP_NOTMODIFIED, false);

  normalize = job.getBoolean(URL_NORMALIZING, false);
  filter = job.getBoolean(URL_FILTERING, false);

  if (normalize) {
    urlNormalizers = new URLNormalizers(getConf(), URLNormalizers.SCOPE_DEFAULT);
  }

  if (filter) {
    urlFilters = new URLFilters(getConf());
  }
}
 
Example 5
Source File: PipeReducer.java    From RDFS with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  // Disable the auto-increment of the counter. For streaming, the number of
  // processed records may differ from (be equal to or less than) the number
  // of input records.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);

  try {
    reduceOutFieldSeparator = job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator = job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields = job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example 6
Source File: WebGraph.java    From nutch-htmlunit with Apache License 2.0
/**
 * Configures the OutlinkDb job. Sets up internal links and link limiting.
 */
public void configure(JobConf conf) {
  this.conf = conf;
  ignoreHost = conf.getBoolean("link.ignore.internal.host", true);
  ignoreDomain = conf.getBoolean("link.ignore.internal.domain", true);
  limitPages = conf.getBoolean("link.ignore.limit.page", true);
  limitDomains = conf.getBoolean("link.ignore.limit.domain", true);

  normalize = conf.getBoolean(URL_NORMALIZING, false);
  filter = conf.getBoolean(URL_FILTERING, false);

  if (normalize) {
    urlNormalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_DEFAULT);
  }

  if (filter) {
    filters = new URLFilters(conf);
  }
}
 
Example 7
Source File: SipsIngestMapper.java    From hadoop-solr with Apache License 2.0
@Override
public void init(JobConf conf) throws IOException {
  boolean override = conf.getBoolean(IngestJob.INPUT_FORMAT_OVERRIDE, false);
  if (!override) {
    conf.setInputFormat(SequenceFileInputFormat.class);
  } // else the user has overridden the input format and we assume it is OK.
}
 
Example 8
Source File: CleaningJob.java    From nutch-htmlunit with Apache License 2.0
@Override
public void configure(JobConf job) {
    writers = new IndexWriters(job);
    try {
        writers.open(job, "Deletion");
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    noCommit = job.getBoolean("noCommit", false);
}
 
Example 9
Source File: NodeDumper.java    From anthelion with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;
  this.inlinks = conf.getBoolean("inlinks", false);
  this.outlinks = conf.getBoolean("outlinks", false);
  this.scores = conf.getBoolean("scores", true);
  this.topn = conf.getLong("topn", Long.MAX_VALUE);
  this.host = conf.getBoolean("host", false);
  this.domain = conf.getBoolean("domain", false);
  this.sum = conf.getBoolean("sum", false);
  this.max = conf.getBoolean("max", false);
}
 
Example 10
Source File: LinkDbFilter.java    From anthelion with Apache License 2.0
public void configure(JobConf job) {
  filter = job.getBoolean(URL_FILTERING, false);
  normalize = job.getBoolean(URL_NORMALIZING, false);
  if (filter) {
    filters = new URLFilters(job);
  }
  if (normalize) {
    scope = job.get(URL_NORMALIZING_SCOPE, URLNormalizers.SCOPE_LINKDB);
    normalizers = new URLNormalizers(job, scope);
  }
}
 
Example 11
Source File: IngestJob.java    From hadoop-solr with Apache License 2.0
public void doFinalCommit(JobConf conf, RunningJob job) {
  if (conf.getBoolean("lww.commit.on.close", false) && checkSolrOrZkString(conf)) {
    String jobName = job.getJobName();
    log.info("Performing final commit for job " + jobName);
    // Progress can be null here, because no write operation is performed.
    LucidWorksWriter lww = new LucidWorksWriter(null);
    try {
      lww.open(conf, jobName);
      lww.commit();
    } catch (Exception e) {
      log.error("Error in final job commit", e);
    }
  }
}
 
Example 12
Source File: PipesReducer.java    From RDFS with Apache License 2.0
public void configure(JobConf job) {
  this.job = job;
  // Disable the auto-increment of the counter. For pipes, the number of
  // processed records may differ from (be equal to or less than) the number
  // of input records.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
}
 
Example 13
Source File: PipesReducer.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf job) {
  this.job = job;
  // Disable the auto-increment of the counter. For pipes, the number of
  // processed records may differ from (be equal to or less than) the number
  // of input records.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
}
 
Example 14
Source File: SolrClean.java    From anthelion with Apache License 2.0
@Override
public void configure(JobConf job) {
  try {
    solr = SolrUtils.getCommonsHttpSolrServer(job);
    noCommit = job.getBoolean("noCommit", false);
  } catch (MalformedURLException e) {
    throw new RuntimeException(e);
  }
}
 
Example 15
Source File: TestFileSystem.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf job) {
  setConf(job);
  fastCheck = job.getBoolean("fs.test.fastCheck", false);
}
 
Example 16
Source File: TestFileSystem.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf job) {
  setConf(job);
  fastCheck = job.getBoolean("fs.test.fastCheck", false);
}
 
Example 17
Source File: Submitter.java    From hadoop-gpu with Apache License 2.0
/**
 * Does the user want to keep the command file for debugging? If this is
 * true, pipes will write a copy of the command data to a file in the
 * task directory named "downlink.data", which may be used to run the C++
 * program under the debugger. You probably also want to set 
 * JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from
 * being deleted.
 * To run using the data file, set the environment variable 
 * "hadoop.pipes.command.file" to point to the file.
 * @param conf the configuration to check
 * @return will the framework save the command file?
 */
public static boolean getKeepCommandFile(JobConf conf) {
  return conf.getBoolean("hadoop.pipes.command-file.keep", false);
}
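As a usage note, here is a hypothetical driver-side sketch that would enable the behaviour described above for this hadoop-gpu version. The property name matches the one read by getKeepCommandFile(), and keeping failed task files follows the javadoc's suggestion; the class name is illustrative only.

import org.apache.hadoop.mapred.JobConf;

public class KeepCommandFileSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // Keep the pipes command data ("downlink.data") in the task directory so
    // the C++ binary can later be re-run under a debugger.
    conf.setBoolean("hadoop.pipes.command-file.keep", true);
    // Keep failed task directories so the command file is not cleaned up.
    conf.setKeepFailedTaskFiles(true);
    System.out.println("keep command file: "
        + conf.getBoolean("hadoop.pipes.command-file.keep", false));
  }
}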
 
Example 18
Source File: Submitter.java    From hadoop with Apache License 2.0
/**
 * Check whether the job is using a Java Reducer.
 * @param conf the configuration to check
 * @return is it a Java Reducer?
 */
public static boolean getIsJavaReducer(JobConf conf) {
  return conf.getBoolean(Submitter.IS_JAVA_REDUCE, false);
}
 
Example 19
Source File: Submitter.java    From big-c with Apache License 2.0
/**
 * Does the user want to keep the command file for debugging? If this is
 * true, pipes will write a copy of the command data to a file in the
 * task directory named "downlink.data", which may be used to run the C++
 * program under the debugger. You probably also want to set 
 * JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from
 * being deleted.
 * To run using the data file, set the environment variable 
 * "mapreduce.pipes.commandfile" to point to the file.
 * @param conf the configuration to check
 * @return will the framework save the command file?
 */
public static boolean getKeepCommandFile(JobConf conf) {
  return conf.getBoolean(Submitter.PRESERVE_COMMANDFILE, false);
}
 
Example 20
Source File: Submitter.java    From RDFS with Apache License 2.0
/**
 * Does the user want to keep the command file for debugging? If this is
 * true, pipes will write a copy of the command data to a file in the
 * task directory named "downlink.data", which may be used to run the C++
 * program under the debugger. You probably also want to set 
 * JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from
 * being deleted.
 * To run using the data file, set the environment variable 
 * "hadoop.pipes.command.file" to point to the file.
 * @param conf the configuration to check
 * @return will the framework save the command file?
 */
public static boolean getKeepCommandFile(JobConf conf) {
  return conf.getBoolean("hadoop.pipes.command-file.keep", false);
}