Java Code Examples for org.apache.hadoop.mapred.JobConf#setReduceSpeculativeExecution()

The following examples show how to use org.apache.hadoop.mapred.JobConf#setReduceSpeculativeExecution(). The examples are drawn from open source projects; follow the link above each example to view the original project or source file.
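Before the project examples, here is a minimal, self-contained sketch of the call in isolation; the job name is a placeholder. In the old mapred API, this setter writes the boolean property mapred.reduce.tasks.speculative.execution, and map-side speculation is controlled independently via setMapSpeculativeExecution().

import org.apache.hadoop.mapred.JobConf;

public class SpeculationSettingsExample {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setJobName("speculation-settings-example"); // placeholder name

        // Disable speculative execution for reduce tasks. Jobs whose
        // reducers have side effects (e.g. writing to a database or an
        // external index) typically do this, as the examples below show.
        job.setReduceSpeculativeExecution(false);

        // Equivalent to setting the property directly:
        // job.setBoolean("mapred.reduce.tasks.speculative.execution", false);
        System.out.println(job.getReduceSpeculativeExecution()); // prints false
    }
}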
Example 1
Source File: ReadExistingDataJob.java    From tracing-framework with BSD 3-Clause "New" or "Revised" License
public void configure(JobConf job) {
    // Set the mapper (no reducer is configured for this job)
    job.setMapperClass(ReadDataJob.TestMapper.class);

    // Make sure this jar is included
    job.setJarByClass(ReadDataJob.TestMapper.class);

    // Specify the input and output data formats
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);

    // Turn off speculative execution
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    // Add the job input path
    FileInputFormat.addInputPath(job, new Path(this.input_path));
}
 
Example 2
Source File: ReadDataJob.java    From tracing-framework with BSD 3-Clause "New" or "Revised" License
public void configure(JobConf job) {
    // Set the mapper; the reducer is intentionally left unset
    job.setMapperClass(TestMapper.class);
    // job.setReducerClass(TestReducer.class);

    // Set the output types of the mapper and reducer
    // job.setMapOutputKeyClass(IntWritable.class);
    // job.setMapOutputValueClass(NullWritable.class);
    // job.setOutputKeyClass(NullWritable.class);
    // job.setOutputValueClass(NullWritable.class);

    // Make sure this jar is included
    job.setJarByClass(TestMapper.class);

    // Specify the input and output data formats
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);

    // Turn off speculative execution
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    // Add the job input path
    FileInputFormat.addInputPath(job, new Path(this.input_filename));
}
 
Example 3
Source File: DistCp.java    From RDFS with Apache License 2.0
private static JobConf createJobConf(Configuration conf, boolean useFastCopy) {
  Class<? extends InputFormat> inputFormat =
    (useFastCopy) ? FastCopyInputFormat.class : CopyInputFormat.class;
  JobConf jobconf = new JobConf(conf, DistCp.class);
  jobconf.setJobName(NAME);

  // turn off speculative execution, because DFS doesn't handle
  // multiple writers to the same file.
  jobconf.setReduceSpeculativeExecution(false);
  jobconf.setMapOutputKeyClass(FilePairComparable.class);
  jobconf.setMapOutputValueClass(Text.class);
  jobconf.setOutputKeyClass(FilePairComparable.class);
  jobconf.setOutputValueClass(Text.class);

  jobconf.setInputFormat(inputFormat);
  jobconf.setMapperClass(CopyFilesTask.class);
  jobconf.setReducerClass(CopyFilesTask.class);
    
  jobconf.setNumReduceTasks(conf.getInt(MAX_REDUCE_LABEL, 1));
  // Prevent the reducer from starting until all maps are done.
  jobconf.setInt("mapred.job.rushreduce.reduce.threshold", 0);
  jobconf.setFloat("mapred.reduce.slowstart.completed.maps", 1.0f);
  return jobconf;
}
 
Example 4
Source File: DBOutputFormat.java    From SpyGlass with Apache License 2.0
/**
 * Initializes the reduce-part of the job with the appropriate output settings
 *
 * @param job                 The job
 * @param dbOutputFormatClass The output format class to use; when null, DBOutputFormat is used
 * @param tableName           The table to insert data into
 * @param fieldNames          The field names in the table. If unknown, supply the appropriate
 *                            number of nulls.
 * @param updateFields        The fields used as update criteria; may be null
 * @param batchSize           The number of statements to batch together; pass -1 to keep the default
 */
public static void setOutput(JobConf job, Class<? extends DBOutputFormat> dbOutputFormatClass,
    String tableName, String[] fieldNames, String[] updateFields, int batchSize) {
    if (dbOutputFormatClass == null) { job.setOutputFormat(DBOutputFormat.class); } else {
        job.setOutputFormat(dbOutputFormatClass);
    }

    // writing doesn't always happen in reduce
    job.setReduceSpeculativeExecution(false);
    job.setMapSpeculativeExecution(false);

    DBConfiguration dbConf = new DBConfiguration(job);

    dbConf.setOutputTableName(tableName);
    dbConf.setOutputFieldNames(fieldNames);

    if (updateFields != null) { dbConf.setOutputUpdateFieldNames(updateFields); }

    if (batchSize != -1) { dbConf.setBatchStatementsNum(batchSize); }
}
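A hedged sketch of how a caller might invoke the helper above; the table and column names are hypothetical placeholders, not taken from the SpyGlass project.

JobConf job = new JobConf();
// A null output format class falls back to DBOutputFormat (see the
// null check in the method above); "employees" and its columns are
// hypothetical.
DBOutputFormat.setOutput(job,
    null,                          // use the default DBOutputFormat
    "employees",                   // table to insert data into
    new String[] { "id", "name" }, // field names in the table
    null,                          // no update fields
    -1);                           // keep the default batch size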
 
Example 5
Source File: DBOutputFormat.java    From hadoop with Apache License 2.0
private static DBConfiguration setOutput(JobConf job, String tableName) {
  job.setOutputFormat(DBOutputFormat.class);
  job.setReduceSpeculativeExecution(false);

  DBConfiguration dbConf = new DBConfiguration(job);
  
  dbConf.setOutputTableName(tableName);
  return dbConf;
}
 
Example 6
Source File: DBOutputFormat.java    From big-c with Apache License 2.0
private static DBConfiguration setOutput(JobConf job, String tableName) {
  job.setOutputFormat(DBOutputFormat.class);
  job.setReduceSpeculativeExecution(false);

  DBConfiguration dbConf = new DBConfiguration(job);
  
  dbConf.setOutputTableName(tableName);
  return dbConf;
}
 
Example 7
Source File: SolrIndexer.java    From anthelion with Apache License 2.0
public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
    List<Path> segments, boolean noCommit, boolean deleteGone, String solrParams,
    boolean filter, boolean normalize) throws IOException {
    
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrIndexer: starting at " + sdf.format(start));

  final JobConf job = new NutchJob(getConf());
  job.setJobName("index-solr " + solrUrl);

  LOG.info("SolrIndexer: deleting gone documents: " + deleteGone);
  LOG.info("SolrIndexer: URL filtering: " + filter);
  LOG.info("SolrIndexer: URL normalizing: " + normalize);
  
  IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

  job.set(SolrConstants.SERVER_URL, solrUrl);
  job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
  job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
  job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);
  if (solrParams != null) {
    job.set(SolrConstants.PARAMS, solrParams);
  }
  NutchIndexWriterFactory.addClassToConf(job, SolrWriter.class);

  job.setReduceSpeculativeExecution(false);

  final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
                       new Random().nextInt());

  FileOutputFormat.setOutputPath(job, tmp);
  try {
    JobClient.runJob(job);
    // do the commits once and for all the reducers in one go
    SolrServer solr =  SolrUtils.getCommonsHttpSolrServer(job);

    if (!noCommit) {
      solr.commit();
    }
    long end = System.currentTimeMillis();
    LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
  }
  catch (Exception e){
    LOG.error(e.toString());
  } finally {
    FileSystem.get(job).delete(tmp, true);
  }
}
 
Example 8
Source File: IndexingJob.java    From nutch-htmlunit with Apache License 2.0
public void index(Path crawlDb, Path linkDb, List<Path> segments,
        boolean noCommit, boolean deleteGone, String params,
        boolean filter, boolean normalize) throws IOException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("Indexer: starting at " + sdf.format(start));

    final JobConf job = new NutchJob(getConf());
    job.setJobName("Indexer");

    LOG.info("Indexer: deleting gone documents: " + deleteGone);
    LOG.info("Indexer: URL filtering: " + filter);
    LOG.info("Indexer: URL normalizing: " + normalize);   
    
    IndexWriters writers = new IndexWriters(getConf());
    LOG.info(writers.describe());

    IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

    // NOW PASSED ON THE COMMAND LINE AS A HADOOP PARAM
    // job.set(SolrConstants.SERVER_URL, solrUrl);

    job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
    job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
    job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);

    if (params != null) {
        job.set(IndexerMapReduce.INDEXER_PARAMS, params);
    }

    job.setReduceSpeculativeExecution(false);

    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-"
            + new Random().nextInt());

    FileOutputFormat.setOutputPath(job, tmp);
    try {
        JobClient.runJob(job);
        // do the commits once and for all the reducers in one go
        if (!noCommit) {
            writers.open(job,"commit");
            writers.commit();
        }
        long end = System.currentTimeMillis();
        LOG.info("Indexer: finished at " + sdf.format(end) + ", elapsed: "
                + TimingUtil.elapsedTime(start, end));
    } finally {
        FileSystem.get(job).delete(tmp, true);
    }
}
 
Example 9
Source File: DBOutputFormat.java    From RDFS with Apache License 2.0
/**
 * Initializes the reduce-part of the job with the appropriate output settings
 * 
 * @param job
 *          The job
 * @param tableName
 *          The table to insert data into
 * @param fieldNames
 *          The field names in the table. If unknown, supply the appropriate
 *          number of nulls.
 */
public static void setOutput(JobConf job, String tableName, String... fieldNames) {
  job.setOutputFormat(DBOutputFormat.class);
  job.setReduceSpeculativeExecution(false);

  DBConfiguration dbConf = new DBConfiguration(job);
  
  dbConf.setOutputTableName(tableName);
  dbConf.setOutputFieldNames(fieldNames);
}
 
Example 10
Source File: DBOutputFormat.java    From hadoop-gpu with Apache License 2.0
/**
 * Initializes the reduce-part of the job with the appropriate output settings
 * 
 * @param job
 *          The job
 * @param tableName
 *          The table to insert data into
 * @param fieldNames
 *          The field names in the table. If unknown, supply the appropriate
 *          number of nulls.
 */
public static void setOutput(JobConf job, String tableName, String... fieldNames) {
  job.setOutputFormat(DBOutputFormat.class);
  job.setReduceSpeculativeExecution(false);

  DBConfiguration dbConf = new DBConfiguration(job);
  
  dbConf.setOutputTableName(tableName);
  dbConf.setOutputFieldNames(fieldNames);
}