org.apache.hadoop.mapred.SkipBadRecords Java Examples

The following examples show how to use org.apache.hadoop.mapred.SkipBadRecords. Each example is taken from an open-source project; the source file and originating project are noted above each snippet.
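As a quick orientation before the examples, here is a minimal, hedged sketch of how a job typically enables the skipping feature through this class. The threshold values and the output path are illustrative only and are not taken from the snippets below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SkipBadRecords;

JobConf conf = new JobConf();
// Allow up to 100 bad records per map task and 100 bad groups per reduce
// task to be skipped (illustrative values).
SkipBadRecords.setMapperMaxSkipRecords(conf, 100L);
SkipBadRecords.setReducerMaxSkipGroups(conf, 100L);
// Enter skipping mode only after two failed attempts of the same task.
SkipBadRecords.setAttemptsToStartSkipping(conf, 2);
// Optionally record the skipped records under this (hypothetical) path.
SkipBadRecords.setSkipOutputPath(conf, new Path("/tmp/skipped"));
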
Example #1
Source File: MultithreadedMapRunner.java    From RDFS with Apache License 2.0
@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
  int numberOfThreads =
    jobConf.getInt("mapred.map.multithreadedrunner.threads", 10);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Configuring jobConf " + jobConf.getJobName() +
              " to use " + numberOfThreads + " threads");
  }

  this.job = jobConf;
  //increment processed counter only if skipping feature is enabled
  this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job)>0 && 
    SkipBadRecords.getAutoIncrMapperProcCount(job);
  this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(),
      jobConf);

  // Creating a threadpool of the configured size to execute the Mapper
  // map method in parallel.
  executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads, 
                                           0L, TimeUnit.MILLISECONDS,
                                           new BlockingArrayQueue
                                             (numberOfThreads));
}
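The run() method of this runner is not shown in the excerpt. As a hedged sketch of how the incrProcCount flag computed above is typically consumed, each worker thread bumps the framework's processed-record counter after invoking the user's map():

// Inside the per-record worker (sketch; not part of the excerpt above):
mapper.map(key, value, output, reporter);
if (incrProcCount) {
  reporter.incrCounter(SkipBadRecords.COUNTER_GROUP,
      SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS, 1);
}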
 
Example #2
Source File: TestStreamingBadRecords.java    From hadoop-gpu with Apache License 2.0
public App(String[] args) throws Exception{
  if(args.length>0) {
    isReducer = Boolean.parseBoolean(args[0]);
  }
  String counter = SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS;
  if(isReducer) {
    counter = SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS;
  }
  BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  String line;
  int count = 0;
  while ((line = in.readLine()) != null) {
    processLine(line);
    count++;
    if(count>=10) {
      System.err.println("reporter:counter:"+SkipBadRecords.COUNTER_GROUP+
          ","+counter+","+count);
      count = 0;
    }
  }
}
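The System.err line above relies on Hadoop Streaming's counter protocol: any line a streaming task writes to standard error in the following form is parsed by the framework and added to the named counter.

reporter:counter:<group>,<counter>,<amount>

Because the PipeMapper/PipeReducer examples below disable the automatic increment of the processed-record counters, a streaming program that wants skipping to work must report its progress this way, using SkipBadRecords.COUNTER_GROUP with COUNTER_MAP_PROCESSED_RECORDS (map side) or COUNTER_REDUCE_PROCESSED_GROUPS (reduce side), exactly as this test application does.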
 
Example #3
Source File: PipeMapper.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
  String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
  ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
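The UnsupportedEncodingException branch is effectively dead code, since UTF-8 support is mandatory on every JVM. On Java 7 and later, a hedged alternative sidesteps the checked exception entirely:

import java.nio.charset.StandardCharsets;

// Same lookup as above, but getBytes(Charset) throws no checked exception.
mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t")
    .getBytes(StandardCharsets.UTF_8);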
 
Example #4
Source File: PipeReducer.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);

  try {
    reduceOutFieldSeparator = job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator = job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields = job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example #5
Source File: MultithreadedMapRunner.java    From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
  int numberOfThreads =
    jobConf.getInt("mapred.map.multithreadedrunner.threads", 10);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Configuring jobConf " + jobConf.getJobName() +
              " to use " + numberOfThreads + " threads");
  }

  this.job = jobConf;
  //increment processed counter only if skipping feature is enabled
  this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job)>0 && 
    SkipBadRecords.getAutoIncrMapperProcCount(job);
  this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(),
      jobConf);

  // Creating a threadpool of the configured size to execute the Mapper
  // map method in parallel.
  executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads, 
                                           0L, TimeUnit.MILLISECONDS,
                                           new BlockingArrayQueue
                                             (numberOfThreads));
}
 
Example #6
Source File: TestStreamingBadRecords.java    From RDFS with Apache License 2.0
public App(String[] args) throws Exception{
  if(args.length>0) {
    isReducer = Boolean.parseBoolean(args[0]);
  }
  String counter = SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS;
  if(isReducer) {
    counter = SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS;
  }
  BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  String line;
  int count = 0;
  while ((line = in.readLine()) != null) {
    processLine(line);
    count++;
    if(count>=10) {
      System.err.println("reporter:counter:"+SkipBadRecords.COUNTER_GROUP+
          ","+counter+","+count);
      count = 0;
    }
  }
}
 
Example #7
Source File: PipeMapper.java    From RDFS with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
  String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
  ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example #8
Source File: PipeReducer.java    From RDFS with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);

  try {
    reduceOutFieldSeparator = job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator = job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields = job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example #9
Source File: TestStreamingBadRecords.java    From big-c with Apache License 2.0
public App(String[] args) throws Exception{
  if(args.length>0) {
    isReducer = Boolean.parseBoolean(args[0]);
  }
  String counter = SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS;
  if(isReducer) {
    counter = SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS;
  }
  BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  String line;
  int count = 0;
  while ((line = in.readLine()) != null) {
    processLine(line);
    count++;
    if(count>=10) {
      System.err.println("reporter:counter:"+SkipBadRecords.COUNTER_GROUP+
          ","+counter+","+count);
      count = 0;
    }
  }
}
 
Example #10
Source File: PipeMapper.java    From big-c with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }
  
  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example #11
Source File: MultithreadedMapRunner.java    From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
  int numberOfThreads =
    jobConf.getInt(MultithreadedMapper.NUM_THREADS, 10);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Configuring jobConf " + jobConf.getJobName() +
              " to use " + numberOfThreads + " threads");
  }

  this.job = jobConf;
  //increment processed counter only if skipping feature is enabled
  this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job)>0 && 
    SkipBadRecords.getAutoIncrMapperProcCount(job);
  this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(),
      jobConf);

  // Creating a threadpool of the configured size to execute the Mapper
  // map method in parallel.
  executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads, 
                                           0L, TimeUnit.MILLISECONDS,
                                           new BlockingArrayQueue
                                             (numberOfThreads));
}
 
Example #12
Source File: MultithreadedMapRunner.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
  int numberOfThreads =
    jobConf.getInt(MultithreadedMapper.NUM_THREADS, 10);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Configuring jobConf " + jobConf.getJobName() +
              " to use " + numberOfThreads + " threads");
  }

  this.job = jobConf;
  //increment processed counter only if skipping feature is enabled
  this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job)>0 && 
    SkipBadRecords.getAutoIncrMapperProcCount(job);
  this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(),
      jobConf);

  // Creating a threadpool of the configured size to execute the Mapper
  // map method in parallel.
  executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads, 
                                           0L, TimeUnit.MILLISECONDS,
                                           new BlockingArrayQueue
                                             (numberOfThreads));
}
 
Example #13
Source File: PipeReducer.java    From hadoop with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);

  try {
    reduceOutFieldSeparator = job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator = job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields = job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example #14
Source File: PipeMapper.java    From hadoop with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }
  
  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example #15
Source File: TestStreamingBadRecords.java    From hadoop with Apache License 2.0
public App(String[] args) throws Exception{
  if(args.length>0) {
    isReducer = Boolean.parseBoolean(args[0]);
  }
  String counter = SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS;
  if(isReducer) {
    counter = SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS;
  }
  BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  String line;
  int count = 0;
  while ((line = in.readLine()) != null) {
    processLine(line);
    count++;
    if(count>=10) {
      System.err.println("reporter:counter:"+SkipBadRecords.COUNTER_GROUP+
          ","+counter+","+count);
      count = 0;
    }
  }
}
 
Example #16
Source File: PipeReducer.java    From big-c with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);

  try {
    reduceOutFieldSeparator = job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator = job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields = job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example #17
Source File: PipesMapRunner.java    From big-c with Apache License 2.0
/**
 * Get the new configuration.
 * @param job the job's configuration
 */
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed 
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
}
 
Example #18
Source File: PipesReducer.java    From big-c with Apache License 2.0
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed 
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
}
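For reference, the auto-increment switch used by the Pipes runners is symmetric across the two task types, and each setter has a matching getter (all four methods exist on SkipBadRecords; both flags default to true). A brief recap:

SkipBadRecords.setAutoIncrMapperProcCount(job, false);   // map side
SkipBadRecords.setAutoIncrReducerProcCount(job, false);  // reduce side
boolean mapAuto = SkipBadRecords.getAutoIncrMapperProcCount(job);
boolean reduceAuto = SkipBadRecords.getAutoIncrReducerProcCount(job);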
 
Example #19
Source File: TestStreamingBadRecords.java    From RDFS with Apache License 2.0
public void testSkip() throws Exception {
  JobConf clusterConf = createJobConf();
  createInput();
  int attSkip =0;
  SkipBadRecords.setAttemptsToStartSkipping(clusterConf,attSkip);
  //the no of attempts to successfully complete the task depends 
  //on the no of bad records.
  int mapperAttempts = attSkip+1+MAPPER_BAD_RECORDS.size();
  int reducerAttempts = attSkip+1+REDUCER_BAD_RECORDS.size();
  
  String[] args =  new String[] {
    "-input", (new Path(getInputDir(), "text.txt")).toString(),
    "-output", getOutputDir().toString(),
    "-mapper", badMapper,
    "-reducer", badReducer,
    "-verbose",
    "-inputformat", "org.apache.hadoop.mapred.KeyValueTextInputFormat",
    "-jobconf", "mapred.skip.attempts.to.start.skipping="+attSkip,
    "-jobconf", "mapred.skip.out.dir=none",
    "-jobconf", "mapred.map.max.attempts="+mapperAttempts,
    "-jobconf", "mapred.reduce.max.attempts="+reducerAttempts,
    "-jobconf", "mapred.skip.map.max.skip.records="+Long.MAX_VALUE,
    "-jobconf", "mapred.skip.reduce.max.skip.groups="+Long.MAX_VALUE,
    "-jobconf", "mapred.map.tasks=1",
    "-jobconf", "mapred.reduce.tasks=1",
    "-jobconf", "fs.default.name="+clusterConf.get("fs.default.name"),
    "-jobconf", "mapred.job.tracker="+clusterConf.get("mapred.job.tracker"),
    "-jobconf", "mapred.job.tracker.http.address="
                  +clusterConf.get("mapred.job.tracker.http.address"),
    "-jobconf", "stream.debug=set",
    "-jobconf", "keep.failed.task.files=true",
    "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp")
  };
  StreamJob job = new StreamJob(args, false);      
  job.go();
  validateOutput(job.running_, false);
  //validate that there is no skip directory as it has been set to "none"
  assertTrue(SkipBadRecords.getSkipOutputPath(job.jobConf_)==null);
}
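The skip-related -jobconf flags in this test map one-to-one onto SkipBadRecords setters. A hedged programmatic equivalent:

SkipBadRecords.setAttemptsToStartSkipping(clusterConf, attSkip);      // mapred.skip.attempts.to.start.skipping
SkipBadRecords.setMapperMaxSkipRecords(clusterConf, Long.MAX_VALUE);  // mapred.skip.map.max.skip.records
SkipBadRecords.setReducerMaxSkipGroups(clusterConf, Long.MAX_VALUE);  // mapred.skip.reduce.max.skip.groups
// "mapred.skip.out.dir=none" disables the skip output directory, which is
// why the assertion above expects getSkipOutputPath() to return null.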
 
Example #20
Source File: TestStreamingBadRecords.java    From RDFS with Apache License 2.0
public void testNarrowDown() throws Exception {
  createInput();
  JobConf clusterConf = createJobConf();
  String[] args =  new String[] {
    "-input", (new Path(getInputDir(), "text.txt")).toString(),
    "-output", getOutputDir().toString(),
    "-mapper", badMapper,
    "-reducer", badReducer,
    "-verbose",
    "-inputformat", "org.apache.hadoop.mapred.KeyValueTextInputFormat",
    "-jobconf", "mapred.skip.attempts.to.start.skipping=1",
    //actually fewer attempts are required than specified
    //but to cater to the case of slow processed counter update, need to 
    //have more attempts
    "-jobconf", "mapred.map.max.attempts=20",
    "-jobconf", "mapred.reduce.max.attempts=15",
    "-jobconf", "mapred.skip.map.max.skip.records=1",
    "-jobconf", "mapred.skip.reduce.max.skip.groups=1",
    "-jobconf", "mapred.map.tasks=1",
    "-jobconf", "mapred.reduce.tasks=1",
    "-jobconf", "fs.default.name="+clusterConf.get("fs.default.name"),
    "-jobconf", "mapred.job.tracker="+clusterConf.get("mapred.job.tracker"),
    "-jobconf", "mapred.job.tracker.http.address="
                  +clusterConf.get("mapred.job.tracker.http.address"),
    "-jobconf", "stream.debug=set",
    "-jobconf", "keep.failed.task.files=true",
    "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp")
  };
  StreamJob job = new StreamJob(args, false);      
  job.go();
  
  validateOutput(job.running_, true);
  assertTrue(SkipBadRecords.getSkipOutputPath(job.jobConf_)!=null);
}
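Setting both skip bounds to 1 forces the framework to narrow each failure down to a single bad record (map side) or a single bad group (reduce side) by re-running the task with progressively smaller skip ranges, which is why the test budgets so many attempts. The two bounds correspond to these setters (conf stands for the job's JobConf):

SkipBadRecords.setMapperMaxSkipRecords(conf, 1L);   // mapred.skip.map.max.skip.records
SkipBadRecords.setReducerMaxSkipGroups(conf, 1L);   // mapred.skip.reduce.max.skip.groups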
 
Example #21
Source File: PipesReducer.java    From hadoop with Apache License 2.0
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed 
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
}
 
Example #22
Source File: PipesGPUMapRunner.java    From hadoop-gpu with Apache License 2.0
/**
 * Get the new configuration.
 * @param job the job's configuration
 */
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed 
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
}
 
Example #23
Source File: PipesReducer.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed 
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
}
 
Example #24
Source File: PipesMapRunner.java    From hadoop-gpu with Apache License 2.0
/**
 * Get the new configuration.
 * @param job the job's configuration
 */
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed 
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
}
 
Example #25
Source File: PipesMapRunner.java    From RDFS with Apache License 2.0
/**
 * Get the new configuration.
 * @param job the job's configuration
 */
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed 
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
}
 
Example #26
Source File: PipesReducer.java    From RDFS with Apache License 2.0
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed 
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
}
 
Example #27
Source File: TestStreamingBadRecords.java    From hadoop-gpu with Apache License 2.0
public void testSkip() throws Exception {
  JobConf clusterConf = createJobConf();
  createInput();
  int attSkip =0;
  SkipBadRecords.setAttemptsToStartSkipping(clusterConf,attSkip);
  //the no of attempts to successfully complete the task depends 
  //on the no of bad records.
  int mapperAttempts = attSkip+1+MAPPER_BAD_RECORDS.size();
  int reducerAttempts = attSkip+1+REDUCER_BAD_RECORDS.size();
  
  String[] args =  new String[] {
    "-input", (new Path(getInputDir(), "text.txt")).toString(),
    "-output", getOutputDir().toString(),
    "-mapper", badMapper,
    "-reducer", badReducer,
    "-verbose",
    "-inputformat", "org.apache.hadoop.mapred.KeyValueTextInputFormat",
    "-jobconf", "mapred.skip.attempts.to.start.skipping="+attSkip,
    "-jobconf", "mapred.skip.out.dir=none",
    "-jobconf", "mapred.map.max.attempts="+mapperAttempts,
    "-jobconf", "mapred.reduce.max.attempts="+reducerAttempts,
    "-jobconf", "mapred.skip.map.max.skip.records="+Long.MAX_VALUE,
    "-jobconf", "mapred.skip.reduce.max.skip.groups="+Long.MAX_VALUE,
    "-jobconf", "mapred.map.tasks=1",
    "-jobconf", "mapred.reduce.tasks=1",
    "-jobconf", "fs.default.name="+clusterConf.get("fs.default.name"),
    "-jobconf", "mapred.job.tracker="+clusterConf.get("mapred.job.tracker"),
    "-jobconf", "mapred.job.tracker.http.address="
                  +clusterConf.get("mapred.job.tracker.http.address"),
    "-jobconf", "stream.debug=set",
    "-jobconf", "keep.failed.task.files=true",
    "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp")
  };
  StreamJob job = new StreamJob(args, false);      
  job.go();
  validateOutput(job.running_, false);
  //validate that there is no skip directory as it has been set to "none"
  assertTrue(SkipBadRecords.getSkipOutputPath(job.jobConf_)==null);
}
 
Example #28
Source File: TestStreamingBadRecords.java    From hadoop-gpu with Apache License 2.0
public void testNarrowDown() throws Exception {
  createInput();
  JobConf clusterConf = createJobConf();
  String[] args =  new String[] {
    "-input", (new Path(getInputDir(), "text.txt")).toString(),
    "-output", getOutputDir().toString(),
    "-mapper", badMapper,
    "-reducer", badReducer,
    "-verbose",
    "-inputformat", "org.apache.hadoop.mapred.KeyValueTextInputFormat",
    "-jobconf", "mapred.skip.attempts.to.start.skipping=1",
    //actually fewer attempts are required than specified
    //but to cater to the case of slow processed counter update, need to 
    //have more attempts
    "-jobconf", "mapred.map.max.attempts=20",
    "-jobconf", "mapred.reduce.max.attempts=15",
    "-jobconf", "mapred.skip.map.max.skip.records=1",
    "-jobconf", "mapred.skip.reduce.max.skip.groups=1",
    "-jobconf", "mapred.map.tasks=1",
    "-jobconf", "mapred.reduce.tasks=1",
    "-jobconf", "fs.default.name="+clusterConf.get("fs.default.name"),
    "-jobconf", "mapred.job.tracker="+clusterConf.get("mapred.job.tracker"),
    "-jobconf", "mapred.job.tracker.http.address="
                  +clusterConf.get("mapred.job.tracker.http.address"),
    "-jobconf", "stream.debug=set",
    "-jobconf", "keep.failed.task.files=true",
    "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp")
  };
  StreamJob job = new StreamJob(args, false);      
  job.go();
  
  validateOutput(job.running_, true);
  assertTrue(SkipBadRecords.getSkipOutputPath(job.jobConf_)!=null);
}
 
Example #29
Source File: PipesMapRunner.java    From hadoop with Apache License 2.0
/**
 * Get the new configuration.
 * @param job the job's configuration
 */
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed 
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
}