Java Code Examples for org.apache.hadoop.mapred.JobConf#setBoolean()

The following examples show how to use org.apache.hadoop.mapred.JobConf#setBoolean(). Each example is drawn from an open-source project; the source file, originating project, and license are listed above it.
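As a quick orientation before the examples, here is a minimal, self-contained sketch of the call itself. The property name example.feature.enabled is invented for illustration: setBoolean(name, value) stores a boolean flag on the configuration, and getBoolean(name, defaultValue) reads it back.

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Store a boolean flag; JobConf inherits setBoolean/getBoolean from Configuration.
    job.setBoolean("example.feature.enabled", true);   // invented property name
    // Read it back, falling back to the supplied default if the property is unset.
    boolean enabled = job.getBoolean("example.feature.enabled", false);
    System.out.println("example.feature.enabled = " + enabled);
  }
}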
Example 1
Source File: TestFileSystem.java    From RDFS with Apache License 2.0
public static void readTest(FileSystem fs, boolean fastCheck)
  throws Exception {

  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);


  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Example 2
Source File: LinkDbMerger.java    From anthelion with Apache License 2.0
public static JobConf createMergeJob(Configuration config, Path linkDb, boolean normalize, boolean filter) {
  Path newLinkDb =
    new Path("linkdb-merge-" + 
             Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf job = new NutchJob(config);
  job.setJobName("linkdb merge " + linkDb);

  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(LinkDbFilter.class);
  job.setBoolean(LinkDbFilter.URL_NORMALIZING, normalize);
  job.setBoolean(LinkDbFilter.URL_FILTERING, filter);
  job.setReducerClass(LinkDbMerger.class);

  FileOutputFormat.setOutputPath(job, newLinkDb);
  job.setOutputFormat(MapFileOutputFormat.class);
  job.setBoolean("mapred.output.compress", true);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Inlinks.class);

  // https://issues.apache.org/jira/browse/NUTCH-1069
  job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  return job;
}
 
Example 3
Source File: RegressionPrepare.java    From ml-ease with Apache License 2.0
@Override
public void run() throws Exception
{
  JobConfig config = super.getJobConfig();
  JobConf conf =
      super.createJobConf(RegressionPrepareMapper.class,
                          RegressionPrepareOutput.SCHEMA$); 
  String mapKey = config.getString(MAP_KEY, "");
  conf.set(MAP_KEY, mapKey);
  conf.setInt(NUM_CLICK_REPLICATES, config.getInt(NUM_CLICK_REPLICATES, 1));
  conf.setBoolean(IGNORE_FEATURE_VALUE, config.getBoolean(IGNORE_FEATURE_VALUE, false));
  int nblocks = config.getInt(NUM_BLOCKS, 0);
  conf.setInt(NUM_BLOCKS, nblocks);
  _logger.info("Running the preparation job of admm with map.key = " + mapKey + " and num.blocks=" + nblocks);
  AvroUtils.runAvroJob(conf);
}
 
Example 4
Source File: TestConfigTranslationMRToTez.java    From tez with Apache License 2.0
@Test(timeout = 5000)
public void testMRToTezKeyTranslation() {
  JobConf confVertex1 = new JobConf();
  confVertex1.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS,
      IntWritable.class.getName());
  confVertex1.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS,
      LongWritable.class.getName());
  confVertex1.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  MRHelpers.translateMRConfToTez(confVertex1);

  // Verify translation
  assertEquals(IntWritable.class.getName(), ConfigUtils
      .getIntermediateOutputKeyClass(confVertex1).getName());
  assertEquals(LongWritable.class.getName(), ConfigUtils
      .getIntermediateOutputValueClass(confVertex1).getName());
  assertEquals(IntWritable.class.getName(), ConfigUtils
      .getIntermediateInputKeyClass(confVertex1).getName());
  assertEquals(LongWritable.class.getName(), ConfigUtils
      .getIntermediateInputValueClass(confVertex1).getName());
  assertTrue(ConfigUtils.shouldCompressIntermediateOutput(confVertex1));
  assertTrue(ConfigUtils.isIntermediateInputCompressed(confVertex1));
}
 
Example 5
Source File: TestFetcher.java    From hadoop with Apache License 2.0
@Before
@SuppressWarnings("unchecked") // mocked generics
public void setup() {
  LOG.info(">>>> " + name.getMethodName());
  job = new JobConf();
  job.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, false);
  jobWithRetry = new JobConf();
  jobWithRetry.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, true);
  id = TaskAttemptID.forName("attempt_0_1_r_1_1");
  ss = mock(ShuffleSchedulerImpl.class);
  mm = mock(MergeManagerImpl.class);
  r = mock(Reporter.class);
  metrics = mock(ShuffleClientMetrics.class);
  except = mock(ExceptionReporter.class);
  key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
  connection = mock(HttpURLConnection.class);

  allErrs = mock(Counters.Counter.class);
  when(r.getCounter(anyString(), anyString())).thenReturn(allErrs);

  ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
  maps.add(map1ID);
  maps.add(map2ID);
  when(ss.getMapsForHost(host)).thenReturn(maps);
}
 
Example 6
Source File: TestDeprecatedKeys.java    From tez with Apache License 2.0
@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
  JobConf jobConf = new JobConf();

  jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
  jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000l);
  jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
  jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
  jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
  jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);
  jobConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);

  MRHelpers.translateMRConfToTez(jobConf);

  assertEquals(0.4f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f), 0.01f);
  assertEquals(20000l, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
  assertEquals(2000,
      jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
  assertEquals(0.55f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
  assertEquals(0.60f,
      jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0),
      0.01f);
  assertEquals(0.22f,
      jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0),
      0.01f);
  assertEquals(true, jobConf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
  assertEquals(0.33f,
      jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0),
      0.01f);
  assertEquals(false, jobConf.getBoolean(TezConfiguration.TEZ_USER_CLASSPATH_FIRST, true));
}
 
Example 7
Source File: TestChild.java    From big-c with Apache License 2.0
private Job submitAndValidateJob(JobConf conf, int numMaps, int numReds, 
                                 boolean oldConfigs) 
    throws IOException, InterruptedException, ClassNotFoundException {
  conf.setBoolean(OLD_CONFIGS, oldConfigs);
  if (oldConfigs) {
    conf.set(JobConf.MAPRED_TASK_JAVA_OPTS, TASK_OPTS_VAL);
  } else {
    conf.set(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, MAP_OPTS_VAL);
    conf.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, REDUCE_OPTS_VAL);
  }
  
  conf.set(JobConf.MAPRED_MAP_TASK_LOG_LEVEL, Level.OFF.toString());
  conf.set(JobConf.MAPRED_REDUCE_TASK_LOG_LEVEL, Level.OFF.toString());
  
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 
              numMaps, numReds);
  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);
  assertFalse("Job already has a job tracker connection, before it's submitted",
              job.isConnected());
  job.submit();
  assertTrue("Job doesn't have a job tracker connection, even though it's been submitted",
             job.isConnected());
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // Check output directory
  FileSystem fs = FileSystem.get(conf);
  assertTrue("Job output directory doesn't exit!", fs.exists(outDir));
  FileStatus[] list = fs.listStatus(outDir, new OutputFilter());
  int numPartFiles = numReds == 0 ? numMaps : numReds;
  assertTrue("Number of part-files is " + list.length + " and not "
      + numPartFiles, list.length == numPartFiles);
  return job;
}
 
Example 8
Source File: LinkRank.java    From nutch-htmlunit with Apache License 2.0
/**
 * Runs the initializer job. The initializer job sets up the nodes with a
 * default starting score for link analysis.
 * 
 * @param nodeDb The node database to use.
 * @param output The job output directory.
 * 
 * @throws IOException If an error occurs while running the initializer job.
 */
private void runInitializer(Path nodeDb, Path output)
  throws IOException {

  // configure the initializer
  JobConf initializer = new NutchJob(getConf());
  initializer.setJobName("LinkAnalysis Initializer");
  FileInputFormat.addInputPath(initializer, nodeDb);
  FileOutputFormat.setOutputPath(initializer, output);
  initializer.setInputFormat(SequenceFileInputFormat.class);
  initializer.setMapperClass(Initializer.class);
  initializer.setMapOutputKeyClass(Text.class);
  initializer.setMapOutputValueClass(Node.class);
  initializer.setOutputKeyClass(Text.class);
  initializer.setOutputValueClass(Node.class);
  initializer.setOutputFormat(MapFileOutputFormat.class);
  initializer.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  // run the initializer
  LOG.info("Starting initialization job");
  try {
    JobClient.runJob(initializer);
  }
  catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  LOG.info("Finished initialization job.");
}
 
Example 9
Source File: IndexingJob.java    From nutch-htmlunit with Apache License 2.0
public void index(Path crawlDb, Path linkDb, List<Path> segments,
        boolean noCommit, boolean deleteGone, String params,
        boolean filter, boolean normalize) throws IOException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("Indexer: starting at " + sdf.format(start));

    final JobConf job = new NutchJob(getConf());
    job.setJobName("Indexer");

    LOG.info("Indexer: deleting gone documents: " + deleteGone);
    LOG.info("Indexer: URL filtering: " + filter);
    LOG.info("Indexer: URL normalizing: " + normalize);   
    
    IndexWriters writers = new IndexWriters(getConf());
    LOG.info(writers.describe());

    IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

    // NOW PASSED ON THE COMMAND LINE AS A HADOOP PARAM
    // job.set(SolrConstants.SERVER_URL, solrUrl);

    job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
    job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
    job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);

    if (params != null) {
        job.set(IndexerMapReduce.INDEXER_PARAMS, params);
    }

    job.setReduceSpeculativeExecution(false);

    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-"
            + new Random().nextInt());

    FileOutputFormat.setOutputPath(job, tmp);
    try {
        JobClient.runJob(job);
        // do the commits once and for all the reducers in one go
        if (!noCommit) {
            writers.open(job,"commit");
            writers.commit();
        }
        long end = System.currentTimeMillis();
        LOG.info("Indexer: finished at " + sdf.format(end) + ", elapsed: "
                + TimingUtil.elapsedTime(start, end));
    } finally {
        FileSystem.get(job).delete(tmp, true);
    }
}
 
Example 10
Source File: GenericMRLoadJobCreator.java    From RDFS with Apache License 2.0
public static JobConf createJob(String[] argv, boolean mapoutputCompressed,
    boolean outputCompressed) throws Exception {

  JobConf job = new JobConf();
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return null;
  }

  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormat(NullOutputFormat.class);
  }

  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != job.getClass("mapred.indirect.input.format", null)) {
    // specified IndirectInputFormat? Build src list
    JobClient jClient = new JobClient(job);
    Path sysdir = jClient.getSystemDir();
    Random r = new Random();
    Path indirInputFile = new Path(sysdir, Integer.toString(r
        .nextInt(Integer.MAX_VALUE), 36)
        + "_files");
    job.set("mapred.indirect.input.file", indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(sysdir
        .getFileSystem(job), job, indirInputFile, LongWritable.class,
        Text.class, SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(job);
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDir()) {
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              writer.sync();
              writer.append(new LongWritable(stat.getLen()), new Text(stat
                  .getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  job.setCompressMapOutput(mapoutputCompressed);
  job.setBoolean("mapred.output.compress", outputCompressed);
  return job;

}
 
Example 11
Source File: TradesHdfsDataVerifier.java    From gemfirexd-oss with Apache License 2.0
public int run(String[] args) throws Exception {

    GfxdDataSerializable.initTypes();

    JobConf conf = new JobConf(getConf());
    conf.setJobName("TradesHdfsDataVerifier");

    String hdfsHomeDir = args[0];
    String url         = args[1];
    String tableName   = args[2];

    System.out.println("TradesHdfsDataVerifier.run() invoked with " 
                       + " hdfsHomeDir = " + hdfsHomeDir 
                       + " url = " + url
                       + " tableName = " + tableName);

    // Job-specific params
    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    
    conf.setInputFormat(RowInputFormat.class);
    conf.setMapperClass(HdfsDataMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(TradesRow.class);
    
    conf.setReducerClass(HdfsDataReducer.class);
    conf.set(RowOutputFormat.OUTPUT_TABLE, tableName + "_HDFS");
    //conf.set(GfxdOutputFormat.OUTPUT_SCHEMA, "APP");
    conf.set(RowOutputFormat.OUTPUT_URL, url);
    conf.setOutputFormat(RowOutputFormat.class);
    conf.setOutputKeyClass(Key.class);
    conf.setOutputValueClass(TradeOutputObject.class);

    StringBuffer aStr = new StringBuffer();
    aStr.append("HOME_DIR = " + conf.get(RowInputFormat.HOME_DIR) + " ");
    aStr.append("INPUT_TABLE = " + conf.get(RowInputFormat.INPUT_TABLE) + " ");
    aStr.append("OUTPUT_TABLE = " + conf.get(RowOutputFormat.OUTPUT_TABLE) + " ");
    aStr.append("OUTPUT_URL = " + conf.get(RowOutputFormat.OUTPUT_URL) + " ");
    System.out.println("VerifyHdfsData running with the following conf: " + aStr.toString());

    
    FileOutputFormat.setOutputPath(conf, new Path("" + System.currentTimeMillis()));
    
    JobClient.runJob(conf);
    return 0;
  }
 
Example 12
Source File: TopBusyAirportGemfirexd.java    From gemfirexd-oss with Apache License 2.0
public int run(String[] args) throws Exception {

    GfxdDataSerializable.initTypes();

    JobConf conf = new JobConf(getConf());
    conf.setJobName("Busy Airport Count");

    Path outputPath = new Path(args[0]);
    Path intermediateOutputPath = new Path(args[0] + "_int");
    String hdfsHomeDir = args[1];
    String tableName = args[2];

    outputPath.getFileSystem(conf).delete(outputPath, true);
    intermediateOutputPath.getFileSystem(conf).delete(intermediateOutputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

    conf.setInputFormat(RowInputFormat.class);
    conf.setMapperClass(SampleMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    conf.setReducerClass(SampleReducer.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(conf, intermediateOutputPath);

    int rc = JobClient.runJob(conf).isSuccessful() ? 0 : 1;
    if (rc == 0) {
      JobConf topConf = new JobConf(getConf());
      topConf.setJobName("Top Busy Airport");

      String hdfsFS = topConf.get("fs.defaultFS");
      URI hdfsUri = URI.create(hdfsFS);
      hdfsUri.getHost();

      // Assume that SqlFire locator is running alongside the namenode
      topConf.set(RowOutputFormat.OUTPUT_URL, "jdbc:gemfirexd://" + hdfsUri.getHost() + ":1527");
      //topConf.set(GfxdOutputFormat.OUTPUT_SCHEMA, "APP");
      //topConf.set(GfxdOutputFormat.OUTPUT_TABLE, "BUSY_AIRPORT");
      topConf.set(RowOutputFormat.OUTPUT_TABLE, "APP.BUSY_AIRPORT");

      // Only run a single reducer
      topConf.setNumReduceTasks(1);

      FileInputFormat.setInputPaths(topConf, intermediateOutputPath);

      topConf.setInputFormat(TextInputFormat.class);
      topConf.setMapperClass(TopBusyAirportMapper.class);
      topConf.setMapOutputKeyClass(Text.class);
      topConf.setMapOutputValueClass(StringIntPair.class);

      topConf.setReducerClass(TopBusyAirportReducer.class);
      topConf.setOutputKeyClass(Key.class);
      topConf.setOutputValueClass(BusyAirportModel.class);
      topConf.setOutputFormat(RowOutputFormat.class);

      rc = JobClient.runJob(topConf).isSuccessful() ? 0 : 1;
    }
    return rc;
  }
 
Example 13
Source File: VerifyHdfsDataUsingMR.java    From gemfirexd-oss with Apache License 2.0
public int run(String[] args) throws Exception {

    // todo@lhughes -- why do we need this?
    GfxdDataSerializable.initTypes();

    JobConf conf = new JobConf(getConf());
    conf.setJobName("hdfsMapReduce");

    String hdfsHomeDir = args[0];
    String url         = args[1];
    String tableName   = args[2];

    System.out.println("VerifyHdfsData.run() invoked with " 
                       + " hdfsHomeDir = " + hdfsHomeDir 
                       + " url = " + url
                       + " tableName = " + tableName);

    // Job-specific params
    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    
    conf.setInputFormat(RowInputFormat.class);
    conf.setMapperClass(HdfsDataMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(MyRow.class);
    
    conf.setReducerClass(HdfsDataReducer.class);
    conf.set(RowOutputFormat.OUTPUT_TABLE, "TRADE.HDFS_CUSTOMERS");
    //conf.set(GfxdOutputFormat.OUTPUT_SCHEMA, "APP");
    conf.set(RowOutputFormat.OUTPUT_URL, url);
    conf.setOutputFormat(RowOutputFormat.class);
    conf.setOutputKeyClass(Key.class);
    conf.setOutputValueClass(DataObject.class);

    StringBuffer aStr = new StringBuffer();
    aStr.append("HOME_DIR = " + conf.get(RowInputFormat.HOME_DIR) + " ");
    aStr.append("INPUT_TABLE = " + conf.get(RowInputFormat.INPUT_TABLE) + " ");
    aStr.append("OUTPUT_TABLE = " + conf.get(RowOutputFormat.OUTPUT_TABLE) + " ");
    aStr.append("OUTPUT_URL = " + conf.get(RowOutputFormat.OUTPUT_URL) + " ");
    System.out.println("VerifyHdfsData running with the following conf: " + aStr.toString());

    // not planning to use this, but I get an NPE without it
    FileOutputFormat.setOutputPath(conf, new Path("" + System.currentTimeMillis()));
    
    JobClient.runJob(conf);
    return 0;
  }
 
Example 14
Source File: TestTaskAttempt.java    From hadoop with Apache License 2.0
@Test
public void testAppDiognosticEventOnNewTask() throws Exception {
  ApplicationId appId = ApplicationId.newInstance(1, 2);
  ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(
      appId, 0);
  JobId jobId = MRBuilderUtils.newJobId(appId, 1);
  TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
  TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
  Path jobFile = mock(Path.class);

  MockEventHandler eventHandler = new MockEventHandler();
  TaskAttemptListener taListener = mock(TaskAttemptListener.class);
  when(taListener.getAddress()).thenReturn(
      new InetSocketAddress("localhost", 0));

  JobConf jobConf = new JobConf();
  jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
  jobConf.setBoolean("fs.file.impl.disable.cache", true);
  jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
  jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");

  TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
  when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });

  AppContext appCtx = mock(AppContext.class);
  ClusterInfo clusterInfo = mock(ClusterInfo.class);
  Resource resource = mock(Resource.class);
  when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
  when(resource.getMemory()).thenReturn(1024);

  TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler,
      jobFile, 1, splits, jobConf, taListener,
      new Token(), new Credentials(), new SystemClock(), appCtx);

  NodeId nid = NodeId.newInstance("127.0.0.1", 0);
  ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
  Container container = mock(Container.class);
  when(container.getId()).thenReturn(contId);
  when(container.getNodeId()).thenReturn(nid);
  when(container.getNodeHttpAddress()).thenReturn("localhost:0");
  taImpl.handle(new TaskAttemptDiagnosticsUpdateEvent(attemptId,
      "Task got killed"));
  assertFalse(
      "InternalError occurred trying to handle TA_DIAGNOSTICS_UPDATE on assigned task",
      eventHandler.internalError);
}
 
Example 15
Source File: EventInputFormatTest.java    From gemfirexd-oss with Apache License 2.0
private void doTestRowSerDe(boolean concurrencyChecks) throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  final long statTS = System.currentTimeMillis();
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "' batchtimeinterval 5000 milliseconds");
  String concurrency = "persistent ENABLE CONCURRENCY CHECKS";
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) partition by primary key buckets 1 hdfsstore (myhdfs) "
      +(concurrencyChecks ? concurrency : ""));

  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  ps.setInt(1, 1);
  ps.setString(2, "Value-1");
  ps.execute();
  
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);
  
  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  RecordReader<Key, Row> rr = ipformat.getRecordReader(splits[0], job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();
  assertTrue(rr.next(key, value));
  assertEquals(1, value.getRowAsResultSet().getInt(1));
  assertEquals("Value-1", value.getRowAsResultSet().getString(2));
  assertTrue(value.getTimestamp() > statTS);
  assertFalse(value.getRowAsResultSet().next());
  
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataOutputStream dos = new DataOutputStream(baos);
  value.write(dos);
  dos.close();
  
  byte[] buf = baos.toByteArray();
  DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buf));
  Row row = new Row();
  row.readFields(dis);
  dis.close();
  
  assertEquals(1, row.getRowAsResultSet().getInt(1));
  assertEquals("Value-1", row.getRowAsResultSet().getString(2));
  assertFalse(value.getRowAsResultSet().next());
  
  TestUtil.shutDown();
}
 
Example 16
Source File: SolrIndexer.java    From anthelion with Apache License 2.0
public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
    List<Path> segments, boolean noCommit, boolean deleteGone, String solrParams,
    boolean filter, boolean normalize) throws IOException {
    
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrIndexer: starting at " + sdf.format(start));

  final JobConf job = new NutchJob(getConf());
  job.setJobName("index-solr " + solrUrl);

  LOG.info("SolrIndexer: deleting gone documents: " + deleteGone);
  LOG.info("SolrIndexer: URL filtering: " + filter);
  LOG.info("SolrIndexer: URL normalizing: " + normalize);
  
  IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

  job.set(SolrConstants.SERVER_URL, solrUrl);
  job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
  job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
  job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);
  if (solrParams != null) {
    job.set(SolrConstants.PARAMS, solrParams);
  }
  NutchIndexWriterFactory.addClassToConf(job, SolrWriter.class);

  job.setReduceSpeculativeExecution(false);

  final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
                       new Random().nextInt());

  FileOutputFormat.setOutputPath(job, tmp);
  try {
    JobClient.runJob(job);
    // do the commits once and for all the reducers in one go
    SolrServer solr =  SolrUtils.getCommonsHttpSolrServer(job);

    if (!noCommit) {
      solr.commit();
    }
    long end = System.currentTimeMillis();
    LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
  }
  catch (Exception e){
    LOG.error(e.toString());
  } finally {
    FileSystem.get(job).delete(tmp, true);
  }
}
 
Example 17
Source File: Submitter.java    From RDFS with Apache License 2.0
/**
 * Set whether to keep the command file for debugging
 * @param conf the configuration to modify
 * @param keep the new value
 */
public static void setKeepCommandFile(JobConf conf, boolean keep) {
  conf.setBoolean("hadoop.pipes.command-file.keep", keep);
}
 
Example 18
Source File: Submitter.java    From hadoop-gpu with Apache License 2.0
/**
 * Set whether the job will use a Java RecordWriter.
 * @param conf the configuration to modify
 * @param value the new value to set
 */
public static void setIsJavaRecordWriter(JobConf conf, boolean value) {
  conf.setBoolean("hadoop.pipes.java.recordwriter", value);
}
 
Example 19
Source File: Submitter.java    From big-c with Apache License 2.0
/**
 * Set whether the Mapper is written in Java.
 * @param conf the configuration to modify
 * @param value the new value
 */
public static void setIsJavaMapper(JobConf conf, boolean value) {
  conf.setBoolean(Submitter.IS_JAVA_MAP, value);
}
 
Example 20
Source File: Submitter.java    From hadoop with Apache License 2.0
/**
 * Set whether the job is using a Java RecordReader.
 * @param conf the configuration to modify
 * @param value the new value
 */
public static void setIsJavaRecordReader(JobConf conf, boolean value) {
  conf.setBoolean(Submitter.IS_JAVA_RR, value);
}