Java Code Examples for org.apache.hadoop.mapred.JobContext

The following examples show how to use org.apache.hadoop.mapred.JobContext. These examples are extracted from open source projects; the source project, source file, and license are listed above each example.
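Before the examples, a quick orientation: in the mapred API, JobContext appears mainly as the argument to the job-level callbacks of an OutputCommitter (setupJob, commitJob, abortJob), and its getJobConf() method exposes the job configuration. The sketch below is not taken from any of the projects listed on this page; it is a minimal, hypothetical committer (the class name, the marker file names, and the markerfile.dir key are invented for illustration) showing how those callbacks receive a JobContext.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.TaskAttemptContext;

public class MarkerFileOutputCommitter extends OutputCommitter {

  // Writes an empty marker file into a directory taken from the job configuration.
  private void writeMarker(JobContext jobContext, String name) throws IOException {
    Path marker = new Path(jobContext.getJobConf().get("markerfile.dir", "/tmp"), name);
    FileSystem fs = marker.getFileSystem(jobContext.getJobConf());
    fs.create(marker, true).close();
  }

  @Override
  public void setupJob(JobContext jobContext) throws IOException {
    writeMarker(jobContext, "_JOB_SETUP");
  }

  @Override
  public void commitJob(JobContext jobContext) throws IOException {
    writeMarker(jobContext, "_JOB_COMMITTED");
  }

  @Override
  public void abortJob(JobContext jobContext, int status) throws IOException {
    writeMarker(jobContext, "_JOB_ABORTED");
  }

  // The task-level callbacks take a TaskAttemptContext instead of a JobContext.
  @Override
  public void setupTask(TaskAttemptContext taskContext) throws IOException { }

  @Override
  public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
    return false;
  }

  @Override
  public void commitTask(TaskAttemptContext taskContext) throws IOException { }

  @Override
  public void abortTask(TaskAttemptContext taskContext) throws IOException { }
}

Examples 25 through 27 below show the same three callbacks implemented by Hadoop's own CustomOutputCommitter test helper, and Example 8 shows a committer being obtained and driven through jobCtx.getJobConf().getOutputCommitter().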
Example 1
Source Project: Flink-CEPplus   Source File: HadoopOutputFormatTest.java    License: Apache License 2.0
@Test
public void testOpen() throws Exception {

	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.open(1, 1);

	verify(jobConf, times(2)).getOutputCommitter();
	verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
	verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
 
Example 2
Source Project: flink   Source File: HadoopOutputFormatTest.java    License: Apache License 2.0
@Test
public void testOpen() throws Exception {

	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.open(1, 1);

	verify(jobConf, times(2)).getOutputCommitter();
	verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
	verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
 
Example 3
Source Project: DataLink   Source File: HdfsHelper.java    License: Apache License 2.0
TextWriterProxy(Configuration config, String fileName) throws IOException {
	fieldDelimiter = config.getChar(Key.FIELD_DELIMITER);
	columns = config.getListConfiguration(Key.COLUMN);

	String compress = config.getString(Key.COMPRESS, null);
	SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmm");
	String attempt = "attempt_" + dateFormat.format(new Date()) + "_0001_m_000000_0";
	Path outputPath = new Path(fileName);
	// TODO: the TASK_ATTEMPT_ID needs further verification
	conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
	FileOutputFormat outFormat = new TextOutputFormat();
	outFormat.setOutputPath(conf, outputPath);
	outFormat.setWorkOutputPath(conf, outputPath);
	if (null != compress) {
		Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
		if (null != codecClass) {
			outFormat.setOutputCompressorClass(conf, codecClass);
		}
	}

	writer = outFormat.getRecordWriter(fileSystem, conf, outputPath.toString(), Reporter.NULL);
}
 
Example 4
private void setHadoopJobConfigs(Job job, int numInputPaths) {
  job.getConfiguration().set(JobContext.JOB_NAME, this.getClass().getName());
  // Turn this on so that the class paths specified by the user are always used first.
  job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, "true");
  // Turn this off since we don't need the empty _SUCCESS marker file in the output directory
  job.getConfiguration().set(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "false");

  job.setJarByClass(HadoopSegmentPreprocessingJob.class);

  String hadoopTokenFileLocation = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
  if (hadoopTokenFileLocation != null) {
    job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocation);
  }

  // Mapper configs.
  job.setMapperClass(SegmentPreprocessingMapper.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(AvroValue.class);
  job.getConfiguration().setInt(JobContext.NUM_MAPS, numInputPaths);

  // Reducer configs.
  job.setReducerClass(SegmentPreprocessingReducer.class);
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);
}
 
Example 5
Source Project: hadoop   Source File: TestJobEndNotifier.java    License: Apache License 2.0
@Test
public void testAbsentNotificationOnNotLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 1, false));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl)app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
    .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shutdown.
  // Unregistration fails: isLastAMRetry is recalculated, and this is not the last retry.
  app.shutDownJob();
  // Not the last AM attempt, so the user should see that the job is still running.
  app.waitForState(job, JobState.RUNNING);
  Assert.assertFalse(app.isLastAMRetry());
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
 
Example 6
Source Project: big-c   Source File: TestJobEndNotifier.java    License: Apache License 2.0
@Test
public void testNotificationOnLastRetryNormalShutdown() throws Exception {
  HttpServer2 server = startHttpServer();
  // Act like it is the second attempt. Default max attempts is 2
  MRApp app = spy(new MRAppWithCustomContainerAllocator(
      2, 2, true, this.getClass().getName(), true, 2, true));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl)app.submit(conf);
  app.waitForInternalState(job, JobStateInternal.SUCCEEDED);
  // Unregistration succeeds: successfullyUnregistered is set
  app.shutDownJob();
  Assert.assertTrue(app.isLastAMRetry());
  Assert.assertEquals(1, JobEndServlet.calledTimes);
  Assert.assertEquals("jobid=" + job.getID() + "&status=SUCCEEDED",
      JobEndServlet.requestUri.getQuery());
  Assert.assertEquals(JobState.SUCCEEDED.toString(),
    JobEndServlet.foundJobState);
  server.stop();
}
 
Example 7
Source Project: big-c   Source File: TestJobEndNotifier.java    License: Apache License 2.0
@Test
public void testAbsentNotificationOnNotLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 1, false));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl)app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
    .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shutdown.
  // Unregistration fails: isLastAMRetry is recalculated, and this is not the last retry.
  app.shutDownJob();
  // Not the last AM attempt, so the user should see that the job is still running.
  app.waitForState(job, JobState.RUNNING);
  Assert.assertFalse(app.isLastAMRetry());
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
 
Example 8
Source Project: ignite   Source File: HadoopV1CleanupTask.java    License: Apache License 2.0
/** {@inheritDoc} */
@Override public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopV2TaskContext ctx = (HadoopV2TaskContext)taskCtx;

    JobContext jobCtx = ctx.jobContext();

    try {
        OutputCommitter committer = jobCtx.getJobConf().getOutputCommitter();

        if (abort)
            committer.abortJob(jobCtx, JobStatus.State.FAILED);
        else
            committer.commitJob(jobCtx);
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}
 
Example 9
Source Project: flink   Source File: HadoopOutputFormatTest.java    License: Apache License 2.0
@Test
public void testOpen() throws Exception {

	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.open(1, 1);

	verify(jobConf, times(2)).getOutputCommitter();
	verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
	verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
 
Example 10
Source Project: Flink-CEPplus   Source File: HadoopOutputFormatBase.java    License: Apache License 2.0
/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

	// enforce sequential open() calls
	synchronized (OPEN_MUTEX) {
		if (Integer.toString(taskNumber + 1).length() > 6) {
			throw new IOException("Task id too large.");
		}

		TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
				+ String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
				+ Integer.toString(taskNumber + 1)
				+ "_0");

		this.jobConf.set("mapred.task.id", taskAttemptID.toString());
		this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
		// for hadoop 2.2
		this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
		this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

		this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

		this.outputCommitter = this.jobConf.getOutputCommitter();

		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

		this.outputCommitter.setupJob(jobContext);

		this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
	}
}
 
Example 11
Source Project: Flink-CEPplus   Source File: HadoopOutputFormatBase.java    License: Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

	try {
		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
		OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

		// finalize HDFS output format
		outputCommitter.commitJob(jobContext);
	} catch (Exception e) {
		throw new RuntimeException(e);
	}
}
 
Example 12
Source Project: Flink-CEPplus   Source File: HadoopOutputFormatTest.java    License: Apache License 2.0
@Test
public void testFinalizeGlobal() throws Exception {
	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.finalizeGlobal(1);

	verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
 
Example 13
Source Project: flink   Source File: HadoopOutputFormatBase.java    License: Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

	try {
		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
		OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

		// finalize HDFS output format
		outputCommitter.commitJob(jobContext);
	} catch (Exception e) {
		throw new RuntimeException(e);
	}
}
 
Example 14
Source Project: flink   Source File: HadoopOutputFormatTest.java    License: Apache License 2.0
@Test
public void testFinalizeGlobal() throws Exception {
	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.finalizeGlobal(1);

	verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
 
Example 15
Source Project: flink   Source File: HiveTableOutputFormat.java    License: Apache License 2.0
private void commitJob(String location) throws IOException {
	jobConf.set(OUTDIR, location);
	JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
	OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
	// finalize HDFS output format
	outputCommitter.commitJob(jobContext);
}
 
Example 16
Source Project: hadoop   Source File: TestJobEndNotifier.java    License: Apache License 2.0
@Test
public void testNotificationOnLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 2, false));
  // Currently, isLastRetry is always false at the beginning of MRAppMaster,
  // unless the staging area exists or a commit has already started.
  // Now manually set isLastRetry to true; it should be reset to false when
  // unregistration fails.
  app.isLastAMRetry = true;
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl)app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
    .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shutdown. User should see FAILED state.
  // Unregistration fails: isLastAMRetry is recalculated and is no longer true.
  // The reboot stops the service internally, so we don't need to shut down twice.
  app.waitForServiceToStop(10000);
  Assert.assertFalse(app.isLastAMRetry());
  // Since it's not the last retry, JobEndServlet was not called
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
 
Example 17
Source Project: hadoop   Source File: TestGridMixClasses.java    License: Apache License 2.0
@SuppressWarnings({"rawtypes", "unchecked"})
@Test (timeout=10000)
public void testLoadMapper() throws Exception {

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskId = new TaskAttemptID();
  RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();

  LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();

  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  LoadSplit split = getLoadSplit();

  MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext = new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(
          conf, taskId, reader, writer, committer, reporter, split);
  // context
  Context ctx = new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>()
          .getMapContext(mapContext);

  reader.initialize(split, ctx);
  ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(
          ctx.getConfiguration(), true);

  LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
  // setup, map, cleanup
  mapper.run(ctx);

  Map<GridmixKey, GridmixRecord> data = writer.getData();
  // check result
  assertEquals(2, data.size());

}
 
Example 18
Source Project: hadoop   Source File: TestGridMixClasses.java    License: Apache License 2.0
@SuppressWarnings({"unchecked", "rawtypes"})
@Test (timeout=30000)
public void testSleepMapper() throws Exception {
  SleepJob.SleepMapper test = new SleepJob.SleepMapper();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  FakeRecordLLReader reader = new FakeRecordLLReader();
  LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  SleepSplit split = getSleepSplit();
  MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext = new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(
          conf, taskId, reader, writer, committer, reporter, split);
  Context context = new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>()
          .getMapContext(mapcontext);

  long start = System.currentTimeMillis();
  LOG.info("start:" + start);
  LongWritable key = new LongWritable(start + 2000);
  LongWritable value = new LongWritable(start + 2000);
  // should sleep for 2 seconds
  test.map(key, value, context);
  LOG.info("finish:" + System.currentTimeMillis());
  assertTrue(System.currentTimeMillis() >= (start + 2000));

  test.cleanup(context);
  assertEquals(1, writer.getData().size());
}
 
Example 19
Source Project: big-c   Source File: TestJobEndNotifier.java    License: Apache License 2.0
@Test
public void testNotificationOnLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 2, false));
  // Currently, isLastRetry is always false at the beginning of MRAppMaster,
  // unless the staging area exists or a commit has already started.
  // Now manually set isLastRetry to true; it should be reset to false when
  // unregistration fails.
  app.isLastAMRetry = true;
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl)app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
    .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shutdown. User should see FAILED state.
  // Unregistration fails: isLastAMRetry is recalculated and is no longer true.
  // The reboot stops the service internally, so we don't need to shut down twice.
  app.waitForServiceToStop(10000);
  Assert.assertFalse(app.isLastAMRetry());
  // Since it's not the last retry, JobEndServlet was not called
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
 
Example 20
Source Project: big-c   Source File: TestGridMixClasses.java    License: Apache License 2.0
@SuppressWarnings({"rawtypes", "unchecked"})
@Test (timeout=10000)
public void testLoadMapper() throws Exception {

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskId = new TaskAttemptID();
  RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();

  LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();

  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  LoadSplit split = getLoadSplit();

  MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext = new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(
          conf, taskId, reader, writer, committer, reporter, split);
  // context
  Context ctx = new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>()
          .getMapContext(mapContext);

  reader.initialize(split, ctx);
  ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(
          ctx.getConfiguration(), true);

  LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
  // setup, map, cleanup
  mapper.run(ctx);

  Map<GridmixKey, GridmixRecord> data = writer.getData();
  // check result
  assertEquals(2, data.size());

}
 
Example 21
Source Project: big-c   Source File: TestGridMixClasses.java    License: Apache License 2.0
@SuppressWarnings({"unchecked", "rawtypes"})
@Test (timeout=30000)
public void testSleepMapper() throws Exception {
  SleepJob.SleepMapper test = new SleepJob.SleepMapper();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  FakeRecordLLReader reader = new FakeRecordLLReader();
  LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  SleepSplit split = getSleepSplit();
  MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext = new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(
          conf, taskId, reader, writer, committer, reporter, split);
  Context context = new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>()
          .getMapContext(mapcontext);

  long start = System.currentTimeMillis();
  LOG.info("start:" + start);
  LongWritable key = new LongWritable(start + 2000);
  LongWritable value = new LongWritable(start + 2000);
  // should sleep for 2 seconds
  test.map(key, value, context);
  LOG.info("finish:" + System.currentTimeMillis());
  assertTrue(System.currentTimeMillis() >= (start + 2000));

  test.cleanup(context);
  assertEquals(1, writer.getData().size());
}
 
Example 22
Source Project: flink   Source File: HadoopOutputFormatBase.java    License: Apache License 2.0
/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

	// enforce sequential open() calls
	synchronized (OPEN_MUTEX) {
		if (Integer.toString(taskNumber + 1).length() > 6) {
			throw new IOException("Task id too large.");
		}

		TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
				+ String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
				+ Integer.toString(taskNumber + 1)
				+ "_0");

		this.jobConf.set("mapred.task.id", taskAttemptID.toString());
		this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
		// for hadoop 2.2
		this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
		this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

		this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

		this.outputCommitter = this.jobConf.getOutputCommitter();

		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

		this.outputCommitter.setupJob(jobContext);

		this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
	}
}
 
Example 23
Source Project: flink   Source File: HadoopOutputFormatBase.java    License: Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

	try {
		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
		OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

		// finalize HDFS output format
		outputCommitter.commitJob(jobContext);
	} catch (Exception e) {
		throw new RuntimeException(e);
	}
}
 
Example 24
Source Project: flink   Source File: HadoopOutputFormatTest.java    License: Apache License 2.0
@Test
public void testFinalizeGlobal() throws Exception {
	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.finalizeGlobal(1);

	verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
 
Example 25
Source Project: hadoop   Source File: CustomOutputCommitter.java    License: Apache License 2.0
@Override
public void setupJob(JobContext jobContext) throws IOException {
  writeFile(jobContext.getJobConf(), JOB_SETUP_FILE_NAME);
}
 
Example 26
Source Project: hadoop   Source File: CustomOutputCommitter.java    License: Apache License 2.0
@Override
public void commitJob(JobContext jobContext) throws IOException {
  super.commitJob(jobContext);
  writeFile(jobContext.getJobConf(), JOB_COMMIT_FILE_NAME);
}
 
Example 27
Source Project: hadoop   Source File: CustomOutputCommitter.java    License: Apache License 2.0
@Override
public void abortJob(JobContext jobContext, int status) 
throws IOException {
  super.abortJob(jobContext, status);
  writeFile(jobContext.getJobConf(), JOB_ABORT_FILE_NAME);
}
 
Example 28
Source Project: hadoop   Source File: JobEndNotifier.java    License: Apache License 2.0
/**
 * Parse the URL that needs to be notified of the end of the job, along
 * with the number of retries in case of failure, the amount of time to
 * wait between retries, and the proxy settings.
 * @param conf the configuration 
 */
public void setConf(Configuration conf) {
  this.conf = conf;
  
  numTries = Math.min(
    conf.getInt(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, 0) + 1
    , conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1)
  );
  waitInterval = Math.min(
    conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5000)
    , conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5000)
  );
  waitInterval = (waitInterval < 0) ? 5000 : waitInterval;

  timeout = conf.getInt(JobContext.MR_JOB_END_NOTIFICATION_TIMEOUT,
      JobContext.DEFAULT_MR_JOB_END_NOTIFICATION_TIMEOUT);

  userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL);

  proxyConf = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY);

  //Configure the proxy to use if it's set. It should be set like
  //proxyType@proxyHostname:port
  if(proxyConf != null && !proxyConf.equals("") &&
       proxyConf.lastIndexOf(":") != -1) {
    int typeIndex = proxyConf.indexOf("@");
    Proxy.Type proxyType = Proxy.Type.HTTP;
    if(typeIndex != -1 &&
      proxyConf.substring(0, typeIndex).compareToIgnoreCase("socks") == 0) {
      proxyType = Proxy.Type.SOCKS;
    }
    String hostname = proxyConf.substring(typeIndex + 1,
      proxyConf.lastIndexOf(":"));
    String portConf = proxyConf.substring(proxyConf.lastIndexOf(":") + 1);
    try {
      int port = Integer.parseInt(portConf);
      proxyToUse = new Proxy(proxyType,
        new InetSocketAddress(hostname, port));
      Log.info("Job end notification using proxy type \"" + proxyType + 
      "\" hostname \"" + hostname + "\" and port \"" + port + "\"");
    } catch(NumberFormatException nfe) {
      Log.warn("Job end notification couldn't parse configured proxy's port "
        + portConf + ". Not going to use a proxy");
    }
  }

}
 
Example 29
Source Project: hadoop   Source File: TestKeyFieldBasedComparator.java    License: Apache License 2.0
public TestKeyFieldBasedComparator() throws IOException {
  super(HadoopTestCase.LOCAL_MR, HadoopTestCase.LOCAL_FS, 1, 1);
  conf = createJobConf();
  localConf = createJobConf();
  localConf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
}
 
Example 30
Source Project: hadoop   Source File: TestKeyFieldBasedComparator.java    License: Apache License 2.0
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up the input data: two lines written to a single file
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}