org.apache.hadoop.mapred.JobContext Java Examples

The following examples show how to use org.apache.hadoop.mapred.JobContext. Each example is drawn from an open-source project; the source file and project are noted above it.
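Before the examples, here is a minimal sketch of how a mapred JobContext is typically obtained: wrap a JobConf in a JobContextImpl, exactly as several of the Flink examples below do before driving an OutputCommitter. This assumes Hadoop 2.x on the classpath; the class name and job name are illustrative.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;

public class JobContextSketch {
	public static void main(String[] args) {
		// Wrap a JobConf in a JobContextImpl, as the Flink examples below do.
		JobConf jobConf = new JobConf();
		jobConf.setJobName("job-context-demo"); // illustrative job name
		JobContext jobContext = new JobContextImpl(jobConf, new JobID());
		// The mapred JobContext exposes the underlying JobConf directly.
		System.out.println(jobContext.getJobConf().getJobName());
	}
}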
Example #1
Source File: HadoopOutputFormatTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testOpen() throws Exception {

	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.open(1, 1);

	verify(jobConf, times(2)).getOutputCommitter();
	verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
	verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
 
Example #2
Source File: HadoopOutputFormatTest.java    From flink with Apache License 2.0
@Test
public void testOpen() throws Exception {

	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.open(1, 1);

	verify(jobConf, times(2)).getOutputCommitter();
	verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
	verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
 
Example #3
Source File: HadoopV1CleanupTask.java    From ignite with Apache License 2.0
/** {@inheritDoc} */
@Override public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopV2TaskContext ctx = (HadoopV2TaskContext)taskCtx;

    JobContext jobCtx = ctx.jobContext();

    try {
        OutputCommitter committer = jobCtx.getJobConf().getOutputCommitter();

        if (abort)
            committer.abortJob(jobCtx, JobStatus.State.FAILED);
        else
            committer.commitJob(jobCtx);
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}
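The cleanup pattern in Example #3 — look up the OutputCommitter from the JobConf, then commit or abort through a JobContext — can be written standalone. Below is a minimal, hedged sketch of that pattern; the class and method names and the abort flag are illustrative, and the JobContextImpl construction mirrors the HadoopOutputFormatBase examples further down.

import java.io.IOException;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapreduce.JobStatus;

public class JobCleanupSketch {
	// Commit the job on success, abort it on failure, mirroring
	// HadoopV1CleanupTask above. The abort flag is illustrative.
	static void finish(JobConf jobConf, boolean abort) throws IOException {
		JobContext jobContext = new JobContextImpl(jobConf, new JobID());
		OutputCommitter committer = jobConf.getOutputCommitter();
		if (abort) {
			committer.abortJob(jobContext, JobStatus.State.FAILED);
		} else {
			committer.commitJob(jobContext);
		}
	}
}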
 
Example #4
Source File: HdfsHelper.java    From DataLink with Apache License 2.0
TextWriterProxy(Configuration config, String fileName) throws IOException {
	fieldDelimiter = config.getChar(Key.FIELD_DELIMITER);
	columns = config.getListConfiguration(Key.COLUMN);

	String compress = config.getString(Key.COMPRESS, null);
	SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmm");
	String attempt = "attempt_" + dateFormat.format(new Date()) + "_0001_m_000000_0";
	Path outputPath = new Path(fileName);
	// TODO: the proper value for TASK_ATTEMPT_ID still needs to be determined
	conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
	FileOutputFormat outFormat = new TextOutputFormat();
	outFormat.setOutputPath(conf, outputPath);
	outFormat.setWorkOutputPath(conf, outputPath);
	if (null != compress) {
		Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
		if (null != codecClass) {
			outFormat.setOutputCompressorClass(conf, codecClass);
		}
	}

	writer = outFormat.getRecordWriter(fileSystem, conf, outputPath.toString(), Reporter.NULL);
}
 
Example #5
Source File: TestJobEndNotifier.java    From big-c with Apache License 2.0
@Test
public void testAbsentNotificationOnNotLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 1, false));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl)app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
    .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shut down.
  // Unregistration fails: isLastAMRetry is recalculated, and this is not the last retry.
  app.shutDownJob();
  // Not the last AM attempt, so the user should see that the job is still running.
  app.waitForState(job, JobState.RUNNING);
  Assert.assertFalse(app.isLastAMRetry());
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
 
Example #6
Source File: TestJobEndNotifier.java    From big-c with Apache License 2.0
@Test
public void testNotificationOnLastRetryNormalShutdown() throws Exception {
  HttpServer2 server = startHttpServer();
  // Act like it is the second attempt. Default max attempts is 2
  MRApp app = spy(new MRAppWithCustomContainerAllocator(
      2, 2, true, this.getClass().getName(), true, 2, true));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl)app.submit(conf);
  app.waitForInternalState(job, JobStateInternal.SUCCEEDED);
  // Unregistration succeeds: successfullyUnregistered is set
  app.shutDownJob();
  Assert.assertTrue(app.isLastAMRetry());
  Assert.assertEquals(1, JobEndServlet.calledTimes);
  Assert.assertEquals("jobid=" + job.getID() + "&status=SUCCEEDED",
      JobEndServlet.requestUri.getQuery());
  Assert.assertEquals(JobState.SUCCEEDED.toString(),
    JobEndServlet.foundJobState);
  server.stop();
}
 
Example #7
Source File: TestJobEndNotifier.java    From hadoop with Apache License 2.0
@Test
public void testAbsentNotificationOnNotLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 1, false));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl)app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
    .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shut down.
  // Unregistration fails: isLastAMRetry is recalculated, and this is not the last retry.
  app.shutDownJob();
  // Not the last AM attempt, so the user should see that the job is still running.
  app.waitForState(job, JobState.RUNNING);
  Assert.assertFalse(app.isLastAMRetry());
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
 
Example #8
Source File: HadoopSegmentPreprocessingJob.java    From incubator-pinot with Apache License 2.0
private void setHadoopJobConfigs(Job job, int numInputPaths) {
  job.getConfiguration().set(JobContext.JOB_NAME, this.getClass().getName());
  // Turn this on so that the class paths the user specifies are used first.
  job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, "true");
  // Turn this off since we don't need an empty _SUCCESS marker file in the output directory.
  job.getConfiguration().set(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "false");

  job.setJarByClass(HadoopSegmentPreprocessingJob.class);

  String hadoopTokenFileLocation = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
  if (hadoopTokenFileLocation != null) {
    job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocation);
  }

  // Mapper configs.
  job.setMapperClass(SegmentPreprocessingMapper.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(AvroValue.class);
  job.getConfiguration().setInt(JobContext.NUM_MAPS, numInputPaths);

  // Reducer configs.
  job.setReducerClass(SegmentPreprocessingReducer.class);
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);
}
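The JobContext constants used in the example above (JOB_NAME, NUM_MAPS) are plain configuration-key strings inherited from the MRJobConfig interface in Hadoop 2.x, so the same settings can be applied to a bare Configuration. A small sketch; the job name and map count below are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobContext;

public class JobConfigKeysSketch {
	public static void main(String[] args) {
		// JobContext.JOB_NAME and JobContext.NUM_MAPS resolve to key strings
		// such as "mapreduce.job.name", inherited from MRJobConfig.
		Configuration conf = new Configuration();
		conf.set(JobContext.JOB_NAME, "preprocessing-demo"); // illustrative name
		conf.setInt(JobContext.NUM_MAPS, 8);                 // illustrative value
		System.out.println(conf.get(JobContext.JOB_NAME));
	}
}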
 
Example #9
Source File: HadoopOutputFormatTest.java    From flink with Apache License 2.0
@Test
public void testFinalizeGlobal() throws Exception {
	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.finalizeGlobal(1);

	verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
 
Example #10
Source File: HadoopOutputFormatTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testFinalizeGlobal() throws Exception {
	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.finalizeGlobal(1);

	verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
 
Example #11
Source File: HadoopOutputFormatBase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException Thrown if the task id is too large or the record writer cannot be created.
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

	// enforce sequential open() calls
	synchronized (OPEN_MUTEX) {
		if (Integer.toString(taskNumber + 1).length() > 6) {
			throw new IOException("Task id too large.");
		}

		TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
				+ String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
				+ Integer.toString(taskNumber + 1)
				+ "_0");

		this.jobConf.set("mapred.task.id", taskAttemptID.toString());
		this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
		// for hadoop 2.2
		this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
		this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

		this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

		this.outputCommitter = this.jobConf.getOutputCommitter();

		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

		this.outputCommitter.setupJob(jobContext);

		this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
	}
}
 
Example #12
Source File: HadoopOutputFormatBase.java    From flink with Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

	try {
		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
		OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

		// finalize HDFS output format
		outputCommitter.commitJob(jobContext);
	} catch (Exception e) {
		throw new RuntimeException(e);
	}
}
 
Example #13
Source File: HiveTableOutputFormat.java    From flink with Apache License 2.0
private void commitJob(String location) throws IOException {
	jobConf.set(OUTDIR, location);
	JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
	OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
	// finalize HDFS output format
	outputCommitter.commitJob(jobContext);
}
 
Example #14
Source File: HadoopOutputFormatBase.java    From Flink-CEPplus with Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

	try {
		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
		OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

		// finalize HDFS output format
		outputCommitter.commitJob(jobContext);
	} catch (Exception e) {
		throw new RuntimeException(e);
	}
}
 
Example #15
Source File: TestJobEndNotifier.java    From hadoop with Apache License 2.0
@Test
public void testNotificationOnLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 2, false));
  // Currently, isLastRetry is always false at the beginning of MRAppMaster,
  // unless the staging area exists or a commit had already started at
  // the beginning.
  // Now manually set isLastRetry to true; it should reset to false when
  // unregistration fails.
  app.isLastAMRetry = true;
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl)app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
    .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shut down; the user should see the FAILED state.
  // Unregistration fails: isLastAMRetry is recalculated, and this is not the last retry.
  // The reboot stops the service internally, so we don't need to shut down twice.
  app.waitForServiceToStop(10000);
  Assert.assertFalse(app.isLastAMRetry());
  // Since it's not the last retry, JobEndServlet was not called.
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
 
Example #16
Source File: TestGridMixClasses.java    From hadoop with Apache License 2.0
@SuppressWarnings({"unchecked", "rawtypes"})
@Test (timeout=30000)
public void testSleepMapper() throws Exception {
  SleepJob.SleepMapper test = new SleepJob.SleepMapper();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  FakeRecordLLReader reader = new FakeRecordLLReader();
  LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  SleepSplit split = getSleepSplit();
  MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext = new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(
          conf, taskId, reader, writer, committer, reporter, split);
  Context context = new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>()
          .getMapContext(mapcontext);

  long start = System.currentTimeMillis();
  LOG.info("start:" + start);
  LongWritable key = new LongWritable(start + 2000);
  LongWritable value = new LongWritable(start + 2000);
  // should sleep for 2 seconds
  test.map(key, value, context);
  LOG.info("finish:" + System.currentTimeMillis());
  assertTrue(System.currentTimeMillis() >= (start + 2000));

  test.cleanup(context);
  assertEquals(1, writer.getData().size());
}
 
Example #17
Source File: TestJobEndNotifier.java    From big-c with Apache License 2.0
@Test
public void testNotificationOnLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 2, false));
  // Currently, isLastRetry is always false at the beginning of MRAppMaster,
  // unless the staging area exists or a commit had already started at
  // the beginning.
  // Now manually set isLastRetry to true; it should reset to false when
  // unregistration fails.
  app.isLastAMRetry = true;
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl)app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
    .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shut down; the user should see the FAILED state.
  // Unregistration fails: isLastAMRetry is recalculated, and this is not the last retry.
  // The reboot stops the service internally, so we don't need to shut down twice.
  app.waitForServiceToStop(10000);
  Assert.assertFalse(app.isLastAMRetry());
  // Since it's not the last retry, JobEndServlet was not called.
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
 
Example #18
Source File: TestGridMixClasses.java    From big-c with Apache License 2.0
@SuppressWarnings({"rawtypes", "unchecked"})
@Test (timeout=10000)
public void testLoadMapper() throws Exception {

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskId = new TaskAttemptID();
  RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();

  LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();

  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  LoadSplit split = getLoadSplit();

  MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext = new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(
          conf, taskId, reader, writer, committer, reporter, split);
  // context
  Context ctx = new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>()
          .getMapContext(mapContext);

  reader.initialize(split, ctx);
  ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(
          ctx.getConfiguration(), true);

  LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
  // setup, map, cleanup
  mapper.run(ctx);

  Map<GridmixKey, GridmixRecord> data = writer.getData();
  // check result
  assertEquals(2, data.size());

}
 
Example #19
Source File: TestGridMixClasses.java    From big-c with Apache License 2.0
@SuppressWarnings({"unchecked", "rawtypes"})
@Test (timeout=30000)
public void testSleepMapper() throws Exception {
  SleepJob.SleepMapper test = new SleepJob.SleepMapper();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  FakeRecordLLReader reader = new FakeRecordLLReader();
  LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  SleepSplit split = getSleepSplit();
  MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext = new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(
          conf, taskId, reader, writer, committer, reporter, split);
  Context context = new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>()
          .getMapContext(mapcontext);

  long start = System.currentTimeMillis();
  LOG.info("start:" + start);
  LongWritable key = new LongWritable(start + 2000);
  LongWritable value = new LongWritable(start + 2000);
  // should sleep for 2 seconds
  test.map(key, value, context);
  LOG.info("finish:" + System.currentTimeMillis());
  assertTrue(System.currentTimeMillis() >= (start + 2000));

  test.cleanup(context);
  assertEquals(1, writer.getData().size());
}
 
Example #20
Source File: HadoopOutputFormatBase.java    From flink with Apache License 2.0
/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException Thrown if the task id is too large or the record writer cannot be created.
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

	// enforce sequential open() calls
	synchronized (OPEN_MUTEX) {
		if (Integer.toString(taskNumber + 1).length() > 6) {
			throw new IOException("Task id too large.");
		}

		TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
				+ String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
				+ Integer.toString(taskNumber + 1)
				+ "_0");

		this.jobConf.set("mapred.task.id", taskAttemptID.toString());
		this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
		// for hadoop 2.2
		this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
		this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

		this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

		this.outputCommitter = this.jobConf.getOutputCommitter();

		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

		this.outputCommitter.setupJob(jobContext);

		this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
	}
}
 
Example #21
Source File: TestGridMixClasses.java    From hadoop with Apache License 2.0
@SuppressWarnings({"rawtypes", "unchecked"})
@Test (timeout=10000)
public void testLoadMapper() throws Exception {

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskId = new TaskAttemptID();
  RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();

  LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();

  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  LoadSplit split = getLoadSplit();

  MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext = new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(
          conf, taskId, reader, writer, committer, reporter, split);
  // context
  Context ctx = new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>()
          .getMapContext(mapContext);

  reader.initialize(split, ctx);
  ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(
          ctx.getConfiguration(), true);

  LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
  // setup, map, cleanup
  mapper.run(ctx);

  Map<GridmixKey, GridmixRecord> data = writer.getData();
  // check result
  assertEquals(2, data.size());

}
 
Example #22
Source File: KafkaInputFormat.java    From HiveKa with Apache License 2.0
public static void setKafkaClientTimeout(JobContext job, int val) {
	job.getConfiguration().setInt(KAFKA_CLIENT_SO_TIMEOUT, val);
}
 
Example #23
Source File: BlurHiveMRLoaderOutputCommitter.java    From incubator-retired-blur with Apache License 2.0
@Override
public void commitJob(JobContext context) throws IOException {
  finishBulkJob(context, true);
}
 
Example #24
Source File: KafkaInputFormat.java    From HiveKa with Apache License 2.0
public static int getKafkaClientTimeout(JobContext job) {
	return job.getConfiguration().getInt(KAFKA_CLIENT_SO_TIMEOUT, 60000);
}
 
Example #25
Source File: KafkaInputFormat.java    From HiveKa with Apache License 2.0
public static void setKafkaMaxPullHrs(JobContext job, int val) {
	job.getConfiguration().setInt(KAFKA_MAX_PULL_HRS, val);
}
 
Example #26
Source File: KafkaInputFormat.java    From HiveKa with Apache License 2.0
public static int getKafkaMaxPullHrs(JobContext job) {
	return job.getConfiguration().getInt(KAFKA_MAX_PULL_HRS, -1);
}
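The KafkaInputFormat examples above all follow one pattern: static helpers that read and write a custom key through JobContext.getConfiguration(). A minimal sketch of adding another accessor pair in the same style; the key name and default value below are hypothetical, not part of HiveKa.

import org.apache.hadoop.mapred.JobContext;

public final class KafkaConfSketch {
	// Hypothetical config key, mirroring the KafkaInputFormat helpers above.
	public static final String KAFKA_FETCH_SIZE = "kafka.fetch.size";

	public static void setKafkaFetchSize(JobContext job, int val) {
		job.getConfiguration().setInt(KAFKA_FETCH_SIZE, val);
	}

	public static int getKafkaFetchSize(JobContext job) {
		// Hypothetical 1 MB default, analogous to the 60000 ms timeout default above.
		return job.getConfiguration().getInt(KAFKA_FETCH_SIZE, 1024 * 1024);
	}
}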
 
Example #27
Source File: BlurHiveOutputCommitter.java    From incubator-retired-blur with Apache License 2.0
@Override
public void setupJob(JobContext jobContext) throws IOException {
}