org.apache.hadoop.mapreduce.task.JobContextImpl Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.task.JobContextImpl. Each example is taken from an open source project; the source file, the project it comes from, and the project's license are listed above the code.
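Before the project examples, here is a minimal, self-contained sketch of the usage pattern they all share: build a Configuration, pair it with a JobID inside a JobContextImpl, and pass the resulting JobContext to an API such as InputFormat#getSplits. The input path and the choice of TextInputFormat are illustrative placeholders, not taken from any of the projects below.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

public class JobContextImplSketch {
  public static void main(String[] args) throws Exception {
    // Plain Hadoop configuration pointing at a placeholder input directory.
    Configuration conf = new Configuration();
    conf.set("mapreduce.input.fileinputformat.inputdir", "file:///tmp/input");

    // JobContextImpl binds the Configuration to a JobID so that APIs expecting
    // a JobContext can be driven outside of a running MapReduce job.
    JobContext jobContext = new JobContextImpl(conf, new JobID("local", 1));

    // Hand the context to an InputFormat to compute input splits.
    List<InputSplit> splits = new TextInputFormat().getSplits(jobContext);
    System.out.println("Computed " + splits.size() + " splits");
  }
}

The test and connector examples that follow use the same construction, differing mainly in how the Configuration is populated and in which InputFormat or OutputCommitter consumes the context.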
Example #1
Source File: TestMRCJCFileOutputCommitter.java    From hadoop with Apache License 2.0
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // Do not write any output

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);
  
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #2
Source File: HCatInputFormatBase.java    From flink with Apache License 2.0
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits)
		throws IOException {
	configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

	JobContext jobContext = new JobContextImpl(configuration, new JobID());

	List<InputSplit> splits;
	try {
		splits = this.hCatInputFormat.getSplits(jobContext);
	} catch (InterruptedException e) {
		throw new IOException("Could not get Splits.", e);
	}
	HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

	for (int i = 0; i < hadoopInputSplits.length; i++){
		hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
	}
	return hadoopInputSplits;
}
 
Example #3
Source File: HadoopInputFormatBase.java    From flink with Apache License 2.0
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits)
		throws IOException {
	configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

	JobContext jobContext = new JobContextImpl(configuration, new JobID());

	jobContext.getCredentials().addAll(this.credentials);
	Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
	if (currentUserCreds != null) {
		jobContext.getCredentials().addAll(currentUserCreds);
	}

	List<org.apache.hadoop.mapreduce.InputSplit> splits;
	try {
		splits = this.mapreduceInputFormat.getSplits(jobContext);
	} catch (InterruptedException e) {
		throw new IOException("Could not get Splits.", e);
	}
	HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

	for (int i = 0; i < hadoopInputSplits.length; i++) {
		hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
	}
	return hadoopInputSplits;
}
 
Example #4
Source File: GsonBigQueryInputFormatTest.java    From hadoop-connectors with Apache License 2.0
/** Tests getSplits method of GsonBigQueryInputFormat when Bigquery connection error is thrown. */
@Test
public void testGetSplitsSecurityException() throws IOException {
  when(mockBigquery.tables()).thenReturn(mockBigqueryTables);

  // Write values to file.
  Path mockPath = new Path("gs://test_bucket/path/test");
  GsonRecordReaderTest.writeFile(ghfs, mockPath, (value1 + "\n" + value2 + "\n").getBytes(UTF_8));

  // Run getSplits method.
  GsonBigQueryInputFormat gsonBigQueryInputFormat =
      new GsonBigQueryInputFormatForTestGeneralSecurityException();
  config.set("mapreduce.input.fileinputformat.inputdir", "gs://test_bucket/path/test");

  JobContext jobContext = new JobContextImpl(config, new JobID());

  assertThrows(IOException.class, () -> gsonBigQueryInputFormat.getSplits(jobContext));
}
 
Example #5
Source File: HCatInputFormatBase.java    From Flink-CEPplus with Apache License 2.0
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits)
		throws IOException {
	configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

	JobContext jobContext = new JobContextImpl(configuration, new JobID());

	List<InputSplit> splits;
	try {
		splits = this.hCatInputFormat.getSplits(jobContext);
	} catch (InterruptedException e) {
		throw new IOException("Could not get Splits.", e);
	}
	HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

	for (int i = 0; i < hadoopInputSplits.length; i++){
		hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
	}
	return hadoopInputSplits;
}
 
Example #6
Source File: HadoopInputFormatBase.java    From Flink-CEPplus with Apache License 2.0
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits)
		throws IOException {
	configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

	JobContext jobContext = new JobContextImpl(configuration, new JobID());

	jobContext.getCredentials().addAll(this.credentials);
	Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
	if (currentUserCreds != null) {
		jobContext.getCredentials().addAll(currentUserCreds);
	}

	List<org.apache.hadoop.mapreduce.InputSplit> splits;
	try {
		splits = this.mapreduceInputFormat.getSplits(jobContext);
	} catch (InterruptedException e) {
		throw new IOException("Could not get Splits.", e);
	}
	HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

	for (int i = 0; i < hadoopInputSplits.length; i++) {
		hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
	}
	return hadoopInputSplits;
}
 
Example #7
Source File: TestCRAMInputFormatOnHDFS.java    From Hadoop-BAM with MIT License
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("test.cram").getFile();
  reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI().toString();
  String referenceIndex = ClassLoader.getSystemClassLoader().getResource("auxf.fa.fai")
      .toURI().toString();
  conf.set("mapred.input.dir", "file://" + input);

  URI hdfsRef = clusterUri.resolve("/tmp/auxf.fa");
  URI hdfsRefIndex = clusterUri.resolve("/tmp/auxf.fa.fai");
  Files.copy(Paths.get(URI.create(reference)), Paths.get(hdfsRef));
  Files.copy(Paths.get(URI.create(referenceIndex)), Paths.get(hdfsRefIndex));

  conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, hdfsRef.toString());
  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());

}
 
Example #8
Source File: HCatInputFormatBase.java    From flink with Apache License 2.0
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits)
		throws IOException {
	configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

	JobContext jobContext = new JobContextImpl(configuration, new JobID());

	List<InputSplit> splits;
	try {
		splits = this.hCatInputFormat.getSplits(jobContext);
	} catch (InterruptedException e) {
		throw new IOException("Could not get Splits.", e);
	}
	HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

	for (int i = 0; i < hadoopInputSplits.length; i++){
		hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
	}
	return hadoopInputSplits;
}
 
Example #9
Source File: HadoopInputFormatBase.java    From flink with Apache License 2.0
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits)
		throws IOException {
	configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

	JobContext jobContext = new JobContextImpl(configuration, new JobID());

	jobContext.getCredentials().addAll(this.credentials);
	Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
	if (currentUserCreds != null) {
		jobContext.getCredentials().addAll(currentUserCreds);
	}

	List<org.apache.hadoop.mapreduce.InputSplit> splits;
	try {
		splits = this.mapreduceInputFormat.getSplits(jobContext);
	} catch (InterruptedException e) {
		throw new IOException("Could not get Splits.", e);
	}
	HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

	for (int i = 0; i < hadoopInputSplits.length; i++) {
		hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
	}
	return hadoopInputSplits;
}
 
Example #10
Source File: HadoopElementIterator.java    From tinkerpop with Apache License 2.0
public HadoopElementIterator(final HadoopGraph graph) {
    try {
        this.graph = graph;
        final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
        final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil.getReaderAsInputFormat(configuration);
        if (inputFormat instanceof FileInputFormat) {
            final Storage storage = FileSystemStorage.open(configuration);
            if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
                return; // there is no input location and thus, no data (empty graph)
            if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
                return; // there is no data at the input location (empty graph)
            configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
        }
        final List<InputSplit> splits = inputFormat.getSplits(new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
        for (final InputSplit split : splits) {
            this.readers.add(inputFormat.createRecordReader(split, new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
        }
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
 
Example #11
Source File: HadoopFormatIO.java    From beam with Apache License 2.0
private static OutputCommitter initOutputCommitter(
    OutputFormat<?, ?> outputFormatObj,
    Configuration conf,
    TaskAttemptContext taskAttemptContext)
    throws IllegalStateException {
  OutputCommitter outputCommitter;
  try {
    outputCommitter = outputFormatObj.getOutputCommitter(taskAttemptContext);
    if (outputCommitter != null) {
      outputCommitter.setupJob(new JobContextImpl(conf, taskAttemptContext.getJobID()));
    }
  } catch (Exception e) {
    throw new IllegalStateException("Unable to create OutputCommitter object: ", e);
  }

  return outputCommitter;
}
 
Example #12
Source File: TestCopyCommitter.java    From hadoop with Apache License 2.0
@Test
public void testNoCommitAction() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    committer.commitJob(jobContext);
    Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");

    //Test for idempotent commit
    committer.commitJob(jobContext);
    Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
  } catch (IOException e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("Commit failed");
  }
}
 
Example #13
Source File: TestCopyCommitter.java    From big-c with Apache License 2.0
@Test
public void testNoCommitAction() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    committer.commitJob(jobContext);
    Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");

    //Test for idempotent commit
    committer.commitJob(jobContext);
    Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
  } catch (IOException e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("Commit failed");
  }
}
 
Example #14
Source File: TestMRCJCFileOutputCommitter.java    From big-c with Apache License 2.0
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // Do not write any output

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);
  
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #15
Source File: TestS3MultipartOutputCommitter.java    From s3committer with Apache License 2.0
@Before
public void setupCommitter() throws Exception {
  getConfiguration().set(
      "s3.multipart.committer.num-threads", String.valueOf(numThreads));
  getConfiguration().set(UPLOAD_UUID, UUID.randomUUID().toString());
  this.job = new JobContextImpl(getConfiguration(), JOB_ID);
  this.jobCommitter = new MockedS3Committer(S3_OUTPUT_PATH, job);
  jobCommitter.setupJob(job);
  this.uuid = job.getConfiguration().get(UPLOAD_UUID);

  this.tac = new TaskAttemptContextImpl(
      new Configuration(job.getConfiguration()), AID);

  // get the task's configuration copy so modifications take effect
  this.conf = tac.getConfiguration();
  conf.set("mapred.local.dir", "/tmp/local-0,/tmp/local-1");
  conf.setInt(UPLOAD_SIZE, 100);

  this.committer = new MockedS3Committer(S3_OUTPUT_PATH, tac);
}
 
Example #16
Source File: TestUtil.java    From s3committer with Apache License 2.0
@Before
public void setupJob() throws Exception {
  this.mockFS = mock(FileSystem.class);
  FileSystem s3 = new Path("s3://" + MockS3FileSystem.BUCKET + "/")
      .getFileSystem(CONF);
  if (s3 instanceof MockS3FileSystem) {
    ((MockS3FileSystem) s3).setMock(mockFS);
  } else {
    throw new RuntimeException("Cannot continue: S3 not mocked");
  }

  this.job = new JobContextImpl(CONF, JOB_ID);
  job.getConfiguration().set(UPLOAD_UUID, UUID.randomUUID().toString());

  this.results = new TestUtil.ClientResults();
  this.errors = new TestUtil.ClientErrors();
  this.mockClient = TestUtil.newMockClient(results, errors);
}
 
Example #17
Source File: GsonBigQueryInputFormatTest.java    From hadoop-connectors with Apache License 2.0
/** Tests getSplits method of GsonBigQueryInputFormat with federated data. */
@Test
public void testGetSplitsFederated() throws Exception {
  JobContext jobContext = new JobContextImpl(config, new JobID());

  table.setType("EXTERNAL")
      .setExternalDataConfiguration(
          new ExternalDataConfiguration()
              .setSourceFormat("NEWLINE_DELIMITED_JSON")
              .setSourceUris(ImmutableList.of("gs://foo-bucket/bar.json")));

  FileSplit split = new FileSplit(new Path("gs://foo-bucket/bar.json"), 0, 100, new String[0]);
  when(mockInputFormat.getSplits(eq(jobContext))).thenReturn(ImmutableList.<InputSplit>of(split));

  GsonBigQueryInputFormat gsonBigQueryInputFormat = new GsonBigQueryInputFormatForTest();
  gsonBigQueryInputFormat.setDelegateInputFormat(mockInputFormat);

  // Run getSplits method.
  List<InputSplit> splits = gsonBigQueryInputFormat.getSplits(jobContext);

  assertThat(splits).hasSize(1);
  assertThat(((FileSplit) splits.get(0)).getPath()).isEqualTo(split.getPath());
  assertThat(config.get("mapreduce.input.fileinputformat.inputdir"))
      .isEqualTo("gs://foo-bucket/bar.json");
  verify(mockBigQueryHelper, times(1)).getTable(eq(tableRef));
  verifyNoMoreInteractions(mockBigquery);
}
 
Example #18
Source File: TestSAMInputFormat.java    From Hadoop-BAM with MIT License
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("test.sam").getFile();
  conf.set("mapred.input.dir", "file://" + input);

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
 
Example #19
Source File: TestVCFInputFormat.java    From Hadoop-BAM with MIT License
@Before
public void setup() throws IOException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException, InterruptedException, NoSuchFieldException {
    Configuration conf = new Configuration();
    String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile();
    conf.set("hadoopbam.vcf.trust-exts", "true");
    conf.set("mapred.input.dir", "file://" + input_file);
    conf.setStrings("io.compression.codecs", BGZFEnhancedGzipCodec.class.getCanonicalName(),
        BGZFCodec.class.getCanonicalName());
    conf.setInt(FileInputFormat.SPLIT_MAXSIZE, 100 * 1024); // 100K

    if (interval != null) {
        VCFInputFormat.setIntervals(conf, ImmutableList.of(interval));
    }

    taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
    JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID());

    VCFInputFormat inputFormat = new VCFInputFormat(conf);
    List<InputSplit> splits = inputFormat.getSplits(ctx);
    switch (expectedSplits) {
        case EXACTLY_ONE:
            assertEquals("Should be exactly one split", 1, splits.size());
            break;
        case MORE_THAN_ONE:
            assertTrue("Should be more than one split", splits.size() > 1);
            break;
        case ANY:
        default:
            break;
    }
    readers = new ArrayList<>();
    for (InputSplit split : splits) {
        RecordReader<LongWritable, VariantContextWritable> reader = inputFormat.createRecordReader(split, taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        readers.add(reader);
    }
}
 
Example #20
Source File: TestFastaInputFormat.java    From Hadoop-BAM with MIT License
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("mini-chr1-chr2.fasta").getFile();
  conf.set("mapred.input.dir", "file://" + input);

  // Input fasta is 600 bytes, so this gets us 3 FileInputFormat splits.
  conf.set(FileInputFormat.SPLIT_MAXSIZE, "200");

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
 
Example #21
Source File: TestCRAMInputFormat.java    From Hadoop-BAM with MIT License
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("test.cram").getFile();
  reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI().toString();
  conf.set("mapred.input.dir", "file://" + input);
  conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, reference);

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
 
Example #22
Source File: TestBAMInputFormat.java    From Hadoop-BAM with MIT License
private void completeSetup(boolean boundedTraversal, List<Interval> intervals, boolean
    traverseUnplacedUnmapped) {
  Configuration conf = new Configuration();
  conf.set("mapred.input.dir", "file://" + input);
  if (boundedTraversal) {
    BAMInputFormat.setTraversalParameters(conf, intervals, traverseUnplacedUnmapped);
  }
  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
 
Example #23
Source File: TestCopyCommitter.java    From big-c with Apache License 2.0
@Test
public void testAtomicCommitMissingFinal() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  Configuration conf = jobContext.getConfiguration();

  String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
  String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
  FileSystem fs = null;
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    fs = FileSystem.get(conf);
    fs.mkdirs(new Path(workPath));

    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
    conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);

    Assert.assertTrue(fs.exists(new Path(workPath)));
    Assert.assertFalse(fs.exists(new Path(finalPath)));
    committer.commitJob(jobContext);
    Assert.assertFalse(fs.exists(new Path(workPath)));
    Assert.assertTrue(fs.exists(new Path(finalPath)));

    //Test for idempotent commit
    committer.commitJob(jobContext);
    Assert.assertFalse(fs.exists(new Path(workPath)));
    Assert.assertTrue(fs.exists(new Path(finalPath)));

  } catch (IOException e) {
    LOG.error("Exception encountered while testing for preserve status", e);
    Assert.fail("Atomic commit failure");
  } finally {
    TestDistCpUtils.delete(fs, workPath);
    TestDistCpUtils.delete(fs, finalPath);
    conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false);
  }
}
 
Example #24
Source File: TestFileOutputCommitter.java    From big-c with Apache License 2.0
private void testMapFileOutputCommitterInternal(int version)
    throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
      version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());    
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeMapFileOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #25
Source File: TestFileOutputCommitter.java    From big-c with Apache License 2.0
private void testCommitterInternal(int version) throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
      version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateContent(outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #26
Source File: TestMRCJCFileOutputCommitter.java    From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
public void testAbort() throws IOException, InterruptedException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // do setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do abort
  committer.abortTask(tContext);
  File expectedFile = new File(new Path(committer.getWorkPath(), partFile)
      .toString());
  assertFalse("task temp dir still exists", expectedFile.exists());

  committer.abortJob(jContext, JobStatus.State.FAILED);
  expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME)
      .toString());
  assertFalse("job temp dir still exists", expectedFile.exists());
  assertEquals("Output directory not empty", 0, new File(outDir.toString())
      .listFiles().length);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #27
Source File: HadoopInputFormatBase.java    From Flink-CEPplus with Apache License 2.0
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
	// only gather base statistics for FileInputFormats
	if (!(mapreduceInputFormat instanceof FileInputFormat)) {
		return null;
	}

	JobContext jobContext = new JobContextImpl(configuration, null);

	final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ?
			(FileBaseStatistics) cachedStats : null;

	try {
		final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(jobContext);
		return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
	} catch (IOException ioex) {
		if (LOG.isWarnEnabled()) {
			LOG.warn("Could not determine statistics due to an io error: "
					+ ioex.getMessage());
		}
	} catch (Throwable t) {
		if (LOG.isErrorEnabled()) {
			LOG.error("Unexpected problem while getting the file statistics: "
					+ t.getMessage(), t);
		}
	}

	// no statistics available
	return null;
}
 
Example #28
Source File: TestMRCJCFileOutputCommitter.java    From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  File expectedFile = new File(new Path(outDir, partFile).toString());
  StringBuffer expectedOutput = new StringBuffer();
  expectedOutput.append(key1).append('\t').append(val1).append("\n");
  expectedOutput.append(val1).append("\n");
  expectedOutput.append(val2).append("\n");
  expectedOutput.append(key2).append("\n");
  expectedOutput.append(key1).append("\n");
  expectedOutput.append(key2).append('\t').append(val2).append("\n");
  String output = UtilsForTests.slurp(expectedFile);
  assertEquals(output, expectedOutput.toString());
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #29
Source File: RegionSplitsIT.java    From spliceengine with GNU Affero General Public License v3.0
@Test
public void testGetSplits() throws Exception{

    SMInputFormat smInputFormat = new SMInputFormat();
    final Configuration conf=new Configuration(HConfiguration.unwrapDelegate());
    conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class,FakeOutputFormat.class);
    conf.setInt(MRConstants.SPLICE_SPLITS_PER_TABLE, 8);
    // Get splits for the SYSCOLUMNS table.
    String tableName = format("%s.%s", SCHEMA_NAME, TABLE1_NAME);
    conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName);
    long conglomId = spliceClassWatcher.getConglomId(TABLE1_NAME, SCHEMA_NAME);
    String conglomAsString = format("%d", conglomId);
    conf.set(MRConstants.SPLICE_INPUT_CONGLOMERATE, conglomAsString);
    String jdbcString = "jdbc:splice://localhost:1527/splicedb;user=splice;password=admin";
    conf.set(MRConstants.SPLICE_JDBC_STR, jdbcString);

    SMSQLUtil util = SMSQLUtil.getInstance(jdbcString);
    List<String> columns = new ArrayList<>();
    columns.add("I");
    conf.set(MRConstants.SPLICE_SCAN_INFO, util.getTableScannerBuilder(tableName, columns).base64Encode());
    smInputFormat.setConf(conf);
    JobContext ctx = new JobContextImpl(conf,new JobID("test",1));
    List<InputSplit> splits = smInputFormat.getSplits(ctx);

    LOG.info("Got "+splits.size() + " splits");
    assertTrue(format("Expected between 6 and 10 splits, got %d.", splits.size()),
            splits.size() >= 6 && splits.size() <= 10);

}
 
Example #30
Source File: FetchSplitsJob.java    From spliceengine with GNU Affero General Public License v3.0
@Override
public List<InputSplit> call() {
    SMInputFormat inputFormat = new SMInputFormat();
    try {
        Configuration confTemp = new Configuration(conf);
        confTemp.unset(MRConstants.SPLICE_SCAN_INPUT_SPLITS_ID);
        return inputFormat.getSplits(new JobContextImpl(confTemp, null));
    } catch (IOException | InterruptedException ie) {
        throw new RuntimeException(ie.getMessage(), ie);
    }

}