org.apache.hadoop.mapreduce.task.JobContextImpl Java Examples
The following examples show how to use org.apache.hadoop.mapreduce.task.JobContextImpl.
Each example notes the project, source file, and license it was taken from.
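All of the examples share one construction pattern: a JobContextImpl is built directly from a Configuration and a JobID, then handed to an InputFormat or OutputCommitter in place of a full Job. Below is a minimal, self-contained sketch of that pattern; it is not taken from any of the projects listed here, and the input directory and JobID identifier are placeholder values.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

import java.util.List;

public class JobContextImplSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Placeholder input directory; point this at real data before running.
    conf.set("mapreduce.input.fileinputformat.inputdir", "/tmp/input");

    // Build a JobContext without going through Job.getInstance().
    JobContext jobContext = new JobContextImpl(conf, new JobID("local", 1));

    // Hand the context to an InputFormat to compute splits.
    List<InputSplit> splits = new TextInputFormat().getSplits(jobContext);
    System.out.println("Computed " + splits.size() + " splits");
  }
}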
Example #1
Source File: TestMRCJCFileOutputCommitter.java From hadoop with Apache License 2.0 | 6 votes |
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // Do not write any output

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #2
Source File: HCatInputFormatBase.java From flink with Apache License 2.0 | 6 votes |
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());

  List<InputSplit> splits;
  try {
    splits = this.hCatInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }
  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
Example #3
Source File: HadoopInputFormatBase.java From flink with Apache License 2.0 | 6 votes |
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());

  jobContext.getCredentials().addAll(this.credentials);
  Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
  if (currentUserCreds != null) {
    jobContext.getCredentials().addAll(currentUserCreds);
  }

  List<org.apache.hadoop.mapreduce.InputSplit> splits;
  try {
    splits = this.mapreduceInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }
  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
Example #4
Source File: GsonBigQueryInputFormatTest.java From hadoop-connectors with Apache License 2.0 | 6 votes |
/** Tests getSplits method of GsonBigQueryInputFormat when Bigquery connection error is thrown. */
@Test
public void testGetSplitsSecurityException() throws IOException {
  when(mockBigquery.tables()).thenReturn(mockBigqueryTables);

  // Write values to file.
  Path mockPath = new Path("gs://test_bucket/path/test");
  GsonRecordReaderTest.writeFile(ghfs, mockPath, (value1 + "\n" + value2 + "\n").getBytes(UTF_8));

  // Run getSplits method.
  GsonBigQueryInputFormat gsonBigQueryInputFormat =
      new GsonBigQueryInputFormatForTestGeneralSecurityException();
  config.set("mapreduce.input.fileinputformat.inputdir", "gs://test_bucket/path/test");

  JobContext jobContext = new JobContextImpl(config, new JobID());

  assertThrows(IOException.class, () -> gsonBigQueryInputFormat.getSplits(jobContext));
}
Example #5
Source File: HCatInputFormatBase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());

  List<InputSplit> splits;
  try {
    splits = this.hCatInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }
  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
Example #6
Source File: HadoopInputFormatBase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());

  jobContext.getCredentials().addAll(this.credentials);
  Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
  if (currentUserCreds != null) {
    jobContext.getCredentials().addAll(currentUserCreds);
  }

  List<org.apache.hadoop.mapreduce.InputSplit> splits;
  try {
    splits = this.mapreduceInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }
  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
Example #7
Source File: TestCRAMInputFormatOnHDFS.java From Hadoop-BAM with MIT License | 6 votes |
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("test.cram").getFile();
  reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI().toString();
  String referenceIndex = ClassLoader.getSystemClassLoader().getResource("auxf.fa.fai")
      .toURI().toString();
  conf.set("mapred.input.dir", "file://" + input);

  URI hdfsRef = clusterUri.resolve("/tmp/auxf.fa");
  URI hdfsRefIndex = clusterUri.resolve("/tmp/auxf.fa.fai");
  Files.copy(Paths.get(URI.create(reference)), Paths.get(hdfsRef));
  Files.copy(Paths.get(URI.create(referenceIndex)), Paths.get(hdfsRefIndex));

  conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, hdfsRef.toString());

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
Example #8
Source File: HCatInputFormatBase.java From flink with Apache License 2.0 | 6 votes |
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());

  List<InputSplit> splits;
  try {
    splits = this.hCatInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }
  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
Example #9
Source File: HadoopInputFormatBase.java From flink with Apache License 2.0 | 6 votes |
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());

  jobContext.getCredentials().addAll(this.credentials);
  Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
  if (currentUserCreds != null) {
    jobContext.getCredentials().addAll(currentUserCreds);
  }

  List<org.apache.hadoop.mapreduce.InputSplit> splits;
  try {
    splits = this.mapreduceInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }
  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
Example #10
Source File: HadoopElementIterator.java From tinkerpop with Apache License 2.0 | 6 votes |
public HadoopElementIterator(final HadoopGraph graph) {
  try {
    this.graph = graph;
    final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
    final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil.getReaderAsInputFormat(configuration);
    if (inputFormat instanceof FileInputFormat) {
      final Storage storage = FileSystemStorage.open(configuration);
      if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
        return; // there is no input location and thus, no data (empty graph)
      if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
        return; // there is no data at the input location (empty graph)
      configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
          Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
    }
    final List<InputSplit> splits = inputFormat.getSplits(
        new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
    for (final InputSplit split : splits) {
      this.readers.add(inputFormat.createRecordReader(split,
          new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
    }
  } catch (final Exception e) {
    throw new IllegalStateException(e.getMessage(), e);
  }
}
Example #11
Source File: HadoopFormatIO.java From beam with Apache License 2.0 | 6 votes |
private static OutputCommitter initOutputCommitter(
    OutputFormat<?, ?> outputFormatObj,
    Configuration conf,
    TaskAttemptContext taskAttemptContext)
    throws IllegalStateException {
  OutputCommitter outputCommitter;
  try {
    outputCommitter = outputFormatObj.getOutputCommitter(taskAttemptContext);
    if (outputCommitter != null) {
      outputCommitter.setupJob(new JobContextImpl(conf, taskAttemptContext.getJobID()));
    }
  } catch (Exception e) {
    throw new IllegalStateException("Unable to create OutputCommitter object: ", e);
  }
  return outputCommitter;
}
Example #12
Source File: TestCopyCommitter.java From hadoop with Apache License 2.0 | 6 votes |
@Test
public void testNoCommitAction() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    committer.commitJob(jobContext);
    Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");

    // Test for idempotent commit
    committer.commitJob(jobContext);
    Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
  } catch (IOException e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("Commit failed");
  }
}
Example #13
Source File: TestCopyCommitter.java From big-c with Apache License 2.0 | 6 votes |
@Test
public void testNoCommitAction() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    committer.commitJob(jobContext);
    Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");

    // Test for idempotent commit
    committer.commitJob(jobContext);
    Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
  } catch (IOException e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("Commit failed");
  }
}
Example #14
Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0 | 6 votes |
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // Do not write any output

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #15
Source File: TestS3MultipartOutputCommitter.java From s3committer with Apache License 2.0 | 6 votes |
@Before
public void setupCommitter() throws Exception {
  getConfiguration().set(
      "s3.multipart.committer.num-threads", String.valueOf(numThreads));
  getConfiguration().set(UPLOAD_UUID, UUID.randomUUID().toString());
  this.job = new JobContextImpl(getConfiguration(), JOB_ID);
  this.jobCommitter = new MockedS3Committer(S3_OUTPUT_PATH, job);
  jobCommitter.setupJob(job);
  this.uuid = job.getConfiguration().get(UPLOAD_UUID);

  this.tac = new TaskAttemptContextImpl(
      new Configuration(job.getConfiguration()), AID);

  // get the task's configuration copy so modifications take effect
  this.conf = tac.getConfiguration();
  conf.set("mapred.local.dir", "/tmp/local-0,/tmp/local-1");
  conf.setInt(UPLOAD_SIZE, 100);

  this.committer = new MockedS3Committer(S3_OUTPUT_PATH, tac);
}
Example #16
Source File: TestUtil.java From s3committer with Apache License 2.0 | 6 votes |
@Before
public void setupJob() throws Exception {
  this.mockFS = mock(FileSystem.class);
  FileSystem s3 = new Path("s3://" + MockS3FileSystem.BUCKET + "/")
      .getFileSystem(CONF);
  if (s3 instanceof MockS3FileSystem) {
    ((MockS3FileSystem) s3).setMock(mockFS);
  } else {
    throw new RuntimeException("Cannot continue: S3 not mocked");
  }

  this.job = new JobContextImpl(CONF, JOB_ID);
  job.getConfiguration().set(UPLOAD_UUID, UUID.randomUUID().toString());

  this.results = new TestUtil.ClientResults();
  this.errors = new TestUtil.ClientErrors();
  this.mockClient = TestUtil.newMockClient(results, errors);
}
Example #17
Source File: GsonBigQueryInputFormatTest.java From hadoop-connectors with Apache License 2.0 | 5 votes |
/** Tests getSplits method of GsonBigQueryInputFormat with federated data. */
@Test
public void testGetSplitsFederated() throws Exception {
  JobContext jobContext = new JobContextImpl(config, new JobID());

  table.setType("EXTERNAL")
      .setExternalDataConfiguration(
          new ExternalDataConfiguration()
              .setSourceFormat("NEWLINE_DELIMITED_JSON")
              .setSourceUris(ImmutableList.of("gs://foo-bucket/bar.json")));

  FileSplit split = new FileSplit(new Path("gs://foo-bucket/bar.json"), 0, 100, new String[0]);
  when(mockInputFormat.getSplits(eq(jobContext))).thenReturn(ImmutableList.<InputSplit>of(split));

  GsonBigQueryInputFormat gsonBigQueryInputFormat = new GsonBigQueryInputFormatForTest();
  gsonBigQueryInputFormat.setDelegateInputFormat(mockInputFormat);

  // Run getSplits method.
  List<InputSplit> splits = gsonBigQueryInputFormat.getSplits(jobContext);

  assertThat(splits).hasSize(1);
  assertThat(((FileSplit) splits.get(0)).getPath()).isEqualTo(split.getPath());
  assertThat(config.get("mapreduce.input.fileinputformat.inputdir"))
      .isEqualTo("gs://foo-bucket/bar.json");
  verify(mockBigQueryHelper, times(1)).getTable(eq(tableRef));
  verifyNoMoreInteractions(mockBigquery);
}
Example #18
Source File: TestSAMInputFormat.java From Hadoop-BAM with MIT License | 5 votes |
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("test.sam").getFile();
  conf.set("mapred.input.dir", "file://" + input);

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
Example #19
Source File: TestVCFInputFormat.java From Hadoop-BAM with MIT License | 5 votes |
@Before
public void setup() throws IOException, NoSuchMethodException, IllegalAccessException,
    InvocationTargetException, InstantiationException, InterruptedException, NoSuchFieldException {
  Configuration conf = new Configuration();
  String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile();
  conf.set("hadoopbam.vcf.trust-exts", "true");
  conf.set("mapred.input.dir", "file://" + input_file);
  conf.setStrings("io.compression.codecs", BGZFEnhancedGzipCodec.class.getCanonicalName(),
      BGZFCodec.class.getCanonicalName());
  conf.setInt(FileInputFormat.SPLIT_MAXSIZE, 100 * 1024); // 100K
  if (interval != null) {
    VCFInputFormat.setIntervals(conf, ImmutableList.of(interval));
  }

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID());
  VCFInputFormat inputFormat = new VCFInputFormat(conf);
  List<InputSplit> splits = inputFormat.getSplits(ctx);
  switch (expectedSplits) {
    case EXACTLY_ONE:
      assertEquals("Should be exactly one split", 1, splits.size());
      break;
    case MORE_THAN_ONE:
      assertTrue("Should be more than one split", splits.size() > 1);
      break;
    case ANY:
    default:
      break;
  }
  readers = new ArrayList<>();
  for (InputSplit split : splits) {
    RecordReader<LongWritable, VariantContextWritable> reader =
        inputFormat.createRecordReader(split, taskAttemptContext);
    reader.initialize(split, taskAttemptContext);
    readers.add(reader);
  }
}
Example #20
Source File: TestFastaInputFormat.java From Hadoop-BAM with MIT License | 5 votes |
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("mini-chr1-chr2.fasta").getFile();
  conf.set("mapred.input.dir", "file://" + input);

  // Input fasta is 600 bytes, so this gets us 3 FileInputFormat splits.
  conf.set(FileInputFormat.SPLIT_MAXSIZE, "200");

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
Example #21
Source File: TestCRAMInputFormat.java From Hadoop-BAM with MIT License | 5 votes |
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("test.cram").getFile();
  reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI().toString();
  conf.set("mapred.input.dir", "file://" + input);
  conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, reference);

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
Example #22
Source File: TestBAMInputFormat.java From Hadoop-BAM with MIT License | 5 votes |
private void completeSetup(boolean boundedTraversal, List<Interval> intervals,
    boolean traverseUnplacedUnmapped) {
  Configuration conf = new Configuration();
  conf.set("mapred.input.dir", "file://" + input);
  if (boundedTraversal) {
    BAMInputFormat.setTraversalParameters(conf, intervals, traverseUnplacedUnmapped);
  }
  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
Example #23
Source File: TestCopyCommitter.java From big-c with Apache License 2.0 | 5 votes |
@Test
public void testAtomicCommitMissingFinal() {
  TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
  JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
      taskAttemptContext.getTaskAttemptID().getJobID());
  Configuration conf = jobContext.getConfiguration();

  String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
  String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
  FileSystem fs = null;
  try {
    OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
    fs = FileSystem.get(conf);
    fs.mkdirs(new Path(workPath));

    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
    conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);

    Assert.assertTrue(fs.exists(new Path(workPath)));
    Assert.assertFalse(fs.exists(new Path(finalPath)));
    committer.commitJob(jobContext);
    Assert.assertFalse(fs.exists(new Path(workPath)));
    Assert.assertTrue(fs.exists(new Path(finalPath)));

    // Test for idempotent commit
    committer.commitJob(jobContext);
    Assert.assertFalse(fs.exists(new Path(workPath)));
    Assert.assertTrue(fs.exists(new Path(finalPath)));
  } catch (IOException e) {
    LOG.error("Exception encountered while testing for preserve status", e);
    Assert.fail("Atomic commit failure");
  } finally {
    TestDistCpUtils.delete(fs, workPath);
    TestDistCpUtils.delete(fs, finalPath);
    conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false);
  }
}
Example #24
Source File: TestFileOutputCommitter.java From big-c with Apache License 2.0 | 5 votes |
private void testMapFileOutputCommitterInternal(int version) throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeMapFileOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #25
Source File: TestFileOutputCommitter.java From big-c with Apache License 2.0 | 5 votes |
private void testCommitterInternal(int version) throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateContent(outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #26
Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") public void testAbort() throws IOException, InterruptedException { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext); // do setup committer.setupJob(jContext); committer.setupTask(tContext); // write output TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); writeOutput(theRecordWriter, tContext); // do abort committer.abortTask(tContext); File expectedFile = new File(new Path(committer.getWorkPath(), partFile) .toString()); assertFalse("task temp dir still exists", expectedFile.exists()); committer.abortJob(jContext, JobStatus.State.FAILED); expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME) .toString()); assertFalse("job temp dir still exists", expectedFile.exists()); assertEquals("Output directory not empty", 0, new File(outDir.toString()) .listFiles().length); FileUtil.fullyDelete(new File(outDir.toString())); }
Example #27
Source File: HadoopInputFormatBase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
  // only gather base statistics for FileInputFormats
  if (!(mapreduceInputFormat instanceof FileInputFormat)) {
    return null;
  }

  JobContext jobContext = new JobContextImpl(configuration, null);

  final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ?
      (FileBaseStatistics) cachedStats : null;

  try {
    final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(jobContext);
    return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
  } catch (IOException ioex) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Could not determine statistics due to an io error: " + ioex.getMessage());
    }
  } catch (Throwable t) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Unexpected problem while getting the file statistics: " + t.getMessage(), t);
    }
  }

  // no statistics available
  return null;
}
Example #28
Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") public void testCommitter() throws Exception { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext); // setup committer.setupJob(jContext); committer.setupTask(tContext); // write output TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); writeOutput(theRecordWriter, tContext); // do commit committer.commitTask(tContext); committer.commitJob(jContext); // validate output File expectedFile = new File(new Path(outDir, partFile).toString()); StringBuffer expectedOutput = new StringBuffer(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); expectedOutput.append(key2).append("\n"); expectedOutput.append(key1).append("\n"); expectedOutput.append(key2).append('\t').append(val2).append("\n"); String output = UtilsForTests.slurp(expectedFile); assertEquals(output, expectedOutput.toString()); FileUtil.fullyDelete(new File(outDir.toString())); }
Example #29
Source File: RegionSplitsIT.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Test
public void testGetSplits() throws Exception {
  SMInputFormat smInputFormat = new SMInputFormat();
  final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
  conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
  conf.setInt(MRConstants.SPLICE_SPLITS_PER_TABLE, 8);
  // Get splits for the SYSCOLUMNS table.
  String tableName = format("%s.%s", SCHEMA_NAME, TABLE1_NAME);
  conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName);
  long conglomId = spliceClassWatcher.getConglomId(TABLE1_NAME, SCHEMA_NAME);
  String conglomAsString = format("%d", conglomId);
  conf.set(MRConstants.SPLICE_INPUT_CONGLOMERATE, conglomAsString);
  String jdbcString = "jdbc:splice://localhost:1527/splicedb;user=splice;password=admin";
  conf.set(MRConstants.SPLICE_JDBC_STR, jdbcString);

  SMSQLUtil util = SMSQLUtil.getInstance(jdbcString);
  List<String> columns = new ArrayList<>();
  columns.add("I");
  conf.set(MRConstants.SPLICE_SCAN_INFO, util.getTableScannerBuilder(tableName, columns).base64Encode());
  smInputFormat.setConf(conf);

  JobContext ctx = new JobContextImpl(conf, new JobID("test", 1));
  List<InputSplit> splits = smInputFormat.getSplits(ctx);

  LOG.info("Got " + splits.size() + " splits");
  assertTrue(format("Expected between 6 and 10 splits, got %d.", splits.size()),
      splits.size() >= 6 && splits.size() <= 10);
}
Example #30
Source File: FetchSplitsJob.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Override
public List<InputSplit> call() {
  SMInputFormat inputFormat = new SMInputFormat();
  try {
    Configuration confTemp = new Configuration(conf);
    confTemp.unset(MRConstants.SPLICE_SCAN_INPUT_SPLITS_ID);
    return inputFormat.getSplits(new JobContextImpl(confTemp, null));
  } catch (IOException | InterruptedException ie) {
    throw new RuntimeException(ie.getMessage(), ie);
  }
}