Java Code Examples for org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter#setupJob()

The following examples show how to use org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter#setupJob() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SafeFileOutputCommitterTest.java    From datawave with Apache License 2.0 5 votes vote down vote up
private void testCommitterInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);
    
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    
    // validate output
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example 2
Source File: SafeFileOutputCommitterTest.java    From datawave with Apache License 2.0 5 votes vote down vote up
private void testMapFileOutputCommitterInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);
    
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    
    // write output
    MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeMapFileOutput(theRecordWriter, tContext);
    
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    
    // validate output
    validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example 3
Source File: SafeFileOutputCommitterTest.java    From datawave with Apache License 2.0 5 votes vote down vote up
private void testSafety(int commitVersion) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
    conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, commitVersion);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);
    
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    
    // close the job prior to committing task (leaving files in temporary dir
    try {
        committer.commitJob(jContext);
        Assert.fail("Expected commit job to fail");
    } catch (Exception e) {
        committer.commitTask(tContext);
        committer.commitJob(jContext);
    }
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example 4
Source File: SafeFileOutputCommitterTest.java    From datawave with Apache License 2.0 4 votes vote down vote up
private void testRecoveryInternal(int commitVersion, int recoveryVersion) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
    conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, commitVersion);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);
    
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    
    // do commit
    committer.commitTask(tContext);
    
    Path jobTempDir1 = committer.getCommittedTaskPath(tContext);
    File jtd = new File(jobTempDir1.toUri().getPath());
    if (commitVersion == 1 || !patched) {
        assertTrue("Version 1 commits to temporary dir " + jtd, jtd.exists());
        validateContent(jtd);
    } else {
        assertFalse("Version 2 commits to output dir " + jtd, jtd.exists());
    }
    
    // now while running the second app attempt,
    // recover the task output from first attempt
    Configuration conf2 = job.getConfiguration();
    conf2.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf2.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 2);
    conf2.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, recoveryVersion);
    JobContext jContext2 = new JobContextImpl(conf2, taskID.getJobID());
    TaskAttemptContext tContext2 = new TaskAttemptContextImpl(conf2, taskID);
    FileOutputCommitter committer2 = new SafeFileOutputCommitter(outDir, tContext2);
    committer2.setupJob(tContext2);
    Path jobTempDir2 = committer2.getCommittedTaskPath(tContext2);
    File jtd2 = new File(jobTempDir2.toUri().getPath());
    
    committer2.recoverTask(tContext2);
    if (recoveryVersion == 1 || !patched) {
        assertTrue("Version 1 recovers to " + jtd2, jtd2.exists());
        validateContent(jtd2);
    } else {
        assertFalse("Version 2 commits to output dir " + jtd2, jtd2.exists());
        if (commitVersion == 1 || !patched) {
            assertEquals("Version 2  recovery moves to output dir from " + jtd, 0, jtd.list().length);
        }
    }
    
    committer2.commitJob(jContext2);
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example 5
Source File: SafeFileOutputCommitterTest.java    From datawave with Apache License 2.0 4 votes vote down vote up
private void testConcurrentCommitTaskWithSubDir(int version) throws Exception {
    final Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    final Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    
    conf.setClass("fs.file.impl", RLFS.class, FileSystem.class);
    FileSystem.closeAll();
    
    final JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    final FileOutputCommitter amCommitter = new SafeFileOutputCommitter(outDir, jContext);
    amCommitter.setupJob(jContext);
    
    final TaskAttemptContext[] taCtx = new TaskAttemptContextImpl[2];
    taCtx[0] = new TaskAttemptContextImpl(conf, taskID);
    taCtx[1] = new TaskAttemptContextImpl(conf, taskID1);
    
    final TextOutputFormat[] tof = new TextOutputFormat[2];
    for (int i = 0; i < tof.length; i++) {
        tof[i] = new TextOutputFormat() {
            @Override
            public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
                final FileOutputCommitter foc = (FileOutputCommitter) getOutputCommitter(context);
                return new Path(new Path(foc.getWorkPath(), SUB_DIR), getUniqueFile(context, getOutputName(context), extension));
            }
        };
    }
    
    final ExecutorService executor = Executors.newFixedThreadPool(2);
    try {
        for (int i = 0; i < taCtx.length; i++) {
            final int taskIdx = i;
            executor.submit((Callable<Void>) () -> {
                final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]);
                outputCommitter.setupTask(taCtx[taskIdx]);
                final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]);
                writeOutput(rw, taCtx[taskIdx]);
                outputCommitter.commitTask(taCtx[taskIdx]);
                return null;
            });
        }
    } finally {
        executor.shutdown();
        while (!executor.awaitTermination(1, TimeUnit.SECONDS)) {
            LOG.info("Awaiting thread termination!");
        }
    }
    
    amCommitter.commitJob(jContext);
    final RawLocalFileSystem lfs = new RawLocalFileSystem();
    lfs.setConf(conf);
    assertFalse("Must not end up with sub_dir/sub_dir", lfs.exists(new Path(OUT_SUB_DIR, SUB_DIR)));
    
    // validate output
    validateContent(OUT_SUB_DIR);
    FileUtil.fullyDelete(new File(outDir.toString()));
}