org.apache.hadoop.mapred.JobContextImpl Java Examples

The following examples show how to use org.apache.hadoop.mapred.JobContextImpl. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: HadoopOutputFormatBase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Prepares this parallel instance for writing: derives a Hadoop task attempt ID from the
 * task number, stores it in the job configuration, lets the configured output committer
 * set up the job and finally obtains the Hadoop RecordWriter.
 *
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException if the task id (taskNumber + 1) has more than six characters
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

	// enforce sequential open() calls
	synchronized (OPEN_MUTEX) {
		// The id handed to Hadoop is the task number shifted by one.
		final int taskId = taskNumber + 1;
		final String taskIdText = Integer.toString(taskId);
		if (taskIdText.length() > 6) {
			throw new IOException("Task id too large.");
		}

		// Left-pad the id with zeros to six characters. The padding is built by hand rather
		// than through a computed String.format width: for a six-character id the width would
		// be zero, giving the pattern "%0s", which String.format rejects with an
		// IllegalFormatException.
		final StringBuilder paddedTaskId = new StringBuilder(6);
		for (int i = taskIdText.length(); i < 6; i++) {
			paddedTaskId.append('0');
		}
		paddedTaskId.append(taskIdText);

		TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_" + paddedTaskId + "_0");

		this.jobConf.set("mapred.task.id", taskAttemptID.toString());
		this.jobConf.setInt("mapred.task.partition", taskId);
		// for hadoop 2.2
		this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
		this.jobConf.setInt("mapreduce.task.partition", taskId);

		this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

		this.outputCommitter = this.jobConf.getOutputCommitter();

		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

		this.outputCommitter.setupJob(jobContext);

		// No FileSystem is passed (null); the writer is named after the task id.
		this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, taskIdText, new HadoopDummyProgressable());
	}
}

Example #2

Source File: HadoopOutputFormatBase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Commits the job through the output committer configured in the job configuration.
 * Any exception raised while doing so is rethrown wrapped in a {@link RuntimeException}.
 *
 * @param parallelism not read by this implementation
 */
@Override
public void finalizeGlobal(int parallelism) throws IOException {
	try {
		final JobContext commitContext = new JobContextImpl(this.jobConf, new JobID());

		// finalize HDFS output format
		this.jobConf.getOutputCommitter().commitJob(commitContext);
	} catch (Exception cause) {
		throw new RuntimeException(cause);
	}
}

Example #3

Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0

5 votes

/**
 * Opens this parallel instance: builds a Hadoop task attempt ID from the task number,
 * records it in the job configuration, runs the output committer's job setup and creates
 * the Hadoop RecordWriter.
 *
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException if the task id (taskNumber + 1) has more than six characters
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

	// enforce sequential open() calls
	synchronized (OPEN_MUTEX) {
		// Hadoop receives the task number shifted by one.
		final String taskIdText = Integer.toString(taskNumber + 1);
		final int missingDigits = 6 - taskIdText.length();
		if (missingDigits < 0) {
			throw new IOException("Task id too large.");
		}

		// Prefix zeros until the id is six characters long. This is done with a plain loop
		// because a String.format pattern with a computed width degenerates to "%0s" when no
		// padding is needed, and String.format throws an IllegalFormatException for it.
		final StringBuilder attemptName = new StringBuilder("attempt__0000_r_");
		for (int i = 0; i < missingDigits; i++) {
			attemptName.append('0');
		}
		attemptName.append(taskIdText).append("_0");

		TaskAttemptID taskAttemptID = TaskAttemptID.forName(attemptName.toString());

		this.jobConf.set("mapred.task.id", taskAttemptID.toString());
		this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
		// for hadoop 2.2
		this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
		this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

		this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

		this.outputCommitter = this.jobConf.getOutputCommitter();

		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

		this.outputCommitter.setupJob(jobContext);

		// No FileSystem is passed (null); the writer is named after the task id.
		this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, taskIdText, new HadoopDummyProgressable());
	}
}

Example #4

Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0

5 votes

/**
 * Asks the job configuration's output committer to commit the job, using a job context
 * built around a fresh {@code JobID}. Failures of any kind surface as a
 * {@link RuntimeException} carrying the original exception as its cause.
 *
 * @param parallelism not read by this implementation
 */
@Override
public void finalizeGlobal(int parallelism) throws IOException {
	try {
		final JobContext ctx = new JobContextImpl(this.jobConf, new JobID());
		final OutputCommitter committer = this.jobConf.getOutputCommitter();

		// finalize HDFS output format
		committer.commitJob(ctx);
	} catch (Exception failure) {
		throw new RuntimeException(failure);
	}
}

Example #5

Source File: HiveTableOutputFormat.java From flink with Apache License 2.0

5 votes

/**
 * Stores {@code location} under the {@code OUTDIR} key of the job configuration and then
 * commits the job through the configured output committer.
 *
 * @param location value written to the {@code OUTDIR} configuration entry
 * @throws IOException declared for the committer's commit call
 */
private void commitJob(String location) throws IOException {
	jobConf.set(OUTDIR, location);

	final JobContext commitContext = new JobContextImpl(this.jobConf, new JobID());
	final OutputCommitter committer = this.jobConf.getOutputCommitter();

	// finalize HDFS output format
	committer.commitJob(commitContext);
}

Example #6

Source File: TestMRCJCFileOutputCommitter.java From hadoop with Apache License 2.0

5 votes

/**
 * Writes output through a FileOutputCommitter, aborts first the task and then the job,
 * and asserts that the written file, the job temp directory and any other content of the
 * output directory are gone afterwards.
 */
public void testAbort() throws IOException {
  JobConf conf = new JobConf();
  setConfForFileOutputCommitter(conf);
  JobContext jobCtx = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext attemptCtx = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter();
  Path workPath = committer.getTaskAttemptPath(attemptCtx);
  FileOutputFormat.setWorkOutputPath(conf, workPath);

  // do setup
  committer.setupJob(jobCtx);
  committer.setupTask(attemptCtx);
  String fileName = "test.txt";

  // write output, reporting to a reporter that does nothing
  Reporter nullReporter = Reporter.NULL;
  FileSystem localFs = FileSystem.getLocal(conf);
  TextOutputFormat format = new TextOutputFormat();
  RecordWriter writer = format.getRecordWriter(localFs, conf, fileName, nullReporter);
  writeOutput(writer, nullReporter);

  // do abort: the file under the task attempt path must disappear
  committer.abortTask(attemptCtx);
  Path abortedFilePath = new Path(committer.getTaskAttemptPath(attemptCtx), fileName);
  File leftover = new File(abortedFilePath.toString());
  assertFalse("task temp dir still exists", leftover.exists());

  // aborting the job must remove the job temp dir and leave the output dir empty
  committer.abortJob(jobCtx, JobStatus.State.FAILED);
  Path jobTempPath = new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME);
  leftover = new File(jobTempPath.toString());
  assertFalse("job temp dir "+leftover+" still exists", leftover.exists());
  File outputRoot = new File(outDir.toString());
  assertEquals("Output directory not empty", 0, outputRoot.listFiles().length);
  FileUtil.fullyDelete(new File(outDir.toString()));
}

Example #7

Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0

5 votes

/**
 * Exercises the abort path of FileOutputCommitter: after job and task setup and one round
 * of output, the task and then the job are aborted, and the test asserts that nothing is
 * left in the task attempt path, the job temp directory or the output directory.
 */
public void testAbort() throws IOException {
  final JobConf jobConf = new JobConf();
  setConfForFileOutputCommitter(jobConf);
  final JobContext jobContext = new JobContextImpl(jobConf, taskID.getJobID());
  final TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf, taskID);
  final FileOutputCommitter outputCommitter = new FileOutputCommitter();
  FileOutputFormat.setWorkOutputPath(jobConf,
      outputCommitter.getTaskAttemptPath(taskContext));

  // do setup
  outputCommitter.setupJob(jobContext);
  outputCommitter.setupTask(taskContext);
  final String outputName = "test.txt";

  // A reporter that does nothing
  final Reporter silentReporter = Reporter.NULL;
  // write output
  final FileSystem localFs = FileSystem.getLocal(jobConf);
  final TextOutputFormat textFormat = new TextOutputFormat();
  final RecordWriter recordWriter =
      textFormat.getRecordWriter(localFs, jobConf, outputName, silentReporter);
  writeOutput(recordWriter, silentReporter);

  // do abort
  outputCommitter.abortTask(taskContext);
  final Path taskFilePath =
      new Path(outputCommitter.getTaskAttemptPath(taskContext), outputName);
  final File taskFile = new File(taskFilePath.toString());
  assertFalse("task temp dir still exists", taskFile.exists());

  outputCommitter.abortJob(jobContext, JobStatus.State.FAILED);
  final File jobTempDir =
      new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
  assertFalse("job temp dir "+jobTempDir+" still exists", jobTempDir.exists());
  final File[] remaining = new File(outDir.toString()).listFiles();
  assertEquals("Output directory not empty", 0, remaining.length);
  FileUtil.fullyDelete(new File(outDir.toString()));
}

Example #8

Source File: HadoopV2TaskContext.java From ignite with Apache License 2.0

5 votes

/**
 * Creates a task context whose Hadoop job configuration is read from the given data input.
 * While the configuration is built, the thread context class loader is switched to this
 * class' loader; the previous loader is restored before the constructor returns.
 *
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput the JobConf is read from.
 * @throws IgniteCheckedException If reading the JobConf fails with an I/O error.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
    @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // The context class loader has to be replaced before the JobConf instance is created.
    ClassLoader prevLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf conf = new JobConf();

        try {
            conf.readFields(jobConfDataInput);
        }
        catch (IOException ioe) {
            throw new IgniteCheckedException(ioe);
        }

        // For map-reduce jobs prefer local writes.
        conf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        initializePartiallyRawComparator(conf);

        JobID hadoopJobId = new JobID(jobId.globalId().toString(), jobId.localId());

        jobCtx = new JobContextImpl(conf, hadoopJobId);

        useNewMapper = conf.getUseNewMapper();
        useNewReducer = conf.getUseNewReducer();
        // Set when the JobConf reports no combiner class.
        useNewCombiner = conf.getCombinerClass() == null;
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(prevLdr);
    }
}

Example #9

Source File: HadoopV2JobResourceManager.java From ignite with Apache License 2.0

5 votes

/**
 * Creates a resource manager bound to the given job.
 *
 * @param jobId Job ID.
 * @param ctx Hadoop job context.
 * @param log Logger; a logger for this class is derived from it.
 * @param job Job instance stored by this manager.
 */
public HadoopV2JobResourceManager(HadoopJobId jobId, JobContextImpl ctx, IgniteLogger log, HadoopV2Job job) {
    // Derive the class-specific logger from the supplied one.
    this.log = log.getLogger(HadoopV2JobResourceManager.class);

    this.job = job;
    this.jobId = jobId;
    this.ctx = ctx;
}

Example #10

Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0

5 votes

/**
 * Sets up this parallel instance for output: creates a Hadoop task attempt ID for the task
 * number, publishes it in the job configuration, invokes the output committer's job setup
 * and requests the Hadoop RecordWriter.
 *
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException if the task id (taskNumber + 1) has more than six characters
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

	// enforce sequential open() calls
	synchronized (OPEN_MUTEX) {
		// The task id used on the Hadoop side is the task number plus one.
		final int partition = taskNumber + 1;
		final String taskIdText = Integer.toString(partition);
		if (taskIdText.length() > 6) {
			throw new IOException("Task id too large.");
		}

		// Take as many leading zeros as are missing to reach six characters; for a
		// six-character id this is the empty string. A String.format pattern with a computed
		// width is avoided on purpose: a width of zero yields "%0s", which String.format
		// rejects with an IllegalFormatException.
		final String zeroPadding = "000000".substring(taskIdText.length());

		TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
				+ zeroPadding
				+ taskIdText
				+ "_0");

		this.jobConf.set("mapred.task.id", taskAttemptID.toString());
		this.jobConf.setInt("mapred.task.partition", partition);
		// for hadoop 2.2
		this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
		this.jobConf.setInt("mapreduce.task.partition", partition);

		this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

		this.outputCommitter = this.jobConf.getOutputCommitter();

		JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

		this.outputCommitter.setupJob(jobContext);

		// No FileSystem is passed (null); the writer is named after the task id.
		this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, taskIdText, new HadoopDummyProgressable());
	}
}

Example #11

Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0

5 votes

/**
 * Finalizes the output by committing the job via the job configuration's output committer.
 * Every exception thrown on the way is wrapped in a {@link RuntimeException}.
 *
 * @param parallelism not read by this implementation
 */
@Override
public void finalizeGlobal(int parallelism) throws IOException {
	final JobContext jobCtx;
	final OutputCommitter committer;

	try {
		jobCtx = new JobContextImpl(this.jobConf, new JobID());
		committer = this.jobConf.getOutputCommitter();

		// finalize HDFS output format
		committer.commitJob(jobCtx);
	} catch (Exception ex) {
		throw new RuntimeException(ex);
	}
}

Example #12

Source File: HbaseClient.java From presto-connectors with Apache License 2.0

4 votes

/**
 * Builds the TabletSplitMetadata list for a query against an Hbase table.
 * <p>
 * The row ID domain is converted to ranges and each range to a Scan; when there are no ranges a
 * single default Scan is used and a warning is logged. For every Scan a TableInputFormat is asked
 * for its splits, and each split is converted to one TabletSplitMetadata entry. Any exception is
 * rethrown as a PrestoException with UNEXPECTED_HBASE_ERROR.
 *
 * @param session Current session
 * @param schema Schema name
 * @param table Table Name
 * @param rowIdDomain Domain for the row ID
 * @param constraints Column constraints for the query (not read by this method)
 * @return List of TabletSplitMetadata objects for Presto
 */
public List<TabletSplitMetadata> getTabletSplits(
        ConnectorSession session,
        String schema,
        String table,
        Optional<Domain> rowIdDomain,
        List<HbaseColumnConstraint> constraints) //HbaseRowSerializer serializer
{
    try {
        TableName tableName = TableName.valueOf(schema, table);
        LOG.debug("Getting tablet splits for table %s", tableName);

        // Get the initial Range based on the row ID domain
        Collection<Range> ranges = getRangesFromDomain(rowIdDomain);  //serializer

        // The locality setting is only logged here; it does not influence the splits below.
        boolean localityEnabled = HbaseSessionProperties.isOptimizeLocalityEnabled(session);

        LOG.debug("Fetching tablet locations: %s", localityEnabled);

        // One Scan per range, or a single default Scan when there is no rowkey filter
        List<Scan> scans;
        if (ranges.size() == 0) {
            LOG.warn("This request has no rowkey filter");
            scans = Arrays.asList(new Scan());
        }
        else {
            scans = ranges.stream()
                    .map(HbaseClient::getScanFromPrestoRange)
                    .collect(Collectors.toList());
        }

        ImmutableList.Builder<TabletSplitMetadata> result = ImmutableList.builder();
        for (Scan scan : scans) {
            TableInputFormat inputFormat = getNewTableInputFormat(connection, tableName);
            inputFormat.setConf(connection.getConfiguration());
            inputFormat.setScan(scan);

            JobContext jobContext = new JobContextImpl(new JobConf(), null);
            List<TableSplit> tableSplits = inputFormat.getSplits(jobContext)
                    .stream()
                    .map(x -> (TableSplit) x)
                    .collect(Collectors.toList());

            // Create a TabletSplitMetadata object for each split
            for (TableSplit tableSplit : tableSplits) {
                result.add(new TabletSplitMetadata(
                        tableSplit.getTable().getName(),
                        tableSplit.getStartRow(),
                        tableSplit.getEndRow(),
                        TabletSplitMetadata.convertScanToString(tableSplit.getScan()),
                        tableSplit.getRegionLocation(),
                        tableSplit.getLength()));
            }
        }
        List<TabletSplitMetadata> tabletSplits = result.build();

        // Log the totals and return the tablet splits
        LOG.debug("Number of splits for table %s is %d with %d ranges", tableName, tabletSplits.size(), ranges.size());
        return tabletSplits;
    }
    catch (Exception e) {
        throw new PrestoException(UNEXPECTED_HBASE_ERROR, "Failed to get splits from Hbase", e);
    }
}

Example #13

Source File: TestMRCJCFileOutputCommitter.java From hadoop with Apache License 2.0

4 votes

/**
 * Runs a full setup / write / commit cycle through FileOutputCommitter and checks that the
 * file found in the output directory afterwards has exactly the expected contents.
 *
 * @throws Exception if any step of the commit cycle or reading the result fails
 */
@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
  JobConf job = new JobConf();
  setConfForFileOutputCommitter(job);
  JobContext jContext = new JobContextImpl(job, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
  FileOutputCommitter committer = new FileOutputCommitter();
  FileOutputFormat.setWorkOutputPath(job, 
    committer.getTaskAttemptPath(tContext));

  committer.setupJob(jContext);
  committer.setupTask(tContext);
  String file = "test.txt";

  // A reporter that does nothing
  Reporter reporter = Reporter.NULL;
  // write output
  FileSystem localFs = FileSystem.getLocal(job);
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter =
    theOutputFormat.getRecordWriter(localFs, job, file, reporter);
  writeOutput(theRecordWriter, reporter);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);
  
  // validate output
  File expectedFile = new File(new Path(outDir, file).toString());
  // Local, single-threaded buffer: StringBuilder avoids StringBuffer's needless locking.
  StringBuilder expectedOutput = new StringBuilder();
  expectedOutput.append(key1).append('\t').append(val1).append("\n");
  expectedOutput.append(val1).append("\n");
  expectedOutput.append(val2).append("\n");
  expectedOutput.append(key2).append("\n");
  expectedOutput.append(key1).append("\n");
  expectedOutput.append(key2).append('\t').append(val2).append("\n");
  String output = UtilsForTests.slurp(expectedFile);
  // Expected value goes first so that a failure message labels both sides correctly.
  assertEquals(expectedOutput.toString(), output);

  FileUtil.fullyDelete(new File(outDir.toString()));
}

Example #14

Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0

4 votes

/**
 * Drives FileOutputCommitter through job setup, task setup, one round of output, task commit
 * and job commit, then compares the committed file in the output directory with the expected
 * text.
 *
 * @throws Exception if any step of the commit cycle or reading the result fails
 */
@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
  JobConf job = new JobConf();
  setConfForFileOutputCommitter(job);
  JobContext jContext = new JobContextImpl(job, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
  FileOutputCommitter committer = new FileOutputCommitter();
  FileOutputFormat.setWorkOutputPath(job, 
    committer.getTaskAttemptPath(tContext));

  committer.setupJob(jContext);
  committer.setupTask(tContext);
  String file = "test.txt";

  // A reporter that does nothing
  Reporter reporter = Reporter.NULL;
  // write output
  FileSystem localFs = FileSystem.getLocal(job);
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter =
    theOutputFormat.getRecordWriter(localFs, job, file, reporter);
  writeOutput(theRecordWriter, reporter);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);
  
  // validate output
  File expectedFile = new File(new Path(outDir, file).toString());
  // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
  StringBuilder expectedOutput = new StringBuilder();
  expectedOutput.append(key1).append('\t').append(val1).append("\n");
  expectedOutput.append(val1).append("\n");
  expectedOutput.append(val2).append("\n");
  expectedOutput.append(key2).append("\n");
  expectedOutput.append(key1).append("\n");
  expectedOutput.append(key2).append('\t').append(val2).append("\n");
  String output = UtilsForTests.slurp(expectedFile);
  // assertEquals takes (expected, actual); this order keeps failure messages truthful.
  assertEquals(expectedOutput.toString(), output);

  FileUtil.fullyDelete(new File(outDir.toString()));
}

Example #15

Source File: HadoopV2TaskContext.java From ignite with Apache License 2.0

2 votes

/**
 * Returns the job context held by this task context.
 *
 * @return Job context.
 */
public JobContextImpl jobContext() {
    return this.jobCtx;
}