org.apache.hadoop.mapreduce.JobContext Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.JobContext. Each snippet is taken from an open-source project; the originating source file, project, and license are noted above each example.
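Most of the examples below follow the same basic pattern: obtain a JobContext, either handed in by the MapReduce framework (for example in InputFormat.getSplits or OutputCommitter.commitJob) or built directly as a JobContextImpl around a Configuration and a JobID, and then read job settings through getConfiguration(). Below is a minimal, self-contained sketch of that pattern; the property name example.input.table is a placeholder used only for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

public class JobContextSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("example.input.table", "my_table"); // placeholder key, illustration only

    // Standalone code (tests, Flink/Spark/Pig integrations) builds its own context;
    // inside a running job the framework passes one into the methods shown below.
    JobContext context = new JobContextImpl(conf, new JobID("local", 1));

    // The access pattern used throughout the examples on this page.
    String table = context.getConfiguration().get("example.input.table");
    System.out.println("table = " + table + ", job id = " + context.getJobID());
  }
}
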
Example #1
Source File: HadoopInputFormatBase.java    From flink with Apache License 2.0
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits)
		throws IOException {
	configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

	JobContext jobContext = new JobContextImpl(configuration, new JobID());

	jobContext.getCredentials().addAll(this.credentials);
	Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
	if (currentUserCreds != null) {
		jobContext.getCredentials().addAll(currentUserCreds);
	}

	List<org.apache.hadoop.mapreduce.InputSplit> splits;
	try {
		splits = this.mapreduceInputFormat.getSplits(jobContext);
	} catch (InterruptedException e) {
		throw new IOException("Could not get Splits.", e);
	}
	HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];

	for (int i = 0; i < hadoopInputSplits.length; i++) {
		hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
	}
	return hadoopInputSplits;
}
 
Example #2
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0
/**
 * The job dataset may already exist if the ApplicationMaster was restarted
 */
@SuppressWarnings("unchecked")
private static <E> Dataset<E> loadOrCreateJobDataset(JobContext jobContext) {
  Dataset<Object> dataset = load(jobContext).getDataset();
  String jobDatasetName = getJobDatasetName(jobContext);
  DatasetRepository repo = getDatasetRepository(jobContext);
  if (repo.exists(TEMP_NAMESPACE, jobDatasetName)) {
    Dataset<E> tempDataset = repo.load(TEMP_NAMESPACE, jobDatasetName,
      DatasetKeyOutputFormat.<E>getType(jobContext));
    try {
      Compatibility.checkCompatible(dataset.getDescriptor(),
        tempDataset.getDescriptor());
      return tempDataset;
    } catch (RuntimeException ex) {
      // swallow
    }
  }

  return repo.create(TEMP_NAMESPACE, jobDatasetName,
      copy(dataset.getDescriptor()),
      DatasetKeyOutputFormat.<E>getType(jobContext));
}
 
Example #3
Source File: PrunedSequenceFileInputFormat.java    From incubator-retired-blur with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = super.getSplits(job);
  List<InputSplit> results = new ArrayList<InputSplit>();
  Configuration configuration = job.getConfiguration();
  String table = InputSplitPruneUtil.getTable(configuration);
  for (InputSplit inputSplit : splits) {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Path path = fileSplit.getPath();
    LOG.debug("Getting shard index from path [" + path + "]");
    String name = path.getName();
    int shard = getShardIndex(name);
    long rowIdUpdateFromNewDataCount = InputSplitPruneUtil.getBlurLookupRowIdUpdateFromNewDataCount(configuration,
        table, shard);
    long indexCount = InputSplitPruneUtil.getBlurLookupRowIdFromIndexCount(configuration, table, shard);
    if (rowIdUpdateFromNewDataCount == 0 || indexCount == 0) {
      LOG.debug("Pruning id lookup input path [" + path + "] no overlapping ids.");
    } else if (InputSplitPruneUtil.shouldLookupExecuteOnShard(configuration, table, shard)) {
      LOG.debug("Keeping id lookup input path [" + path + "]");
      results.add(inputSplit);
    } else {
      LOG.debug("Pruning id lookup input path [" + path + "]");
    }
  }
  return results;
}
 
Example #4
Source File: GenerateData.java    From RDFS with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
 
Example #5
Source File: GeoWaveInputFormat.java    From geowave with Apache License 2.0
/**
 * Check whether a configuration is fully configured to be used with an Accumulo
 * {@link org.apache.hadoop.mapreduce.InputFormat}.
 *
 * @param context the Hadoop context for the configured job
 * @throws IOException if the context is improperly configured
 * @since 1.5.0
 */
protected static void validateOptions(final JobContext context) throws IOException {
  // attempt to get each of the GeoWave stores from the job context
  try {
    final Map<String, String> configOptions = getStoreOptionsMap(context);
    final StoreFactoryFamilySpi factoryFamily = GeoWaveStoreFinder.findStoreFamily(configOptions);
    if (factoryFamily == null) {
      final String msg = "Unable to find GeoWave data store";
      LOGGER.warn(msg);
      throw new IOException(msg);
    }
  } catch (final Exception e) {
    LOGGER.warn("Error finding GeoWave stores", e);
    throw new IOException("Error finding GeoWave stores", e);
  }
}
 
Example #6
Source File: TestMRCJCFileInputFormat.java    From hadoop with Apache License 2.0
@Test
@SuppressWarnings({ "rawtypes", "unchecked" })
public void testLastInputSplitSingleSplit() throws Exception {
  FileInputFormat fif = new FileInputFormatForTest(100L * 1024 * 1024,
      128L * 1024 * 1024);
  Configuration conf = new Configuration();
  JobContext jobContext = mock(JobContext.class);
  when(jobContext.getConfiguration()).thenReturn(conf);
  List<InputSplit> splits = fif.getSplits(jobContext);
  assertEquals(1, splits.size());
  for (int i = 0; i < splits.size(); i++) {
    InputSplit split = splits.get(i);
    assertEquals(("host" + i), split.getLocations()[0]);
  }
}
 
Example #7
Source File: NMapInputFormat.java    From HBase-ToHDFS with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException,
    InterruptedException {
  int count = getNumMapTasks(context.getConfiguration());
  List<InputSplit> splits = new ArrayList<InputSplit>(count);
  for (int i = 0; i < count; i++) {
    splits.add(new NullInputSplit());
  }
  return splits;
}
 
Example #8
Source File: GeoWaveConfiguratorBase.java    From geowave with Apache License 2.0
public static final <T> T getInstance(
    final Class<?> implementingClass,
    final Enum<?> e,
    final JobContext context,
    final Class<T> interfaceClass,
    final Class<? extends T> defaultClass) throws InstantiationException, IllegalAccessException {
  return getConfiguration(context).getClass(
      enumToConfKey(implementingClass, e),
      defaultClass,
      interfaceClass).newInstance();
}
 
Example #9
Source File: SequenceFileAsBinaryOutputFormat.java    From hadoop with Apache License 2.0
@Override 
public void checkOutputSpecs(JobContext job) throws IOException {
  super.checkOutputSpecs(job);
  if (getCompressOutput(job) && 
      getOutputCompressionType(job) == CompressionType.RECORD ) {
    throw new InvalidJobConfException("SequenceFileAsBinaryOutputFormat "
      + "doesn't support Record Compression" );
  }
}
 
Example #10
Source File: HadoopOutputFormatTest.java    From flink with Apache License 2.0
@Test
public void testOpen() throws Exception {

	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	OutputCommitter outputCommitter = setupOutputCommitter(true);
	when(dummyOutputFormat.getOutputCommitter(any(TaskAttemptContext.class))).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> hadoopOutputFormat = setupHadoopOutputFormat(dummyOutputFormat,
		Job.getInstance(), new DummyRecordWriter(), setupOutputCommitter(true), new Configuration());

	hadoopOutputFormat.open(1, 4);

	verify(hadoopOutputFormat.outputCommitter, times(1)).setupJob(any(JobContext.class));
	verify(hadoopOutputFormat.mapreduceOutputFormat, times(1)).getRecordWriter(any(TaskAttemptContext.class));
}
 
Example #11
Source File: FileAndDirectoryInputFormat.java    From marklogic-contentpump with Apache License 2.0
protected List<FileStatus> listStatus(JobContext job
        ) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, 
            job.getConfiguration());

    // Whether we need to recursive look into the directory structure
    boolean recursive = getInputDirRecursive(job);

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    List<FileStatus> result = simpleListStatus(job, dirs, inputFilter, recursive);     

    LOG.info("Total input paths to process : " + result.size()); 
    return result;
}
 
Example #12
Source File: AccumuloHDFSFileInputFormat.java    From rya with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
    //read the params from AccumuloInputFormat
    Configuration conf = jobContext.getConfiguration();
    Instance instance = MRUtils.AccumuloProps.getInstance(jobContext);
    String user = MRUtils.AccumuloProps.getUsername(jobContext);
    AuthenticationToken password = MRUtils.AccumuloProps.getPassword(jobContext);
    String table = MRUtils.AccumuloProps.getTablename(jobContext);
    ArgumentChecker.notNull(instance);
    ArgumentChecker.notNull(table);

    //find the files necessary
    try {
        Connector connector = instance.getConnector(user, password);
        TableOperations tos = connector.tableOperations();
        String tableId = tos.tableIdMap().get(table);
        Scanner scanner = connector.createScanner("accumulo.metadata", Authorizations.EMPTY); //TODO: auths?
        scanner.setRange(new Range(new Text(tableId + "\u0000"), new Text(tableId + "\uFFFD")));
        scanner.fetchColumnFamily(new Text("file"));
        List<String> files = new ArrayList<String>();
        List<InputSplit> fileSplits = new ArrayList<InputSplit>();
        for (Map.Entry<Key, Value> entry : scanner) {
            String file = entry.getKey().getColumnQualifier().toString();
            Path path = new Path(file);
            FileSystem fs = path.getFileSystem(conf);
            FileStatus fileStatus = fs.getFileStatus(path);
            long len = fileStatus.getLen();
            BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(fileStatus, 0, len);
            files.add(file);
            fileSplits.add(new FileSplit(path, 0, len, fileBlockLocations[0].getHosts()));
        }
        System.out.println(files);
        return fileSplits;
    } catch (Exception e) {
        throw new IOException(e);
    }
}
 
Example #13
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0
private static DatasetRepository getDatasetRepository(JobContext jobContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  DatasetRepository repo = DatasetRepositories.repositoryFor(conf.get(KITE_OUTPUT_URI));
  if (repo instanceof TemporaryDatasetRepositoryAccessor) {
    Dataset<Object> dataset = load(jobContext).getDataset();
    String namespace = dataset.getNamespace();
    repo = ((TemporaryDatasetRepositoryAccessor) repo)
        .getTemporaryRepository(namespace, getJobDatasetName(jobContext));
  }
  return repo;
}
 
Example #14
Source File: S3DirectoryOutputCommitter.java    From s3committer with Apache License 2.0
@Override
public void commitJob(JobContext context) throws IOException {
  Path outputPath = getOutputPath(context);
  // use the FS implementation because it will check for _$folder$
  FileSystem fs = outputPath.getFileSystem(context.getConfiguration());
  if (fs.exists(outputPath)) {
    switch (getMode(context)) {
      case FAIL:
        // this was checked in setupJob, but this avoids some cases where
        // output was created while the job was processing
        throw new AlreadyExistsException(
            "Output path already exists: " + outputPath);
      case APPEND:
        // do nothing
        break;
      case REPLACE:
        LOG.info("Removing output path to be replaced: " + outputPath);
        if (!fs.delete(outputPath, true /* recursive */ )) {
          throw new IOException(
              "Failed to delete existing output directory for replace:" +
              outputPath);
        }
        break;
      default:
        throw new RuntimeException(
            "Unknown conflict resolution mode: " + getMode(context));
    }
  }

  super.commitJob(context);
}
 
Example #15
Source File: FileOutputCommitter.java    From big-c with Apache License 2.0
/**
 * Delete the temporary directory, including all of the work directories.
 * @param context the job's context
 */
@Override
public void abortJob(JobContext context, JobStatus.State state) 
throws IOException {
  // delete the _temporary folder
  cleanupJob(context);
}
 
Example #16
Source File: TestJobImpl.java    From big-c with Apache License 2.0
@Test(timeout=20000)
public void testKilledDuringKillAbort() throws Exception {
  Configuration conf = new Configuration();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  AsyncDispatcher dispatcher = new AsyncDispatcher();
  dispatcher.init(conf);
  dispatcher.start();
  OutputCommitter committer = new StubbedOutputCommitter() {
    @Override
    public synchronized void abortJob(JobContext jobContext, State state)
        throws IOException {
      while (!Thread.interrupted()) {
        try {
          wait();
        } catch (InterruptedException e) {
        }
      }
    }
  };
  CommitterEventHandler commitHandler =
      createCommitterEventHandler(dispatcher, committer);
  commitHandler.init(conf);
  commitHandler.start();

  JobImpl job = createStubbedJob(conf, dispatcher, 2, null);
  JobId jobId = job.getID();
  job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
  assertJobState(job, JobStateInternal.INITED);
  job.handle(new JobStartEvent(jobId));
  assertJobState(job, JobStateInternal.SETUP);

  job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  assertJobState(job, JobStateInternal.KILL_ABORT);

  job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  assertJobState(job, JobStateInternal.KILLED);
  dispatcher.stop();
  commitHandler.stop();
}
 
Example #17
Source File: IcebergPigInputFormat.java    From iceberg with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext context) throws IOException {
  if (splits != null) {
    LOG.info("Returning cached splits: {}", splits.size());
    return splits;
  }

  splits = Lists.newArrayList();

  TableScan scan = table.newScan();

  //Apply Filters
  Expression filterExpression =
      (Expression) ObjectSerializer.deserialize(context.getConfiguration().get(scope(ICEBERG_FILTER_EXPRESSION)));
  LOG.info("[{}]: iceberg filter expressions: {}", signature, filterExpression);

  if (filterExpression != null) {
    LOG.info("Filter Expression: {}", filterExpression);
    scan = scan.filter(filterExpression);
  }

  //Wrap in Splits
  try (CloseableIterable<CombinedScanTask> tasks = scan.planTasks()) {
    tasks.forEach(scanTask -> splits.add(new IcebergSplit(scanTask)));
  }

  return splits;
}
 
Example #18
Source File: TestFileOutputCommitter.java    From hadoop with Apache License 2.0
private void testMapFileOutputCommitterInternal(int version)
    throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
      version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());    
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeMapFileOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #19
Source File: RegionSplitsIT.java    From spliceengine with GNU Affero General Public License v3.0
@Test
public void testGetSplits() throws Exception{

    SMInputFormat smInputFormat = new SMInputFormat();
    final Configuration conf=new Configuration(HConfiguration.unwrapDelegate());
    conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class,FakeOutputFormat.class);
    conf.setInt(MRConstants.SPLICE_SPLITS_PER_TABLE, 8);
    // Get splits for the SYSCOLUMNS table.
    String tableName = format("%s.%s", SCHEMA_NAME, TABLE1_NAME);
    conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName);
    long conglomId = spliceClassWatcher.getConglomId(TABLE1_NAME, SCHEMA_NAME);
    String conglomAsString = format("%d", conglomId);
    conf.set(MRConstants.SPLICE_INPUT_CONGLOMERATE, conglomAsString);
    String jdbcString = "jdbc:splice://localhost:1527/splicedb;user=splice;password=admin";
    conf.set(MRConstants.SPLICE_JDBC_STR, jdbcString);

    SMSQLUtil util = SMSQLUtil.getInstance(jdbcString);
    List<String> columns = new ArrayList<>();
    columns.add("I");
    conf.set(MRConstants.SPLICE_SCAN_INFO, util.getTableScannerBuilder(tableName, columns).base64Encode());
    smInputFormat.setConf(conf);
    JobContext ctx = new JobContextImpl(conf,new JobID("test",1));
    List<InputSplit> splits = smInputFormat.getSplits(ctx);

    LOG.info("Got "+splits.size() + " splits");
    assertTrue(format("Expected between 6 and 10 splits, got %d.", splits.size()),
            splits.size() >= 6 && splits.size() <= 10);

}
 
Example #20
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public void commitJob(JobContext jobContext) throws IOException {
  Configuration conf = Hadoop.JobContext
      .getConfiguration.invoke(jobContext);
  DatasetRepository repo = getDatasetRepository(jobContext);
  boolean isTemp = repo instanceof TemporaryDatasetRepository;

  String jobDatasetName = getJobDatasetName(jobContext);
  View<E> targetView = load(jobContext);
  Dataset<E> jobDataset = repo.load(TEMP_NAMESPACE, jobDatasetName);
  WriteMode mode = conf.getEnum(KITE_WRITE_MODE, WriteMode.DEFAULT);
  if (mode == WriteMode.OVERWRITE && canReplace(targetView)) {
    ((Replaceable<View<E>>) targetView.getDataset()).replace(targetView, jobDataset);
  } else {
    ((Mergeable<Dataset<E>>) targetView.getDataset()).merge(jobDataset);
  }

  if (targetView instanceof Signalable) {
    ((Signalable)targetView).signalReady();
  }

  if (isTemp) {
    ((TemporaryDatasetRepository) repo).delete();
  } else {
    repo.delete(TEMP_NAMESPACE, jobDatasetName);
  }
}
 
Example #21
Source File: PhoenixInputFormat.java    From phoenix with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {  
    final Configuration configuration = context.getConfiguration();
    final QueryPlan queryPlan = getQueryPlan(context,configuration);
    final List<KeyRange> allSplits = queryPlan.getSplits();
    final List<InputSplit> splits = generateSplits(queryPlan,allSplits);
    return splits;
}
 
Example #22
Source File: InputRDDFormat.java    From tinkerpop with Apache License 2.0
@Override
public List<InputSplit> getSplits(final JobContext jobContext) throws IOException, InterruptedException {
    return Collections.singletonList(new InputSplit() {
        @Override
        public long getLength() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public String[] getLocations() throws IOException, InterruptedException {
            return new String[0];
        }
    });
}
 
Example #23
Source File: TestFileOutputCommitter.java    From hadoop with Apache License 2.0
private void testCommitterInternal(int version) throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
      version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateContent(outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #24
Source File: CombineFileInputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected boolean isSplitable(JobContext context, Path file) {
  final CompressionCodec codec =
    new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
  if (null == codec) {
    return true;
  }

  // Once we remove support for Hadoop < 2.0
  //return codec instanceof SplittableCompressionCodec;
  return false;
}
 
Example #25
Source File: YarnOutputFiles.java    From hadoop with Apache License 2.0
/**
 * Create a local map output file name on the same volume.
 */
public Path getOutputFileForWriteInVolume(Path existing) {
  Path outputDir = new Path(existing.getParent(), JOB_OUTPUT_DIR);
  Path attemptOutputDir = new Path(outputDir,
      conf.get(JobContext.TASK_ATTEMPT_ID));
  return new Path(attemptOutputDir, MAP_OUTPUT_FILENAME_STRING);
}
 
Example #26
Source File: DistSum.java    From hadoop with Apache License 2.0
/** @return a list containing a single split of summation */
@Override
public List<InputSplit> getSplits(JobContext context) {
  //read sigma from conf
  final Configuration conf = context.getConfiguration();
  final Summation sigma = SummationWritable.read(DistSum.class, conf); 
  
  //create splits
  final List<InputSplit> splits = new ArrayList<InputSplit>(1);
  splits.add(new SummationSplit(sigma));
  return splits;
}
 
Example #27
Source File: SSTableInputFormat.java    From hadoop-sstable with Apache License 2.0
@Override
public List<InputSplit> getSplits(final JobContext job) throws IOException {
    final Configuration configuration = job.getConfiguration();

    final List<InputSplit> result = Lists.newArrayList();

    final List<FileStatus> files = listStatus(job);

    LOG.debug("Initial file list: {} {}", files.size(), files);

    for (final FileStatus fileStatus : files) {
        final Path dataFile = fileStatus.getPath();
        final FileSystem fileSystem = dataFile.getFileSystem(configuration);
        final BlockLocation[] blockLocations = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());

        // Data file, try to split if the .index file was found
        final SSTableIndexIndex index = indexes.get(dataFile);
        if (index == null) {
            throw new IOException("Index not found for " + dataFile);
        }

        for (final SSTableIndexIndex.Chunk chunk : index.getOffsets()) {
            // This isn't likely to work well because we are dealing with the index into uncompressed data...
            final int blockIndex = getBlockIndex(blockLocations, chunk.getStart() / COMPRESSION_RATIO_ASSUMPTION);
            final SSTableSplit split = new SSTableSplit(dataFile, chunk.getStart(), chunk.getEnd(),
                    chunk.getEnd() - chunk.getStart(), blockLocations[blockIndex].getHosts());
            result.add(split);
        }
    }

    LOG.debug("Splits calculated: {} {}", result.size(), result);

    return result;
}
 
Example #28
Source File: AbstractSMInputFormat.java    From spliceengine with GNU Affero General Public License v3.0
private List<InputSplit> getInputSplitsFromCache(JobContext context) {
    if (inputSplits != null) {
        return inputSplits;
    }

    String splitCacheId = context.getConfiguration().get(MRConstants.SPLICE_SCAN_INPUT_SPLITS_ID);
    if (StringUtils.isNotEmpty(splitCacheId)) {
        if (FetchSplitsJob.splitCache.containsKey(splitCacheId)) {
            Future<List<InputSplit>> cachedSplitsFuture = FetchSplitsJob.splitCache.get(splitCacheId);
            List<InputSplit> cachedSplits = null;
            if (cachedSplitsFuture != null) {
                try {
                    cachedSplits = cachedSplitsFuture.get();
                } catch (ExecutionException | InterruptedException e) {
                    throw new RuntimeException(e.getMessage(), e);
                }
            }
            FetchSplitsJob.splitCache.remove(splitCacheId);
            if (cachedSplits != null) {
                inputSplits = cachedSplits;
                return cachedSplits;
            }
        }
    }

    return null;
}
 
Example #29
Source File: TestJobImpl.java    From hadoop with Apache License 2.0
@Test (timeout=10000)
public void testFailAbortDoesntHang() throws IOException {
  Configuration conf = new Configuration();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  conf.set(MRJobConfig.MR_AM_COMMITTER_CANCEL_TIMEOUT_MS, "1000");
  
  DrainDispatcher dispatcher = new DrainDispatcher();
  dispatcher.init(conf);
  dispatcher.start();
  OutputCommitter committer = Mockito.mock(OutputCommitter.class);
  CommitterEventHandler commitHandler =
      createCommitterEventHandler(dispatcher, committer);
  commitHandler.init(conf);
  commitHandler.start();
  //Job has only 1 mapper task. No reducers
  conf.setInt(MRJobConfig.NUM_REDUCES, 0);
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1);
  JobImpl job = createRunningStubbedJob(conf, dispatcher, 1, null);

  //Fail / finish all the tasks. This should land the JobImpl directly in the
  //FAIL_ABORT state
  for(Task t: job.tasks.values()) {
    TaskImpl task = (TaskImpl) t;
    task.handle(new TaskEvent(task.getID(), TaskEventType.T_SCHEDULE));
    for(TaskAttempt ta: task.getAttempts().values()) {
      task.handle(new TaskTAttemptEvent(ta.getID(),
        TaskEventType.T_ATTEMPT_FAILED));
    }
  }

  dispatcher.await();
  //Verify abortJob is called once and the job failed
  Mockito.verify(committer, Mockito.timeout(2000).times(1))
    .abortJob((JobContext) Mockito.any(), (State) Mockito.any());
  assertJobState(job, JobStateInternal.FAILED);

  dispatcher.stop();
}