Java Code Examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getOutputPath()

The following examples show how to use org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getOutputPath(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: PinotRecordWriter.java    From incubator-pinot with Apache License 2.0 6 votes vote down vote up
public PinotRecordWriter(TaskAttemptContext job, SegmentGeneratorConfig segmentGeneratorConfig,
    FieldExtractor<T> fieldExtractor)
    throws IOException {
  _segmentGeneratorConfig = segmentGeneratorConfig;
  _fieldExtractor = fieldExtractor;

  _tempSegmentDir = new File(PinotOutputFormat.getTempSegmentDir(job));
  if (_tempSegmentDir.exists()) {
    FileUtils.cleanDirectory(_tempSegmentDir);
  }
  _dataFileDir = new File(_tempSegmentDir, "dataFile");
  FileUtils.forceMkdir(_dataFileDir);
  _segmentTarDir = new File(_tempSegmentDir, "segmentTar");
  FileUtils.forceMkdir(_segmentTarDir);

  _handler = new FileHandler(_dataFileDir.getPath(), "data", ".json", MAX_FILE_SIZE);
  _handler.open(true);

  _fileSystem = FileSystem.get(job.getConfiguration());
  _outputDir = FileOutputFormat.getOutputPath(job);
}
 
Example 2
Source File: TestJoinDatamerge.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the output of {@code job}: exactly one output file must exist, it must be
 * non-empty, and for every record in it the value must equal
 * {@code countProduct(key, src, conf)}.
 *
 * @param job job whose output path and configuration are inspected
 * @param src source paths handed to {@code countProduct}
 * @throws IOException if listing or reading the output fails
 */
private static void checkOuterConsistency(Job job, Path[] src)
    throws IOException {
  Path outf = FileOutputFormat.getOutputPath(job);
  FileStatus[] outlist = cluster.getFileSystem().listStatus(outf,
      new Utils.OutputFileUtils.OutputFilesFilter());
  assertEquals("number of part files is more than 1. It is " + outlist.length,
    1, outlist.length);
  assertTrue("output file with zero length" + outlist[0].getLen(),
    0 < outlist[0].getLen());
  SequenceFile.Reader r =
    new SequenceFile.Reader(cluster.getFileSystem(),
        outlist[0].getPath(), job.getConfiguration());
  // Close the reader even when an assertion or read fails, so a failing
  // check does not leak the underlying stream.
  try {
    IntWritable k = new IntWritable();
    IntWritable v = new IntWritable();
    while (r.next(k, v)) {
      // Expected value (computed from the sources) goes first so that a
      // failure report labels expected/actual correctly.
      assertEquals("counts does not match",
        countProduct(k, src, job.getConfiguration()), v.get());
    }
  } finally {
    r.close();
  }
}
 
Example 3
Source File: HFileOutputFormat.java    From terrapin with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a record writer backed by a {@code StoreFile.Writer} for the current task.
 *
 * <p>The block size is read from {@code Constants.HFILE_BLOCKSIZE} (16384 when unset) and the
 * compression algorithm is resolved by {@code getAlgorithm} from
 * {@code Constants.HFILE_COMPRESSION}. The file path comes from
 * {@code hfilePath(outputPath, taskId)}.
 *
 * @param context task attempt context supplying configuration, output path and task id
 * @return a new {@code HFileRecordWriter} wrapping the store file writer
 * @throws IOException if the file system or the writer cannot be created
 */
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(
        TaskAttemptContext context) throws IOException {
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  // The committer's work path is only used to resolve the file system.
  // NOTE(review): the HFile itself is placed under outputPath rather than the
  // work path — confirm this is intentional.
  final Path workPath = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = workPath.getFileSystem(conf);

  final int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
  // The original comment states the default is snappy; that default lives in getAlgorithm.
  final Compression.Algorithm compression =
      getAlgorithm(conf.get(Constants.HFILE_COMPRESSION));

  final StoreFile.WriterBuilder builder =
      new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize);
  final int taskId = context.getTaskAttemptID().getTaskID().getId();
  final StoreFile.Writer writer = builder
      .withFilePath(hfilePath(outputPath, taskId))
      .withCompression(compression)
      .build();
  return new HFileRecordWriter(writer);
}
 
Example 4
Source File: RandomWriter.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Builds one placeholder split per requested map task.
 *
 * <p>The number of splits is read from {@code MRJobConfig.NUM_MAPS} (1 when unset). Split
 * {@code i} is a {@code FileSplit} for {@code <output dir>/dummy-split-i} with start 0,
 * length 1 and no host list.
 *
 * @param job job context supplying the configuration and output path
 * @return the placeholder splits, in index order
 * @throws IOException declared by the overridden contract
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final List<InputSplit> splits = new ArrayList<InputSplit>();
  final Path outputDir = FileOutputFormat.getOutputPath(job);
  final int mapCount = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
  for (int index = 0; index < mapCount; index++) {
    final Path placeholder = new Path(outputDir, "dummy-split-" + index);
    splits.add(new FileSplit(placeholder, 0, 1, (String[]) null));
  }
  return splits;
}
 
Example 5
Source File: JobUtil.java    From jumbune with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Rewrites the job's output path by appending {@code SEPARATOR_UNDERSCORE}
 * followed by the current time in milliseconds to the existing path.
 *
 * <p>The visible code applies this to any non-null output path; it performs
 * no file-system check of its own.
 *
 * @param job
 *            Job whose output path is to be changed
 * @throws IllegalArgumentException
 *             if the job has no output path configured
 */
public static void modifyOutputPath(Job job) throws Exception {
	final Path currentPath = FileOutputFormat.getOutputPath(job);
	if (currentPath == null) {
		throw new IllegalArgumentException("Job Output path is null, expecting not null path value");
	}
	// The timestamp suffix makes the new path differ from the configured one.
	final String suffixedPath = currentPath.toString() + SEPARATOR_UNDERSCORE + System.currentTimeMillis();
	FileOutputFormat.setOutputPath(job, new Path(suffixedPath));
}
 
Example 6
Source File: RandomTextWriterJob.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the placeholder input splits for this job.
 *
 * <p>{@code MRJobConfig.NUM_MAPS} (default 1) determines how many are created. Each one is a
 * {@code FileSplit} over {@code dummy-split-<n>} inside the job's output directory, with
 * offset 0, length 1 and a null host array.
 *
 * @param job job context to read the output path and configuration from
 * @return list of placeholder splits, ordered by {@code n}
 * @throws IOException declared by the overridden contract
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> placeholders = new ArrayList<InputSplit>();
  Path outputRoot = FileOutputFormat.getOutputPath(job);
  int requested = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
  int next = 0;
  while (next < requested) {
    Path splitPath = new Path(outputRoot, "dummy-split-" + next);
    placeholders.add(new FileSplit(splitPath, 0, 1, (String[]) null));
    next++;
  }
  return placeholders;
}
 
Example 7
Source File: RandomWriter.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Returns as many placeholder splits as {@code MRJobConfig.NUM_MAPS} requests (1 if unset).
 *
 * <p>The split at position {@code i} is a {@code FileSplit} whose path is
 * {@code dummy-split-i} under the job's output path; its start is 0, its length is 1 and
 * its host array is null.
 *
 * @param job job context providing the output path and configuration
 * @return the generated splits
 * @throws IOException declared by the overridden contract
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final List<InputSplit> generated = new ArrayList<InputSplit>();
  final Path dir = FileOutputFormat.getOutputPath(job);
  final int total = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
  // The current list size doubles as the index of the next split.
  while (generated.size() < total) {
    final String name = "dummy-split-" + generated.size();
    generated.add(new FileSplit(new Path(dir, name), 0, 1, (String[]) null));
  }
  return generated;
}
 
Example 8
Source File: LargeSorter.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the placeholder splits for the job, one per configured map.
 *
 * <p>The count comes from {@code MRJobConfig.NUM_MAPS} and falls back to 1. Every split is a
 * {@code FileSplit} for {@code dummy-split-<n>} below the output path, starting at 0 with
 * length 1 and a null host array.
 *
 * @param job job context carrying the configuration and output path
 * @return the splits in ascending {@code n}
 * @throws IOException declared by the overridden contract
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final List<InputSplit> splitList = new ArrayList<InputSplit>();
  final Path base = FileOutputFormat.getOutputPath(job);
  final int wanted = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
  for (int n = 0; n < wanted; n++) {
    final String fileName = "dummy-split-".concat(Integer.toString(n));
    final InputSplit split = new FileSplit(new Path(base, fileName), 0, 1, null);
    splitList.add(split);
  }
  return splitList;
}
 
Example 9
Source File: RandomWriter.java    From pravega-samples with Apache License 2.0 5 votes vote down vote up
/**
 * Generates placeholder splits so that the framework schedules the requested number of maps.
 *
 * <p>The number of splits equals {@code MRJobConfig.NUM_MAPS} (1 when not configured). Each
 * is a {@code FileSplit} pointing at {@code dummy-split-<index>} in the output directory,
 * with start 0, length 1 and no hosts.
 *
 * @param job job context supplying the output directory and configuration
 * @return the placeholder splits in index order
 * @throws IOException declared by the overridden contract
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    final List<InputSplit> dummySplits = new ArrayList<InputSplit>();
    final Path outputDirectory = FileOutputFormat.getOutputPath(job);
    final int splitCount = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
    final String[] noHosts = null;
    int idx = 0;
    while (idx < splitCount) {
        final Path splitFile = new Path(outputDirectory, "dummy-split-" + idx);
        dummySplits.add(new FileSplit(splitFile, 0, 1, noHosts));
        ++idx;
    }
    return dummySplits;
}
 
Example 10
Source File: RandomWriter.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Returns one placeholder {@code FileSplit} for each map requested through
 * {@code MRJobConfig.NUM_MAPS} (default 1).
 *
 * <p>Split number {@code k} refers to {@code dummy-split-k} under the job's output path and
 * is created with start 0, length 1 and a null host array.
 *
 * @param job job context used for the output path and the configuration
 * @return the list of placeholder splits
 * @throws IOException declared by the overridden contract
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> out = new ArrayList<InputSplit>();
  Path targetDir = FileOutputFormat.getOutputPath(job);
  int maps = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
  for (int k = 0; k < maps; k += 1) {
    FileSplit split =
        new FileSplit(new Path(targetDir, "dummy-split-" + k), 0, 1, (String[]) null);
    out.add(split);
  }
  return out;
}
 
Example 11
Source File: RandomTextWriterJob.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the job's placeholder splits.
 *
 * <p>Reads the split count from {@code MRJobConfig.NUM_MAPS} (1 if absent) and returns that
 * many {@code FileSplit}s. Split {@code i} names {@code dummy-split-i} within the output
 * path and has start 0, length 1 and a null host array. A count below 1 yields an empty
 * list.
 *
 * @param job job context from which the output path and configuration are taken
 * @return the placeholder splits
 * @throws IOException declared by the overridden contract
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final List<InputSplit> list = new ArrayList<InputSplit>();
  final Path parent = FileOutputFormat.getOutputPath(job);
  final int count = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
  if (count < 1) {
    return list;
  }
  int i = 0;
  do {
    list.add(new FileSplit(new Path(parent, "dummy-split-" + i), 0, 1, (String[]) null));
  } while (++i < count);
  return list;
}
 
Example 12
Source File: AdmmIterationOutputFormat.java    From laser with Apache License 2.0 5 votes vote down vote up
/**
 * Chooses the file this task writes to.
 *
 * <p>When the configuration key {@code com.b5m.admm.iteration.output.name} is set, the
 * result is that name under the job's output path; otherwise it is {@code Z} under the
 * committer's work path. The {@code extension} argument is not used here.
 *
 * @param context task attempt context
 * @param extension ignored by this implementation
 * @return the path of the task's output file
 * @throws IOException if the output committer cannot be obtained
 */
public Path getDefaultWorkFile(TaskAttemptContext context, String extension)
		throws IOException {
	final FileOutputCommitter fileCommitter = (FileOutputCommitter) getOutputCommitter(context);
	final String configuredName = context.getConfiguration().get(
			"com.b5m.admm.iteration.output.name");
	if (configuredName != null) {
		return new Path(FileOutputFormat.getOutputPath(context), configuredName);
	}
	return new Path(fileCommitter.getWorkPath(), "Z");
}
 
Example 13
Source File: LargeSorter.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies placeholder splits, one for each map configured via
 * {@code MRJobConfig.NUM_MAPS} (defaulting to 1).
 *
 * <p>Each split is a {@code FileSplit} over {@code dummy-split-<position>} in the job's
 * output path, with start 0, length 1 and a null host array.
 *
 * @param job job context holding the configuration and output path
 * @return the placeholder splits, ordered by position
 * @throws IOException declared by the overridden contract
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final List<InputSplit> produced = new ArrayList<InputSplit>();
  final Path outputPath = FileOutputFormat.getOutputPath(job);
  final int limit = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
  // The list size is the position of the split about to be added.
  while (produced.size() < limit) {
    final Path splitPath = new Path(outputPath, "dummy-split-" + produced.size());
    produced.add(new FileSplit(splitPath, 0, 1, null));
  }
  return produced;
}
 
Example 14
Source File: TransformerOutputFormat.java    From BigDataPlatform with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns a new {@code FileOutputCommitter} built from the output path configured for
 * the given context.
 *
 * @param context task attempt context; source of the output path
 * @return a committer for that output path and context
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    return new FileOutputCommitter(FileOutputFormat.getOutputPath(context), context);
}
 
Example 15
Source File: GFOutputFormat.java    From gemfirexd-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a new {@code FileOutputCommitter} for the output path that
 * {@code FileOutputFormat.getOutputPath} reports for the given context.
 *
 * @param context task attempt context; source of the output path
 * @return a committer for that output path and context
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new FileOutputCommitter(FileOutputFormat.getOutputPath(context),
      context);
}
 
Example 16
Source File: GenericMRLoadGenerator.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Configures the load-generator job from the command line, runs it and reports timing.
 *
 * <p>Behaviour visible in this method:
 * <ul>
 *   <li>If no output path is configured, the output format is set to
 *       {@code NullOutputFormat}.</li>
 *   <li>If there are no input paths, {@code confRandom(job)} is called.</li>
 *   <li>Otherwise, if {@code INDIRECT_INPUT_FORMAT} is set in the configuration, a
 *       sequence file named {@code <random base-36 number>_files} is written under
 *       {@code /tmp}. It holds one (length, URI) record per non-directory entry found
 *       while walking the input paths, and its location is stored under
 *       {@code INDIRECT_INPUT_FILE}.</li>
 * </ul>
 *
 * @param argv command-line arguments, handed to {@code parseArgs}
 * @return -1 if {@code parseArgs} rejects the arguments; otherwise 0 when
 *     {@code job.waitForCompletion(true)} returns true and 1 when it returns false
 * @throws Exception if job setup, the input listing or job execution fails
 */
public int run(String [] argv) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return -1;
  }

  Configuration conf = job.getConfiguration();
  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormatClass(NullOutputFormat.class);
  }

  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != conf.getClass(INDIRECT_INPUT_FORMAT, null)) {
    // specified IndirectInputFormat? Build src list
    // NOTE(review): jClient is not referenced again in this method and is never
    // closed here — confirm whether constructing it is still needed.
    JobClient jClient = new JobClient(conf);  
    Path tmpDir = new Path("/tmp");
    Random r = new Random();
    // Random base-36 file name for the list of input files.
    Path indirInputFile = new Path(tmpDir,
        Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files");
    conf.set(INDIRECT_INPUT_FILE, indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        tmpDir.getFileSystem(conf), conf, indirInputFile,
        LongWritable.class, Text.class,
        SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(conf);
        // Explicit stack instead of recursion to walk the directory tree.
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDirectory()) {
              // Directories whose name starts with "_" are not descended into.
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              // One record per file: key is its length, value is its URI.
              writer.sync();
              writer.append(new LongWritable(stat.getLen()),
                  new Text(stat.getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  // Elapsed wall-clock time, converted from milliseconds to whole seconds.
  System.out.println("The job took " +
                     (endTime.getTime() - startTime.getTime()) /1000 +
                     " seconds.");

  return ret;
}
 
Example 17
Source File: DBOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a new {@code FileOutputCommitter} created from the output path configured for
 * the given context.
 *
 * @param context task attempt context; source of the output path
 * @return a committer for that output path and context
 */
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new FileOutputCommitter(FileOutputFormat.getOutputPath(context),
                                 context);
}
 
Example 18
Source File: TransformerOutputFormat.java    From BigDataArchitect with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the committer for this output format: a {@code FileOutputCommitter} over the
 * output path that {@code FileOutputFormat.getOutputPath} returns for the context.
 *
 * @param context task attempt context; source of the output path
 * @return a new committer for that output path and context
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    return new FileOutputCommitter(FileOutputFormat.getOutputPath(context), context);
}
 
Example 19
Source File: TransformerOutputFormat.java    From BigDataArchitect with Apache License 2.0 4 votes vote down vote up
/**
 * Provides a {@code FileOutputCommitter} bound to the context's configured output path.
 *
 * @param context task attempt context; source of the output path
 * @return a new committer for that output path and context
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    return new FileOutputCommitter(FileOutputFormat.getOutputPath(context), context);
}
 
Example 20
Source File: HadoopClientProtocolSelfTest.java    From ignite with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a committer over the output path configured for {@code ctx} and stores the
 * given delegate.
 *
 * @param ctx Task attempt context; also supplies the output path passed to the superclass.
 * @param delegate Delegate committer kept in the {@code delegate} field.
 * @throws IOException If failed.
 */
private TestOutputCommitter(TaskAttemptContext ctx, FileOutputCommitter delegate) throws IOException {
    super(FileOutputFormat.getOutputPath(ctx), ctx);

    this.delegate = delegate;
}