Java Code Examples for org.apache.hadoop.mapred.FileOutputFormat#getOutputPath()

The following examples show how to use org.apache.hadoop.mapred.FileOutputFormat#getOutputPath() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SegmentReader.java    From nutch-htmlunit with Apache License 2.0 6 votes vote down vote up
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(
    final FileSystem fs, JobConf job,
    String name, final Progressable progress) throws IOException {

  final Path segmentDumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

  // Get the old copy out of the way
  if (fs.exists(segmentDumpFile)) fs.delete(segmentDumpFile, true);

  final PrintStream printStream = new PrintStream(fs.create(segmentDumpFile));
  return new RecordWriter<WritableComparable<?>, Writable>() {
    public synchronized void write(WritableComparable<?> key, Writable value) throws IOException {
      printStream.println(value);
    }

    public synchronized void close(Reporter reporter) throws IOException {
      printStream.close();
    }
  };
}
 
Example 2
Source File: MRTask.java    From tez with Apache License 2.0 6 votes vote down vote up
public void localizeConfiguration(JobConf jobConf)
    throws IOException, InterruptedException {
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(JobContext.TASK_PARTITION,
      taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());
  
  jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);
  
  Path outputPath = FileOutputFormat.getOutputPath(jobConf);
  if (outputPath != null) {
    if ((committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(jobConf, 
        ((FileOutputCommitter)committer).getTaskAttemptPath(taskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(jobConf, outputPath);
    }
  }
}
 
Example 3
Source File: FileOutputCommitterWrapper.java    From stratosphere with Apache License 2.0 6 votes vote down vote up
public Path getTempTaskOutputPath(JobConf conf, TaskAttemptID taskAttemptID) {
	Path outputPath = FileOutputFormat.getOutputPath(conf);
	if (outputPath != null) {
		Path p = new Path(outputPath,
			(FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR +
				"_" + taskAttemptID.toString()));
		try {
			FileSystem fs = p.getFileSystem(conf);
			return p.makeQualified(fs);
		} catch (IOException ie) {
			LOG.warn(StringUtils.stringifyException(ie));
			return p;
		}
	}
	return null;
}
 
Example 4
Source File: SegmentReader.java    From anthelion with Apache License 2.0 6 votes vote down vote up
public RecordWriter<WritableComparable, Writable> getRecordWriter(
    final FileSystem fs, JobConf job,
    String name, final Progressable progress) throws IOException {

  final Path segmentDumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

  // Get the old copy out of the way
  if (fs.exists(segmentDumpFile)) fs.delete(segmentDumpFile, true);

  final PrintStream printStream = new PrintStream(fs.create(segmentDumpFile));
  return new RecordWriter<WritableComparable, Writable>() {
    public synchronized void write(WritableComparable key, Writable value) throws IOException {
      printStream.println(value);
    }

    public synchronized void close(Reporter reporter) throws IOException {
      printStream.close();
    }
  };
}
 
Example 5
Source File: FetcherOutputFormat.java    From anthelion with Apache License 2.0 5 votes vote down vote up
public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
  Path out = FileOutputFormat.getOutputPath(job);
  if ((out == null) && (job.getNumReduceTasks() != 0)) {
  	throw new InvalidJobConfException(
  			"Output directory not set in JobConf.");
  }
  if (fs == null) {
  	fs = out.getFileSystem(job);
  }
  if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME)))
  	throw new IOException("Segment already fetched!");
}
 
Example 6
Source File: MROutput.java    From tez with Apache License 2.0 5 votes vote down vote up
public void initCommitter(JobConf job, boolean useNewApi)
    throws IOException, InterruptedException {

  if (useNewApi) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("using new api for output committer");
    }

    this.committer = newOutputFormat.getOutputCommitter(
        newApiTaskAttemptContext);
  } else {
    this.committer = job.getOutputCommitter();
  }

  Path outputPath = FileOutputFormat.getOutputPath(job);
  if (outputPath != null) {
    if ((this.committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(job,
          ((FileOutputCommitter) this.committer).getTaskAttemptPath(
              oldApiTaskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(job, outputPath);
    }
  }
  if (useNewApi) {
    this.committer.setupTask(newApiTaskAttemptContext);
  } else {
    this.committer.setupTask(oldApiTaskAttemptContext);
  }
}
 
Example 7
Source File: FetcherOutputFormat.java    From nutch-htmlunit with Apache License 2.0 5 votes vote down vote up
public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
  Path out = FileOutputFormat.getOutputPath(job);
  if ((out == null) && (job.getNumReduceTasks() != 0)) {
  	throw new InvalidJobConfException(
  			"Output directory not set in JobConf.");
  }
  if (fs == null) {
  	fs = out.getFileSystem(job);
  }
  if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME)))
  	throw new IOException("Segment already fetched!");
}
 
Example 8
Source File: RandomWriter.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/** 
 * Generate the requested number of file splits, with the filename
 * set to the filename of the output file.
 */
public InputSplit[] getSplits(JobConf job, 
                              int numSplits) throws IOException {
  InputSplit[] result = new InputSplit[numSplits];
  Path outDir = FileOutputFormat.getOutputPath(job);
  for(int i=0; i < result.length; ++i) {
    result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, 
                              (String[])null);
  }
  return result;
}
 
Example 9
Source File: MROutput.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
public void initCommitter(JobConf job, boolean useNewApi)
    throws IOException, InterruptedException {

  if (useNewApi) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("using new api for output committer");
    }

    this.committer = newOutputFormat.getOutputCommitter(
        newApiTaskAttemptContext);
  } else {
    this.committer = job.getOutputCommitter();
  }

  Path outputPath = FileOutputFormat.getOutputPath(job);
  if (outputPath != null) {
    if ((this.committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(job,
          ((FileOutputCommitter) this.committer).getTaskAttemptPath(
              oldApiTaskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(job, outputPath);
    }
  }
  if (useNewApi) {
    this.committer.setupTask(newApiTaskAttemptContext);
  } else {
    this.committer.setupTask(oldApiTaskAttemptContext);
  }
}
 
Example 10
Source File: RandomWriter.java    From hadoop-book with Apache License 2.0 5 votes vote down vote up
/**
 * Generate the requested number of file splits, with the filename set
 * to the filename of the output file.
 */
public InputSplit[] getSplits(JobConf job,
        int numSplits) throws IOException {
    InputSplit[] result = new InputSplit[numSplits];
    Path outDir = FileOutputFormat.getOutputPath(job);
    for (int i = 0; i < result.length; ++i) {
        result[i] = new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1,
                (String[]) null);
    }
    return result;
}
 
Example 11
Source File: FileOutputCommitterWrapper.java    From stratosphere with Apache License 2.0 5 votes vote down vote up
public void setupJob(JobConf conf) throws IOException {
	Path outputPath = FileOutputFormat.getOutputPath(conf);
	if (outputPath != null) {
		Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
		FileSystem fileSys = tmpDir.getFileSystem(conf);
		if (!fileSys.mkdirs(tmpDir)) {
			LOG.error("Mkdirs failed to create " + tmpDir.toString());
		}
	}
}
 
Example 12
Source File: FileOutputCommitterWrapper.java    From stratosphere with Apache License 2.0 5 votes vote down vote up
private void markSuccessfulOutputDir(JobConf conf)
	throws IOException {
	Path outputPath = FileOutputFormat.getOutputPath(conf);
	if (outputPath != null) {
		FileSystem fileSys = outputPath.getFileSystem(conf);
		// create a file in the folder to mark it
		if (fileSys.exists(outputPath)) {
			Path filePath = new Path(outputPath, SUCCEEDED_FILE_NAME);
			fileSys.create(filePath).close();
		}
	}
}
 
Example 13
Source File: ExportManifestOutputFormat.java    From emr-dynamodb-connector with Apache License 2.0 5 votes vote down vote up
@Override
public RecordWriter<K, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
    Progressable progress) throws IOException {
  String extension = "";
  Path file = FileOutputFormat.getTaskOutputPath(job, MANIFEST_FILENAME);
  FileSystem fs = file.getFileSystem(job);
  FSDataOutputStream fileOut = fs.create(file, progress);
  if (getCompressOutput(job)) {
    Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
    extension = codec.getDefaultExtension();
  }
  return new ExportManifestRecordWriter<>(fileOut, FileOutputFormat.getOutputPath(job),
      extension);
}
 
Example 14
Source File: FileOutputCommitterWrapper.java    From stratosphere with Apache License 2.0 5 votes vote down vote up
public void cleanupJob(JobConf conf) throws IOException {
	// do the clean up of temporary directory
	Path outputPath = FileOutputFormat.getOutputPath(conf);
	if (outputPath != null) {
		Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
		FileSystem fileSys = tmpDir.getFileSystem(conf);
		if (fileSys.exists(tmpDir)) {
			fileSys.delete(tmpDir, true);
		}
	} else {
		LOG.warn("Output path is null in cleanup");
	}
}
 
Example 15
Source File: FetcherOutputFormat.java    From anthelion with Apache License 2.0 4 votes vote down vote up
public RecordWriter<Text, NutchWritable> getRecordWriter(final FileSystem fs,
                                    final JobConf job,
                                    final String name,
                                    final Progressable progress) throws IOException {

  Path out = FileOutputFormat.getOutputPath(job);
  final Path fetch =
    new Path(new Path(out, CrawlDatum.FETCH_DIR_NAME), name);
  final Path content =
    new Path(new Path(out, Content.DIR_NAME), name);
  
  final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);

  final MapFile.Writer fetchOut =
    new MapFile.Writer(job, fs, fetch.toString(), Text.class, CrawlDatum.class,
        compType, progress);
  
  return new RecordWriter<Text, NutchWritable>() {
      private MapFile.Writer contentOut;
      private RecordWriter<Text, Parse> parseOut;

      {
        if (Fetcher.isStoringContent(job)) {
          contentOut = new MapFile.Writer(job, fs, content.toString(),
                                          Text.class, Content.class,
                                          compType, progress);
        }

        if (Fetcher.isParsing(job)) {
          parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name, progress);
        }
      }

      public void write(Text key, NutchWritable value)
        throws IOException {

        Writable w = value.get();
        
        if (w instanceof CrawlDatum)
          fetchOut.append(key, w);
        else if (w instanceof Content)
          contentOut.append(key, w);
        else if (w instanceof Parse)
          parseOut.write(key, (Parse)w);
      }

      public void close(Reporter reporter) throws IOException {
        fetchOut.close();
        if (contentOut != null) {
          contentOut.close();
        }
        if (parseOut != null) {
          parseOut.close(reporter);
        }
      }

    };

}
 
Example 16
Source File: CrawlDbReader.java    From anthelion with Apache License 2.0 4 votes vote down vote up
public RecordWriter<Text,CrawlDatum> getRecordWriter(FileSystem fs, JobConf job, String name,
     Progressable progress) throws IOException {
   Path dir = FileOutputFormat.getOutputPath(job);
   DataOutputStream fileOut = fs.create(new Path(dir, name), progress);
   return new LineRecordWriter(fileOut);
}
 
Example 17
Source File: FetcherOutputFormat.java    From nutch-htmlunit with Apache License 2.0 4 votes vote down vote up
public RecordWriter<Text, NutchWritable> getRecordWriter(final FileSystem fs,
                                    final JobConf job,
                                    final String name,
                                    final Progressable progress) throws IOException {

  Path out = FileOutputFormat.getOutputPath(job);
  final Path fetch =
    new Path(new Path(out, CrawlDatum.FETCH_DIR_NAME), name);
  final Path content =
    new Path(new Path(out, Content.DIR_NAME), name);
  
  final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);

  final MapFile.Writer fetchOut =
    new MapFile.Writer(job, fs, fetch.toString(), Text.class, CrawlDatum.class,
        compType, progress);
  
  return new RecordWriter<Text, NutchWritable>() {
      private MapFile.Writer contentOut;
      private RecordWriter<Text, Parse> parseOut;

      {
        if (Fetcher.isStoringContent(job)) {
          contentOut = new MapFile.Writer(job, fs, content.toString(),
                                          Text.class, Content.class,
                                          compType, progress);
        }

        if (Fetcher.isParsing(job)) {
          parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name, progress);
        }
      }

      public void write(Text key, NutchWritable value)
        throws IOException {

        Writable w = value.get();
        
        if (w instanceof CrawlDatum)
          fetchOut.append(key, w);
        else if (w instanceof Content && contentOut != null)
          contentOut.append(key, w);
        else if (w instanceof Parse && parseOut != null)
          parseOut.write(key, (Parse)w);
      }

      public void close(Reporter reporter) throws IOException {
        fetchOut.close();
        if (contentOut != null) {
          contentOut.close();
        }
        if (parseOut != null) {
          parseOut.close(reporter);
        }
      }

    };

}
 
Example 18
Source File: CustomOutputCommitter.java    From hadoop with Apache License 2.0 4 votes vote down vote up
private void writeFile(JobConf conf , String filename) throws IOException {
  System.out.println("writing file ----" + filename);
  Path outputPath = FileOutputFormat.getOutputPath(conf);
  FileSystem fs = outputPath.getFileSystem(conf);
  fs.create(new Path(outputPath, filename)).close();
}
 
Example 19
Source File: CrawlDbReader.java    From nutch-htmlunit with Apache License 2.0 4 votes vote down vote up
public RecordWriter<Text,CrawlDatum> getRecordWriter(FileSystem fs, JobConf job, String name,
     Progressable progress) throws IOException {
   Path dir = FileOutputFormat.getOutputPath(job);
   DataOutputStream fileOut = fs.create(new Path(dir, name), progress);
   return new LineRecordWriter(fileOut);
}
 
Example 20
Source File: CustomOutputCommitter.java    From big-c with Apache License 2.0 4 votes vote down vote up
private void writeFile(JobConf conf , String filename) throws IOException {
  System.out.println("writing file ----" + filename);
  Path outputPath = FileOutputFormat.getOutputPath(conf);
  FileSystem fs = outputPath.getFileSystem(conf);
  fs.create(new Path(outputPath, filename)).close();
}