Java Code Examples for org.apache.hadoop.mapred.FileOutputFormat#getOutputPath()

The following examples show how to use org.apache.hadoop.mapred.FileOutputFormat#getOutputPath(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.

Example 1

Source File: SegmentReader.java From nutch-htmlunit with Apache License 2.0

6 votes

/**
 * Creates a text dump file called {@code name} inside the job's output path
 * and returns a writer that prints every record value to it, one per line.
 * A file already present at that location is deleted (recursively) first.
 *
 * @param fs file system used to check for, delete and create the dump file
 * @param job job configuration the output path is read from
 * @param name name of the dump file, resolved against the output path
 * @param progress not used by this method
 * @return a writer that prints values to the dump file; keys are ignored
 * @throws IOException if the existing file cannot be inspected or removed,
 *         or the new file cannot be created
 */
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(
    final FileSystem fs, JobConf job,
    String name, final Progressable progress) throws IOException {

  final Path outputDir = FileOutputFormat.getOutputPath(job);
  final Path dumpFile = new Path(outputDir, name);

  // Clear out whatever a previous run left behind.
  if (fs.exists(dumpFile)) {
    fs.delete(dumpFile, true);
  }

  final PrintStream dumpStream = new PrintStream(fs.create(dumpFile));

  return new RecordWriter<WritableComparable<?>, Writable>() {
    /** Prints the value on its own line; the key is not written. */
    public synchronized void write(WritableComparable<?> key, Writable value) throws IOException {
      dumpStream.println(value);
    }

    /** Closes the underlying dump stream. */
    public synchronized void close(Reporter reporter) throws IOException {
      dumpStream.close();
    }
  };
}

Example 2

Source File: MRTask.java From tez with Apache License 2.0

6 votes

/**
 * Records this task attempt's identity in {@code jobConf} and, when an output
 * path is configured, sets the work output path for the attempt.
 *
 * <p>The work output path is the committer's task-attempt path when the
 * committer is a {@link FileOutputCommitter}; otherwise it is the configured
 * output path itself.
 *
 * @param jobConf configuration to update in place
 * @throws IOException declared by the signature; may be raised while
 *         resolving the task-attempt path
 * @throws InterruptedException declared by the signature
 */
public void localizeConfiguration(JobConf jobConf)
    throws IOException, InterruptedException {
  // Identity of the attempt: task id, attempt id, partition number, job id.
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());

  jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);

  final Path configuredOutput = FileOutputFormat.getOutputPath(jobConf);
  if (configuredOutput == null) {
    // No output path configured: leave the work output path untouched.
    return;
  }

  Path workOutput = configuredOutput;
  if (committer instanceof FileOutputCommitter) {
    workOutput = ((FileOutputCommitter) committer).getTaskAttemptPath(taskAttemptContext);
  }
  FileOutputFormat.setWorkOutputPath(jobConf, workOutput);
}

Example 3

Source File: FileOutputCommitterWrapper.java From stratosphere with Apache License 2.0

6 votes

/**
 * Builds the temporary output directory for a task attempt:
 * {@code <output>/<TEMP_DIR_NAME>/_<taskAttemptID>}.
 *
 * @param conf job configuration the output path and file system are read from
 * @param taskAttemptID attempt whose string form names the directory
 * @return the path qualified against its file system; the unqualified path if
 *         the file system cannot be obtained; {@code null} if the job has no
 *         output path
 */
public Path getTempTaskOutputPath(JobConf conf, TaskAttemptID taskAttemptID) {
	final Path jobOutput = FileOutputFormat.getOutputPath(conf);
	if (jobOutput == null) {
		return null;
	}

	final String relativeName =
		FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_" + taskAttemptID.toString();
	final Path attemptDir = new Path(jobOutput, relativeName);
	try {
		final FileSystem attemptFs = attemptDir.getFileSystem(conf);
		return attemptDir.makeQualified(attemptFs);
	} catch (IOException ie) {
		// Could not reach the file system: log and fall back to the raw path.
		LOG.warn(StringUtils.stringifyException(ie));
		return attemptDir;
	}
}

Example 4

Source File: SegmentReader.java From anthelion with Apache License 2.0

6 votes

/**
 * Returns a writer that dumps record values as text lines into a file named
 * {@code name} under the job's output path. An existing file at that path is
 * removed before the new one is created.
 *
 * @param fs file system on which the dump file is deleted and created
 * @param job job configuration supplying the output path
 * @param name dump file name relative to the output path
 * @param progress not used by this method
 * @return a writer that prints each value on its own line and ignores keys
 * @throws IOException if the file system operations fail
 */
public RecordWriter<WritableComparable, Writable> getRecordWriter(
    final FileSystem fs, JobConf job,
    String name, final Progressable progress) throws IOException {

  final Path target = new Path(FileOutputFormat.getOutputPath(job), name);

  // Drop any previous dump so the new one starts from scratch.
  final boolean stale = fs.exists(target);
  if (stale) {
    fs.delete(target, true);
  }

  final PrintStream sink = new PrintStream(fs.create(target));

  return new RecordWriter<WritableComparable, Writable>() {
    /** Writes the value as one text line; the key is discarded. */
    public synchronized void write(WritableComparable key, Writable value) throws IOException {
      sink.println(value);
    }

    /** Closes the dump file. */
    public synchronized void close(Reporter reporter) throws IOException {
      sink.close();
    }
  };
}

Example 5

Source File: FetcherOutputFormat.java From anthelion with Apache License 2.0

5 votes

/**
 * Validates the job's output location: the output directory must be set when
 * the job has reduce tasks, and it must not already contain a fetch directory.
 *
 * <p>When no output directory is configured and the job has zero reduce
 * tasks there is nothing to validate, so the method returns. Previously that
 * case fell through and dereferenced the null path.
 *
 * @param fs file system to check against; when {@code null} it is resolved
 *        from the output path
 * @param job job configuration to validate
 * @throws InvalidJobConfException if the output directory is not set and the
 *         job has at least one reduce task
 * @throws IOException if the output directory already contains
 *         {@code CrawlDatum.FETCH_DIR_NAME}, or the file system cannot be
 *         queried
 */
public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
  Path out = FileOutputFormat.getOutputPath(job);
  if (out == null) {
    if (job.getNumReduceTasks() != 0) {
      throw new InvalidJobConfException(
          "Output directory not set in JobConf.");
    }
    // No output directory and no reduce tasks: nothing to check.
    return;
  }
  if (fs == null) {
    fs = out.getFileSystem(job);
  }
  if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME))) {
    throw new IOException("Segment already fetched!");
  }
}

Example 6

Source File: MROutput.java From tez with Apache License 2.0

5 votes

/**
 * Selects the output committer for the chosen API, points the job's work
 * output path at the right location, and runs the committer's task setup.
 *
 * @param job job configuration; supplies the old-API committer and output
 *        path, and receives the work output path
 * @param useNewApi {@code true} to obtain the committer from the new-API
 *        output format and set up with the new-API task context;
 *        {@code false} to use the old-API equivalents
 * @throws IOException if the committer cannot be obtained or set up
 * @throws InterruptedException declared by the signature
 */
public void initCommitter(JobConf job, boolean useNewApi)
    throws IOException, InterruptedException {

  if (!useNewApi) {
    this.committer = job.getOutputCommitter();
  } else {
    if (LOG.isDebugEnabled()) {
      LOG.debug("using new api for output committer");
    }
    this.committer = newOutputFormat.getOutputCommitter(newApiTaskAttemptContext);
  }

  final Path configuredOutput = FileOutputFormat.getOutputPath(job);
  if (configuredOutput != null) {
    // A FileOutputCommitter writes into its per-attempt path; any other
    // committer writes straight into the configured output path.
    Path workOutput = configuredOutput;
    if (this.committer instanceof FileOutputCommitter) {
      workOutput = ((FileOutputCommitter) this.committer).getTaskAttemptPath(
          oldApiTaskAttemptContext);
    }
    FileOutputFormat.setWorkOutputPath(job, workOutput);
  }

  if (!useNewApi) {
    this.committer.setupTask(oldApiTaskAttemptContext);
  } else {
    this.committer.setupTask(newApiTaskAttemptContext);
  }
}

Example 7

Source File: FetcherOutputFormat.java From nutch-htmlunit with Apache License 2.0

5 votes

/**
 * Checks that the job's output directory is usable for fetching.
 *
 * <p>A missing output directory is an error only when the job has reduce
 * tasks. With zero reduce tasks and no output directory the method returns
 * without further checks; the earlier code went on to dereference the null
 * path in that case.
 *
 * @param fs file system to query; resolved from the output path when
 *        {@code null}
 * @param job job configuration holding the output path and reducer count
 * @throws InvalidJobConfException if no output directory is set although the
 *         job has reduce tasks
 * @throws IOException if {@code CrawlDatum.FETCH_DIR_NAME} already exists
 *         under the output directory, or the file system lookup fails
 */
public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
  Path out = FileOutputFormat.getOutputPath(job);
  if (out == null) {
    if (job.getNumReduceTasks() != 0) {
      throw new InvalidJobConfException(
          "Output directory not set in JobConf.");
    }
    // Nothing to validate without an output directory.
    return;
  }
  if (fs == null) {
    fs = out.getFileSystem(job);
  }
  if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME))) {
    throw new IOException("Segment already fetched!");
  }
}

Example 8

Source File: RandomWriter.java From hadoop-gpu with Apache License 2.0

5 votes

/**
 * Produces {@code numSplits} placeholder file splits. Split {@code i} names
 * the file {@code dummy-split-i} under the job's output path and is built
 * with the arguments 0, 1 and a null host array.
 *
 * @param job job configuration the output path is read from
 * @param numSplits number of splits to create
 * @return an array of {@code numSplits} splits
 * @throws IOException declared by the signature
 */
public InputSplit[] getSplits(JobConf job,
                              int numSplits) throws IOException {
  final InputSplit[] splits = new InputSplit[numSplits];
  final Path outputDir = FileOutputFormat.getOutputPath(job);
  int index = 0;
  while (index < splits.length) {
    final Path splitFile = new Path(outputDir, "dummy-split-" + index);
    splits[index] = new FileSplit(splitFile, 0, 1, (String[])null);
    ++index;
  }
  return splits;
}

Example 9

Source File: MROutput.java From incubator-tez with Apache License 2.0

5 votes

/**
 * Initialises the output committer: picks it according to the API in use,
 * sets the job's work output path, then performs task setup on it.
 *
 * @param job job configuration providing the old-API committer and the
 *        output path; updated with the work output path
 * @param useNewApi whether the new-API output format and task context are
 *        used instead of the old-API ones
 * @throws IOException if obtaining or setting up the committer fails
 * @throws InterruptedException declared by the signature
 */
public void initCommitter(JobConf job, boolean useNewApi)
    throws IOException, InterruptedException {

  // Step 1: choose the committer.
  if (useNewApi) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("using new api for output committer");
    }
    this.committer =
        newOutputFormat.getOutputCommitter(newApiTaskAttemptContext);
  } else {
    this.committer = job.getOutputCommitter();
  }

  // Step 2: decide where task output is written while the task runs.
  final Path jobOutput = FileOutputFormat.getOutputPath(job);
  if (jobOutput != null) {
    final boolean fileCommitter = this.committer instanceof FileOutputCommitter;
    if (!fileCommitter) {
      FileOutputFormat.setWorkOutputPath(job, jobOutput);
    } else {
      final Path attemptPath = ((FileOutputCommitter) this.committer)
          .getTaskAttemptPath(oldApiTaskAttemptContext);
      FileOutputFormat.setWorkOutputPath(job, attemptPath);
    }
  }

  // Step 3: task setup with the context matching the chosen API.
  if (useNewApi) {
    this.committer.setupTask(newApiTaskAttemptContext);
  } else {
    this.committer.setupTask(oldApiTaskAttemptContext);
  }
}

Example 10

Source File: RandomWriter.java From hadoop-book with Apache License 2.0

5 votes

/**
 * Creates the requested number of dummy file splits, each pointing at a file
 * named {@code dummy-split-<index>} inside the job's output path.
 *
 * @param job job configuration supplying the output path
 * @param numSplits how many splits to generate
 * @return the generated splits, one per index from 0 to {@code numSplits - 1}
 * @throws IOException declared by the signature
 */
public InputSplit[] getSplits(JobConf job,
        int numSplits) throws IOException {
    final InputSplit[] generated = new InputSplit[numSplits];
    final Path parentDir = FileOutputFormat.getOutputPath(job);
    for (int n = 0; n != generated.length; n++) {
        final String fileName = "dummy-split-" + n;
        generated[n] = new FileSplit(new Path(parentDir, fileName), 0, 1,
                (String[]) null);
    }
    return generated;
}

Example 11

Source File: FileOutputCommitterWrapper.java From stratosphere with Apache License 2.0

5 votes

/**
 * Creates the temporary directory ({@code FileOutputCommitter.TEMP_DIR_NAME})
 * under the job's output path. Does nothing when no output path is set.
 * A failed {@code mkdirs} is logged as an error and not rethrown.
 *
 * @param conf job configuration supplying the output path and file system
 * @throws IOException if the file system cannot be obtained or accessed
 */
public void setupJob(JobConf conf) throws IOException {
	final Path jobOutput = FileOutputFormat.getOutputPath(conf);
	if (jobOutput == null) {
		return;
	}

	final Path tempDir = new Path(jobOutput, FileOutputCommitter.TEMP_DIR_NAME);
	final FileSystem tempFs = tempDir.getFileSystem(conf);
	final boolean created = tempFs.mkdirs(tempDir);
	if (!created) {
		LOG.error("Mkdirs failed to create " + tempDir.toString());
	}
}

Example 12

Source File: FileOutputCommitterWrapper.java From stratosphere with Apache License 2.0

5 votes

/**
 * Marks the job's output directory by creating an empty file named
 * {@code SUCCEEDED_FILE_NAME} inside it. Nothing happens when the output path
 * is unset or does not exist on its file system.
 *
 * @param conf job configuration supplying the output path and file system
 * @throws IOException if the file system cannot be accessed or the marker
 *         file cannot be created
 */
private void markSuccessfulOutputDir(JobConf conf)
	throws IOException {
	final Path jobOutput = FileOutputFormat.getOutputPath(conf);
	if (jobOutput == null) {
		return;
	}

	final FileSystem outputFs = jobOutput.getFileSystem(conf);
	if (!outputFs.exists(jobOutput)) {
		return;
	}

	// Create the marker and close it straight away so it stays empty.
	final Path marker = new Path(jobOutput, SUCCEEDED_FILE_NAME);
	outputFs.create(marker).close();
}

Example 13

Source File: ExportManifestOutputFormat.java From emr-dynamodb-connector with Apache License 2.0

5 votes

/**
 * Opens the manifest file in the task output path and returns a writer for it.
 *
 * <p>The file extension handed to the writer is the codec's default extension
 * when output compression is enabled, and empty otherwise. It is resolved
 * before the output stream is opened, so a failure while instantiating the
 * codec cannot leave an open, unowned stream behind.
 *
 * @param ignored not used; the file system is resolved from the manifest path
 * @param job job configuration supplying paths and compression settings
 * @param name not used; the manifest always uses {@code MANIFEST_FILENAME}
 * @param progress progress callback passed to the file creation call
 * @return a record writer backed by the newly created manifest file
 * @throws IOException if the manifest file cannot be created
 */
@Override
public RecordWriter<K, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
    Progressable progress) throws IOException {
  // Resolve the codec first: reflection may throw, and nothing is open yet.
  String extension = "";
  if (getCompressOutput(job)) {
    Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
    extension = codec.getDefaultExtension();
  }

  Path file = FileOutputFormat.getTaskOutputPath(job, MANIFEST_FILENAME);
  FileSystem fs = file.getFileSystem(job);
  FSDataOutputStream fileOut = fs.create(file, progress);
  return new ExportManifestRecordWriter<>(fileOut, FileOutputFormat.getOutputPath(job),
      extension);
}

Example 14

Source File: FileOutputCommitterWrapper.java From stratosphere with Apache License 2.0

5 votes

/**
 * Removes the temporary directory ({@code FileOutputCommitter.TEMP_DIR_NAME})
 * below the job's output path, if it exists. When no output path is set a
 * warning is logged instead.
 *
 * @param conf job configuration supplying the output path and file system
 * @throws IOException if the file system cannot be accessed or the delete fails
 */
public void cleanupJob(JobConf conf) throws IOException {
	final Path jobOutput = FileOutputFormat.getOutputPath(conf);
	if (jobOutput == null) {
		LOG.warn("Output path is null in cleanup");
		return;
	}

	// Recursively delete the temporary directory, if present.
	final Path tempDir = new Path(jobOutput, FileOutputCommitter.TEMP_DIR_NAME);
	final FileSystem tempFs = tempDir.getFileSystem(conf);
	if (tempFs.exists(tempDir)) {
		tempFs.delete(tempDir, true);
	}
}

Example 15

Source File: FetcherOutputFormat.java From anthelion with Apache License 2.0

4 votes

/**
 * Returns a writer that routes fetcher output by value type: {@code CrawlDatum}
 * records go to the fetch map file, {@code Content} records to the content map
 * file, and {@code Parse} records to a {@code ParseOutputFormat} writer.
 *
 * <p>The content and parse writers are opened only when
 * {@code Fetcher.isStoringContent(job)} and {@code Fetcher.isParsing(job)}
 * respectively return true. Records whose writer was not opened are skipped;
 * without the null guards such a record caused a
 * {@code NullPointerException} in {@code write}.
 *
 * @param fs file system the map files are created on
 * @param job job configuration supplying the output path, compression type
 *        and fetcher settings
 * @param name file name used under each output sub-directory
 * @param progress progress callback passed to the underlying writers
 * @return a record writer dispatching on the wrapped value's type
 * @throws IOException if any underlying writer cannot be created
 */
public RecordWriter<Text, NutchWritable> getRecordWriter(final FileSystem fs,
                                    final JobConf job,
                                    final String name,
                                    final Progressable progress) throws IOException {

  Path out = FileOutputFormat.getOutputPath(job);
  final Path fetch =
    new Path(new Path(out, CrawlDatum.FETCH_DIR_NAME), name);
  final Path content =
    new Path(new Path(out, Content.DIR_NAME), name);
  
  final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);

  final MapFile.Writer fetchOut =
    new MapFile.Writer(job, fs, fetch.toString(), Text.class, CrawlDatum.class,
        compType, progress);
  
  return new RecordWriter<Text, NutchWritable>() {
      // Both stay null when the corresponding feature is switched off.
      private MapFile.Writer contentOut;
      private RecordWriter<Text, Parse> parseOut;

      {
        if (Fetcher.isStoringContent(job)) {
          contentOut = new MapFile.Writer(job, fs, content.toString(),
                                          Text.class, Content.class,
                                          compType, progress);
        }

        if (Fetcher.isParsing(job)) {
          parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name, progress);
        }
      }

      /** Unwraps the value and appends it to the writer matching its type. */
      public void write(Text key, NutchWritable value)
        throws IOException {

        Writable w = value.get();
        
        if (w instanceof CrawlDatum)
          fetchOut.append(key, w);
        else if (w instanceof Content && contentOut != null)
          // guard: the content writer exists only when content is stored
          contentOut.append(key, w);
        else if (w instanceof Parse && parseOut != null)
          // guard: the parse writer exists only when parsing is enabled
          parseOut.write(key, (Parse)w);
      }

      /** Closes the fetch writer and whichever optional writers were opened. */
      public void close(Reporter reporter) throws IOException {
        fetchOut.close();
        if (contentOut != null) {
          contentOut.close();
        }
        if (parseOut != null) {
          parseOut.close(reporter);
        }
      }

    };

}

Example 16

Source File: CrawlDbReader.java From anthelion with Apache License 2.0

4 votes

/**
 * Creates the file {@code name} under the job's output path and returns a
 * {@code LineRecordWriter} that writes to it.
 *
 * @param fs file system the output file is created on
 * @param job job configuration supplying the output path
 * @param name output file name relative to the output path
 * @param progress progress callback passed to the file creation call
 * @return a line-oriented writer over the new file
 * @throws IOException if the file cannot be created
 */
public RecordWriter<Text,CrawlDatum> getRecordWriter(FileSystem fs, JobConf job, String name,
     Progressable progress) throws IOException {
   final Path outputDir = FileOutputFormat.getOutputPath(job);
   final Path target = new Path(outputDir, name);
   final DataOutputStream stream = fs.create(target, progress);
   return new LineRecordWriter(stream);
}

Example 17

Source File: FetcherOutputFormat.java From nutch-htmlunit with Apache License 2.0

4 votes

/**
 * Builds a writer that splits fetcher output across up to three sinks
 * according to the type of the wrapped value: {@code CrawlDatum} to the fetch
 * map file, {@code Content} to the content map file, {@code Parse} to a
 * {@code ParseOutputFormat} writer.
 *
 * <p>The content and parse sinks are created only when
 * {@code Fetcher.isStoringContent(job)} / {@code Fetcher.isParsing(job)}
 * return true; values for a sink that was not created are skipped.
 *
 * @param fs file system the map files are created on
 * @param job job configuration supplying the output path, compression type
 *        and fetcher settings
 * @param name file name used under each output sub-directory
 * @param progress progress callback passed to the underlying writers
 * @return a record writer dispatching on the wrapped value's type
 * @throws IOException if any underlying writer cannot be created
 */
public RecordWriter<Text, NutchWritable> getRecordWriter(final FileSystem fs,
                                    final JobConf job,
                                    final String name,
                                    final Progressable progress) throws IOException {

  final Path outputDir = FileOutputFormat.getOutputPath(job);
  final Path fetchDir = new Path(outputDir, CrawlDatum.FETCH_DIR_NAME);
  final Path fetch = new Path(fetchDir, name);
  final Path contentDir = new Path(outputDir, Content.DIR_NAME);
  final Path content = new Path(contentDir, name);

  final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);

  final MapFile.Writer fetchOut = new MapFile.Writer(
      job, fs, fetch.toString(), Text.class, CrawlDatum.class, compType, progress);

  return new RecordWriter<Text, NutchWritable>() {
      // Optional sinks; each remains null when its feature is disabled.
      private MapFile.Writer contentSink;
      private RecordWriter<Text, Parse> parseSink;

      {
        if (Fetcher.isStoringContent(job)) {
          contentSink = new MapFile.Writer(
              job, fs, content.toString(), Text.class, Content.class, compType, progress);
        }

        if (Fetcher.isParsing(job)) {
          parseSink = new ParseOutputFormat().getRecordWriter(fs, job, name, progress);
        }
      }

      /** Unwraps the value and hands it to the first sink that accepts it. */
      public void write(Text key, NutchWritable value)
        throws IOException {

        final Writable payload = value.get();

        if (payload instanceof CrawlDatum) {
          fetchOut.append(key, payload);
          return;
        }
        if (payload instanceof Content && contentSink != null) {
          contentSink.append(key, payload);
          return;
        }
        if (payload instanceof Parse && parseSink != null) {
          parseSink.write(key, (Parse) payload);
        }
      }

      /** Closes the fetch writer, then any optional sink that was opened. */
      public void close(Reporter reporter) throws IOException {
        fetchOut.close();
        if (contentSink != null) {
          contentSink.close();
        }
        if (parseSink != null) {
          parseSink.close(reporter);
        }
      }

    };

}

Example 18

Source File: CustomOutputCommitter.java From hadoop with Apache License 2.0

4 votes

/**
 * Creates an empty file called {@code filename} in the job's output path,
 * after printing a trace line to standard output.
 *
 * @param conf job configuration supplying the output path and file system
 * @param filename name of the file to create under the output path
 * @throws IOException if the file system cannot be obtained or the file
 *         cannot be created
 */
private void writeFile(JobConf conf , String filename) throws IOException {
  System.out.println("writing file ----" + filename);
  final Path outputDir = FileOutputFormat.getOutputPath(conf);
  final FileSystem outputFs = outputDir.getFileSystem(conf);
  final Path target = new Path(outputDir, filename);
  // Open and immediately close: only the file's existence matters.
  outputFs.create(target).close();
}

Example 19

Source File: CrawlDbReader.java From nutch-htmlunit with Apache License 2.0

4 votes

/**
 * Opens {@code name} inside the job's output path for writing and wraps the
 * resulting stream in a {@code LineRecordWriter}.
 *
 * @param fs file system used to create the output file
 * @param job job configuration the output path is read from
 * @param name file name resolved against the output path
 * @param progress progress callback handed to the create call
 * @return a {@code LineRecordWriter} over the created file
 * @throws IOException if creating the file fails
 */
public RecordWriter<Text,CrawlDatum> getRecordWriter(FileSystem fs, JobConf job, String name,
     Progressable progress) throws IOException {
   final Path outputFile = new Path(FileOutputFormat.getOutputPath(job), name);
   final DataOutputStream out = fs.create(outputFile, progress);
   return new LineRecordWriter(out);
}

Example 20

Source File: CustomOutputCommitter.java From big-c with Apache License 2.0

4 votes

/**
 * Prints a trace line and then creates an empty file named {@code filename}
 * under the job's output path.
 *
 * @param conf job configuration the output path and file system come from
 * @param filename file to create, relative to the output path
 * @throws IOException if the file system lookup or the file creation fails
 */
private void writeFile(JobConf conf , String filename) throws IOException {
  System.out.println("writing file ----" + filename);
  final Path jobOutput = FileOutputFormat.getOutputPath(conf);
  final FileSystem fileSystem = jobOutput.getFileSystem(conf);
  final Path marker = new Path(jobOutput, filename);
  fileSystem.create(marker).close();
}