Java Code Examples for org.apache.hadoop.fs.FileSystem#makeQualified()

The following examples show how to use org.apache.hadoop.fs.FileSystem#makeQualified(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: RDFS   File: FileOutputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates the output directory configured for a job.
 *
 * <p>When an output directory is configured it is qualified against its own
 * file system, written back into the job configuration, and required not to
 * exist yet. A missing output directory is only tolerated when the job has
 * zero reduce tasks.
 *
 * @param ignored not used by this method
 * @param job the job configuration to validate (and update)
 * @throws FileAlreadyExistsException if the output directory already exists
 * @throws InvalidJobConfException if no output directory is set and the job
 *         has at least one reduce task
 * @throws IOException if the file system cannot be obtained or queried
 */
public void checkOutputSpecs(FileSystem ignored, JobConf job)
    throws FileAlreadyExistsException, InvalidJobConfException, IOException {
  final Path configured = getOutputPath(job);
  if (configured == null) {
    if (job.getNumReduceTasks() != 0) {
      throw new InvalidJobConfException("Output directory not set in JobConf.");
    }
    // No output directory and no reducers: nothing further to verify.
    return;
  }

  // Store the qualified form of the directory back into the job.
  final FileSystem outputFs = configured.getFileSystem(job);
  final Path qualified = outputFs.makeQualified(configured);
  setOutputPath(job, qualified);

  if (outputFs.exists(qualified)) {
    throw new FileAlreadyExistsException("Output directory " + qualified
        + " already exists");
  }
}
 
Example 2
/**
 * Writes the given splits to a sequence file on the local file system,
 * registers that file in {@code conf} under {@code tableName}, and asserts
 * that the expected number of rows was written and that the file exists.
 *
 * @param splits the splits to write
 * @param conf configuration to update; its default file system is set to {@code file:///}
 * @param expectedNumRows the row count the writer is expected to report
 * @param tableName table name used for the file name and the registration key
 * @return the qualified path of the written splits file
 * @throws IOException if writing or checking the file fails
 */
private Path createSplitsFile(Map<Text,String> splits, Configuration conf, int expectedNumRows, String tableName) throws IOException {
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, URI.create("file:///").toString());
    conf.setLong("fs.local.block.size", 32 * 1024 * 1024);
    final FileSystem localFs = setWorkingDirectory(conf);

    final Path relativeSplits = new Path("splits" + tableName + ".seq");
    final Path qualifiedSplits = localFs.makeQualified(relativeSplits);
    final long rowsWritten = ShardedTableMapFile.writeSplitsFile(splits, qualifiedSplits, conf);

    // The configuration is given the unqualified path, not the qualified one.
    final Map<String,Path> filesByTable = new HashMap<>();
    filesByTable.put(tableName, relativeSplits);
    ShardedTableMapFile.addToConf(conf, filesByTable);

    Assert.assertEquals("IngestJob#writeSplitsFile failed to create the expected number of rows", expectedNumRows, rowsWritten);
    Assert.assertTrue(localFs.exists(qualifiedSplits));
    return qualifiedSplits;
}
 
Example 3
Source Project: big-c   File: TestMapFile.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code midKey()} yields {@code null} for a map file that was
 * created and closed without any entries being appended.
 */
@Test
@SuppressWarnings("deprecation")
public void testMidKeyEmpty() throws Exception {
  // Create the map file and close it right away, so nothing is appended.
  Path mapFileDir = new Path(TEST_DIR, "testMidKeyEmpty.mapfile");
  FileSystem localFs = FileSystem.getLocal(conf);
  Path qualifiedMapFileDir = localFs.makeQualified(mapFileDir);

  MapFile.Writer emptyWriter = new MapFile.Writer(conf, localFs,
      qualifiedMapFileDir.toString(), IntWritable.class, IntWritable.class);
  emptyWriter.close();

  // Open the file that was just written and ask for its middle key.
  MapFile.Reader mapReader = new MapFile.Reader(qualifiedMapFileDir, conf);
  try {
    assertEquals(null, mapReader.midKey());
  } finally {
    mapReader.close();
  }
}
 
Example 4
Source Project: Hadoop-BAM   File: TestCRAMOutputFormat.java    License: MIT License 6 votes vote down vote up
/**
 * Runs a map-only job that reads {@code inputFile} with
 * {@code CRAMInputFormat} and writes it with
 * {@code CRAMTestNoHeaderOutputFormat} into {@code target/out}, which is
 * deleted beforehand. Asserts that the job completes successfully.
 *
 * @param inputFile location of the input file
 * @return the qualified output directory of the job
 * @throws Exception if the job cannot be set up or run
 */
private Path doMapReduce(final String inputFile) throws Exception {
    final FileSystem fs = FileSystem.get(conf);
    final Path source = new Path(inputFile);
    final Path target = fs.makeQualified(new Path("target/out"));
    // Remove the output of any earlier run.
    fs.delete(target, true);

    final Job mapOnlyJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(mapOnlyJob, source);

    // Input side.
    mapOnlyJob.setInputFormatClass(CRAMInputFormat.class);
    mapOnlyJob.setMapOutputKeyClass(LongWritable.class);
    mapOnlyJob.setMapOutputValueClass(SAMRecordWritable.class);

    // Output side.
    conf.set(CRAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, inputFile);
    mapOnlyJob.setOutputFormatClass(CRAMTestNoHeaderOutputFormat.class);
    mapOnlyJob.setOutputKeyClass(LongWritable.class);
    mapOnlyJob.setOutputValueClass(SAMRecordWritable.class);

    mapOnlyJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(mapOnlyJob, target);

    assertTrue(mapOnlyJob.waitForCompletion(true));
    return target;
}
 
Example 5
Source Project: big-c   File: FileOutputFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates the output directory configured for a job.
 *
 * <p>When an output directory is configured it is qualified against its own
 * file system and written back into the job configuration, delegation
 * tokens are obtained for it, and it is required not to exist yet. A
 * missing output directory is only tolerated when the job has zero reduce
 * tasks.
 *
 * @param ignored not used by this method
 * @param job the job configuration to validate (and update)
 * @throws FileAlreadyExistsException if the output directory already exists
 * @throws InvalidJobConfException if no output directory is set and the job
 *         has at least one reduce task
 * @throws IOException if the file system cannot be obtained or queried
 */
public void checkOutputSpecs(FileSystem ignored, JobConf job)
    throws FileAlreadyExistsException, InvalidJobConfException, IOException {
  final Path requested = getOutputPath(job);
  if (requested == null) {
    if (job.getNumReduceTasks() != 0) {
      throw new InvalidJobConfException("Output directory not set in JobConf.");
    }
    // No output directory and no reducers: nothing further to verify.
    return;
  }

  // Store the qualified form of the directory back into the job.
  final FileSystem targetFs = requested.getFileSystem(job);
  final Path resolved = targetFs.makeQualified(requested);
  setOutputPath(job, resolved);

  // Obtain tokens for the file system hosting the output directory.
  final Path[] tokenPaths = new Path[] {resolved};
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), tokenPaths, job);

  if (targetFs.exists(resolved)) {
    throw new FileAlreadyExistsException("Output directory " + resolved
        + " already exists");
  }
}
 
Example 6
Source Project: tez   File: MRInputUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads a serialized new-API input split from a split file on the local
 * file system.
 *
 * <p>The file named by {@code splitMetaInfo.getSplitLocation()} is opened,
 * positioned at {@code splitMetaInfo.getStartOffset()}, and the split's
 * class name followed by its serialized form is read from there.
 *
 * @param splitMetaInfo location and start offset of the serialized split
 * @param jobConf configuration used to obtain the local file system, load
 *        the split class, and build the serialization factory
 * @param splitBytesCounter incremented by the number of bytes consumed from
 *        the file; may be {@code null}
 * @return the deserialized split
 * @throws IOException if the file cannot be read or the split class cannot
 *         be found
 */
@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromDisk(
    TaskSplitIndex splitMetaInfo, JobConf jobConf, TezCounter splitBytesCounter)
    throws IOException {
  Path file = new Path(splitMetaInfo.getSplitLocation());
  long offset = splitMetaInfo.getStartOffset();

  // Split information read from local filesystem.
  FileSystem fs = FileSystem.getLocal(jobConf);
  file = fs.makeQualified(file);
  LOG.info("Reading input split file from : " + file);
  FSDataInputStream inFile = fs.open(file);
  // Everything after open() runs inside try/finally so the stream is
  // released even when seeking, class loading or deserialization fails.
  try {
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapreduce.InputSplit> cls;
    try {
      cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
      throw new IOException("Split class " + className + " not found", ce);
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer =
        (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory.getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null);
    // Bytes consumed = position after deserialization minus the start offset.
    long pos = inFile.getPos();
    if (splitBytesCounter != null) {
      splitBytesCounter.increment(pos - offset);
    }
    return split;
  } finally {
    inFile.close();
  }
}
 
Example 7
Source Project: hbase   File: TestImportTSVWithTTLs.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code data} to an input file in the table's test directory, runs
 * an ImportTsv job over it, and asserts that the tool exits with code 0.
 * The test directory is cleaned up afterwards unless
 * {@code DELETE_AFTER_LOAD_CONF} is set to false.
 *
 * @param util testing utility supplying the configuration and test directories
 * @param family not used by this method
 * @param data the text written to the input file
 * @param args arguments passed BEFORE the input file path is appended; the
 *        last element is used as the table name
 * @param valueMultiplier not used by this method
 * @return the Tool instance used to run the import
 * @throws Exception if writing the input or running the tool fails
 */
protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
    String[] args, int valueMultiplier) throws Exception {
  final TableName tableName = TableName.valueOf(args[args.length - 1]);
  final String tableDirName = tableName.getNameAsString();
  final Configuration jobConf = new Configuration(util.getConfiguration());

  // Write the test data into the table's test directory.
  final FileSystem testFs = FileSystem.get(jobConf);
  final Path inputFile = testFs.makeQualified(
      new Path(util.getDataTestDirOnTestFS(tableDirName), "input.dat"));
  final FSDataOutputStream out = testFs.create(inputFile, true);
  out.write(Bytes.toBytes(data));
  out.close();
  LOG.debug(String.format("Wrote test data to file: %s", inputFile));

  final boolean forceCombiner = jobConf.getBoolean(FORCE_COMBINER_CONF, true);
  if (forceCombiner) {
    LOG.debug("Forcing combiner.");
    jobConf.setInt("mapreduce.map.combine.minspills", 1);
  }

  // The input file path goes last on the command line.
  final List<String> commandLine = new ArrayList<>(Arrays.asList(args));
  commandLine.add(inputFile.toString());
  final Tool importTool = new ImportTsv();
  LOG.debug("Running ImportTsv with arguments: " + commandLine);
  try {
    // Job will fail if observer rejects entries without TTL
    final int exitCode = ToolRunner.run(jobConf, importTool, commandLine.toArray(args));
    assertEquals(0, exitCode);
  } finally {
    // Remove the test directory whether or not the import succeeded.
    if (jobConf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
      LOG.debug("Deleting test subdirectory");
      util.cleanupDataTestDirOnTestFS(tableDirName);
    }
  }

  return importTool;
}
 
Example 8
/**
 * Runs an ImportTsv job over {@code data} and validates the resulting table.
 * Returns the ImportTsv <code>Tool</code> instance so that callers can
 * inspect it for further validation as necessary.
 *
 * @param util testing utility supplying the configuration and test directories
 * @param family column family passed to the table validation
 * @param data the text written to the input file
 * @param args
 *          Any arguments to pass BEFORE inputFile path is appended. The last
 *          element is used as the table name.
 * @param valueMultiplier passed through to the table validation
 * @param dataAvailable passed through to the table validation
 * @return The Tool instance used to run the test.
 * @throws Exception if writing the input, running the tool, or validating fails
 */
private Tool doMROnTableTest(HBaseTestingUtility util, String family, String data, String[] args,
    int valueMultiplier, boolean dataAvailable) throws Exception {
  final String tableName = args[args.length - 1];
  final Configuration jobConf = new Configuration(util.getConfiguration());

  // Write the test data into the table's test directory.
  final FileSystem testFs = FileSystem.get(jobConf);
  final Path inputFile =
      testFs.makeQualified(new Path(util.getDataTestDirOnTestFS(tableName), "input.dat"));
  final FSDataOutputStream out = testFs.create(inputFile, true);
  out.write(Bytes.toBytes(data));
  out.close();
  LOG.debug(String.format("Wrote test data to file: %s", inputFile));

  final boolean forceCombiner = jobConf.getBoolean(FORCE_COMBINER_CONF, true);
  if (forceCombiner) {
    LOG.debug("Forcing combiner.");
    jobConf.setInt("mapreduce.map.combine.minspills", 1);
  }

  // The input file path goes last on the command line.
  final List<String> commandLine = new ArrayList<>(Arrays.asList(args));
  commandLine.add(inputFile.toString());
  final Tool importTool = new ImportTsv();
  LOG.debug("Running ImportTsv with arguments: " + commandLine);
  final int exitCode = ToolRunner.run(jobConf, importTool, commandLine.toArray(args));
  assertEquals(0, exitCode);

  validateTable(jobConf, TableName.valueOf(tableName), family, valueMultiplier, dataAvailable);

  final boolean deleteAfterLoad = jobConf.getBoolean(DELETE_AFTER_LOAD_CONF, true);
  if (deleteAfterLoad) {
    LOG.debug("Deleting test subdirectory");
    util.cleanupDataTestDirOnTestFS(tableName);
  }
  return importTool;
}
 
Example 9
Source Project: tajo   File: PullServerUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Retrieve meta information of file chunks which correspond to the requested URI.
 * Only chunks whose meta information is non-null and reports a length greater
 * than zero are included; task outputs that do not exist locally are skipped
 * with a warning.
 *
 * @param conf configuration handed to the local directory allocator
 * @param lDirAlloc allocator used to check for and locate each task's output
 * @param localFS file system used to qualify the located output path
 * @param params request parameters (task attempt ids, query id, execution block id,
 *               start/end keys and the "last" flag)
 * @param gson serializer used to turn each chunk meta into JSON
 * @param indexReaderCache passed through to the chunk meta search
 * @param lowCacheHitCheckThreshold passed through to the chunk meta search
 * @return one JSON string per retained file chunk meta
 * @throws IOException
 * @throws ExecutionException
 */
public static List<String> getJsonMeta(final TajoConf conf,
                                       final LocalDirAllocator lDirAlloc,
                                       final FileSystem localFS,
                                       final PullServerParams params,
                                       final Gson gson,
                                       final LoadingCache<IndexCacheKey, BSTIndexReader> indexReaderCache,
                                       final int lowCacheHitCheckThreshold)
    throws IOException, ExecutionException {
  final List<String> taskIds = PullServerUtil.splitMaps(params.taskAttemptIds());
  final Path queryBaseDir = PullServerUtil.getBaseOutputDir(params.queryId(), params.ebId());
  final List<String> jsonMetas = new ArrayList<>();

  for (String taskId : taskIds) {
    final String outputLocation =
        StorageUtil.concatPath(queryBaseDir, taskId, "output").toString();
    if (!lDirAlloc.ifExists(outputLocation, conf)) {
      LOG.warn("Range shuffle - file not exist. " + outputLocation);
      continue;
    }

    final Path chunkFile =
        localFS.makeQualified(lDirAlloc.getLocalPathToRead(outputLocation, conf));
    final FileChunkMeta chunkMeta = PullServerUtil.searchFileChunkMeta(
        params.queryId(), params.ebId(), taskId, chunkFile,
        params.startKey(), params.endKey(), params.last(),
        indexReaderCache, lowCacheHitCheckThreshold);

    // Skip chunks that were not found or carry no data.
    if (chunkMeta == null || chunkMeta.getLength() <= 0) {
      continue;
    }
    jsonMetas.add(gson.toJson(chunkMeta, FileChunkMeta.class));
  }
  return jsonMetas;
}
 
Example 10
Source Project: kylin   File: KylinConfigBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the directory used for metastore "big cell" data, computing and
 * caching it on first use.
 *
 * <p>If {@code kylin.env.hdfs-metastore-bigcell-dir} is not set, the JDBC
 * HDFS working directory is returned (and not cached). Otherwise the
 * configured path must be absolute; it is qualified against the read file
 * system, extended with the metadata URL prefix (with ':' replaced by '-'),
 * and terminated with a trailing slash.
 *
 * @return the big-cell directory, ending with "/" when derived from the
 *         configured root
 * @throws IllegalArgumentException if the configured directory is not absolute
 * @throws RuntimeException wrapping the IOException if the file system
 *         cannot be obtained
 */
public String getMetastoreBigCellHdfsDirectory() {

        if (cachedBigCellDirectory != null) {
            return cachedBigCellDirectory;
        }

        String root = getOptional("kylin.env.hdfs-metastore-bigcell-dir");

        if (root == null) {
            return getJdbcHdfsWorkingDirectory();
        }

        Path path = new Path(root);
        if (!path.isAbsolute()) {
            throw new IllegalArgumentException(
                    "kylin.env.hdfs-metastore-bigcell-dir must be absolute, but got " + root);
        }

        // make sure path is qualified
        try {
            FileSystem fs = HadoopUtil.getReadFileSystem();
            path = fs.makeQualified(path);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        root = new Path(path, StringUtils.replaceChars(getMetadataUrlPrefix(), ':', '-')).toString();

        if (!root.endsWith("/")) {
            root += "/";
        }

        // Rewrite only the leading scheme. String.replace would also rewrite
        // any later occurrence of the scheme token inside the path itself.
        if (root.startsWith(FILE_SCHEME)) {
            root = "file://" + root.substring(FILE_SCHEME.length());
        } else if (root.startsWith(MAPRFS_SCHEME)) {
            root = "maprfs://" + root.substring(MAPRFS_SCHEME.length());
        }

        // Publish to the cache only once the value is fully normalized, so a
        // concurrent reader never observes an intermediate form.
        cachedBigCellDirectory = root;
        return root;
    }
 
Example 11
/**
 * Cleans up and then (re)creates a test directory on the local file system.
 *
 * @param dir a local directory to be created
 * @return the created directory as a path qualified by the local file system
 * @throws java.io.IOException
 */
public static Path getTestDir(String dir) throws IOException {
  final Path testDir = new Path(dir);
  final FileSystem localFs = FileSystem.getLocal(new Configuration());

  // Clear whatever is left at that location before creating it afresh.
  cleanupTestDir(dir);
  localFs.mkdirs(testDir);

  final Path qualifiedTestDir = localFs.makeQualified(testDir);
  return qualifiedTestDir;
}
 
Example 12
Source Project: tez   File: MROutput.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Create the {@link DataSinkDescriptor}.
 *
 * <p>For output formats derived from either FileOutputFormat class an
 * output path is mandatory. When credentials are requested for the sink
 * file system and an output path is set, the qualified URI of that path is
 * registered on the descriptor.
 *
 * @return {@link DataSinkDescriptor}
 * @throws TezUncheckedException if a file-based output format has no output
 *         path, or the sink file system cannot be obtained
 */
public DataSinkDescriptor build() {
  final boolean fileBasedFormat =
      org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.class.isAssignableFrom(outputFormat)
          || FileOutputFormat.class.isAssignableFrom(outputFormat);
  if (fileBasedFormat && outputPath == null) {
    throw new TezUncheckedException(
        "OutputPaths must be specified for OutputFormats based on " +
            org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.class.getName() + " or " +
            FileOutputFormat.class.getName());
  }

  Collection<URI> credentialUris = null;
  if (getCredentialsForSinkFilesystem && outputPath != null) {
    try {
      final Path sinkPath = new Path(outputPath);
      final FileSystem sinkFs = sinkPath.getFileSystem(conf);
      credentialUris = Collections.singletonList(sinkFs.makeQualified(sinkPath).toUri());
    } catch (IOException e) {
      throw new TezUncheckedException(e);
    }
  }

  final OutputDescriptor outputDescriptor =
      OutputDescriptor.create(outputClassName).setUserPayload(createUserPayload());
  // A committer is attached only when committing was requested.
  OutputCommitterDescriptor committerDescriptor = null;
  if (doCommit) {
    committerDescriptor = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
  }
  final DataSinkDescriptor sink =
      DataSinkDescriptor.create(outputDescriptor, committerDescriptor, null);

  final boolean recordHistoryText = conf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT);
  if (recordHistoryText) {
    sink.getOutputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }

  if (credentialUris != null) {
    sink.addURIsForCredentials(credentialUris);
  }
  return sink;
}
 
Example 13
Source Project: Hadoop-BAM   File: TestSAMInputFormat.java    License: MIT License 5 votes vote down vote up
/**
 * Runs a map-only job that reads {@code input} through
 * {@code SAMInputFormat} and checks that the value column of every line
 * written to {@code part-m-00000} equals the trimmed SAM string of the
 * record read directly from the input file at the same position.
 */
@Test
public void testMapReduceJob() throws Exception {
  Configuration conf = new Configuration();

  FileSystem fileSystem = FileSystem.get(conf);
  Path inputPath = new Path(input);
  Path outputPath = fileSystem.makeQualified(new Path("target/out"));
  // Remove the output of any earlier run.
  fileSystem.delete(outputPath, true);

  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SAMInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(SAMRecordWritable.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertTrue(success);

  // Expected values: the records as read directly from the input file.
  List<String> samStrings = new ArrayList<String>();
  SamReader samReader = SamReaderFactory.makeDefault().open(new File(input));
  try {
    for (SAMRecord r : samReader) {
      samStrings.add(r.getSAMString().trim());
    }
  } finally {
    // Release the reader even if iterating over the records fails.
    samReader.close();
  }

  File outputFile = new File(new File(outputPath.toUri()), "part-m-00000");
  BufferedReader br = new BufferedReader(new FileReader(outputFile));
  try {
    String line;
    int index = 0;
    while ((line = br.readLine()) != null) {
      String value = line.substring(line.indexOf("\t") + 1); // ignore key
      assertEquals(samStrings.get(index++), value);
    }
    // NOTE(review): nothing checks that index reached samStrings.size(), so
    // an empty or truncated output file would still pass - confirm intent.
  } finally {
    // Release the file handle even when an assertion above fails.
    br.close();
  }
}
 
Example 14
Source Project: hadoop   File: PathData.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates an object to wrap the given parameters as fields.  The string
 * used to create the path will be recorded since the Path object does not
 * return exactly the same string used to initialize it.
 * @param fs the FileSystem
 * @param pathString a String of the path
 * @param stat the FileStatus (may be null if the path doesn't exist)
 * @throws IOException declared by the constructor; which call raises it is
 *         not visible from this block
 */
private PathData(FileSystem fs, String pathString, FileStatus stat)
throws IOException {
  this.fs = fs;
  // Keep the URI parsed from the caller's string alongside the qualified path.
  this.uri = stringToUri(pathString);
  this.path = fs.makeQualified(new Path(uri));
  setStat(stat);

  // Only evaluated when Path.WINDOWS is set.
  if (Path.WINDOWS) {
    inferredSchemeFromPath = checkIfSchemeInferredFromPath(pathString);
  }
}
 
Example 15
Source Project: gemfirexd-oss   File: AbstractHoplog.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Sets up this hoplog's configuration, statistics, qualified path and
 * descriptor.
 *
 * @param path location of the hoplog; stored in qualified form
 * @param stats statistics object to record on this instance
 * @param fs file system supplying the configuration and qualifying the path
 */
private void initialize(Path path, SortedOplogStatistics stats, FileSystem fs) {
  this.conf = fs.getConf();
  this.stats = stats;

  // The descriptor is built from the name of the qualified path.
  final Path qualified = fs.makeQualified(path);
  this.path = qualified;
  this.hfd = new HoplogDescriptor(qualified.getName());
}
 
Example 16
Source Project: hbase   File: TestImportTsv.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Run an ImportTsv job and perform basic validation on the results.
 * Returns the ImportTsv <code>Tool</code> instance so that other tests can
 * inspect it for further validation as necessary. This method is static to
 * insure non-reliance on instance's util/conf facilities.
 * @param util testing utility supplying the configuration and test directories
 * @param table the table to import into
 * @param family column family passed to the validation helpers
 * @param data text written to the input file; a default row is used when null
 * @param args Any arguments to pass BEFORE inputFile path is appended; each
 *          entry is turned into a <code>-Dkey=value</code> argument.
 * @param valueMultiplier passed through to the table validation
 * @param expectedKVCount passed through to the hfile validation
 * @return The Tool instance used to run the test.
 * @throws Exception if writing the input, running the tool, or validating fails
 */
protected static Tool doMROnTableTest(HBaseTestingUtility util, TableName table,
    String family, String data, Map<String, String> args, int valueMultiplier,int expectedKVCount)
throws Exception {
  Configuration conf = new Configuration(util.getConfiguration());

  // populate input file
  FileSystem fs = FileSystem.get(conf);
  Path inputPath = fs.makeQualified(
          new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
  if (data == null) {
    data = "KEY\u001bVALUE1\u001bVALUE2\n";
  }
  FSDataOutputStream op = fs.create(inputPath, true);
  try {
    op.write(Bytes.toBytes(data));
  } finally {
    // Close the stream even if the write fails.
    op.close();
  }
  LOG.debug(String.format("Wrote test data to file: %s", inputPath));

  if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
    LOG.debug("Forcing combiner.");
    conf.setInt("mapreduce.map.combine.minspills", 1);
  }

  // Build args array: one -D option per entry, then table name and input path.
  String[] argsArray = new String[args.size() + 2];
  int i = 0;
  for (Map.Entry<String, String> pair : args.entrySet()) {
    argsArray[i++] = "-D" + pair.getKey() + "=" + pair.getValue();
  }
  argsArray[i] = table.getNameAsString();
  argsArray[i + 1] = inputPath.toString();

  // run the import
  Tool tool = new ImportTsv();
  LOG.debug("Running ImportTsv with arguments: " + Arrays.toString(argsArray));
  assertEquals(0, ToolRunner.run(conf, tool, argsArray));

  // Perform basic validation. If the input args did not include
  // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
  // Otherwise, validate presence of hfiles.
  boolean isDryRun = args.containsKey(ImportTsv.DRY_RUN_CONF_KEY) &&
      "true".equalsIgnoreCase(args.get(ImportTsv.DRY_RUN_CONF_KEY));
  if (args.containsKey(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
    String bulkOutputDir = args.get(ImportTsv.BULK_OUTPUT_CONF_KEY);
    if (isDryRun) {
      // Check the configured output directory itself; a path built from the
      // configuration key's name would never exist and the check would
      // pass vacuously.
      assertFalse(String.format("Dry run mode, %s should not have been created.",
               bulkOutputDir),
          fs.exists(new Path(bulkOutputDir)));
    } else {
      validateHFiles(fs, bulkOutputDir, family, expectedKVCount);
    }
  } else {
    validateTable(conf, table, family, valueMultiplier, isDryRun);
  }

  if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
    LOG.debug("Deleting test subdirectory");
    util.cleanupDataTestDirOnTestFS(table.getNameAsString());
  }
  return tool;
}
 
Example 17
Source Project: big-c   File: TestMRApps.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Recursively deletes the given directory through the local Hadoop file system.
 *
 * @param dir the directory to remove
 * @throws IOException if the local file system cannot be obtained or the delete fails
 */
private static void delete(File dir) throws IOException {
  final FileSystem localFs = FileSystem.getLocal(new Configuration());
  final Path target = new Path(dir.getAbsolutePath());
  localFs.delete(localFs.makeQualified(target), true);
}
 
Example 18
Source Project: gemfirexd-oss   File: AbstractHoplog.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Sets up this hoplog's configuration, statistics, qualified path and
 * descriptor.
 *
 * @param path location of the hoplog; stored in qualified form
 * @param stats statistics object to record on this instance
 * @param fs file system supplying the configuration and qualifying the path
 */
private void initialize(Path path, SortedOplogStatistics stats, FileSystem fs) {
  this.conf = fs.getConf();
  this.stats = stats;

  // The descriptor is built from the name of the qualified path.
  final Path qualified = fs.makeQualified(path);
  this.path = qualified;
  this.hfd = new HoplogDescriptor(qualified.getName());
}
 
Example 19
Source Project: mrgeo   File: CsvOutputFormatTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes two features through {@code CsvRecordWriter} and compares the
 * produced CSV file and its ".columns" companion against the baseline files
 * in the test input directory.
 */
@Test
@Category(UnitTest.class)
public void testBasics() throws Exception
{
  // this class and its unit tests are a work in progress.
  FileSystem fs = new RawLocalFileSystem();
  try
  {
    String outputDir = TestUtils.composeOutputDir(CsvOutputFormatTest.class);

    fs.setConf(new Configuration());
    Path csvPath = fs.makeQualified(new Path(outputDir, "testBasics.csv"));
    Path columnsPath = new Path(csvPath.toString() + ".columns");

    CsvOutputFormat.CsvRecordWriter writer =
        new CsvOutputFormat.CsvRecordWriter(columnsPath, csvPath);

    // The same geometry instance is reused; attributes are overwritten per row.
    WritableGeometry feature = GeometryFactory.createEmptyGeometry();

    feature.setAttribute("string1", "foo");
    feature.setAttribute("int1", "1");
    feature.setAttribute("double1", "2.0");
    writer.write(new FeatureIdWritable(0), feature);

    feature.setAttribute("string1", "bar");
    feature.setAttribute("int1", "3");
    feature.setAttribute("double1", "4.0");
    writer.write(new FeatureIdWritable(1), feature);

    writer.close(null);

    String inputDir = TestUtils.composeInputDir(CsvOutputFormatTest.class);

    // Compare the data file first, then the columns file.
    File expectedCsv = new File(inputDir, "testBasics.csv").getAbsoluteFile();
    File actualCsv = new File(outputDir, "testBasics.csv").getAbsoluteFile();
    TestUtils.compareTextFiles(expectedCsv, actualCsv);

    File expectedColumns = new File(inputDir, "testBasics.csv.columns").getAbsoluteFile();
    File actualColumns = new File(outputDir, "testBasics.csv.columns").getAbsoluteFile();
    TestUtils.compareTextFiles(expectedColumns, actualColumns);
  }
  catch (Exception e)
  {
    e.printStackTrace();
    throw e;
  }
  finally
  {
    fs.close();
  }
}
 
Example 20
Source Project: hadoop-book   File: PiEstimator.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Run a map/reduce job for estimating Pi.
 *
 * <p>One input file is generated per map task under {@code TMP_DIR}; each
 * holds a single (offset, size) record. After the job finishes, the counts
 * of points inside and outside are read from the reducer output and the
 * estimate is computed as {@code 4 * numInside / (numMaps * numPoints)}.
 * {@code TMP_DIR} is deleted once input generation has started, whether or
 * not the job succeeds.
 *
 * @param numMaps number of map tasks (and input files) to use
 * @param numPoints number of points handled by each map task
 * @param jobConf job configuration; modified by this method
 * @return the estimated value of Pi, with a scale of 20
 * @throws IOException if {@code TMP_DIR} already exists, the input
 *         directory cannot be created, or the job or file access fails
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    //setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
                + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(
                    fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value
        // Divide once, with an explicit rounding mode. The exact
        // BigDecimal.divide(BigDecimal) throws ArithmeticException when the
        // quotient has a non-terminating decimal expansion (e.g. 3 maps).
        final BigDecimal numTotal =
                BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20)
                .multiply(BigDecimal.valueOf(numInside.get()))
                .divide(numTotal, java.math.RoundingMode.HALF_UP);
    } finally {
        fs.delete(TMP_DIR, true);
    }
}