Java Code Examples for org.apache.hadoop.fs.FileSystem#makeQualified()

The following examples show how to use org.apache.hadoop.fs.FileSystem#makeQualified(). The method resolves a path against the FileSystem's default URI and working directory, returning a fully qualified Path that carries an explicit scheme and authority.
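Before the project examples, here is a minimal, self-contained sketch of the call; the class name and the path "data/input.txt" are hypothetical and only illustrate the basic pattern used throughout the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // makeQualified() fills in the scheme and authority and resolves the path
    // against the FileSystem's working directory.
    Path qualified = fs.makeQualified(new Path("data/input.txt"));
    // With the default local file system this prints something like
    // file:/home/<user>/data/input.txt; on HDFS it would carry the hdfs:// authority.
    System.out.println(qualified);
  }
}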
Example 1
Project: RDFS   File: FileOutputFormat.java    License: Apache License 2.0
public void checkOutputSpecs(FileSystem ignored, JobConf job) 
  throws FileAlreadyExistsException, 
         InvalidJobConfException, IOException {
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null && job.getNumReduceTasks() != 0) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }
  if (outDir != null) {
    FileSystem fs = outDir.getFileSystem(job);
    // normalize the output directory
    outDir = fs.makeQualified(outDir);
    setOutputPath(job, outDir);
    // check its existence
    if (fs.exists(outDir)) {
      throw new FileAlreadyExistsException("Output directory " + outDir + 
                                           " already exists");
    }
  }
}
 
Example 2
Project: datawave   File: ShardedTableMapFileTest.java    License: Apache License 2.0
private Path createSplitsFile(Map<Text,String> splits, Configuration conf, int expectedNumRows, String tableName) throws IOException {
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, URI.create("file:///").toString());
    conf.setLong("fs.local.block.size", 32 * 1024 * 1024);
    FileSystem fs = setWorkingDirectory(conf);
    
    Path path = new Path("splits" + tableName + ".seq");
    Path file = fs.makeQualified(path);
    long actualCount = ShardedTableMapFile.writeSplitsFile(splits, file, conf);
    Map<String,Path> shardedTableMapFiles = new HashMap<>();
    shardedTableMapFiles.put(tableName, path);
    ShardedTableMapFile.addToConf(conf, shardedTableMapFiles);
    Assert.assertEquals("IngestJob#writeSplitsFile failed to create the expected number of rows", expectedNumRows, actualCount);
    
    Assert.assertTrue(fs.exists(file));
    return file;
}
 
Example 3
Project: big-c   File: TestMapFile.java    License: Apache License 2.0
@Test
@SuppressWarnings("deprecation")
public void testMidKeyEmpty() throws Exception {
  // Write an empty MapFile (no entries are appended).
  Path dirName = new Path(TEST_DIR, "testMidKeyEmpty.mapfile");
  FileSystem fs = FileSystem.getLocal(conf);
  Path qualifiedDirName = fs.makeQualified(dirName);

  MapFile.Writer writer = new MapFile.Writer(conf, fs,
      qualifiedDirName.toString(), IntWritable.class, IntWritable.class);
  writer.close();
  // Now do getClosest on created mapfile.
  MapFile.Reader reader = new MapFile.Reader(qualifiedDirName, conf);
  try {
    assertEquals(null, reader.midKey()); 
  } finally {
    reader.close();
  }
}
 
Example 4
Project: Hadoop-BAM   File: TestCRAMOutputFormat.java    License: MIT License
private Path doMapReduce(final String inputFile) throws Exception {
    final FileSystem fileSystem = FileSystem.get(conf);
    final Path inputPath = new Path(inputFile);
    final Path outputPath = fileSystem.makeQualified(new Path("target/out"));
    fileSystem.delete(outputPath, true);

    final Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, inputPath);

    job.setInputFormatClass(CRAMInputFormat.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);

    conf.set(CRAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, inputFile);
    job.setOutputFormatClass(CRAMTestNoHeaderOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, outputPath);

    final boolean success = job.waitForCompletion(true);
    assertTrue(success);

    return outputPath;
}
 
Example 5
Project: big-c   File: FileOutputFormat.java    License: Apache License 2.0
public void checkOutputSpecs(FileSystem ignored, JobConf job) 
  throws FileAlreadyExistsException, 
         InvalidJobConfException, IOException {
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null && job.getNumReduceTasks() != 0) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }
  if (outDir != null) {
    FileSystem fs = outDir.getFileSystem(job);
    // normalize the output directory
    outDir = fs.makeQualified(outDir);
    setOutputPath(job, outDir);
    
    // get delegation token for the outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), 
                                        new Path[] {outDir}, job);
    
    // check its existence
    if (fs.exists(outDir)) {
      throw new FileAlreadyExistsException("Output directory " + outDir + 
                                           " already exists");
    }
  }
}
 
Example 6
Project: tez   File: MRInputUtils.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromDisk(
    TaskSplitIndex splitMetaInfo, JobConf jobConf, TezCounter splitBytesCounter)
    throws IOException {
  Path file = new Path(splitMetaInfo.getSplitLocation());
  long offset = splitMetaInfo.getStartOffset();

  // Split information read from local filesystem.
  FileSystem fs = FileSystem.getLocal(jobConf);
  file = fs.makeQualified(file);
  LOG.info("Reading input split file from : " + file);
  FSDataInputStream inFile = fs.open(file);
  inFile.seek(offset);
  String className = Text.readString(inFile);
  Class<org.apache.hadoop.mapreduce.InputSplit> cls;
  try {
    cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className);
  } catch (ClassNotFoundException ce) {
    IOException wrap = new IOException("Split class " + className + " not found");
    wrap.initCause(ce);
    throw wrap;
  }
  SerializationFactory factory = new SerializationFactory(jobConf);
  Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory
      .getDeserializer(cls);
  deserializer.open(inFile);
  org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null);
  long pos = inFile.getPos();
  if (splitBytesCounter != null) {
    splitBytesCounter.increment(pos - offset);
  }
  inFile.close();
  return split;
}
 
Example 7
Project: hbase   File: TestImportTSVWithTTLs.java    License: Apache License 2.0
protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
    String[] args, int valueMultiplier) throws Exception {
  TableName table = TableName.valueOf(args[args.length - 1]);
  Configuration conf = new Configuration(util.getConfiguration());

  // populate input file
  FileSystem fs = FileSystem.get(conf);
  Path inputPath = fs.makeQualified(new Path(util
      .getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
  FSDataOutputStream op = fs.create(inputPath, true);
  op.write(Bytes.toBytes(data));
  op.close();
  LOG.debug(String.format("Wrote test data to file: %s", inputPath));

  if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
    LOG.debug("Forcing combiner.");
    conf.setInt("mapreduce.map.combine.minspills", 1);
  }

  // run the import
  List<String> argv = new ArrayList<>(Arrays.asList(args));
  argv.add(inputPath.toString());
  Tool tool = new ImportTsv();
  LOG.debug("Running ImportTsv with arguments: " + argv);
  try {
    // Job will fail if observer rejects entries without TTL
    assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
  } finally {
    // Clean up
    if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
      LOG.debug("Deleting test subdirectory");
      util.cleanupDataTestDirOnTestFS(table.getNameAsString());
    }
  }

  return tool;
}
 
Example 8
Project: hbase   File: TestImportTSVWithOperationAttributes.java    License: Apache License 2.0
/**
 * Run an ImportTsv job and perform basic validation on the results. Returns
 * the ImportTsv <code>Tool</code> instance so that other tests can inspect it
 * for further validation as necessary. This method is static to ensure
 * non-reliance on the instance's util/conf facilities.
 *
 * @param args
 *          Any arguments to pass BEFORE inputFile path is appended.
 * @param dataAvailable
 * @return The Tool instance used to run the test.
 */
private Tool doMROnTableTest(HBaseTestingUtility util, String family, String data, String[] args,
    int valueMultiplier, boolean dataAvailable) throws Exception {
  String table = args[args.length - 1];
  Configuration conf = new Configuration(util.getConfiguration());

  // populate input file
  FileSystem fs = FileSystem.get(conf);
  Path inputPath = fs.makeQualified(new Path(util.getDataTestDirOnTestFS(table), "input.dat"));
  FSDataOutputStream op = fs.create(inputPath, true);
  op.write(Bytes.toBytes(data));
  op.close();
  LOG.debug(String.format("Wrote test data to file: %s", inputPath));

  if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
    LOG.debug("Forcing combiner.");
    conf.setInt("mapreduce.map.combine.minspills", 1);
  }

  // run the import
  List<String> argv = new ArrayList<>(Arrays.asList(args));
  argv.add(inputPath.toString());
  Tool tool = new ImportTsv();
  LOG.debug("Running ImportTsv with arguments: " + argv);
  assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));

  validateTable(conf, TableName.valueOf(table), family, valueMultiplier, dataAvailable);

  if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
    LOG.debug("Deleting test subdirectory");
    util.cleanupDataTestDirOnTestFS(table);
  }
  return tool;
}
 
Example 9
Project: tajo   File: PullServerUtil.java    License: Apache License 2.0
/**
 * Retrieve meta information for the file chunks that correspond to the requested URI.
 * Only meta information for file chunks with non-zero length is retrieved.
 *
 * @param conf
 * @param lDirAlloc
 * @param localFS
 * @param params
 * @param gson
 * @param indexReaderCache
 * @param lowCacheHitCheckThreshold
 * @return a list of JSON-serialized file chunk metadata entries
 * @throws IOException
 * @throws ExecutionException
 */
public static List<String> getJsonMeta(final TajoConf conf,
                                       final LocalDirAllocator lDirAlloc,
                                       final FileSystem localFS,
                                       final PullServerParams params,
                                       final Gson gson,
                                       final LoadingCache<IndexCacheKey, BSTIndexReader> indexReaderCache,
                                       final int lowCacheHitCheckThreshold)
    throws IOException, ExecutionException {
  final List<String> taskIds = PullServerUtil.splitMaps(params.taskAttemptIds());
  final Path queryBaseDir = PullServerUtil.getBaseOutputDir(params.queryId(), params.ebId());
  final List<String> jsonMetas = new ArrayList<>();

  for (String eachTaskId : taskIds) {
    Path outputPath = StorageUtil.concatPath(queryBaseDir, eachTaskId, "output");
    if (!lDirAlloc.ifExists(outputPath.toString(), conf)) {
      LOG.warn("Range shuffle - file not exist. " + outputPath);
      continue;
    }
    Path path = localFS.makeQualified(lDirAlloc.getLocalPathToRead(outputPath.toString(), conf));
    FileChunkMeta meta;
    meta = PullServerUtil.searchFileChunkMeta(params.queryId(), params.ebId(), eachTaskId, path,
        params.startKey(), params.endKey(), params.last(), indexReaderCache, lowCacheHitCheckThreshold);
    if (meta != null && meta.getLength() > 0) {
      String jsonStr = gson.toJson(meta, FileChunkMeta.class);
      jsonMetas.add(jsonStr);
    }
  }
  return jsonMetas;
}
 
Example 10
Project: kylin   File: KylinConfigBase.java    License: Apache License 2.0
public String getMetastoreBigCellHdfsDirectory() {

    if (cachedBigCellDirectory != null)
        return cachedBigCellDirectory;

    String root = getOptional("kylin.env.hdfs-metastore-bigcell-dir");

    if (root == null) {
        return getJdbcHdfsWorkingDirectory();
    }

    Path path = new Path(root);
    if (!path.isAbsolute())
        throw new IllegalArgumentException(
                "kylin.env.hdfs-metastore-bigcell-dir must be absolute, but got " + root);

    // make sure path is qualified
    try {
        FileSystem fs = HadoopUtil.getReadFileSystem();
        path = fs.makeQualified(path);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    root = new Path(path, StringUtils.replaceChars(getMetadataUrlPrefix(), ':', '-')).toString();

    if (!root.endsWith("/"))
        root += "/";

    cachedBigCellDirectory = root;
    if (cachedBigCellDirectory.startsWith(FILE_SCHEME)) {
        cachedBigCellDirectory = cachedBigCellDirectory.replace(FILE_SCHEME, "file://");
    } else if (cachedBigCellDirectory.startsWith(MAPRFS_SCHEME)) {
        cachedBigCellDirectory = cachedBigCellDirectory.replace(MAPRFS_SCHEME, "maprfs://");
    }

    return cachedBigCellDirectory;
}
 
Example 11
Project: incubator-tajo   File: CommonTestingUtil.java    License: Apache License 2.0
/**
 *
 * @param dir a local directory to be created
 * @return  the created path
 * @throws java.io.IOException
 */
public static Path getTestDir(String dir) throws IOException {
  Path path = new Path(dir);
  FileSystem fs = FileSystem.getLocal(new Configuration());
  cleanupTestDir(dir);
  fs.mkdirs(path);

  return fs.makeQualified(path);
}
 
Example 12
Project: tez   File: MROutput.java    License: Apache License 2.0
/**
 * Create the {@link DataSinkDescriptor}
 * @return {@link DataSinkDescriptor}
 */
public DataSinkDescriptor build() {
  if (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.class
      .isAssignableFrom(outputFormat) ||
      FileOutputFormat.class.isAssignableFrom(outputFormat)) {
    if (outputPath == null) {
      throw new TezUncheckedException(
          "OutputPaths must be specified for OutputFormats based on " +
              org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.class.getName() + " or " +
              FileOutputFormat.class.getName());
    }
  }
  Collection<URI> uris = null;
  if (getCredentialsForSinkFilesystem && outputPath != null) {
    try {
      Path path = new Path(outputPath);
      FileSystem fs;
      fs = path.getFileSystem(conf);
      Path qPath = fs.makeQualified(path);
      uris = Collections.singletonList(qPath.toUri());
    } catch (IOException e) {
      throw new TezUncheckedException(e);
    }
  }

  DataSinkDescriptor ds = DataSinkDescriptor.create(
      OutputDescriptor.create(outputClassName).setUserPayload(createUserPayload()),
      (doCommit ? OutputCommitterDescriptor.create(
          MROutputCommitter.class.getName()) : null), null);
  if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
    ds.getOutputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }

  if (uris != null) {
    ds.addURIsForCredentials(uris);
  }
  return ds;
}
 
Example 13
Project: Hadoop-BAM   File: TestSAMInputFormat.java    License: MIT License
@Test
public void testMapReduceJob() throws Exception {
  Configuration conf = new Configuration();

  FileSystem fileSystem = FileSystem.get(conf);
  Path inputPath = new Path(input);
  Path outputPath = fileSystem.makeQualified(new Path("target/out"));
  fileSystem.delete(outputPath, true);

  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SAMInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(SAMRecordWritable.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertTrue(success);

  List<String> samStrings = new ArrayList<String>();
  SamReader samReader = SamReaderFactory.makeDefault().open(new File(input));
  for (SAMRecord r : samReader) {
    samStrings.add(r.getSAMString().trim());
  }
  samReader.close();

  File outputFile = new File(new File(outputPath.toUri()), "part-m-00000");
  BufferedReader br = new BufferedReader(new FileReader(outputFile));
  String line;
  int index = 0;
  while ((line = br.readLine()) != null) {
    String value = line.substring(line.indexOf("\t") + 1); // ignore key
    assertEquals(samStrings.get(index++), value);
  }
  br.close();
}
 
Example 14
Project: hadoop   File: PathData.java    License: Apache License 2.0
/**
 * Creates an object to wrap the given parameters as fields.  The string
 * used to create the path will be recorded since the Path object does not
 * return exactly the same string used to initialize it.
 * @param fs the FileSystem
 * @param pathString a String of the path
 * @param stat the FileStatus (may be null if the path doesn't exist)
 */
private PathData(FileSystem fs, String pathString, FileStatus stat)
throws IOException {
  this.fs = fs;
  this.uri = stringToUri(pathString);
  this.path = fs.makeQualified(new Path(uri));
  setStat(stat);

  if (Path.WINDOWS) {
    inferredSchemeFromPath = checkIfSchemeInferredFromPath(pathString);
  }
}
 
Example 15
Project: gemfirexd-oss   File: AbstractHoplog.java    License: Apache License 2.0
private void initialize(Path path, SortedOplogStatistics stats, FileSystem fs) {
  this.conf = fs.getConf();
  this.stats = stats;
  this.path = fs.makeQualified(path);
  this.hfd = new HoplogDescriptor(this.path.getName());
}
 
Example 16
Project: hbase   File: TestImportTsv.java    License: Apache License 2.0
/**
 * Run an ImportTsv job and perform basic validation on the results.
 * Returns the ImportTsv <code>Tool</code> instance so that other tests can
 * inspect it for further validation as necessary. This method is static to
 * ensure non-reliance on the instance's util/conf facilities.
 * @param args Any arguments to pass BEFORE inputFile path is appended.
 * @return The Tool instance used to run the test.
 */
protected static Tool doMROnTableTest(HBaseTestingUtility util, TableName table,
    String family, String data, Map<String, String> args, int valueMultiplier,int expectedKVCount)
throws Exception {
  Configuration conf = new Configuration(util.getConfiguration());

  // populate input file
  FileSystem fs = FileSystem.get(conf);
  Path inputPath = fs.makeQualified(
          new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
  FSDataOutputStream op = fs.create(inputPath, true);
  if (data == null) {
    data = "KEY\u001bVALUE1\u001bVALUE2\n";
  }
  op.write(Bytes.toBytes(data));
  op.close();
  LOG.debug(String.format("Wrote test data to file: %s", inputPath));

  if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
    LOG.debug("Forcing combiner.");
    conf.setInt("mapreduce.map.combine.minspills", 1);
  }

  // Build args array.
  String[] argsArray = new String[args.size() + 2];
  Iterator<Map.Entry<String, String>> it = args.entrySet().iterator();
  int i = 0;
  while (it.hasNext()) {
    Map.Entry<String, String> pair = it.next();
    argsArray[i] = "-D" + pair.getKey() + "=" + pair.getValue();
    i++;
  }
  argsArray[i] = table.getNameAsString();
  argsArray[i + 1] = inputPath.toString();

  // run the import
  Tool tool = new ImportTsv();
  LOG.debug("Running ImportTsv with arguments: " + Arrays.toString(argsArray));
  assertEquals(0, ToolRunner.run(conf, tool, argsArray));

  // Perform basic validation. If the input args did not include
  // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
  // Otherwise, validate presence of hfiles.
  boolean isDryRun = args.containsKey(ImportTsv.DRY_RUN_CONF_KEY) &&
      "true".equalsIgnoreCase(args.get(ImportTsv.DRY_RUN_CONF_KEY));
  if (args.containsKey(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
    if (isDryRun) {
      assertFalse(String.format("Dry run mode, %s should not have been created.",
               ImportTsv.BULK_OUTPUT_CONF_KEY),
          fs.exists(new Path(ImportTsv.BULK_OUTPUT_CONF_KEY)));
    } else {
      validateHFiles(fs, args.get(ImportTsv.BULK_OUTPUT_CONF_KEY), family,expectedKVCount);
    }
  } else {
    validateTable(conf, table, family, valueMultiplier, isDryRun);
  }

  if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
    LOG.debug("Deleting test subdirectory");
    util.cleanupDataTestDirOnTestFS(table.getNameAsString());
  }
  return tool;
}
 
Example 17
Project: big-c   File: TestMRApps.java    License: Apache License 2.0
private static void delete(File dir) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Path p = fs.makeQualified(new Path(dir.getAbsolutePath()));
  fs.delete(p, true);
}
 
Example 18
Project: mrgeo   File: CsvOutputFormatTest.java    License: Apache License 2.0
@Test
@Category(UnitTest.class)
public void testBasics() throws Exception
{
  // this class and its unit tests are a work in progress.
  FileSystem fs = new RawLocalFileSystem();
  try
  {
    String output = TestUtils.composeOutputDir(CsvOutputFormatTest.class);

    Configuration c = new Configuration();
    fs.setConf(c);
    Path testFile = new Path(output, "testBasics.csv");
    testFile = fs.makeQualified(testFile);
    Path columns = new Path(testFile.toString() + ".columns");

    CsvOutputFormat.CsvRecordWriter writer = new CsvOutputFormat.CsvRecordWriter(columns,
        testFile);


    WritableGeometry f = GeometryFactory.createEmptyGeometry();

    f.setAttribute("string1", "foo");
    f.setAttribute("int1", "1");
    f.setAttribute("double1", "2.0");
    writer.write(new FeatureIdWritable(0), f);

    f.setAttribute("string1", "bar");
    f.setAttribute("int1", "3");
    f.setAttribute("double1", "4.0");
    writer.write(new FeatureIdWritable(1), f);

    writer.close(null);

    String input = TestUtils.composeInputDir(CsvOutputFormatTest.class);

    File csvBaselineFile = new File(input, "testBasics.csv");
    File csvOutputFile = new File(output, "testBasics.csv");
    TestUtils.compareTextFiles(csvBaselineFile.getAbsoluteFile(), csvOutputFile.getAbsoluteFile());

    File columnsBaselineFile = new File(input, "testBasics.csv.columns");
    File columnsOutputFile = new File(output, "testBasics.csv.columns");

    TestUtils.compareTextFiles(columnsBaselineFile.getAbsoluteFile(), columnsOutputFile.getAbsoluteFile());
  }
  catch (Exception e)
  {
    e.printStackTrace();
    throw e;
  }
  finally
  {
    fs.close();
  }
}
 
Example 19
Project: hadoop-book   File: PiEstimator.java    License: Apache License 2.0
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    //setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
                + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(
                    fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value
        return BigDecimal.valueOf(4).setScale(20)
                .multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps))
                .divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}