Java Code Examples for org.apache.hadoop.fs.FileSystem#copyFromLocalFile()

The following examples show how to use org.apache.hadoop.fs.FileSystem#copyFromLocalFile(). Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
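For orientation, here is a minimal self-contained sketch of the call itself before the project examples. The paths are placeholders, not taken from any project below; the overload comments reflect the standard Hadoop FileSystem semantics, where the two-argument form keeps the source and overwrites an existing destination by default.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws Exception {
        // fs.defaultFS (core-site.xml) determines which cluster the destination resolves against
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path src = new Path("/tmp/words.txt");          // placeholder local path
        Path dst = new Path("/user/example/words.txt"); // placeholder HDFS path

        // Two-argument form: the local source is kept, an existing destination is overwritten
        fs.copyFromLocalFile(src, dst);

        // Four-argument form: delSrc deletes the local source after the copy,
        // overwrite controls whether an existing destination is replaced
        fs.copyFromLocalFile(false, true, src, dst);
    }
}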
Example 1
Source File: JobFilePartitioner.java    From hraven with Apache License 2.0
/**
 * @param hdfs
 *          FileSystem handle
 * @param f
 *          file to copy to HDFS
 * @param outputPath
 *          base output path; the target directory is derived from it and the
 *          file's modification time
 * @param skipExisting
 *          skip if the file already exists in the target. The file will be
 *          overwritten if it is already there and this argument is false.
 * @throws IOException
 *           if the target directory cannot be created or the file cannot be
 *           copied to the target directory.
 */
private void processPlainFile(FileSystem hdfs, File f, Path outputPath,
    boolean skipExisting) throws IOException {
  long fileModTime = f.lastModified();
  Path targetDir = getTargetDirectory(hdfs, outputPath, fileModTime);

  boolean doCopy = true;
  Path sourceFile = new Path(f.getPath());
  if (skipExisting) {
    Path target = new Path(targetDir, sourceFile.getName());
    if (hdfs.exists(target)) {
      doCopy = false;
    }
  }
  if (doCopy) {
    hdfs.copyFromLocalFile(sourceFile, targetDir);
  }
}
 
Example 2
Source File: HdfsDeployer.java    From celos with Apache License 2.0
public void deploy() throws Exception {
    FileSystem fs = context.getFileSystem();
    final String hdfsDirLocalPath = String.format(LOCAL_HDFS_PATTERN, context.getDeployDir());

    final File hdfsDirLocal = new File(hdfsDirLocalPath);
    if (!hdfsDirLocal.exists()) {
        throw new IllegalStateException(hdfsDirLocalPath + " not found on the local FS");
    }

    undeploy();

    Path dst = getDestinationHdfsPath();
    fs.mkdirs(dst);
    String[] childFiles = hdfsDirLocal.list();
    for (String child : childFiles) {
        fs.copyFromLocalFile(new Path(hdfsDirLocalPath, child), dst);
    }
}
 
Example 3
Source File: AbstractSolrSentryTestBase.java    From incubator-sentry with Apache License 2.0
public static File setupSentry() throws Exception {
  File sentrySite = File.createTempFile("sentry-site", "xml");
  sentrySite.deleteOnExit();
  File authProviderDir = new File(RESOURCES_DIR, "sentry");
  String authProviderName = "test-authz-provider.ini";
  FileSystem clusterFs = dfsCluster.getFileSystem();
  clusterFs.copyFromLocalFile(false,
    new Path(authProviderDir.toString(), authProviderName),
    new Path(authProviderName));

  // need to write sentry-site at execution time because we don't know
  // the location of sentry.solr.provider.resource beforehand
  StringBuilder sentrySiteData = new StringBuilder();
  sentrySiteData.append("<configuration>\n");
  addPropertyToSentry(sentrySiteData, "sentry.provider",
    "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider");
  addPropertyToSentry(sentrySiteData, "sentry.solr.provider.resource",
     clusterFs.getWorkingDirectory() + File.separator + authProviderName);
  sentrySiteData.append("</configuration>\n");
  FileUtils.writeStringToFile(sentrySite, sentrySiteData.toString());
  return sentrySite;
}
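The addPropertyToSentry helper is not shown in the snippet above. A plausible sketch of it, assuming it simply emits a Hadoop-style property element into the configuration being built (a guess, not the project's actual code):

// Hypothetical reconstruction -- the real helper lives elsewhere in the test base class
private static void addPropertyToSentry(StringBuilder sb, String name, String value) {
  sb.append("  <property>\n");
  sb.append("    <name>").append(name).append("</name>\n");
  sb.append("    <value>").append(value).append("</value>\n");
  sb.append("  </property>\n");
}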
 
Example 4
Source File: FileSystemOperations.java    From submarine with Apache License 2.0
public Path uploadToRemoteFile(Path stagingDir, String fileToUpload) throws
    IOException {
  FileSystem fs = remoteDirectoryManager.getDefaultFileSystem();

  // Upload to remote FS under staging area
  File localFile = new File(fileToUpload);
  if (!localFile.exists()) {
    throw new FileNotFoundException(
        "Trying to upload file " + localFile.getAbsolutePath()
            + " to remote, but could not find local file!");
  }
  String filename = localFile.getName();

  Path uploadedFilePath = new Path(stagingDir, filename);
  if (!uploadedFiles.contains(uploadedFilePath)) {
    if (SubmarineLogs.isVerbose()) {
      LOG.info("Copying local file " + fileToUpload + " to remote "
          + uploadedFilePath);
    }
    fs.copyFromLocalFile(new Path(fileToUpload), uploadedFilePath);
    uploadedFiles.add(uploadedFilePath);
  }
  return uploadedFilePath;
}
 
Example 5
Source File: DistributedCacheUtilImpl.java    From pentaho-hadoop-shims with Apache License 2.0
/**
 * Stages the source file or folder to a Hadoop file system and sets its permissions and replication value
 * appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of dest before staging
 * the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be copied into
 *                  dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be exactly dest. If
 *                  source is a folder its contents will be copied into dest. For more info see {@link
 *                  FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not, an exception will be thrown.
 * @param isPublic  Should the staged files be world-readable, as public Distributed Cache entries?
 * @throws IOException         Destination exists and is not a directory
 * @throws KettleFileException Source does not exist, or destination exists and overwrite is false.
 */
public void stageForCache( FileObject source, FileSystem fs, Path dest, boolean overwrite, boolean isPublic )
  throws IOException, KettleFileException {
  if ( !source.exists() ) {
    throw new KettleFileException(
      BaseMessages.getString( DistributedCacheUtilImpl.class, "DistributedCacheUtil.SourceDoesNotExist", source ) );
  }

  if ( fs.exists( dest ) ) {
    if ( overwrite ) {
      // It is a directory, clear it out
      fs.delete( dest, true );
    } else {
      throw new KettleFileException( BaseMessages
        .getString( DistributedCacheUtilImpl.class, "DistributedCacheUtil.DestinationExists",
          dest.toUri().getPath() ) );
    }
  }

  // Use the same replication we'd use for submitting jobs
  short replication = (short) fs.getConf().getInt( "mapred.submit.replication", 10 );

  if ( source.getURL().toString().endsWith( CONFIG_PROPERTIES ) ) {
    copyConfigProperties( source, fs, dest );
  } else {
    Path local = new Path( source.getURL().getPath() );
    fs.copyFromLocalFile( local, dest );
  }

  if ( isPublic ) {
    fs.setPermission( dest, PUBLIC_CACHED_FILE_PERMISSION );
  } else {
    fs.setPermission( dest, CACHED_FILE_PERMISSION );
  }
  fs.setReplication( dest, replication );
}
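A note on the configuration key above: mapred.submit.replication is the pre-YARN property name; on newer Hadoop versions it is deprecated in favor of mapreduce.client.submit.file.replication, and Hadoop's deprecation mapping translates between the two.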
 
Example 6
Source File: Client.java    From hadoop with Apache License 2.0
private void addToLocalResources(FileSystem fs, String fileSrcPath,
    String fileDstPath, String appId, Map<String, LocalResource> localResources,
    String resources) throws IOException {
  String suffix =
      appName + "/" + appId + "/" + fileDstPath;
  Path dst =
      new Path(fs.getHomeDirectory(), suffix);
  if (fileSrcPath == null) {
    FSDataOutputStream ostream = null;
    try {
      ostream = FileSystem
          .create(fs, dst, new FsPermission((short) 0710));
      ostream.writeUTF(resources);
    } finally {
      IOUtils.closeQuietly(ostream);
    }
  } else {
    fs.copyFromLocalFile(new Path(fileSrcPath), dst);
  }
  FileStatus scFileStatus = fs.getFileStatus(dst);
  LocalResource scRsrc =
      LocalResource.newInstance(
          ConverterUtils.getYarnUrlFromURI(dst.toUri()),
          LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
          scFileStatus.getLen(), scFileStatus.getModificationTime());
  localResources.put(fileDstPath, scRsrc);
}
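For context, the localResources map populated here is typically attached to a container launch context when the application is submitted. A minimal sketch of that wiring, assuming the standard YARN records API (it is not part of the example above):

import java.util.Map;

import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.util.Records;

static ContainerLaunchContext buildLaunchContext(Map<String, LocalResource> localResources) {
  ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
  // NodeManagers download every registered LocalResource before the container starts
  ctx.setLocalResources(localResources);
  return ctx;
}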
 
Example 7
Source File: Utils.java    From stratosphere with Apache License 2.0
/**
 * Copies a local resource to the remote file system and registers it as a YARN local resource.
 *
 * @return Path to the remote file (usually on HDFS)
 * @throws IOException
 */
public static Path setupLocalResource(Configuration conf, FileSystem fs, String appId, Path localRsrcPath, LocalResource appMasterJar, Path homedir)
		throws IOException {
	// copy to HDFS
	String suffix = ".stratosphere/" + appId + "/" + localRsrcPath.getName();
	
	Path dst = new Path(homedir, suffix);
	
	LOG.info("Copying from "+localRsrcPath+" to "+dst );
	fs.copyFromLocalFile(localRsrcPath, dst);
	registerLocalResource(fs, dst, appMasterJar);
	return dst;
}
 
Example 8
Source File: TestPigServerWithMacros.java    From spork with Apache License 2.0
@Test
public void testRegisterRemoteMacro() throws Throwable {
    PigServer pig = new PigServer(cluster.getExecType(), cluster.getProperties());

    String macroName = "util.pig";
    File macroFile = File.createTempFile("tmp", "");
    PrintWriter pw = new PrintWriter(new FileWriter(macroFile));
    pw.println("DEFINE row_count(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };");
    pw.close();

    FileSystem fs = cluster.getFileSystem();
    fs.copyFromLocalFile(new Path(macroFile.getAbsolutePath()), new Path(macroName));

    // find the absolute path for the directory so that it does not
    // depend on configuration
    String absPath = fs.getFileStatus(new Path(macroName)).getPath().toString();

    Util.createInputFile(cluster, "testRegisterRemoteMacro_input", new String[]{"1", "2"});

    pig.registerQuery("import '" + absPath + "';");
    pig.registerQuery("a = load 'testRegisterRemoteMacro_input';");
    pig.registerQuery("b = row_count(a);");
    Iterator<Tuple> iter = pig.openIterator("b");

    assertEquals(2L, ((Long)iter.next().get(0)).longValue());

    pig.shutdown();
}
 
Example 9
Source File: StramClientUtils.java    From attic-apex-core with Apache License 2.0
public static void copyFromLocalFileNoChecksum(FileSystem fs, File fromLocal, Path toDFS) throws IOException
{
  // This is to avoid having the Hadoop FileSystem API perform a checksum on the local file.
  // This "feature" has caused a lot of headaches because the local file may have been copied from HDFS and modified,
  // and the checksum will fail if the file is then copied back to HDFS.
  try {
    new File(fromLocal.getParentFile(), "." + fromLocal.getName() + ".crc").delete();
  } catch (Exception ex) {
    // ignore
  }
  fs.copyFromLocalFile(new Path(fromLocal.toURI()), toDFS);
}
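An alternative to deleting the .crc sidecar is to read the source through RawLocalFileSystem, which neither reads nor writes checksum files. A sketch under that assumption, using FileUtil.copy rather than copyFromLocalFile (so this takes a different code path than the method above):

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;

public static void copyFromLocalFileNoCrc(FileSystem fs, File fromLocal, Path toDFS,
    Configuration conf) throws IOException {
  RawLocalFileSystem rawLocal = new RawLocalFileSystem();
  rawLocal.setConf(conf); // the raw local FS ignores ".<name>.crc" sidecar files
  FileUtil.copy(rawLocal, new Path(fromLocal.toURI()), fs, toDFS,
      false /* deleteSource */, conf);
}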
 
Example 10
Source File: Client.java    From metron with Apache License 2.0
private Path addToLocalResources(FileSystem fs, String fileSrcPath,
                                 String fileDstPath, String appId, Map<String, LocalResource> localResources,
                                 String resources) throws IOException {
  String suffix =
          appName + "/" + appId + "/" + fileDstPath;
  Path dst =
          new Path(fs.getHomeDirectory(), suffix);
  if (fileSrcPath == null) {
    FSDataOutputStream ostream = null;
    try {
      ostream = FileSystem
              .create(fs, dst, new FsPermission((short) 0710));
      ostream.writeUTF(resources);
    } finally {
      IOUtils.closeQuietly(ostream);
    }
  } else {
    fs.copyFromLocalFile(new Path(fileSrcPath), dst);
  }
  fs.setPermission(dst, new FsPermission((short)0755));
  FileStatus scFileStatus = fs.getFileStatus(dst);
  LocalResource scRsrc =
          LocalResource.newInstance(
                  ConverterUtils.getYarnUrlFromURI(dst.toUri()),
                  LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
                  scFileStatus.getLen(), scFileStatus.getModificationTime());
  localResources.put(fileDstPath, scRsrc);
  return dst;
}
 
Example 11
Source File: MapReduceJobConfiguration.java    From datawave with Apache License 2.0
protected void addSingleFile(File source, Path destination, String jobId, Job job, FileSystem fs) throws IOException {
    Path jarPath = new Path(source.getAbsolutePath());
    try {
        fs.copyFromLocalFile(false, false, jarPath, destination);
    } catch (IOException e) {
        // If the file already exists, ignore error
        if (!e.getMessage().endsWith("already exists"))
            throw e;
    }
    log.trace("Adding {} to the classpath for job {}.", jarPath, jobId);
    job.addFileToClassPath(destination);
}
 
Example 12
Source File: HadoopFileUtils.java    From mrgeo with Apache License 2.0
public static void copyToHdfs(Path fromDir, Path toDir, String fileName)
    throws IOException
{
  FileSystem fs = getFileSystem(toDir);
  fs.mkdirs(toDir);
  fs.copyFromLocalFile(false, true, new Path(fromDir, fileName), new Path(toDir, fileName));
}
 
Example 13
Source File: HdfsUtil.java    From spring-boot-tutorial with Creative Commons Attribution Share Alike 4.0 International
/**
 * Uploads a file to HDFS.
 *
 * @param sourcePath path of the source file on the local file system
 * @param targetPath target path on HDFS
 * @throws Exception if borrowing a FileSystem from the pool or the upload fails
 */
public void uploadFile(@NotBlank String sourcePath, @NotBlank String targetPath) throws Exception {
    FileSystem fileSystem = null;
    try {
        fileSystem = this.hdfsPool.borrowObject();
        // Invoke the file system's copy method; the first argument controls whether
        // the source file is deleted after the copy (true deletes it; the default is false)
        fileSystem.copyFromLocalFile(false, new Path(sourcePath), new Path(targetPath));
    } catch (Exception e) {
        log.error("upload failed", e);
        throw e;
    } finally {
        if (fileSystem != null) { this.hdfsPool.returnObject(fileSystem); }
    }
}
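The hdfsPool used above is an object pool of FileSystem handles. A minimal sketch of such a pool, assuming Apache Commons Pool 2 (the tutorial's actual pool class is not shown here):

import org.apache.commons.pool2.BasePooledObjectFactory;
import org.apache.commons.pool2.PooledObject;
import org.apache.commons.pool2.impl.DefaultPooledObject;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class HdfsPoolFactory extends BasePooledObjectFactory<FileSystem> {
    private final Configuration conf;

    public HdfsPoolFactory(Configuration conf) { this.conf = conf; }

    @Override
    public FileSystem create() throws Exception {
        // newInstance() bypasses the JVM-wide FileSystem cache,
        // so each pooled handle can be closed independently
        return FileSystem.newInstance(conf);
    }

    @Override
    public PooledObject<FileSystem> wrap(FileSystem fs) {
        return new DefaultPooledObject<>(fs);
    }
}

// usage: GenericObjectPool<FileSystem> hdfsPool = new GenericObjectPool<>(new HdfsPoolFactory(conf));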
 
Example 14
Source File: TestPigServer.java    From spork with Apache License 2.0
@Test
public void testRegisterRemoteScript() throws Throwable {
    String scriptName = "script.py";
    File scriptFile = File.createTempFile("tmp", "");
    PrintWriter pw = new PrintWriter(new FileWriter(scriptFile));
    pw.println("@outputSchema(\"word:chararray\")\ndef helloworld():\n    return 'Hello, World'");
    pw.close();

    FileSystem fs = cluster.getFileSystem();
    fs.copyFromLocalFile(new Path(scriptFile.getAbsolutePath()), new Path(scriptName));

    // find the absolute path for the directory so that it does not
    // depend on configuration
    String absPath = fs.getFileStatus(new Path(scriptName)).getPath().toString();

    Util.createInputFile(cluster, "testRegisterRemoteScript_input", new String[]{"1", "2"});
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.registerCode(absPath, "jython", "pig");
    pig.registerQuery("a = load 'testRegisterRemoteScript_input';");
    pig.registerQuery("b = foreach a generate pig.helloworld($0);");
    Iterator<Tuple> iter = pig.openIterator("b");

    assertTrue(iter.hasNext());
    Tuple t = iter.next();
    assertTrue(t.size() > 0);
    assertEquals("Hello, World", t.get(0));

    assertTrue(iter.hasNext());
    t = iter.next();
    assertTrue(t.size() > 0);
    assertEquals("Hello, World", t.get(0));

    assertFalse(iter.hasNext());
}
 
Example 15
Source File: WcsGeneratorTestAbstract.java    From mrgeo with Apache License 2.0
protected static void copyInputData() throws IOException
{
  final FileSystem fileSystem = HadoopFileUtils.getFileSystem(inputHdfs);

  Properties mrgeoProperties = MrGeoProperties.getInstance();

  mrgeoProperties.put(MrGeoConstants.MRGEO_COMMON_HOME, inputHdfs.toString());
  mrgeoProperties.put(MrGeoConstants.MRGEO_HDFS_IMAGE, inputHdfs.toString());
  mrgeoProperties.put(MrGeoConstants.MRGEO_HDFS_COLORSCALE, inputHdfs.toString());
  mrgeoProperties.put("base.path", inputHdfs.toString());

  fileSystem.copyFromLocalFile(false, true, new Path(input, "IslandsElevation-v2"), inputHdfs);
}
 
Example 16
Source File: MapReduceBackupMergeJob.java    From hbase with Apache License 2.0
/**
 * Copy file in DFS from p to newPath
 * @param fs file system
 * @param p old path
 * @param newPath new path
 * @throws IOException exception
 */
protected void copyFile(FileSystem fs, Path p, Path newPath) throws IOException {
  File f = File.createTempFile("data", "meta");
  Path localPath = new Path(f.getAbsolutePath());
  fs.copyToLocalFile(p, localPath);
  fs.copyFromLocalFile(localPath, newPath);
  boolean exists = fs.exists(newPath);
  if (!exists) {
    throw new IOException("Failed to copy meta file to: "+ newPath);
  }
}
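A side note on this pattern: the round trip goes through the checksummed local file system, so a ".<name>.crc" sidecar is written next to the temp file, and neither is cleaned up afterwards. Example 9 above shows that sidecar being removed explicitly before a later upload.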
 
Example 17
Source File: BulkIngestMapFileLoader.java    From datawave with Apache License 2.0
private void writeStats(Path[] jobDirectories) throws IOException {
    if (!INGEST_METRICS) {
        log.info("ingest metrics disabled");
    } else {
        long now = System.currentTimeMillis();
        for (Path p : jobDirectories)
            reporter.getCounter("MapFileLoader.EndTimes", p.getName()).increment(now);
        // Write out the metrics.
        // We are going to serialize the counters into a file in HDFS.
        // The context was set in the processKeyValues method below, and should not be null. We'll guard against NPE anyway
        FileSystem fs = getFileSystem(seqFileHdfs);
        RawLocalFileSystem rawFS = new RawLocalFileSystem();
        rawFS.setConf(conf);
        CompressionCodec cc = new GzipCodec();
        CompressionType ct = CompressionType.BLOCK;
        
        Counters c = reporter.getCounters();
        if (null != c && c.countCounters() > 0) {
            // Serialize the counters to a file in HDFS.
            Path src = new Path(File.createTempFile("MapFileLoader", ".metrics").getAbsolutePath());
            Writer writer = SequenceFile.createWriter(conf, Writer.file(rawFS.makeQualified(src)), Writer.keyClass(NullWritable.class),
                            Writer.valueClass(Counters.class), Writer.compression(ct, cc));
            writer.append(NullWritable.get(), c);
            writer.close();
            
            // Now we will try to move the file to HDFS.
            // Copy the file to the temp dir
            try {
                Path mDir = new Path(workDir, "MapFileLoaderMetrics");
                if (!fs.exists(mDir))
                    fs.mkdirs(mDir);
                Path dst = new Path(mDir, src.getName());
                log.info("Copying file " + src + " to " + dst);
                fs.copyFromLocalFile(false, true, src, dst);
                // If this worked, then remove the local file
                rawFS.delete(src, false);
                // also remove the residual crc file
                rawFS.delete(getCrcFile(src), false);
            } catch (IOException e) {
                // If an error occurs during the copy, the file is left in the local metrics directory.
                log.error("Error copying metrics file into HDFS; it will remain in the local metrics directory.");
            }
            
            // reset reporter so that old metrics don't persist over time
            this.reporter = new StandaloneStatusReporter();
        }
    }
}
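The getCrcFile helper is not shown here. Judging from the ".<name>.crc" sidecar convention also visible in Example 9, a plausible sketch (an assumption, not the project's actual code) is:

// Hypothetical reconstruction: the checksum sidecar that the local
// ChecksumFileSystem writes next to a file is ".<name>.crc" in the same directory
private Path getCrcFile(Path path) {
    return new Path(path.getParent(), "." + path.getName() + ".crc");
}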
 
Example 18
Source File: OozieLocalServerIntegrationTest.java    From hadoop-mini-clusters with Apache License 2.0
@Test
public void testSubmitWorkflow() throws Exception {

    LOG.info("OOZIE: Test Submit Workflow Start");

    FileSystem hdfsFs = hdfsLocalCluster.getHdfsFileSystemHandle();
    OozieClient oozie = oozieLocalServer.getOozieClient();

    Path appPath = new Path(hdfsFs.getHomeDirectory(), "testApp");
    hdfsFs.mkdirs(new Path(appPath, "lib"));
    Path workflow = new Path(appPath, "workflow.xml");

    // Setup input directory and file
    hdfsFs.mkdirs(new Path(TEST_INPUT_DIR));
    hdfsFs.copyFromLocalFile(
            new Path(getClass().getClassLoader().getResource(TEST_INPUT_FILE).toURI()), new Path(TEST_INPUT_DIR));

    //write workflow.xml
    String wfApp = "<workflow-app name=\"sugar-option-decision\" xmlns=\"uri:oozie:workflow:0.5\">\n" +
            "  <global>\n" +
            "    <job-tracker>${jobTracker}</job-tracker>\n" +
            "    <name-node>${nameNode}</name-node>\n" +
            "    <configuration>\n" +
            "      <property>\n" +
            "        <name>mapreduce.output.fileoutputformat.outputdir</name>\n" +
            "        <value>" + TEST_OUTPUT_DIR + "</value>\n" +
            "      </property>\n" +
            "      <property>\n" +
            "        <name>mapreduce.input.fileinputformat.inputdir</name>\n" +
            "        <value>" + TEST_INPUT_DIR + "</value>\n" +
            "      </property>\n" +
            "    </configuration>\n" +
            "  </global>\n" +
            "  <start to=\"first\"/>\n" +
            "  <action name=\"first\">\n" +
            "    <map-reduce> <prepare><delete path=\"" + TEST_OUTPUT_DIR + "\"/></prepare></map-reduce>\n" +
            "    <ok to=\"decision-second-option\"/>\n" +
            "    <error to=\"kill\"/>\n" +
            "  </action>\n" +
            "  <decision name=\"decision-second-option\">\n" +
            "    <switch>\n" +
            "      <case to=\"option\">${doOption}</case>\n" +
            "      <default to=\"second\"/>\n" +
            "    </switch>\n" +
            "  </decision>\n" +
            "  <action name=\"option\">\n" +
            "    <map-reduce> <prepare><delete path=\"" + TEST_OUTPUT_DIR + "\"/></prepare></map-reduce>\n" +
            "    <ok to=\"second\"/>\n" +
            "    <error to=\"kill\"/>\n" +
            "  </action>\n" +
            "  <action name=\"second\">\n" +
            "    <map-reduce> <prepare><delete path=\"" + TEST_OUTPUT_DIR + "\"/></prepare></map-reduce>\n" +
            "    <ok to=\"end\"/>\n" +
            "    <error to=\"kill\"/>\n" +
            "  </action>\n" +
            "  <kill name=\"kill\">\n" +
            "    <message>\n" +
            "      Failed to workflow, error message[${wf: errorMessage (wf: lastErrorNode ())}]\n" +
            "    </message>\n" +
            "  </kill>\n" +
            "  <end name=\"end\"/>\n" +
            "</workflow-app>";

    Writer writer = new OutputStreamWriter(hdfsFs.create(workflow));
    writer.write(wfApp);
    writer.close();

    //write job.properties
    Properties conf = oozie.createConfiguration();
    conf.setProperty(OozieClient.APP_PATH, workflow.toString());
    conf.setProperty(OozieClient.USER_NAME, UserGroupInformation.getCurrentUser().getUserName());
    conf.setProperty("nameNode", "hdfs://localhost:" + hdfsLocalCluster.getHdfsNamenodePort());
    conf.setProperty("jobTracker", mrLocalCluster.getResourceManagerAddress());
    conf.setProperty("doOption", "true");

    //submit and check
    final String jobId = oozie.run(conf);
    WorkflowJob wf = oozie.getJobInfo(jobId);
    assertNotNull(wf);
    assertEquals(WorkflowJob.Status.RUNNING, wf.getStatus());

    while (true) {
        Thread.sleep(1000);
        wf = oozie.getJobInfo(jobId);
        WorkflowJob.Status status = wf.getStatus();
        if (status == WorkflowJob.Status.FAILED || status == WorkflowJob.Status.KILLED
                || status == WorkflowJob.Status.PREP || status == WorkflowJob.Status.SUCCEEDED) {
            break;
        }
    }

    wf = oozie.getJobInfo(jobId);
    assertEquals(WorkflowJob.Status.SUCCEEDED, wf.getStatus());

    LOG.info("OOZIE: Workflow: {}", wf.toString());
    hdfsFs.close();

}
 
Example 19
Source File: Cluster.java    From spork with Apache License 2.0
public void copyFromLocalFile(Path local, Path destination, boolean overwrite)
    throws IOException {
  FileSystem fs = local.getFileSystem(configuration);
  fs.copyFromLocalFile(false, overwrite, local, destination);
}
 
Example 20
Source File: HadoopPopularWords.java    From ignite with Apache License 2.0
/**
 * Prepare job's data: cleanup result directories that might have left over
 * after previous runs, copy input files from the local file system into DFS.
 *
 * @param fs Distributed file system to use in job.
 * @throws IOException If failed.
 */
private void prepareDirectories(FileSystem fs) throws IOException {
    X.println(">>> Cleaning up DFS result directory: " + RESULT_DFS_DIR);

    fs.delete(RESULT_DFS_DIR, true);

    X.println(">>> Cleaning up DFS input directory: " + BOOKS_DFS_DIR);

    fs.delete(BOOKS_DFS_DIR, true);

    X.println(">>> Copy local files into DFS input directory: " + BOOKS_DFS_DIR);

    fs.copyFromLocalFile(BOOKS_LOCAL_DIR, BOOKS_DFS_DIR);
}