Java Code Examples for org.apache.hadoop.fs.FileSystem#copyFromLocalFile()
The following examples show how to use org.apache.hadoop.fs.FileSystem#copyFromLocalFile(). These examples are extracted from open source projects; each one notes its original project, source file, and license.
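Before the project-specific examples, here is a minimal sketch of the two call forms that recur below: the two-argument copyFromLocalFile(src, dst), and the four-argument overload that takes explicit delSrc and overwrite flags. The class name, paths, and configuration in this sketch are placeholders chosen for illustration, not taken from any of the projects listed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Resolves the default file system configured in core-site.xml
    // (for example an hdfs:// URI); placeholder setup for illustration only.
    FileSystem fs = FileSystem.get(conf);

    Path src = new Path("/tmp/local-data.txt");     // local source (placeholder path)
    Path dst = new Path("/user/example/data.txt");  // remote destination (placeholder path)

    // Two-argument form: copies the local file to the target file system.
    fs.copyFromLocalFile(src, dst);

    // Four-argument form: delSrc = false keeps the local copy,
    // overwrite = true replaces an existing destination file.
    fs.copyFromLocalFile(false, true, src, dst);

    fs.close();
  }
}

Example 1 below uses the two-argument form in a real upload path, while Examples 6, 10, and 18 use the four-argument form to force overwrites.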
Example 1
Source Project: submarine | File: FileSystemOperations.java | License: Apache License 2.0
public Path uploadToRemoteFile(Path stagingDir, String fileToUpload)
    throws IOException {
  FileSystem fs = remoteDirectoryManager.getDefaultFileSystem();

  // Upload to remote FS under staging area
  File localFile = new File(fileToUpload);
  if (!localFile.exists()) {
    throw new FileNotFoundException(
        "Trying to upload file " + localFile.getAbsolutePath()
            + " to remote, but could not find local file!");
  }
  String filename = localFile.getName();

  Path uploadedFilePath = new Path(stagingDir, filename);
  if (!uploadedFiles.contains(uploadedFilePath)) {
    if (SubmarineLogs.isVerbose()) {
      LOG.info("Copying local file " + fileToUpload + " to remote "
          + uploadedFilePath);
    }
    fs.copyFromLocalFile(new Path(fileToUpload), uploadedFilePath);
    uploadedFiles.add(uploadedFilePath);
  }
  return uploadedFilePath;
}
Example 2
Source Project: hraven | File: JobFilePartitioner.java | License: Apache License 2.0
/**
 * @param hdfs FileSystem handle
 * @param f file to copy to HDFS
 * @param outputPath
 * @param skipExisting skip if the file already exists in the target. File will be
 *          overwritten if already there and this argument is false.
 * @throws IOException if target directory cannot be created or file cannot be
 *           copied to target directory.
 */
private void processPlainFile(FileSystem hdfs, File f, Path outputPath,
    boolean skipExisting) throws IOException {
  long fileModTime = f.lastModified();
  Path targetDir = getTargetDirectory(hdfs, outputPath, fileModTime);

  boolean doCopy = true;
  Path sourceFile = new Path(f.getPath());
  if (skipExisting) {
    Path target = new Path(targetDir, sourceFile.getName());
    if (hdfs.exists(target)) {
      doCopy = false;
    }
  }
  if (doCopy) {
    hdfs.copyFromLocalFile(sourceFile, targetDir);
  }
}
Example 3
Source Project: celos | File: HdfsDeployer.java | License: Apache License 2.0
public void deploy() throws Exception {
  FileSystem fs = context.getFileSystem();
  final String hdfsDirLocalPath = String.format(LOCAL_HDFS_PATTERN, context.getDeployDir());
  final File hdfsDirLocal = new File(hdfsDirLocalPath);
  if (!hdfsDirLocal.exists()) {
    throw new IllegalStateException(hdfsDirLocalPath + " not found local FS");
  }
  undeploy();
  Path dst = getDestinationHdfsPath();
  fs.mkdirs(dst);
  String[] childFiles = hdfsDirLocal.list();
  for (String child : childFiles) {
    fs.copyFromLocalFile(new Path(hdfsDirLocalPath, child), dst);
  }
}
Example 4
Source Project: incubator-sentry | File: AbstractSolrSentryTestBase.java | License: Apache License 2.0
public static File setupSentry() throws Exception {
  File sentrySite = File.createTempFile("sentry-site", "xml");
  sentrySite.deleteOnExit();
  File authProviderDir = new File(RESOURCES_DIR, "sentry");
  String authProviderName = "test-authz-provider.ini";
  FileSystem clusterFs = dfsCluster.getFileSystem();
  clusterFs.copyFromLocalFile(false,
      new Path(authProviderDir.toString(), authProviderName),
      new Path(authProviderName));

  // need to write sentry-site at execution time because we don't know
  // the location of sentry.solr.provider.resource beforehand
  StringBuilder sentrySiteData = new StringBuilder();
  sentrySiteData.append("<configuration>\n");
  addPropertyToSentry(sentrySiteData, "sentry.provider",
      "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider");
  addPropertyToSentry(sentrySiteData, "sentry.solr.provider.resource",
      clusterFs.getWorkingDirectory() + File.separator + authProviderName);
  sentrySiteData.append("</configuration>\n");
  FileUtils.writeStringToFile(sentrySite, sentrySiteData.toString());
  return sentrySite;
}
Example 5
Source Project: hbase | File: MapReduceBackupMergeJob.java | License: Apache License 2.0
/**
 * Copy file in DFS from p to newPath
 * @param fs file system
 * @param p old path
 * @param newPath new path
 * @throws IOException exception
 */
protected void copyFile(FileSystem fs, Path p, Path newPath) throws IOException {
  File f = File.createTempFile("data", "meta");
  Path localPath = new Path(f.getAbsolutePath());
  fs.copyToLocalFile(p, localPath);
  fs.copyFromLocalFile(localPath, newPath);
  boolean exists = fs.exists(newPath);
  if (!exists) {
    throw new IOException("Failed to copy meta file to: " + newPath);
  }
}
Example 6
Source Project: mrgeo | File: WcsGeneratorTestAbstract.java | License: Apache License 2.0
protected static void copyInputData() throws IOException {
  final FileSystem fileSystem = HadoopFileUtils.getFileSystem(inputHdfs);

  Properties mrgeoProperties = MrGeoProperties.getInstance();
  mrgeoProperties.put(MrGeoConstants.MRGEO_COMMON_HOME, inputHdfs.toString());
  mrgeoProperties.put(MrGeoConstants.MRGEO_HDFS_IMAGE, inputHdfs.toString());
  mrgeoProperties.put(MrGeoConstants.MRGEO_HDFS_COLORSCALE, inputHdfs.toString());
  mrgeoProperties.put("base.path", inputHdfs.toString());

  fileSystem.copyFromLocalFile(false, true,
      new Path(input, "IslandsElevation-v2"), inputHdfs);
}
Example 7
Source Project: spork | File: TestPigServer.java | License: Apache License 2.0
@Test
public void testRegisterRemoteScript() throws Throwable {
  String scriptName = "script.py";
  File scriptFile = File.createTempFile("tmp", "");
  PrintWriter pw = new PrintWriter(new FileWriter(scriptFile));
  pw.println("@outputSchema(\"word:chararray\")\ndef helloworld():\n return 'Hello, World'");
  pw.close();

  FileSystem fs = cluster.getFileSystem();
  fs.copyFromLocalFile(new Path(scriptFile.getAbsolutePath()), new Path(scriptName));

  // find the absolute path for the directory so that it does not
  // depend on configuration
  String absPath = fs.getFileStatus(new Path(scriptName)).getPath().toString();

  Util.createInputFile(cluster, "testRegisterRemoteScript_input", new String[]{"1", "2"});
  PigServer pig = new PigServer(cluster.getExecType(), properties);
  pig.registerCode(absPath, "jython", "pig");
  pig.registerQuery("a = load 'testRegisterRemoteScript_input';");
  pig.registerQuery("b = foreach a generate pig.helloworld($0);");

  Iterator<Tuple> iter = pig.openIterator("b");
  assertTrue(iter.hasNext());
  Tuple t = iter.next();
  assertTrue(t.size() > 0);
  assertEquals("Hello, World", t.get(0));

  assertTrue(iter.hasNext());
  t = iter.next();
  assertTrue(t.size() > 0);
  assertEquals("Hello, World", t.get(0));

  assertFalse(iter.hasNext());
}
Example 8
Source Project: spring-boot-tutorial | File: HdfsUtil.java | License: Creative Commons Attribution Share Alike 4.0 International
/**
 * Upload a file.
 *
 * @param sourcePath path of the source file
 * @param targetPath target path
 * @throws IOException
 */
public void uploadFile(@NotBlank String sourcePath, @NotBlank String targetPath)
    throws Exception {
  FileSystem fileSystem = null;
  try {
    fileSystem = this.hdfsPool.borrowObject();
    // Call the file system's copy method; the first argument controls whether the
    // source file is deleted (true deletes it) and defaults to false
    fileSystem.copyFromLocalFile(false, new Path(sourcePath), new Path(targetPath));
  } catch (Exception e) {
    log.error("upload failed", e);
    throw e;
  } finally {
    if (fileSystem != null) {
      this.hdfsPool.returnObject(fileSystem);
    }
  }
}
Example 9
Source Project: pentaho-hadoop-shims | File: DistributedCacheUtilImpl.java | License: Apache License 2.0
/**
 * Stages the source file or folder to a Hadoop file system and sets their permission and
 * replication value appropriately to be used with the Distributed Cache. WARNING: This will
 * delete the contents of dest before staging the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents
 *                  will be copied into dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name
 *                  will be exactly dest. If source is a folder its contents will be copied
 *                  into dest. For more info see
 *                  {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not an exception will be thrown.
 * @throws IOException         Destination exists and is not a directory
 * @throws KettleFileException Source does not exist or destination exists and overwrite is false.
 */
public void stageForCache( FileObject source, FileSystem fs, Path dest, boolean overwrite, boolean isPublic )
  throws IOException, KettleFileException {
  if ( !source.exists() ) {
    throw new KettleFileException( BaseMessages
      .getString( DistributedCacheUtilImpl.class, "DistributedCacheUtil.SourceDoesNotExist", source ) );
  }

  if ( fs.exists( dest ) ) {
    if ( overwrite ) {
      // It is a directory, clear it out
      fs.delete( dest, true );
    } else {
      throw new KettleFileException( BaseMessages
        .getString( DistributedCacheUtilImpl.class, "DistributedCacheUtil.DestinationExists",
          dest.toUri().getPath() ) );
    }
  }

  // Use the same replication we'd use for submitting jobs
  short replication = (short) fs.getConf().getInt( "mapred.submit.replication", 10 );

  if ( source.getURL().toString().endsWith( CONFIG_PROPERTIES ) ) {
    copyConfigProperties( source, fs, dest );
  } else {
    Path local = new Path( source.getURL().getPath() );
    fs.copyFromLocalFile( local, dest );
  }

  if ( isPublic ) {
    fs.setPermission( dest, PUBLIC_CACHED_FILE_PERMISSION );
  } else {
    fs.setPermission( dest, CACHED_FILE_PERMISSION );
  }
  fs.setReplication( dest, replication );
}
Example 10
Source Project: mrgeo | File: HadoopFileUtils.java | License: Apache License 2.0
public static void copyToHdfs(Path fromDir, Path toDir, String fileName) throws IOException {
  FileSystem fs = getFileSystem(toDir);
  fs.mkdirs(toDir);
  fs.copyFromLocalFile(false, true, new Path(fromDir, fileName), new Path(toDir, fileName));
}
Example 11
Source Project: datawave | File: MapReduceJobConfiguration.java | License: Apache License 2.0
protected void addSingleFile(File source, Path destination, String jobId, Job job, FileSystem fs)
    throws IOException {
  Path jarPath = new Path(source.getAbsolutePath());
  try {
    fs.copyFromLocalFile(false, false, jarPath, destination);
  } catch (IOException e) {
    // If the file already exists, ignore error
    if (!e.getMessage().endsWith("already exists"))
      throw e;
  }
  log.trace("Adding {} to the classpath for job {}.", jarPath, jobId);
  job.addFileToClassPath(destination);
}
Example 12
Source Project: metron | File: Client.java | License: Apache License 2.0
private Path addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath,
    String appId, Map<String, LocalResource> localResources, String resources)
    throws IOException {
  String suffix = appName + "/" + appId + "/" + fileDstPath;
  Path dst = new Path(fs.getHomeDirectory(), suffix);
  if (fileSrcPath == null) {
    FSDataOutputStream ostream = null;
    try {
      ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
      ostream.writeUTF(resources);
    } finally {
      IOUtils.closeQuietly(ostream);
    }
  } else {
    fs.copyFromLocalFile(new Path(fileSrcPath), dst);
  }
  fs.setPermission(dst, new FsPermission((short) 0755));
  FileStatus scFileStatus = fs.getFileStatus(dst);
  LocalResource scRsrc = LocalResource.newInstance(
      ConverterUtils.getYarnUrlFromURI(dst.toUri()),
      LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
      scFileStatus.getLen(), scFileStatus.getModificationTime());
  localResources.put(fileDstPath, scRsrc);
  return dst;
}
Example 13
Source Project: attic-apex-core | File: StramClientUtils.java | License: Apache License 2.0
public static void copyFromLocalFileNoChecksum(FileSystem fs, File fromLocal, Path toDFS)
    throws IOException {
  // This is to avoid the hadoop FileSystem API performing a checksum on the local file.
  // This "feature" has caused a lot of headache because the local file can be copied from
  // HDFS and modified, and the checksum will fail if the file is again copied to HDFS
  try {
    new File(fromLocal.getParentFile(), "." + fromLocal.getName() + ".crc").delete();
  } catch (Exception ex) {
    // ignore
  }
  fs.copyFromLocalFile(new Path(fromLocal.toURI()), toDFS);
}
Example 14
Source Project: spork | File: TestPigServerWithMacros.java | License: Apache License 2.0
@Test
public void testRegisterRemoteMacro() throws Throwable {
  PigServer pig = new PigServer(cluster.getExecType(), cluster.getProperties());

  String macroName = "util.pig";
  File macroFile = File.createTempFile("tmp", "");
  PrintWriter pw = new PrintWriter(new FileWriter(macroFile));
  pw.println("DEFINE row_count(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };");
  pw.close();

  FileSystem fs = cluster.getFileSystem();
  fs.copyFromLocalFile(new Path(macroFile.getAbsolutePath()), new Path(macroName));

  // find the absolute path for the directory so that it does not
  // depend on configuration
  String absPath = fs.getFileStatus(new Path(macroName)).getPath().toString();

  Util.createInputFile(cluster, "testRegisterRemoteMacro_input", new String[]{"1", "2"});

  pig.registerQuery("import '" + absPath + "';");
  pig.registerQuery("a = load 'testRegisterRemoteMacro_input';");
  pig.registerQuery("b = row_count(a);");
  Iterator<Tuple> iter = pig.openIterator("b");

  assertEquals(2L, ((Long) iter.next().get(0)).longValue());

  pig.shutdown();
}
Example 15
Source Project: stratosphere | File: Utils.java | License: Apache License 2.0
/**
 * @return Path to remote file (usually hdfs)
 * @throws IOException
 */
public static Path setupLocalResource(Configuration conf, FileSystem fs, String appId,
    Path localRsrcPath, LocalResource appMasterJar, Path homedir) throws IOException {
  // copy to HDFS
  String suffix = ".stratosphere/" + appId + "/" + localRsrcPath.getName();
  Path dst = new Path(homedir, suffix);
  LOG.info("Copying from " + localRsrcPath + " to " + dst);
  fs.copyFromLocalFile(localRsrcPath, dst);
  registerLocalResource(fs, dst, appMasterJar);
  return dst;
}
Example 16
Source Project: hadoop | File: Client.java | License: Apache License 2.0
private void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath,
    String appId, Map<String, LocalResource> localResources, String resources)
    throws IOException {
  String suffix = appName + "/" + appId + "/" + fileDstPath;
  Path dst = new Path(fs.getHomeDirectory(), suffix);
  if (fileSrcPath == null) {
    FSDataOutputStream ostream = null;
    try {
      ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
      ostream.writeUTF(resources);
    } finally {
      IOUtils.closeQuietly(ostream);
    }
  } else {
    fs.copyFromLocalFile(new Path(fileSrcPath), dst);
  }
  FileStatus scFileStatus = fs.getFileStatus(dst);
  LocalResource scRsrc = LocalResource.newInstance(
      ConverterUtils.getYarnUrlFromURI(dst.toUri()),
      LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
      scFileStatus.getLen(), scFileStatus.getModificationTime());
  localResources.put(fileDstPath, scRsrc);
}
Example 17
Source Project: spork | File: Cluster.java | License: Apache License 2.0
public void copyFromLocalFile(Path local, Path destination, boolean overwrite) throws IOException {
  FileSystem fs = local.getFileSystem(configuration);
  fs.copyFromLocalFile(false, overwrite, local, destination);
}
Example 18
Source Project: datawave | File: BulkIngestMapFileLoader.java | License: Apache License 2.0
private void writeStats(Path[] jobDirectories) throws IOException {
  if (!INGEST_METRICS) {
    log.info("ingest metrics disabled");
  } else {
    long now = System.currentTimeMillis();
    for (Path p : jobDirectories)
      reporter.getCounter("MapFileLoader.EndTimes", p.getName()).increment(now);
    // Write out the metrics.
    // We are going to serialize the counters into a file in HDFS.
    // The context was set in the processKeyValues method below, and should not be null. We'll guard against NPE anyway
    FileSystem fs = getFileSystem(seqFileHdfs);
    RawLocalFileSystem rawFS = new RawLocalFileSystem();
    rawFS.setConf(conf);
    CompressionCodec cc = new GzipCodec();
    CompressionType ct = CompressionType.BLOCK;
    Counters c = reporter.getCounters();
    if (null != c && c.countCounters() > 0) {
      // Serialize the counters to a file in HDFS.
      Path src = new Path(File.createTempFile("MapFileLoader", ".metrics").getAbsolutePath());
      Writer writer = SequenceFile.createWriter(conf, Writer.file(rawFS.makeQualified(src)),
          Writer.keyClass(NullWritable.class), Writer.valueClass(Counters.class),
          Writer.compression(ct, cc));
      writer.append(NullWritable.get(), c);
      writer.close();

      // Now we will try to move the file to HDFS.
      // Copy the file to the temp dir
      try {
        Path mDir = new Path(workDir, "MapFileLoaderMetrics");
        if (!fs.exists(mDir))
          fs.mkdirs(mDir);
        Path dst = new Path(mDir, src.getName());
        log.info("Copying file " + src + " to " + dst);
        fs.copyFromLocalFile(false, true, src, dst);
        // If this worked, then remove the local file
        rawFS.delete(src, false);
        // also remove the residual crc file
        rawFS.delete(getCrcFile(src), false);
      } catch (IOException e) {
        // If an error occurs in the copy, then we will leave in the local metrics directory.
        log.error("Error copying metrics file into HDFS, will remain in metrics directory.");
      }

      // reset reporter so that old metrics don't persist over time
      this.reporter = new StandaloneStatusReporter();
    }
  }
}
Example 19
Source Project: hadoop-mini-clusters | File: OozieLocalServerIntegrationTest.java | License: Apache License 2.0
@Test
public void testSubmitWorkflow() throws Exception {
  LOG.info("OOZIE: Test Submit Workflow Start");

  FileSystem hdfsFs = hdfsLocalCluster.getHdfsFileSystemHandle();
  OozieClient oozie = oozieLocalServer.getOozieClient();

  Path appPath = new Path(hdfsFs.getHomeDirectory(), "testApp");
  hdfsFs.mkdirs(new Path(appPath, "lib"));
  Path workflow = new Path(appPath, "workflow.xml");

  // Setup input directory and file
  hdfsFs.mkdirs(new Path(TEST_INPUT_DIR));
  hdfsFs.copyFromLocalFile(
      new Path(getClass().getClassLoader().getResource(TEST_INPUT_FILE).toURI()),
      new Path(TEST_INPUT_DIR));

  //write workflow.xml
  String wfApp =
      "<workflow-app name=\"sugar-option-decision\" xmlns=\"uri:oozie:workflow:0.5\">\n" +
      " <global>\n" +
      " <job-tracker>${jobTracker}</job-tracker>\n" +
      " <name-node>${nameNode}</name-node>\n" +
      " <configuration>\n" +
      " <property>\n" +
      " <name>mapreduce.output.fileoutputformat.outputdir</name>\n" +
      " <value>" + TEST_OUTPUT_DIR + "</value>\n" +
      " </property>\n" +
      " <property>\n" +
      " <name>mapreduce.input.fileinputformat.inputdir</name>\n" +
      " <value>" + TEST_INPUT_DIR + "</value>\n" +
      " </property>\n" +
      " </configuration>\n" +
      " </global>\n" +
      " <start to=\"first\"/>\n" +
      " <action name=\"first\">\n" +
      " <map-reduce> <prepare><delete path=\"" + TEST_OUTPUT_DIR + "\"/></prepare></map-reduce>\n" +
      " <ok to=\"decision-second-option\"/>\n" +
      " <error to=\"kill\"/>\n" +
      " </action>\n" +
      " <decision name=\"decision-second-option\">\n" +
      " <switch>\n" +
      " <case to=\"option\">${doOption}</case>\n" +
      " <default to=\"second\"/>\n" +
      " </switch>\n" +
      " </decision>\n" +
      " <action name=\"option\">\n" +
      " <map-reduce> <prepare><delete path=\"" + TEST_OUTPUT_DIR + "\"/></prepare></map-reduce>\n" +
      " <ok to=\"second\"/>\n" +
      " <error to=\"kill\"/>\n" +
      " </action>\n" +
      " <action name=\"second\">\n" +
      " <map-reduce> <prepare><delete path=\"" + TEST_OUTPUT_DIR + "\"/></prepare></map-reduce>\n" +
      " <ok to=\"end\"/>\n" +
      " <error to=\"kill\"/>\n" +
      " </action>\n" +
      " <kill name=\"kill\">\n" +
      " <message>\n" +
      " Failed to workflow, error message[${wf: errorMessage (wf: lastErrorNode ())}]\n" +
      " </message>\n" +
      " </kill>\n" +
      " <end name=\"end\"/>\n" +
      "</workflow-app>";

  Writer writer = new OutputStreamWriter(hdfsFs.create(workflow));
  writer.write(wfApp);
  writer.close();

  //write job.properties
  Properties conf = oozie.createConfiguration();
  conf.setProperty(OozieClient.APP_PATH, workflow.toString());
  conf.setProperty(OozieClient.USER_NAME, UserGroupInformation.getCurrentUser().getUserName());
  conf.setProperty("nameNode", "hdfs://localhost:" + hdfsLocalCluster.getHdfsNamenodePort());
  conf.setProperty("jobTracker", mrLocalCluster.getResourceManagerAddress());
  conf.setProperty("doOption", "true");

  //submit and check
  final String jobId = oozie.run(conf);
  WorkflowJob wf = oozie.getJobInfo(jobId);
  assertNotNull(wf);
  assertEquals(WorkflowJob.Status.RUNNING, wf.getStatus());

  while (true) {
    Thread.sleep(1000);
    wf = oozie.getJobInfo(jobId);
    if (wf.getStatus() == WorkflowJob.Status.FAILED || wf.getStatus() == WorkflowJob.Status.KILLED
        || wf.getStatus() == WorkflowJob.Status.PREP || wf.getStatus() == WorkflowJob.Status.SUCCEEDED) {
      break;
    }
  }

  wf = oozie.getJobInfo(jobId);
  assertEquals(WorkflowJob.Status.SUCCEEDED, wf.getStatus());

  LOG.info("OOZIE: Workflow: {}", wf.toString());
  hdfsFs.close();
}
Example 20
Source Project: ignite | File: HadoopPopularWords.java | License: Apache License 2.0
/**
 * Prepare job's data: cleanup result directories that might have left over
 * after previous runs, copy input files from the local file system into DFS.
 *
 * @param fs Distributed file system to use in job.
 * @throws IOException If failed.
 */
private void prepareDirectories(FileSystem fs) throws IOException {
  X.println(">>> Cleaning up DFS result directory: " + RESULT_DFS_DIR);
  fs.delete(RESULT_DFS_DIR, true);

  X.println(">>> Cleaning up DFS input directory: " + BOOKS_DFS_DIR);
  fs.delete(BOOKS_DFS_DIR, true);

  X.println(">>> Copy local files into DFS input directory: " + BOOKS_DFS_DIR);
  fs.copyFromLocalFile(BOOKS_LOCAL_DIR, BOOKS_DFS_DIR);
}