Java Code Examples for org.apache.hadoop.fs.FileSystem#exists()

The following examples show how to use org.apache.hadoop.fs.FileSystem#exists(). They are extracted from open source projects; where known, the source project, file, and license are noted above each example.
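As a quick orientation before the project examples, here is a minimal, self-contained sketch of the check-then-act pattern most of them follow: probe a path with exists() before deleting or creating it. The configuration, path, and class name used here are hypothetical and chosen only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Resolve the filesystem backing the (hypothetical) default configuration.
    FileSystem fs = FileSystem.get(conf);
    Path outputDir = new Path("/tmp/exists-sketch");  // hypothetical path

    // Recreate the output directory: delete it only if it is already there.
    if (fs.exists(outputDir)) {
      fs.delete(outputDir, true);  // recursive delete, as in the examples below
    }
    if (!fs.mkdirs(outputDir)) {
      throw new IOException("Mkdirs failed to create " + outputDir);
    }
  }
}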
Example 1
Source Project: big-c   File: TestMiniMRClientCluster.java    License: Apache License 2.0
@BeforeClass
public static void setup() throws IOException {
  final Configuration conf = new Configuration();
  final Path TEST_ROOT_DIR = new Path(System.getProperty("test.build.data",
      "/tmp"));
  testdir = new Path(TEST_ROOT_DIR, "TestMiniMRClientCluster");
  inDir = new Path(testdir, "in");
  outDir = new Path(testdir, "out");

  FileSystem fs = FileSystem.getLocal(conf);
  if (fs.exists(testdir) && !fs.delete(testdir, true)) {
    throw new IOException("Could not delete " + testdir);
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir);
  }

  for (int i = 0; i < inFiles.length; i++) {
    inFiles[i] = new Path(inDir, "part_" + i);
    createFile(inFiles[i], conf);
  }

  // create the mini cluster to be used for the tests
  mrCluster = MiniMRClientClusterFactory.create(
      InternalClass.class, 1, new Configuration());
}
 
Example 2
Source Project: nifi   File: MoveHDFS.java    License: Apache License 2.0
protected Set<Path> selectFiles(final FileSystem hdfs, final Path inputPath, Set<Path> filesVisited)
        throws IOException {
    if (null == filesVisited) {
        filesVisited = new HashSet<>();
    }

    if (!hdfs.exists(inputPath)) {
        throw new IOException("Selection directory " + inputPath.toString() + " doesn't appear to exist!");
    }

    final Set<Path> files = new HashSet<>();

    FileStatus inputStatus = hdfs.getFileStatus(inputPath);

    if (inputStatus.isDirectory()) {
        for (final FileStatus file : hdfs.listStatus(inputPath)) {
            final Path canonicalFile = file.getPath();

            if (!filesVisited.add(canonicalFile)) { // skip files we've already seen (may be looping directory links)
                continue;
            }

            if (!file.isDirectory() && processorConfig.getPathFilter(inputPath).accept(canonicalFile)) {
                files.add(canonicalFile);

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug(this + " selected file at path: " + canonicalFile.toString());
                }
            }
        }
    } else if (inputStatus.isFile()) {
        files.add(inputPath);
    }
    return files;
}
 
Example 3
Source Project: hadoop   File: TestMapperReducerCleanup.java    License: Apache License 2.0
@Test
public void testReduceCleanup() throws Exception {
  reset();
  
  Job job = Job.getInstance();

  Path inputPath = createInput();
  Path outputPath = getOutputPath();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);

  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  job.setMapperClass(TrackingTokenizerMapper.class);
  job.setReducerClass(FailingReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(TrackingTextInputFormat.class);
  job.setOutputFormatClass(TrackingTextOutputFormat.class);
  job.setNumReduceTasks(1);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.waitForCompletion(true);

  Assert.assertTrue(mapCleanup);
  Assert.assertTrue(reduceCleanup);
  Assert.assertTrue(recordReaderCleanup);
  Assert.assertTrue(recordWriterCleanup);
}
 
Example 4
Source Project: sqoop-on-spark   File: FileUtils.java    License: Apache License 2.0
public static void mkdirs(String directory) throws IOException {
  Path path = new Path(directory);
  FileSystem fs = path.getFileSystem(new Configuration());
  if (!fs.exists(path)) {
    fs.mkdirs(path);
  }
}
 
Example 5
Source Project: Bats   File: SchemaHandler.java    License: Apache License 2.0
/**
 * If a raw schema was present in the create schema command, returns the schema from the command;
 * otherwise loads the raw schema from the given file.
 *
 * @param sqlCall sql create schema call
 * @return string representation of raw schema (column names, types and nullability)
 */
private String getSchemaString(SqlSchema.Create sqlCall) {
  if (sqlCall.hasSchema()) {
    return sqlCall.getSchema();
  }

  Path path = new Path(sqlCall.getLoad());
  try {
    FileSystem rawFs = path.getFileSystem(new Configuration());
    FileSystem fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), rawFs.getConf());

    if (!fs.exists(path)) {
      throw UserException.resourceError()
        .message("File with raw schema [%s] does not exist", path.toUri().getPath())
        .build(logger);
    }

    try (InputStream stream = fs.open(path)) {
      return IOUtils.toString(stream);
    }

  } catch (IOException e) {
    throw UserException.resourceError(e)
      .message("Unable to load raw schema from file %s", path.toUri().getPath())
      .build(logger);
  }
}
 
Example 6
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);

    job.setJarByClass(CommonFriendStep2.class);
    // set the job's Mapper and Reducer classes
    job.setMapperClass(CommonFansStep2Mapper.class);
    job.setReducerClass(CommonFansStep2Reducer.class);

    // set the key/value output types for the map phase
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // set the key/value output types for the reduce phase
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // if the output directory already exists, delete it so it does not have to be removed by hand between test runs
    FileSystem fs = FileSystem.get(conf);
    Path out = new Path(args[1]);
    if(fs.exists(out)) {
        fs.delete(out, true);
    }

    // set the data input and output directories
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job,out);

    // submit the job to YARN or the local runner
    job.waitForCompletion(true);

}
 
Example 7
/**
 * Construct the filename for a late file. If the file does not exist in the output dir, retain the original name.
 * Otherwise, append a LATE_COMPONENT{RandomInteger} to the original file name.
 * For example, if file "part1.123.avro" exists in dir "/a/b/", the returned path will be "/a/b/part1.123.late12345.avro".
 */
public Path constructLateFilePath(String originalFilename, FileSystem fs, Path outputDir) throws IOException {
  if (!fs.exists(new Path(outputDir, originalFilename))) {
    return new Path(outputDir, originalFilename);
  }
  return constructLateFilePath(FilenameUtils.getBaseName(originalFilename) + LATE_COMPONENT
      + new Random().nextInt(Integer.MAX_VALUE) + SEPARATOR + FilenameUtils.getExtension(originalFilename), fs,
      outputDir);
}
 
Example 8
Source Project: timely   File: GorillaStore.java    License: Apache License 2.0
protected void writeCompressor(String metric, WrappedGorillaCompressor wrappedGorillaCompressor)
        throws IOException {

    try {
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:8020"), configuration);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd-HHmmss.SSS");
        sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
        String baseDir = "/timely/cache";
        Path directory = new Path(baseDir + "/" + metric);
        String fileName = metric + "-" + sdf.format(new Date(wrappedGorillaCompressor.getOldestTimestamp()));
        Path outputPath = new Path(directory, fileName);
        if (!fs.exists(directory)) {
            fs.mkdirs(directory);
        }
        if (fs.exists(outputPath)) {
            throw new IOException("output path exists");
        }
        OutputStream os = fs.create(outputPath);
        // write object to hdfs file
        ObjectOutputStream oos = new ObjectOutputStream(os);
        oos.writeObject(wrappedGorillaCompressor);
        oos.close();
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
}
 
Example 9
@Override
public void execute() throws IOException {

  if (this.verifyBeforeRegistering) {
    if (!this.hiveSpec.getTable().getLocation().isPresent()) {
      throw getException("Table does not have a location parameter.");
    }
    Path tablePath = new Path(this.hiveSpec.getTable().getLocation().get());

    FileSystem fs = this.hiveSpec.getPath().getFileSystem(new Configuration());
    if (!fs.exists(tablePath)) {
      throw getException(String.format("Table location %s does not exist.", tablePath));
    }

    if (this.hiveSpec.getPartition().isPresent()) {

      if (!this.hiveSpec.getPartition().get().getLocation().isPresent()) {
        throw getException("Partition does not have a location parameter.");
      }
      Path partitionPath = new Path(this.hiveSpec.getPartition().get().getLocation().get());
      if (!fs.exists(partitionPath)) { // check the partition location computed above
        throw getException(String.format("Partition location %s does not exist.", partitionPath));
      }
    }
  }

  try (HiveRegister hiveRegister = HiveRegister.get(this.props, this.metastoreURI)) {
    log.info("Registering Hive Spec " + this.hiveSpec);
    ListenableFuture<Void> future = hiveRegister.register(this.hiveSpec);
    future.get();
  } catch (InterruptedException | ExecutionException ie) {
    throw new IOException("Hive registration was interrupted.", ie);
  }
}
 
Example 10
Source Project: hadoop-gpu   File: TestTFileSeek.java    License: Apache License 2.0
private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
  throws IOException {
  if (fs.exists(name)) {
    fs.delete(name, true);
  }
  FSDataOutputStream fout = fs.create(name);
  return fout;
}
 
Example 11
Source Project: big-c   File: TestPseudoLocalFs.java    License: Apache License 2.0
/**
 * Validate if exists() returns <code>true</code> for correctly formed file
 * paths on PseudoLocalFs and returns <code>false</code> for improperly
 * formed file paths.
 * @param pfs Pseudo Local File System
 * @param path file path for which exists() is to be called
 * @param shouldSucceed expected return value of exists(&lt;path&gt;)
 * @throws IOException
 */
private void validateExists(FileSystem pfs, Path path,
    boolean shouldSucceed) throws IOException {
  boolean ret = pfs.exists(path);
  if (shouldSucceed) {
    assertTrue("exists() returned false for valid file name " + path, ret);
  } else {
    assertFalse("exists() returned true for invalid file name " + path, ret);
  }
}
 
Example 12
Source Project: RDFS   File: TestEmptyJob.java    License: Apache License 2.0
@Override
public void commitJob(JobContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  Path share = new Path(conf.get("share"));
  FileSystem fs = FileSystem.get(conf);

  
  while (true) {
    if (fs.exists(share)) {
      break;
    }
    UtilsForTests.waitFor(100);
  }
  super.commitJob(context);
}
 
Example 13
Source Project: hbase   File: SnapshotManager.java    License: Apache License 2.0
/**
 * Check to see if the snapshot is one of the currently completed snapshots.
 * Returns true if the snapshot exists in the "completed snapshots folder".
 *
 * @param snapshot expected snapshot to check
 * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is
 *         not stored
 * @throws IOException if the filesystem throws an unexpected exception.
 * @throws IllegalArgumentException if snapshot name is invalid.
 */
private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException {
  try {
    final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    // check to see if the snapshot already exists
    return fs.exists(snapshotDir);
  } catch (IllegalArgumentException iae) {
    throw new UnknownSnapshotException("Unexpected exception thrown", iae);
  }
}
 
Example 14
Source Project: MapReduce-Demo   File: IPCount.java    License: MIT License
public static void main(String[] args) throws Exception {		
	//1. Set up the HDFS configuration
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "IPCount";						//job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(IPCount.class);				//job class to run
	job.setJar("export\\IPCount.jar");				//local jar file
	job.setMapperClass(IPCountMapper.class);		//Mapper class
	job.setMapOutputKeyClass(DayAndIp.class);		//Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);	//Mapper output value type
	job.setReducerClass(IPCountReducer.class);		//Reducer class
	job.setOutputKeyClass(DayAndIp.class);			//Reducer output key type
	job.setOutputValueClass(IntWritable.class); 	//Reducer output value type

	//3. Set the job input and output paths
	String dataDir = "/expr/weblog/data";			//input data directory
	String outputDir = "/expr/weblog/output4";		//output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 15
Source Project: attic-apex-core   File: StramClient.java    License: Apache License 2.0
public void copyInitialState(Path origAppDir) throws IOException
{
  // locate previous snapshot
  long copyStart = System.currentTimeMillis();
  String newAppDir = this.dag.assertAppPath();

  FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
  // read snapshot against new dependencies
  Object snapshot = recoveryHandler.restore();
  if (snapshot == null) {
    throw new IllegalArgumentException("No previous application state found in " + origAppDir);
  }
  InputStream logIs = recoveryHandler.getLog();

  // modify snapshot state to switch app id
  ((StreamingContainerManager.CheckpointState)snapshot).setApplicationId(this.dag, conf);
  Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

  FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf);
  // remove the path that was created by the storage agent during deserialization and replacement
  fs.delete(checkpointPath, true);

  // write snapshot to new location
  recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
  recoveryHandler.save(snapshot);
  OutputStream logOs = recoveryHandler.rotateLog();
  IOUtils.copy(logIs, logOs);
  logOs.flush();
  logOs.close();
  logIs.close();

  List<String> excludeDirs = Arrays.asList(LogicalPlan.SUBDIR_CHECKPOINTS, LogicalPlan.SUBDIR_EVENTS, LogicalPlan.SUBDIR_STATS);
  // copy sub directories that are not present in target
  FileStatus[] lFiles = fs.listStatus(origAppDir);

  // In case of MapR/MapR-FS, f.getPath().toString() returns path as maprfs:///<orig app dir>
  // whereas origAppDir.toString & newAppDir are in maprfs:/<orig or new app dir> format
  // e.g.
  // f.getPath().toString -> maprfs:///user/dtadmin/datatorrent/apps/application_1481890072066_0004/checkpoints
  // origAppDir -> maprfs:/user/dtadmin/datatorrent/apps/application_1481890072066_0004
  // newAppDir -> maprfs:/user/dtadmin/datatorrent/apps/application_1481890072066_0005

  String origAppDirPath = Path.getPathWithoutSchemeAndAuthority(origAppDir).toString();
  String newAppDirPath = Path.getPathWithoutSchemeAndAuthority(new Path(newAppDir)).toString();

  for (FileStatus f : lFiles) {
    if (f.isDirectory() && !excludeDirs.contains(f.getPath().getName())) {
      String targetPath = f.getPath().toString().replace(origAppDirPath, newAppDirPath);
      if (!fs.exists(new Path(targetPath))) {
        LOG.debug("Copying {} size {} to {}", f.getPath(), f.getLen(), targetPath);
        long start = System.currentTimeMillis();
        FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
        LOG.debug("Copying {} to {} took {} ms", f.getPath(), f.getLen(), targetPath, System.currentTimeMillis() - start);
      } else {
        LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
      }
    }
  }
  LOG.info("Copying initial state took {} ms", System.currentTimeMillis() - copyStart);
}
 
Example 16
Source Project: MapReduce-Demo   File: PVMinMax2.java    License: MIT License
public static void main(String[] args) throws Exception {		
	//1. Set up the HDFS configuration
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "PVMinMax2";					//job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(PVMinMax2.class);				//job class to run
	job.setJar("export\\PVMinMax2.jar");			//local jar file
	job.setMapperClass(PVMinMax2Mapper.class);		//Mapper class
	job.setMapOutputKeyClass(Text.class);			//Mapper output key type
	job.setMapOutputValueClass(Text.class);			//Mapper output value type
	job.setReducerClass(PVMinMax2Reducer.class);	//Reducer class
	job.setOutputKeyClass(Text.class);				//Reducer output key type
	job.setOutputValueClass(Text.class); 	//Reducer output value type

	//3. Set the job input and output paths
	String dataDir = "/expr/weblog/output5_1";			//input data directory
	String outputDir = "/expr/weblog/output5_2";		//output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 17
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment,
            "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exist");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());
        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;
        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();

        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }

}
 
Example 18
Source Project: MapReduce-Demo   File: DatePartition2.java    License: MIT License
public static void main(String[] args) throws Exception {		
	//1. Set up the HDFS configuration
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "DatePartition2";					//job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(DatePartition2.class);			//job class to run
	job.setJar("export\\DatePartition2.jar");			//local jar file
	job.setMapperClass(DatePartition2Mapper.class);		//Mapper class
	job.setMapOutputKeyClass(Text.class);				//Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);		//Mapper output value type
	job.setReducerClass(DatePartition2Reducer.class);	//Reducer class
	job.setOutputKeyClass(Text.class);					//Reducer output key type
	job.setOutputValueClass(IntWritable.class);			//Reducer output value type
	job.setPartitionerClass(YearPartitioner.class);		//custom partitioning class
	job.setNumReduceTasks(3); 	//number of reduce tasks; this value is passed to Partitioner.getPartition() as its numPartitions argument

	//3. Set the job input and output paths
	String dataDir = "/expr/datecount/data";				//input data directory
	String outputDir = "/expr/datecount/output_partition2";	//output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 19
Source Project: MapReduce-Demo   File: FlowCount.java    License: MIT License
public static void main(String[] args) throws Exception {		
	//1. Set up the HDFS configuration
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "FlowCount";					//job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(FlowCount.class);				//job class to run
	job.setJar("export\\FlowCount.jar");			//local jar file
	job.setMapperClass(FlowCountMapper.class);		//Mapper class
	job.setMapOutputKeyClass(Text.class);			//Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);	//Mapper output value type
	job.setReducerClass(FlowCountReducer.class);	//Reducer class
	job.setOutputKeyClass(Text.class);				//Reducer output key type
	job.setOutputValueClass(IntWritable.class); 	//Reducer output value type

	//3. Set the job input and output paths
	String dataDir = "/expr/weblog/data";			//input data directory
	String outputDir = "/expr/weblog/output1";		//output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 20
Source Project: anthelion   File: LockUtil.java    License: Apache License 2.0
/**
 * Remove lock file. NOTE: applications enforce the semantics of this file -
 * this method simply removes any file with a given name.
 * @param fs filesystem
 * @param lockFile lock file name
 * @return false if the lock file doesn't exist; true if it existed and was
 * successfully removed.
 * @throws IOException if lock file exists but it is a directory.
 */
public static boolean removeLockFile(FileSystem fs, Path lockFile) throws IOException {
  if (!fs.exists(lockFile)) return false;
  if (fs.getFileStatus(lockFile).isDir())
    throw new IOException("lock file " + lockFile + " exists but is a directory!");
  return fs.delete(lockFile, false);
}