Java Code Examples for org.apache.hadoop.fs.FileSystem#exists()

The following examples show how to use org.apache.hadoop.fs.FileSystem#exists(). They are extracted from open source projects; where known, the source project, file, and license are noted above each example.
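As a quick orientation before the project examples, here is a minimal, self-contained sketch of the check-then-act pattern most of them follow: probe a path with exists() before deleting or creating it. The configuration, path, and class name used here are hypothetical and chosen only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Resolve the filesystem backing the (hypothetical) default configuration.
    FileSystem fs = FileSystem.get(conf);
    Path outputDir = new Path("/tmp/exists-sketch");  // hypothetical path

    // Recreate the output directory: delete it only if it is already there.
    if (fs.exists(outputDir)) {
      fs.delete(outputDir, true);  // recursive delete, as in the examples below
    }
    if (!fs.mkdirs(outputDir)) {
      throw new IOException("Mkdirs failed to create " + outputDir);
    }
  }
}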
Example 1
Source Project: big-c   File: TestMiniMRClientCluster.java    License: Apache License 2.0
@BeforeClass
public static void setup() throws IOException {
  final Configuration conf = new Configuration();
  final Path TEST_ROOT_DIR = new Path(System.getProperty("test.build.data",
      "/tmp"));
  testdir = new Path(TEST_ROOT_DIR, "TestMiniMRClientCluster");
  inDir = new Path(testdir, "in");
  outDir = new Path(testdir, "out");

  FileSystem fs = FileSystem.getLocal(conf);
  if (fs.exists(testdir) && !fs.delete(testdir, true)) {
    throw new IOException("Could not delete " + testdir);
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir);
  }

  for (int i = 0; i < inFiles.length; i++) {
    inFiles[i] = new Path(inDir, "part_" + i);
    createFile(inFiles[i], conf);
  }

  // create the mini cluster to be used for the tests
  mrCluster = MiniMRClientClusterFactory.create(
      InternalClass.class, 1, new Configuration());
}
 
Example 2
Source Project: nifi   File: MoveHDFS.java    License: Apache License 2.0
protected Set<Path> selectFiles(final FileSystem hdfs, final Path inputPath, Set<Path> filesVisited)
        throws IOException {
    if (null == filesVisited) {
        filesVisited = new HashSet<>();
    }

    if (!hdfs.exists(inputPath)) {
        throw new IOException("Selection directory " + inputPath.toString() + " doesn't appear to exist!");
    }

    final Set<Path> files = new HashSet<>();

    FileStatus inputStatus = hdfs.getFileStatus(inputPath);

    if (inputStatus.isDirectory()) {
        for (final FileStatus file : hdfs.listStatus(inputPath)) {
            final Path canonicalFile = file.getPath();

            if (!filesVisited.add(canonicalFile)) { // skip files we've already seen (may be looping directory links)
                continue;
            }

            if (!file.isDirectory() && processorConfig.getPathFilter(inputPath).accept(canonicalFile)) {
                files.add(canonicalFile);

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug(this + " selected file at path: " + canonicalFile.toString());
                }
            }
        }
    } else if (inputStatus.isFile()) {
        files.add(inputPath);
    }
    return files;
}
 
Example 3
Source Project: hadoop   File: TestMapperReducerCleanup.java    License: Apache License 2.0
@Test
public void testReduceCleanup() throws Exception {
  reset();
  
  Job job = Job.getInstance();

  Path inputPath = createInput();
  Path outputPath = getOutputPath();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);

  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  job.setMapperClass(TrackingTokenizerMapper.class);
  job.setReducerClass(FailingReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(TrackingTextInputFormat.class);
  job.setOutputFormatClass(TrackingTextOutputFormat.class);
  job.setNumReduceTasks(1);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.waitForCompletion(true);

  Assert.assertTrue(mapCleanup);
  Assert.assertTrue(reduceCleanup);
  Assert.assertTrue(recordReaderCleanup);
  Assert.assertTrue(recordWriterCleanup);
}
 
Example 4
Source Project: sqoop-on-spark   File: FileUtils.java    License: Apache License 2.0
public static void mkdirs(String directory) throws IOException {
  Path path = new Path(directory);
  FileSystem fs = path.getFileSystem(new Configuration());
  if (!fs.exists(path)) {
    fs.mkdirs(path);
  }
}
 
Example 5
Source Project: Bats   File: SchemaHandler.java    License: Apache License 2.0
/**
 * If a raw schema was present in the create schema command, returns the schema from the command;
 * otherwise loads the raw schema from the given file.
 *
 * @param sqlCall sql create schema call
 * @return string representation of raw schema (column names, types and nullability)
 */
private String getSchemaString(SqlSchema.Create sqlCall) {
  if (sqlCall.hasSchema()) {
    return sqlCall.getSchema();
  }

  Path path = new Path(sqlCall.getLoad());
  try {
    FileSystem rawFs = path.getFileSystem(new Configuration());
    FileSystem fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), rawFs.getConf());

    if (!fs.exists(path)) {
      throw UserException.resourceError()
        .message("File with raw schema [%s] does not exist", path.toUri().getPath())
        .build(logger);
    }

    try (InputStream stream = fs.open(path)) {
      return IOUtils.toString(stream);
    }

  } catch (IOException e) {
    throw UserException.resourceError(e)
      .message("Unable to load raw schema from file %s", path.toUri().getPath())
      .build(logger);
  }
}
 
Example 6
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();

    Job job = Job.getInstance(conf);

    job.setJarByClass(CommonFriendStep2.class);
    // set the job's Mapper and Reducer classes
    job.setMapperClass(CommonFansStep2Mapper.class);
    job.setReducerClass(CommonFansStep2Reducer.class);

    // set the key/value output types for the map phase
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // set the key/value output types for the reduce phase
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // if the output directory already exists, delete it so it does not have to be removed by hand between test runs
    FileSystem fs = FileSystem.get(conf);
    Path out = new Path(args[1]);
    if(fs.exists(out)) {
        fs.delete(out, true);
    }

    // set the data input and output directories
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job,out);

    // submit the job to YARN or the local runner
    job.waitForCompletion(true);

}
 
Example 7
/**
 * Construct the filename for a late file. If the file does not exist in the output dir, retain the original name.
 * Otherwise, append a LATE_COMPONENT{RandomInteger} to the original file name.
 * For example, if file "part1.123.avro" exists in dir "/a/b/", the returned path will be "/a/b/part1.123.late12345.avro".
 */
public Path constructLateFilePath(String originalFilename, FileSystem fs, Path outputDir) throws IOException {
  if (!fs.exists(new Path(outputDir, originalFilename))) {
    return new Path(outputDir, originalFilename);
  }
  return constructLateFilePath(FilenameUtils.getBaseName(originalFilename) + LATE_COMPONENT
      + new Random().nextInt(Integer.MAX_VALUE) + SEPARATOR + FilenameUtils.getExtension(originalFilename), fs,
      outputDir);
}
 
Example 8
Source Project: timely   File: GorillaStore.java    License: Apache License 2.0
protected void writeCompressor(String metric, WrappedGorillaCompressor wrappedGorillaCompressor)
        throws IOException {

    try {
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:8020"), configuration);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd-HHmmss.SSS");
        sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
        String baseDir = "/timely/cache";
        Path directory = new Path(baseDir + "/" + metric);
        String fileName = metric + "-" + sdf.format(new Date(wrappedGorillaCompressor.getOldestTimestamp()));
        Path outputPath = new Path(directory, fileName);
        if (!fs.exists(directory)) {
            fs.mkdirs(directory);
        }
        if (fs.exists(outputPath)) {
            throw new IOException("output path exists");
        }
        OutputStream os = fs.create(outputPath);
        // write object to hdfs file
        ObjectOutputStream oos = new ObjectOutputStream(os);
        oos.writeObject(wrappedGorillaCompressor);
        oos.close();
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
}
 
Example 9
@Override
public void execute() throws IOException {

  if (this.verifyBeforeRegistering) {
    if (!this.hiveSpec.getTable().getLocation().isPresent()) {
      throw getException("Table does not have a location parameter.");
    }
    Path tablePath = new Path(this.hiveSpec.getTable().getLocation().get());

    FileSystem fs = this.hiveSpec.getPath().getFileSystem(new Configuration());
    if (!fs.exists(tablePath)) {
      throw getException(String.format("Table location %s does not exist.", tablePath));
    }

    if (this.hiveSpec.getPartition().isPresent()) {

      if (!this.hiveSpec.getPartition().get().getLocation().isPresent()) {
        throw getException("Partition does not have a location parameter.");
      }
      Path partitionPath = new Path(this.hiveSpec.getPartition().get().getLocation().get());
      if (!fs.exists(partitionPath)) { // check the partition location computed above
        throw getException(String.format("Partition location %s does not exist.", partitionPath));
      }
    }
  }

  try (HiveRegister hiveRegister = HiveRegister.get(this.props, this.metastoreURI)) {
    log.info("Registering Hive Spec " + this.hiveSpec);
    ListenableFuture<Void> future = hiveRegister.register(this.hiveSpec);
    future.get();
  } catch (InterruptedException | ExecutionException ie) {
    throw new IOException("Hive registration was interrupted.", ie);
  }
}
 
Example 10
Source Project: hadoop-gpu   File: TestTFileSeek.java    License: Apache License 2.0
private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
  throws IOException {
  if (fs.exists(name)) {
    fs.delete(name, true);
  }
  FSDataOutputStream fout = fs.create(name);
  return fout;
}
 
Example 11
Source Project: big-c   File: TestPseudoLocalFs.java    License: Apache License 2.0
/**
 * Validate if exists() returns <code>true</code> for correctly formed file
 * paths on PseudoLocalFs and returns <code>false</code> for improperly
 * formed file paths.
 * @param pfs Pseudo Local File System
 * @param path file path for which exists() is to be called
 * @param shouldSucceed expected return value of exists(&lt;path&gt;)
 * @throws IOException
 */
private void validateExists(FileSystem pfs, Path path,
    boolean shouldSucceed) throws IOException {
  boolean ret = pfs.exists(path);
  if (shouldSucceed) {
    assertTrue("exists() returned false for valid file name " + path, ret);
  } else {
    assertFalse("exists() returned true for invalid file name " + path, ret);
  }
}
 
Example 12
Source Project: RDFS   File: TestEmptyJob.java    License: Apache License 2.0
@Override
public void commitJob(JobContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  Path share = new Path(conf.get("share"));
  FileSystem fs = FileSystem.get(conf);

  
  while (true) {
    if (fs.exists(share)) {
      break;
    }
    UtilsForTests.waitFor(100);
  }
  super.commitJob(context);
}
 
Example 13
Source Project: hbase   File: SnapshotManager.java    License: Apache License 2.0
/**
 * Check to see if the snapshot is one of the currently completed snapshots.
 * Returns true if the snapshot exists in the "completed snapshots folder".
 *
 * @param snapshot expected snapshot to check
 * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is
 *         not stored
 * @throws IOException if the filesystem throws an unexpected exception.
 * @throws IllegalArgumentException if snapshot name is invalid.
 */
private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException {
  try {
    final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    // check to see if the snapshot already exists
    return fs.exists(snapshotDir);
  } catch (IllegalArgumentException iae) {
    throw new UnknownSnapshotException("Unexpected exception thrown", iae);
  }
}
 
Example 14
Source Project: MapReduce-Demo   File: IPCount.java    License: MIT License
public static void main(String[] args) throws Exception {		
	//1. Set up the HDFS configuration
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "IPCount";						//job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(IPCount.class);				//job class to run
	job.setJar("export\\IPCount.jar");				//local jar file
	job.setMapperClass(IPCountMapper.class);		//Mapper class
	job.setMapOutputKeyClass(DayAndIp.class);		//Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);	//Mapper output value type
	job.setReducerClass(IPCountReducer.class);		//Reducer class
	job.setOutputKeyClass(DayAndIp.class);			//Reducer output key type
	job.setOutputValueClass(IntWritable.class); 	//Reducer output value type

	//3. Set the job input and output paths
	String dataDir = "/expr/weblog/data";			//input data directory
	String outputDir = "/expr/weblog/output4";		//output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 15
Source Project: attic-apex-core   File: StramClient.java    License: Apache License 2.0
public void copyInitialState(Path origAppDir) throws IOException
{
  // locate previous snapshot
  long copyStart = System.currentTimeMillis();
  String newAppDir = this.dag.assertAppPath();

  FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
  // read snapshot against new dependencies
  Object snapshot = recoveryHandler.restore();
  if (snapshot == null) {
    throw new IllegalArgumentException("No previous application state found in " + origAppDir);
  }
  InputStream logIs = recoveryHandler.getLog();

  // modify snapshot state to switch app id
  ((StreamingContainerManager.CheckpointState)snapshot).setApplicationId(this.dag, conf);
  Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

  FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf);
  // remove the path that was created by the storage agent during deserialization and replacement
  fs.delete(checkpointPath, true);

  // write snapshot to new location
  recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
  recoveryHandler.save(snapshot);
  OutputStream logOs = recoveryHandler.rotateLog();
  IOUtils.copy(logIs, logOs);
  logOs.flush();
  logOs.close();
  logIs.close();

  List<String> excludeDirs = Arrays.asList(LogicalPlan.SUBDIR_CHECKPOINTS, LogicalPlan.SUBDIR_EVENTS, LogicalPlan.SUBDIR_STATS);
  // copy sub directories that are not present in target
  FileStatus[] lFiles = fs.listStatus(origAppDir);

  // In case of MapR/MapR-FS, f.getPath().toString() returns path as maprfs:///<orig app dir>
  // whereas origAppDir.toString & newAppDir are in maprfs:/<orig or new app dir> format
  // e.g.
  // f.getPath().toString -> maprfs:///user/dtadmin/datatorrent/apps/application_1481890072066_0004/checkpoints
  // origAppDir -> maprfs:/user/dtadmin/datatorrent/apps/application_1481890072066_0004
  // newAppDir -> maprfs:/user/dtadmin/datatorrent/apps/application_1481890072066_0005

  String origAppDirPath = Path.getPathWithoutSchemeAndAuthority(origAppDir).toString();
  String newAppDirPath = Path.getPathWithoutSchemeAndAuthority(new Path(newAppDir)).toString();

  for (FileStatus f : lFiles) {
    if (f.isDirectory() && !excludeDirs.contains(f.getPath().getName())) {
      String targetPath = f.getPath().toString().replace(origAppDirPath, newAppDirPath);
      if (!fs.exists(new Path(targetPath))) {
        LOG.debug("Copying {} size {} to {}", f.getPath(), f.getLen(), targetPath);
        long start = System.currentTimeMillis();
        FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
        LOG.debug("Copying {} to {} took {} ms", f.getPath(), f.getLen(), targetPath, System.currentTimeMillis() - start);
      } else {
        LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
      }
    }
  }
  LOG.info("Copying initial state took {} ms", System.currentTimeMillis() - copyStart);
}
 
Example 16
Source Project: MapReduce-Demo   File: PVMinMax2.java    License: MIT License
public static void main(String[] args) throws Exception {		
	//1. Set up the HDFS configuration
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "PVMinMax2";					//job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(PVMinMax2.class);				//job class to run
	job.setJar("export\\PVMinMax2.jar");			//local jar file
	job.setMapperClass(PVMinMax2Mapper.class);		//Mapper class
	job.setMapOutputKeyClass(Text.class);			//Mapper output key type
	job.setMapOutputValueClass(Text.class);			//Mapper output value type
	job.setReducerClass(PVMinMax2Reducer.class);	//Reducer class
	job.setOutputKeyClass(Text.class);				//Reducer output key type
	job.setOutputValueClass(Text.class); 	//Reducer output value type

	//3. Set the job input and output paths
	String dataDir = "/expr/weblog/output5_1";			//input data directory
	String outputDir = "/expr/weblog/output5_2";		//output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 17
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment,
            "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exist");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());
        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;
        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();

        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }

}
 
Example 18
Source Project: MapReduce-Demo   File: DatePartition2.java    License: MIT License
public static void main(String[] args) throws Exception {		
	//1. Set up the HDFS configuration
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "DatePartition2";					//job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(DatePartition2.class);			//job class to run
	job.setJar("export\\DatePartition2.jar");			//local jar file
	job.setMapperClass(DatePartition2Mapper.class);		//Mapper class
	job.setMapOutputKeyClass(Text.class);				//Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);		//Mapper output value type
	job.setReducerClass(DatePartition2Reducer.class);	//Reducer class
	job.setOutputKeyClass(Text.class);					//Reducer output key type
	job.setOutputValueClass(IntWritable.class);			//Reducer output value type
	job.setPartitionerClass(YearPartitioner.class);		//custom partitioning class
	job.setNumReduceTasks(3); 	//number of reduce tasks; this value is passed to Partitioner.getPartition() as its numPartitions argument

	//3. Set the job input and output paths
	String dataDir = "/expr/datecount/data";				//input data directory
	String outputDir = "/expr/datecount/output_partition2";	//output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 19
Source Project: MapReduce-Demo   File: FlowCount.java    License: MIT License
public static void main(String[] args) throws Exception {		
	//1. Set up the HDFS configuration
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "FlowCount";					//job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(FlowCount.class);				//job class to run
	job.setJar("export\\FlowCount.jar");			//local jar file
	job.setMapperClass(FlowCountMapper.class);		//Mapper class
	job.setMapOutputKeyClass(Text.class);			//Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);	//Mapper output value type
	job.setReducerClass(FlowCountReducer.class);	//Reducer class
	job.setOutputKeyClass(Text.class);				//Reducer output key type
	job.setOutputValueClass(IntWritable.class); 	//Reducer output value type

	//3. Set the job input and output paths
	String dataDir = "/expr/weblog/data";			//input data directory
	String outputDir = "/expr/weblog/output1";		//output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 20
Source Project: anthelion   File: LockUtil.java    License: Apache License 2.0
/**
 * Remove lock file. NOTE: applications enforce the semantics of this file -
 * this method simply removes any file with a given name.
 * @param fs filesystem
 * @param lockFile lock file name
 * @return false if the lock file doesn't exist; true if it existed and was
 * successfully removed.
 * @throws IOException if lock file exists but it is a directory.
 */
public static boolean removeLockFile(FileSystem fs, Path lockFile) throws IOException {
  if (!fs.exists(lockFile)) return false;
  if (fs.getFileStatus(lockFile).isDir())
    throw new IOException("lock file " + lockFile + " exists but is a directory!");
  return fs.delete(lockFile, false);
}