Java Code Examples for org.apache.hadoop.fs.FileSystem#delete()

The following examples show how to use org.apache.hadoop.fs.FileSystem#delete(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may want to check out the right sidebar which shows the related API usage.
Example 1
Source Project: big-c   File: MRAppMaster.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Deletes the job's staging directory unless {@code keepJobFiles} reports
 * that the job files should be kept.
 *
 * <p>An {@link IOException} raised inside the {@code try} block (while
 * reading the configuration or deleting the directory) is logged and not
 * rethrown. Obtaining the file system happens before the {@code try} block,
 * so an exception from that call reaches the caller.
 *
 * @throws IOException if thrown while obtaining the file system
 */
public void cleanupStagingDir() throws IOException {
  String stagingDir = null;
  FileSystem stagingFs = getFileSystem(getConfig());
  try {
    if (keepJobFiles(new JobConf(getConfig()))) {
      // keepJobFiles returned true, so the staging directory is left in place.
      return;
    }
    stagingDir = getConfig().get(MRJobConfig.MAPREDUCE_JOB_DIR);
    if (stagingDir == null) {
      LOG.warn("Job Staging directory is null");
      return;
    }
    Path stagingPath = new Path(stagingDir);
    LOG.info("Deleting staging directory " + FileSystem.getDefaultUri(getConfig()) +
        " " + stagingDir);
    // Recursive delete: removes the directory together with its contents.
    stagingFs.delete(stagingPath, true);
  } catch (IOException io) {
    LOG.error("Failed to cleanup staging dir " + stagingDir, io);
  }
}
 
Example 2
/**
 * Writes a string to a file on the file system returned by
 * {@code FileSystem.get(new Configuration())}, copies that file to the file
 * system under test ({@code fs}) with {@code FileUtil.copy}, and checks that
 * the copy exists and holds the same text.
 *
 * <p>The source file is deleted before the test body runs and again in the
 * {@code finally} block.
 */
@Test
public void testCopyFromLocalFileSystem() throws Exception {
  String sourceName = System.getProperty("test.build.data", "azure_test");
  Path localFilePath = new Path(sourceName);
  FileSystem sourceFs = FileSystem.get(new Configuration());
  // Start from a clean slate in case an earlier run left the file behind.
  sourceFs.delete(localFilePath, true);
  try {
    writeString(sourceFs, localFilePath, "Testing");
    Path copiedPath = new Path("copiedFromLocal");
    boolean copySucceeded =
        FileUtil.copy(sourceFs, localFilePath, fs, copiedPath, false, fs.getConf());
    assertTrue(copySucceeded);
    assertTrue(fs.exists(copiedPath));
    assertEquals("Testing", readString(fs, copiedPath));
    fs.delete(copiedPath, true);
  } finally {
    sourceFs.delete(localFilePath, true);
  }
}
 
Example 3
Source Project: big-c   File: TestJavaSerialization.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Removes any existing {@code INPUT_FILE} and {@code OUTPUT_DIR}, then
 * creates a fresh {@code INPUT_FILE} containing the single line
 * {@code "b a"}.
 *
 * @param fs file system on which the input file and output directory live
 * @throws IOException if deleting, creating, writing or closing fails
 */
private void cleanAndCreateInput(FileSystem fs) throws IOException {
  fs.delete(INPUT_FILE, true);
  fs.delete(OUTPUT_DIR, true);

  // try-with-resources closes the writer (and the stream it wraps) even when
  // write() throws, so a failed write no longer leaks the open stream.
  try (OutputStream os = fs.create(INPUT_FILE);
       Writer wr = new OutputStreamWriter(os)) {
    wr.write("b a\n");
  }
}
 
Example 4
/**
 * Stages a generated test folder into a destination directory and checks how
 * many paths {@code findFiles} returns with no pattern, with a
 * {@code .*jar$} pattern and with a {@code .*folder$} pattern.
 *
 * <p>The staging root is deleted in the inner {@code finally}; the generated
 * source folder is deleted in the outer {@code finally}, so it is cleaned up
 * even if removing the staging root fails.
 */
@Test
public void findFiles_hdfs_native() throws Exception {
  DistributedCacheUtilImpl cacheUtil = new DistributedCacheUtilImpl();

  // Copy the contents of test folder
  FileObject testFolder = DistributedCacheTestUtil.createTestFolderWithContent();
  Path stagingRoot = new Path( "bin/test/stageArchiveForCacheTest" );
  Configuration conf = new Configuration();
  FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );
  Path stagedDir = new Path( stagingRoot, "org/pentaho/mapreduce/" );
  try {
    try {
      cacheUtil.stageForCache( testFolder, fs, stagedDir, true );

      // No pattern: every staged path is returned.
      List<Path> allPaths = cacheUtil.findFiles( fs, stagedDir, null );
      assertEquals( 6, allPaths.size() );

      Pattern jarPattern = Pattern.compile( ".*jar$" );
      List<Path> jarPaths = cacheUtil.findFiles( fs, stagedDir, jarPattern );
      assertEquals( 2, jarPaths.size() );

      Pattern folderPattern = Pattern.compile( ".*folder$" );
      List<Path> folderPaths = cacheUtil.findFiles( fs, stagedDir, folderPattern );
      assertEquals( 1, folderPaths.size() );
    } finally {
      fs.delete( stagingRoot, true );
    }
  } finally {
    testFolder.delete( new AllFileSelector() );
  }
}
 
Example 5
/**
 * Runs a sequential-write benchmark that writes a test HFile with the given
 * codec and cipher. If {@code mf} already exists it is deleted (recursively)
 * first.
 *
 * @param conf configuration handed to the benchmark and used to resolve the cipher name
 * @param fs file system on which {@code mf} lives
 * @param mf path of the file the benchmark writes
 * @param codec "none", "lzo", "gz", "snappy"
 * @param cipher "none", "aes"
 * @throws Exception if the existence check, the delete or the benchmark fails
 */
private void runWriteBenchmark(Configuration conf, FileSystem fs, Path mf, String codec,
    String cipher) throws Exception {
  boolean staleFilePresent = fs.exists(mf);
  if (staleFilePresent) {
    fs.delete(mf, true);
  }

  SequentialWriteBenchmark writeBenchmark =
      new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT, codec, cipher);
  String cipherName = getCipherName(conf, cipher);
  runBenchmark(writeBenchmark, ROW_COUNT, codec, cipherName);
}
 
Example 6
Source Project: nifi   File: AbstractPutHDFSRecord.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Renames {@code srcFile} to {@code destFile}, making at most 10 attempts and
 * sleeping 200ms after every attempt that fails.
 *
 * When all 10 attempts fail, the source file is deleted (non-recursively) and a
 * FailureException is thrown.
 *
 * @param fileSystem the file system where the files are located
 * @param srcFile the source file
 * @param destFile the destination file to rename the source to
 * @throws IOException if IOException happens while attempting to rename
 * @throws InterruptedException if the sleep between attempts is interrupted
 * @throws FailureException if the file couldn't be renamed after 10 attempts
 */
protected void rename(final FileSystem fileSystem, final Path srcFile, final Path destFile) throws IOException, InterruptedException, FailureException {
    for (int attempt = 0; attempt < 10; attempt++) {
        if (fileSystem.rename(srcFile, destFile)) {
            // Rename succeeded; nothing left to do.
            return;
        }
        // Give whatever blocked the rename a chance to clear before retrying.
        Thread.sleep(200L);
    }

    // Every attempt failed: drop the source file and report the failure.
    fileSystem.delete(srcFile, false);
    throw new FailureException("Could not rename file " + srcFile + " to its final filename");
}
 
Example 7
Source Project: hadoop   File: TestLocalModeWithNewApis.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a word-count job over a three-line input file written under a randomly
 * named directory below {@code /tmp}, then checks the job's output text and
 * removes the temporary base directory.
 *
 * @throws Exception if file system access or the job itself fails
 */
@Test
public void testNewApis() throws Exception {
  Random r = new Random(System.currentTimeMillis());
  Path tmpBaseDir = new Path("/tmp/wc-" + r.nextInt());
  final Path inDir = new Path(tmpBaseDir, "input");
  final Path outDir = new Path(tmpBaseDir, "output");
  String input = "The quick brown fox\nhas many silly\nred fox sox\n";
  FileSystem inFs = inDir.getFileSystem(conf);
  FileSystem outFs = outDir.getFileSystem(conf);
  outFs.delete(outDir, true);
  if (!inFs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // try-with-resources closes the input file even if writeBytes() throws.
  try (DataOutputStream file = inFs.create(new Path(inDir, "part-0"))) {
    file.writeBytes(input);
  }

  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(TestLocalModeWithNewApis.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);
  // assertEquals takes (expected, actual); the expected value goes first so a
  // failure message reports the two values the right way round.
  assertEquals(true, job.waitForCompletion(true));

  String output = readOutput(outDir, conf);
  assertEquals("The\t1\nbrown\t1\nfox\t2\nhas\t1\nmany\t1\n" +
               "quick\t1\nred\t1\nsilly\t1\nsox\t1\n", output);
  
  outFs.delete(tmpBaseDir, true);
}
 
Example 8
Source Project: RDFS   File: TestJobInProgress.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code JobConf} named "failmaptask" whose input is a small text
 * file written to {@code ./failjob/input} and whose output directory is
 * {@code ./failjob/output} (deleted first).
 *
 * @param MapClass mapper class to set on the job
 * @param ReduceClass class used as both combiner and reducer
 * @param maps number of map tasks
 * @param reducers number of reduce tasks
 * @param locality when true the job reads with {@code TextInputFormat};
 *                 otherwise with {@code UtilsForTests.RandomInputFormat}
 * @return the configured job configuration
 * @throws Exception if the input directory cannot be created or file system
 *                   access fails
 */
@SuppressWarnings("unchecked")
JobConf configure(Class MapClass,Class ReduceClass, int maps, int reducers,
                  boolean locality) 
throws Exception {
  JobConf jobConf = mrCluster.createJobConf();
  final Path inputDir = new Path("./failjob/input");
  final Path outputDir = new Path("./failjob/output");
  String inputText = "Test failing job.\n One more line";
  FileSystem inputFs = inputDir.getFileSystem(jobConf);
  FileSystem outputFs = outputDir.getFileSystem(jobConf);

  // Clear any previous output, then make sure the input directory exists.
  outputFs.delete(outputDir, true);
  boolean inputDirCreated = inputFs.mkdirs(inputDir);
  if (!inputDirCreated) {
    throw new IOException("create directory failed" + inputDir.toString());
  }

  // Write the single input split the job will read.
  DataOutputStream partFile = inputFs.create(new Path(inputDir, "part-0"));
  partFile.writeBytes(inputText);
  partFile.close();

  jobConf.setJobName("failmaptask");
  if (!locality) {
    jobConf.setInputFormat(UtilsForTests.RandomInputFormat.class);
  } else {
    jobConf.setInputFormat(TextInputFormat.class);
  }
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setMapperClass(MapClass);
  jobConf.setCombinerClass(ReduceClass);
  jobConf.setReducerClass(ReduceClass);
  FileInputFormat.setInputPaths(jobConf, inputDir);
  FileOutputFormat.setOutputPath(jobConf, outputDir);
  jobConf.setNumMapTasks(maps);
  jobConf.setNumReduceTasks(reducers);
  return jobConf; 
}
 
Example 9
/**
 * Deletes {@code path} non-recursively.
 *
 * @return the value returned by {@code FileSystem.delete}, or {@code false}
 *         when an {@link IOException} was thrown (the exception is logged)
 */
@Override
public boolean delete() {
	boolean deleted;
	try {
		deleted = getFileSystem().delete(new Path(path), false);
	} catch (IOException e) {
		LOG.error(String.format("An exception occurred while deleting the path '%s'.", path), e);
		deleted = false;
	}
	return deleted;
}
 
Example 10
Source Project: incubator-tajo   File: TestFileSystems.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns an empty, fully qualified test directory: an existing directory at
 * {@code dir} is deleted recursively and then recreated.
 *
 * @param fs file system on which to create the directory
 * @param dir path of the directory
 * @return {@code dir} qualified against {@code fs}
 * @throws IOException if the existence check, delete or mkdirs fails
 */
public Path getTestDir(FileSystem fs, String dir) throws IOException {
  Path testDir = new Path(dir);
  boolean alreadyPresent = fs.exists(testDir);
  if (alreadyPresent) {
    fs.delete(testDir, true);
  }

  fs.mkdirs(testDir);
  return fs.makeQualified(testDir);
}
 
Example 11
Source Project: multimedia-indexing   File: VisualJob.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a job that reads text from {@code inputPath} and writes
 * gzip-compressed text to {@code outputPath}. An existing output path is
 * deleted recursively first.
 *
 * @param inputPath path the job reads from
 * @param outputPath path the job writes to; also used as the URI that selects the file system
 * @return the configured (not yet submitted) job
 * @throws Exception if the output URI is invalid or file system access fails
 */
private Job createJob(String inputPath, String outputPath) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJarByClass(VisualJob.class);
    job.setNumReduceTasks(90);

    // Clear any previous output on the file system the output URI points to.
    FileSystem outputFs = FileSystem.get(new URI(outputPath), conf);
    Path outputDir = new Path(outputPath);
    if (outputFs.exists(outputDir)) {
        outputFs.delete(outputDir, true);
    }

    // Input/output locations and output compression.
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, outputDir);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    // Formats.
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Key/value types for the map output and the final output.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(FloatArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(VisualThreadedMapper.class);
    job.setReducerClass(VisualReducer.class);

    return job;
}
 
Example 12
/**
 * Deletes the clean-up-interval file from the store's home directory if it
 * exists, then delegates to the superclass.
 *
 * <p>The superclass tear-down runs in a {@code finally} block so it is not
 * skipped when one of the file system calls throws.
 *
 * @throws Exception if file system access or the superclass tear-down fails
 */
@Override 
protected void tearDown() throws Exception{
  try {
    FileSystem fs = hdfsStore.getFileSystem();
    Path cleanUpIntervalPath = new Path(hdfsStore.getHomeDir(),HoplogConfig.CLEAN_UP_INTERVAL_FILE_NAME);
    if (fs.exists(cleanUpIntervalPath)){
      fs.delete(cleanUpIntervalPath, true);
    }
  } finally {
    super.tearDown();
  }
}
 
Example 13
Source Project: hbase   File: BaseTestHBaseFsck.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Recursively deletes the directory of {@code table} under the root
 * directory, after listing it through {@code HBaseFsck.debugLsr}. The outcome
 * of the delete is logged, not returned.
 *
 * @param table table whose directory is removed
 * @throws IOException if file system access fails
 */
public void deleteTableDir(TableName table) throws IOException {
  Path root = CommonFSUtils.getRootDir(conf);
  FileSystem rootFs = root.getFileSystem(conf);
  Path tableDir = CommonFSUtils.getTableDir(root, table);
  HBaseFsck.debugLsr(conf, tableDir);
  boolean deleted = rootFs.delete(tableDir, true);
  LOG.info("Deleted " + tableDir + " sucessfully? " + deleted);
}
 
Example 14
Source Project: hbase   File: GCRegionProcedure.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Executes one step of the region garbage-collection state machine.
 *
 * <ul>
 *   <li>{@code GC_REGION_PREPARE}: nothing to do; moves on to archiving.</li>
 *   <li>{@code GC_REGION_ARCHIVE}: archives the region if the archiver reports
 *       that it exists, and deletes the region's directories on the WAL file
 *       system.</li>
 *   <li>{@code GC_REGION_PURGE_METADATA}: removes the region from the region
 *       states, the meta table, the server manager and the favored-nodes
 *       manager, then ends the procedure.</li>
 * </ul>
 *
 * <p>An {@link IOException} in any state is logged and the method returns
 * {@code Flow.HAS_MORE_STATE} without advancing the state.
 *
 * @param env master procedure environment
 * @param state the state to execute
 * @return {@code Flow.NO_MORE_STATE} after purging metadata, otherwise
 *         {@code Flow.HAS_MORE_STATE}
 * @throws UnsupportedOperationException if {@code state} is not handled
 */
@Override
protected Flow executeFromState(MasterProcedureEnv env, GCRegionState state)
    throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
  if (LOG.isTraceEnabled()) {
    LOG.trace(this + " execute state=" + state);
  }
  MasterServices masterServices = env.getMasterServices();
  try {
    switch (state) {
      case GC_REGION_PREPARE:
        // Nothing to do to prepare.
        setNextState(GCRegionState.GC_REGION_ARCHIVE);
        break;
      case GC_REGION_ARCHIVE:
        MasterFileSystem mfs = masterServices.getMasterFileSystem();
        FileSystem fs = mfs.getFileSystem();
        if (HFileArchiver.exists(masterServices.getConfiguration(), fs, getRegion())) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Archiving region=" + getRegion().getShortNameToLog());
          }
          HFileArchiver.archiveRegion(masterServices.getConfiguration(), fs, getRegion());
        }
        FileSystem walFs = mfs.getWALFileSystem();
        // Cleanup the directories on WAL filesystem also
        Path regionWALDir = CommonFSUtils.getWALRegionDir(env.getMasterConfiguration(),
          getRegion().getTable(), getRegion().getEncodedName());
        if (walFs.exists(regionWALDir)) {
          if (!walFs.delete(regionWALDir, true)) {
            LOG.debug("Failed to delete {}", regionWALDir);
          }
        }
        Path wrongRegionWALDir = CommonFSUtils.getWrongWALRegionDir(env.getMasterConfiguration(),
          getRegion().getTable(), getRegion().getEncodedName());
        if (walFs.exists(wrongRegionWALDir)) {
          if (!walFs.delete(wrongRegionWALDir, true)) {
            // Log the directory whose delete actually failed.
            LOG.debug("Failed to delete {}", wrongRegionWALDir);
          }
        }
        setNextState(GCRegionState.GC_REGION_PURGE_METADATA);
        break;
      case GC_REGION_PURGE_METADATA:
        // TODO: Purge metadata before removing from HDFS? This ordering is copied
        // from CatalogJanitor.
        AssignmentManager am = masterServices.getAssignmentManager();
        if (am != null) {
          if (am.getRegionStates() != null) {
            am.getRegionStates().deleteRegion(getRegion());
          }
        }
        MetaTableAccessor.deleteRegionInfo(masterServices.getConnection(), getRegion());
        masterServices.getServerManager().removeRegion(getRegion());
        FavoredNodesManager fnm = masterServices.getFavoredNodesManager();
        if (fnm != null) {
          fnm.deleteFavoredNodesForRegions(Lists.newArrayList(getRegion()));
        }
        return Flow.NO_MORE_STATE;
      default:
        throw new UnsupportedOperationException(this + " unhandled state=" + state);
    }
  } catch (IOException ioe) {
    // TODO: This is going to spew log? Add retry backoff
    LOG.warn("Error trying to GC " + getRegion().getShortNameToLog() + "; retrying...", ioe);
  }
  return Flow.HAS_MORE_STATE;
}
 
Example 15
Source Project: hbase   File: TestHFileCleaner.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks that {@code HFileCleaner.onConfigurationChange} applies a new
 * throttle point, queue sizes, thread counts and thread timeouts while a
 * {@code chore()} run is in progress on a background thread, and that calling
 * it again with the same configuration leaves the cleaner threads unchanged.
 */
@Test
public void testOnConfigurationChange() throws Exception {
  // constants
  final int ORIGINAL_THROTTLE_POINT = 512 * 1024;
  final int ORIGINAL_QUEUE_INIT_SIZE = 512;
  final int UPDATE_THROTTLE_POINT = 1024;// small enough to change large/small check
  final int UPDATE_QUEUE_INIT_SIZE = 1024;
  final int LARGE_FILE_NUM = 5;
  final int SMALL_FILE_NUM = 20;
  final int LARGE_THREAD_NUM = 2;
  final int SMALL_THREAD_NUM = 4;
  final long THREAD_TIMEOUT_MSEC = 30 * 1000L;
  final long THREAD_CHECK_INTERVAL_MSEC = 500L;

  Configuration conf = UTIL.getConfiguration();
  // no cleaner policies = delete all files
  conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, "");
  conf.setInt(HFileCleaner.HFILE_DELETE_THROTTLE_THRESHOLD, ORIGINAL_THROTTLE_POINT);
  conf.setInt(HFileCleaner.LARGE_HFILE_QUEUE_INIT_SIZE, ORIGINAL_QUEUE_INIT_SIZE);
  conf.setInt(HFileCleaner.SMALL_HFILE_QUEUE_INIT_SIZE, ORIGINAL_QUEUE_INIT_SIZE);
  Server server = new DummyServer();
  Path archivedHfileDir =
      new Path(UTIL.getDataTestDirOnTestFS(), HConstants.HFILE_ARCHIVE_DIRECTORY);

  // setup the cleaner
  FileSystem fs = UTIL.getDFSCluster().getFileSystem();
  final HFileCleaner cleaner = new HFileCleaner(1000, server, conf, fs, archivedHfileDir, POOL);
  // Before any change the cleaner must report the original settings; the
  // thread timeout and check interval were not set above, so the defaults are expected.
  Assert.assertEquals(ORIGINAL_THROTTLE_POINT, cleaner.getThrottlePoint());
  Assert.assertEquals(ORIGINAL_QUEUE_INIT_SIZE, cleaner.getLargeQueueInitSize());
  Assert.assertEquals(ORIGINAL_QUEUE_INIT_SIZE, cleaner.getSmallQueueInitSize());
  Assert.assertEquals(HFileCleaner.DEFAULT_HFILE_DELETE_THREAD_TIMEOUT_MSEC,
      cleaner.getCleanerThreadTimeoutMsec());
  Assert.assertEquals(HFileCleaner.DEFAULT_HFILE_DELETE_THREAD_CHECK_INTERVAL_MSEC,
      cleaner.getCleanerThreadCheckIntervalMsec());

  // clean up archive directory and create files for testing
  fs.delete(archivedHfileDir, true);
  fs.mkdirs(archivedHfileDir);
  createFilesForTesting(LARGE_FILE_NUM, SMALL_FILE_NUM, fs, archivedHfileDir);

  // call cleaner, run as daemon to test the interrupt-at-middle case
  Thread t = new Thread() {
    @Override
    public void run() {
      cleaner.chore();
    }
  };
  t.setDaemon(true);
  t.start();
  // wait until file clean started
  // NOTE(review): this spin has no timeout; it never exits if no small file
  // is ever deleted.
  while (cleaner.getNumOfDeletedSmallFiles() == 0) {
    Thread.yield();
  }

  // trigger configuration change
  // The new settings go into a copy so the shared test configuration is not modified.
  Configuration newConf = new Configuration(conf);
  newConf.setInt(HFileCleaner.HFILE_DELETE_THROTTLE_THRESHOLD, UPDATE_THROTTLE_POINT);
  newConf.setInt(HFileCleaner.LARGE_HFILE_QUEUE_INIT_SIZE, UPDATE_QUEUE_INIT_SIZE);
  newConf.setInt(HFileCleaner.SMALL_HFILE_QUEUE_INIT_SIZE, UPDATE_QUEUE_INIT_SIZE);
  newConf.setInt(HFileCleaner.LARGE_HFILE_DELETE_THREAD_NUMBER, LARGE_THREAD_NUM);
  newConf.setInt(HFileCleaner.SMALL_HFILE_DELETE_THREAD_NUMBER, SMALL_THREAD_NUM);
  newConf.setLong(HFileCleaner.HFILE_DELETE_THREAD_TIMEOUT_MSEC, THREAD_TIMEOUT_MSEC);
  newConf.setLong(HFileCleaner.HFILE_DELETE_THREAD_CHECK_INTERVAL_MSEC,
      THREAD_CHECK_INTERVAL_MSEC);

  LOG.debug("File deleted from large queue: " + cleaner.getNumOfDeletedLargeFiles()
      + "; from small queue: " + cleaner.getNumOfDeletedSmallFiles());
  cleaner.onConfigurationChange(newConf);

  // check values after change
  Assert.assertEquals(UPDATE_THROTTLE_POINT, cleaner.getThrottlePoint());
  Assert.assertEquals(UPDATE_QUEUE_INIT_SIZE, cleaner.getLargeQueueInitSize());
  Assert.assertEquals(UPDATE_QUEUE_INIT_SIZE, cleaner.getSmallQueueInitSize());
  Assert.assertEquals(LARGE_THREAD_NUM + SMALL_THREAD_NUM, cleaner.getCleanerThreads().size());
  Assert.assertEquals(THREAD_TIMEOUT_MSEC, cleaner.getCleanerThreadTimeoutMsec());
  Assert.assertEquals(THREAD_CHECK_INTERVAL_MSEC, cleaner.getCleanerThreadCheckIntervalMsec());

  // make sure no cost when onConfigurationChange called with no change
  List<Thread> oldThreads = cleaner.getCleanerThreads();
  cleaner.onConfigurationChange(newConf);
  List<Thread> newThreads = cleaner.getCleanerThreads();
  Assert.assertArrayEquals(oldThreads.toArray(), newThreads.toArray());

  // wait until clean done and check
  t.join();
  LOG.debug("File deleted from large queue: " + cleaner.getNumOfDeletedLargeFiles()
      + "; from small queue: " + cleaner.getNumOfDeletedSmallFiles());
  // NOTE(review): presumably the lowered throttle point makes files created
  // as "small" count as large for the rest of the run, which is why the
  // large-queue count is expected to exceed LARGE_FILE_NUM and the
  // small-queue count to stay below SMALL_FILE_NUM — confirm against HFileCleaner.
  Assert.assertTrue("Should delete more than " + LARGE_FILE_NUM
      + " files from large queue but actually " + cleaner.getNumOfDeletedLargeFiles(),
    cleaner.getNumOfDeletedLargeFiles() > LARGE_FILE_NUM);
  Assert.assertTrue("Should delete less than " + SMALL_FILE_NUM
      + " files from small queue but actually " + cleaner.getNumOfDeletedSmallFiles(),
    cleaner.getNumOfDeletedSmallFiles() < SMALL_FILE_NUM);
}
 
Example 16
Source Project: ml-ease   File: Regression.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs the regression pipeline: data preparation, ADMM training and, when a
 * test path is configured, model testing followed by the test log-likelihood
 * computation.
 *
 * <p>When {@code force.output.overwrite} is true, the output base path is
 * deleted recursively before anything runs.
 *
 * @throws Exception if file system access or any of the sub-jobs fails
 */
@Override
public void run() throws Exception
{
  JobConfig config = super.getJobConfig();
  Path outBasePath = new Path(config.get(OUTPUT_BASE_PATH));
  JobConf conf = super.createJobConf();
  boolean overwriteOutput = config.getBoolean("force.output.overwrite", false);
  if (overwriteOutput)
  {
    outBasePath.getFileSystem(conf).delete(outBasePath, true);
  }

  // Stage 1: preparation job, writing to <output base>/tmp-data.
  String preparedDataPath = outBasePath + "/tmp-data";
  JobConfig prepareConfig = JobConfig.clone(config);
  prepareConfig.put(AbstractAvroJob.OUTPUT_PATH, preparedDataPath);
  new RegressionPrepare("Regression-Prepare", prepareConfig).run();

  // Stage 2: ADMM training over the prepared data.
  JobConfig trainConfig = JobConfig.clone(config);
  trainConfig.put(AbstractAvroJob.INPUT_PATHS, preparedDataPath);
  new RegressionAdmmTrain("Regression-Admm-Train", trainConfig).run();

  // Stages 3 and 4 only run when a test path is configured.
  if (!config.containsKey(TEST_PATH))
  {
    return;
  }

  // Stage 3: apply the trained model to the test data.
  JobConfig testConfig = JobConfig.clone(config);
  testConfig.put(AbstractAvroJob.INPUT_PATHS, config.get(TEST_PATH));
  testConfig.put(RegressionTest.MODEL_BASE_PATH, outBasePath.toString());
  String testOutputPath = outBasePath.toString()+"/test";
  testConfig.put(RegressionTest.OUTPUT_BASE_PATH, testOutputPath);
  new RegressionTest("Regression-Test", testConfig).run();

  // Stage 4: compute test loglikelihood; reads from and writes to the test output path.
  JobConfig loglikConfig = JobConfig.clone(config);
  loglikConfig.put(RegressionTestLoglik.INPUT_BASE_PATHS, testOutputPath);
  loglikConfig.put(RegressionTestLoglik.OUTPUT_BASE_PATH, testOutputPath);
  new RegressionTestLoglik("Regression-Test-Loglik", loglikConfig).run();
}
 
Example 17
Source Project: MapReduce-Demo   File: FlowSort.java    License: MIT License 4 votes vote down vote up
/**
 * Configures and runs the FlowSort job against a fixed HDFS namenode, deleting
 * the output directory first if it exists, and exits with status 0 on success
 * or 1 on failure.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if HDFS access or job submission fails
 * @throws ClassNotFoundException if a job class cannot be found
 * @throws InterruptedException if waiting for the job is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
	// Set up the HDFS configuration
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	// Set up the job configuration
	Job job = Job.getInstance(conf, "FlowSort");
	job.setJarByClass(FlowSort.class);
	job.setJar("export\\FlowSort.jar");
	// Mapper
	job.setMapperClass(SortMapper.class);
	job.setMapOutputKeyClass(MySortKey.class);
	job.setMapOutputValueClass(Text.class);
	// Reducer
	job.setReducerClass(SortReducer.class);
	job.setOutputKeyClass(Text.class);
	// The original called setOutputKeyClass twice, which discarded the Text key
	// class and never set the value class.
	// NOTE(review): assumes SortReducer emits (Text, MySortKey) — confirm.
	job.setOutputValueClass(MySortKey.class);
	// Job input and output paths
	String dataDir = "/workspace/flowStatistics/output/part-r-00000"; // experiment data directory
	String outputDir = "/workspace/flowStatistics/output_sort"; // experiment output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	// Remove output left by a previous run before submitting the job.
	FileSystem fs = FileSystem.get(conf);
	if (fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	// Run the job
	System.out.println("Job: FlowSort is running...");
	if (job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 18
Source Project: hbase   File: TestRecoveredEdits.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a fresh region, copies a pre-built recovered-edits file into the
 * region's recovered.edits directory, reopens the region so the edits are
 * replayed, and then checks the number of store files and that all edits are
 * present in the region.
 *
 * @param policy in-memory compaction policy; its lower-cased name is written
 *               into the configuration used to create the region
 * @throws IOException if file system or region operations fail
 */
private void testReplayWorksWithMemoryCompactionPolicy(MemoryCompactionPolicy policy) throws
  IOException {
  Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
  // Set it so we flush every 1M or so.  Thats a lot.
  conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024*1024);
  conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY, String.valueOf(policy).toLowerCase());
  // The file of recovered edits has a column family of 'meta'.
  final String columnFamily = "meta";
  byte[][] columnFamilyAsByteArray = new byte[][] { Bytes.toBytes(columnFamily) };
  TableDescriptor tableDescriptor = TableDescriptorBuilder
    .newBuilder(TableName.valueOf(testName.getMethodName())).setColumnFamily(
      ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily)).build())
    .build();
  RegionInfo hri = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
  final String encodedRegionName = hri.getEncodedName();
  Path hbaseRootDir = TEST_UTIL.getDataTestDir();
  FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
  Path tableDir = CommonFSUtils.getTableDir(hbaseRootDir, tableDescriptor.getTableName());
  HRegionFileSystem hrfs =
      new HRegionFileSystem(TEST_UTIL.getConfiguration(), fs, tableDir, hri);
  // Start from an empty region directory so no earlier run's files are picked up.
  if (fs.exists(hrfs.getRegionDir())) {
    LOG.info("Region directory already exists. Deleting.");
    fs.delete(hrfs.getRegionDir(), true);
  }
  HRegion region = HBaseTestingUtility
      .createRegionAndWAL(hri, hbaseRootDir, conf, tableDescriptor, blockCache);
  assertEquals(encodedRegionName, region.getRegionInfo().getEncodedName());
  List<String> storeFiles = region.getStoreFileList(columnFamilyAsByteArray);
  // There should be no store files.
  assertTrue(storeFiles.isEmpty());
  // Close the region so it can be reopened below with the recovered edits in place.
  region.close();
  Path regionDir = FSUtils.getRegionDirFromRootDir(hbaseRootDir, hri);
  Path recoveredEditsDir = WALSplitUtil.getRegionDirRecoveredEditsDir(regionDir);
  // This is a little fragile getting this path to a file of 10M of edits.
  Path recoveredEditsFile = new Path(
    System.getProperty("test.build.classes", "target/test-classes"),
      "0000000000000016310");
  // Copy this file under the region's recovered.edits dir so it is replayed on reopen.
  Path destination = new Path(recoveredEditsDir, recoveredEditsFile.getName());
  fs.copyToLocalFile(recoveredEditsFile, destination);
  assertTrue(fs.exists(destination));
  // Now the file 0000000000000016310 is under recovered.edits, reopen the region to replay.
  region = HRegion.openHRegion(region, null);
  assertEquals(encodedRegionName, region.getRegionInfo().getEncodedName());
  storeFiles = region.getStoreFileList(columnFamilyAsByteArray);
  // Our 0000000000000016310 is 10MB. Most of the edits are for one region. Lets assume that if
  // we flush at 1MB, that there are at least 3 flushed files that are there because of the
  // replay of edits.
  // NOTE(review): the comment above says "at least 3", but the non-EAGER/ADAPTIVE
  // branch below requires more than 10 store files — confirm which is intended.
  if(policy == MemoryCompactionPolicy.EAGER || policy == MemoryCompactionPolicy.ADAPTIVE) {
    assertTrue("Files count=" + storeFiles.size(), storeFiles.size() >= 1);
  } else {
    assertTrue("Files count=" + storeFiles.size(), storeFiles.size() > 10);
  }
  // Now verify all edits made it into the region.
  int count = verifyAllEditsMadeItIn(fs, conf, recoveredEditsFile, region);
  LOG.info("Checked " + count + " edits made it in");
}
 
Example 19
Source Project: mt-flume   File: TestHDFSEventSink.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Configures an HDFS sink with a deliberately slow writer and a 1000ms call
 * timeout, pushes batches of events whose "slow" header is 1500, and checks
 * that every {@code sink.process()} call returns {@code Status.BACKOFF}.
 */
@Test
public void testSlowAppendFailure() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {

  LOG.debug("Starting...");
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 2;
  String newPath = testPath + "/singleBucket";

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  // create HDFS sink with slow writer
  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);
  context.put("hdfs.callTimeout", Long.toString(1000));
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();

  // push the event batches into channel
  // Loop counters are scoped to their loops; the original's method-level
  // "i = 1, j = 1" initial values were never read.
  for (int i = 0; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (int j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      event.getHeaders().put("slow", "1500");
      event.setBody(("Test." + i + "." + j).getBytes());
      channel.put(event);
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    Status status = sink.process();

    // verify that the append returned backoff due to timeout
    // (expected value first, so a failure message reads correctly)
    Assert.assertEquals(Status.BACKOFF, status);
  }

  sink.stop();
}
 
Example 20
/**
 * Computes binary-classification metrics from the {@code rawPrediction} and
 * {@code label} columns of {@code predictions} and writes them as text to
 * {@code output + "binary_evaluation_" + <classification method> + ".txt"},
 * replacing any existing file at that path.
 *
 * <p>The output stream is closed in a {@code finally} block so it is released
 * even when computing or writing a metric throws.
 *
 * @param predictions data frame holding the "rawPrediction" and "label" columns
 * @param output prefix of the report path (concatenated as-is, no separator is added)
 * @param trainingSettings supplies the classification method used in the file name
 * @throws IOException if the report file cannot be deleted, created or written
 */
private void binaryEvaluation(DataFrame predictions, String output, TrainingSettings trainingSettings) throws IOException {

    FileSystem fs = FileSystem.get(new Configuration());
    Path evalPath = new Path(output+"binary_evaluation_"+trainingSettings.getClassificationMethod()+".txt");
    fs.delete(evalPath, true);
    FSDataOutputStream fsdos = fs.create(evalPath);

    try {
        BinaryClassificationMetrics metrics = new BinaryClassificationMetrics(predictions
                .select("rawPrediction", "label")
                .javaRDD()
                .map((Row row) -> {
                    Vector vector = row.getAs("rawPrediction");
                    Double label = row.getAs("label");
                    // Element 1 of the raw prediction is used as the score.
                    return new Tuple2<Object, Object>(vector.apply(1), label);
                }).rdd());

        // Precision by threshold
        JavaRDD<Tuple2<Object, Object>> precision = metrics.precisionByThreshold().toJavaRDD();
        IOUtils.write("\nPrecision by threshold: " + precision.collect(), fsdos);

        // Recall by threshold
        JavaRDD<Tuple2<Object, Object>> recall = metrics.recallByThreshold().toJavaRDD();
        IOUtils.write("\nRecall by threshold: " + recall.collect(), fsdos);

        // F Score by threshold
        JavaRDD<Tuple2<Object, Object>> f1Score = metrics.fMeasureByThreshold().toJavaRDD();
        IOUtils.write("\nF1 Score by threshold: " + f1Score.collect(), fsdos);

        JavaRDD<Tuple2<Object, Object>> f2Score = metrics.fMeasureByThreshold(2.0).toJavaRDD();
        IOUtils.write("\nF2 Score by threshold: " + f2Score.collect(), fsdos);

        // Precision-recall curve
        JavaRDD<Tuple2<Object, Object>> prc = metrics.pr().toJavaRDD();
        IOUtils.write("\nPrecision-recall curve: " + prc.collect(), fsdos);

        // ROC Curve
        JavaRDD<Tuple2<Object, Object>> roc = metrics.roc().toJavaRDD();
        IOUtils.write("\nROC curve: " + roc.collect(), fsdos);

        // AUPRC
        IOUtils.write("\nArea under precision-recall curve = " + metrics.areaUnderPR(), fsdos);

        // AUROC
        IOUtils.write("\nArea under ROC = " + metrics.areaUnderROC(), fsdos);

        fsdos.flush();
    } finally {
        // Runs on success and failure alike, so the stream is never leaked.
        IOUtils.closeQuietly(fsdos);
    }
}