Java Code Examples for org.apache.hadoop.fs.FileSystem#listStatus()

The following examples show how to use org.apache.hadoop.fs.FileSystem#listStatus(). They are extracted from open source projects. You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. The right sidebar shows related API usage.
Example 1
Source Project: RDFS   File: TestDataJoin.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the job output found under {@code out}.
 *
 * <p>The directory must list exactly one non-empty file. Each line of that
 * file is split on tabs into {@code srcs + 1} integer fields which must
 * satisfy the arithmetic relations asserted below, and the file must hold
 * exactly four lines.
 *
 * @param out  directory holding the job output
 * @param job  job configuration used to resolve the file system and reader
 * @param srcs number of sources that were joined
 * @throws IOException if the output cannot be listed or read
 */
private static void confirmOutput(Path out, JobConf job, int srcs)
    throws IOException {
  FileSystem fs = out.getFileSystem(job);
  FileStatus[] outlist = fs.listStatus(out);
  assertEquals(1, outlist.length);
  assertTrue(0 < outlist[0].getLen());
  FSDataInputStream in = fs.open(outlist[0].getPath());
  try {
    LineRecordReader rr = new LineRecordReader(in, 0, Integer.MAX_VALUE, job);
    LongWritable k = new LongWritable();
    Text v = new Text();
    int count = 0;
    while (rr.next(k, v)) {
      String[] vals = v.toString().split("\t");
      assertEquals(srcs + 1, vals.length);
      int[] ivals = new int[vals.length];
      for (int i = 0; i < vals.length; ++i) {
        ivals[i] = Integer.parseInt(vals[i]);
      }
      // the first field is the key; the remaining fields are checked against it
      assertEquals(0, ivals[0] % (srcs * srcs));
      for (int i = 1; i < vals.length; ++i) {
        assertEquals((ivals[i] - (i - 1)) * srcs, 10 * ivals[0]);
      }
      ++count;
    }
    assertEquals(4, count);
  } finally {
    // release the stream even when an assertion or a read fails
    in.close();
  }
}
 
Example 2
Source Project: RDFS   File: JobClient.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether the job directory has the components required for
 * (re)starting the job.
 *
 * <p>The directory listing must have at least two entries, and exactly two
 * of them must be named {@code job.xml} or {@code job.split}.
 *
 * @param jobDirPath the job directory to inspect
 * @param fs         the file system used to list the directory
 * @return {@code true} when the directory passes the check
 * @throws IOException if the directory cannot be listed
 */
public static boolean isJobDirValid(Path jobDirPath, FileSystem fs)
throws IOException {
  FileStatus[] entries = fs.listStatus(jobDirPath);
  if (entries == null || entries.length < 2) {
    return false;
  }
  int hits = 0;
  for (FileStatus entry : entries) {
    String name = entry.getPath().getName();
    if ("job.xml".equals(name) || "job.split".equals(name)) {
      hits++;
    }
  }
  return hits == 2;
}
 
Example 3
Source Project: spork   File: FileBasedOutputSizeReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Adds up the lengths of the entries listed at the store's output location.
 *
 * @param sto POStore whose output location is inspected
 * @param conf configuration used to resolve the file system
 * @return the summed length in bytes, or -1 when the store is not supported
 * @throws IOException if the location cannot be listed
 */
@Override
public long getOutputSize(POStore sto, Configuration conf) throws IOException {
    if (!supports(sto, conf)) {
        String storeFuncName = sto.getStoreFunc().getClass().getCanonicalName();
        log.warn("'" + storeFuncName
                + "' is not supported by " + getClass().getCanonicalName());
        return -1;
    }

    Path location = new Path(getLocationUri(sto));
    FileStatus[] entries = location.getFileSystem(conf).listStatus(location);
    if (entries == null) {
        return 0;
    }

    long total = 0;
    for (FileStatus entry : entries) {
        total += entry.getLen();
    }
    return total;
}
 
Example 4
Source Project: systemds   File: TestUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads a boolean scalar from the files listed under {@code filePath}.
 *
 * <p>Every line of every listed file is parsed with
 * {@link Boolean#valueOf(String)}; the last line read wins. When no line is
 * found at all, the method fails with an assertion instead of a
 * {@code NullPointerException}.
 *
 * @param filePath directory containing the output file(s)
 * @return the parsed boolean; {@code _AssertOccured} is returned only if the
 *         read-failure assertion does not abort execution
 */
public static boolean readDMLBoolean(String filePath) {
	try {
		Boolean b = null;
		Path outDirectory = new Path(filePath);
		FileSystem fs = IOUtilFunctions.getFileSystem(outDirectory, conf);
		String line;
		FileStatus[] outFiles = fs.listStatus(outDirectory);
		for (FileStatus file : outFiles) {
			// open the stream inside try-with-resources so it is closed on every path
			try (FSDataInputStream fsout = fs.open(file.getPath());
				BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout))) {
				while ((line = outIn.readLine()) != null) { // only 1 scalar value in file
					b = Boolean.valueOf(line);
				}
			}
		}
		// an empty directory or empty files leave b unset; report that explicitly
		assertTrue("no boolean value found in " + filePath, b != null);
		return b.booleanValue();
	} catch (IOException e) {
		assertTrue("could not read from file " + filePath, false);
	}
	return _AssertOccured;
}
 
Example 5
/**
 * Looks for the first regular file whose name ends in ".avro" in the input
 * folder and returns the schema reported by the Avro reader.
 *
 * @param inputPathDir Path to input directory
 * @return Input schema, or {@code null} when no ".avro" file is listed
 * @throws IOException exception when accessing to IO
 */
private Schema getSchema(Path inputPathDir)
    throws IOException {
  FileSystem fs = FileSystem.get(new Configuration());
  for (FileStatus candidate : fs.listStatus(inputPathDir)) {
    boolean isAvroFile = candidate.isFile() && candidate.getPath().getName().endsWith(".avro");
    if (!isAvroFile) {
      continue;
    }
    _logger.info("Extracting schema from " + candidate.getPath());
    // NOTE(review): the reader is opened with the directory path rather than the
    // matched file's path — confirm this is what getAvroReader expects.
    try (DataFileStream<GenericRecord> reader = getAvroReader(inputPathDir)) {
      return reader.getSchema();
    }
  }
  return null;
}
 
Example 6
Source Project: tajo   File: TestJoinQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Walks the tree below {@code path} and collects directories that directly
 * contain a file.
 *
 * <p>Entries are visited in listing order. Sub-directories are descended
 * into; as soon as a plain file is met, {@code path} itself is appended and
 * the scan of this directory stops.
 *
 * @param fs   file system to list
 * @param path directory to start from
 * @return the collected paths (possibly empty)
 * @throws Exception if listing fails
 */
protected static List<Path> getPartitionPathList(FileSystem fs, Path path) throws Exception {
  List<Path> collected = new ArrayList<>();
  FileStatus[] children = fs.listStatus(path);
  if (children == null) {
    return collected;
  }

  for (FileStatus child : children) {
    if (!child.isFile()) {
      collected.addAll(getPartitionPathList(fs, child.getPath()));
      continue;
    }
    // first plain file: record this directory and stop scanning it
    collected.add(path);
    break;
  }
  return collected;
}
 
Example 7
Source Project: RDFS   File: DistCp.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Lists the chunk-file directory and returns the chunk files in order.
 *
 * <p>The chunk files are named 0, 1, 2, 3.... For example, if a file File1
 * is chopped into 3 chunks, there should be a directory /File1_chunkfiles
 * holding /File1_chunkfiles/0, /File1_chunkfiles/1 and /File1_chunkfiles/2.
 * The returned array contains the paths of those chunks in numeric order.
 * Missing chunks are detected by name: if only /File1_chunkfiles/0 and
 * /File1_chunkfiles/2 exist, /File1_chunkfiles/1 is reported as missing.
 *
 * @param conf configuration used to resolve the destination file system
 * @param jobConf not used by this method
 * @param args arguments whose {@code dst} determines the file system
 * @param chunkFileDir the directory named with filename_chunkfiles
 * @param chunkNum the number of chunk files expected in the directory
 * @return the paths to all the chunk files in the chunkFileDir
 * @throws IOException if listing fails or an expected chunk file is missing
 */
private static Path[] getChunkFilePaths(Configuration conf, JobConf jobConf,
    final Arguments args, Path chunkFileDir, int chunkNum) throws IOException{
  FileSystem dstfs = args.dst.getFileSystem(conf);
  FileStatus [] chunkFileStatus = dstfs.listStatus(chunkFileDir);
  // NOTE(review): a null listing (presumably a missing directory on some
  // Hadoop versions) is treated as empty so the missing-chunk check below
  // reports it instead of a NullPointerException.
  int listedCount = (chunkFileStatus == null) ? 0 : chunkFileStatus.length;
  HashSet <String> chunkFilePathSet = new HashSet<String>(listedCount);
  for(int i = 0; i < listedCount; ++i){
    chunkFilePathSet.add(chunkFileStatus[i].getPath().toUri().getPath());
  }
  Path[] chunkFilePaths = new Path[chunkNum];
  for(int i = 0; i < chunkNum; ++i) {
    //make sure we add the chunk file in order,and the chunk file name is 
    //named in number
    Path chunkFile = new Path(chunkFileDir, Integer.toString(i));
    //make sure the chunk file is not missing
    if(chunkFilePathSet.contains(chunkFile.toUri().getPath())) {
      chunkFilePaths[i] = chunkFile;
    } else {
      throw new IOException("Chunk File: " + chunkFile.toUri().getPath() +
          " doesn't exist!");
    }
  }
  return chunkFilePaths;
}
 
Example 8
/**
 * Commits the job by visiting every entry under the table output path and
 * committing each one that {@code isShard} recognises as a shard.
 *
 * @param jobContext context of the job being committed
 * @throws IOException if the output path cannot be listed or a commit fails
 */
@Override
public void commitJob(JobContext jobContext) throws IOException {
  LOG.info("Commiting Job [{0}]", jobContext.getJobID());
  Configuration conf = jobContext.getConfiguration();
  Path outputPath = BlurOutputFormat.getOutputPath(conf);
  LOG.info("TableOutput path [{0}]", outputPath);
  makeSureNoEmptyShards(conf, outputPath);

  // walk the direct children of the table output; only shards are committed
  FileStatus[] children = outputPath.getFileSystem(conf).listStatus(outputPath);
  for (int i = 0; i < children.length; i++) {
    FileStatus child = children[i];
    LOG.info("Checking file status [{0}] with path [{1}]", child, child.getPath());
    if (!isShard(child)) {
      continue;
    }
    commitOrAbortJob(jobContext, child.getPath(), true);
  }
  LOG.info("Commiting Complete [{0}]", jobContext.getJobID());
}
 
Example 9
/**
 * Lists {@code path} and returns, in sorted order, the entries whose name
 * starts with {@code BlurConstants.SHARD_PREFIX}.
 *
 * @return the matching paths, sorted by the set's natural ordering
 * @throws IOException if the listing fails
 */
private Path[] getPaths() throws IOException {
  SortedSet<Path> sortedShards = new TreeSet<Path>();
  for (FileStatus entry : path.getFileSystem(getConf()).listStatus(path)) {
    Path candidate = entry.getPath();
    if (!candidate.getName().startsWith(BlurConstants.SHARD_PREFIX)) {
      continue;
    }
    sortedShards.add(candidate);
  }
  Path[] result = new Path[sortedShards.size()];
  return sortedShards.toArray(result);
}
 
Example 10
Source Project: hbase   File: TestDeleteMobTable.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Counts the entries listed in the MOB family directory of a table.
 *
 * @param tn         table whose MOB directory is inspected
 * @param familyName column family name
 * @return number of listed entries, or 0 when the directory does not exist
 * @throws IOException if the file system cannot be queried
 */
private int countMobFiles(TableName tn, String familyName) throws IOException {
  FileSystem testFs = TEST_UTIL.getTestFileSystem();
  Path familyDir = MobUtils.getMobFamilyPath(TEST_UTIL.getConfiguration(), tn, familyName);
  return testFs.exists(familyDir) ? testFs.listStatus(familyDir).length : 0;
}
 
Example 11
Source Project: systemds   File: ReaderTextCSV.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads a CSV matrix from a single file or from all non-filtered files of a
 * directory into {@code dest}.
 *
 * <p>Directory entries are listed with {@code IOUtilFunctions.hiddenFileFilter}
 * and sorted with {@code Collections.sort}. When {@code dest} is
 * {@code null} it is first obtained from {@code computeCSVSize}, and
 * {@code clen} is taken from it. Each file is then read in order and the
 * returned non-zero counts are summed and stored on {@code dest}.
 *
 * @return the populated {@code dest} block
 * @throws IOException if a file cannot be listed or opened
 * @throws DMLRuntimeException propagated from the called helpers
 */
@SuppressWarnings("unchecked")
private static MatrixBlock readCSVMatrixFromHDFS( Path path, JobConf job, FileSystem fs, MatrixBlock dest, 
		long rlen, long clen, int blen, boolean hasHeader, String delim, boolean fill, double fillValue )
	throws IOException, DMLRuntimeException
{
	//collect the input files: a lone file, or the sorted directory listing
	ArrayList<Path> inputFiles = new ArrayList<>();
	if( !fs.isDirectory(path) ) {
		inputFiles.add(path);
	}
	else {
		FileStatus[] parts = fs.listStatus(path, IOUtilFunctions.hiddenFileFilter);
		for( FileStatus part : parts ) {
			inputFiles.add(part.getPath());
		}
		Collections.sort(inputFiles);
	}
	
	//no target block given: size it with an extra pass over the files
	if( dest == null ) {
		dest = computeCSVSize(inputFiles, job, fs, hasHeader, delim, fill, fillValue);
		clen = dest.getNumColumns();
	}
	
	//read file by file, flagging the first one for the callee
	long nnzTotal = 0;
	MutableInt rowPos = new MutableInt(0);
	boolean firstFile = true;
	for( Path file : inputFiles ) {
		nnzTotal += readCSVMatrixFromInputStream(fs.open(file), path.toString(), dest, 
			rowPos, rlen, clen, blen, hasHeader, delim, fill, fillValue, firstFile);
		firstFile = false;
	}
	
	//record the accumulated non-zero count
	dest.setNonZeros( nnzTotal );
	return dest;
}
 
Example 12
Source Project: tajo   File: StorageUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Written files can be one of two forms: "part-[0-9]*-[0-9]*" or "part-[0-9]*-[0-9]*-[0-9]*".
 *
 * This method scans the entries of a directory and returns the largest sequence number
 * among the file names matching {@code fileNamePatternV08} or {@code fileNamePatternV09}.
 * With {@code recursive} set, sub-directories are scanned as well and contribute their
 * own maximum (e.g. the partition directories of a partitioned table).
 *
 * @param fs file system to query
 * @param path directory to scan
 * @param recursive whether to descend into sub-directories
 * @return The maximum sequence number, or -1 if {@code path} is not a directory,
 *         is empty, or holds no matching file
 * @throws java.io.IOException if the file system cannot be queried
 */
public static int getMaxFileSequence(FileSystem fs, Path path, boolean recursive) throws IOException {
  if (!fs.isDirectory(path)) {
    return -1;
  }

  FileStatus[] entries = fs.listStatus(path);
  if (entries == null || entries.length == 0) {
    return -1;
  }

  int best = -1;
  for (FileStatus entry : entries) {
    int candidate;
    if (entry.isDirectory() && recursive) {
      candidate = getMaxFileSequence(fs, entry.getPath(), recursive);
    } else {
      String name = entry.getPath().getName();
      boolean matches = name.matches(fileNamePatternV08) || name.matches(fileNamePatternV09);
      if (!matches) {
        continue;
      }
      candidate = getSequence(name);
    }
    best = Math.max(best, candidate);
  }
  return best;
}
 
Example 13
/**
 * Cleanup staging data of all tasks of a job.
 *
 * <p>Deletes the writer staging directory and the writer output directory,
 * removing each one's parent directory as well when it is left empty. When a
 * row-level error file is configured and error-directory cleanup is enabled,
 * that path is deleted too. All progress messages go to the supplied
 * {@code logger}.
 *
 * @param state a {@link State} instance storing job configuration properties
 * @param logger a {@link Logger} used for logging
 * @throws IOException if a file system operation fails
 */
public static void cleanJobStagingData(State state, Logger logger) throws IOException {
  Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_STAGING_DIR),
      "Missing required property " + ConfigurationKeys.WRITER_STAGING_DIR);
  Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_OUTPUT_DIR),
      "Missing required property " + ConfigurationKeys.WRITER_OUTPUT_DIR);

  String writerFsUri = state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
  FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state));

  Path jobStagingPath = new Path(state.getProp(ConfigurationKeys.WRITER_STAGING_DIR));
  logger.info("Cleaning up staging directory " + jobStagingPath);
  deletePathAndPruneEmptyParent(fs, jobStagingPath, logger);

  Path jobOutputPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR));
  logger.info("Cleaning up output directory " + jobOutputPath);
  deletePathAndPruneEmptyParent(fs, jobOutputPath, logger);

  if (state.contains(ConfigurationKeys.ROW_LEVEL_ERR_FILE)
      && state.getPropAsBoolean(ConfigurationKeys.CLEAN_ERR_DIR, ConfigurationKeys.DEFAULT_CLEAN_ERR_DIR)) {
    Path jobErrPath = new Path(state.getProp(ConfigurationKeys.ROW_LEVEL_ERR_FILE));
    // use the caller-supplied logger, consistent with the messages above
    logger.info("Cleaning up err directory : " + jobErrPath);
    HadoopUtils.deleteIfExists(fs, jobErrPath, true);
  }
}

/**
 * Deletes {@code path} recursively, then deletes its parent directory if the
 * parent exists and has no remaining entries. A path without a parent (the
 * root) is handled without touching any parent.
 */
private static void deletePathAndPruneEmptyParent(FileSystem fs, Path path, Logger logger) throws IOException {
  HadoopUtils.deletePath(fs, path, true);

  Path parent = path.getParent();
  if (parent != null && fs.exists(parent) && fs.listStatus(parent).length == 0) {
    logger.info("Deleting directory " + parent);
    HadoopUtils.deletePath(fs, parent, true);
  }
}
 
Example 14
/**
 * Builds a sorted map from each entry listed under the given directories to
 * the directory it was listed from. Paths for which {@code isFile} is true
 * are skipped.
 *
 * @param fileSystem file system used for the checks and listings
 * @param inuseDirs  directories to expand
 * @return map of listed entry path to the directory that contains it
 * @throws IOException if a check or listing fails
 */
private Map<Path, Path> toMap(FileSystem fileSystem, Set<Path> inuseDirs) throws IOException {
  Map<Path, Path> childToDir = new TreeMap<Path, Path>();
  for (Path dir : inuseDirs) {
    if (fileSystem.isFile(dir)) {
      continue;
    }
    for (FileStatus child : fileSystem.listStatus(dir)) {
      childToDir.put(child.getPath(), dir);
    }
  }
  return childToDir;
}
 
Example 15
Source Project: big-c   File: GenericMRLoadGenerator.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Configures and runs the load-generator job.
 *
 * <p>Without an output path the job uses {@code NullOutputFormat}. Without
 * input paths, {@code confRandom} configures the job. When an indirect input
 * format is configured, a sequence file under /tmp is written that lists
 * (length, URI) for every file found below the input paths, skipping
 * directories whose name starts with "_".
 *
 * @param argv command-line arguments handed to {@code parseArgs}
 * @return -1 if the arguments cannot be parsed, 0 if the job succeeds, 1 otherwise
 * @throws Exception on any failure while configuring or running the job
 */
public int run(String [] argv) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return -1;
  }

  Configuration conf = job.getConfiguration();
  if (FileOutputFormat.getOutputPath(job) == null) {
    // nothing to write to, so discard the output
    job.setOutputFormatClass(NullOutputFormat.class);
  }

  if (FileInputFormat.getInputPaths(job).length == 0) {
    // no input given: let confRandom set the job up
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (conf.getClass(INDIRECT_INPUT_FORMAT, null) != null) {
    // indirect input requested: write the list of source files
    // NOTE(review): jClient is never read; kept in case the constructor has side effects
    JobClient jClient = new JobClient(conf);  
    Path tmpDir = new Path("/tmp");
    Random r = new Random();
    String listFileName = Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files";
    Path indirInputFile = new Path(tmpDir, listFileName);
    conf.set(INDIRECT_INPUT_FILE, indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        tmpDir.getFileSystem(conf), conf, indirInputFile,
        LongWritable.class, Text.class,
        SequenceFile.CompressionType.NONE);
    try {
      for (Path inputRoot : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = inputRoot.getFileSystem(conf);
        Stack<Path> pending = new Stack<Path>();
        pending.push(inputRoot);
        while (!pending.empty()) {
          for (FileStatus entry : fs.listStatus(pending.pop())) {
            if (!entry.isDirectory()) {
              writer.sync();
              writer.append(new LongWritable(entry.getLen()),
                  new Text(entry.getPath().toUri().toString()));
            } else if (!entry.getPath().getName().startsWith("_")) {
              pending.push(entry.getPath());
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret;
  if (job.waitForCompletion(true)) {
    ret = 0;
  } else {
    ret = 1;
  }
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  long elapsedSeconds = (endTime.getTime() - startTime.getTime()) / 1000;
  System.out.println("The job took " + elapsedSeconds + " seconds.");

  return ret;
}
 
Example 16
Source Project: tez   File: TestMRRJobsDAGApi.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs a single-vertex sleep DAG in non-session mode with simple history
 * logging pointed at a local directory, then checks that a non-empty history
 * log file with the expected name prefix was written there.
 *
 * <p>The Tez client is stopped in a {@code finally} block so that a failed
 * wait or assertion does not leave it running.
 */
@Test(timeout = 60000)
public void testHistoryLogging() throws IOException,
    InterruptedException, TezException, ClassNotFoundException, YarnException {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = DAG.create("TezSleepProcessorHistoryLogging");
  Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(
          SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 2,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random
      .nextInt(100000))));
  remoteFs.mkdirs(remoteStagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

  FileSystem localFs = FileSystem.getLocal(tezConf);
  Path historyLogDir = new Path(TEST_ROOT_DIR, "testHistoryLogging");
  localFs.mkdirs(historyLogDir);

  tezConf.set(TezConfiguration.TEZ_SIMPLE_HISTORY_LOGGING_DIR,
      localFs.makeQualified(historyLogDir).toString());

  tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
  TezClient tezSession = TezClient.create("TezSleepProcessorHistoryLogging", tezConf);
  tezSession.start();
  try {
    DAGClient dagClient = tezSession.submitDAG(dag);

    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());

    // pick the first plain file carrying the history log prefix
    FileStatus historyLogFileStatus = null;
    for (FileStatus fileStatus : localFs.listStatus(historyLogDir)) {
      if (fileStatus.isDirectory()) {
        continue;
      }
      Path p = fileStatus.getPath();
      if (p.getName().startsWith(SimpleHistoryLoggingService.LOG_FILE_NAME_PREFIX)) {
        historyLogFileStatus = fileStatus;
        break;
      }
    }
    Assert.assertNotNull(historyLogFileStatus);
    Assert.assertTrue(historyLogFileStatus.getLen() > 0);
  } finally {
    // always release the client, even when the checks above fail
    tezSession.stop();
  }
}
 
Example 17
Source Project: hadoop   File: TestDistCh.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs DistCh against a generated file tree on a mini DFS cluster and checks
 * that every sub-directory, and every entry listed directly under it, ends
 * up with the owner/group/permission requested for it.
 *
 * @throws Exception on any cluster, DistCh, or verification failure
 */
public void testDistCh() throws Exception {
  final Configuration conf = new Configuration();

  // capacity scheduler: a single "default" queue holding all capacity
  final String rootQueueKey =
      CapacitySchedulerConfiguration.PREFIX + CapacitySchedulerConfiguration.ROOT;
  conf.set(rootQueueKey + "." + CapacitySchedulerConfiguration.QUEUES, "default");
  conf.set(rootQueueKey + ".default." + CapacitySchedulerConfiguration.CAPACITY, "100");
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(2).format(true).build();

  final FileSystem fs = cluster.getFileSystem();
  final FsShell shell = new FsShell(conf);

  try {
    final FileTree tree = new FileTree(fs, "testDistCh");
    final FileStatus rootstatus = fs.getFileStatus(tree.rootdir);

    runLsr(shell, tree.root, 0);

    // one DistCh argument and one expected status per sub-directory
    final String[] chArgs = new String[NUN_SUBS];
    final ChPermissionStatus[] expected = new ChPermissionStatus[NUN_SUBS];

    chArgs[0] = "/test/testDistCh/sub0:sub1::";
    expected[0] = new ChPermissionStatus(rootstatus, "sub1", "", "");

    chArgs[1] = "/test/testDistCh/sub1::sub2:";
    expected[1] = new ChPermissionStatus(rootstatus, "", "sub2", "");

    chArgs[2] = "/test/testDistCh/sub2:::437";
    expected[2] = new ChPermissionStatus(rootstatus, "", "", "437");

    chArgs[3] = "/test/testDistCh/sub3:sub1:sub2:447";
    expected[3] = new ChPermissionStatus(rootstatus, "sub1", "sub2", "447");

    chArgs[4] = "/test/testDistCh/sub4::sub5:437";
    expected[4] = new ChPermissionStatus(rootstatus, "", "sub5", "437");

    chArgs[5] = "/test/testDistCh/sub5:sub1:sub5:";
    expected[5] = new ChPermissionStatus(rootstatus, "sub1", "sub5", "");

    chArgs[6] = "/test/testDistCh/sub6:sub3::437";
    expected[6] = new ChPermissionStatus(rootstatus, "sub3", "", "437");

    System.out.println("args=" + Arrays.asList(chArgs).toString().replace(",", ",\n  "));
    System.out.println("newstatus=" + Arrays.asList(expected).toString().replace(",", ",\n  "));

    // run DistCh with the configuration of a freshly created mini MR cluster
    Configuration mrConf =
        MiniMRClientClusterFactory.create(this.getClass(), 2, conf).getConfig();
    new DistCh(mrConf).run(chArgs);
    runLsr(shell, tree.root, 0);

    // verify each sub-directory and the entries listed under it
    for (int subIdx = 0; subIdx < NUN_SUBS; subIdx++) {
      Path sub = new Path(tree.root + "/sub" + subIdx);
      checkFileStatus(expected[subIdx], fs.getFileStatus(sub));
      for (FileStatus child : fs.listStatus(sub)) {
        checkFileStatus(expected[subIdx], child);
      }
    }
  } finally {
    cluster.shutdown();
  }
}
 
Example 18
/**
 * Configures and runs pass 1 of the RNA pipeline.
 *
 * <p>The input is either a single path or every non-directory entry of the
 * input directory. If the output directory already holds a {@code _SUCCESS}
 * marker, pass 1 is skipped; if it exists without the marker, the process is
 * asked to exit with status -2.
 *
 * @param pass1Conf configuration for the pass 1 job
 * @param tmpOutDir output directory of pass 1
 * @return 0 when pass 1 is skipped, otherwise the result of {@code runTimedJob}
 */
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, halvadeOpts.nodes == 1, halvadeOpts.useBamInput);
    int pass2Reduces = HalvadeResourceManager.getPass2Reduces(halvadeOpts);
    halvadeOpts.splitChromosomes(pass1Conf, pass2Reduces);
    HalvadeConf.setPass2Suffix(pass1Conf, pass2suffix);

    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);

    // register the input: one file, or every plain file of the input directory
    FileSystem inFs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        Path inputPath = new Path(halvadeOpts.in);
        if (!inFs.getFileStatus(inputPath).isDirectory()) {
            FileInputFormat.addInputPath(pass1Job, inputPath);
        } else {
            for (FileStatus entry : inFs.listStatus(inputPath)) {
                if (entry.isDirectory()) {
                    continue;
                }
                FileInputFormat.addInputPath(pass1Job, entry.getPath());
            }
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }

    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    Path outPath = new Path(tmpOutDir);
    if (outFs.exists(outPath)) {
        // a _SUCCESS marker means the genome was already built
        if (outFs.exists(new Path(tmpOutDir + "/_SUCCESS"))) {
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
            return 0;
        }
        Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }

    FileOutputFormat.setOutputPath(pass1Job, outPath);
    pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);

    pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
    pass1Job.setMapOutputKeyClass(GenomeSJ.class);
    pass1Job.setMapOutputValueClass(Text.class);

    pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
    pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
    pass1Job.setNumReduceTasks(1);
    pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
    pass1Job.setOutputKeyClass(LongWritable.class);
    pass1Job.setOutputValueClass(Text.class);

    return runTimedJob(pass1Job, "Halvade pass 1 Job");
}
 
Example 19
Source Project: datafu   File: Examples.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Counts the entries under {@code path} accepted by
 * {@code PathUtils.nonHiddenPathFilter}.
 *
 * @param path directory to list
 * @return number of accepted entries
 * @throws IOException if the listing fails
 */
private int countOutputFolders(Path path) throws IOException
{
  return getFileSystem().listStatus(path, PathUtils.nonHiddenPathFilter).length;
}
 
Example 20
Source Project: tajo   File: TestHBaseTable.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates an HBase-mapped table and a 100-row base table, runs
 * {@code insert into location '/tmp/hfile_test'}, and checks that the
 * location holds exactly two entries with the expected names, each
 * containing only non-empty files. Both tables are dropped afterwards.
 */
@Test
public void testInsertIntoLocation() throws Exception {
  executeString(
      "CREATE TABLE hbase_mapped_table (rk text, col1 text, col2 text) TABLESPACE cluster1 " +
      "USING hbase WITH ('table'='hbase_table', 'columns'=':key,col1:a,col2:', " +
      "'hbase.split.rowkeys'='010,040,060,080')").close();

  assertTableExists("hbase_mapped_table");

  try {
    // source table: rows 099 down to 000, three text columns
    Schema schema = SchemaBuilder.builder()
        .add("id", Type.TEXT)
        .add("name", Type.TEXT)
        .add("comment", Type.TEXT)
        .build();
    DecimalFormat idFormat = new DecimalFormat("000");
    List<String> rows = new ArrayList<>();
    for (int i = 99; i >= 0; i--) {
      rows.add(idFormat.format(i) + "|value" + i + "|comment-" + i);
    }
    String[] rowArray = rows.toArray(new String[rows.size()]);
    TajoTestingCluster.createTable(conf, getCurrentDatabase() + ".base_table",
        schema, rowArray, 2);

    executeString("insert into location '/tmp/hfile_test' " +
        "select id, name, comment from base_table ").close();

    FileSystem fs = testingCluster.getDefaultFileSystem();
    Path location = new Path("/tmp/hfile_test");
    assertTrue(fs.exists(location));

    FileStatus[] parts = fs.listStatus(location);
    assertNotNull(parts);
    assertEquals(2, parts.length);

    // the n-th listed entry must carry n in both numbered positions of its name
    for (int index = 0; index < parts.length; index++) {
      FileStatus part = parts[index];
      String expectedPath = "/tmp/hfile_test/part-01-00000" + index + "-00" + index;
      assertEquals(expectedPath, part.getPath().toUri().getPath());
      for (FileStatus subFile : fs.listStatus(part.getPath())) {
        assertTrue(subFile.isFile());
        assertTrue(subFile.getLen() > 0);
      }
    }
  } finally {
    executeString("DROP TABLE base_table PURGE").close();
    executeString("DROP TABLE hbase_mapped_table PURGE").close();
  }
}