Java Code Examples for org.apache.hadoop.fs.FileSystem#listStatus()

The following examples show how to use org.apache.hadoop.fs.FileSystem#listStatus(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.

Example 1

Source File: TestUtils.java From systemds with Apache License 2.0

6 votes

/**
 * Reads a boolean scalar from the files found under the given output directory.
 *
 * <p>Every entry returned by {@code listStatus} is opened and read line by line; each line is
 * parsed with {@link Boolean#parseBoolean(String)} and the last line read determines the result.
 *
 * @param filePath path of the output directory holding the scalar file(s)
 * @return the parsed boolean; {@code _AssertOccured} is returned only if one of the failing
 *         assertions below did not abort the caller
 */
public static boolean readDMLBoolean(String filePath) {
	try {
		Boolean b = null;
		Path outDirectory = new Path(filePath);
		FileSystem fs = IOUtilFunctions.getFileSystem(outDirectory, conf);
		for (FileStatus file : fs.listStatus(outDirectory)) {
			// manage both the raw stream and the reader so neither leaks on failure
			try (FSDataInputStream fsout = fs.open(file.getPath());
				BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout))) {
				String line;
				while ((line = outIn.readLine()) != null) { // only 1 scalar value in file
					b = Boolean.parseBoolean(line);
				}
			}
		}
		if (b != null) {
			return b;
		}
		// no file yielded a value: report it as an assertion failure rather than
		// letting the unboxing of a null Boolean raise a NullPointerException
		assertTrue("could not find a boolean value in " + filePath, false);
	} catch (IOException e) {
		assertTrue("could not read from file " + filePath, false);
	}
	return _AssertOccured;
}

Example 2

Source File: BlurOutputCommitter.java From incubator-retired-blur with Apache License 2.0

6 votes

/**
 * Commits the job: ensures no shard is empty, then walks the entries of the table
 * output directory and hands every entry recognised as a shard to
 * {@code commitOrAbortJob} with the flag set to {@code true}.
 *
 * @param jobContext context of the job being committed
 * @throws IOException if the output file system cannot be accessed
 */
@Override
public void commitJob(JobContext jobContext) throws IOException {
  LOG.info("Commiting Job [{0}]", jobContext.getJobID());

  final Configuration conf = jobContext.getConfiguration();
  final Path outputDir = BlurOutputFormat.getOutputPath(conf);
  LOG.info("TableOutput path [{0}]", outputDir);
  makeSureNoEmptyShards(conf, outputDir);

  // Visit each child of the table output; only shard directories are processed.
  final FileSystem outputFs = outputDir.getFileSystem(conf);
  for (FileStatus child : outputFs.listStatus(outputDir)) {
    LOG.info("Checking file status [{0}] with path [{1}]", child, child.getPath());
    if (!isShard(child)) {
      continue;
    }
    commitOrAbortJob(jobContext, child.getPath(), true);
  }

  LOG.info("Commiting Complete [{0}]", jobContext.getJobID());
}

Example 3

Source File: DistCp.java From RDFS with Apache License 2.0

6 votes

/**
 * Collects the chunk files stored in the directory created for one chopped file.
 * The chunk files are named 0, 1, 2, 3, ...
 * For example, if a file File1 is chopped into 3 chunks,
 * then there is a directory /File1_chunkfiles containing three files:
 * /File1_chunkfiles/0, /File1_chunkfiles/1, /File1_chunkfiles/2
 * The returned array holds the paths of those chunks in numeric order.
 * Missing chunks are detected through the chunk file names:
 * if only /File1_chunkfiles/0 and /File1_chunkfiles/2 exist,
 * /File1_chunkfiles/1 is known to be missing.
 * @param conf configuration used to resolve the destination file system
 * @param jobConf job configuration (not used by this method)
 * @param args arguments whose {@code dst} path selects the file system
 * @param chunkFileDir the directory named with filename_chunkfiles
 * @param chunkNum the number of chunk files expected in chunkFileDir
 * @return the paths to all the chunk files in the chunkFileDir, indexed by chunk number
 * @throws IOException if the directory cannot be listed or a chunk file is missing
 */
private static Path[] getChunkFilePaths(Configuration conf, JobConf jobConf,
    final Arguments args, Path chunkFileDir, int chunkNum) throws IOException{
  FileSystem dstfs = args.dst.getFileSystem(conf);
  FileStatus [] chunkFileStatus = dstfs.listStatus(chunkFileDir);
  // guard against a null listing instead of failing with a NullPointerException below
  if (chunkFileStatus == null) {
    throw new IOException("Chunk file directory: " + chunkFileDir +
        " could not be listed!");
  }
  HashSet <String> chunkFilePathSet = new HashSet<String>(chunkFileStatus.length);
  for(FileStatus chunkfs:chunkFileStatus){
    chunkFilePathSet.add(chunkfs.getPath().toUri().getPath());
  }
  Path[] chunkFilePaths = new Path[chunkNum];
  for(int i = 0; i < chunkNum; ++i) {
    //add the chunk files in order; each chunk file is named by its number
    Path chunkFile = new Path(chunkFileDir, Integer.toString(i));
    String chunkFilePath = chunkFile.toUri().getPath();
    //make sure the chunk file is not missing
    if(!chunkFilePathSet.contains(chunkFilePath)) {
      throw new IOException("Chunk File: " + chunkFilePath +
          " doesn't exist!");
    }
    chunkFilePaths[i] = chunkFile;
  }
  return chunkFilePaths;
}

Example 4

Source File: TestJoinQuery.java From tajo with Apache License 2.0

6 votes

/**
 * Recursively gathers partition directories below {@code path}.
 *
 * <p>Children are visited in listing order. Sub-directories are descended into; as soon as a
 * plain file is met, {@code path} itself is appended and the scan of this directory stops.
 *
 * @param fs file system to list
 * @param path directory to inspect
 * @return the collected paths (empty when the listing is {@code null} or has no entries)
 * @throws Exception if listing fails
 */
protected static List<Path> getPartitionPathList(FileSystem fs, Path path) throws Exception {
  FileStatus[] children = fs.listStatus(path);
  List<Path> collected = new ArrayList<>();
  if (children == null) {
    return collected;
  }

  for (FileStatus child : children) {
    if (!child.isFile()) {
      collected.addAll(getPartitionPathList(fs, child.getPath()));
      continue;
    }
    // A plain file marks this directory as a leaf: record it and stop scanning.
    collected.add(path);
    break;
  }
  return collected;
}

Example 5

Source File: HadoopSegmentPreprocessingJob.java From incubator-pinot with Apache License 2.0

6 votes

/**
 * Looks for a file whose name ends with ".avro" directly inside the input folder and, when
 * one is present, returns the schema reported by the reader obtained from
 * {@code getAvroReader(inputPathDir)}.
 *
 * @param inputPathDir Path to input directory
 * @return the Avro schema, or {@code null} when the folder holds no ".avro" file
 * @throws IOException if the file system or the Avro reader cannot be accessed
 */
private Schema getSchema(Path inputPathDir)
    throws IOException {
  FileSystem fileSystem = FileSystem.get(new Configuration());
  for (FileStatus candidate : fileSystem.listStatus(inputPathDir)) {
    boolean isAvroFile = candidate.isFile() && candidate.getPath().getName().endsWith(".avro");
    if (!isAvroFile) {
      continue;
    }
    _logger.info("Extracting schema from " + candidate.getPath());
    // Only the first matching file triggers the lookup; the reader is closed before returning.
    try (DataFileStream<GenericRecord> reader = getAvroReader(inputPathDir)) {
      return reader.getSchema();
    }
  }
  return null;
}

Example 6

Source File: FileBasedOutputSizeReader.java From spork with Apache License 2.0

6 votes

/**
 * Sums the lengths of the entries listed at the store's output location.
 *
 * @param sto POStore whose output location is measured
 * @param conf configuration used to resolve the file system
 * @return total length in bytes of the listed entries, {@code 0} when the listing is
 *         {@code null}, or {@code -1} when the store is not supported by this reader
 * @throws IOException if the location cannot be listed
 */
@Override
public long getOutputSize(POStore sto, Configuration conf) throws IOException {
    if (!supports(sto, conf)) {
        log.warn("'" + sto.getStoreFunc().getClass().getCanonicalName()
                + "' is not supported by " + getClass().getCanonicalName());
        return -1;
    }

    Path location = new Path(getLocationUri(sto));
    FileStatus[] entries = location.getFileSystem(conf).listStatus(location);
    if (entries == null) {
        return 0;
    }

    long total = 0;
    for (int i = 0; i < entries.length; i++) {
        total += entries[i].getLen();
    }
    return total;
}

Example 7

Source File: JobClient.java From RDFS with Apache License 2.0

6 votes

/**
 * Tells whether the job directory contains the components needed to (re)start the job:
 * the listing must hold at least two entries and exactly two of them must be named
 * "job.xml" or "job.split".
 *
 * @param jobDirPath the job directory to inspect
 * @param fs the file system holding the directory
 * @return {@code true} when both required files were counted, {@code false} otherwise
 * @throws IOException if the directory cannot be listed
 */
public static boolean isJobDirValid(Path jobDirPath, FileSystem fs)
throws IOException {
  FileStatus[] entries = fs.listStatus(jobDirPath);
  if (entries == null || entries.length < 2) {
    return false;
  }

  int found = 0;
  for (FileStatus entry : entries) {
    String name = entry.getPath().getName();
    if ("job.xml".equals(name) || "job.split".equals(name)) {
      found++;
    }
  }
  return found == 2;
}

Example 8

Source File: TestDataJoin.java From RDFS with Apache License 2.0

6 votes

/**
 * Verifies the single output file of the join job: it must be non-empty and contain
 * exactly four tab-separated records of {@code srcs + 1} integers that satisfy the
 * arithmetic relations asserted below.
 *
 * @param out output directory of the job
 * @param job job configuration used to resolve the file system and to read lines
 * @param srcs number of joined sources
 * @throws IOException if the output cannot be listed or read
 */
private static void confirmOutput(Path out, JobConf job, int srcs)
    throws IOException {
  FileSystem fs = out.getFileSystem(job);
  FileStatus[] outlist = fs.listStatus(out);
  assertEquals(1, outlist.length);
  assertTrue(0 < outlist[0].getLen());
  FSDataInputStream in = fs.open(outlist[0].getPath());
  try {
    LineRecordReader rr = new LineRecordReader(in, 0, Integer.MAX_VALUE, job);
    LongWritable k = new LongWritable();
    Text v = new Text();
    int count = 0;
    while (rr.next(k, v)) {
      String[] vals = v.toString().split("\t");
      assertEquals(srcs + 1, vals.length);
      int[] ivals = new int[vals.length];
      for (int i = 0; i < vals.length; ++i)
        ivals[i] = Integer.parseInt(vals[i]);
      // the first column must be a multiple of srcs^2
      assertEquals(0, ivals[0] % (srcs * srcs));
      for (int i = 1; i < vals.length; ++i) {
        assertEquals((ivals[i] - (i - 1)) * srcs, 10 * ivals[0]);
      }
      ++count;
    }
    assertEquals(4, count);
  } finally {
    // release the stream even when an assertion above fails
    in.close();
  }
}

Example 9

Source File: ReaderTextCSV.java From systemds with Apache License 2.0

5 votes

/**
 * Reads a CSV matrix from a single file or from all files of a directory into {@code dest}.
 *
 * <p>For a directory, the entries accepted by {@code IOUtilFunctions.hiddenFileFilter} are
 * collected and sorted before being read one after another. When {@code dest} is
 * {@code null}, it is first created by {@code computeCSVSize} and {@code clen} is taken
 * from it.
 *
 * @return the populated matrix block with its non-zero count set
 * @throws IOException if a file cannot be listed or opened
 * @throws DMLRuntimeException propagated from the helper routines
 */
@SuppressWarnings("unchecked")
private static MatrixBlock readCSVMatrixFromHDFS( Path path, JobConf job, FileSystem fs, MatrixBlock dest, 
		long rlen, long clen, int blen, boolean hasHeader, String delim, boolean fill, double fillValue )
	throws IOException, DMLRuntimeException
{
	//collect the input files (sorted when reading a directory)
	ArrayList<Path> inputs = new ArrayList<>();
	if( !fs.isDirectory(path) ) {
		inputs.add(path);
	}
	else {
		FileStatus[] parts = fs.listStatus(path, IOUtilFunctions.hiddenFileFilter);
		for( int i = 0; i < parts.length; i++ )
			inputs.add(parts[i].getPath());
		Collections.sort(inputs);
	}
	
	//size the target matrix with an extra pass when no destination was given
	if( dest == null ) {
		dest = computeCSVSize(inputs, job, fs, hasHeader, delim, fill, fillValue);
		clen = dest.getNumColumns();
	}
	
	//read file by file, sharing the row cursor; the last flag is true for the first file only
	long nnz = 0;
	MutableInt rowPos = new MutableInt(0);
	boolean first = true;
	for( Path input : inputs ) {
		nnz += readCSVMatrixFromInputStream(fs.open(input), path.toString(), dest, 
			rowPos, rlen, clen, blen, hasHeader, delim, fill, fillValue, first);
		first = false;
	}
	
	//record the accumulated number of non-zeros
	dest.setNonZeros( nnz );
	
	return dest;
}

Example 10

Source File: TestDeleteMobTable.java From hbase with Apache License 2.0

5 votes

/**
 * Counts the entries in the MOB family directory of the given table.
 *
 * @param tn table name
 * @param familyName column family name
 * @return number of entries listed in the directory, or {@code 0} when it does not exist
 * @throws IOException if the test file system cannot be queried
 */
private int countMobFiles(TableName tn, String familyName) throws IOException {
  FileSystem testFs = TEST_UTIL.getTestFileSystem();
  Path familyDir = MobUtils.getMobFamilyPath(TEST_UTIL.getConfiguration(), tn, familyName);
  return testFs.exists(familyDir) ? testFs.listStatus(familyDir).length : 0;
}

Example 11

Source File: StorageUtil.java From tajo with Apache License 2.0

5 votes

/**
 * Written files can be one of two forms: "part-[0-9]*-[0-9]*" or "part-[0-9]*-[0-9]*-[0-9]*".
 *
 * Scans the entries of {@code path} and returns the largest sequence number among the file
 * names matching one of the two known patterns. With {@code recursive} set, sub-directories
 * (e.g. partition directories) are searched as well and the overall maximum is returned.
 *
 * @param fs file system to inspect
 * @param path directory to scan
 * @param recursive whether sub-directories are descended into
 * @return The maximum sequence number, or -1 if {@code path} is not a directory, is empty,
 *         or holds no matching file
 * @throws java.io.IOException if the file system cannot be queried
 */
public static int getMaxFileSequence(FileSystem fs, Path path, boolean recursive) throws IOException {
  if (!fs.isDirectory(path)) {
    return -1;
  }

  FileStatus[] entries = fs.listStatus(path);
  if (entries == null || entries.length == 0) {
    return -1;
  }

  int best = -1;
  for (FileStatus entry : entries) {
    if (recursive && entry.isDirectory()) {
      // Partitioned layout: fold in the maximum found below this directory.
      best = Math.max(best, getMaxFileSequence(fs, entry.getPath(), recursive));
      continue;
    }

    String name = entry.getPath().getName();
    if (name.matches(fileNamePatternV08) || name.matches(fileNamePatternV09)) {
      best = Math.max(best, getSequence(name));
    }
  }
  return best;
}

Example 12

Source File: JobLauncherUtils.java From incubator-gobblin with Apache License 2.0

5 votes

/**
 * Cleanup staging data of all tasks of a job.
 *
 * <p>Deletes the writer staging and output directories, removes each directory's parent
 * when that parent exists and is left empty, and optionally deletes the row-level error
 * path when error-directory cleanup is enabled.
 *
 * @param state a {@link State} instance storing job configuration properties
 * @param logger a {@link Logger} used for logging
 * @throws IOException if a file system operation fails
 */
public static void cleanJobStagingData(State state, Logger logger) throws IOException {
  Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_STAGING_DIR),
      "Missing required property " + ConfigurationKeys.WRITER_STAGING_DIR);
  Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_OUTPUT_DIR),
      "Missing required property " + ConfigurationKeys.WRITER_OUTPUT_DIR);

  String writerFsUri = state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
  FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state));

  Path jobStagingPath = new Path(state.getProp(ConfigurationKeys.WRITER_STAGING_DIR));
  logger.info("Cleaning up staging directory " + jobStagingPath);
  HadoopUtils.deletePath(fs, jobStagingPath, true);

  // getParent() yields null for a root path, so check before querying the file system
  Path stagingParent = jobStagingPath.getParent();
  if (stagingParent != null && fs.exists(stagingParent) && fs.listStatus(stagingParent).length == 0) {
    logger.info("Deleting directory " + stagingParent);
    HadoopUtils.deletePath(fs, stagingParent, true);
  }

  Path jobOutputPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR));
  logger.info("Cleaning up output directory " + jobOutputPath);
  HadoopUtils.deletePath(fs, jobOutputPath, true);

  Path outputParent = jobOutputPath.getParent();
  if (outputParent != null && fs.exists(outputParent) && fs.listStatus(outputParent).length == 0) {
    logger.info("Deleting directory " + outputParent);
    HadoopUtils.deletePath(fs, outputParent, true);
  }

  if (state.contains(ConfigurationKeys.ROW_LEVEL_ERR_FILE)) {
    if (state.getPropAsBoolean(ConfigurationKeys.CLEAN_ERR_DIR, ConfigurationKeys.DEFAULT_CLEAN_ERR_DIR)) {
      Path jobErrPath = new Path(state.getProp(ConfigurationKeys.ROW_LEVEL_ERR_FILE));
      // use the caller-supplied logger, like every other message in this method
      logger.info("Cleaning up err directory : " + jobErrPath);
      HadoopUtils.deleteIfExists(fs, jobErrPath, true);
    }
  }
}

Example 13

Source File: TableShardCountCollapser.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Lists the children of {@code path} whose names start with the shard prefix.
 *
 * @return the matching paths in the sorted order of a {@link TreeSet}
 * @throws IOException if the directory cannot be listed
 */
private Path[] getPaths() throws IOException {
  SortedSet<Path> shardDirs = new TreeSet<Path>();
  for (FileStatus entry : path.getFileSystem(getConf()).listStatus(path)) {
    Path candidate = entry.getPath();
    if (!candidate.getName().startsWith(BlurConstants.SHARD_PREFIX)) {
      continue;
    }
    shardDirs.add(candidate);
  }
  Path[] result = new Path[shardDirs.size()];
  return shardDirs.toArray(result);
}

Example 14

Source File: IndexImporter.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Builds a sorted map from every child of the given in-use directories to the directory
 * that contains it. Entries of {@code inuseDirs} that are plain files are skipped.
 *
 * @param fileSystem file system used for the listings
 * @param inuseDirs the paths to expand
 * @return map of child path to its parent directory
 * @throws IOException if a path cannot be queried or listed
 */
private Map<Path, Path> toMap(FileSystem fileSystem, Set<Path> inuseDirs) throws IOException {
  Map<Path, Path> childToParent = new TreeMap<Path, Path>();
  for (Path dir : inuseDirs) {
    if (fileSystem.isFile(dir)) {
      continue;
    }
    for (FileStatus child : fileSystem.listStatus(dir)) {
      childToParent.put(child.getPath(), dir);
    }
  }
  return childToParent;
}

Example 15

Source File: MapReduceRunner.java From halvade with GNU General Public License v3.0

4 votes

/**
 * Configures and runs the first pass of the RNA pipeline.
 *
 * <p>Input files are taken from {@code halvadeOpts.in} (every non-directory entry when it is
 * a directory). If {@code tmpOutDir} already exists and contains a {@code _SUCCESS} marker
 * the pass is skipped; if it exists without the marker the process exits with status -2.
 *
 * @param pass1Conf configuration for the pass 1 job
 * @param tmpOutDir output directory of pass 1
 * @return the result of {@code runTimedJob}, or 0 when the pass is skipped
 */
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1, halvadeOpts.nodes == 1, halvadeOpts.useBamInput);
    halvadeOpts.splitChromosomes(pass1Conf, HalvadeResourceManager.getPass2Reduces(halvadeOpts));
    // set pass 2 suffix so only this job finds it!
    HalvadeConf.setPass2Suffix(pass1Conf, pass2suffix);

    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);

    // register the input: a single file, or every plain file of a directory
    FileSystem inFs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        Path inPath = new Path(halvadeOpts.in);
        if (!inFs.getFileStatus(inPath).isDirectory()) {
            FileInputFormat.addInputPath(pass1Job, inPath);
        } else {
            for (FileStatus entry : inFs.listStatus(inPath)) {
                if (entry.isDirectory()) {
                    continue;
                }
                FileInputFormat.addInputPath(pass1Job, entry.getPath());
            }
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }

    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    Path outPath = new Path(tmpOutDir);
    if (outFs.exists(outPath)) {
        // a _SUCCESS marker means the genome was already built by an earlier run
        if (outFs.exists(new Path(tmpOutDir + "/_SUCCESS"))) {
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
            return 0;
        }
        Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }

    FileOutputFormat.setOutputPath(pass1Job, outPath);
    pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);

    pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
    pass1Job.setMapOutputKeyClass(GenomeSJ.class);
    pass1Job.setMapOutputValueClass(Text.class);

    pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
    pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
    pass1Job.setNumReduceTasks(1);
    pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
    pass1Job.setOutputKeyClass(LongWritable.class);
    pass1Job.setOutputValueClass(Text.class);

    return runTimedJob(pass1Job, "Halvade pass 1 Job");
}

Example 16

Source File: TestDistCh.java From hadoop with Apache License 2.0

4 votes

/**
 * Runs DistCh on a mini DFS cluster against the sub-directories of a generated file tree
 * and checks that each sub-directory, and every entry listed directly inside it, matches
 * the expected status.
 *
 * <p>NOTE(review): each argument looks like {@code path:owner:group:permission} with empty
 * fields left unchanged — confirm against DistCh's argument parsing.
 */
public void testDistCh() throws Exception {
  final Configuration conf = new Configuration();

  final String rootQueueKey = CapacitySchedulerConfiguration.PREFIX + CapacitySchedulerConfiguration.ROOT;
  conf.set(rootQueueKey + "." + CapacitySchedulerConfiguration.QUEUES, "default");
  conf.set(rootQueueKey + ".default." + CapacitySchedulerConfiguration.CAPACITY, "100");
  final MiniDFSCluster dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).format(true).build();

  final FileSystem fs = dfsCluster.getFileSystem();
  final FsShell shell = new FsShell(conf);

  try {
    final FileTree fileTree = new FileTree(fs, "testDistCh");
    final FileStatus rootStatus = fs.getFileStatus(fileTree.rootdir);

    runLsr(shell, fileTree.root, 0);

    // one DistCh argument and one expected status per sub-directory
    final String[] args = new String[NUN_SUBS];
    final ChPermissionStatus[] expected = new ChPermissionStatus[NUN_SUBS];

    args[0] = "/test/testDistCh/sub0:sub1::";
    expected[0] = new ChPermissionStatus(rootStatus, "sub1", "", "");

    args[1] = "/test/testDistCh/sub1::sub2:";
    expected[1] = new ChPermissionStatus(rootStatus, "", "sub2", "");

    args[2] = "/test/testDistCh/sub2:::437";
    expected[2] = new ChPermissionStatus(rootStatus, "", "", "437");

    args[3] = "/test/testDistCh/sub3:sub1:sub2:447";
    expected[3] = new ChPermissionStatus(rootStatus, "sub1", "sub2", "447");

    args[4] = "/test/testDistCh/sub4::sub5:437";
    expected[4] = new ChPermissionStatus(rootStatus, "", "sub5", "437");

    args[5] = "/test/testDistCh/sub5:sub1:sub5:";
    expected[5] = new ChPermissionStatus(rootStatus, "sub1", "sub5", "");

    args[6] = "/test/testDistCh/sub6:sub3::437";
    expected[6] = new ChPermissionStatus(rootStatus, "sub3", "", "437");

    System.out.println("args=" + Arrays.asList(args).toString().replace(",", ",\n  "));
    System.out.println("newstatus=" + Arrays.asList(expected).toString().replace(",", ",\n  "));

    // execute DistCh with the configuration of a freshly created mini MR cluster
    new DistCh(MiniMRClientClusterFactory.create(this.getClass(), 2, conf).getConfig()).run(args);
    runLsr(shell, fileTree.root, 0);

    // verify each sub-directory and its direct children
    for (int i = 0; i < NUN_SUBS; i++) {
      final Path subDir = new Path(fileTree.root + "/sub" + i);
      checkFileStatus(expected[i], fs.getFileStatus(subDir));
      for (FileStatus child : fs.listStatus(subDir)) {
        checkFileStatus(expected[i], child);
      }
    }
  } finally {
    dfsCluster.shutdown();
  }
}

Example 17

Source File: GenericMRLoadGenerator.java From big-c with Apache License 2.0

4 votes

/**
 * Configures and runs the load-generator job.
 *
 * <p>Without an output path a {@code NullOutputFormat} is used. Without input paths random
 * data is configured. When an indirect input format is configured, all files found below
 * the input paths (skipping directories whose name starts with "_") are written, together
 * with their lengths, to a sequence file under /tmp that is registered in the configuration.
 *
 * @param argv command line arguments
 * @return -1 when the arguments cannot be parsed, 0 when the job succeeds, 1 otherwise
 */
public int run(String [] argv) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return -1;
  }

  Configuration conf = job.getConfiguration();
  if (FileOutputFormat.getOutputPath(job) == null) {
    // nothing to write to: discard the output
    job.setOutputFormatClass(NullOutputFormat.class);
  }

  if (FileInputFormat.getInputPaths(job).length == 0) {
    // nothing to read from: generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (conf.getClass(INDIRECT_INPUT_FORMAT, null) != null) {
    // an IndirectInputFormat was requested: build the list of source files
    // NOTE(review): jClient is never referenced below; kept in case its constructor matters
    JobClient jClient = new JobClient(conf);  
    Path tmpDir = new Path("/tmp");
    Random r = new Random();
    String listName = Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files";
    Path indirInputFile = new Path(tmpDir, listName);
    conf.set(INDIRECT_INPUT_FILE, indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        tmpDir.getFileSystem(conf), conf, indirInputFile,
        LongWritable.class, Text.class,
        SequenceFile.CompressionType.NONE);
    try {
      for (Path inputRoot : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = inputRoot.getFileSystem(conf);
        // depth-first walk using an explicit stack of pending directories
        Stack<Path> pending = new Stack<Path>();
        pending.push(inputRoot);
        while (!pending.empty()) {
          for (FileStatus entry : fs.listStatus(pending.pop())) {
            if (!entry.isDirectory()) {
              writer.sync();
              writer.append(new LongWritable(entry.getLen()),
                  new Text(entry.getPath().toUri().toString()));
              continue;
            }
            if (!entry.getPath().getName().startsWith("_")) {
              pending.push(entry.getPath());
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  long elapsedSeconds = (endTime.getTime() - startTime.getTime()) / 1000;
  System.out.println("The job took " + elapsedSeconds + " seconds.");

  return ret;
}

Example 18

Source File: Examples.java From datafu with Apache License 2.0

4 votes

/**
 * Counts the entries of {@code path} accepted by {@code PathUtils.nonHiddenPathFilter}.
 *
 * @param path directory to list
 * @return number of accepted entries
 * @throws IOException if the directory cannot be listed
 */
private int countOutputFolders(Path path) throws IOException
{
  FileStatus[] visible = getFileSystem().listStatus(path, PathUtils.nonHiddenPathFilter);
  return visible.length;
}

Example 19

Source File: TestHBaseTable.java From tajo with Apache License 2.0

4 votes

/**
 * Inserts the rows of a freshly created base table into the location /tmp/hfile_test and
 * checks that exactly two output directories with the expected names are produced, each
 * containing only non-empty files. Both tables are dropped afterwards.
 */
@Test
public void testInsertIntoLocation() throws Exception {
  executeString(
      "CREATE TABLE hbase_mapped_table (rk text, col1 text, col2 text) TABLESPACE cluster1 " +
      "USING hbase WITH ('table'='hbase_table', 'columns'=':key,col1:a,col2:', " +
      "'hbase.split.rowkeys'='010,040,060,080')").close();

  assertTableExists("hbase_mapped_table");

  try {
    // source table with three text columns
    Schema baseSchema = SchemaBuilder.builder()
        .add("id", Type.TEXT)
        .add("name", Type.TEXT)
        .add("comment", Type.TEXT)
        .build();

    // 100 rows, generated from id 099 down to 000
    DecimalFormat idFormat = new DecimalFormat("000");
    List<String> rows = new ArrayList<>();
    int id = 99;
    while (id >= 0) {
      rows.add(idFormat.format(id) + "|value" + id + "|comment-" + id);
      id--;
    }
    TajoTestingCluster.createTable(conf, getCurrentDatabase() + ".base_table",
        baseSchema, rows.toArray(new String[rows.size()]), 2);

    executeString("insert into location '/tmp/hfile_test' " +
        "select id, name, comment from base_table ").close();

    FileSystem fs = testingCluster.getDefaultFileSystem();
    Path outputDir = new Path("/tmp/hfile_test");
    assertTrue(fs.exists(outputDir));

    FileStatus[] parts = fs.listStatus(outputDir);
    assertNotNull(parts);
    assertEquals(2, parts.length);

    for (int index = 0; index < parts.length; index++) {
      FileStatus part = parts[index];
      assertEquals("/tmp/hfile_test/part-01-00000" + index + "-00" + index, part.getPath().toUri().getPath());
      for (FileStatus inner : fs.listStatus(part.getPath())) {
        assertTrue(inner.isFile());
        assertTrue(inner.getLen() > 0);
      }
    }
  } finally {
    executeString("DROP TABLE base_table PURGE").close();
    executeString("DROP TABLE hbase_mapped_table PURGE").close();
  }
}

Example 20

Source File: TestMRRJobsDAGApi.java From tez with Apache License 2.0

4 votes

/**
 * Submits a two-task sleep DAG in non-session mode with simple history logging pointed at
 * a local directory, waits for the DAG to succeed, and verifies that a non-empty history
 * log file with the expected name prefix was written there.
 */
@Test(timeout = 60000)
public void testHistoryLogging() throws IOException,
    InterruptedException, TezException, ClassNotFoundException, YarnException {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = DAG.create("TezSleepProcessorHistoryLogging");
  Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(
          SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 2,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random
      .nextInt(100000))));
  remoteFs.mkdirs(remoteStagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

  FileSystem localFs = FileSystem.getLocal(tezConf);
  Path historyLogDir = new Path(TEST_ROOT_DIR, "testHistoryLogging");
  localFs.mkdirs(historyLogDir);

  tezConf.set(TezConfiguration.TEZ_SIMPLE_HISTORY_LOGGING_DIR,
      localFs.makeQualified(historyLogDir).toString());

  tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
  TezClient tezSession = TezClient.create("TezSleepProcessorHistoryLogging", tezConf);
  tezSession.start();
  try {
    DAGClient dagClient = tezSession.submitDAG(dag);

    // poll until the DAG reaches a terminal state
    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());

    // pick the first plain file carrying the history log prefix
    FileStatus historyLogFileStatus = null;
    for (FileStatus fileStatus : localFs.listStatus(historyLogDir)) {
      if (fileStatus.isDirectory()) {
        continue;
      }
      Path p = fileStatus.getPath();
      if (p.getName().startsWith(SimpleHistoryLoggingService.LOG_FILE_NAME_PREFIX)) {
        historyLogFileStatus = fileStatus;
        break;
      }
    }
    Assert.assertNotNull(historyLogFileStatus);
    Assert.assertTrue(historyLogFileStatus.getLen() > 0);
  } finally {
    // stop the client even when submission or an assertion fails
    tezSession.stop();
  }
}