org.apache.hadoop.fs.HarFileSystem Java Examples

The following examples show how to use org.apache.hadoop.fs.HarFileSystem, drawn from several Hadoop-derived projects. Each example notes the project and source file it was taken from.
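Before the examples, a minimal sketch of the read path may help. The namenode host nn, archive path /user/alice/foo.har, and file name a.txt are hypothetical placeholders; har://hdfs-nn/... follows the HAR URI convention of folding the underlying scheme and authority into the har authority.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.HarFileSystem;
import org.apache.hadoop.fs.Path;

public class HarReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Underlying file system that physically stores the archive.
    FileSystem hdfs = FileSystem.get(URI.create("hdfs://nn/"), conf);
    HarFileSystem harFs = new HarFileSystem(hdfs);
    try {
      // har://hdfs-nn/... layers the archive over hdfs://nn/...
      harFs.initialize(URI.create("har://hdfs-nn/user/alice/foo.har"), conf);
      Path inArchive = new Path("har://hdfs-nn/user/alice/foo.har/a.txt");
      FSDataInputStream in = harFs.open(inArchive);
      System.out.println("first byte: " + in.read()); // read like any FileSystem
      in.close();
    } finally {
      harFs.close();
    }
  }
}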
Example #1
Source File: HadoopArchives.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(this.conf);
  masterIndex = new Path(tmpOutputDir, "_masterindex");
  index = new Path(tmpOutputDir, "_index");
  try {
    fs = masterIndex.getFileSystem(conf);
    if (fs.exists(masterIndex)) {
      fs.delete(masterIndex, false);
    }
    if (fs.exists(index)) {
      fs.delete(index, false);
    }
    indexStream = fs.create(index);
    outStream = fs.create(masterIndex);
    String version = HarFileSystem.VERSION + " \n";
    outStream.write(version.getBytes());
    
  } catch(IOException e) {
    throw new RuntimeException(e);
  }
}
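Together with the part-* files written by the mappers, these two files make up a HAR archive's on-disk layout: _index holds one line per archived path, and _masterindex starts with the version line written here, followed by hash-range pointers into _index.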
 
Example #2
Source File: TestHarFileSystemWithHA.java    From hadoop with Apache License 2.0 (identical in the big-c fork)
/**
 * Create an empty Har archive in the FileSystem fs at the Path p.
 * 
 * @param fs the file system to create the Har archive in
 * @param p the path to create the Har archive at
 * @throws IOException in the event of error
 */
private static void createEmptyHarArchive(FileSystem fs, Path p)
    throws IOException {
  fs.mkdirs(p);
  OutputStream out = fs.create(new Path(p, "_masterindex"));
  out.write(Integer.toString(HarFileSystem.VERSION).getBytes());
  out.close();
  fs.create(new Path(p, "_index")).close();
}
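A hypothetical caller for the helper above, run against the local file system (the archive path is a placeholder). With an empty default Configuration the default file system is file:///, so the har:/// form layers over the local file system:

Configuration conf = new Configuration();
FileSystem localFs = FileSystem.getLocal(conf);
Path archive = new Path("/tmp/empty.har");
createEmptyHarArchive(localFs, archive);

// Mount the freshly created (empty) archive, as the surrounding test does.
HarFileSystem harFs = new HarFileSystem(localFs);
harFs.initialize(URI.create("har:///tmp/empty.har"), conf);
harFs.close();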
 
Example #3
Source File: HadoopArchives.java    From hadoop with Apache License 2.0 (identical in the big-c fork)
public void map(LongWritable key, HarEntry value,
    OutputCollector<IntWritable, Text> out,
    Reporter reporter) throws IOException {
  Path relPath = new Path(value.path);
  int hash = HarFileSystem.getHarHash(relPath);
  String towrite = null;
  Path srcPath = realPath(relPath, rootPath);
  long startPos = partStream.getPos();
  FileSystem srcFs = srcPath.getFileSystem(conf);
  FileStatus srcStatus = srcFs.getFileStatus(srcPath);
  String propStr = encodeProperties(srcStatus);
  if (value.isDir()) { 
    towrite = encodeName(relPath.toString())
              + " dir " + propStr + " 0 0 ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child: value.children) {
      sbuff.append(encodeName(child) + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  }
  else {
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + 
        " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = encodeName(relPath.toString())
              + " file " + partname + " " + startPos
              + " " + srcStatus.getLen() + " " + propStr + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
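Each emitted record becomes one line of the archive's _index, keyed by HarFileSystem.getHarHash(relPath) so that the reducer can group the lines into the hash ranges _masterindex points at. A sketch (not HarFileSystem's own parser) of decoding one such line, assuming encodeName URL-encodes names as the RDFS variant below makes explicit:

String[] f = indexLine.trim().split(" ");
String name = URLDecoder.decode(f[0], "UTF-8"); // archived path
if ("file".equals(f[1])) {
  String part   = f[2];                  // part-* file holding the bytes
  long startPos = Long.parseLong(f[3]);  // offset within the part file
  long length   = Long.parseLong(f[4]);  // byte length of the file
  String props  = f[5];                  // encoded mtime/perms/owner/group
} else {
  String props  = f[2];                  // "dir": props, then "0 0", then children
}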
 
Example #4
Source File: TestHadoopArchives.java    From hadoop with Apache License 2.0 (identical in the big-c fork)
@Test
/*
 * Tests copying from archive file system to a local file system
 */
public void testCopyToLocal() throws Exception {
  final String fullHarPathStr = makeArchive();

  // make path to copy the file to:
  final String tmpDir
    = System.getProperty("test.build.data","build/test/data") + "/work-dir/har-fs-tmp";
  final Path tmpPath = new Path(tmpDir);
  final LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
  localFs.delete(tmpPath, true);
  localFs.mkdirs(tmpPath);
  assertTrue(localFs.exists(tmpPath));
  
  // Create fresh HarFs:
  final HarFileSystem harFileSystem = new HarFileSystem(fs);
  try {
    final URI harUri = new URI(fullHarPathStr);
    harFileSystem.initialize(harUri, fs.getConf());
    
    final Path sourcePath = new Path(fullHarPathStr + Path.SEPARATOR + "a");
    final Path targetPath = new Path(tmpPath, "straus");
    // copy the Har file to a local file system:
    harFileSystem.copyToLocalFile(false, sourcePath, targetPath);
    FileStatus straus = localFs.getFileStatus(targetPath);
    // the file should contain just 1 character:
    assertEquals(1, straus.getLen());
  } finally {
    harFileSystem.close();
    localFs.delete(tmpPath, true);      
  }
}
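The first argument to copyToLocalFile is delSrc = false, so the source is left in place; HarFileSystem is read-only in any case, so deleting from the archive would not be permitted.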
 
Example #5
Source File: HadoopArchives.java    From RDFS with Apache License 2.0
public void map(LongWritable key, HarEntry value,
    OutputCollector<IntWritable, Text> out,
    Reporter reporter) throws IOException {
  Path relPath = new Path(value.path);
  int hash = HarFileSystem.getHarHash(relPath);
  String towrite = null;
  Path srcPath = realPath(relPath, rootPath);
  long startPos = partStream.getPos();
  FileSystem srcFs = srcPath.getFileSystem(conf);
  FileStatus srcStatus = srcFs.getFileStatus(srcPath);
  String propStr = URLEncoder.encode(
                      srcStatus.getModificationTime() + " "
                    + srcStatus.getAccessTime() + " "
                    + srcStatus.getPermission().toShort() + " "
                    + URLEncoder.encode(srcStatus.getOwner(), "UTF-8") + " "
                    + URLEncoder.encode(srcStatus.getGroup(), "UTF-8"),
                   "UTF-8");
  if (value.isDir()) { 
    towrite = URLEncoder.encode(relPath.toString(),"UTF-8")  
              + " dir " + propStr + " 0 0 ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child: value.children) {
      sbuff.append(URLEncoder.encode(child,"UTF-8") + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  }
  else {
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + 
        " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = URLEncoder.encode(relPath.toString(),"UTF-8")
              + " file " + partname + " " + startPos
              + " " + srcStatus.getLen() + " " + propStr + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
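This older RDFS variant inlines the URL-encoding that Example #3 factors into encodeName and encodeProperties; the record layout it emits is otherwise identical.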
 
Example #6
Source File: HadoopArchives.java    From hadoop-gpu with Apache License 2.0
public void map(LongWritable key, Text value,
    OutputCollector<IntWritable, Text> out,
    Reporter reporter) throws IOException {
  String line  = value.toString();
  MapStat mstat = new MapStat(line);
  Path srcPath = new Path(mstat.pathname);
  String towrite = null;
  Path relPath = makeRelative(srcPath);
  int hash = HarFileSystem.getHarHash(relPath);
  long startPos = partStream.getPos();
  if (mstat.isDir) { 
    towrite = relPath.toString() + " " + "dir none " + 0 + " " + 0 + " ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child: mstat.children) {
      sbuff.append(child + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  }
  else {
    FileSystem srcFs = srcPath.getFileSystem(conf);
    FileStatus srcStatus = srcFs.getFileStatus(srcPath);
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + 
        " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = relPath.toString() + " file " + partname + " " + startPos
    + " " + srcStatus.getLen() + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
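This still older hadoop-gpu variant predates the HarEntry writable: the mapper receives plain Text lines parsed by MapStat, writes path names unencoded, and records no per-file properties (directories get the literal none).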
 
Example #7
Source File: TestHadoopArchives.java    From hadoop with Apache License 2.0 (identical in the big-c fork)
@Test
public void testReadFileContent() throws Exception {
  fileList.add(createFile(inputPath, fs, "c c"));
  final Path sub1 = new Path(inputPath, "sub 1");
  fs.mkdirs(sub1);
  fileList.add(createFile(inputPath, fs, sub1.getName(), "file x y z"));
  fileList.add(createFile(inputPath, fs, sub1.getName(), "file"));
  fileList.add(createFile(inputPath, fs, sub1.getName(), "x"));
  fileList.add(createFile(inputPath, fs, sub1.getName(), "y"));
  fileList.add(createFile(inputPath, fs, sub1.getName(), "z"));
  final Path sub2 = new Path(inputPath, "sub 1 with suffix");
  fs.mkdirs(sub2);
  fileList.add(createFile(inputPath, fs, sub2.getName(), "z"));
  // Generate a big binary file content:
  final byte[] binContent = prepareBin();
  fileList.add(createFile(inputPath, fs, binContent, sub2.getName(), "bin"));
  fileList.add(createFile(inputPath, fs, new byte[0], sub2.getName(), "zero-length"));

  final String fullHarPathStr = makeArchive();

  // Create fresh HarFs:
  final HarFileSystem harFileSystem = new HarFileSystem(fs);
  try {
    final URI harUri = new URI(fullHarPathStr);
    harFileSystem.initialize(harUri, fs.getConf());
    // now read the file content and compare it against the expected:
    int readFileCount = 0;
    for (final String pathStr0 : fileList) {
      final Path path = new Path(fullHarPathStr + Path.SEPARATOR + pathStr0);
      final String baseName = path.getName();
      final FileStatus status = harFileSystem.getFileStatus(path);
      if (status.isFile()) {
        // read the file:
        final byte[] actualContentSimple = readAllSimple(
            harFileSystem.open(path), true);
        
        final byte[] actualContentBuffer = readAllWithBuffer(
            harFileSystem.open(path), true);
        assertArrayEquals(actualContentSimple, actualContentBuffer);
        
        final byte[] actualContentFully = readAllWithReadFully(
            actualContentSimple.length,
            harFileSystem.open(path), true);
        assertArrayEquals(actualContentSimple, actualContentFully);
        
        final byte[] actualContentSeek = readAllWithSeek(
            actualContentSimple.length,
            harFileSystem.open(path), true);
        assertArrayEquals(actualContentSimple, actualContentSeek);
        
        final byte[] actualContentRead4
        = readAllWithRead4(harFileSystem.open(path), true);
        assertArrayEquals(actualContentSimple, actualContentRead4);
        
        final byte[] actualContentSkip = readAllWithSkip(
            actualContentSimple.length, 
            harFileSystem.open(path), 
            harFileSystem.open(path), 
            true);
        assertArrayEquals(actualContentSimple, actualContentSkip);
        
        if ("bin".equals(baseName)) {
          assertArrayEquals(binContent, actualContentSimple);
        } else if ("zero-length".equals(baseName)) {
          assertEquals(0, actualContentSimple.length);
        } else {
          String actual = new String(actualContentSimple, "UTF-8");
          assertEquals(baseName, actual);
        }
        readFileCount++;
      }
    }
    assertEquals(fileList.size(), readFileCount);
  } finally {
    harFileSystem.close();
  }
}
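Note that each archived file is read six different ways (simple reads, buffered reads, readFully, seek-then-read, the four-argument read variant, and skip) and the results are cross-checked, so the test exercises most of the input-stream contract that HarFileSystem.open must honor.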
 
Example #8
Source File: RaidShell.java    From RDFS with Apache License 2.0
/**
 * Gets the parity blocks corresponding to a file: the parity blocks
 * themselves when the parity file is on DFS, or the part-file blocks
 * containing the parity data when the parity file lives in a HAR file
 * system.
 */
private BlockLocation[] getParityBlocks(final Path filePath,
                                        final long blockSize,
                                        final long numStripes,
                                        final RaidInfo raidInfo) 
  throws IOException {


  final String parityPathStr = raidInfo.parityPair.getPath().toUri().
    getPath();
  FileSystem parityFS = raidInfo.parityPair.getFileSystem();
  
  // get parity file metadata
  FileStatus parityFileStatus = parityFS.
    getFileStatus(new Path(parityPathStr));
  long parityFileLength = parityFileStatus.getLen();

  if (parityFileLength != numStripes * raidInfo.parityBlocksPerStripe *
      blockSize) {
    throw new IOException("expected parity file of length" + 
                          (numStripes * raidInfo.parityBlocksPerStripe *
                           blockSize) +
                          " but got parity file of length " + 
                          parityFileLength);
  }

  BlockLocation[] parityBlocks = 
    parityFS.getFileBlockLocations(parityFileStatus, 0L, parityFileLength);
  
  if (parityFS instanceof DistributedFileSystem ||
      parityFS instanceof DistributedRaidFileSystem) {
    long parityBlockSize = parityFileStatus.getBlockSize();
    if (parityBlockSize != blockSize) {
      throw new IOException("file block size is " + blockSize + 
                            " but parity file block size is " + 
                            parityBlockSize);
    }
  } else if (parityFS instanceof HarFileSystem) {
    LOG.debug("HAR FS found");
  } else {
    LOG.warn("parity file system is not of a supported type");
  }
  
  return parityBlocks;
}
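For a HAR-hosted parity file, the locations returned are those of the blocks of the underlying part-* file rather than of the parity file itself, and its block size need not match the data file's; this is why the block-size consistency check above runs only on the DFS branches.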