Java Code Examples for org.apache.hadoop.fs.FileSystem.open()

The following are Java code examples showing how to use the open() method of the org.apache.hadoop.fs.FileSystem class.
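For orientation before the project examples below, here is a minimal, self-contained sketch of the typical open() pattern. The path and Configuration are placeholders rather than values from any of the projects; open() returns an FSDataInputStream, which adds seek() and positional reads on top of a plain InputStream.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemOpenSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/example.txt"); // placeholder path
    // Resolve the FileSystem that owns this path (HDFS, local, etc.)
    FileSystem fs = path.getFileSystem(conf);
    // open() returns an FSDataInputStream; close it when done
    try (FSDataInputStream in = fs.open(path);
         BufferedReader reader = new BufferedReader(
             new InputStreamReader(in, StandardCharsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        System.out.println(line);
      }
    }
  }
}
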
Example 1
Project: hadoop-oss   File: BloomMapFile.java
private void initBloomFilter(Path dirName, 
                             Configuration conf) {
  
  DataInputStream in = null;
  try {
    FileSystem fs = dirName.getFileSystem(conf);
    in = fs.open(new Path(dirName, BLOOM_FILE_NAME));
    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(in);
    in.close();
    in = null;
  } catch (IOException ioe) {
    LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    IOUtils.closeStream(in);
  }
}
 
Example 2
Project: DStream   File: PredictorHotKeyUtilTest.java
@Test
public void Main() throws Exception {
    String inputFile = "/user/root/flinkwordcount/input/resultTweets.txt";
    FileSystem fs = HdfsOperationUtil.getFs();
    FSDataInputStream dataInputStream = fs.open(new Path(inputFile));
    BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(dataInputStream));
    long startTimeSystemTime = System.currentTimeMillis();
    String text = null;
    while ((text = bufferedReader.readLine()) != null) {
        predictorHotKeyUtil.simpleComputPredictorHotKey(text);
    }
    long endTimeSystemTime = System.currentTimeMillis();
    LOG.info("startTime:" + new Timestamp(startTimeSystemTime));
    LOG.info("endTime:" + new Timestamp(endTimeSystemTime));
    long timelong = (endTimeSystemTime - startTimeSystemTime) / 1000;
    LOG.info("totalTime:" + timelong + " s" + "------or------" + timelong / 60 + " min");
    System.exit(0);
}
 
Example 3
Project: hadoop   File: TestAuditLogs.java
/** test that denied operation puts proper entry in audit log */
@Test
public void testAuditDenied() throws Exception {
  final Path file = new Path(fnames[0]);
  FileSystem userfs = DFSTestUtil.getFileSystemAs(userGroupInfo, conf);

  fs.setPermission(file, new FsPermission((short)0600));
  fs.setOwner(file, "root", null);

  setupAuditLogs();

  try {
    userfs.open(file);
    fail("open must not succeed");
  } catch(AccessControlException e) {
    System.out.println("got access denied, as expected.");
  }
  verifyAuditLogs(false);
}
 
Example 4
Project: hadoop   File: TestRead.java
private void testEOF(MiniDFSCluster cluster, int fileLength) throws IOException {
  FileSystem fs = cluster.getFileSystem();
  Path path = new Path("testEOF." + fileLength);
  DFSTestUtil.createFile(fs, path, fileLength, (short)1, 0xBEEFBEEF);
  FSDataInputStream fis = fs.open(path);
  ByteBuffer empty = ByteBuffer.allocate(0);
  // A read into an empty bytebuffer at the beginning of the file gives 0.
  Assert.assertEquals(0, fis.read(empty));
  fis.seek(fileLength);
  // A read into an empty bytebuffer at the end of the file gives -1.
  Assert.assertEquals(-1, fis.read(empty));
  if (fileLength > BLOCK_SIZE) {
    fis.seek(fileLength - BLOCK_SIZE + 1);
    ByteBuffer dbb = ByteBuffer.allocateDirect(BLOCK_SIZE);
    Assert.assertEquals(BLOCK_SIZE - 1, fis.read(dbb));
  }
  fis.close();
}
 
Example 5
Project: hadoop   File: TestMRIntermediateDataEncryption.java
private void verifyOutput(RunningJob submittedJob, FileSystem fileSystem, int numMappers, int numLines)
  throws Exception {
  FSDataInputStream dis = null;
  long numValidRecords = 0;
  long numInvalidRecords = 0;
  String prevKeyValue = "000000000";
  Path[] fileList =
    FileUtil.stat2Paths(fileSystem.listStatus(OUTPUT,
        new Utils.OutputFileUtils.OutputFilesFilter()));
  for (Path outFile : fileList) {
    try {
      dis = fileSystem.open(outFile);
      String record;
      while((record = dis.readLine()) != null) {
        // Split the line into key and value.
        int blankPos = record.indexOf(" ");
        String keyString = record.substring(0, blankPos);
        String valueString = record.substring(blankPos+1);
        // Check for sorted output and correctness of record.
        if (keyString.compareTo(prevKeyValue) >= 0
            && keyString.equals(valueString)) {
          prevKeyValue = keyString;
          numValidRecords++;
        } else {
          numInvalidRecords++;
        }
      }
    } finally {
      if (dis != null) {
        dis.close();
        dis = null;
      }
    }
  }
  // Make sure we got all input records in the output in sorted order.
  assertEquals((long)(numMappers * numLines), numValidRecords);
  // Make sure there is no extraneous invalid record.
  assertEquals(0, numInvalidRecords);
}
 
Example 6
Project: hadoop-oss   File: ContractTestUtils.java
/**
 * Read in "length" bytes, convert to an ascii string
 * @param fs filesystem
 * @param path path to read
 * @param length #of bytes to read.
 * @return the bytes read and converted to a string
 * @throws IOException IO problems
 */
public static String readBytesToString(FileSystem fs,
                                Path path,
                                int length) throws IOException {
  FSDataInputStream in = fs.open(path);
  try {
    byte[] buf = new byte[length];
    in.readFully(0, buf);
    return toChar(buf);
  } finally {
    in.close();
  }
}
 
Example 7
Project: hadoop   File: TestWebHDFSForHA.java
@Test
public void testFailoverAfterOpen() throws IOException {
  Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
  conf.set(FS_DEFAULT_NAME_KEY, HdfsConstants.HDFS_URI_SCHEME +
      "://" + LOGICAL_NAME);
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  final Path p = new Path("/test");
  final byte[] data = "Hello".getBytes();

  try {
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo)
            .numDataNodes(1).build();

    HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);

    cluster.waitActive();

    fs = FileSystem.get(WEBHDFS_URI, conf);
    cluster.transitionToActive(1);

    FSDataOutputStream out = fs.create(p);
    cluster.shutdownNameNode(1);
    cluster.transitionToActive(0);

    out.write(data);
    out.close();
    FSDataInputStream in = fs.open(p);
    byte[] buf = new byte[data.length];
    IOUtils.readFully(in, buf, 0, buf.length);
    Assert.assertArrayEquals(data, buf);
  } finally {
    IOUtils.cleanup(null, fs);
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 8
Project: hadoop   File: TestPread.java
private void datanodeRestartTest(MiniDFSCluster cluster, FileSystem fileSys,
    Path name) throws IOException {
  // skip this test if using simulated storage since simulated blocks
  // don't survive datanode restarts.
  if (simulatedStorage) {
    return;
  }
  int numBlocks = 1;
  assertTrue(numBlocks <= DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT);
  byte[] expected = new byte[numBlocks * blockSize];
  Random rand = new Random(seed);
  rand.nextBytes(expected);
  byte[] actual = new byte[numBlocks * blockSize];
  FSDataInputStream stm = fileSys.open(name);
  // read a block and get block locations cached as a result
  stm.readFully(0, actual);
  checkAndEraseData(actual, 0, expected, "Pread Datanode Restart Setup");
  // restart all datanodes. it is expected that they will
  // restart on different ports, hence, cached block locations
  // will no longer work.
  assertTrue(cluster.restartDataNodes());
  cluster.waitActive();
  // verify the block can be read again using the same InputStream 
  // (via re-fetching of block locations from namenode). there is a 
  // 3 sec sleep in chooseDataNode(), which can be shortened for 
  // this test if configurable.
  stm.readFully(0, actual);
  checkAndEraseData(actual, 0, expected, "Pread Datanode Restart Test");
}
 
Example 9
Project: hadoop   File: TestMapRed.java
private static void printTextFile(FileSystem fs, Path p) throws IOException {
  BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(p)));
  String line;
  while ((line = in.readLine()) != null) {
    System.out.println("  Row: " + line);
  }
  in.close();
}
 
Example 10
Project: angel   File: ModelLoader.java
/**
 * Get model meta
 *
 * @param modelDir model save directory path
 * @return model meta
 */
public static ModelFilesMeta getMeta(String modelDir, Configuration conf) throws IOException {
  Path modelPath = new Path(modelDir);
  Path meteFilePath = new Path(modelPath, ModelFilesConstent.modelMetaFileName);
  ModelFilesMeta meta = new ModelFilesMeta();
  FileSystem fs = meteFilePath.getFileSystem(conf);
  if (!fs.exists(meteFilePath)) {
    throw new IOException("matrix meta file does not exist ");
  }
  FSDataInputStream input = fs.open(meteFilePath);
  meta.read(input);
  input.close();
  return meta;
}
 
Example 11
Project: WIFIProbe   File: HDFSTool.java
/** Read a file from HDFS. */
public static DataInputStream readFromHdfs(String fileName) throws IOException {

    String dst = NodeConfig.HDFS_PATH+fileName;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(dst), conf);
    return fs.open(new Path(dst));
}
 
Example 12
Project: hadoop-oss   File: TestCodec.java
@Test
public void testGzipCodecRead() throws IOException {
  // Create a gzipped file and try to read it back, using a decompressor
  // from the CodecPool.

  // Don't use native libs for this test.
  Configuration conf = new Configuration();
  ZlibFactory.setNativeZlibLoaded(false);
  // Ensure that the CodecPool has a BuiltInZlibInflater in it.
  Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
  assertNotNull("zlibDecompressor is null!", zlibDecompressor);
  assertTrue("ZlibFactory returned unexpected inflator",
      zlibDecompressor instanceof BuiltInZlibInflater);
  CodecPool.returnDecompressor(zlibDecompressor);

  // Now create a GZip text file.
  String tmpDir = System.getProperty("test.build.data", "/tmp/");
  Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
  BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
    new GZIPOutputStream(new FileOutputStream(f.toString()))));
  final String msg = "This is the message in the file!";
  bw.write(msg);
  bw.close();

  // Now read it back, using the CodecPool to establish the
  // decompressor to use.
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(f);
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  FileSystem fs = FileSystem.getLocal(conf);
  InputStream is = fs.open(f);
  is = codec.createInputStream(is, decompressor);
  BufferedReader br = new BufferedReader(new InputStreamReader(is));
  String line = br.readLine();
  assertEquals("Didn't get the same message back!", msg, line);
  br.close();
}
 
Example 13
Project: hadoop   File: TeraInputFormat.java
public void initialize(InputSplit split, TaskAttemptContext context) 
    throws IOException, InterruptedException {
  Path p = ((FileSplit)split).getPath();
  FileSystem fs = p.getFileSystem(context.getConfiguration());
  in = fs.open(p);
  long start = ((FileSplit)split).getStart();
  // find the offset to start at a record boundary
  offset = (RECORD_LENGTH - (start % RECORD_LENGTH)) % RECORD_LENGTH;
  in.seek(start + offset);
  length = ((FileSplit)split).getLength();
}
 
Example 14
Project: MRNMF   File: MatrixByteConverter.java
public static void txt2dat(Path dir, String inputFile, String outputFile)
        throws IOException {

    FileSystem fileSystem = dir.getFileSystem(new Configuration());

    Path in = new Path(dir, inputFile);
    Path out = new Path(dir, outputFile);

    FSDataInputStream fsDataInputStream = fileSystem.open(in);
    InputStreamReader inputStreamReader = new InputStreamReader(fsDataInputStream);
    BufferedReader reader = new BufferedReader(inputStreamReader);

    FSDataOutputStream writer = fileSystem.create(out);

    try {
        String line;
        line = reader.readLine();
        while (line != null){

            String[] keyVal = line.split("\\t");
            writer.writeLong(Long.parseLong(keyVal[0]));

            for (String aij : keyVal[1].split(",")) {
                writer.writeDouble(Double.parseDouble(aij));
            }

            line = reader.readLine();
        }
    } finally {
        reader.close();
        inputStreamReader.close();
        fsDataInputStream.close();
        writer.flush();
        writer.close();
    }
}
 
Example 15
Project: hadoop   File: TestFileCreation.java
/**
 * Test creating a file whose data gets synced when closed
 */
@Test
public void testFileCreationSyncOnClose() throws IOException {
  Configuration conf = new HdfsConfiguration();
  conf.setBoolean(DFS_DATANODE_SYNCONCLOSE_KEY, true);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();

  try {
    FileSystem fs = cluster.getFileSystem();
    
    Path[] p = {new Path("/foo"), new Path("/bar")};
    
    //write 2 files at the same time
    FSDataOutputStream[] out = {fs.create(p[0]), fs.create(p[1])};
    int i = 0;
    for(; i < 100; i++) {
      out[0].write(i);
      out[1].write(i);
    }
    out[0].close();
    for(; i < 200; i++) {out[1].write(i);}
    out[1].close();

    //verify
    FSDataInputStream[] in = {fs.open(p[0]), fs.open(p[1])};  
    for(i = 0; i < 100; i++) {assertEquals(i, in[0].read());}
    for(i = 0; i < 200; i++) {assertEquals(i, in[1].read());}
  } finally {
    if (cluster != null) {cluster.shutdown();}
  }
}
 
Example 16
Project: ditb   File: RegionSplitter.java
private static byte [] readFile(final FileSystem fs, final Path path) throws IOException {
  FSDataInputStream tmpIn = fs.open(path);
  try {
    byte [] rawData = new byte[tmpIn.available()];
    tmpIn.readFully(rawData);
    return rawData;
  } finally {
    tmpIn.close();
  }
}
 
Example 17
Project: hadoop   File: TestShortCircuitLocalRead.java
@Test(timeout=10000)
public void testSkipWithVerifyChecksum() throws IOException {
  int size = blockSize;
  Configuration conf = new Configuration();
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
  conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
      "/tmp/testSkipWithVerifyChecksum._PORT");
  DomainSocket.disableBindPathValidation();
  if (simulatedStorage) {
    SimulatedFSDataset.setFactory(conf);
  }
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
      .format(true).build();
  FileSystem fs = cluster.getFileSystem();
  try {
    // check that / exists
    Path path = new Path("/");
    assertTrue("/ should be a directory", fs.getFileStatus(path)
        .isDirectory() == true);
    
    byte[] fileData = AppendTestUtil.randomBytes(seed, size*3);
    // create a new file in home directory. Do not close it.
    Path file1 = new Path("filelocal.dat");
    FSDataOutputStream stm = createFile(fs, file1, 1);

    // write to file
    stm.write(fileData);
    stm.close();
    
    // now test the skip function
    FSDataInputStream instm = fs.open(file1);
    byte[] actual = new byte[fileData.length];
    // read something from the block first, otherwise BlockReaderLocal.skip()
    // will not be invoked
    int nread = instm.read(actual, 0, 3);
    long skipped = 2*size+3;
    instm.seek(skipped);
    nread = instm.read(actual, (int)(skipped + nread), 3);
    instm.close();
      
  } finally {
    fs.close();
    cluster.shutdown();
  }
}
 
Example 18
Project: hadoop   File: TestPersistBlocks.java
@Test
public void testRestartWithPartialBlockHflushed() throws IOException {
  final Configuration conf = new HdfsConfiguration();
  // Turn off persistent IPC, so that the DFSClient can survive NN restart
  conf.setInt(
      CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
      0);
  MiniDFSCluster cluster = null;

  FSDataOutputStream stream;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    FileSystem fs = cluster.getFileSystem();
    NameNode.getAddress(conf).getPort();
    // Creating a file with 4096 blockSize to write multiple blocks
    stream = fs.create(FILE_PATH, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
    stream.write(DATA_BEFORE_RESTART);
    stream.write((byte)1);
    stream.hflush();
    
    // explicitly do NOT close the file before restarting the NN.
    cluster.restartNameNode();
    
    // this will fail if the final block of the file is prematurely COMPLETEd
    stream.write((byte)2);
    stream.hflush();
    stream.close();
    
    assertEquals(DATA_BEFORE_RESTART.length + 2,
        fs.getFileStatus(FILE_PATH).getLen());
    
    FSDataInputStream readStream = fs.open(FILE_PATH);
    try {
      byte[] verifyBuf = new byte[DATA_BEFORE_RESTART.length + 2];
      IOUtils.readFully(readStream, verifyBuf, 0, verifyBuf.length);
      byte[] expectedBuf = new byte[DATA_BEFORE_RESTART.length + 2];
      System.arraycopy(DATA_BEFORE_RESTART, 0, expectedBuf, 0,
          DATA_BEFORE_RESTART.length);
      System.arraycopy(new byte[]{1, 2}, 0, expectedBuf,
          DATA_BEFORE_RESTART.length, 2);
      assertArrayEquals(expectedBuf, verifyBuf);
    } finally {
      IOUtils.closeStream(readStream);
    }
  } finally {
    if (cluster != null) { cluster.shutdown(); }
  }
}
 
Example 19
Project: hadoop   File: TestDatanodeDeath.java
static private void checkFile(FileSystem fileSys, Path name, int repl,
                       int numblocks, int filesize, long seed)
  throws IOException {
  boolean done = false;
  int attempt = 0;

  long len = fileSys.getFileStatus(name).getLen();
  assertTrue(name + " should be of size " + filesize +
             " but found to be of size " + len, 
             len == filesize);

  // wait till all full blocks are confirmed by the datanodes.
  while (!done) {
    attempt++;
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {}
    done = true;
    BlockLocation[] locations = fileSys.getFileBlockLocations(
        fileSys.getFileStatus(name), 0, filesize);

    if (locations.length < numblocks) {
      if (attempt > 100) {
        System.out.println("File " + name + " has only " +
                           locations.length + " blocks, " +
                           " but is expected to have " + numblocks +
                           " blocks.");
      }
      done = false;
      continue;
    }
    for (int idx = 0; idx < locations.length; idx++) {
      if (locations[idx].getHosts().length < repl) {
        if (attempt > 100) {
          System.out.println("File " + name + " has " +
                             locations.length + " blocks: " +
                             " The " + idx + " block has only " +
                             locations[idx].getHosts().length + 
                             " replicas but is expected to have " 
                             + repl + " replicas.");
        }
        done = false;
        break;
      }
    }
  }
  FSDataInputStream stm = fileSys.open(name);
  final byte[] expected = AppendTestUtil.randomBytes(seed, fileSize);

  // do a sanity check. Read the file
  byte[] actual = new byte[filesize];
  stm.readFully(0, actual);
  checkData(actual, 0, expected, "Read 1");
}
 
Example 20
Project: hadoop   File: TestMRMultipleOutputs.java
protected void _testMOWithJavaSerialization(boolean withCounters) throws Exception {
  String input = "a\nb\nc\nd\ne\nc\nd\ne";

  Configuration conf = createJobConf();
  conf.set("io.serializations",
  	    "org.apache.hadoop.io.serializer.JavaSerialization," +
  	    "org.apache.hadoop.io.serializer.WritableSerialization");

  Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);

  job.setJobName("mo");
  MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class,
    Long.class, String.class);

  MultipleOutputs.setCountersEnabled(job, withCounters);

  job.setSortComparatorClass(JavaSerializationComparator.class);
  
  job.setMapOutputKeyClass(Long.class);
  job.setMapOutputValueClass(String.class);

  job.setOutputKeyClass(Long.class);
  job.setOutputValueClass(String.class);

  job.setMapperClass(MOJavaSerDeMap.class);
  job.setReducerClass(MOJavaSerDeReduce.class);

  job.waitForCompletion(true);

  // assert number of named output part files
  int namedOutputCount = 0;
  int valueBasedOutputCount = 0;
  FileSystem fs = OUT_DIR.getFileSystem(conf);
  FileStatus[] statuses = fs.listStatus(OUT_DIR);
  for (FileStatus status : statuses) {
    String fileName = status.getPath().getName();
    if (fileName.equals("text-m-00000") ||
        fileName.equals("text-m-00001") ||
        fileName.equals("text-r-00000")) {
      namedOutputCount++;
    } else if (fileName.equals("a-r-00000") ||
        fileName.equals("b-r-00000") ||
        fileName.equals("c-r-00000") ||
        fileName.equals("d-r-00000") ||
        fileName.equals("e-r-00000")) {
      valueBasedOutputCount++;
    }
  }
  assertEquals(3, namedOutputCount);
  assertEquals(5, valueBasedOutputCount);

  // assert TextOutputFormat files correctness
  BufferedReader reader = new BufferedReader(
    new InputStreamReader(fs.open(
      new Path(FileOutputFormat.getOutputPath(job), "text-r-00000"))));
  int count = 0;
  String line = reader.readLine();
  while (line != null) {
    assertTrue(line.endsWith(TEXT));
    line = reader.readLine();
    count++;
  }
  reader.close();
  assertFalse(count == 0);

  if (withCounters) {
    CounterGroup counters =
      job.getCounters().getGroup(MultipleOutputs.class.getName());
    assertEquals(6, counters.size());
    assertEquals(4, counters.findCounter(TEXT).getValue());
    assertEquals(2, counters.findCounter("a").getValue());
    assertEquals(2, counters.findCounter("b").getValue());
    assertEquals(4, counters.findCounter("c").getValue());
    assertEquals(4, counters.findCounter("d").getValue());
    assertEquals(4, counters.findCounter("e").getValue());
  }
}