Java Code Examples for org.apache.hadoop.fs.FSDataInputStream#seek()

The following examples show how to use org.apache.hadoop.fs.FSDataInputStream#seek(). Each example is taken from an open-source project; the source file and its license are noted above the snippet.
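
Before the project examples, here is a minimal, self-contained sketch of the basic pattern: open the file, call seek() with an absolute byte offset, read from that position, and close the stream. The path and offset arguments are hypothetical placeholders; seek() repositions the stream for the next read and raises an IOException when asked to seek past the end of the file (the behavior Examples 4 and 13 test for).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(args[0]);           // file to read (placeholder argument)
    long offset = Long.parseLong(args[1]);   // absolute byte offset (placeholder argument)

    FSDataInputStream in = fs.open(path);
    try {
      in.seek(offset);                       // position the stream at 'offset'
      byte[] buf = new byte[4096];
      int n = in.read(buf, 0, buf.length);   // the read starts at 'offset'
      System.out.println("read " + n + " bytes; position is now " + in.getPos());
    } finally {
      in.close();
    }
  }
}
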
Example 1
Source File: TestTracing.java    From hadoop with Apache License 2.0
private void readTestFile(String testFileName) throws Exception {
  Path filePath = new Path(testFileName);
  FSDataInputStream istream = dfs.open(filePath, 10240);
  ByteBuffer buf = ByteBuffer.allocate(10240);

  int count = 0;
  try {
    while (istream.read(buf) > 0) {
      count += 1;
      buf.clear();
      istream.seek(istream.getPos() + 5);
    }
  } catch (IOException ioe) {
    // Ignore this; it's probably a seek past EOF.
  } finally {
    istream.close();
  }
}
 
Example 2
Source File: TestSeekBug.java    From hadoop with Apache License 2.0
private void seekReadFile(FileSystem fileSys, Path name) throws IOException {
  FSDataInputStream stm = fileSys.open(name, 4096);
  byte[] expected = new byte[ONEMB];
  Random rand = new Random(seed);
  rand.nextBytes(expected);
  
  // First read 128 bytes to set count in BufferedInputStream
  byte[] actual = new byte[128];
  stm.read(actual, 0, actual.length);
  // Now read a byte array that is bigger than the internal buffer
  actual = new byte[100000];
  IOUtils.readFully(stm, actual, 0, actual.length);
  checkAndEraseData(actual, 128, expected, "First Read Test");
  // now do a small seek, within the range that is already read
  stm.seek(96036); // 4 byte seek
  actual = new byte[128];
  IOUtils.readFully(stm, actual, 0, actual.length);
  checkAndEraseData(actual, 96036, expected, "Seek Bug");
  // all done
  stm.close();
}
 
Example 3
Source File: TestHftpFileSystem.java    From RDFS with Apache License 2.0
/**
 * Scenario: Read an under construction file using hftp.
 * 
 * Expected: Hftp should be able to read the latest byte after the file
 * has been hdfsSynced (but not yet closed).
 * 
 * @throws IOException
 */
public void testConcurrentRead() throws IOException {
  // Write a test file.
  FSDataOutputStream out = hdfs.create(TEST_FILE, true);
  out.writeBytes("123");
  out.sync();  // sync but not close
  
  // Try read using hftp.
  FSDataInputStream in = hftpFs.open(TEST_FILE);
  assertEquals('1', in.read());
  assertEquals('2', in.read());
  assertEquals('3', in.read());
  in.close();
  
  // Try seek and read.
  in = hftpFs.open(TEST_FILE);
  in.seek(2);
  assertEquals('3', in.read());
  in.close();
  
  out.close();
}
 
Example 4
Source File: TestSeekBug.java    From big-c with Apache License 2.0
/**
* Test (expected to throw IOE) for <code>FSDataInputStream#seek</code>
* when the position argument is larger than the file size.
*/
@Test (expected=IOException.class)
public void testSeekPastFileSize() throws IOException {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  FileSystem fs = cluster.getFileSystem();
  try {
    Path seekFile = new Path("seekboundaries.dat");
    DFSTestUtil.createFile(
      fs,
      seekFile,
      ONEMB,
      fs.getDefaultReplication(seekFile),
      seed);
    FSDataInputStream stream = fs.open(seekFile);
    // Perform "safe seek" (expected to pass)
    stream.seek(65536);
    assertEquals(65536, stream.getPos());
    // expect IOE for this call
    stream.seek(ONEMB + ONEMB + ONEMB);
  } finally {
    fs.close();
    cluster.shutdown();
  }
}
 
Example 5
Source File: BCFile.java    From RDFS with Apache License 2.0
/**
 * Constructor
 * 
 * @param fin
 *          FS input stream.
 * @param fileLength
 *          Length of the corresponding file
 * @throws IOException
 */
public Reader(FSDataInputStream fin, long fileLength, Configuration conf)
    throws IOException {
  this.in = fin;
  this.conf = conf;

  // move the cursor to the beginning of the tail, containing: offset to the
  // meta block index, version and magic
  fin.seek(fileLength - Magic.size() - Version.size() - Long.SIZE
      / Byte.SIZE);
  long offsetIndexMeta = fin.readLong();
  version = new Version(fin);
  Magic.readAndVerify(fin);

  if (!version.compatibleWith(BCFile.API_VERSION)) {
    throw new RuntimeException("Incompatible BCFile fileBCFileVersion.");
  }

  // read meta index
  fin.seek(offsetIndexMeta);
  metaIndex = new MetaIndex(fin);

  // read data:BCFile.index, the data block index
  BlockReader blockR = getMetaBlock(DataIndex.BLOCK_NAME);
  try {
    dataIndex = new DataIndex(blockR);
  } finally {
    blockR.close();
  }
}
 
Example 6
Source File: TestFSInputChecker.java    From hadoop with Apache License 2.0
private void readAndCompare(FSDataInputStream in, int position, int len)
    throws IOException {
  byte[] b = new byte[len];
  in.seek(position);
  IOUtils.readFully(in, b, 0, b.length);

  for (int i = 0; i < b.length; i++) {
    assertEquals(expected[position + i], b[i]);
  }
}
 
Example 7
Source File: FileStripeReader.java    From RDFS with Apache License 2.0
@Override
public InputStream[] getNextStripeInputs() throws IOException {
  InputStream[] blocks = new InputStream[codec.stripeLength];
  try {
    for (int i = 0; i < codec.stripeLength; i++) {
      long seekOffset = stripeStartOffset + i * blockSize;
      if (seekOffset < srcSize) {
        FSDataInputStream in = fs.open(srcFile, bufferSize);
        in.seek(seekOffset);
        LOG.info("Opening stream at " + srcFile + ":" + seekOffset);
        blocks[i] = in;
      } else {
        LOG.info("Using zeros at offset " + seekOffset);
        // We have no src data at this offset.
        blocks[i] = new RaidUtils.ZeroInputStream(
                          seekOffset + blockSize);
      }
    }
    stripeStartOffset += blockSize * codec.stripeLength;
    return blocks;
  } catch (IOException e) {
    // If there is an error during opening a stream, close the previously
    // opened streams and re-throw.
    RaidUtils.closeStreams(blocks);
    throw e;
  }
}
 
Example 8
Source File: DTBCFile.java    From attic-apex-malhar with Apache License 2.0
/**
 * Constructor
 *
 * @param fin
 *          FS input stream.
 * @param fileLength
 *          Length of the corresponding file
 * @throws IOException
 */
public Reader(FSDataInputStream fin, long fileLength, Configuration conf)
    throws IOException {
  this.in = fin;
  this.conf = conf;
  // A reader buffer to read the block
  baos = new ByteArrayOutputStream(DTFile.getFSInputBufferSize(conf) * 2);
  this.cacheKeys = new ArrayList<String>();
  // move the cursor to the beginning of the tail, containing: offset to the
  // meta block index, version and magic
  fin.seek(fileLength - Magic.size() - Version.size() - Long.SIZE
      / Byte.SIZE);
  long offsetIndexMeta = fin.readLong();
  version = new Version(fin);
  Magic.readAndVerify(fin);

  if (!version.compatibleWith(DTBCFile.API_VERSION)) {
    throw new RuntimeException("Incompatible BCFile fileBCFileVersion.");
  }

  // read meta index
  fin.seek(offsetIndexMeta);
  metaIndex = new MetaIndex(fin);

  // read data:BCFile.index, the data block index
  BlockReader blockR = getMetaBlock(DataIndex.BLOCK_NAME);
  try {
    dataIndex = new DataIndex(blockR);
  } finally {
    blockR.close();
  }
}
 
Example 9
Source File: MapTask.java    From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
private <T> T getSplitDetails(Path file, long offset)
    throws IOException {
  FileSystem fs = file.getFileSystem(conf);
  FSDataInputStream inFile = fs.open(file);
  inFile.seek(offset);
  String className = StringInterner.weakIntern(Text.readString(inFile));
  Class<T> cls;
  try {
    cls = (Class<T>) conf.getClassByName(className);
  } catch (ClassNotFoundException ce) {
    IOException wrap = new IOException("Split class " + className + 
                                        " not found");
    wrap.initCause(ce);
    throw wrap;
  }
  SerializationFactory factory = new SerializationFactory(conf);
  Deserializer<T> deserializer = 
    (Deserializer<T>) factory.getDeserializer(cls);
  deserializer.open(inFile);
  T split = deserializer.deserialize(null);
  long pos = inFile.getPos();
  getCounters().findCounter(
      TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
  inFile.close();
  return split;
}
 
Example 10
Source File: TestCachingStrategy.java    From big-c with Apache License 2.0
@Test(timeout=120000)
public void testSeekAfterSetDropBehind() throws Exception {
  // start a cluster
  LOG.info("testSeekAfterSetDropBehind");
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  String TEST_PATH = "/test";
  int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
        .build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, false);
    // verify that we can seek after setDropBehind
    FSDataInputStream fis = fs.open(new Path(TEST_PATH));
    try {
      Assert.assertTrue(fis.read() != -1); // create BlockReader
      fis.setDropBehind(false); // clear BlockReader
      fis.seek(2); // seek
    } finally {
      fis.close();
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 11
Source File: BCFile.java    From hadoop with Apache License 2.0
/**
 * Constructor
 * 
 * @param fin
 *          FS input stream.
 * @param fileLength
 *          Length of the corresponding file
 * @throws IOException
 */
public Reader(FSDataInputStream fin, long fileLength, Configuration conf)
    throws IOException {
  this.in = fin;
  this.conf = conf;

  // move the cursor to the beginning of the tail, containing: offset to the
  // meta block index, version and magic
  fin.seek(fileLength - Magic.size() - Version.size() - Long.SIZE
      / Byte.SIZE);
  long offsetIndexMeta = fin.readLong();
  version = new Version(fin);
  Magic.readAndVerify(fin);

  if (!version.compatibleWith(BCFile.API_VERSION)) {
    throw new RuntimeException("Incompatible BCFile fileBCFileVersion.");
  }

  // read meta index
  fin.seek(offsetIndexMeta);
  metaIndex = new MetaIndex(fin);

  // read data:BCFile.index, the data block index
  BlockReader blockR = getMetaBlock(DataIndex.BLOCK_NAME);
  try {
    dataIndex = new DataIndex(blockR);
  } finally {
    blockR.close();
  }
}
 
Example 12
Source File: FSInputGeneralColumnDataReader.java    From kylin with Apache License 2.0
public FSInputGeneralColumnDataReader(FSDataInputStream fsInputStream, int dataStartOffset, int dataLength)
        throws IOException {
    this.fsInputStream = fsInputStream;
    fsInputStream.seek(dataStartOffset + dataLength - 4L);
    this.numOfVals = fsInputStream.readInt();
    fsInputStream.seek(dataStartOffset);
}
 
Example 13
Source File: TestHadoopArchives.java    From hadoop with Apache License 2.0
private static void expectSeekIOE(FSDataInputStream fsdis, long seekPos, String message) {
  try {
    fsdis.seek(seekPos);
    assertTrue(message + " (Position = " + fsdis.getPos() + ")", false);
  } catch (IOException ioe) {
    // okay
  }
}
 
Example 14
Source File: LineRecordReader.java    From hadoop-gpu with Apache License 2.0
public LineRecordReader(Configuration job, 
                        FileSplit split) throws IOException {
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength",
                                  Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);

  // open the file and seek to the start of the split
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(split.getPath());
  boolean skipFirstLine = false;
  if (codec != null) {
    in = new LineReader(codec.createInputStream(fileIn), job);
    end = Long.MAX_VALUE;
  } else {
    if (start != 0) {
      skipFirstLine = true;
      --start;
      fileIn.seek(start);
    }
    in = new LineReader(fileIn, job);
  }
  if (skipFirstLine) {  // skip first line and re-establish "start".
    start += in.readLine(new Text(), 0,
                         (int)Math.min((long)Integer.MAX_VALUE, end - start));
  }
  this.pos = start;
}
 
Example 15
Source File: HdfsIOBenchmark.java    From incubator-crail with Apache License 2.0
public void readSequentialHeap() throws Exception {
	System.out.println("reading sequential file in heap mode " + path);
	Configuration conf = new Configuration();
	FileSystem fs = FileSystem.get(conf);
	FileStatus status = fs.getFileStatus(path);
	FSDataInputStream instream = fs.open(path);
	byte[] buf = new byte[size];
	double sumbytes = 0;
	double ops = 0;
	System.out.println("file capacity " + status.getLen());
	System.out.println("read size " + size);
	System.out.println("operations " + loop);
	
	long start = System.currentTimeMillis();
	while (ops < loop) {
		double ret = (double) this.read(instream, buf);
		if (ret > 0) {
			sumbytes = sumbytes + ret;
			ops = ops + 1.0;
		} else {
			ops = ops + 1.0;
			if (instream.getPos() == 0){
				break;
			} else {
				instream.seek(0);
			}
		}
	}
	long end = System.currentTimeMillis();
	double executionTime = ((double) (end - start)) / 1000.0;
	double throughput = 0.0;
	double latency = 0.0;
	double sumbits = sumbytes * 8.0;
	if (executionTime > 0) {
		throughput = sumbits / executionTime / 1024.0 / 1024.0;
		latency = 1000000.0 * executionTime / ops;
	}
	System.out.println("execution time " + executionTime);
	System.out.println("ops " + ops);
	System.out.println("sumbytes " + sumbytes);
	System.out.println("throughput " + throughput);
	System.out.println("latency " + latency);
	System.out.println("closing stream");
	instream.close();	
	fs.close();
}
 
Example 16
Source File: HdfsIOBenchmark.java    From incubator-crail with Apache License 2.0
public void readSequentialDirect() throws Exception {
	System.out.println("reading sequential file in direct mode " + path);
	Configuration conf = new Configuration();
	FileSystem fs = FileSystem.get(conf);
	FileStatus status = fs.getFileStatus(path);
	FSDataInputStream instream = fs.open(path);
	ByteBuffer buf = ByteBuffer.allocateDirect(size);
	buf.clear();
	double sumbytes = 0;
	double ops = 0;
	System.out.println("file capacity " + status.getLen());
	System.out.println("read size " + size);
	System.out.println("operations " + loop);
	
	long start = System.currentTimeMillis();
	while (ops < loop) {
		buf.clear();
		double ret = (double) instream.read(buf);
		if (ret > 0) {
			sumbytes = sumbytes + ret;
			ops = ops + 1.0;
		} else {
			ops = ops + 1.0;
			if (instream.getPos() == 0){
				break;
			} else {
				instream.seek(0);
			}
		}
	}
	long end = System.currentTimeMillis();
	double executionTime = ((double) (end - start)) / 1000.0;
	double throughput = 0.0;
	double latency = 0.0;
	double sumbits = sumbytes * 8.0;
	if (executionTime > 0) {
		throughput = sumbits / executionTime / 1024.0 / 1024.0;
		latency = 1000000.0 * executionTime / ops;
	}
	System.out.println("execution time " + executionTime);
	System.out.println("ops " + ops);
	System.out.println("sumbytes " + sumbytes);
	System.out.println("throughput " + throughput);
	System.out.println("latency " + latency);
	System.out.println("closing stream");
	instream.close();	
	fs.close();
}
 
Example 17
Source File: RubixRecordReader.java    From Cubert with Apache License 2.0
public void initialize(InputSplit split, Configuration conf) throws IOException,
        InterruptedException
{
    @SuppressWarnings("unchecked")
    RubixInputSplit<K, V> rsplit = (RubixInputSplit<K, V>) split;

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    switch (rsplit.getBlockSerializationType())
    {
    case DEFAULT:
        valueDeserializer =
                serializationFactory.getDeserializer(rsplit.getValueClass());
        break;
    case COMPACT:
        BlockSchema schema = rsplit.getSchema();
        valueDeserializer = new CompactDeserializer<V>(schema);
        break;
    }

    key = rsplit.getKey();

    // store the blockid and partition key in the conf
    conf.setLong("MY_BLOCK_ID", rsplit.getBlockId());
    conf.setLong("MY_NUM_RECORDS", rsplit.getNumRecords());
    ByteArrayOutputStream tmpOut = new ByteArrayOutputStream();
    ((Tuple) key).write(new DataOutputStream(tmpOut));
    String keySerialized = SerializerUtils.serializeToString(tmpOut.toByteArray());
    conf.set("MY_PARTITION_KEY", keySerialized);

    Path path = rsplit.getFilename();
    offset = rsplit.getOffset();
    length = rsplit.getLength();

    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fsin = fs.open(path);
    fsin.seek(offset);

    blockInputStream = new BlockInputStream(fsin, length);
    in = blockInputStream;

    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    if (codec != null)
    {
        print.f("codec is not null and it is %s", codec.getClass().toString());
        in = codec.createInputStream(in);
    }
    else
    {
        print.f("codec is null");
    }

    valueDeserializer.open(in);
}
 
Example 18
Source File: DataValidationInputFormat.java    From jumbune with GNU Lesser General Public License v3.0
/**
 * Generate splits.
 *
 * @param job the JobContext used to read the configuration of the running job
 * @param minSize the minimum split size
 * @param maxSize the maximum split size
 * @param splits the list of splits being generated
 * @param file the FileStatus used to determine block size, length, and block locations
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void generateSplits(JobContext job, long minSize, long maxSize,
		List<InputSplit> splits, FileStatus file) throws IOException {
	Path path = file.getPath();
	int numOfRecordsInCurrentSplit = 0;
	int numOfRecordsInPreviousSplit = 0;
	FileSystem fs = path.getFileSystem(job.getConfiguration());
	long length = file.getLen();
	BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0,
			length);
	FSDataInputStream fsin = null ;
	if ((length != 0) && isSplitable(job, path)) {
		long blockSize = file.getBlockSize();
		long splitSize = computeSplitSize(blockSize, minSize, maxSize);
		long bytesRemaining = length;
		
		// checking the occurrences of the record separator in current
		// split
		recordSeparator = job.getConfiguration()
				.get(DataValidationConstants.RECORD_SEPARATOR)
				.getBytes();
		while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
			int blkIndex = getBlockIndex(blkLocations, length
					- bytesRemaining);
			long start = length - bytesRemaining;
			long end = start + splitSize;
			try {
				fsin = fs.open(path);
				fsin.seek(start);
				long pos = start;
				int b = 0;
				int bufferPos = 0;
				while (true) {
					b = fsin.read();
					pos = fsin.getPos();
					if (b == -1) {
						break;
					}
					if (b == recordSeparator[bufferPos]) {
						bufferPos++;
						if (bufferPos == recordSeparator.length) {
							numOfRecordsInCurrentSplit++;
							bufferPos = 0;
							if (pos > end) {
								break;
							}
						}
					} else {
						// reset the value of buffer position to zero
						bufferPos = 0;
					}
				}
			} finally {
				if (fsin != null) {
					fsin.close();
				}
			}

			splits.add(new DataValidationFileSplit(path, start,
					splitSize, numOfRecordsInPreviousSplit,
					blkLocations[blkIndex].getHosts()));
			bytesRemaining -= splitSize;
			numOfRecordsInPreviousSplit = numOfRecordsInCurrentSplit;
			numOfRecordsInCurrentSplit = 0;
		}

		addSplitIfBytesRemaining(splits, path, numOfRecordsInPreviousSplit,
				length, blkLocations, bytesRemaining);
	} else if (length != 0) {
		splits.add(new DataValidationFileSplit(path, 0, length,
				numOfRecordsInPreviousSplit, blkLocations[0].getHosts()));
	} else {
		splits.add(new DataValidationFileSplit(path, 0, length,
				numOfRecordsInPreviousSplit, new String[0]));
	}
}
 
Example 19
Source File: RubixFile.java    From Cubert with Apache License 2.0
@SuppressWarnings("unchecked")
public List<KeyData<K>> getKeyData() throws IOException,
        ClassNotFoundException
{
    if (keyData != null)
        return keyData;

    final FileSystem fs = FileSystem.get(conf);
    keyData = new ArrayList<KeyData<K>>();

    final long filesize = fs.getFileStatus(path).getLen();
    FSDataInputStream in = fs.open(path);

    /* The last long in the file is the start position of the trailer section */
    in.seek(filesize - 8);
    long metaDataStartPos = in.readLong();

    in.seek(metaDataStartPos);

    ObjectMapper mapper = new ObjectMapper();
    metadataJson = mapper.readValue(in.readUTF(), JsonNode.class);

    int keySectionSize = in.readInt();

    // load the key section
    byte[] keySection = new byte[keySectionSize];

    in.seek(filesize - keySectionSize - 8);
    in.read(keySection, 0, keySectionSize);
    in.close();

    ByteArrayInputStream bis = new ByteArrayInputStream(keySection);
    DataInput dataInput = new DataInputStream(bis);

    int numberOfBlocks = metadataJson.get("numberOfBlocks").getIntValue();

    // load the key section
    keyClass = (Class<K>) ClassCache.forName(JsonUtils.getText(metadataJson, "keyClass"));
    valueClass =
            (Class<V>) ClassCache.forName(JsonUtils.getText(metadataJson, "valueClass"));

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Deserializer<K> deserializer = serializationFactory.getDeserializer(keyClass);

    deserializer.open(bis);

    while (bis.available() > 0 && numberOfBlocks > 0)
    {
        K key = deserializer.deserialize(null);

        long offset = dataInput.readLong();
        long blockId = dataInput.readLong();
        long numRecords = dataInput.readLong();

        keyData.add(new KeyData<K>(key, offset, 0, numRecords, blockId));
        numberOfBlocks--;
    }

    // Assign length to each keydata entry
    int numEntries = keyData.size();
    for (int i = 1; i < numEntries; i++)
    {
        KeyData<K> prev = keyData.get(i - 1);
        KeyData<K> current = keyData.get(i);

        prev.setLength(current.getOffset() - prev.getOffset());
    }

    if (numEntries > 0)
    {
        KeyData<K> last = keyData.get(numEntries - 1);
        last.setLength(metaDataStartPos - last.offset);
    }

    return keyData;
}
 
Example 20
Source File: InStream.java    From hive-dwrf with Apache License 2.0
public static void read(FSDataInputStream file, long fileOffset, byte[] array, int arrayOffset,
    int length) throws IOException {
  file.seek(fileOffset);
  file.readFully(array, arrayOffset, length);
}