Java Code Examples for org.apache.hadoop.fs.FileSystem#getDefaultBlockSize()

The following examples show how to use org.apache.hadoop.fs.FileSystem#getDefaultBlockSize(). They are drawn from open-source projects; the source file and license are noted above each example.
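Before the examples, here is a minimal sketch of the basic call, assuming the usual org.apache.hadoop.conf and org.apache.hadoop.fs imports and an IOException-throwing context; the path is illustrative. The Path-aware overload is preferred, since the no-argument variant is deprecated and cannot account for defaults that vary per path (for example, across federated or view filesystems):

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path p = new Path("/tmp/example.dat");        // illustrative path
long blockSize = fs.getDefaultBlockSize(p);   // preferred: Path-aware overload
long legacy = fs.getDefaultBlockSize();       // deprecated no-argument variant
System.out.println("Default block size: " + blockSize);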
Example 1
Source File: TestCombineFileInputFormat.java    From hadoop with Apache License 2.0
private void splitRealFiles(String[] args) throws IOException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance();
  FileSystem fs = FileSystem.get(conf);
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException("Wrong file system: " + fs.getClass().getName());
  }
  long blockSize = fs.getDefaultBlockSize();

  DummyInputFormat inFormat = new DummyInputFormat();
  for (int i = 0; i < args.length; i++) {
    FileInputFormat.addInputPaths(job, args[i]);
  }
  inFormat.setMinSplitSizeRack(blockSize);
  inFormat.setMaxSplitSize(10 * blockSize);

  List<InputSplit> splits = inFormat.getSplits(job);
  System.out.println("Total number of splits " + splits.size());
  for (int i = 0; i < splits.size(); ++i) {
    CombineFileSplit fileSplit = (CombineFileSplit) splits.get(i);
    System.out.println("Split[" + i + "] " + fileSplit);
  }
}
 
Example 2
Source File: FSOperations.java    From hadoop with Apache License 2.0
/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 *
 * @return null; the operation produces no result.
 *
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public Void execute(FileSystem fs) throws IOException {
  if (replication == -1) {
    replication = fs.getDefaultReplication(path);
  }
  if (blockSize == -1) {
    blockSize = fs.getDefaultBlockSize(path);
  }
  FsPermission fsPermission = new FsPermission(permission);
  int bufferSize = fs.getConf().getInt("httpfs.buffer.size", 4096);
  OutputStream os = fs.create(path, fsPermission, override, bufferSize, replication, blockSize, null);
  IOUtils.copyBytes(is, os, bufferSize, true);
  os.close();
  return null;
}
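The pattern here — resolve the per-path defaults when the client did not specify values (HttpFS signals that with -1), then hand them to create() — is the canonical way to honor filesystem defaults while still allowing overrides. A minimal hedged sketch of the same pattern, given a FileSystem handle fs as in the introductory sketch (path, buffer size, and payload are illustrative):

Path out = new Path("/tmp/out.bin");                   // illustrative path
short replication = fs.getDefaultReplication(out);
long blockSize = fs.getDefaultBlockSize(out);
OutputStream os = fs.create(out, true, 4096, replication, blockSize);
os.write(new byte[] { 1, 2, 3 });                      // illustrative payload
os.close();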
 
Example 3
Source File: SegmentHelper.java    From indexr with Apache License 2.0
public static long getSegmentBlockSize(FileSystem fileSystem, long fileSize) {
    long blockSize = fileSystem.getDefaultBlockSize();
    while (blockSize < fileSize) {
        blockSize <<= 1;
    }
    return blockSize;
}
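The loop doubles the filesystem's default block size until one block can hold the whole segment — presumably so that each segment file occupies a single block for locality. With a 128 MB default and a 300 MB segment, for example, it returns 512 MB (128 → 256 → 512); the result is always a power-of-two multiple of the default.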
 
Example 4
Source File: RetriableFileCopyCommand.java    From hadoop with Apache License 2.0
/**
 * @return the block size of the source file if we need to preserve either
 *         the block size or the checksum type. Otherwise the default block
 *         size of the target FS.
 */
private static long getBlockSize(
        EnumSet<FileAttribute> fileAttributes,
        FileStatus sourceFile, FileSystem targetFS, Path tmpTargetPath) {
  boolean preserve = fileAttributes.contains(FileAttribute.BLOCKSIZE)
      || fileAttributes.contains(FileAttribute.CHECKSUMTYPE);
  return preserve ? sourceFile.getBlockSize() : targetFS
      .getDefaultBlockSize(tmpTargetPath);
}
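Preserving the checksum type forces the source block size to be preserved as well: HDFS file checksums are computed block by block, so two files with identical bytes but different block sizes yield different checksums, and post-copy verification would fail.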
 
Example 5
Source File: TestCopyMapper.java    From hadoop with Apache License 2.0
private static void touchFile(String path, boolean createMultipleBlocks,
    ChecksumOpt checksumOpt) throws Exception {
  FileSystem fs;
  DataOutputStream outputStream = null;
  try {
    fs = cluster.getFileSystem();
    final Path qualifiedPath = new Path(path).makeQualified(fs.getUri(),
        fs.getWorkingDirectory());
    final long blockSize = createMultipleBlocks ? NON_DEFAULT_BLOCK_SIZE : fs
        .getDefaultBlockSize(qualifiedPath) * 2;
    FsPermission permission = FsPermission.getFileDefault().applyUMask(
        FsPermission.getUMask(fs.getConf()));
    outputStream = fs.create(qualifiedPath, permission,
        EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), 0,
        (short) (fs.getDefaultReplication(qualifiedPath) * 2), blockSize,
        null, checksumOpt);
    byte[] bytes = new byte[DEFAULT_FILE_SIZE];
    outputStream.write(bytes);
    long fileSize = DEFAULT_FILE_SIZE;
    if (createMultipleBlocks) {
      while (fileSize < 2*blockSize) {
        outputStream.write(bytes);
        outputStream.flush();
        fileSize += DEFAULT_FILE_SIZE;
      }
    }
    pathList.add(qualifiedPath);
    ++nFiles;

    FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
    System.out.println(fileStatus.getBlockSize());
    System.out.println(fileStatus.getReplication());
  }
  finally {
    IOUtils.cleanup(null, outputStream);
  }
}
 
Example 6
Source File: TestS3ABlocksize.java    From big-c with Apache License 2.0
@Test
@SuppressWarnings("deprecation")
public void testBlockSize() throws Exception {
  FileSystem fs = getFileSystem();
  long defaultBlockSize = fs.getDefaultBlockSize();
  assertEquals("incorrect blocksize",
      S3AFileSystem.DEFAULT_BLOCKSIZE, defaultBlockSize);
  long newBlockSize = defaultBlockSize * 2;
  fs.getConf().setLong(Constants.FS_S3A_BLOCK_SIZE, newBlockSize);

  Path dir = path("testBlockSize");
  Path file = new Path(dir, "file");
  createFile(fs, file, true, dataset(1024, 'a', 'z' - 'a'));
  FileStatus fileStatus = fs.getFileStatus(file);
  assertEquals("Double default block size in stat(): " + fileStatus,
      newBlockSize,
      fileStatus.getBlockSize());

  // check the listing  & assert that the block size is picked up by
  // this route too.
  boolean found = false;
  FileStatus[] listing = fs.listStatus(dir);
  for (FileStatus stat : listing) {
    LOG.info("entry: {}", stat);
    if (file.equals(stat.getPath())) {
      found = true;
      assertEquals("Double default block size in ls(): " + stat,
          newBlockSize,
          stat.getBlockSize());
    }
  }
  assertTrue("Did not find " + fileStatsToString(listing, ", "), found);
}
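S3 objects have no real blocks; the value S3A reports is synthetic, comes from fs.s3a.block.size, and matters mainly because it drives input-split sizing — which is why the test checks both the getFileStatus() and listStatus() paths. A hedged sketch of setting it up front instead of mutating a live configuration, assuming a java.net.URI import (the bucket name is illustrative):

Configuration conf = new Configuration();
conf.setLong("fs.s3a.block.size", 256 * 1024 * 1024);   // 256 MB, illustrative
FileSystem fs = FileSystem.get(URI.create("s3a://my-bucket/"), conf);
long reported = fs.getDefaultBlockSize(new Path("s3a://my-bucket/data"));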
 
Example 7
Source File: WriterBinaryBlock.java    From systemds with Apache License 2.0
@SuppressWarnings("deprecation")
protected final void writeBinaryBlockMatrixToSequenceFile( Path path, JobConf job, FileSystem fs, MatrixBlock src, int blen, int rl, int ru ) 
	throws IOException
{
	boolean sparse = src.isInSparseFormat();
	int rlen = src.getNumRows();
	int clen = src.getNumColumns();
	
	// 1) create sequence file writer, with right replication factor 
	// (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
	SequenceFile.Writer writer = null;
	if( _replication > 0 ) //if replication specified (otherwise default)
	{
		//copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
		writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
			job.getInt(HDFSTool.IO_FILE_BUFFER_SIZE, 4096),
			(short)_replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());	
	}
	else	
	{
		writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
	}
	
	try
	{
		// 2) bound check for src block
		if( src.getNumRows() > rlen || src.getNumColumns() > clen )
		{
			throw new IOException("Matrix block [1:"+src.getNumRows()+",1:"+src.getNumColumns()+"] " +
					              "out of overall matrix range [1:"+rlen+",1:"+clen+"].");
		}
	
		//3) reblock and write
		MatrixIndexes indexes = new MatrixIndexes();

		if( rlen <= blen && clen <= blen && rl == 0 ) //opt for single block
		{
			//directly write single block
			indexes.setIndexes(1, 1);
			writer.append(indexes, src);
		}
		else //general case
		{
			//initialize blocks for reuse (at most 4 different blocks required)
			MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, blen, sparse, src.getNonZeros());
			
			//create and write subblocks of matrix
			for(int blockRow = rl/blen; blockRow < (int)Math.ceil(ru/(double)blen); blockRow++)
				for(int blockCol = 0; blockCol < (int)Math.ceil(src.getNumColumns()/(double)blen); blockCol++)
				{
					int maxRow = (blockRow*blen + blen < src.getNumRows()) ? blen : src.getNumRows() - blockRow*blen;
					int maxCol = (blockCol*blen + blen < src.getNumColumns()) ? blen : src.getNumColumns() - blockCol*blen;
			
					int row_offset = blockRow*blen;
					int col_offset = blockCol*blen;
					
					//get reuse matrix block
					MatrixBlock block = getMatrixBlockForReuse(blocks, maxRow, maxCol, blen);

					//copy submatrix to block
					src.slice( row_offset, row_offset+maxRow-1, 
							             col_offset, col_offset+maxCol-1, block );
					
					//append block to sequence file
					indexes.setIndexes(blockRow+1, blockCol+1);
					writer.append(indexes, block);
						
					//reset block for later reuse
					block.reset();
				}
		}
	}
	finally {
		IOUtilFunctions.closeSilently(writer);
	}
}
 
Example 8
Source File: WriterBinaryBlock.java    From systemds with Apache License 2.0
@SuppressWarnings("deprecation")
protected final void writeDiagBinaryBlockMatrixToHDFS( Path path, JobConf job, FileSystem fs, MatrixBlock src, long rlen, long clen, int blen ) 
	throws IOException, DMLRuntimeException
{
	boolean sparse = src.isInSparseFormat();
	
	// 1) create sequence file writer, with right replication factor 
	// (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
	SequenceFile.Writer writer = null;
	if( _replication > 0 ) //if replication specified (otherwise default)
	{
		//copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
		writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
			job.getInt(HDFSTool.IO_FILE_BUFFER_SIZE, 4096),
			(short)_replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());
	}
	else	
	{
		writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
	}
	
	try
	{
		// 2) bound check for src block
		if( src.getNumRows() > rlen || src.getNumColumns() > clen )
		{
			throw new IOException("Matrix block [1:"+src.getNumRows()+",1:"+src.getNumColumns()+"] " +
					              "out of overall matrix range [1:"+rlen+",1:"+clen+"].");
		}
	
		//3) reblock and write
		MatrixIndexes indexes = new MatrixIndexes();

		if( rlen <= blen && clen <= blen ) //opt for single block
		{
			//directly write single block
			indexes.setIndexes(1, 1);
			writer.append(indexes, src);
		}
		else //general case
		{
			//initialize blocks for reuse (at most 4 different blocks required)
			MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, blen, sparse, src.getNonZeros());
			MatrixBlock emptyBlock = new MatrixBlock();
				
			//create and write subblocks of matrix
			for(int blockRow = 0; blockRow < (int)Math.ceil(src.getNumRows()/(double)blen); blockRow++)
				for(int blockCol = 0; blockCol < (int)Math.ceil(src.getNumColumns()/(double)blen); blockCol++)
				{
					int maxRow = (blockRow*blen + blen < src.getNumRows()) ? blen : src.getNumRows() - blockRow*blen;
					int maxCol = (blockCol*blen + blen < src.getNumColumns()) ? blen : src.getNumColumns() - blockCol*blen;
					MatrixBlock block = null;
					
					if( blockRow==blockCol ) //block on diagonal
					{	
						int row_offset = blockRow*blen;
						int col_offset = blockCol*blen;
						
						//get reuse matrix block
						block = getMatrixBlockForReuse(blocks, maxRow, maxCol, blen);
	
						//copy submatrix to block
						src.slice( row_offset, row_offset+maxRow-1, 
							col_offset, col_offset+maxCol-1, block );
					}
					else //empty block (not on diagonal)
					{
						block = emptyBlock;
						block.reset(maxRow, maxCol);
					}
					
					//append block to sequence file
					indexes.setIndexes(blockRow+1, blockCol+1);
					writer.append(indexes, block);
					
					//reset block for later reuse
					if( blockRow!=blockCol )
						block.reset();
				}
		}				
	}
	finally {
		IOUtilFunctions.closeSilently(writer);
	}
}
 
Example 9
Source File: HadoopShims.java    From spork with Apache License 2.0
public static long getDefaultBlockSize(FileSystem fs, Path path) {
    return fs.getDefaultBlockSize(path);
}
 
Example 10
Source File: WALFile.java    From streamx with Apache License 2.0
Writer(Configuration conf, Option... opts) throws IOException {
  BlockSizeOption blockSizeOption =
      Options.getOption(BlockSizeOption.class, opts);
  BufferSizeOption bufferSizeOption =
      Options.getOption(BufferSizeOption.class, opts);
  ReplicationOption replicationOption =
      Options.getOption(ReplicationOption.class, opts);

  FileOption fileOption = Options.getOption(FileOption.class, opts);
  AppendIfExistsOption appendIfExistsOption = Options.getOption(
      AppendIfExistsOption.class, opts);
  StreamOption streamOption = Options.getOption(StreamOption.class, opts);

  // check consistency of options
  if ((fileOption == null) == (streamOption == null)) {
    throw new IllegalArgumentException("file or stream must be specified");
  }
  if (fileOption == null && (blockSizeOption != null ||
                             bufferSizeOption != null ||
                             replicationOption != null)) {
    throw new IllegalArgumentException("file modifier options not " +
                                       "compatible with stream");
  }

  FSDataOutputStream out;
  boolean ownStream = fileOption != null;
  if (ownStream) {
    Path p = fileOption.getValue();
    FileSystem fs;
    fs = p.getFileSystem(conf);
    int bufferSize = bufferSizeOption == null ? getBufferSize(conf) :
                     bufferSizeOption.getValue();
    short replication = replicationOption == null ?
                        fs.getDefaultReplication(p) :
                        (short) replicationOption.getValue();
    long blockSize = blockSizeOption == null ? fs.getDefaultBlockSize(p) :
                     blockSizeOption.getValue();

    if (appendIfExistsOption != null && appendIfExistsOption.getValue()
        && fs.exists(p)) {
      // Read the file and verify header details
      try (WALFile.Reader reader =
               new WALFile.Reader(conf, WALFile.Reader.file(p), new Reader.OnlyHeaderOption())){
        if (reader.getVersion() != VERSION[3]) {
          throw new VersionMismatchException(VERSION[3], reader.getVersion());
        }
        sync = reader.getSync();
      }
      out = fs.append(p, bufferSize);
      this.appendMode = true;
    } else {
      out = fs.create(p, true, bufferSize, replication, blockSize);
    }
  } else {
    out = streamOption.getValue();
  }

  init(conf, out, ownStream);
}
 
Example 11
Source File: CopyableFile.java    From incubator-gobblin with Apache License 2.0
/**
 * @return desired block size for destination file.
 */
public long getBlockSize(FileSystem targetFs) {
  return getPreserve().preserve(PreserveAttributes.Option.BLOCK_SIZE) ?
      getOrigin().getBlockSize() : targetFs.getDefaultBlockSize(this.destination);
}
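This mirrors the DistCp decision in Example 4: keep the source file's block size when the preserve option requests it, otherwise fall back to the target filesystem's default for the destination path.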
 
Example 12
Source File: HadoopShims.java    From spork with Apache License 2.0
public static long getDefaultBlockSize(FileSystem fs, Path path) {
    return fs.getDefaultBlockSize();
}
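Unlike the shim in Example 9, this variant ignores its path argument and calls the deprecated no-argument overload; the two shims presumably target different Hadoop versions, since the Path-aware overload only exists in newer releases.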
 
Example 13
Source File: RCFile.java    From incubator-tajo with Apache License 2.0
/**
 * Constructs an RCFile Writer.
 *
 * @param fs
 *          the file system used
 * @param conf
 *          the configuration
 * @param name
 *          the file name
 * @param progress a progress meter to update as the file is written
 * @param metadata a string to string map in the file header
 * @param codec the compression codec
 * @throws java.io.IOException
 */
public Writer(FileSystem fs, Configuration conf, Path name,
    Progressable progress, Metadata metadata, CompressionCodec codec) throws IOException {
  this(fs, conf, name, fs.getConf().getInt("io.file.buffer.size", 4096),
      fs.getDefaultReplication(), fs.getDefaultBlockSize(), progress,
      metadata, codec);
}
 
Example 14
Source File: CommonFSUtils.java    From hbase with Apache License 2.0
/**
 * Return the number of bytes into which large input files should optimally
 * be split to minimize I/O time.
 *
 * @param fs filesystem object
 * @param path path within the filesystem
 * @return the default block size for the path's filesystem
 */
public static long getDefaultBlockSize(final FileSystem fs, final Path path) {
  return fs.getDefaultBlockSize(path);
}