Java Code Examples for org.apache.hadoop.fs.FileSystem#getDefaultBlockSize()

The following examples show how to use org.apache.hadoop.fs.FileSystem#getDefaultBlockSize(). They are extracted from open source projects, and the source line above each example identifies the original project and file.
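Before the project examples, here is a minimal sketch of the two overloads. The class name and path below are illustrative placeholders: the no-argument getDefaultBlockSize() is deprecated in later Hadoop releases in favor of getDefaultBlockSize(Path), which resolves the default block size of the filesystem that actually serves the given path.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DefaultBlockSizeSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Deprecated no-arg form: the default block size of this FileSystem instance.
    long legacyDefault = fs.getDefaultBlockSize();

    // Preferred overload: the default for the filesystem serving this path.
    // "/user/demo/data.bin" is a placeholder, not a path from the examples below.
    Path p = new Path("/user/demo/data.bin");
    long pathDefault = fs.getDefaultBlockSize(p);

    System.out.println("default block size: " + legacyDefault);
    System.out.println("default block size for " + p + ": " + pathDefault);
  }
}

Many of the examples below still call the deprecated no-arg form; compare the two spork HadoopShims variants (Examples 9 and 14), which differ only in whether the Path is passed through.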
Example 1
private void splitRealFiles(String[] args) throws IOException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance();
  FileSystem fs = FileSystem.get(conf);
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException("Wrong file system: " + fs.getClass().getName());
  }
  long blockSize = fs.getDefaultBlockSize();

  DummyInputFormat inFormat = new DummyInputFormat();
  for (int i = 0; i < args.length; i++) {
    FileInputFormat.addInputPaths(job, args[i]);
  }
  inFormat.setMinSplitSizeRack(blockSize);
  inFormat.setMaxSplitSize(10 * blockSize);

  List<InputSplit> splits = inFormat.getSplits(job);
  System.out.println("Total number of splits " + splits.size());
  for (int i = 0; i < splits.size(); ++i) {
    CombineFileSplit fileSplit = (CombineFileSplit) splits.get(i);
    System.out.println("Split[" + i + "] " + fileSplit);
  }
}
 
Example 2
Source Project: hadoop   File: FSOperations.java    License: Apache License 2.0
/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 *
 * @return The URI of the created file.
 *
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public Void execute(FileSystem fs) throws IOException {
  if (replication == -1) {
    replication = fs.getDefaultReplication(path);
  }
  if (blockSize == -1) {
    blockSize = fs.getDefaultBlockSize(path);
  }
  FsPermission fsPermission = new FsPermission(permission);
  int bufferSize = fs.getConf().getInt("httpfs.buffer.size", 4096);
  OutputStream os = fs.create(path, fsPermission, override, bufferSize, replication, blockSize, null);
  IOUtils.copyBytes(is, os, bufferSize, true);
  os.close();
  return null;
}
 
Example 3
Source Project: big-c   File: FSOperations.java    License: Apache License 2.0
/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 *
 * @return The URI of the created file.
 *
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public Void execute(FileSystem fs) throws IOException {
  if (replication == -1) {
    replication = fs.getDefaultReplication(path);
  }
  if (blockSize == -1) {
    blockSize = fs.getDefaultBlockSize(path);
  }
  FsPermission fsPermission = new FsPermission(permission);
  int bufferSize = fs.getConf().getInt("httpfs.buffer.size", 4096);
  OutputStream os = fs.create(path, fsPermission, override, bufferSize, replication, blockSize, null);
  IOUtils.copyBytes(is, os, bufferSize, true);
  os.close();
  return null;
}
 
Example 4
Source Project: hadoop   File: RetriableFileCopyCommand.java    License: Apache License 2.0
/**
 * @return the block size of the source file if we need to preserve either
 *         the block size or the checksum type. Otherwise the default block
 *         size of the target FS.
 */
private static long getBlockSize(
        EnumSet<FileAttribute> fileAttributes,
        FileStatus sourceFile, FileSystem targetFS, Path tmpTargetPath) {
  boolean preserve = fileAttributes.contains(FileAttribute.BLOCKSIZE)
      || fileAttributes.contains(FileAttribute.CHECKSUMTYPE);
  return preserve ? sourceFile.getBlockSize() : targetFS
      .getDefaultBlockSize(tmpTargetPath);
}
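The two attributes are coupled on purpose: an HDFS file checksum is computed over fixed-size blocks, so preserving the checksum type without also carrying over the source block size would still yield non-comparable checksums. Hence CHECKSUMTYPE, like BLOCKSIZE, forces the source file's block size.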
 
Example 5
Source Project: hadoop   File: TestCopyMapper.java    License: Apache License 2.0
private static void touchFile(String path, boolean createMultipleBlocks,
    ChecksumOpt checksumOpt) throws Exception {
  FileSystem fs;
  DataOutputStream outputStream = null;
  try {
    fs = cluster.getFileSystem();
    final Path qualifiedPath = new Path(path).makeQualified(fs.getUri(),
        fs.getWorkingDirectory());
    final long blockSize = createMultipleBlocks ? NON_DEFAULT_BLOCK_SIZE : fs
        .getDefaultBlockSize(qualifiedPath) * 2;
    FsPermission permission = FsPermission.getFileDefault().applyUMask(
        FsPermission.getUMask(fs.getConf()));
    outputStream = fs.create(qualifiedPath, permission,
        EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), 0,
        (short) (fs.getDefaultReplication(qualifiedPath) * 2), blockSize,
        null, checksumOpt);
    byte[] bytes = new byte[DEFAULT_FILE_SIZE];
    outputStream.write(bytes);
    long fileSize = DEFAULT_FILE_SIZE;
    if (createMultipleBlocks) {
      while (fileSize < 2*blockSize) {
        outputStream.write(bytes);
        outputStream.flush();
        fileSize += DEFAULT_FILE_SIZE;
      }
    }
    pathList.add(qualifiedPath);
    ++nFiles;

    FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
    System.out.println(fileStatus.getBlockSize());
    System.out.println(fileStatus.getReplication());
  }
  finally {
    IOUtils.cleanup(null, outputStream);
  }
}
 
Example 6
Source Project: big-c   File: RetriableFileCopyCommand.java    License: Apache License 2.0
/**
 * @return the block size of the source file if we need to preserve either
 *         the block size or the checksum type. Otherwise the default block
 *         size of the target FS.
 */
private static long getBlockSize(
        EnumSet<FileAttribute> fileAttributes,
        FileStatus sourceFile, FileSystem targetFS, Path tmpTargetPath) {
  boolean preserve = fileAttributes.contains(FileAttribute.BLOCKSIZE)
      || fileAttributes.contains(FileAttribute.CHECKSUMTYPE);
  return preserve ? sourceFile.getBlockSize() : targetFS
      .getDefaultBlockSize(tmpTargetPath);
}
 
Example 7
Source Project: big-c   File: TestS3ABlocksize.java    License: Apache License 2.0
@Test
@SuppressWarnings("deprecation")
public void testBlockSize() throws Exception {
  FileSystem fs = getFileSystem();
  long defaultBlockSize = fs.getDefaultBlockSize();
  assertEquals("incorrect blocksize",
      S3AFileSystem.DEFAULT_BLOCKSIZE, defaultBlockSize);
  long newBlockSize = defaultBlockSize * 2;
  fs.getConf().setLong(Constants.FS_S3A_BLOCK_SIZE, newBlockSize);

  Path dir = path("testBlockSize");
  Path file = new Path(dir, "file");
  createFile(fs, file, true, dataset(1024, 'a', 'z' - 'a'));
  FileStatus fileStatus = fs.getFileStatus(file);
  assertEquals("Double default block size in stat(): " + fileStatus,
      newBlockSize,
      fileStatus.getBlockSize());

  // check the listing  & assert that the block size is picked up by
  // this route too.
  boolean found = false;
  FileStatus[] listing = fs.listStatus(dir);
  for (FileStatus stat : listing) {
    LOG.info("entry: {}", stat);
    if (file.equals(stat.getPath())) {
      found = true;
      assertEquals("Double default block size in ls(): " + stat,
          newBlockSize,
          stat.getBlockSize());
    }
  }
  assertTrue("Did not find " + fileStatsToString(listing, ", "), found);
}
 
Example 8
Source Project: indexr   File: SegmentHelper.java    License: Apache License 2.0
public static long getSegmentBlockSize(FileSystem fileSystem, long fileSize) {
    long blockSize = fileSystem.getDefaultBlockSize();
    while (blockSize < fileSize) {
        blockSize <<= 1;
    }
    return blockSize;
}
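For example, with a 128 MB default block size and a 300 MB segment file, the loop doubles 128 MB to 256 MB and then to 512 MB before returning, so the whole segment fits in a single block.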
 
Example 9
Source Project: spork   File: HadoopShims.java    License: Apache License 2.0
public static long getDefaultBlockSize(FileSystem fs, Path path) {
    return fs.getDefaultBlockSize();
}
 
Example 10
Source Project: systemds   File: WriterBinaryBlock.java    License: Apache License 2.0
@SuppressWarnings("deprecation")
protected final void writeBinaryBlockMatrixToSequenceFile( Path path, JobConf job, FileSystem fs, MatrixBlock src, int blen, int rl, int ru ) 
	throws IOException
{
	boolean sparse = src.isInSparseFormat();
	int rlen = src.getNumRows();
	int clen = src.getNumColumns();
	
	// 1) create sequence file writer, with right replication factor 
	// (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
	SequenceFile.Writer writer = null;
	if( _replication > 0 ) //if replication specified (otherwise default)
	{
		//copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
		writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
			job.getInt(HDFSTool.IO_FILE_BUFFER_SIZE, 4096),
			(short)_replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());	
	}
	else	
	{
		writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
	}
	
	try
	{
		// 2) bound check for src block
		if( src.getNumRows() > rlen || src.getNumColumns() > clen )
		{
			throw new IOException("Matrix block [1:"+src.getNumRows()+",1:"+src.getNumColumns()+"] " +
					              "out of overall matrix range [1:"+rlen+",1:"+clen+"].");
		}
	
		//3) reblock and write
		MatrixIndexes indexes = new MatrixIndexes();

		if( rlen <= blen && clen <= blen && rl == 0 ) //opt for single block
		{
			//directly write single block
			indexes.setIndexes(1, 1);
			writer.append(indexes, src);
		}
		else //general case
		{
			//initialize blocks for reuse (at most 4 different blocks required)
			MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, blen, sparse, src.getNonZeros());
			
			//create and write subblocks of matrix
			for(int blockRow = rl/blen; blockRow < (int)Math.ceil(ru/(double)blen); blockRow++)
				for(int blockCol = 0; blockCol < (int)Math.ceil(src.getNumColumns()/(double)blen); blockCol++)
				{
					int maxRow = (blockRow*blen + blen < src.getNumRows()) ? blen : src.getNumRows() - blockRow*blen;
					int maxCol = (blockCol*blen + blen < src.getNumColumns()) ? blen : src.getNumColumns() - blockCol*blen;
			
					int row_offset = blockRow*blen;
					int col_offset = blockCol*blen;
					
					//get reuse matrix block
					MatrixBlock block = getMatrixBlockForReuse(blocks, maxRow, maxCol, blen);

					//copy submatrix to block
					src.slice( row_offset, row_offset+maxRow-1, 
							             col_offset, col_offset+maxCol-1, block );
					
					//append block to sequence file
					indexes.setIndexes(blockRow+1, blockCol+1);
					writer.append(indexes, block);
						
					//reset block for later reuse
					block.reset();
				}
		}
	}
	finally {
		IOUtilFunctions.closeSilently(writer);
	}
}
 
Example 11
Source Project: systemds   File: WriterBinaryBlock.java    License: Apache License 2.0
@SuppressWarnings("deprecation")
protected final void writeDiagBinaryBlockMatrixToHDFS( Path path, JobConf job, FileSystem fs, MatrixBlock src, long rlen, long clen, int blen ) 
	throws IOException, DMLRuntimeException
{
	boolean sparse = src.isInSparseFormat();
	
	// 1) create sequence file writer, with right replication factor 
	// (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
	SequenceFile.Writer writer = null;
	if( _replication > 0 ) //if replication specified (otherwise default)
	{
		//copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
		writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
			job.getInt(HDFSTool.IO_FILE_BUFFER_SIZE, 4096),
			(short)_replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());
	}
	else	
	{
		writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
	}
	
	try
	{
		// 2) bound check for src block
		if( src.getNumRows() > rlen || src.getNumColumns() > clen )
		{
			throw new IOException("Matrix block [1:"+src.getNumRows()+",1:"+src.getNumColumns()+"] " +
					              "out of overall matrix range [1:"+rlen+",1:"+clen+"].");
		}
	
		//3) reblock and write
		MatrixIndexes indexes = new MatrixIndexes();

		if( rlen <= blen && clen <= blen ) //opt for single block
		{
			//directly write single block
			indexes.setIndexes(1, 1);
			writer.append(indexes, src);
		}
		else //general case
		{
			//initialize blocks for reuse (at most 4 different blocks required)
			MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, blen, sparse, src.getNonZeros());
			MatrixBlock emptyBlock = new MatrixBlock();
				
			//create and write subblocks of matrix
			for(int blockRow = 0; blockRow < (int)Math.ceil(src.getNumRows()/(double)blen); blockRow++)
				for(int blockCol = 0; blockCol < (int)Math.ceil(src.getNumColumns()/(double)blen); blockCol++)
				{
					int maxRow = (blockRow*blen + blen < src.getNumRows()) ? blen : src.getNumRows() - blockRow*blen;
					int maxCol = (blockCol*blen + blen < src.getNumColumns()) ? blen : src.getNumColumns() - blockCol*blen;
					MatrixBlock block = null;
					
					if( blockRow==blockCol ) //block on diagonal
					{	
						int row_offset = blockRow*blen;
						int col_offset = blockCol*blen;
						
						//get reuse matrix block
						block = getMatrixBlockForReuse(blocks, maxRow, maxCol, blen);
	
						//copy submatrix to block
						src.slice( row_offset, row_offset+maxRow-1, 
							col_offset, col_offset+maxCol-1, block );
					}
					else //empty block (not on diagonal)
					{
						block = emptyBlock;
						block.reset(maxRow, maxCol);
					}
					
					//append block to sequence file
					indexes.setIndexes(blockRow+1, blockCol+1);
					writer.append(indexes, block);
					
					//reset block for later reuse
					if( blockRow!=blockCol )
						block.reset();
				}
		}				
	}
	finally {
		IOUtilFunctions.closeSilently(writer);
	}
}
 
Example 12
Source Project: incubator-gobblin   File: CopyableFile.java    License: Apache License 2.0
/**
 * @return desired block size for destination file.
 */
public long getBlockSize(FileSystem targetFs) {
  return getPreserve().preserve(PreserveAttributes.Option.BLOCK_SIZE) ?
      getOrigin().getBlockSize() : targetFs.getDefaultBlockSize(this.destination);
}
 
Example 13
Source Project: streamx   File: WALFile.java    License: Apache License 2.0
Writer(Configuration conf, Option... opts) throws IOException {
  BlockSizeOption blockSizeOption =
      Options.getOption(BlockSizeOption.class, opts);
  BufferSizeOption bufferSizeOption =
      Options.getOption(BufferSizeOption.class, opts);
  ReplicationOption replicationOption =
      Options.getOption(ReplicationOption.class, opts);

  FileOption fileOption = Options.getOption(FileOption.class, opts);
  AppendIfExistsOption appendIfExistsOption = Options.getOption(
      AppendIfExistsOption.class, opts);
  StreamOption streamOption = Options.getOption(StreamOption.class, opts);

  // check consistency of options
  if ((fileOption == null) == (streamOption == null)) {
    throw new IllegalArgumentException("file or stream must be specified");
  }
  if (fileOption == null && (blockSizeOption != null ||
                             bufferSizeOption != null ||
                             replicationOption != null)) {
    throw new IllegalArgumentException("file modifier options not " +
                                       "compatible with stream");
  }

  FSDataOutputStream out;
  boolean ownStream = fileOption != null;
  if (ownStream) {
    Path p = fileOption.getValue();
    FileSystem fs;
    fs = p.getFileSystem(conf);
    int bufferSize = bufferSizeOption == null ? getBufferSize(conf) :
                     bufferSizeOption.getValue();
    short replication = replicationOption == null ?
                        fs.getDefaultReplication(p) :
                        (short) replicationOption.getValue();
    long blockSize = blockSizeOption == null ? fs.getDefaultBlockSize(p) :
                     blockSizeOption.getValue();

    if (appendIfExistsOption != null && appendIfExistsOption.getValue()
        && fs.exists(p)) {
      // Read the file and verify header details
      try (WALFile.Reader reader =
               new WALFile.Reader(conf, WALFile.Reader.file(p), new Reader.OnlyHeaderOption())){
        if (reader.getVersion() != VERSION[3]) {
          throw new VersionMismatchException(VERSION[3], reader.getVersion());
        }
        sync = reader.getSync();
      }
      out = fs.append(p, bufferSize);
      this.appendMode = true;
    } else {
      out = fs.create(p, true, bufferSize, replication, blockSize);
    }
  } else {
    out = streamOption.getValue();
  }

  init(conf, out, ownStream);
}
 
Example 14
Source Project: spork   File: HadoopShims.java    License: Apache License 2.0
public static long getDefaultBlockSize(FileSystem fs, Path path) {
    return fs.getDefaultBlockSize(path);
}
 
Example 15
Source Project: incubator-tajo   File: RCFile.java    License: Apache License 2.0
/**
 * Constructs a RCFile Writer.
 *
 * @param fs
 *          the file system used
 * @param conf
 *          the configuration
 * @param name
 *          the file name
 * @param progress a progress meter to update as the file is written
 * @param metadata a string to string map in the file header
 * @param codec the compression codec, or null if the file is not compressed
 * @throws java.io.IOException
 */
public Writer(FileSystem fs, Configuration conf, Path name,
    Progressable progress, Metadata metadata, CompressionCodec codec) throws IOException {
  this(fs, conf, name, fs.getConf().getInt("io.file.buffer.size", 4096),
      fs.getDefaultReplication(), fs.getDefaultBlockSize(), progress,
      metadata, codec);
}
 
Example 16
Source Project: hbase   File: CommonFSUtils.java    License: Apache License 2.0
/**
 * Return the number of bytes that large input files should optimally be
 * split into to minimize I/O time.
 *
 * @param fs filesystem object
 * @param path path within the filesystem
 * @return the default block size for the path's filesystem
 */
public static long getDefaultBlockSize(final FileSystem fs, final Path path) {
  return fs.getDefaultBlockSize(path);
}