Java Code Examples for org.apache.hadoop.fs.LocatedFileStatus#getLen()

The following examples show how to use org.apache.hadoop.fs.LocatedFileStatus#getLen(). Each example is drawn from an open-source project; the source file and license are noted above each listing.
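Before the project examples, here is a minimal, self-contained sketch of the typical pattern: recursively list files under a directory and sum their lengths with getLen(). This sketch is not taken from any of the projects below; the class name TotalInputSize and the default path /tmp/input are illustrative assumptions.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class TotalInputSize {
  public static void main(String[] args) throws IOException {
    // Illustrative default; pass a real directory as the first argument.
    Path dir = new Path(args.length > 0 ? args[0] : "/tmp/input");
    Configuration conf = new Configuration();
    FileSystem fs = dir.getFileSystem(conf);

    long totalBytes = 0;
    long fileCount = 0;
    // listFiles(dir, true) walks the tree recursively and returns only files.
    RemoteIterator<LocatedFileStatus> iter = fs.listFiles(dir, true);
    while (iter.hasNext()) {
      LocatedFileStatus status = iter.next();
      totalBytes += status.getLen();   // file length in bytes
      fileCount++;
    }
    System.out.println("Files: " + fileCount + ", total bytes: " + totalBytes);
  }
}
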
Example 1
Source File: GenerateData.java    From hadoop with Apache License 2.0
static DataStatistics publishPlainDataStatistics(Configuration conf, 
                                                 Path inputDir) 
throws IOException {
  FileSystem fs = inputDir.getFileSystem(conf);

  // obtain input data file statuses
  long dataSize = 0;
  long fileCount = 0;
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true);
  PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  while (iter.hasNext()) {
    LocatedFileStatus lStatus = iter.next();
    if (filter.accept(lStatus.getPath())) {
      dataSize += lStatus.getLen();
      ++fileCount;
    }
  }

  // publish the plain data statistics
  LOG.info("Total size of input data : " 
           + StringUtils.humanReadableInt(dataSize));
  LOG.info("Total number of input data files : " + fileCount);
  
  return new DataStatistics(dataSize, fileCount, false);
}
 
Example 2
Source File: GenerateData.java    From big-c with Apache License 2.0
static DataStatistics publishPlainDataStatistics(Configuration conf, 
                                                 Path inputDir) 
throws IOException {
  FileSystem fs = inputDir.getFileSystem(conf);

  // obtain input data file statuses
  long dataSize = 0;
  long fileCount = 0;
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true);
  PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  while (iter.hasNext()) {
    LocatedFileStatus lStatus = iter.next();
    if (filter.accept(lStatus.getPath())) {
      dataSize += lStatus.getLen();
      ++fileCount;
    }
  }

  // publish the plain data statistics
  LOG.info("Total size of input data : " 
           + StringUtils.humanReadableInt(dataSize));
  LOG.info("Total number of input data files : " + fileCount);
  
  return new DataStatistics(dataSize, fileCount, false);
}
 
Example 3
Source File: SegmentHelper.java    From indexr with Apache License 2.0
public static void literalAllSegments(FileSystem fileSystem, Path dir, Consumer<LocatedFileStatus> consumer) throws IOException {
    RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(dir, true);
    while (files.hasNext()) {
        LocatedFileStatus fileStatus = files.next();
        if (!fileStatus.isFile()) {
            continue;
        }
        if (fileStatus.getLen() == 0) {
            continue;
        }

        Path path = fileStatus.getPath();
        if (checkSegmentByPath(path)) {
            consumer.accept(fileStatus);
        }
    }
}
 
Example 4
Source File: IntegrationTestBigLinkedList.java    From hbase with Apache License 2.0
private static SortedSet<byte[]> readFileToSearch(final Configuration conf,
    final FileSystem fs, final LocatedFileStatus keyFileStatus) throws IOException,
    InterruptedException {
  SortedSet<byte []> result = new TreeSet<>(Bytes.BYTES_COMPARATOR);
  // Return entries that are flagged Counts.UNDEFINED in the value. Return the row. This is
  // what is missing.
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr =
      new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) {
    InputSplit is =
      new FileSplit(keyFileStatus.getPath(), 0, keyFileStatus.getLen(), new String [] {});
    rr.initialize(is, context);
    while (rr.nextKeyValue()) {
      rr.getCurrentKey();
      BytesWritable bw = rr.getCurrentValue();
      if (Verify.VerifyReducer.whichType(bw.getBytes()) == Verify.Counts.UNDEFINED) {
        byte[] key = new byte[rr.getCurrentKey().getLength()];
        System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0, rr.getCurrentKey()
            .getLength());
        result.add(key);
      }
    }
  }
  return result;
}
 
Example 5
Source File: FileInputFormat.java    From RDFS with Apache License 2.0
private void verifyLocatedFileStatus(
    JobConf conf, List<LocatedFileStatus> stats)
    throws IOException {
  if (!conf.getBoolean("mapred.fileinputformat.verifysplits", true)) {
    return;
  }
  for (LocatedFileStatus stat: stats) {
    long fileLen = stat.getLen();
    long blockLenTotal = 0;
    for (BlockLocation loc: stat.getBlockLocations()) {
      blockLenTotal += loc.getLength();
    }
    if (blockLenTotal != fileLen) {
      throw new IOException("Error while getting located status, " +
        stat.getPath() + " has length " + fileLen + " but blocks total is " +
        blockLenTotal);
    }
  }
}
 
Example 6
Source File: RaidNode.java    From RDFS with Apache License 2.0
public static List<LocatedFileStatus> listDirectoryRaidLocatedFileStatus(
		Configuration conf, FileSystem srcFs, Path p) throws IOException {
	long minFileSize = conf.getLong(MINIMUM_RAIDABLE_FILESIZE_KEY,
			MINIMUM_RAIDABLE_FILESIZE);
	List<LocatedFileStatus> lfs = new ArrayList<LocatedFileStatus>();
	RemoteIterator<LocatedFileStatus> iter = srcFs.listLocatedStatus(p);
	while (iter.hasNext()) {
		LocatedFileStatus stat = iter.next();
		if (stat.isDir()) {
			return null;
		}
		// We don't raid files that are too small
		if (stat.getLen() < minFileSize) {
			continue;
		}
		lfs.add(stat);
	}
	if (lfs.size() == 0)
		return null;
	return lfs;
}
 
Example 7
Source File: AbstractPolicy.java    From kafka-connect-fs with Apache License 2.0
FileMetadata toMetadata(LocatedFileStatus fileStatus) {
    List<FileMetadata.BlockInfo> blocks = Arrays.stream(fileStatus.getBlockLocations())
            .map(block -> new FileMetadata.BlockInfo(block.getOffset(), block.getLength(), block.isCorrupt()))
            .collect(Collectors.toList());

    return new FileMetadata(fileStatus.getPath().toString(), fileStatus.getLen(), blocks);
}
 
Example 8
Source File: FileSegmentPool.java    From indexr with Apache License 2.0
public void refreshLocalities() {
    try {
        // HashMap carries a multi-threading risk here. Change to ConcurrentHashMap if it becomes a problem.
        Map<String, List<String>> newHostMap = new HashMap<>(segmentFdMap.size());

        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(segmentRootPath, true);
        while (files.hasNext()) {
            LocatedFileStatus fileStatus = files.next();
            if (fileStatus.getLen() == 0) {
                continue;
            }
            String name = getSegmentName(fileStatus);
            if (name == null) {
                continue;
            }
            BlockLocation[] locations = fileStatus.getBlockLocations();
            if (locations.length != 1) {
                logger.error("A segment should only consisted by one block, now {}. Ignored: {}", locations.length, name);
                continue;
            }
            List<String> hosts = Arrays.asList(locations[0].getHosts());
            newHostMap.put(name, hosts);
        }

        hostMap = newHostMap;
    } catch (IOException e) {
        if (e instanceof ClosedByInterruptException) {
            logger.warn("Refresh [{}] segment locality failed by ClosedByInterruptException.", tableName);
            // Expected during a normal close/interrupt; nothing to do.
            return;
        }
        String msg = e.getMessage();
        if (msg != null && Strings.equals(msg.trim(), "Filesystem closed")) {
            logger.warn("Refresh [{}] segment locality failed by Filesystem closed.", tableName);
            // Expected during a normal close/interrupt; nothing to do.
            return;
        }
        logger.warn("Refresh [{}] segment locality failed.", tableName, e);
    }
}
 
Example 9
Source File: FileInputFormat.java    From RDFS with Apache License 2.0
/** 
 * Generate the list of files and make them into FileSplits.
 */ 
public List<InputSplit> getSplits(JobContext job
                                  ) throws IOException {
  long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
  long maxSize = getMaxSplitSize(job);

  // generate splits
  List<InputSplit> splits = new ArrayList<InputSplit>();
  for (LocatedFileStatus file: listLocatedStatus(job)) {
    Path path = file.getPath();
    long length = file.getLen();
    BlockLocation[] blkLocations = file.getBlockLocations();

    if ((length != 0) && isSplitable(job, path)) { 
      long blockSize = file.getBlockSize();
      long splitSize = computeSplitSize(blockSize, minSize, maxSize);

      long bytesRemaining = length;
      while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
        int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
        splits.add(new FileSplit(path, length-bytesRemaining, splitSize, 
                                 blkLocations[blkIndex].getHosts()));
        bytesRemaining -= splitSize;
      }
      
      if (bytesRemaining != 0) {
        splits.add(new FileSplit(path, length-bytesRemaining, bytesRemaining, 
                   blkLocations[blkLocations.length-1].getHosts()));
      }
    } else if (length != 0) {
      splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
    } else { 
      //Create empty hosts array for zero length files
      splits.add(new FileSplit(path, 0, length, new String[0]));
    }
  }
  LOG.debug("Total # of splits: " + splits.size());
  return splits;
}
 
Example 10
Source File: DumpHDFSData.java    From gemfirexd-oss with Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, SQLException {
  if(args.length < 3 || args.length > 4) {
    System.err.println("Usage: DumpHDFSData namenode_url homedir tablename");
    System.err.println("  Dumps the RAW data for the table tablename into a CSV format for debugging purposes");
    System.exit(1);
  }
  
  
  String namenodeURL = args[0];
  String homeDir = args[1];
  String table = args[2];
  
  
  Configuration conf = new Configuration();
  conf.set("fs.default.name", namenodeURL);
  FileSystem fs = FileSystem.get(conf);
  
  PrintStream out = new PrintStream(table + ".csv");
  try {
    String fullTable = RowInputFormat.getFullyQualifiedTableName(table);
    String folder = HdfsRegionManager.getRegionFolder(Misc.getRegionPath(fullTable));
    RemoteIterator<LocatedFileStatus> fileItr = fs.listFiles(new Path(homeDir + "/" + folder), true);
    
    conf.set(RowInputFormat.HOME_DIR, homeDir);
    conf.set(RowInputFormat.INPUT_TABLE, table);
    
    boolean wroteHeader = false;
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    
    while(fileItr.hasNext()) {
      LocatedFileStatus file = fileItr.next();
      Path path = file.getPath();
      if(!path.getName().endsWith("hop")) {
        continue;
      }
      CombineFileSplit split = new CombineFileSplit(new Path[] { path } , new long[] { file.getLen()});
      RowRecordReader reader = new RowRecordReader();
      reader.initialize(split, context);
      while(reader.nextKeyValue()) {
        Row row = reader.getCurrentValue();
        ResultSet rs = row.getRowAsResultSet();
        Type op = row.getEventType();
        long ts = row.getTimestamp();

        int numColumns = rs.getMetaData().getColumnCount();
        if(!wroteHeader) {
          out.print("timestamp,operation,path");
          for(int i =1; i <= numColumns; i++) {
            out.print(",");
            out.print(rs.getMetaData().getColumnName(i));
          }
          out.println();
          wroteHeader = true;
        }

        out.print(ts);
        out.print(",");
        out.print(op);
        out.print(",");
        out.print(path);
        for(int i =1; i <= numColumns; i++) {
          out.print(",");
          String s = rs.getString(i);
          if (s != null) {
            // Escape commas and newlines with a backslash ($1 is the captured character).
            s = s.replaceAll("([,\n])", "\\\\$1");
          } else {
            s = "NULL";
          }
          out.print(s);
        }
        out.println();
      }
    }
  
  } finally {
    out.close();
  }
}
 
Example 11
Source File: TestPlannerUtil.java    From tajo with Apache License 2.0
@Test
public void testGetNonZeroLengthDataFiles() throws Exception {
  String queryFiles = ClassLoader.getSystemResource("queries").toString() + "/TestSelectQuery";
  Path path = new Path(queryFiles);

  TableDesc tableDesc = new TableDesc();
  tableDesc.setName("Test");
  tableDesc.setUri(path.toUri());

  FileSystem fs = path.getFileSystem(util.getConfiguration());

  List<Path> expectedFiles = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(path, true);
  while (files.hasNext()) {
    LocatedFileStatus file = files.next();
    if (file.isFile() && file.getLen() > 0) {
      expectedFiles.add(file.getPath());
    }
  }
  int fileNum = expectedFiles.size() / 5;

  int numResultFiles = 0;
  for (int i = 0; i <= 5; i++) {
    int start = i * fileNum;

    FragmentProto[] fragments =
        PhysicalPlanUtil.getNonZeroLengthDataFiles(util.getConfiguration(), tableDesc, start, fileNum);
    assertNotNull(fragments);

    numResultFiles += fragments.length;
    int expectedSize = fileNum;
    if (i == 5) {
      //last
      expectedSize = expectedFiles.size() - (fileNum * 5);
    }

    comparePath(expectedFiles, fragments, start, expectedSize);
  }

  assertEquals(expectedFiles.size(), numResultFiles);
}