Java Code Examples for org.apache.hadoop.fs.FileStatus#getModificationTime()

The following examples show how to use org.apache.hadoop.fs.FileStatus#getModificationTime(), which returns the time a file was last modified as milliseconds since the Unix epoch. Each example comes from an open-source project; the source file, project, and license are listed above the code.
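Before the project examples, here is a minimal, self-contained sketch of the call itself, not taken from any of the projects below: it obtains a FileStatus from a FileSystem and prints the modification time as a date. The path /tmp/example.txt and the class name are purely illustrative.

import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ModificationTimeExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Uses the default file system from the configuration (e.g. HDFS or the local FS).
    FileSystem fs = FileSystem.get(conf);

    // Illustrative path; substitute a file that exists on your file system.
    Path path = new Path("/tmp/example.txt");

    FileStatus status = fs.getFileStatus(path);
    long mtimeMillis = status.getModificationTime(); // milliseconds since the epoch
    System.out.println("Last modified: " + new Date(mtimeMillis));
  }
}
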
Example 1
Source File: HadoopFileSystem.java    From jsr203-hadoop with Apache License 2.0
public void setTimes(byte[] bs, FileTime mtime, FileTime atime, FileTime ctime) throws IOException
{
  org.apache.hadoop.fs.Path hp = new HadoopPath(this, bs).getRawResolvedPath();
  long mtime_millis = 0;
  long atime_millis = 0;
  // Get actual value
  if (mtime == null || atime == null)
  {
    FileStatus stat = this.fs.getFileStatus(hp);
    mtime_millis = stat.getModificationTime();
    atime_millis = stat.getAccessTime();
  }
  if (mtime != null) {
    mtime_millis = mtime.toMillis();
  }
  if (atime != null) {
    atime_millis = atime.toMillis();
  }
  this.fs.setTimes(hp, mtime_millis, atime_millis);
}
 
Example 2
Source File: FSDownload.java    From hadoop with Apache License 2.0
private Path copy(Path sCopy, Path dstdir) throws IOException {
  FileSystem sourceFs = sCopy.getFileSystem(conf);
  Path dCopy = new Path(dstdir, "tmp_"+sCopy.getName());
  FileStatus sStat = sourceFs.getFileStatus(sCopy);
  if (sStat.getModificationTime() != resource.getTimestamp()) {
    throw new IOException("Resource " + sCopy +
        " changed on src filesystem (expected " + resource.getTimestamp() +
        ", was " + sStat.getModificationTime() + ")");
  }
  if (resource.getVisibility() == LocalResourceVisibility.PUBLIC) {
    if (!isPublic(sourceFs, sCopy, sStat, statCache)) {
      throw new IOException("Resource " + sCopy +
          " is not publicly accessible and as such cannot be part of the" +
          " public cache.");
    }
  }

  FileUtil.copy(sourceFs, sStat, FileSystem.getLocal(conf), dCopy, false,
      true, conf);
  return dCopy;
}
 
Example 3
Source File: AggregatedLogDeletionService.java    From hadoop with Apache License 2.0
private static boolean shouldDeleteLogDir(FileStatus dir, long cutoffMillis, 
    FileSystem fs) {
  boolean shouldDelete = true;
  try {
    for(FileStatus node: fs.listStatus(dir.getPath())) {
      if(node.getModificationTime() >= cutoffMillis) {
        shouldDelete = false;
        break;
      }
    }
  } catch(IOException e) {
    logIOException("Error reading the contents of " + dir.getPath(), e);
    shouldDelete = false;
  }
  return shouldDelete;
}
 
Example 4
Source File: FileSplitterInput.java    From attic-apex-malhar with Apache License 2.0
private void scan(Path filePath, Path rootPath, Map<String, Long> lastModifiedTimesForInputDir) throws IOException
{
  FileStatus parentStatus = fs.getFileStatus(filePath);
  String parentPathStr = filePath.toUri().getPath();

  LOG.debug("scan {}", parentPathStr);

  FileStatus[] childStatuses = fs.listStatus(filePath);

  if (childStatuses.length == 0 && rootPath == null && (lastModifiedTimesForInputDir == null || lastModifiedTimesForInputDir.get(parentPathStr) == null)) { // empty input directory copy as is
    ScannedFileInfo info = new ScannedFileInfo(null, filePath.toString(), parentStatus.getModificationTime());
    processDiscoveredFile(info);
  }

  for (FileStatus childStatus : childStatuses) {
    Path childPath = childStatus.getPath();
    String childPathStr = childPath.toUri().getPath();

    if (childStatus.isDirectory() && isRecursive()) {
      addToDiscoveredFiles(rootPath, parentStatus, childStatus, lastModifiedTimesForInputDir);
      scan(childPath, rootPath == null ? parentStatus.getPath() : rootPath, lastModifiedTimesForInputDir);
    } else if (acceptFile(childPathStr)) {
      addToDiscoveredFiles(rootPath, parentStatus, childStatus, lastModifiedTimesForInputDir);
    } else {
      // don't look at it again
      ignoredFiles.add(childPathStr);
    }
  }
}
 
Example 5
Source File: HdfsDirectory.java    From incubator-retired-blur with Apache License 2.0
protected long fileModified(String name) throws IOException {
  Path path = getPath(name);
  Tracer trace = Trace.trace("filesystem - fileModified", Trace.param("path", path));
  try {
    FileStatus fileStatus = _fileSystem.getFileStatus(path);
    if (_useCache) {
      _fileStatusCache.putFStat(name, new FStat(fileStatus));
    }
    return fileStatus.getModificationTime();
  } finally {
    trace.done();
  }
}
 
Example 6
Source File: HDFSResourceStore.java    From kylin with Apache License 2.0
@Override
protected RawResource getResourceImpl(String resPath) throws IOException {
    Path p = getRealHDFSPath(resPath);
    if (fs.exists(p) && fs.isFile(p)) {
        FileStatus fileStatus = fs.getFileStatus(p);
        if (fileStatus.getLen() == 0) {
            logger.warn("Zero length file: {}. ", p);
        }
        FSDataInputStream in = fs.open(p);
        long ts = fileStatus.getModificationTime();
        return new RawResource(resPath, ts, in);
    } else {
        return null;
    }
}
 
Example 7
Source File: MyClient.java    From yarn-beginners-examples with Apache License 2.0
private Map<String, String> getAMEnvironment(Map<String, LocalResource> localResources
    , FileSystem fs) throws IOException{
  Map<String, String> env = new HashMap<String, String>();

  // Set ApplicationMaster jar file
  LocalResource appJarResource = localResources.get(Constants.AM_JAR_NAME);
  Path hdfsAppJarPath = new Path(fs.getHomeDirectory(), appJarResource.getResource().getFile());
  FileStatus hdfsAppJarStatus = fs.getFileStatus(hdfsAppJarPath);
  long hdfsAppJarLength = hdfsAppJarStatus.getLen();
  long hdfsAppJarTimestamp = hdfsAppJarStatus.getModificationTime();

  env.put(Constants.AM_JAR_PATH, hdfsAppJarPath.toString());
  env.put(Constants.AM_JAR_TIMESTAMP, Long.toString(hdfsAppJarTimestamp));
  env.put(Constants.AM_JAR_LENGTH, Long.toString(hdfsAppJarLength));

  // Add AppMaster.jar location to classpath
  // At some point we should not be required to add
  // the hadoop specific classpaths to the env.
  // It should be provided out of the box.
  // For now setting all required classpaths including
  // the classpath to "." for the application jar
  StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
      .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
  for (String c : conf.getStrings(
      YarnConfiguration.YARN_APPLICATION_CLASSPATH,
      YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
    classPathEnv.append(c.trim());
  }
  env.put("CLASSPATH", classPathEnv.toString());

  return env;
}
 
Example 8
Source File: SharedCacheUploader.java    From hadoop with Apache License 2.0
/**
 * Checks that the (original) remote file is either owned by the user who
 * started the app or public.
 */
@VisibleForTesting
boolean verifyAccess() throws IOException {
  // if it is in the public cache, it's trivially OK
  if (resource.getVisibility() == LocalResourceVisibility.PUBLIC) {
    return true;
  }

  final Path remotePath;
  try {
    remotePath = ConverterUtils.getPathFromYarnURL(resource.getResource());
  } catch (URISyntaxException e) {
    throw new IOException("Invalid resource", e);
  }

  // get the file status of the HDFS file
  FileSystem remoteFs = remotePath.getFileSystem(conf);
  FileStatus status = remoteFs.getFileStatus(remotePath);
  // check to see if the file has been modified in any way
  if (status.getModificationTime() != resource.getTimestamp()) {
    LOG.warn("The remote file " + remotePath +
        " has changed since it's localized; will not consider it for upload");
    return false;
  }

  // check for the user ownership
  if (status.getOwner().equals(user)) {
    return true; // the user owns the file
  }
  // check if the file is publicly readable otherwise
  return fileIsPublic(remotePath, remoteFs, status);
}
 
Example 9
Source File: HadoopConnectingFileSystemProvider.java    From CloverETL-Engine with GNU Lesser General Public License v2.1
@Override
public HadoopFileStatus getExtendedStatus(URI path) throws IOException {
	checkConnected();
	FileStatus status = dfs.getFileStatus(new Path(path));
	return new HadoopFileStatus(status.getPath().toUri(), status.getLen(), status.isDir(),
			status.getModificationTime(), status.getBlockSize(), status.getGroup(), status.getOwner(),
			status.getReplication());
}
 
Example 10
Source File: HdfsSortedOplogOrganizer.java    From gemfirexd-oss with Apache License 2.0
/**
 * @param ts
 *          target timestamp
 * @return list of hoplogs, whose expiry markers were created before target
 *         timestamp, and the expiry marker itself.
 * @throws IOException
 */
protected List<FileStatus> getOptimizationTargets(long ts) throws IOException {
  if (logger.finerEnabled()) {
    logger.finer("Identifying optimization targets " + ts);
  }

  List<FileStatus> deleteTargets = new ArrayList<FileStatus>();
  FileStatus[] markers = getExpiryMarkers();
  if (markers != null) {
    for (FileStatus marker : markers) {
      String name = truncateExpiryExtension(marker.getPath().getName());
      long timestamp = marker.getModificationTime();

      // expired minor compacted files are not being used anywhere. These can
      // be removed immediately. All the other expired files should be removed
      // when the files have aged
      boolean isTarget = false;
      
      if (name.endsWith(MINOR_HOPLOG_EXTENSION)) {
        isTarget = true;
      } else if (timestamp < ts && name.endsWith(FLUSH_HOPLOG_EXTENSION)) {
        isTarget = true;
      } else if (timestamp < ts && name.endsWith(MAJOR_HOPLOG_EXTENSION)) {
        HDFSCompactionConfig compactionConf = store.getHDFSCompactionConfig();
        long majorCInterval = ((long)compactionConf.getMajorCompactionIntervalMins()) * 60 * 1000;
        if (timestamp < (System.currentTimeMillis() - majorCInterval)) {
          isTarget = true;
        }
      }
      if (!isTarget) {
        continue;
      }
      
      // if the file is still being read, do not delete or rename it
      TrackedReference<Hoplog> used = hoplogReadersController.getInactiveHoplog(name);
      if (used != null) {
        if (used.inUse() && logger.fineEnabled()) {
          logger.fine("Optimizer: found active expired hoplog:" + name);
        } else if (logger.fineEnabled()) {
          logger.fine("Optimizer: found open expired hoplog:" + name);
        }
        continue;
      }
      
      if (logger.fineEnabled()) {
        logger.fine("Delete target identified " + marker.getPath());
      }
      
      deleteTargets.add(marker);
      Path hoplogPath = new Path(bucketPath, name);
      if (store.getFileSystem().exists(hoplogPath)) {
        FileStatus hoplog = store.getFileSystem().getFileStatus(hoplogPath);
        deleteTargets.add(hoplog);
      }
    }
  }
  return deleteTargets;
}
 
Example 11
Source File: NativeAzureFileSystemBaseTest.java    From hadoop with Apache License 2.0
private boolean testModifiedTime(Path testPath, long time) throws Exception {
  FileStatus fileStatus = fs.getFileStatus(testPath);
  final long errorMargin = modifiedTimeErrorMargin;
  long lastModified = fileStatus.getModificationTime();
  return (lastModified > (time - errorMargin) && lastModified < (time + errorMargin));
}
 
Example 12
Source File: TestHFileCleaner.java    From hbase with Apache License 2.0
/**
 * @param file to check
 * @return loggable information about the file
 */
private String getFileStats(Path file, FileSystem fs) throws IOException {
  FileStatus status = fs.getFileStatus(file);
  return "File" + file + ", mtime:" + status.getModificationTime() + ", atime:"
      + status.getAccessTime();
}
 
Example 13
Source File: TestSetTimes.java    From big-c with Apache License 2.0
/**
 * Tests mod time change at close in DFS.
 */
@Test
public void testTimesAtClose() throws IOException {
  Configuration conf = new HdfsConfiguration();
  final int MAX_IDLE_TIME = 2000; // 2s
  int replicas = 1;

  // parameter initialization
  conf.setInt("ipc.client.connection.maxidletime", MAX_IDLE_TIME);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1000);
  conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_DATANODE_HANDLER_COUNT_KEY, 50);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
                                             .numDataNodes(numDatanodes)
                                             .build();
  cluster.waitActive();
  InetSocketAddress addr = new InetSocketAddress("localhost",
                                                   cluster.getNameNodePort());
  DFSClient client = new DFSClient(addr, conf);
  DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
  assertEquals("Number of Datanodes ", numDatanodes, info.length);
  FileSystem fileSys = cluster.getFileSystem();
  assertTrue(fileSys instanceof DistributedFileSystem);

  try {
    // create a new file and write to it
    Path file1 = new Path("/simple.dat");
    FSDataOutputStream stm = writeFile(fileSys, file1, replicas);
    System.out.println("Created and wrote file simple.dat");
    FileStatus statBeforeClose = fileSys.getFileStatus(file1);
    long mtimeBeforeClose = statBeforeClose.getModificationTime();
    String mdateBeforeClose = dateForm.format(new Date(
                                                   mtimeBeforeClose));
    System.out.println("mtime on " + file1 + " before close is "
                + mdateBeforeClose + " (" + mtimeBeforeClose + ")");
    assertTrue(mtimeBeforeClose != 0);

    //close file after writing
    stm.close();
    System.out.println("Closed file.");
    FileStatus statAfterClose = fileSys.getFileStatus(file1);
    long mtimeAfterClose = statAfterClose.getModificationTime();
    String mdateAfterClose = dateForm.format(new Date(mtimeAfterClose));
    System.out.println("mtime on " + file1 + " after close is "
                + mdateAfterClose + " (" + mtimeAfterClose + ")");
    assertTrue(mtimeAfterClose != 0);
    assertTrue(mtimeBeforeClose != mtimeAfterClose);

    cleanupFile(fileSys, file1);
  } catch (IOException e) {
    info = client.datanodeReport(DatanodeReportType.ALL);
    printDatanodeReport(info);
    throw e;
  } finally {
    fileSys.close();
    cluster.shutdown();
  }
}
 
Example 14
Source File: PurgeMonitor.java    From RDFS with Apache License 2.0
protected static float usefulHar(
  Codec codec,
  FileSystem srcFs, FileSystem parityFs,
  Path harPath, String parityPrefix, Configuration conf,
  PlacementMonitor placementMonitor) throws IOException {

  HarIndex harIndex = HarIndex.getHarIndex(parityFs, harPath);
  Iterator<HarIndex.IndexEntry> entryIt = harIndex.getEntries();
  int numUseless = 0;
  int filesInHar = 0;
  while (entryIt.hasNext()) {
    HarIndex.IndexEntry entry = entryIt.next();
    filesInHar++;
    if (!entry.fileName.startsWith(parityPrefix)) {
      continue;
    }
    String src = entry.fileName.substring(parityPrefix.length());
    if (existsBetterParityFile(codec, new Path(src), conf)) {
      numUseless += 1;
      continue;
    }
    try {
      FileStatus srcStatus = srcFs.getFileStatus(new Path(src));
      if (srcStatus == null) {
        numUseless++;
      } else if (entry.mtime != srcStatus.getModificationTime()) {
        numUseless++;
      } else {
        // This parity file in this HAR is good.
        if (placementMonitor != null) {
          // Check placement.
          placementMonitor.checkFile(
            srcFs, srcStatus,
            parityFs, harIndex.partFilePath(entry), entry, codec);
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("Useful file " + entry.fileName);
        }
      }
    } catch (FileNotFoundException e) {
      numUseless++;
    }
  }
  if (filesInHar == 0) { return 0; }
  float uselessPercent = numUseless * 100.0f / filesInHar;
  return 100 - uselessPercent;
}
 
Example 15
Source File: GetHDFS.java    From localization_nifi with Apache License 2.0
/**
 * Poll HDFS for files to process that match the configured file filters.
 *
 * @param hdfs hdfs
 * @param dir dir
 * @param filesVisited filesVisited
 * @return files to process
 * @throws java.io.IOException ex
 */
protected Set<Path> selectFiles(final FileSystem hdfs, final Path dir, Set<Path> filesVisited) throws IOException {
    if (null == filesVisited) {
        filesVisited = new HashSet<>();
    }

    if (!hdfs.exists(dir)) {
        throw new IOException("Selection directory " + dir.toString() + " doesn't appear to exist!");
    }

    final Set<Path> files = new HashSet<>();

    for (final FileStatus file : hdfs.listStatus(dir)) {
        if (files.size() >= MAX_WORKING_QUEUE_SIZE) {
            // no need to make the files set larger than what we would queue anyway
            break;
        }

        final Path canonicalFile = file.getPath();

        if (!filesVisited.add(canonicalFile)) { // skip files we've already seen (may be looping directory links)
            continue;
        }

        if (file.isDirectory() && processorConfig.getRecurseSubdirs()) {
            files.addAll(selectFiles(hdfs, canonicalFile, filesVisited));

        } else if (!file.isDirectory() && processorConfig.getPathFilter(dir).accept(canonicalFile)) {
            final long fileAge = System.currentTimeMillis() - file.getModificationTime();
            if (processorConfig.getMinimumAge() < fileAge && fileAge < processorConfig.getMaximumAge()) {
                files.add(canonicalFile);

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug(this + " selected file at path: " + canonicalFile.toString());
                }

            }
        }
    }
    return files;
}
 
Example 16
Source File: RaidNode.java    From RDFS with Apache License 2.0
/**
 * Returns a list of pathnames that needs raiding. The list of paths
 * could be obtained by resuming a previously suspended traversal. The
 * number of paths returned is limited by raid.distraid.max.jobs.
 */
private List<FileStatus> selectFiles(PolicyInfo info,
		ArrayList<PolicyInfo> allPolicies) throws IOException {
	String policyName = info.getName();
	long modTimePeriod = 0;
	try {
		modTimePeriod = Long.parseLong(info.getProperty("modTimePeriod"));
	} catch (NumberFormatException nfe) {
		//set modTimePeriod to default: 1 minute
		modTimePeriod = 60000;
	}
	
	// Max number of files returned.
	int selectLimit = configMgr.getMaxFilesPerJob();

	PolicyState scanState = policyStateMap.get(policyName);

	List<FileStatus> returnSet = new ArrayList<FileStatus>(selectLimit);
	DirectoryTraversal traversal;
	if (scanState.isScanInProgress()) {
		LOG.info("Resuming traversal for policy " + policyName);
		traversal = scanState.pendingTraversal;
	} else {
		LOG.info("Start new traversal for policy " + policyName);
		scanState.startTime = now();
		if (!Codec.getCodec(info.getCodecId()).isDirRaid) {
			traversal = DirectoryTraversal.raidFileRetriever(info,
					info.getSrcPathExpanded(), allPolicies, conf,
					directoryTraversalThreads,
					directoryTraversalShuffle, true);
		} else {
			traversal = DirectoryTraversal.raidLeafDirectoryRetriever(
					info, info.getSrcPathExpanded(), allPolicies, conf,
					directoryTraversalThreads,
					directoryTraversalShuffle, true);
		}
		scanState.setTraversal(traversal);
	}

	FileStatus f;
	while ((f = traversal.next()) != DirectoryTraversal.FINISH_TOKEN) {
		long modTime = System.currentTimeMillis() - f.getModificationTime();
		if(modTime > modTimePeriod) {
			returnSet.add(f);
			if (returnSet.size() == selectLimit) {
				return returnSet;
			}
		}
	}
	scanState.resetTraversal();
	return returnSet;
}
 
Example 17
Source File: HdfsFileSystem.java    From datacollector with Apache License 2.0
public void addFiles(WrappedFile dirFile, WrappedFile startingFile, List<WrappedFile> toProcess, boolean includeStartingFile, boolean useLastModified) throws IOException {
  final long scanTime = System.currentTimeMillis();

  PathFilter pathFilter = new PathFilter() {
    @Override
    public boolean accept(Path entry) {
      try {
        FileStatus fileStatus = fs.getFileStatus(entry);
        if (fileStatus.isDirectory()) {
          return false;
        }

        if(!patternMatches(entry.getName())) {
          return false;
        }

        HdfsFile hdfsFile = new HdfsFile(fs, entry);
        // SDC-3551: Pick up only files with mtime strictly less than scan time.
        if (fileStatus.getModificationTime() < scanTime) {
          if (startingFile == null || startingFile.toString().isEmpty()) {
            toProcess.add(hdfsFile);
          } else {
            int compares = compare(hdfsFile, startingFile, useLastModified);
            if (includeStartingFile) {
              if (compares >= 0) {
                toProcess.add(hdfsFile);
              }
            } else {
              if (compares > 0) {
                toProcess.add(hdfsFile);
              }
            }
          }
        }
      } catch (IOException ex) {
        LOG.error("Failed to open file {}", entry.toString());
      }
      return false;
    }
  };

  fs.globStatus(new Path(dirFile.getAbsolutePath(), "*"), pathFilter);
}
 
Example 18
Source File: RaidState.java    From RDFS with Apache License 2.0
/**
 * Check the state of a raid source file against a policy
 * @param info The policy to check
 * @param file The source file to be checked
 * @param now The system millisecond time
 * @param skipParityCheck Skip checking the existence of parity. Checking
 *                        parity is very time-consuming for HAR parity file
 * @param lfs The list of FileStatus of files under the directory, only used
 *        by directory raid.
 * @return The state of the raid file
 * @throws IOException
 */
public RaidState check(PolicyInfo info, FileStatus file, long now,
    boolean skipParityCheck, List<FileStatus> lfs) throws IOException {
  ExpandedPolicy matched = null;
  long mtime = -1;
  String uriPath = file.getPath().toUri().getPath();
  if (inferMTimeFromName) {
    mtime = mtimeFromName(uriPath);
  }
  // If we can't infer the mtime from the name, use the mtime from filesystem.
  // If the file is newer than a day, use the mtime from the filesystem.
  if (mtime == -1 ||
      Math.abs(file.getModificationTime() - now) < ONE_DAY_MSEC) {
    mtime = file.getModificationTime();
  }
  for (ExpandedPolicy policy : sortedExpendedPolicy) {
    if (policy.parentPolicy == info) {
      matched = policy;
      break;
    }
    if (policy.match(file, mtime, now, skipParityCheck, conf)) {
      return NOT_RAIDED_OTHER_POLICY;
    }
  }
  if (matched == null) {
    return NOT_RAIDED_NO_POLICY;
  }
  
  // The preceding checks are more restrictive,
  // check for excluded just before parity check.
  if (shouldExclude(uriPath)) {
    return NOT_RAIDED_NO_POLICY;
  }
  
  if (file.isDir() != matched.codec.isDirRaid) {
    return NOT_RAIDED_NO_POLICY;
  }

  long blockNum = matched.codec.isDirRaid?
      DirectoryStripeReader.getBlockNum(lfs):
      computeNumBlocks(file);

  if (blockNum <= TOO_SMALL_NOT_RAID_NUM_BLOCKS) {
    return NOT_RAIDED_TOO_SMALL;
  }
  
  long repl = matched.codec.isDirRaid?
      DirectoryStripeReader.getReplication(lfs):
      file.getReplication();
  if (repl == matched.targetReplication) {
    if (skipParityCheck || 
        ParityFilePair.parityExists(file, matched.codec, conf)) {
      return RAIDED;
    }
  }
  
  if (now - mtime < matched.modTimePeriod) {
    return NOT_RAIDED_TOO_NEW;
  }
  
  return NOT_RAIDED_BUT_SHOULD;
}
 
Example 19
Source File: DFSPathSelector.java    From hudi with Apache License 2.0
public Pair<Option<String>, String> getNextFilePathsAndMaxModificationTime(Option<String> lastCheckpointStr,
    long sourceLimit) {

  try {
    // obtain all eligible files under root folder.
    List<FileStatus> eligibleFiles = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> fitr =
        fs.listFiles(new Path(props.getString(Config.ROOT_INPUT_PATH_PROP)), true);
    while (fitr.hasNext()) {
      LocatedFileStatus fileStatus = fitr.next();
      if (fileStatus.isDirectory()
          || IGNORE_FILEPREFIX_LIST.stream().anyMatch(pfx -> fileStatus.getPath().getName().startsWith(pfx))) {
        continue;
      }
      eligibleFiles.add(fileStatus);
    }
    // sort them by modification time.
    eligibleFiles.sort(Comparator.comparingLong(FileStatus::getModificationTime));

    // Filter based on checkpoint & input size, if needed
    long currentBytes = 0;
    long maxModificationTime = Long.MIN_VALUE;
    List<FileStatus> filteredFiles = new ArrayList<>();
    for (FileStatus f : eligibleFiles) {
      if (lastCheckpointStr.isPresent() && f.getModificationTime() <= Long.valueOf(lastCheckpointStr.get()).longValue()) {
        // skip processed files
        continue;
      }

      if (currentBytes + f.getLen() >= sourceLimit) {
        // we have enough data, we are done
        break;
      }

      maxModificationTime = f.getModificationTime();
      currentBytes += f.getLen();
      filteredFiles.add(f);
    }

    // no data to read
    if (filteredFiles.size() == 0) {
      return new ImmutablePair<>(Option.empty(), lastCheckpointStr.orElseGet(() -> String.valueOf(Long.MIN_VALUE)));
    }

    // read the files out.
    String pathStr = filteredFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(","));

    return new ImmutablePair<>(Option.ofNullable(pathStr), String.valueOf(maxModificationTime));
  } catch (IOException ioe) {
    throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
  }
}