Java Code Examples for org.apache.hadoop.fs.FileStatus#getPath()

The following examples show how to use org.apache.hadoop.fs.FileStatus#getPath(). Each example is taken from an open-source project; the source file, originating project, and license are noted above the code.
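Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: obtain FileStatus objects from a FileSystem call and read the described Path back with getPath(). The class name and the argument handling below are illustrative assumptions, not taken from any of the projects listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListPathsExample {
  public static void main(String[] args) throws IOException {
    // Resolve the FileSystem for the target path from the classpath
    // configuration (HDFS if configured, the local file system otherwise).
    Configuration conf = new Configuration();
    Path dir = new Path(args.length > 0 ? args[0] : ".");
    FileSystem fs = dir.getFileSystem(conf);

    // Each FileStatus returned by listStatus() describes one entry and
    // carries its fully qualified Path, which getPath() returns.
    for (FileStatus status : fs.listStatus(dir)) {
      Path entry = status.getPath();
      System.out.println((status.isDirectory() ? "dir  " : "file ")
          + entry + " (" + status.getLen() + " bytes)");
    }
  }
}

The examples below apply this same call in project-specific contexts: committing task output, checking block placement, recursive listings, log aggregation, and so on.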
Example 1
Source File: S3PartitionedOutputCommitter.java    From s3committer with Apache License 2.0
protected Set<String> getPartitions(FileSystem attemptFS, Path attemptPath,
                                    List<FileStatus> taskOutput)
    throws IOException {
  // get a list of partition directories
  Set<String> partitions = Sets.newLinkedHashSet();
  for (FileStatus stat : taskOutput) {
    // sanity check the output paths
    Path outputFile = stat.getPath();
    if (!attemptFS.isFile(outputFile)) {
      throw new RuntimeException(
          "Task output entry is not a file: " + outputFile);
    }
    String partition = getPartition(
        Paths.getRelativePath(attemptPath, outputFile));
    if (partition != null) {
      partitions.add(partition);
    } else {
      partitions.add(TABLE_ROOT);
    }
  }

  return partitions;
}
 
Example 2
Source File: PlacementMonitor.java    From RDFS with Apache License 2.0
public void checkFile(FileSystem srcFs, FileStatus srcFile,
          FileSystem parityFs, Path partFile, HarIndex.IndexEntry entry,
          Codec codec) throws IOException {
  if (srcFile.getReplication() > blockMoveMinRepl) {
    // We only check placement for the file with 0..blockMoveMinRepl replicas.
    return;
  }
  if (srcFs.getUri().equals(parityFs.getUri())) {
    BlockAndDatanodeResolver resolver = new BlockAndDatanodeResolver(
        srcFile.getPath(), srcFs, partFile, parityFs);
    checkBlockLocations(
        getBlockInfos(srcFs, srcFile),
        getBlockInfos(parityFs, partFile, entry.startOffset, entry.length),
        codec, srcFile, resolver);
  } else { 
    // TODO: Move blocks in two clusters separately
    LOG.warn("Source and parity are in different file system. " +
        " source:" + srcFs.getUri() + " parity:" + parityFs.getUri() +
        ". Skip.");
  }
}
 
Example 3
Source File: FileUtil.java    From secor with Apache License 2.0
public static long getModificationTimeMsRecursive(String path) throws IOException {
    FileSystem fs = getFileSystem(path);
    Path fsPath = new Path(path);
    FileStatus status = fs.getFileStatus(fsPath);
    long modificationTime = status.getModificationTime();
    FileStatus[] statuses = fs.listStatus(fsPath);
    if (statuses != null) {
        for (FileStatus fileStatus : statuses) {
            Path statusPath = fileStatus.getPath();
            String stringPath;
            if (path.startsWith("s3://") || path.startsWith("s3n://") || path.startsWith("s3a://") ||
                    path.startsWith("swift://") || path.startsWith("gs://")) {
                stringPath = statusPath.toUri().toString();
            } else {
                stringPath = statusPath.toUri().getPath();
            }
            if (!stringPath.equals(path)) {
                modificationTime = Math.max(modificationTime,
                        getModificationTimeMsRecursive(stringPath));
            }
        }
    }
    return modificationTime;
}
 
Example 4
Source File: MapRedUtil.java    From spork with Apache License 2.0
/**
 * Get all files recursively from the given list of files
 *
 * @param files a list of FileStatus
 * @param conf the configuration object
 * @return the list of FileStatus objects that contains all the files in the given
 *         list and, recursively, all the files inside the directories in
 *         the given list
 * @throws IOException
 */
public static List<FileStatus> getAllFileRecursively(
        List<FileStatus> files, Configuration conf) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    int len = files.size();
    for (int i = 0; i < len; ++i) {
        FileStatus file = files.get(i);
        if (file.isDir()) {
            Path p = file.getPath();
            FileSystem fs = p.getFileSystem(conf);
            addInputPathRecursively(result, fs, p, hiddenFileFilter);
        } else {
            result.add(file);
        }
    }
    log.info("Total input paths to process : " + result.size());
    return result;
}
 
Example 5
Source File: CopyMapper.java    From hadoop with Apache License 2.0
private void copyFileWithRetry(String description,
    FileStatus sourceFileStatus, Path target, Context context,
    FileAction action, EnumSet<DistCpOptions.FileAttribute> fileAttributes)
    throws IOException {
  long bytesCopied;
  try {
    bytesCopied = (Long) new RetriableFileCopyCommand(skipCrc, description,
        action).execute(sourceFileStatus, target, context, fileAttributes);
  } catch (Exception e) {
    context.setStatus("Copy Failure: " + sourceFileStatus.getPath());
    throw new IOException("File copy failed: " + sourceFileStatus.getPath() +
        " --> " + target, e);
  }
  incrementCounter(context, Counter.BYTESEXPECTED, sourceFileStatus.getLen());
  incrementCounter(context, Counter.BYTESCOPIED, bytesCopied);
  incrementCounter(context, Counter.COPY, 1);
}
 
Example 6
Source File: FSPsuedoTransactionableStore.java    From attic-apex-malhar with Apache License 2.0
@Override
public void storeCommittedWindowId(String appId, int operatorId, long windowId)
{
  Path recoveryPath = getOperatorRecoveryPath(appId, operatorId);
  Path windowPath = getOperatorWindowRecoveryPath(appId, operatorId, windowId);
  String windowString = Long.toString(windowId);

  try {
    fs.create(windowPath);
    FileStatus[] windowFiles = fs.listStatus(recoveryPath);

    for (FileStatus fileStatus : windowFiles) {
      Path tempPath = fileStatus.getPath();
      if (!tempPath.getName().equals(windowString)) {
        fs.delete(tempPath, true);
      }
    }
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
 
Example 7
Source File: SequenceFileInputFormat.java    From RDFS with Apache License 2.0
@Override
protected List<LocatedFileStatus> listLocatedStatus(JobContext job)
    throws IOException {

  List<LocatedFileStatus> files = super.listLocatedStatus(job);
  int len = files.size();
  for(int i=0; i < len; ++i) {
    FileStatus file = files.get(i);
    if (file.isDir()) {     // it's a MapFile
      Path p = file.getPath();
      FileSystem fs = p.getFileSystem(job.getConfiguration());
      // use the data file
      files.set(i, fs.listLocatedStatus(
          new Path(p, MapFile.DATA_FILE_NAME)).next());
    }
  }
  return files;
}
 
Example 8
Source File: LoadGenerator.java    From RDFS with Apache License 2.0
/** Create a table that contains all directories under the specified path and
 * another table that contains all files under the specified path and
 * whose name starts with "_file_".
 */
private void initFileDirTables(Path path) throws IOException {
  FileStatus[] stats = fs.listStatus(path);
  if (stats != null) {
    for (FileStatus stat : stats) {
      if (stat.isDir()) {
        dirs.add(stat.getPath().toString());
        initFileDirTables(stat.getPath());
      } else {
        Path filePath = stat.getPath();
        if (filePath.getName().startsWith(StructureGenerator.FILE_NAME_PREFIX)) {
          files.add(filePath.toString());
        }
      }
    }
  }
}
 
Example 9
Source File: ParquetFormatPlugin.java    From Bats with Apache License 2.0
/**
 * Checks if the metadata cache files exist.
 * @param fs the file system used to access the cache files
 * @param dir the FileStatus of the directory to check
 * @return true if both the file metadata and the summary cache file exist
 * @throws IOException in case of problems during accessing files
 */
private boolean metaDataFileExists(FileSystem fs, FileStatus dir) throws IOException {
  boolean fileExists = true;
  for (String metaFileName : Metadata.CURRENT_METADATA_FILENAMES) {
    Path path = new Path(dir.getPath(), metaFileName);
    if (!fs.exists(path)) {
      fileExists = false;
    }
  }
  if (fileExists) {
    return true;
  } else {
    // Check if the older version of metadata file exists
    if (fs.exists(getOldMetadataPath(dir))) {
      return true;
    }
  }
  return false;
}
 
Example 10
Source File: HdfsTestSource.java    From attic-apex-malhar with Apache License 2.0
private List<String> findFiles() throws IOException
{
  List<String> files = Lists.newArrayList();
  Path directoryPath = new Path(directory);
  FileSystem lfs = FileSystem.newInstance(directoryPath.toUri(), configuration);
  try {
    logger.debug("checking for new files in {}", directoryPath);
    RemoteIterator<LocatedFileStatus> statuses = lfs.listFiles(directoryPath, true);
    for (; statuses.hasNext(); ) {
      FileStatus status = statuses.next();
      Path path = status.getPath();
      String filePathStr = path.toString();
      if (!filePathStr.endsWith(".gz")) {
        continue;
      }
      logger.debug("new file {}", filePathStr);
      files.add(path.toString());
    }
  } catch (FileNotFoundException e) {
    logger.warn("Failed to list directory {}", directoryPath, e);
    throw new RuntimeException(e);
  } finally {
    lfs.close();
  }
  return files;
}
 
Example 11
Source File: GetHDFS.java    From nifi with Apache License 2.0
/**
 * Poll HDFS for files to process that match the configured file filters.
 *
 * @param hdfs the file system to poll
 * @param dir the directory to scan
 * @param filesVisited paths already seen, used to avoid looping over directory links
 * @return files to process
 * @throws java.io.IOException if listing the directory fails
 * @throws InterruptedException if the privileged listing action is interrupted
 */
protected Set<Path> selectFiles(final FileSystem hdfs, final Path dir, Set<Path> filesVisited) throws IOException, InterruptedException {
    if (null == filesVisited) {
        filesVisited = new HashSet<>();
    }

    final Set<Path> files = new HashSet<>();

    FileStatus[] fileStatuses = getUserGroupInformation().doAs((PrivilegedExceptionAction<FileStatus[]>) () -> hdfs.listStatus(dir));
    for (final FileStatus file : fileStatuses) {
        if (files.size() >= MAX_WORKING_QUEUE_SIZE) {
            // no need to make the files set larger than what we would queue anyway
            break;
        }

        final Path canonicalFile = file.getPath();

        if (!filesVisited.add(canonicalFile)) { // skip files we've already seen (may be looping directory links)
            continue;
        }

        if (file.isDirectory() && processorConfig.getRecurseSubdirs()) {
            files.addAll(selectFiles(hdfs, canonicalFile, filesVisited));

        } else if (!file.isDirectory() && processorConfig.getPathFilter(dir).accept(canonicalFile)) {
            final long fileAge = System.currentTimeMillis() - file.getModificationTime();
            if (processorConfig.getMinimumAge() < fileAge && fileAge < processorConfig.getMaximumAge()) {
                files.add(canonicalFile);

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug(this + " selected file at path: " + canonicalFile.toString());
                }

            }
        }
    }
    return files;
}
 
Example 12
Source File: DirectoryTraversal.java    From RDFS with Apache License 2.0
private void pushNewNode(FileStatus stat) throws IOException {
  if (!stat.isDir()) {
    return;
  }
  Path p = stat.getPath();
  LOG.debug("Traversing to directory " + p);
  FileStatus[] elements = fs.listStatus(p);
  Node newNode = new Node(stat, (elements == null? new FileStatus[0]: elements));
  stack.push(newNode);
}
 
Example 13
Source File: Utils.java    From spork with Apache License 2.0
/**
 * Finds a valid path for a file from a FileStatus object.
 * @param fileStatus FileStatus object corresponding to a file,
 * or a directory.
 * @param fileSystem FileSystem in which the file should be found
 * @return The first file found
 * @throws IOException
 */

public static Path depthFirstSearchForFile(final FileStatus fileStatus,
    final FileSystem fileSystem) throws IOException {
  if (fileSystem.isFile(fileStatus.getPath())) {
    return fileStatus.getPath();
  } else {
    return depthFirstSearchForFile(
        fileSystem.listStatus(fileStatus.getPath(), VISIBLE_FILES),
        fileSystem);
  }

}
 
Example 14
Source File: MoveHDFS.java    From nifi with Apache License 2.0
protected Set<Path> selectFiles(final FileSystem hdfs, final Path inputPath, Set<Path> filesVisited)
        throws IOException {
    if (null == filesVisited) {
        filesVisited = new HashSet<>();
    }

    if (!hdfs.exists(inputPath)) {
        throw new IOException("Selection directory " + inputPath.toString() + " doesn't appear to exist!");
    }

    final Set<Path> files = new HashSet<>();

    FileStatus inputStatus = hdfs.getFileStatus(inputPath);

    if (inputStatus.isDirectory()) {
        for (final FileStatus file : hdfs.listStatus(inputPath)) {
            final Path canonicalFile = file.getPath();

            if (!filesVisited.add(canonicalFile)) { // skip files we've already seen (may be looping directory links)
                continue;
            }

            if (!file.isDirectory() && processorConfig.getPathFilter(inputPath).accept(canonicalFile)) {
                files.add(canonicalFile);

                if (getLogger().isDebugEnabled()) {
                    getLogger().debug(this + " selected file at path: " + canonicalFile.toString());
                }
            }
        }
    } else if (inputStatus.isFile()) {
        files.add(inputPath);
    }
    return files;
}
 
Example 15
Source File: ManualMobMaintHFileCleaner.java    From hbase with Apache License 2.0
@Override
public boolean isFileDeletable(FileStatus fStat) {
  try {
    // if it's a directory, then it can be deleted
    if (fStat.isDirectory()) {
      return true;
    }

    Path file = fStat.getPath();

    // we need the table and region to determine if this is from a mob region
    // we don't need to worry about hfilelink back references, because the hfilelink cleaner will
    // retain them.
    Path family = file.getParent();
    Path region = family.getParent();
    Path table = region.getParent();

    TableName tableName = CommonFSUtils.getTableName(table);

    String mobRegion = MOB_REGIONS.get(tableName);
    if (mobRegion == null) {
      String tmp = MobUtils.getMobRegionInfo(tableName).getEncodedName();
      if (tmp == null) {
        LOG.error("couldn't determine mob region for table {} keeping files just in case.",
            tableName);
        return false;
      }
      mobRegion = MOB_REGIONS.putIfAbsent(tableName, tmp);
      // a return of null means that tmp is now in the map for future lookups.
      if (mobRegion == null) {
        mobRegion = tmp;
      }
      LOG.debug("Had to calculate name of mob region for table {} and it is {}", tableName,
          mobRegion);
    }

    boolean ret = !mobRegion.equals(region.getName());
    if (LOG.isDebugEnabled() && !ret) {
      LOG.debug("Keeping file '{}' because it is from mob dir", fStat.getPath());
    }
    return ret;
  } catch (RuntimeException e) {
    LOG.error("Failed to determine mob status of '{}', keeping it just in case.", fStat.getPath(),
        e);
    return false;
  }
}
 
Example 16
Source File: UpdateDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 17
Source File: LogCLIHelpers.java    From big-c with Apache License 2.0
@Private
@VisibleForTesting
public int dumpAContainersLogs(String appId, String containerId,
    String nodeId, String jobOwner) throws IOException {
  Path remoteRootLogDir = new Path(getConf().get(
      YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
      YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
  String suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(getConf());
  Path remoteAppLogDir = LogAggregationUtils.getRemoteAppLogDir(
      remoteRootLogDir, ConverterUtils.toApplicationId(appId), jobOwner,
      suffix);
  RemoteIterator<FileStatus> nodeFiles;
  try {
    Path qualifiedLogDir =
        FileContext.getFileContext(getConf()).makeQualified(
          remoteAppLogDir);
    nodeFiles =
        FileContext.getFileContext(qualifiedLogDir.toUri(), getConf())
          .listStatus(remoteAppLogDir);
  } catch (FileNotFoundException fnf) {
    logDirNotExist(remoteAppLogDir.toString());
    return -1;
  }
  boolean foundContainerLogs = false;
  while (nodeFiles.hasNext()) {
    FileStatus thisNodeFile = nodeFiles.next();
    String fileName = thisNodeFile.getPath().getName();
    if (fileName.contains(LogAggregationUtils.getNodeString(nodeId))
        && !fileName.endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) {
      AggregatedLogFormat.LogReader reader = null;
      try {
        reader =
            new AggregatedLogFormat.LogReader(getConf(),
              thisNodeFile.getPath());
        if (dumpAContainerLogs(containerId, reader, System.out,
            thisNodeFile.getModificationTime()) > -1) {
          foundContainerLogs = true;
        }
      } finally {
        if (reader != null) {
          reader.close();
        }
      }
    }
  }
  if (!foundContainerLogs) {
    containerLogNotFound(containerId);
    return -1;
  }
  return 0;
}
 
Example 18
Source File: DataValidationInputFormat.java    From jumbune with GNU Lesser General Public License v3.0
/**
 * Generates splits.
 *
 * @param job the JobContext used to read the configuration of the job that ran
 * @param minSize the minimum file block size
 * @param maxSize the maximum file block size
 * @param splits the list of splits being generated
 * @param file the FileStatus used to determine block size, length, and allocations
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void generateSplits(JobContext job, long minSize, long maxSize,
		List<InputSplit> splits, FileStatus file) throws IOException {
	Path path = file.getPath();
	int numOfRecordsInCurrentSplit = 0;
	int numOfRecordsInPreviousSplit = 0;
	FileSystem fs = path.getFileSystem(job.getConfiguration());
	long length = file.getLen();
	BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0,
			length);
	FSDataInputStream fsin = null ;
	if ((length != 0) && isSplitable(job, path)) {
		long blockSize = file.getBlockSize();
		long splitSize = computeSplitSize(blockSize, minSize, maxSize);
		long bytesRemaining = length;
		
		// checking the occurrences of the record separator in current
		// split
		recordSeparator = job.getConfiguration()
				.get(DataValidationConstants.RECORD_SEPARATOR)
				.getBytes();
		while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
			int blkIndex = getBlockIndex(blkLocations, length
					- bytesRemaining);
			long start = length - bytesRemaining;
			long end = start + splitSize;
			try {
				fsin = fs.open(path);
				fsin.seek(start);
				long pos = start;
				int b = 0;
				int bufferPos = 0;
				while (true) {
					b = fsin.read();
					pos = fsin.getPos();
					if (b == -1) {
						break;
					}
					if (b == recordSeparator[bufferPos]) {
						bufferPos++;
						if (bufferPos == recordSeparator.length) {
							numOfRecordsInCurrentSplit++;
							bufferPos = 0;
							if (pos > end) {
								break;
							}
						}
					} else {
						// reset the value of buffer position to zero
						bufferPos = 0;
					}
				}
			} finally {
				if (fsin != null) {
					fsin.close();
				}
			}

			splits.add(new DataValidationFileSplit(path, start,
					splitSize, numOfRecordsInPreviousSplit,
					blkLocations[blkIndex].getHosts()));
			bytesRemaining -= splitSize;
			numOfRecordsInPreviousSplit = numOfRecordsInCurrentSplit;
			numOfRecordsInCurrentSplit = 0;
		}

		addSplitIfBytesRemaining(splits, path, numOfRecordsInPreviousSplit,
				length, blkLocations, bytesRemaining);
	} else if (length != 0) {
		splits.add(new DataValidationFileSplit(path, 0, length,
				numOfRecordsInPreviousSplit, blkLocations[0].getHosts()));
	} else {
		splits.add(new DataValidationFileSplit(path, 0, length,
				numOfRecordsInPreviousSplit, new String[0]));
	}
}
 
Example 19
Source File: TestBSTIndex.java    From incubator-tajo with Apache License 2.0
@Test
public void testFindNextKeyValue() throws IOException {
  meta = CatalogUtil.newTableMeta(storeType);

  Path tablePath = new Path(testDir, "testFindNextKeyValue_" + storeType);
  Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath);
  appender.init();
  Tuple tuple;
  for (int i = 0; i < TUPLE_NUM; i++) {
    tuple = new VTuple(5);
    tuple.put(0, DatumFactory.createInt4(i));
    tuple.put(1, DatumFactory.createInt8(i));
    tuple.put(2, DatumFactory.createFloat8(i));
    tuple.put(3, DatumFactory.createFloat4(i));
    tuple.put(4, DatumFactory.createText("field_" + i));
    appender.addTuple(tuple);
  }
  appender.close();

  FileStatus status = fs.getFileStatus(tablePath);
  long fileLen = status.getLen();
  FileFragment tablet = new FileFragment("table1_1", status.getPath(), 0, fileLen);

  SortSpec[] sortKeys = new SortSpec[2];
  sortKeys[0] = new SortSpec(schema.getColumn("int"), true, false);
  sortKeys[1] = new SortSpec(schema.getColumn("long"), true, false);

  Schema keySchema = new Schema();
  keySchema.addColumn(new Column("int", Type.INT4));
  keySchema.addColumn(new Column("long", Type.INT8));

  TupleComparator comp = new TupleComparator(keySchema, sortKeys);

  BSTIndex bst = new BSTIndex(conf);
  BSTIndexWriter creater = bst.getIndexWriter(new Path(testDir, "testFindNextKeyValue_" + storeType + ".idx"),
      BSTIndex.TWO_LEVEL_INDEX, keySchema, comp);
  creater.setLoadNum(LOAD_NUM);
  creater.open();

  SeekableScanner scanner = StorageManagerFactory.getSeekableScanner(conf, meta, schema, tablet, schema);
  scanner.init();

  Tuple keyTuple;
  long offset;
  while (true) {
    keyTuple = new VTuple(2);
    offset = scanner.getNextOffset();
    tuple = scanner.next();
    if (tuple == null) break;

    keyTuple.put(0, tuple.get(0));
    keyTuple.put(1, tuple.get(1));
    creater.write(keyTuple, offset);
  }

  creater.flush();
  creater.close();
  scanner.close();

  BSTIndexReader reader = bst.getIndexReader(new Path(testDir, "testFindNextKeyValue_" + storeType + ".idx"),
      keySchema, comp);
  reader.open();
  scanner = StorageManagerFactory.getSeekableScanner(conf, meta, schema, tablet, schema);
  scanner.init();

  Tuple result;
  for (int i = 0; i < TUPLE_NUM - 1; i++) {
    keyTuple = new VTuple(2);
    keyTuple.put(0, DatumFactory.createInt4(i));
    keyTuple.put(1, DatumFactory.createInt8(i));
    long offsets = reader.find(keyTuple, true);
    scanner.seek(offsets);
    result = scanner.next();
    assertTrue("[seek check " + (i + 1) + " ]",
        (i + 1) == (result.get(0).asInt4()));
    assertTrue("[seek check " + (i + 1) + " ]", (i + 1) == (result.get(1).asInt8()));

    offsets = reader.next();
    if (offsets == -1) {
      continue;
    }
    scanner.seek(offsets);
    result = scanner.next();
    assertTrue("[seek check " + (i + 2) + " ]", (i + 2) == (result.get(0).asInt8()));
    assertTrue("[seek check " + (i + 2) + " ]", (i + 2) == (result.get(1).asFloat8()));
  }
  reader.close();
  scanner.close();
}
 
Example 20
Source File: DistributedFileSystem.java    From hadoop with Apache License 2.0
/**
 * Move blocks from srcs to trg and delete srcs afterwards.
 * The file block sizes must be the same.
 * 
 * @param trg existing file to append to
 * @param psrcs list of files (same block size, same replication)
 * @throws IOException
 */
@Override
public void concat(Path trg, Path [] psrcs) throws IOException {
  statistics.incrementWriteOps(1);
  // Make target absolute
  Path absF = fixRelativePart(trg);
  // Make all srcs absolute
  Path[] srcs = new Path[psrcs.length];
  for (int i=0; i<psrcs.length; i++) {
    srcs[i] = fixRelativePart(psrcs[i]);
  }
  // Try the concat without resolving any links
  String[] srcsStr = new String[psrcs.length];
  try {
    for (int i=0; i<psrcs.length; i++) {
      srcsStr[i] = getPathName(srcs[i]);
    }
    dfs.concat(getPathName(trg), srcsStr);
  } catch (UnresolvedLinkException e) {
    // Exception could be from trg or any src.
    // Fully resolve trg and srcs. Fail if any of them are a symlink.
    FileStatus stat = getFileLinkStatus(absF);
    if (stat.isSymlink()) {
      throw new IOException("Cannot concat with a symlink target: "
          + trg + " -> " + stat.getPath());
    }
    absF = fixRelativePart(stat.getPath());
    for (int i=0; i<psrcs.length; i++) {
      stat = getFileLinkStatus(srcs[i]);
      if (stat.isSymlink()) {
        throw new IOException("Cannot concat with a symlink src: "
            + psrcs[i] + " -> " + stat.getPath());
      }
      srcs[i] = fixRelativePart(stat.getPath());
    }
    // Try concat again. Can still race with another symlink.
    for (int i=0; i<psrcs.length; i++) {
      srcsStr[i] = getPathName(srcs[i]);
    }
    dfs.concat(getPathName(absF), srcsStr);
  }
}