Java Code Examples for com.amazonaws.services.s3.model.S3ObjectSummary#getKey()

The following examples show how to use com.amazonaws.services.s3.model.S3ObjectSummary#getKey(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.

Example 1

Source File: S3RemoteFileSystem.java From imhotep with Apache License 2.0

6 votes

/**
 * Collects the file names found in an S3 listing, relative to {@code prefix}.
 *
 * Each object key has its first {@code prefix.length()} characters removed. Keys whose
 * remainder is empty or still contains {@code DELIMITER} are logged as errors and skipped.
 *
 * @param listing the S3 listing whose object summaries are examined
 * @param prefix  the key prefix to strip from every object key
 * @return the remaining file names, in listing order
 */
private List<String> getFilenamesFromListing(ObjectListing listing, String prefix) {
    final List<String> filenames = new ArrayList<String>(100);

    for (S3ObjectSummary objectSummary : listing.getObjectSummaries()) {
        final String objectKey = objectSummary.getKey();
        final String relativeName = objectKey.substring(prefix.length());

        final boolean usable = relativeName.length() != 0 && !relativeName.contains(DELIMITER);
        if (usable) {
            filenames.add(relativeName);
        } else {
            log.error("Error parsing S3 object Key.  Key: " + objectKey);
        }
    }

    return filenames;
}

Example 2

Source File: NfsSecondaryStorageResource.java From cloudstack with Apache License 2.0

6 votes

/**
 * Builds a map of volume id to {@link TemplateProp} from the objects that
 * {@code S3Utils.listDirectory} returns for {@code VOLUME_ROOT_DIR} in the store's bucket.
 *
 * @param s3 the S3 store description; its bucket name is used for the listing
 * @return one entry per listed object keyed by the id derived from the object key,
 *         or {@code null} when the listing itself is {@code null}
 */
Map<Long, TemplateProp> s3ListVolume(S3TO s3) {
    final String bucketName = s3.getBucketName();
    final List<S3ObjectSummary> volumeObjects = S3Utils.listDirectory(s3, bucketName, VOLUME_ROOT_DIR);
    if (volumeObjects == null) {
        return null;
    }

    final Map<Long, TemplateProp> volumesById = new HashMap<Long, TemplateProp>();
    for (S3ObjectSummary volumeObject : volumeObjects) {
        final String objectKey = volumeObject.getKey();
        final Long volumeId = determineS3VolumeIdFromKey(objectKey);
        // TODO: how to get volume template name
        // The object size is passed for both size arguments of TemplateProp.
        final TemplateProp volumeInfo = new TemplateProp(
                volumeId.toString(),
                objectKey,
                volumeObject.getSize(),
                volumeObject.getSize(),
                true,
                false);
        volumesById.put(volumeId, volumeInfo);
    }
    return volumesById;
}

Example 3

Source File: S3FileInput.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Adds the URI of every non-directory object in {@code list} that passes
 * {@code uriPredicate} to {@code uris}.
 *
 * Keys ending in "/" are skipped. Each remaining key is resolved against {@code uri}
 * as an absolute path ("/" + key).
 *
 * @param uris         output list receiving the accepted URIs
 * @param list         the S3 listing to scan
 * @param uri          base URI the keys are resolved against
 * @param uriPredicate filter deciding which resolved URIs are kept
 */
private void addKeyUris(List<URI> uris, ObjectListing list, URI uri, Predicate<URI> uriPredicate) {
    for (S3ObjectSummary objectSummary : list.getObjectSummaries()) {
        String objectKey = objectSummary.getKey();
        if (objectKey.endsWith("/")) {
            continue;
        }

        URI candidate = uri.resolve("/" + objectKey);
        if (!uriPredicate.apply(candidate)) {
            continue;
        }

        uris.add(candidate);
        if (logger.isDebugEnabled()) {
            logger.debug("{}", candidate);
        }
    }
}

Example 4

Source File: S3Profile.java From jobcacher-plugin with MIT License

5 votes

/**
 * Moves every object under {@code currentPathPrefix} to {@code newPathPrefix} within the
 * same bucket by copying each object to its new key and then deleting the original.
 *
 * @param bucketName        bucket containing the objects
 * @param currentPathPrefix prefix of the objects to move
 * @param newPathPrefix     prefix that replaces {@code currentPathPrefix} in each key
 */
public void rename(String bucketName, String currentPathPrefix, String newPathPrefix) {
    ObjectListing page = helper.client().listObjects(bucketName, currentPathPrefix);
    while (true) {
        for (S3ObjectSummary objectSummary : page.getObjectSummaries()) {
            String sourceKey = objectSummary.getKey();
            String targetKey = newPathPrefix + sourceKey.substring(currentPathPrefix.length());

            helper.client().copyObject(bucketName, sourceKey, bucketName, targetKey);
            helper.client().deleteObject(bucketName, sourceKey);
        }

        if (!page.isTruncated()) {
            break;
        }
        // More results are available: fetch the next batch and keep going.
        page = helper.client().listNextBatchOfObjects(page);
    }
}

Example 5

Source File: S3FileVec.java From h2o-2 with Apache License 2.0

5 votes

/**
 * Creates a Frame holding a single S3FileVec for the given S3 object and returns the
 * Frame's key.
 *
 * The frame key is built from the string "s3://" + bucket name + "/" + object key, and the
 * frame's only column is named after the object key.
 *
 * @param obj summary of the S3 object; its key, bucket name and size are read
 * @param fs  futures handed to {@code DKV.put} for the Vec (presumably to track the pending
 *            write - confirm against DKV)
 * @return the key of the created frame
 */
public static Key make(S3ObjectSummary obj, Futures fs) {
  String fname = obj.getKey();
  Key k = Key.make("s3://" + obj.getBucketName() + "/" + fname);
  long size = obj.getSize();
  // Key for the Vec, derived from the frame key
  Key k2 = Vec.newKey(k);
  // NOTE(review): presumably removes any existing frame under k and takes a lock on it - confirm
  new Frame(k).delete_and_lock(null);
  // Insert the top-level FileVec key into the store
  Vec v = new S3FileVec(k2,size);
  DKV.put(k2, v, fs);
  // Single-column frame: column name is the object key, column data is the new Vec
  Frame fr = new Frame(k,new String[]{fname},new Vec[]{v});
  fr.update(null);
  fr.unlock(null);
  return k;
}

Example 6

Source File: S3FileInput.java From crate with Apache License 2.0

5 votes

/**
 * Collects into {@code uris} the resolved URI of each listed object that is not a
 * directory marker (key ending in "/") and that satisfies {@code uriPredicate}.
 *
 * @param uris         output list receiving the accepted URIs
 * @param list         the S3 listing to scan
 * @param uri          base URI; each key is resolved against it as "/" + key
 * @param uriPredicate filter applied to every resolved URI
 */
private void addKeyUris(List<URI> uris, ObjectListing list, URI uri, Predicate<URI> uriPredicate) {
    for (S3ObjectSummary objectSummary : list.getObjectSummaries()) {
        String objectKey = objectSummary.getKey();
        boolean directoryMarker = objectKey.endsWith("/");
        if (directoryMarker) {
            continue;
        }

        URI resolved = uri.resolve("/" + objectKey);
        if (uriPredicate.test(resolved)) {
            uris.add(resolved);
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("{}", resolved);
            }
        }
    }
}

Example 7

Source File: AmazonS3FileSystem.java From iaf with Apache License 2.0

5 votes

/**
 * Checks whether the bucket contains a folder marker object, i.e. an object whose key is
 * exactly {@code folder + "/"}.
 *
 * The listing is restricted to keys starting with that marker key and every page of a
 * truncated listing is inspected. A plain {@code listObjects(bucketName)} call returns only
 * the first page of results, so a marker located beyond that page would be missed.
 *
 * @param folder folder name without the trailing slash
 * @return {@code true} if an object with key {@code folder + "/"} exists in the bucket
 * @throws FileSystemException declared by the overridden method
 */
@Override
public boolean folderExists(String folder) throws FileSystemException {
	final String folderKey = folder + "/";
	ObjectListing objectListing = s3Client.listObjects(bucketName, folderKey);
	while (true) {
		for (S3ObjectSummary s3ObjectSummary : objectListing.getObjectSummaries()) {
			if (folderKey.equals(s3ObjectSummary.getKey())) {
				return true;
			}
		}
		if (!objectListing.isTruncated()) {
			return false;
		}
		// The listing was cut off: continue with the next page of results.
		objectListing = s3Client.listNextBatchOfObjects(objectListing);
	}
}

Example 8

Source File: StashReader.java From emodb with Apache License 2.0

5 votes

/**
 * Returns the splits for a table stored in stash. One split is produced for each S3 object
 * summary returned for the table.
 *
 * @param table name of the table
 * @return the table's splits in iteration order
 * @throws StashNotAvailableException declared; presumably raised while locating the table's objects
 * @throws TableNotStashedException   declared; presumably raised while locating the table's objects
 */
public List<StashSplit> getSplits(String table)
        throws StashNotAvailableException, TableNotStashedException {
    ImmutableList.Builder<StashSplit> splits = ImmutableList.builder();

    for (Iterator<S3ObjectSummary> files = getS3ObjectSummariesForTable(table); files.hasNext(); ) {
        S3ObjectSummary file = files.next();
        // The root path is the same for every split, so drop it together with the character that follows it.
        String relativeKey = file.getKey().substring(_rootPath.length() + 1);
        splits.add(new StashSplit(table, relativeKey, file.getSize()));
    }

    return splits.build();
}

Example 9

Source File: COSAPIClient.java From stocator with Apache License 2.0

5 votes

/**
 * Creates a {@link FileStatus} for an object summary by delegating to the overload that
 * takes the size, key, modification time and path separately.
 *
 * @param objSummary summary supplying the key, size and last-modified time
 * @param hostName   host name passed to {@code getMergedPath}
 * @param path       path passed to {@code getMergedPath}
 * @return the file status produced by the delegate overload
 * @throws IllegalArgumentException declared by this method
 * @throws IOException              declared by this method
 */
private FileStatus createFileStatus(S3ObjectSummary objSummary,
    String hostName, Path path)
    throws IllegalArgumentException, IOException {
  final String key = objSummary.getKey();
  final String mergedLocation = getMergedPath(hostName, path, key);
  return createFileStatus(
      objSummary.getSize(),
      key,
      objSummary.getLastModified(),
      new Path(mergedLocation));
}

Example 10

Source File: TestAmazonS3TargetForWholeFile.java From datacollector with Apache License 2.0

5 votes

/**
 * Verifies the content of every object in the target bucket against its source and deletes
 * the object from the target afterwards.
 *
 * When {@code withFileNamePrefix} is set, the first four characters of the key (the "sdc-"
 * file prefix) are removed to obtain the source file name or key. Depending on
 * {@code source}, the expected content is read from a local file under {@code testDir} or
 * from the source bucket.
 *
 * Every stream opened here is closed once the comparison is done, so that file handles and
 * S3 connections are not left open if {@code verifyStreamCorrectness} does not close them.
 *
 * @return the number of objects found in the target bucket
 * @throws Exception if opening, verifying or closing a stream fails
 */
private int verifyAndReturnNoOfObjects() throws Exception {
  int numberOfObjects = 0;
  for (S3ObjectSummary s3ObjectSummary : S3Objects.inBucket(s3client, TARGET_BUCKET_NAME)) {
    final String targetKey = s3ObjectSummary.getKey();
    String fileNameOrKey = targetKey;
    if (withFileNamePrefix) {
      //strip out the filePrefix sdc-
      fileNameOrKey = fileNameOrKey.substring(4);
    }
    switch (source) {
      case LOCAL:
        try (
            java.io.InputStream expected = new FileInputStream(testDir.getAbsolutePath() + "/" + fileNameOrKey);
            java.io.InputStream actual = s3client.getObject(TARGET_BUCKET_NAME, targetKey).getObjectContent()
        ) {
          verifyStreamCorrectness(expected, actual);
        }
        break;
      case S3:
        try (
            java.io.InputStream expected = s3client.getObject(SOURCE_BUCKET_NAME, fileNameOrKey).getObjectContent();
            java.io.InputStream actual = s3client.getObject(TARGET_BUCKET_NAME, targetKey).getObjectContent()
        ) {
          verifyStreamCorrectness(expected, actual);
        }
        break;
    }
    deleteObjectsAfterVerificationInTarget(targetKey);
    numberOfObjects++;
  }
  return numberOfObjects;
}

Example 11

Source File: S3AttributesLocation.java From nexus-public with Eclipse Public License 1.0

4 votes

/**
 * Creates a location that refers to the key of the given object summary.
 *
 * @param summary the S3 object summary; must not be null
 */
public S3AttributesLocation(final S3ObjectSummary summary) {
  this.key = checkNotNull(summary).getKey();
}

Example 12

Source File: S3BucketObjectLister.java From s3-bucket-loader with Apache License 2.0

4 votes

/**
 * Lists the bucket page by page (at most 1000 keys per request) and records a
 * {@code TocInfo} for every object in both {@code toc} and {@code tocQueue}.
 *
 * An entry is flagged as a directory when the last "/" in its key is the final character.
 * Listing continues for as long as the previous page reports a next marker.
 *
 * @param toc      set receiving every generated entry
 * @param tocQueue queue receiving every generated entry
 * @throws Exception declared by this method
 */
private void scanBucket(Set<TocInfo> toc, Queue<TocInfo> tocQueue) throws Exception {

	ListObjectsRequest request = new ListObjectsRequest();
	request.setBucketName(s3BucketName);
	request.setMaxKeys(1000);

	String marker;
	do {
		ObjectListing page = s3Client.listObjects(request);

		for (S3ObjectSummary summary : page.getObjectSummaries()) {
			String key = summary.getKey();
			TocInfo entry = new TocInfo(key, summary.getSize());

			// is it a "dir/" ?
			entry.isDirectory = key.lastIndexOf("/") == (key.length() - 1);

			toc.add(entry);
			tocQueue.add(entry);
			tocInfosGenerated++; // increment for logging
		}

		// for pagination
		marker = page.getNextMarker();
		if (marker != null) {
			request.setMarker(marker);
			logger.debug("scanBucket() nextMarker we will request listing for => " + marker);
		}
	} while (marker != null);

}

Example 13

Source File: AmazonS3Util.java From datacollector with Apache License 2.0

4 votes

/**
 * Lists objects from Amazon S3 in chronological order (by key when two objects share the
 * same last-modified time), keeping only the {@code fetchSize} oldest eligible objects.
 *
 * All objects under the configured common prefix are scanned. An object is considered when
 * the remainder of its key after the common prefix is non-empty, matches the configured
 * prefix pattern, and {@code isEligible} accepts it for the given offset.
 *
 * @param s3Client     client used for the listing
 * @param s3ConfigBean configuration supplying the bucket, common prefix and prefix pattern
 * @param pathMatcher  matcher for the glob pattern the remaining key is tested against
 * @param s3Offset     current offset, passed to {@code isEligible}
 * @param fetchSize    maximum number of objects to return
 * @return the selected objects, oldest first
 * @throws AmazonClientException presumably propagated from the S3 listing
 */
static List<S3ObjectSummary> listObjectsChronologically(
    AmazonS3 s3Client,
    S3ConfigBean s3ConfigBean,
    AntPathMatcher pathMatcher,
    S3Offset s3Offset,
    int fetchSize
) {

  // Sorted by last-modified time; objects with the same time are ordered by key.
  TreeSet<S3ObjectSummary> oldestFirst = new TreeSet<>((left, right) -> {
    int byTimestamp = left.getLastModified().compareTo(right.getLastModified());
    return byTimestamp != 0 ? byTimestamp : left.getKey().compareTo(right.getKey());
  });

  S3Objects candidates = S3Objects
    .withPrefix(s3Client, s3ConfigBean.s3Config.bucket, s3ConfigBean.s3Config.commonPrefix);

  // SDC-9413: the listing arrives in lexical order, so the whole prefix has to be scanned
  // before the oldest objects are known.
  for (S3ObjectSummary candidate : candidates) {
    String objectKey = candidate.getKey();
    String relativeKey = objectKey.substring(s3ConfigBean.s3Config.commonPrefix.length());

    // The remainder is empty for the object that represents the prefix itself, e.g. a
    // manually created "myFolder/mySubFolder" entry. It carries no data, so ignore it.
    if (relativeKey.isEmpty()) {
      continue;
    }

    if (pathMatcher.match(s3ConfigBean.s3FileConfig.prefixPattern, relativeKey) && isEligible(candidate, s3Offset)) {
      oldestFirst.add(candidate);
    }
    // Keep at most fetchSize entries by discarding the newest one.
    if (oldestFirst.size() > fetchSize) {
      oldestFirst.pollLast();
    }
  }

  return new ArrayList<>(oldestFirst);
}

Example 14

Source File: AmazonS3Util.java From datacollector with Apache License 2.0

4 votes

/**
 * Lists objects from Amazon S3 in lexicographical order, starting after the key stored in
 * the offset.
 *
 * When the offset has a non-empty key and {@code parseOffset} does not return -1, the
 * summary of the offset object itself is placed first in the result. Listing then proceeds
 * page by page until the result holds exactly {@code fetchSize} objects or the listing is
 * exhausted.
 *
 * @param s3Client     client used for the listing
 * @param s3ConfigBean configuration supplying the bucket, common prefix and prefix pattern
 * @param pathMatcher  matcher for the glob pattern the remaining key is tested against
 * @param s3Offset     current offset which provides the key of the previous object
 * @param fetchSize    number of objects to fetch in one go
 * @return the collected object summaries in listing order
 * @throws AmazonClientException presumably propagated from the S3 calls
 */
static List<S3ObjectSummary> listObjectsLexicographically(
    AmazonS3 s3Client,
    S3ConfigBean s3ConfigBean,
    AntPathMatcher pathMatcher,
    S3Offset s3Offset,
    int fetchSize
) {
  List<S3ObjectSummary> collected = new ArrayList<>(fetchSize);

  ListObjectsRequest request = new ListObjectsRequest();
  request.setBucketName(s3ConfigBean.s3Config.bucket);
  request.setPrefix(s3ConfigBean.s3Config.commonPrefix);
  request.setMaxKeys(BATCH_SIZE);

  // Resume after the offset key, re-adding the offset object itself when required.
  if (s3Offset.getKey() != null) {
    boolean includeOffsetObject = !s3Offset.getKey().isEmpty() && parseOffset(s3Offset) != -1;
    if (includeOffsetObject) {
      collected.add(getObjectSummary(s3Client, s3ConfigBean.s3Config.bucket, s3Offset.getKey()));
    }
    request.setMarker(s3Offset.getKey());
  }

  for (ObjectListing page = s3Client.listObjects(request); ; page = s3Client.listNextBatchOfObjects(page)) {
    for (S3ObjectSummary candidate : page.getObjectSummaries()) {
      String objectKey = candidate.getKey();
      String relativeKey = objectKey.substring(s3ConfigBean.s3Config.commonPrefix.length());
      if (relativeKey.isEmpty()) {
        continue;
      }
      if (pathMatcher.match(s3ConfigBean.s3FileConfig.prefixPattern, relativeKey)) {
        collected.add(candidate);
      }
      // We've got enough objects.
      if (collected.size() == fetchSize) {
        return collected;
      }
    }
    // Listing is complete. No more objects to be listed.
    if (!page.isTruncated()) {
      break;
    }
  }

  return collected;
}

Example 15

Source File: S3Spooler.java From datacollector with Apache License 2.0

4 votes

/**
 * Looks up the next objects to process and adds them to the object queue.
 *
 * The search starts from the last element previously added to the queue, or from the
 * source's latest offset when nothing has been queued yet. Without a wildcard in the prefix
 * pattern the object is fetched directly; otherwise the bucket is listed using the
 * configured ordering. When nothing is found, a no-more-data event may be sent; when
 * objects are found, the last one is remembered as the new starting point.
 *
 * @param amazonS3Source source providing the latest offset and the no-more-data event hooks
 * @param batchContext   batch context passed along when the no-more-data event is sent
 * @throws AmazonClientException presumably propagated from the S3 listing helpers
 */
private void findAndQueueObjects(
    AmazonS3Source amazonS3Source, BatchContext batchContext
) throws AmazonClientException {
  final S3Offset s3offset = lastElementAddedToQueue != null
      ? lastElementAddedToQueue
      : amazonS3Source.getLatestOffset();

  final List<S3ObjectSummary> s3ObjectSummaries;
  if (AWSUtil.containsWildcard(s3ConfigBean.s3FileConfig.prefixPattern)) {
    ObjectOrdering objectOrdering = s3ConfigBean.s3FileConfig.objectOrdering;
    switch (objectOrdering) {
      case TIMESTAMP:
        s3ObjectSummaries = AmazonS3Util.listObjectsChronologically(
            s3Client, s3ConfigBean, pathMatcher, s3offset, objectQueue.remainingCapacity());
        break;
      case LEXICOGRAPHICAL:
        s3ObjectSummaries = AmazonS3Util.listObjectsLexicographically(
            s3Client, s3ConfigBean, pathMatcher, s3offset, objectQueue.remainingCapacity());
        break;
      default:
        throw new IllegalArgumentException("Unknown ordering: " + objectOrdering.getLabel());
    }
  } else {
    // No wildcard in the prefixPattern - don't need to scan the bucket
    s3ObjectSummaries = AmazonS3Util.getObjectNoWildcard(
        s3Client, s3ConfigBean, s3offset, s3ConfigBean.s3FileConfig.prefixPattern);
  }

  for (S3ObjectSummary found : s3ObjectSummaries) {
    addObjectToQueue(found);
  }
  spoolQueueMeter.mark(objectQueue.size());
  LOG.debug("Found '{}' files", objectQueue.size());

  if (!s3ObjectSummaries.isEmpty()) {
    // Remember the last element so the next search continues after it.
    S3ObjectSummary lastFound = s3ObjectSummaries.get(s3ObjectSummaries.size() - 1);
    lastElementAddedToQueue = new S3Offset(
        lastFound.getKey(),
        S3Constants.MINUS_ONE,
        lastFound.getETag(),
        String.valueOf(lastFound.getLastModified().getTime())
    );

    // New objects arrived after a no-more-data event was sent: re-arm the event so it can
    // be sent again later.
    if (!newDataAfterEventSent) {
      amazonS3Source.restartNoMoreDataEvent();
      newDataAfterEventSent = true;
    }
  } else if (newDataAfterEventSent) {
    // Nothing found. The event is only sent once all threads have finished with their
    // objects; if it is not sent now, the flag stays true and it is retried the next time
    // the queue is filled. Once sent, the flag becomes false so that no further events go
    // out until new data shows up.
    newDataAfterEventSent = !amazonS3Source.sendNoMoreDataEvent(batchContext);
  }
}

Example 16

Source File: CarinaListener.java From carina with Apache License 2.0

4 votes

/**
 * Updates the configured mobile app path when the app is located in an S3 bucket.
 *
 * An "s3://bucket/key" path is split into bucket and key. If the key contains ".*" after a
 * directory part, the latest build artifact matching the key pattern is looked up and its
 * key is used instead. Depending on configuration, the mobile app setting is then replaced
 * either by a pre-signed URL or by the absolute path of the locally downloaded artifact.
 */
private static void updateS3AppPath() {
    Pattern s3UrlPattern = Pattern.compile("s3:\\/\\/([a-zA-Z-0-9][^\\/]*)\\/(.*)");
    // Inspect the configured app path to decide whether a download from S3 is needed.
    Matcher s3Url = s3UrlPattern.matcher(Configuration.getMobileApp());

    LOGGER.info("Analyzing if mobile app is located on S3...");
    if (!s3Url.find()) {
        return;
    }

    LOGGER.info("app artifact is located on s3...");
    String bucketName = s3Url.group(1);
    String key = s3Url.group(2);
    Pattern keyPattern = Pattern.compile(key);

    // A ".*" inside the key means an extra search in AWS is required,
    // e.g. /android/develop/dfgdfg.*/Mapmyrun.apk
    int wildcardIndex = key.indexOf(".*");
    if (wildcardIndex > 0) {
        int slashIndex = key.substring(0, wildcardIndex).lastIndexOf("/");
        if (slashIndex > 0) {
            key = key.substring(0, slashIndex);
            S3ObjectSummary latestBuild = AmazonS3Manager.getInstance()
                    .getLatestBuildArtifact(bucketName, key, keyPattern);
            key = latestBuild.getKey();
        }
    }

    if (Configuration.getBoolean(Parameter.S3_USE_PRESIGN_URL)) {
        // generate presigned url for nearest 8 hours (value is in milliseconds)
        long expirationMillis = 8*1000*60*60;
        String presignedAppUrl = AmazonS3Manager.getInstance()
                .generatePreSignUrl(bucketName, key, expirationMillis)
                .toString();
        LOGGER.debug("preSigned URL: " + presignedAppUrl);
        Configuration.setMobileApp(presignedAppUrl);
        return;
    }

    // download artifact into the local storage
    S3Object remoteBuild = AmazonS3Manager.getInstance().get(bucketName, key);
    String localStorage = Configuration.get(Parameter.S3_LOCAL_STORAGE);
    String localFileName = localStorage + "/" + StringUtils.substringAfterLast(remoteBuild.getKey(), "/");
    File localFile = new File(localFileName);

    // skip the download when a file of the same size is already present
    boolean alreadyDownloaded = localFile.exists()
            && localFile.length() == remoteBuild.getObjectMetadata().getContentLength();
    if (alreadyDownloaded) {
        LOGGER.info("build artifact with the same size already downloaded: " + localFile.getAbsolutePath());
    } else {
        LOGGER.info(String.format("Following data was extracted: bucket: %s, key: %s, local file: %s",
                bucketName, key, localFile.getAbsolutePath()));
        AmazonS3Manager.getInstance().download(bucketName, key, localFile);
    }

    Configuration.setMobileApp(localFile.getAbsolutePath());

    // try to redefine app_version if its value is latest or empty
    String appVersion = Configuration.get(Parameter.APP_VERSION);
    if (appVersion.equals("latest") || appVersion.isEmpty()) {
        R.CONFIG.put(Parameter.APP_VERSION.getKey(), localFile.getName());
    }
}

Example 17

Source File: ResourceDataQueryService.java From wecube-platform with Apache License 2.0

4 votes

/**
 * Lists the files in the S3 bucket that belongs to a plugin package.
 *
 * The bucket is taken from the first plugin instance of the package whose S3 bucket
 * resource can be found. Each returned row contains, in order: file name, path, ETag and
 * last-modified time formatted as {@code yyyy-MM-dd HH:mm:ss}.
 *
 * @param packageId id of the plugin package
 * @return one row per object in the bucket; an empty list when the package has no plugin
 *         instance or no bucket can be determined
 */
public List<List<String>> queryS3Files(String packageId) {
    List<PluginInstance> pluginInstances = pluginInstanceRepository.findByPluginPackage_Id(packageId);
    if(pluginInstances == null || pluginInstances.isEmpty()) {
        // packageId is a String, so it has to be formatted with %s. The previous %d made
        // String.format throw IllegalFormatConversionException instead of logging.
        logger.info(String.format("Can not find out plugin instance for packageId:%s", packageId));
        return Lists.newArrayList();
    }

    String bucketName = null;
    for(PluginInstance ps:pluginInstances) {
        if(ps.getS3BucketResourceId() == null) {
            continue;
        }

        Optional<ResourceItem> item= resourceItemRepository.findById(ps.getS3BucketResourceId());
        if(item.isPresent()) {
            bucketName = item.get().getName();
            break;
        }
    }

    if(Strings.isNullOrEmpty(bucketName)) {
        return Lists.newArrayList();
    }

    List<S3ObjectSummary> s3Objs = s3client.listObjects(bucketName);
    List<List<String>> response = new LinkedList<>();
    SimpleDateFormat datetimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    for(S3ObjectSummary s3ObjSum:s3Objs) {
        List<String> rowVal = new ArrayList<>(4);
        String key = s3ObjSum.getKey();
        int lastSplitPos = key.lastIndexOf("/");
        String path;
        String fileName;
        if(lastSplitPos > 0) {
            // Split at the last "/"; the path keeps its trailing slash.
            path = key.substring(0,lastSplitPos+1);
            fileName = key.substring(lastSplitPos+1);
        }else {
            path = "/";
            fileName = key;
        }
        rowVal.add(fileName);
        rowVal.add(path);
        rowVal.add(s3ObjSum.getETag());
        rowVal.add(datetimeFormat.format(s3ObjSum.getLastModified()));
        response.add(rowVal);
    }
    return response;
}

Example 18

Source File: S3ChangeLogStore.java From athenz with Apache License 2.0

4 votes

/**
 * Adds the names of the objects in the configured S3 bucket to {@code domains}.
 *
 * When {@code modTime} is greater than 0, objects whose last-modified time is not newer
 * than {@code modTime} are skipped. Objects whose key starts with '.' are always skipped.
 *
 * @param s3 AWS S3 client object
 * @param domains collection to be updated to include domain names
 * @param modTime only include domains newer than this timestamp; 0 lists all objects
 */
void listObjects(AmazonS3 s3, Collection<String> domains, long modTime) {

    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("listObjects: Retrieving domains from {} with mod time > {}",
                s3BucketName, modTime);
    }

    ObjectListing page = s3.listObjects(new ListObjectsRequest()
            .withBucketName(s3BucketName));

    while (page != null) {

        final List<S3ObjectSummary> summaries = page.getObjectSummaries();
        final boolean morePages = page.isTruncated();

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("listObjects: retrieved {} objects, more objects available - {}",
                    summaries.size(), morePages);
        }

        for (S3ObjectSummary summary : summaries) {

            // with a mod time given, anything not newer than it is ignored
            final boolean tooOld = modTime > 0 && summary.getLastModified().getTime() <= modTime;
            if (tooOld) {
                continue;
            }

            // for now skip any folders/objects that start with '.'
            final String domainName = summary.getKey();
            if (domainName.charAt(0) != '.') {
                domains.add(domainName);
            }
        }

        // only ask for another batch when the listing was truncated;
        // otherwise clear the page so the loop ends
        page = morePages ? s3.listNextBatchOfObjects(page) : null;
    }
}

Example 19

Source File: StashReader.java From emodb with Apache License 2.0

4 votes

/**
 * Gets the metadata for all tables in this stash.  This is a heavier operation than just {@link #listTables()}
 * since it also returns full file details for the entire Stash instead of just table names.
 *
 * The returned iterator fetches S3 listings on demand (at most 1000 keys per request) and groups consecutive
 * files that share the same parent directory into one {@link StashTableMetadata}.
 *
 * @return an iterator producing one metadata entry per table directory found under the root path
 */
public Iterator<StashTableMetadata> listTableMetadata() {
    final String root = getRootPath();
    final String prefix = String.format("%s/", root);
    final int prefixLength = prefix.length();

    return new AbstractIterator<StashTableMetadata>() {
        // Current page of listing results; starts out empty so the first call fetches a page
        PeekingIterator<S3ObjectSummary> _listResponse =
                Iterators.peekingIterator(Iterators.<S3ObjectSummary>emptyIterator());
        // Marker passed to the next listing request
        String _marker = null;
        // True while more pages may be available; initially true to force the first request
        boolean _truncated = true;

        @Override
        protected StashTableMetadata computeNext() {
            // Directory of the table being assembled; null until the first valid file is seen
            String tableDir = null;
            List<StashFileMetadata> files = Lists.newArrayListWithCapacity(16);
            boolean allFilesRead = false;

            while (!allFilesRead) {
                if (_listResponse.hasNext()) {
                    // Peek at the next record but don't consume it until we verify it's part of the same table
                    S3ObjectSummary s3File = _listResponse.peek();
                    String key = s3File.getKey();

                    // Don't include the _SUCCESS file or any other stray files we may find
                    // (only keys of the form <tableDir>/<file> below the prefix are accepted)
                    String[] parentDirAndFile = key.substring(prefixLength).split("/");
                    if (parentDirAndFile.length != 2) {
                        // Consume and skip this row
                        _listResponse.next();
                    } else {
                        String parentDir = parentDirAndFile[0];
                        if (tableDir == null) {
                            tableDir = parentDir;
                        }

                        if (!parentDir.equals(tableDir)) {
                            // First file of the next table: leave it unconsumed for the next call
                            allFilesRead = true;
                        } else {
                            // Record is part of this table; consume it now
                            _listResponse.next();
                            files.add(new StashFileMetadata(_bucket, key, s3File.getSize()));
                        }
                    }
                } else if (_truncated) {
                    // Current page is exhausted but more results may exist: request the next page
                    ObjectListing response = _s3.listObjects(new ListObjectsRequest()
                            .withBucketName(_bucket)
                            .withPrefix(prefix)
                            .withMarker(_marker)
                            .withMaxKeys(1000));

                    _listResponse = Iterators.peekingIterator(response.getObjectSummaries().iterator());
                    _marker = response.getNextMarker();
                    _truncated = response.isTruncated();
                } else {
                    // No more records and no more pages
                    allFilesRead = true;
                }
            }

            if (tableDir == null) {
                // No files read this iteration means all files have been read
                return endOfData();
            }

            String tablePrefix = prefix + tableDir + "/";
            String tableName = StashUtil.decodeStashTable(tableDir);
            return new StashTableMetadata(_bucket, tablePrefix, tableName, files);
        }
    };
}

Example 20

Source File: S3FileSystem.java From stratosphere with Apache License 2.0

4 votes

/**
 * Lists the entries located directly below the given bucket/object pair.
 *
 * All objects of the bucket (restricted to the object prefix when one is present) are
 * listed page by page. Only objects whose depth equals the expected child depth are
 * reported, and the prefix object itself is left out.
 *
 * @param f   path the relative entry names are appended to
 * @param bop bucket and optional object prefix to list
 * @return the file status of every direct child
 * @throws IOException declared by this method
 */
private S3FileStatus[] listBucketContent(final Path f, final S3BucketObjectPair bop) throws IOException {

	final List<S3FileStatus> entries = new ArrayList<S3FileStatus>();

	// Children of the prefix sit one level below it; without a prefix only depth 0 is listed.
	final int expectedDepth = (bop.hasObject() ? getDepth(bop.getObject()) + 1 : 0);

	ObjectListing page = null;
	do {
		if (page != null) {
			page = this.s3Client.listNextBatchOfObjects(page);
		} else if (bop.hasObject()) {
			page = this.s3Client.listObjects(bop.getBucket(), bop.getObject());
		} else {
			page = this.s3Client.listObjects(bop.getBucket());
		}

		for (final S3ObjectSummary os : page.getObjectSummaries()) {

			String entryName = os.getKey();

			if (getDepth(os.getKey()) != expectedDepth) {
				continue;
			}

			// Remove the prefix
			if (bop.hasObject()) {
				if (entryName.startsWith(bop.getObject())) {
					entryName = entryName.substring(bop.getObject().length());
				}

				// This has been the prefix itself
				if (entryName.isEmpty()) {
					continue;
				}
			}

			final long modificationDate = dateToLong(os.getLastModified());

			final S3FileStatus fileStatus;
			if (objectRepresentsDirectory(os)) {
				fileStatus = new S3FileStatus(extendPath(f, entryName), 0, true, modificationDate, 0L);
			} else {
				fileStatus = new S3FileStatus(extendPath(f, entryName), os.getSize(), false, modificationDate, 0L);
			}

			entries.add(fileStatus);
		}
	} while (page.isTruncated());

	return entries.toArray(new S3FileStatus[0]);

}