Java Code Examples for com.amazonaws.services.s3.AmazonS3#listNextBatchOfObjects()

The following examples show how to use com.amazonaws.services.s3.AmazonS3#listNextBatchOfObjects(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: InfectedFileCache.java    From aws-s3-virusscan with Apache License 2.0 6 votes vote down vote up
/**
 * Loads every object of the configured infected-files bucket into memory.
 *
 * <p>The bucket is walked page by page; each object is downloaded completely and the
 * underlying {@code S3Object} is closed as soon as its content has been read, so the
 * HTTP connection backing it is released.
 *
 * @return one {@code InfectedFile} (key, content, content type) per object, or an empty
 *         list when {@code Config.Key.INFECTED_FILES_BUCKET_NAME} is not configured
 * @throws RuntimeException wrapping the {@link IOException} if reading or closing an object fails
 */
public List<InfectedFile> getFiles() {
    final List<InfectedFile> files = new ArrayList<>();
    if (Config.has(Config.Key.INFECTED_FILES_BUCKET_NAME)) {
        final AmazonS3 s3local = AmazonS3ClientBuilder.standard().withCredentials(this.credentialsProvider).withRegion(Config.get(Config.Key.INFECTED_FILES_BUCKET_REGION)).build();
        ObjectListing objectListing = s3local.listObjects(Config.get(Config.Key.INFECTED_FILES_BUCKET_NAME));
        while (true) {
            objectListing.getObjectSummaries().forEach((summary) -> {
                // S3Object is Closeable; leaving it open leaks the connection it streams from.
                try (S3Object object = s3local.getObject(summary.getBucketName(), summary.getKey())) {
                    final byte[] content = IOUtils.toByteArray(object.getObjectContent());
                    files.add(new InfectedFile(summary.getKey(), content, object.getObjectMetadata().getContentType()));
                } catch (final IOException e) {
                    throw new RuntimeException(e);
                }
            });
            // Only ask for another page when S3 reported that this one was cut short.
            if (objectListing.isTruncated()) {
                objectListing = s3local.listNextBatchOfObjects(objectListing);
            } else {
                break;
            }
        }
    }
    return files;
}
 
Example 2
Source File: S3CheckpointSpiSelfTest.java    From ignite with Apache License 2.0 6 votes vote down vote up
/**
 * Deletes every object from the unit-test checkpoint bucket, one listing page at a time.
 *
 * @throws Exception If error.
 */
@Override protected void afterSpiStopped() throws Exception {
    AmazonS3 s3 = new AmazonS3Client(
        new BasicAWSCredentials(IgniteS3TestSuite.getAccessKey(), IgniteS3TestSuite.getSecretKey()));

    String bucketName = S3CheckpointSpi.BUCKET_NAME_PREFIX + "unit-test-bucket";

    try {
        ObjectListing page = s3.listObjects(bucketName);

        boolean morePages;

        do {
            for (S3ObjectSummary obj : page.getObjectSummaries())
                s3.deleteObject(bucketName, obj.getKey());

            // Fetch the following page only if the current one was truncated.
            morePages = page.isTruncated();

            if (morePages)
                page = s3.listNextBatchOfObjects(page);
        }
        while (morePages);
    }
    catch (AmazonClientException e) {
        throw new IgniteSpiException("Failed to read checkpoint bucket: " + bucketName, e);
    }
}
 
Example 3
Source File: PrimitiveS3OperationHandler.java    From CloverETL-Engine with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Deletes all objects of the given listing and of every following page.
 *
 * <p>Each page is deleted with a single quiet multi-object delete request. The truncation
 * flag is checked on the page that has just been processed, before the next page is
 * fetched, so the final page is deleted as well.
 *
 * @param service S3 client used for deleting and for paging
 * @param listing first page of objects to delete
 * @throws MultiObjectDeleteException if a multi-object delete request fails
 * @throws IOException if the current thread has been interrupted
 */
private void deleteObjects(AmazonS3 service, ObjectListing listing) throws MultiObjectDeleteException, IOException {
	while (true) {
		if (Thread.currentThread().isInterrupted()) {
			throw new IOException(FileOperationMessages.getString("IOperationHandler.interrupted")); //$NON-NLS-1$
		}
		List<S3ObjectSummary> objectSummaries = listing.getObjectSummaries();
		if (!objectSummaries.isEmpty()) {
			List<KeyVersion> keys = new ArrayList<KeyVersion>(objectSummaries.size());
			for (S3ObjectSummary object: objectSummaries) {
				keys.add(new KeyVersion(object.getKey()));
			}
			DeleteObjectsRequest request = new DeleteObjectsRequest(listing.getBucketName()).withKeys(keys).withQuiet(true);
			service.deleteObjects(request); // quiet
		}
		// Decide based on the page just deleted; testing the freshly fetched page instead
		// would leave the loop before the last page's objects are removed.
		if (!listing.isTruncated()) {
			return;
		}
		listing = service.listNextBatchOfObjects(listing);
	}
}
 
Example 4
Source File: AmazonS3FileSystemTestHelper.java    From iaf with Apache License 2.0 6 votes vote down vote up
/**
 * Empties and deletes the test bucket (if it exists) and then shuts the client down.
 *
 * <p>A {@code null} client is tolerated and results in a no-op. The client is shut down
 * even when emptying or deleting the bucket throws.
 *
 * @param s3Client client to clean up with and to shut down afterwards; may be {@code null}
 */
public void cleanUpBucketAndShutDown(AmazonS3 s3Client) {
	// Guard first: the client is dereferenced below, so a later null check would be too late.
	if(s3Client == null) {
		return;
	}
	try {
		if(s3Client.doesBucketExistV2(bucketName)) {
			ObjectListing objectListing = s3Client.listObjects(bucketName);
			while (true) {
				for (S3ObjectSummary summary : objectListing.getObjectSummaries()) {
					s3Client.deleteObject(bucketName, summary.getKey());
				}

				// If the bucket contains many objects, the listObjects() call
				// might not return all of the objects in the first listing. Check to
				// see whether the listing was truncated. If so, retrieve the next page of objects
				// and delete them.
				if (objectListing.isTruncated()) {
					objectListing = s3Client.listNextBatchOfObjects(objectListing);
				} else {
					break;
				}
			}
			s3Client.deleteBucket(bucketName);
		}
	} finally {
		s3Client.shutdown();
	}
}
 
Example 5
Source File: ImportS3.java    From h2o-2 with Apache License 2.0 6 votes vote down vote up
/**
 * Processes every listing page of the requested bucket and reports which keys
 * succeeded and which failed as a JSON response.
 */
@Override
protected Response serve() {
  final String bucket = _bucket.value();
  Log.info("ImportS3 processing (" + bucket + ")");
  final JsonArray succ = new JsonArray();
  final JsonArray fail = new JsonArray();
  final AmazonS3 s3 = PersistS3.getClient();

  // Handle the first page, then keep going for as long as S3 reports truncation.
  ObjectListing page = s3.listObjects(bucket);
  for (;;) {
    processListing(page, succ, fail);
    if (!page.isTruncated()) {
      break;
    }
    page = s3.listNextBatchOfObjects(page);
  }

  final JsonObject json = new JsonObject();
  json.add(NUM_SUCCEEDED, new JsonPrimitive(succ.size()));
  json.add(SUCCEEDED, succ);
  json.add(NUM_FAILED, new JsonPrimitive(fail.size()));
  json.add(FAILED, fail);
  DKV.write_barrier();

  final Response response = Response.done(json);
  response.setBuilder(SUCCEEDED + "." + KEY, new KeyCellBuilder());
  return response;
}
 
Example 6
Source File: S3UploadAllCallable.java    From jobcacher-plugin with MIT License 5 votes vote down vote up
/**
 * Collects the summaries of all objects below {@code pathPrefix} in {@code bucketName}.
 *
 * @param s3 client used to list the bucket
 * @return object summaries keyed by their S3 key
 */
private Map<String,S3ObjectSummary> lookupExistingCacheEntries(AmazonS3 s3) {
    Map<String,S3ObjectSummary> existing = new HashMap<>();

    ObjectListing page = s3.listObjects(bucketName, pathPrefix);
    while (true) {
        for (S3ObjectSummary entry : page.getObjectSummaries()) {
            existing.put(entry.getKey(), entry);
        }
        if (!page.isTruncated()) {
            break;
        }
        page = s3.listNextBatchOfObjects(page);
        // A null follow-up page ends the walk as well.
        if (page == null) {
            break;
        }
    }

    return existing;
}
 
Example 7
Source File: S3InputFormatUtils.java    From kangaroo with Apache License 2.0 5 votes vote down vote up
/**
 * Builds Hadoop {@link org.apache.hadoop.fs.FileStatus} entries for the S3 objects found under each of the
 * given {@code dirs}, keeping only those accepted by {@code S3_PATH_FILTER}.
 * 
 * @param s3Client
 *            s3 client
 * @param blockSize
 *            the block size
 * @param dirs
 *            the dirs to search through
 * @return the {@link org.apache.hadoop.fs.FileStatus} version of all S3 files under {@code dirs}
 */
static List<FileStatus> getFileStatuses(final AmazonS3 s3Client, final long blockSize, final Path... dirs) {
    final List<FileStatus> statuses = Lists.newArrayList();
    for (final Path dir : dirs) {
        // split the path into bucket and key prefix
        final String location = dir.toString();
        final String bucket = S3HadoopUtils.getBucketFromPath(location);
        final String prefix = S3HadoopUtils.getKeyFromPath(location);

        final ListObjectsRequest req = new ListObjectsRequest().withMaxKeys(Integer.MAX_VALUE)
                .withBucketName(bucket).withPrefix(prefix);

        // walk the listing page by page; an empty page ends the walk
        ObjectListing listing = s3Client.listObjects(req);
        while (listing.getObjectSummaries().size() > 0) {
            for (final S3ObjectSummary summary : listing.getObjectSummaries()) {
                final Path path = new Path(String.format("s3n://%s/%s", summary.getBucketName(), summary.getKey()));
                if (!S3_PATH_FILTER.accept(path)) {
                    continue;
                }
                final long size = summary.getSize();
                final long modified = summary.getLastModified().getTime();
                statuses.add(new FileStatus(size, false, 1, blockSize, modified, path));
            }
            // no further request is needed once a page is not truncated
            if (!listing.isTruncated()) {
                break;
            }
            listing = s3Client.listNextBatchOfObjects(listing);
        }
    }
    return statuses;
}
 
Example 8
Source File: AWSCommon.java    From camel-kafka-connector with Apache License 2.0 4 votes vote down vote up
/**
 * Empties an S3 bucket (objects first, then object versions) and deletes it. Coming from AWS documentation:
 * https://docs.aws.amazon.com/AmazonS3/latest/dev/delete-or-empty-bucket.html#delete-bucket-sdk-java
 * @param s3Client the AmazonS3 client instance used to delete the bucket
 * @param bucketName a String containing the bucket name
 */
public static void deleteBucket(AmazonS3 s3Client, String bucketName) {
    // Pass 1: delete the objects. For a non-versioned bucket this is all that is needed.
    // In a versioned bucket S3 only inserts delete markers here and keeps the versions,
    // which is why pass 2 below removes the versions explicitly.
    ObjectListing objects = s3Client.listObjects(bucketName);
    for (;;) {
        for (S3ObjectSummary object : objects.getObjectSummaries()) {
            s3Client.deleteObject(bucketName, object.getKey());
        }

        // A single listing may not contain everything; continue with the next page
        // for as long as the current one is flagged as truncated.
        if (!objects.isTruncated()) {
            break;
        }
        objects = s3Client.listNextBatchOfObjects(objects);
    }

    // Pass 2: delete all object versions (required for versioned buckets).
    VersionListing versions = s3Client.listVersions(new ListVersionsRequest().withBucketName(bucketName));
    for (;;) {
        for (S3VersionSummary version : versions.getVersionSummaries()) {
            s3Client.deleteVersion(bucketName, version.getKey(), version.getVersionId());
        }

        if (!versions.isTruncated()) {
            break;
        }
        versions = s3Client.listNextBatchOfVersions(versions);
    }

    // With objects and versions gone the bucket itself can be removed.
    s3Client.deleteBucket(bucketName);
}
 
Example 9
Source File: CloudFormationClient.java    From herd-mdl with Apache License 2.0 4 votes vote down vote up
/**
 * Delete the stack {@link #stackName}.
 *
 * <p>Before the deletion is requested, every S3 bucket that belongs to the root stack or to
 * one of its nested stacks (and is not already in {@code DELETE_COMPLETE}) is emptied.
 * Buckets that no longer exist are skipped; the remaining buckets are still emptied.
 *
 * @throws Exception if the stack does not end up in {@code DELETE_COMPLETE}
 */
public void deleteStack() throws Exception {

    CFTStackInfo cftStackInfo = getStackInfo();
    String rootStackId = cftStackInfo.stackId(); // Use the stack id to track the delete operation
    LOGGER.info("rootStackId   =   " + rootStackId);

    // Go through the stack and pick up resources that we want
    // to finalize before deleting the stack.
    List<String> s3BucketIds = new ArrayList<>();

    DescribeStacksResult describeStacksResult = amazonCloudFormation.describeStacks();
    for (Stack currentStack : describeStacksResult.getStacks()) {
        if (rootStackId.equals(currentStack.getRootId()) || rootStackId
                .equals(currentStack.getStackId())) {
            LOGGER.info("stackId   =   " + currentStack.getStackId());
            DescribeStackResourcesRequest describeStackResourcesRequest = new DescribeStackResourcesRequest();
            describeStackResourcesRequest.setStackName(currentStack.getStackName());
            List<StackResource> stackResources = amazonCloudFormation
                    .describeStackResources(describeStackResourcesRequest).getStackResources();
            for (StackResource stackResource : stackResources) {
                if (!stackResource.getResourceStatus()
                        .equals(ResourceStatus.DELETE_COMPLETE.toString())) {
                    if (stackResource.getResourceType().equals("AWS::S3::Bucket")) {
                        s3BucketIds.add(stackResource.getPhysicalResourceId());
                    }
                }
            }
        }
    }

    // Now empty S3 buckets, clean up will be done when the stack is deleted
    AmazonS3 amazonS3 = AmazonS3ClientBuilder.standard().withRegion(Regions.getCurrentRegion().getName())
            .withCredentials(new InstanceProfileCredentialsProvider(true)).build();
    for (String s3BucketPhysicalId : s3BucketIds) {
        String s3BucketName = s3BucketPhysicalId;
        if(!amazonS3.doesBucketExistV2(s3BucketName)){
            // Skip only this bucket; leaving the loop here would keep the remaining
            // buckets non-empty.
            continue;
        }
        LOGGER.info("Empyting S3 bucket, " + s3BucketName);
        ObjectListing objectListing = amazonS3.listObjects(s3BucketName);
        while (true) {
            for (S3ObjectSummary summary : objectListing.getObjectSummaries()) {
                amazonS3.deleteObject(s3BucketName, summary.getKey());
            }
            if (objectListing.isTruncated()) {
                objectListing = amazonS3.listNextBatchOfObjects(objectListing);
            }
            else {
                break;
            }
        }
    }

    //Proceed with the regular stack deletion operation
    DeleteStackRequest deleteRequest = new DeleteStackRequest();
    deleteRequest.setStackName(stackName);
    amazonCloudFormation.deleteStack(deleteRequest);
    LOGGER.info("Stack deletion initiated");

    CFTStackStatus cftStackStatus = waitForCompletionAndGetStackStatus(amazonCloudFormation,
            rootStackId);
    LOGGER.info(
            "Stack deletion completed, the stack " + stackName + " completed with " + cftStackStatus);

    // Throw exception if failed
    if (!cftStackStatus.getStackStatus().equals(StackStatus.DELETE_COMPLETE.toString())) {
        throw new Exception(
                "deleteStack operation failed for stack " + stackName + " - " + cftStackStatus);
    }
}
 
Example 10
Source File: S3StorageService.java    From kayenta with Apache License 2.0 4 votes vote down vote up
/**
 * Lists metadata for the stored objects of the given type.
 *
 * <p>For {@code CANARY_CONFIG} the canary config index is consulted unless {@code skipIndex}
 * is set. Otherwise the bucket is listed under the typed folder prefix, page by page, and
 * one metadata map is built per object with the entries {@code id},
 * {@code updatedTimestamp}, {@code updatedTimestampIso} and, for canary configs only,
 * {@code name} (the last path segment without a trailing {@code .json}).
 *
 * @param accountName name of the account whose credentials and bucket are used
 * @param objectType type of objects to list
 * @param applications applications passed to the canary config index lookup
 * @param skipIndex when {@code true}, list the bucket even for canary configs
 * @return one metadata map per object found
 */
@Override
public List<Map<String, Object>> listObjectKeys(
    String accountName, ObjectType objectType, List<String> applications, boolean skipIndex) {
  AwsNamedAccountCredentials credentials =
      accountCredentialsRepository.getRequiredOne(accountName);

  if (!skipIndex && objectType == ObjectType.CANARY_CONFIG) {
    Set<Map<String, Object>> canaryConfigSet =
        canaryConfigIndex.getCanaryConfigSummarySet(credentials, applications);

    return Lists.newArrayList(canaryConfigSet);
  } else {
    AmazonS3 amazonS3 = credentials.getAmazonS3();
    String bucket = credentials.getBucket();
    String group = objectType.getGroup();
    String prefix = buildTypedFolder(credentials, group);

    ensureBucketExists(accountName);

    int skipToOffset = prefix.length() + 1; // + Trailing slash
    List<Map<String, Object>> result = new ArrayList<>();

    log.debug("Listing {}", group);

    ObjectListing bucketListing =
        amazonS3.listObjects(new ListObjectsRequest(bucket, prefix, null, null, 10000));

    // Accumulate into a list of our own instead of appending later pages to the list
    // that belongs to the first ObjectListing.
    List<S3ObjectSummary> summaries = new ArrayList<>(bucketListing.getObjectSummaries());

    while (bucketListing.isTruncated()) {
      bucketListing = amazonS3.listNextBatchOfObjects(bucketListing);
      summaries.addAll(bucketListing.getObjectSummaries());
    }

    for (S3ObjectSummary summary : summaries) {
      String itemName = summary.getKey();
      int indexOfLastSlash = itemName.lastIndexOf("/");
      Map<String, Object> objectMetadataMap = new HashMap<>();
      long updatedTimestamp = summary.getLastModified().getTime();

      // The id is the part of the key between the typed folder prefix and the last slash.
      objectMetadataMap.put("id", itemName.substring(skipToOffset, indexOfLastSlash));
      objectMetadataMap.put("updatedTimestamp", updatedTimestamp);
      objectMetadataMap.put(
          "updatedTimestampIso", Instant.ofEpochMilli(updatedTimestamp).toString());

      if (objectType == ObjectType.CANARY_CONFIG) {
        String name = itemName.substring(indexOfLastSlash + 1);

        if (name.endsWith(".json")) {
          name = name.substring(0, name.length() - 5);
        }

        objectMetadataMap.put("name", name);
      }

      result.add(objectMetadataMap);
    }

    return result;
  }
}
 
Example 11
Source File: DeleteBucket.java    From aws-doc-sdk-examples with Apache License 2.0 4 votes vote down vote up
/**
 * Command-line entry point: empties the bucket named by the first argument
 * (objects, then object versions) and deletes it.
 *
 * @param args expects the bucket name as the first element
 */
public static void main(String[] args) {
    final String USAGE = "\n" +
            "To run this example, supply the name of an S3 bucket\n" +
            "\n" +
            "Ex: DeleteBucket <bucketname>\n";

    if (args.length < 1) {
        System.out.println(USAGE);
        System.exit(1);
    }

    final String bucketName = args[0];

    System.out.println("Deleting S3 bucket: " + bucketName);
    final AmazonS3 s3 = AmazonS3ClientBuilder.standard().withRegion(Regions.DEFAULT_REGION).build();
    try {
        System.out.println(" - removing objects from bucket");
        ObjectListing objects = s3.listObjects(bucketName);
        for (;;) {
            for (S3ObjectSummary object : objects.getObjectSummaries()) {
                s3.deleteObject(bucketName, object.getKey());
            }

            // stop once a page is no longer flagged as truncated
            if (!objects.isTruncated()) {
                break;
            }
            objects = s3.listNextBatchOfObjects(objects);
        }

        System.out.println(" - removing versions from bucket");
        VersionListing versions = s3.listVersions(
                new ListVersionsRequest().withBucketName(bucketName));
        for (;;) {
            for (S3VersionSummary version : versions.getVersionSummaries()) {
                s3.deleteVersion(bucketName, version.getKey(), version.getVersionId());
            }

            if (!versions.isTruncated()) {
                break;
            }
            versions = s3.listNextBatchOfVersions(versions);
        }

        System.out.println(" OK, bucket ready to delete!");
        s3.deleteBucket(bucketName);
    } catch (AmazonServiceException e) {
        System.err.println(e.getErrorMessage());
        System.exit(1);
    }
    System.out.println("Done!");
}
 
Example 12
Source File: S3ChangeLogStore.java    From athenz with Apache License 2.0 4 votes vote down vote up
/**
 * Adds the names of the objects in the zts bucket to {@code domains}. With a mod time
 * of 0 every object is considered; otherwise only objects modified after the given
 * timestamp are included. Objects whose key starts with '.' are always skipped.
 * @param s3 AWS S3 client object
 * @param domains collection to be updated to include domain names
 * @param modTime only include domains newer than this timestamp
 */
void listObjects(AmazonS3 s3, Collection<String> domains, long modTime) {

    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("listObjects: Retrieving domains from {} with mod time > {}",
                s3BucketName, modTime);
    }

    ObjectListing page = s3.listObjects(new ListObjectsRequest()
            .withBucketName(s3BucketName));

    while (page != null) {

        final List<S3ObjectSummary> entries = page.getObjectSummaries();
        final boolean morePages = page.isTruncated();

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("listObjects: retrieved {} objects, more objects available - {}",
                    entries.size(), morePages);
        }

        for (S3ObjectSummary entry : entries) {

            // with a mod time given, anything not newer than it is ignored

            final boolean tooOld = modTime > 0 && entry.getLastModified().getTime() <= modTime;
            if (tooOld) {
                continue;
            }

            // folders/objects starting with '.' are skipped for now

            final String key = entry.getKey();
            if (key.charAt(0) != '.') {
                domains.add(key);
            }
        }

        // only request another page when the current one was truncated;
        // otherwise the walk ends here

        page = morePages ? s3.listNextBatchOfObjects(page) : null;
    }
}
 
Example 13
Source File: AmazonS3Util.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Lists objects from AmazonS3 in lexicographical order.
 *
 * <p>Listing starts after the key held by {@code s3Offset} (used as the list marker). When
 * that key is non-empty and {@code parseOffset(s3Offset)} is not -1, the summary of the
 * offset object itself is placed first in the result. Objects are then added when the part
 * of their key after the common prefix matches the configured prefix pattern; keys equal
 * to the common prefix are ignored.
 *
 * @param s3Client client used to list the bucket
 * @param s3ConfigBean supplies the bucket, the common prefix and the prefix pattern
 * @param pathMatcher glob patterns to match file name against
 * @param s3Offset current offset which provides the key name of the previous object
 * @param fetchSize number of objects to fetch in one go
 * @return the collected object summaries; collection stops as soon as at least
 *         {@code fetchSize} summaries are held
 * @throws AmazonClientException
 */
static List<S3ObjectSummary> listObjectsLexicographically(
    AmazonS3 s3Client,
    S3ConfigBean s3ConfigBean,
    AntPathMatcher pathMatcher,
    S3Offset s3Offset,
    int fetchSize
) {
  // Incrementally scan objects after the marker (s3Offset).
  List<S3ObjectSummary> list = new ArrayList<>(fetchSize);

  ListObjectsRequest listObjectsRequest = new ListObjectsRequest();
  listObjectsRequest.setBucketName(s3ConfigBean.s3Config.bucket);
  listObjectsRequest.setPrefix(s3ConfigBean.s3Config.commonPrefix);
  listObjectsRequest.setMaxKeys(BATCH_SIZE);

  if (s3Offset.getKey() != null) {
    if (!s3Offset.getKey().isEmpty() && parseOffset(s3Offset) != -1) {
      S3ObjectSummary currentObjectSummary = getObjectSummary(s3Client, s3ConfigBean.s3Config.bucket, s3Offset.getKey());
      list.add(currentObjectSummary);
    }
    listObjectsRequest.setMarker(s3Offset.getKey());
  }

  ObjectListing objectListing = s3Client.listObjects(listObjectsRequest);

  while (true) {
    for (S3ObjectSummary s : objectListing.getObjectSummaries()) {
      String fullPrefix = s.getKey();
      String remainingPrefix = fullPrefix.substring(s3ConfigBean.s3Config.commonPrefix.length());
      if (!remainingPrefix.isEmpty()) {
        if (pathMatcher.match(s3ConfigBean.s3FileConfig.prefixPattern, remainingPrefix)) {
          list.add(s);
        }
        // We've got enough objects. Use >= rather than ==: the list may have been seeded
        // with the offset object above, so its size can step past fetchSize without ever
        // being equal to it, which would turn this into a scan of the whole bucket.
        if (list.size() >= fetchSize) {
          return list;
        }
      }
    }
    // Listing is complete. No more objects to be listed.
    if (!objectListing.isTruncated()) {
      break;
    }
    objectListing = s3Client.listNextBatchOfObjects(objectListing);
  }

  return list;
}