package com.topstonesoftware.aws_s3; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.ListObjectsV2Request; import com.amazonaws.services.s3.model.ListObjectsV2Result; import com.amazonaws.services.s3.model.S3ObjectSummary; import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; /** *
* Read an S3 directory as a potentially paginated list of S3 keys (e.g., S3 file paths). *
** S3 directories can be very large. The directory cannot be listed in a single operation because * of memory limitations and network performance (S3 is an HTTP resource). This class allows blocks * of directory names to be read so that the entire directory can be read in a paginated fashion. *
** The S3 directory structure may consist of a set of prefixes (logical sub-directories): *
** foo/ * my_file_1 * my_file_2 * bar/ * your_file_1 * your_file_2 **
* Here the prefixes are "foo" and "bar". If a prefix is provided, only the S3 paths that include that prefix * will be included in the "directory" list. *
** This class should be called once for a given S3 bucket and prefix. *
*/ public class S3DirectoryList { private final AmazonS3 amazonS3; private final String bucket; private final String prefix; private String startAfter = null; /** * * @param amazonS3 The authenticated AmazonS3 client * @param bucket The bucket to be listed. * @param prefix The prefix within the bucket. If no prefix is needed the prefix should be the * empty string "". */ public S3DirectoryList(AmazonS3 amazonS3, String bucket, String prefix) { this.amazonS3 = amazonS3; this.bucket = bucket; this.prefix = prefix; } /** * Return a list of S3 file names for the bucket/prefix. * * @param numToRead the maximum number of S3 paths to return. * @return a list of S3 directory paths. If all of the paths have been read, then a list of length * zero will be returned. */ public List