diff --git a/py/s3-deep-archive-restore.py b/py/s3-deep-archive-restore.py
index 85b5c20..edeb587 100755
--- a/py/s3-deep-archive-restore.py
+++ b/py/s3-deep-archive-restore.py
@@ -1,9 +1,9 @@
 #!/usr/bin/python3
 import boto3
-import pickle
-import os
+# import pickle
+# import os
 import concurrent.futures
-from pprint import pprint
+# from pprint import pprint
 
 
 def restoreObject(bucket, key, versionId, myCount):
@@ -30,49 +30,60 @@
 bucketName = 's3-emr-hbase'
 concurrency = 15
 client = boto3.client('s3')
 count = 0
-
-try:
-    with open('nextKey.pickle', 'rb') as file:
-        nextKey = pickle.load(file)
-        response = client.list_object_versions(
-            Bucket=bucketName,
-            MaxKeys=10,
-            KeyMarker=nextKey
-        )
-except IOError:
-    print("No position.pickle file. Start from beginning.")
-    response = client.list_object_versions(
-        Bucket=bucketName,
-        MaxKeys=10
-    )
-
-nextKey = response.get("NextKeyMarker")
-with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
-    for i in response["Versions"]:
-        count += 1
-        # print(count, i.get("Key"), i.get("StorageClass"))
-        if i.get("StorageClass") == "DEEP_ARCHIVE":
-            executor.submit(restoreObject, bucketName, i.get("Key"), i.get("VersionId"), count)
-
-print("NextKey", nextKey)
-while nextKey is not None:
-    response = client.list_object_versions(
-        Bucket=bucketName,
-        MaxKeys=300,
-        KeyMarker=nextKey
-    )
-    nextKey = response.get("NextKeyMarker")
-    if nextKey is not None:
-        with open('nextKey.pickle', 'wb') as file:
-            pickle.dump(nextKey, file)
-    else:
-        os.remove('nextKey.pickle')
+pagesize = 1000
+paginator = client.get_paginator('list_object_versions')
+result = paginator.paginate(Bucket=bucketName, PaginationConfig={'PageSize': pagesize})
+for page in result:
     with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
-        for i in response["Versions"]:
+        for i in page["Versions"]:
             count += 1
-            # print(count, i.get("Key"), i.get("StorageClass"))
             if i.get("StorageClass") == "DEEP_ARCHIVE":
                 executor.submit(restoreObject, bucketName, i.get("Key"), i.get("VersionId"), count)
+
+#
+# try:
+#     with open('nextKey.pickle', 'rb') as file:
+#         nextKey = pickle.load(file)
+#         response = client.list_object_versions(
+#             Bucket=bucketName,
+#             MaxKeys=10,
+#             KeyMarker=nextKey
+#         )
+# except IOError:
+#     print("No position.pickle file. Start from beginning.")
+#     response = client.list_object_versions(
+#         Bucket=bucketName,
+#         MaxKeys=10
+#     )
+#
+# nextKey = response.get("NextKeyMarker")
+# with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
+#     for i in response["Versions"]:
+#         count += 1
+#         # print(count, i.get("Key"), i.get("StorageClass"))
+#         if i.get("StorageClass") == "DEEP_ARCHIVE":
+#             executor.submit(restoreObject, bucketName, i.get("Key"), i.get("VersionId"), count)
+#
+# print("NextKey", nextKey)
+# while nextKey is not None:
+#     response = client.list_object_versions(
+#         Bucket=bucketName,
+#         MaxKeys=300,
+#         KeyMarker=nextKey
+#     )
+#     nextKey = response.get("NextKeyMarker")
+#     if nextKey is not None:
+#         with open('nextKey.pickle', 'wb') as file:
+#             pickle.dump(nextKey, file)
+#     else:
+#         os.remove('nextKey.pickle')
+#     with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
+#         for i in response["Versions"]:
+#             count += 1
+#             # print(count, i.get("Key"), i.get("StorageClass"))
+#             if i.get("StorageClass") == "DEEP_ARCHIVE":
+#                 executor.submit(restoreObject, bucketName, i.get("Key"), i.get("VersionId"), count)
+
 
 print('Total objects', count, sep=" ")