#!/usr/bin/python3
"""Bulk-restore DEEP_ARCHIVE object versions from an S3 bucket.

Pages through every object version in the bucket and issues a Glacier
restore request for each version stored in DEEP_ARCHIVE.  Restore calls
are fanned out over a thread pool: the work is pure network I/O, so
threads overlap the HTTP round-trips.
"""

import concurrent.futures

import boto3

# Bucket whose DEEP_ARCHIVE object versions should be restored.
bucketName = 's3-emr-hbase'
# Threading allows ~104000 objects to be restored in an hour, compared
# to ~21000 files without concurrency.
concurrency = 15
# Object versions fetched per list_object_versions page.
pagesize = 1000
# How long (in days) the restored copy stays available in S3.
RESTORE_DAYS = 20


def restoreObject(bucket, key, versionId, myCount):
    """Request a Glacier restore of a single object version.

    Skips the request when a restore is already in progress, i.e. the
    HeadObject ``Restore`` header contains ``ongoing-request="true"``.

    Args:
        bucket: S3 bucket name.
        key: Object key.
        versionId: Specific object version to restore.
        myCount: Running object counter, used only for log output.
    """
    # boto3 clients are thread-safe but sessions are not; building the
    # client inside the worker keeps each thread self-contained.
    myClient = boto3.client('s3')
    headObject = myClient.head_object(
        Bucket=bucket, Key=key, VersionId=versionId)
    # The Restore header can carry extra fields (e.g. expiry-date), so
    # test for the ongoing marker as a substring rather than equality.
    if 'ongoing-request="true"' in (headObject.get("Restore") or ""):
        # \033[91m / \033[0m wrap the line in red for visibility.
        print("\033[91m", myCount, "Already restoring", key, "\033[0m")
        return
    myClient.restore_object(
        Bucket=bucket,
        Key=key,
        VersionId=versionId,
        RestoreRequest={'Days': RESTORE_DAYS},
    )
    print(myCount, "Restore requested for", key)


def main():
    """Walk all object versions and restore those in DEEP_ARCHIVE."""
    client = boto3.client('s3')
    count = 0
    paginator = client.get_paginator('list_object_versions')
    pages = paginator.paginate(
        Bucket=bucketName, PaginationConfig={'PageSize': pagesize})
    # One executor for the whole run: recreating the pool per page pays
    # thread startup/shutdown costs on every page and stalls listing
    # until that page's restores complete.
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=concurrency) as executor:
        for page in pages:
            # A page holding only delete markers omits "Versions".
            for version in page.get("Versions", []):
                count += 1
                if version.get("StorageClass") == "DEEP_ARCHIVE":
                    executor.submit(
                        restoreObject,
                        bucketName,
                        version.get("Key"),
                        version.get("VersionId"),
                        count,
                    )
    print('Total objects', count, sep=" ")


if __name__ == "__main__":
    main()