code-dumps/py/s3-deep-archive-restore.py

#!/usr/bin/python3
import boto3
# import pickle
# import os
import concurrent.futures
# from pprint import pprint
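
# Restore every DEEP_ARCHIVE object version in a bucket by issuing
# restore_object requests from a small thread pool. Versions whose
# restore is already in progress are detected via head_object and skipped.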
def restoreObject(bucket, key, versionId, myCount):
    # Create a client per task: boto3 clients are thread-safe once built,
    # but giving each worker its own avoids sharing state across threads.
    myClient = boto3.client('s3')
    headObject = myClient.head_object(
        Bucket=bucket,
        Key=key,
        VersionId=versionId)
    # A restore already in flight shows up as the Restore header
    # 'ongoing-request="true"'; skip the object rather than re-request it.
    if headObject.get("Restore") == 'ongoing-request="true"':
        print("\033[91m", myCount, "Already restoring", key, "\033[0m")
        return
    myClient.restore_object(
        Bucket=bucket,
        Key=key,
        VersionId=versionId,
        RestoreRequest={'Days': 20}  # keep the restored copy for 20 days
    )
    print(myCount, "Restore requested for", key)
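
# A possible variant (not in the original script): restore_object also
# accepts a retrieval tier. DEEP_ARCHIVE supports 'Standard' (~12 h) and
# 'Bulk' (~48 h, cheaper); the call above uses the Standard default.
#
#     myClient.restore_object(
#         Bucket=bucket, Key=key, VersionId=versionId,
#         RestoreRequest={'Days': 20,
#                         'GlacierJobParameters': {'Tier': 'Bulk'}})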
bucketName = 's3-emr-hbase'
# With threading, roughly 104,000 objects can be restored per hour,
# compared to about 21,000 without concurrency.
concurrency = 15
client = boto3.client('s3')
count = 0
pagesize = 1000
# The paginator walks every object version; each page's DEEP_ARCHIVE
# versions are handed to a fresh thread pool, which drains before the
# next page is fetched.
paginator = client.get_paginator('list_object_versions')
result = paginator.paginate(Bucket=bucketName, PaginationConfig={'PageSize': pagesize})
for page in result:
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        for i in page.get("Versions", []):  # a page may carry only delete markers
            count += 1
            if i.get("StorageClass") == "DEEP_ARCHIVE":
                executor.submit(restoreObject, bucketName, i.get("Key"), i.get("VersionId"), count)
#
# Earlier sequential version, kept for reference: it saved the next key
# marker to nextKey.pickle so an interrupted run could resume where it
# left off. The paginator above makes the manual KeyMarker bookkeeping
# unnecessary.
# try:
#     with open('nextKey.pickle', 'rb') as file:
#         nextKey = pickle.load(file)
#     response = client.list_object_versions(
#         Bucket=bucketName,
#         MaxKeys=10,
#         KeyMarker=nextKey
#     )
# except IOError:
#     print("No nextKey.pickle file. Starting from the beginning.")
#     response = client.list_object_versions(
#         Bucket=bucketName,
#         MaxKeys=10
#     )
#
# nextKey = response.get("NextKeyMarker")
# with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
#     for i in response["Versions"]:
#         count += 1
#         # print(count, i.get("Key"), i.get("StorageClass"))
#         if i.get("StorageClass") == "DEEP_ARCHIVE":
#             executor.submit(restoreObject, bucketName, i.get("Key"), i.get("VersionId"), count)
#
# print("NextKey", nextKey)
# while nextKey is not None:
#     response = client.list_object_versions(
#         Bucket=bucketName,
#         MaxKeys=300,
#         KeyMarker=nextKey
#     )
#     nextKey = response.get("NextKeyMarker")
#     if nextKey is not None:
#         with open('nextKey.pickle', 'wb') as file:
#             pickle.dump(nextKey, file)
#     else:
#         os.remove('nextKey.pickle')
#     with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
#         for i in response["Versions"]:
#             count += 1
#             # print(count, i.get("Key"), i.get("StorageClass"))
#             if i.get("StorageClass") == "DEEP_ARCHIVE":
#                 executor.submit(restoreObject, bucketName, i.get("Key"), i.get("VersionId"), count)
print('Total object versions scanned:', count)
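
# Checking progress later (a sketch, not part of the original script):
# once a restore finishes, head_object reports a Restore header like
# 'ongoing-request="false", expiry-date="..."' instead of
# 'ongoing-request="true"'.
#
#     status = client.head_object(Bucket=bucketName, Key=someKey,
#                                 VersionId=someVersionId).get("Restore")
#     # someKey/someVersionId are placeholders for an object of interest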