code-dumps/py/s3-deep-archive-restore.py

79 lines
2.4 KiB
Python
Raw Normal View History

#!/usr/bin/python3
import concurrent.futures
import os
import pickle
import threading
from pprint import pprint

import boto3
# Per-thread storage for the S3 client used by restoreObject().
_threadState = threading.local()


def _threadClient():
    """Return the calling thread's S3 client, creating it on first use."""
    s3 = getattr(_threadState, "s3", None)
    if s3 is None:
        # boto3.client() goes through the shared default session; boto3
        # documents sessions as not thread-safe, so each worker thread
        # builds its client from a session of its own and then reuses it
        # instead of constructing a new client for every object.
        s3 = boto3.session.Session().client('s3')
        _threadState.s3 = s3
    return s3


def restoreObject(bucket, key, versionId, myCount):
    """Request a 20-day restore of one object version unless one is running.

    Args:
        bucket: Name of the bucket holding the object.
        key: Object key.
        versionId: Version of the object to restore.
        myCount: Sequence number of the object; only used in the log output.

    Returns:
        None. The outcome is reported on stdout.

    Raises:
        botocore ClientError: if head_object or restore_object fails for any
            reason other than a restore already being in progress.
    """
    myClient = _threadClient()
    headObject = myClient.head_object(
        Bucket=bucket,
        Key=key,
        VersionId=versionId)
    if 'ongoing-request="true"' in headObject.get("Restore", ""):
        print("\033[91m", myCount, "Already restoring", key, "\033[0m")
        return
    try:
        myClient.restore_object(
            Bucket=bucket,
            Key=key,
            VersionId=versionId,
            RestoreRequest={'Days': 20}
        )
    except myClient.exceptions.ClientError as err:
        # A restore may have been started between the head_object check and
        # this call; treat that the same as the "already restoring" case.
        if err.response.get("Error", {}).get("Code") != "RestoreAlreadyInProgress":
            raise
        print("\033[91m", myCount, "Already restoring", key, "\033[0m")
        return
    print(myCount, "Restore requested for", key)
# Source-viewer revision timestamp: 2024-07-16 12:35:41 +08:00
# Driver script: walk every object version in the bucket page by page and
# request a restore for each version stored in DEEP_ARCHIVE. Progress is
# checkpointed to a local pickle file so an interrupted run can resume.
bucketName = 's3-emr-hbase'
# threading allows 104000 objects to be restored in an hour, compared to 21000 files without concurrency
concurrency = 15
client = boto3.client('s3')
count = 0
# Number of restore tasks that raised an exception; reported at the end.
failures = 0
checkpointFile = 'nextKey.pickle'
pageSize = 300


def _loadCheckpoint():
    """Return the saved (key marker, version-id marker), or (None, None)."""
    try:
        # NOTE(security): pickle.load can execute arbitrary code. This file
        # must only ever be the checkpoint written by this script itself.
        with open(checkpointFile, 'rb') as file:
            saved = pickle.load(file)
    except OSError:
        print("No nextKey.pickle file. Start from beginning.")
        return None, None
    except (EOFError, pickle.UnpicklingError):
        print("Unreadable nextKey.pickle file. Start from beginning.")
        return None, None
    # Checkpoints written by earlier revisions hold only the key marker.
    if isinstance(saved, str):
        return saved, None
    keyMarker, versionIdMarker = saved
    return keyMarker, versionIdMarker


def _saveCheckpoint(keyMarker, versionIdMarker):
    """Persist the listing position, replacing the old file in one step."""
    tmpName = checkpointFile + '.tmp'
    with open(tmpName, 'wb') as file:
        pickle.dump((keyMarker, versionIdMarker), file)
    # Write-then-rename so a crash never leaves a half-written checkpoint.
    os.replace(tmpName, checkpointFile)


def _clearCheckpoint():
    """Remove the checkpoint file; it may legitimately not exist."""
    try:
        os.remove(checkpointFile)
    except FileNotFoundError:
        pass


def _listPage(keyMarker, versionIdMarker):
    """Fetch one page of object versions starting after the given markers."""
    request = {'Bucket': bucketName, 'MaxKeys': pageSize}
    if keyMarker is not None:
        request['KeyMarker'] = keyMarker
        # Without the version-id marker the listing resumes at the next key,
        # which presumably drops the remaining versions of a key that spans
        # two pages -- see the list_object_versions documentation.
        if versionIdMarker is not None:
            request['VersionIdMarker'] = versionIdMarker
    return client.list_object_versions(**request)


def _restorePage(versions, seenSoFar):
    """Submit restores for the DEEP_ARCHIVE versions of one page.

    Returns the number of restore tasks that raised an exception.
    """
    failed = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        pending = {}
        for number, version in enumerate(versions, start=seenSoFar + 1):
            if version.get("StorageClass") == "DEEP_ARCHIVE":
                future = executor.submit(
                    restoreObject,
                    bucketName,
                    version.get("Key"),
                    version.get("VersionId"),
                    number,
                )
                pending[future] = version.get("Key")
        # Inspect every future: an exception inside a worker is otherwise
        # discarded and the object would silently stay archived.
        for future in concurrent.futures.as_completed(pending):
            error = future.exception()
            if error is not None:
                failed += 1
                print("\033[91m", "Restore failed for", pending[future], error, "\033[0m")
    return failed


nextKey, nextVersionId = _loadCheckpoint()
firstPage = True
while True:
    response = _listPage(nextKey, nextVersionId)
    # "Versions" is absent when a page holds no object versions.
    versions = response.get("Versions", [])
    failures += _restorePage(versions, count)
    count += len(versions)
    nextKey = response.get("NextKeyMarker")
    nextVersionId = response.get("NextVersionIdMarker")
    if firstPage:
        print("NextKey", nextKey)
        firstPage = False
    if nextKey is None:
        break
    # Checkpoint only after the page is fully processed, so an interrupted
    # run repeats the unfinished page instead of skipping it.
    _saveCheckpoint(nextKey, nextVersionId)
_clearCheckpoint()
print('Total objects', count, sep=" ")
if failures:
    print("\033[91m", failures, "restore requests failed; rerun to retry them", "\033[0m")