# 2024-07-16 12:34:47 +08:00  (stray paste/VCS timestamp; commented out so the file parses)
|
|
|
#!/usr/bin/python3
|
|
|
|
import boto3
|
|
|
|
import pickle
|
|
|
|
import os
|
|
|
|
import concurrent.futures
|
|
|
|
from pprint import pprint
|
|
|
|
|
|
|
|
|
|
|
|
def restoreObject(bucket, key, versionId, myCount):
    """Request an S3 Glacier restore for a single object version.

    Skips the request when a restore is already in progress, so the
    script can safely be re-run over the same listing.

    bucket    -- S3 bucket name
    key       -- object key to restore
    versionId -- specific version of the object
    myCount   -- running object counter, used only for log output
    """
    # A fresh client per call keeps each worker thread independent.
    myClient = boto3.client('s3')

    headObject = myClient.head_object(
        Bucket=bucket,
        Key=key,
        VersionId=versionId)

    # While S3 is still thawing the object, the Restore header reads
    # 'ongoing-request="true"' — re-requesting would be a wasted call.
    if headObject.get("Restore") == "ongoing-request=\"true\"":
        print("\033[91m", myCount, "Already restoring", key, "\033[0m")
        return

    # Guard clause above replaces the redundant else-after-return.
    myClient.restore_object(
        Bucket=bucket,
        Key=key,
        VersionId=versionId,
        RestoreRequest={'Days': 20}  # keep the restored copy for 20 days
    )
    print(myCount, "Restore requested for", key)
|
|
|
|
|
|
|
|
|
bucketName = 's3-emr-hbase'

# threading allows 104000 objects be restored in an hour, compared to 21000 files without concurrency
concurrency = 15

client = boto3.client('s3')
count = 0


def submitPage(versions, startCount):
    """Fan one listing page's DEEP_ARCHIVE versions out to worker threads.

    versions   -- the 'Versions' list of a list_object_versions response
    startCount -- running object counter before this page

    Returns the updated running object counter.  Blocks until every
    restore request submitted for this page has completed.
    """
    pageCount = startCount
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        for version in versions:
            pageCount += 1
            # print(pageCount, version.get("Key"), version.get("StorageClass"))
            if version.get("StorageClass") == "DEEP_ARCHIVE":
                executor.submit(restoreObject, bucketName,
                                version.get("Key"), version.get("VersionId"), pageCount)
    return pageCount


# Resume from the checkpoint left by a previous (interrupted) run, if any.
try:
    with open('nextKey.pickle', 'rb') as file:
        nextKey = pickle.load(file)
    response = client.list_object_versions(
        Bucket=bucketName,
        MaxKeys=10,
        KeyMarker=nextKey
    )
except IOError:
    # Message previously named the wrong file ('position.pickle').
    print("No nextKey.pickle file. Start from beginning.")
    response = client.list_object_versions(
        Bucket=bucketName,
        MaxKeys=10
    )

nextKey = response.get("NextKeyMarker")
# .get(..., []) guards against pages containing only delete markers,
# which carry no 'Versions' key at all.
count = submitPage(response.get("Versions", []), count)

print("NextKey", nextKey)

while nextKey is not None:
    response = client.list_object_versions(
        Bucket=bucketName,
        MaxKeys=300,
        KeyMarker=nextKey
    )
    nextKey = response.get("NextKeyMarker")
    if nextKey is not None:
        # Checkpoint so an interrupted run can resume from this page.
        with open('nextKey.pickle', 'wb') as file:
            pickle.dump(nextKey, file)
    elif os.path.exists('nextKey.pickle'):
        # Done: drop the checkpoint.  The existence check matters — a run
        # that finishes on its first loop iteration never wrote one, and
        # an unconditional os.remove would raise FileNotFoundError.
        os.remove('nextKey.pickle')
    count = submitPage(response.get("Versions", []), count)

print('Total objects', count, sep=" ")
|
|
|
|
|