NEW: script to restore versioned objects in Glacier Deep Archive

xpk 2024-07-16 12:34:47 +08:00
parent 22a665f1e8
commit 96fad1b1c1
Signed by: xpk
GPG Key ID: CD4FF6793F09AB86
2 changed files with 90 additions and 0 deletions

py/s3-deep-archive-restore.py Executable file, 78 lines added

@@ -0,0 +1,78 @@
#!/usr/bin/python3
import boto3
import pickle
import os
import concurrent.futures

def restoreObject(bucket, key, versionId, myCount):
    # Each worker thread creates its own client; boto3 clients are not
    # guaranteed to be thread-safe when shared across threads.
    myClient = boto3.client('s3')
    headObject = myClient.head_object(
        Bucket=bucket,
        Key=key,
        VersionId=versionId)
    # An in-progress restore reports Restore: ongoing-request="true".
    if headObject.get("Restore") == "ongoing-request=\"true\"":
        print("\033[91m", myCount, "Already restoring", key, "\033[0m")
        return
    myClient.restore_object(
        Bucket=bucket,
        Key=key,
        VersionId=versionId,
        RestoreRequest={'Days': 20}
    )
    print(myCount, "Restore requested for", key)

bucketName = 'whk1-bea-icc-mbk-prd-s3-emr-hbase'
# With 15 worker threads, roughly 104,000 objects can be restored per hour,
# compared with about 21,000 without concurrency.
concurrency = 15
client = boto3.client('s3')
count = 0
# Resume from the key marker saved by a previous run, if one exists.
try:
    with open('nextKey.pickle', 'rb') as file:
        nextKey = pickle.load(file)
    response = client.list_object_versions(
        Bucket=bucketName,
        MaxKeys=10,
        KeyMarker=nextKey
    )
except FileNotFoundError:
    print("No nextKey.pickle file. Starting from the beginning.")
    response = client.list_object_versions(
        Bucket=bucketName,
        MaxKeys=10
    )
nextKey = response.get("NextKeyMarker")
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
    for i in response.get("Versions", []):
        count += 1
        # print(count, i.get("Key"), i.get("StorageClass"))
        if i.get("StorageClass") == "DEEP_ARCHIVE":
            executor.submit(restoreObject, bucketName, i.get("Key"), i.get("VersionId"), count)
print("NextKey", nextKey)
while nextKey is not None:
    response = client.list_object_versions(
        Bucket=bucketName,
        MaxKeys=300,
        KeyMarker=nextKey
    )
    nextKey = response.get("NextKeyMarker")
    if nextKey is not None:
        # Checkpoint the marker so an interrupted run can resume where it left off.
        with open('nextKey.pickle', 'wb') as file:
            pickle.dump(nextKey, file)
    elif os.path.exists('nextKey.pickle'):
        # Finished: remove the checkpoint so the next run starts fresh.
        os.remove('nextKey.pickle')
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        for i in response.get("Versions", []):
            count += 1
            # print(count, i.get("Key"), i.get("StorageClass"))
            if i.get("StorageClass") == "DEEP_ARCHIVE":
                executor.submit(restoreObject, bucketName, i.get("Key"), i.get("VersionId"), count)
print('Total objects', count)
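
A restore request only makes a temporary copy retrievable, and Deep Archive restores can take up to 48 hours. Below is a minimal follow-up sketch, not part of the commit, for checking which versions are ready: it assumes the same bucket, inspects only the first listing page for brevity, and reads the Restore header that head_object returns (ongoing-request="false" plus an expiry-date once the copy is available).

#!/usr/bin/python3
# Sketch (assumption: same bucket as the restore script above).
import boto3

bucketName = 'whk1-bea-icc-mbk-prd-s3-emr-hbase'
client = boto3.client('s3')
response = client.list_object_versions(Bucket=bucketName, MaxKeys=300)
for v in response.get("Versions", []):
    if v.get("StorageClass") != "DEEP_ARCHIVE":
        continue
    head = client.head_object(Bucket=bucketName, Key=v["Key"], VersionId=v["VersionId"])
    restore = head.get("Restore")
    if restore is None:
        print(v["Key"], "no restore requested")
    elif 'ongoing-request="true"' in restore:
        print(v["Key"], "restore in progress")
    else:
        # e.g. ongoing-request="false", expiry-date="..." means the copy is ready.
        print(v["Key"], "restored:", restore)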

py/socket-test.py Executable file, 12 lines added

@@ -0,0 +1,12 @@
#!/usr/bin/python3
import socket
import traceback

try:
    # Resolve the test hostname, then attempt a TCP connection on port 5000.
    print(socket.gethostbyname('rrtest.zoo.lo'))
    client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    client_socket.settimeout(3)
    client_socket.connect(('rrtest.zoo.lo', 5000))
    print('Connected')
    client_socket.close()
except socket.error as e:
    print('Cannot connect', e)
    traceback.print_exc()
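
gethostbyname() returns only a single address per call, even when the record resolves round-robin, which the rrtest name suggests is what is being tested here. A small companion sketch, assuming the same test hostname, that prints every address the resolver returns using socket.gethostbyname_ex():

#!/usr/bin/python3
# Sketch: list all addresses behind the test name (round-robin check).
import socket

# gethostbyname_ex returns (canonical hostname, alias list, address list).
hostname, aliases, addresses = socket.gethostbyname_ex('rrtest.zoo.lo')
print(hostname, aliases)
for addr in addresses:
    print(addr)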