There may not be much demand for this, but I set things up so that files stored in S3 can also be shared easily from a different vendor's storage. Using the Google Drive API by following the documentation was not difficult, but handling large files took some extra work. Since I may need a similar implementation again, I am leaving this here as a memo just in case.
Automatically transfer S3 data to Google Drive.
Migrated the contents of the S3 bucket to the specified team drive in Python.
Prerequisites: the files to be migrated exist in an S3 bucket; a Google client ID and client secret have been prepared (see the reference site); a Google refresh token has been prepared (see the reference site); and the folder ID of the destination Google Drive folder is known.
This is the source that I actually implemented.
# Migrate a single object from S3 into a Google Drive (shared/team drive) folder.
#
# Flow:
#   1. Download the object from S3 to /tmp.
#   2. Exchange the stored refresh token for a fresh OAuth2 access token.
#   3. Search the target Drive folder for a file with the same name.
#   4. Initiate a resumable upload session (POST for a new file, PATCH to
#      overwrite an existing one), then PUT the raw file bytes to the
#      session URL returned in the Location header.
import json
import os

import boto3
import magic
import requests

## setting info
CONTENT_BUCKET_NAME = 'MY_S3_BUCKET_NAME'
CONTENT_BACKUP_KEY = 'MY_S3_BUCKET_KEY'
GOOGLE_CLIENT_ID = "XXXXXXXXXXXX.apps.googleusercontent.com"
GOOGLE_CLIENT_SECRET = "XXXXXXXXXXXX"
GOOGLE_REFRESH_TOKEN = "XXXXXXXXXXXX"
GOOGLE_FOLDER_ID = 'GOOGLE_FOLDER_ID'

# API endpoints.
TOKEN_URL = 'https://accounts.google.com/o/oauth2/token'
FILES_URL = "https://www.googleapis.com/drive/v3/files"
UPLOAD_URL = "https://www.googleapis.com/upload/drive/v3/files"


def download_from_s3(bucket: str, key: str) -> str:
    """Download s3://bucket/key into /tmp and return the local file path."""
    s3 = boto3.resource('s3')
    # BUG FIX: the original used key.split("/")[1], which breaks for keys
    # without a prefix and for nested prefixes; [-1] always yields the
    # final path component (the basename).
    file_name = key.split("/")[-1]
    file_path = os.path.join("/tmp", file_name)
    s3.Object(bucket, key).download_file(file_path)
    return file_path


def get_access_token() -> str:
    """Exchange the long-lived refresh token for a short-lived access token.

    Raises requests.HTTPError if the token endpoint rejects the request.
    """
    payload = {
        "grant_type": "refresh_token",
        "client_id": GOOGLE_CLIENT_ID,
        "client_secret": GOOGLE_CLIENT_SECRET,
        "refresh_token": GOOGLE_REFRESH_TOKEN,
    }
    headers = {"Content-Type": "application/json", "X-Accept": "application/json"}
    resp = requests.post(TOKEN_URL, headers=headers, data=json.dumps(payload))
    resp.raise_for_status()
    # SECURITY FIX: the original printed the access token; never log it —
    # it grants full access to the Drive until it expires.
    return resp.json()['access_token']


def find_existing_file(access_token: str, folder_id: str, file_name: str):
    """Return the Drive file id of file_name inside folder_id, or None.

    Searches shared drives too (supportsAllDrives / includeItemsFromAllDrives).
    """
    headers = {
        'Authorization': 'Bearer ' + access_token,
        'Content-Type': 'application/json; charset=UTF-8',
    }
    params = {
        "q": "'" + folder_id + "' in parents and name='" + file_name + "' and trashed=false",
        "supportsAllDrives": True,
        "includeItemsFromAllDrives": True,
    }
    resp = requests.get(FILES_URL, params=params, headers=headers)
    resp.raise_for_status()
    files = resp.json()['files']
    return files[0]['id'] if files else None


def upload_to_drive(access_token: str, file_path: str, folder_id: str) -> None:
    """Create or overwrite file_path's basename in the Drive folder.

    Uses the Drive v3 resumable-upload protocol: a metadata request opens
    the session, then the bytes are PUT to the returned session URL.
    """
    file_name = os.path.basename(file_path)
    file_size = os.path.getsize(file_path)
    mime_type = magic.Magic(mime=True).from_file(file_path)

    # Session-initiation headers: the body is JSON metadata; the eventual
    # content is described by the X-Upload-Content-* headers.
    # (The original also sent a hand-written Content-Length of the *file*
    # on the metadata request — wrong, and requests sets it automatically.)
    session_headers = {
        'Authorization': 'Bearer ' + access_token,
        'Content-Type': 'application/json; charset=UTF-8',
        'X-Upload-Content-Type': mime_type,
        'X-Upload-Content-Length': str(file_size),
    }

    existing_id = find_existing_file(access_token, folder_id, file_name)
    if existing_id is None:
        # No file with this name yet: POST opens a create session.
        # BUG FIX: dropped the original's 'kind': 'drive#permission' /
        # 'permissionDetails' entries — those are Permission-resource
        # fields and are invalid in File metadata.
        metadata = {"name": file_name, "parents": [folder_id]}
        init = requests.post(
            UPLOAD_URL + "?uploadType=resumable&supportsAllDrives=true",
            data=json.dumps(metadata),
            headers=session_headers,
        )
    else:
        # File exists: PATCH on the file id opens an update session.
        metadata = {"name": file_name}
        init = requests.patch(
            UPLOAD_URL + "/" + existing_id + "?uploadType=resumable&supportsAllDrives=true",
            data=json.dumps(metadata),
            headers=session_headers,
        )
    init.raise_for_status()
    session_url = init.headers['Location']

    # BUG FIX: the file bytes must be PUT to the session URL with the
    # file's own content type. The original POST/PATCHed them while
    # reusing the JSON-metadata headers.
    upload_headers = {
        'Authorization': 'Bearer ' + access_token,
        'Content-Type': mime_type,
        'Content-Length': str(file_size),
    }
    with open(file_path, 'rb') as data:
        requests.put(session_url, data=data, headers=upload_headers).raise_for_status()


def main() -> None:
    """Entry point: copy the configured S3 object into the Drive folder."""
    file_path = download_from_s3(CONTENT_BUCKET_NAME, CONTENT_BACKUP_KEY)
    access_token = get_access_token()
    upload_to_drive(access_token, file_path, GOOGLE_FOLDER_ID)


if __name__ == "__main__":
    main()
I am sure there is still room for improvement, so if you know of an easier way to implement this, please leave a comment.
Recommended Posts