openreplay/api/chalicelib/utils/s3urls.py
2021-10-29 11:08:44 +02:00

120 lines
3.4 KiB
Python

import re
from urllib.parse import urlparse
def style(url):
""" Determine 'style' of a given S3 url
>>> style("s3://my-bucket/my-key/")
's3'
>>> style("s3://user@my-bucket/my-key/")
's3-credential'
>>> style("https://my-bucket.s3.amazonaws.com/my-key/")
'bucket-in-netloc'
>>> style("https://s3.amazonaws.com/my-bucket/my-key/")
'bucket-in-path'
"""
o = urlparse(url)
if o.scheme == 's3':
if '@' in o.netloc:
return 's3-credential'
else:
return 's3'
if re.search(r'^s3[.-](\w{2}-\w{4,9}-\d\.)?amazonaws\.com', o.netloc):
return 'bucket-in-path'
if re.search(r'\.s3[.-](\w{2}-\w{4,9}-\d\.)?amazonaws\.com', o.netloc):
return 'bucket-in-netloc'
raise ValueError(f'Unknown url style: {url}')
def build_url(url_type, bucket, key=None, region=None, credential_name=None):
""" Construct an S3 URL
Args:
url_type: one of 's3', 's3-credential', 'bucket-in-path', 'bucket-in-netloc'
bucket: S3 bucket name
key: Key within bucket (optional)
region: S3 region name (optional)
credential_name: user/credential name to use in S3 scheme url (optional)
Returns
(string) S3 URL
"""
if url_type == 's3':
credential = f'{credential_name}@' if credential_name else ""
return f's3://{credential}{bucket}/{key or ""}'
if url_type == 'bucket-in-path':
return f'https://s3{"-" if region else ""}{region or ""}.amazonaws.com/{bucket}/{key}'
if url_type == 'bucket-in-netloc':
return f'https://{bucket}.s3.amazonaws.com/{key}'
raise ValueError(f'Invalid url_type: {url_type}')
def parse_s3_credential_url(url):
""" Parse S3 scheme url containing a user/credential name
>>> parse_s3_url("s3://user@my-bucket/my-key")
{'bucket': 'my-bucket', 'key': 'my-key/', 'credential_name': 'user'}
"""
o = urlparse(url)
cred_name, bucket = o.netloc.split('@')
key = o.path if o.path[0] != '/' else o.path[1:]
return {'bucket': bucket, 'key': key, 'credential_name': cred_name}
def parse_s3_url(url):
""" Parse S3 scheme url
>>> parse_s3_url("s3://my-bucket/my-key")
{'bucket': 'my-bucket', 'key': 'my-key/'}
"""
o = urlparse(url)
bucket = o.netloc
key = o.path if o.path[0] != '/' else o.path[1:]
return {'bucket': bucket, 'key': key}
def parse_bucket_in_path_url(url):
""" Parse url with bucket name path
>>> parse_bucket_in_path_url("https://s3-eu-west-1.amazonaws.com/my-bucket/my-key/")
{'bucket': 'my-bucket', 'key': 'my-key/'}
"""
path = urlparse(url).path
bucket = path.split('/')[1]
key = '/'.join(path.split('/')[2:])
return {'bucket': bucket, 'key': key}
def parse_bucket_in_netloc_url(url):
""" Parse url with bucket name in host/netloc
>>> parse_bucket_in_netloc_url("https://my-bucket.s3.amazonaws.com/my-key/")
{'bucket': 'my-bucket', 'key': 'my-key/'}
"""
o = urlparse(url)
bucket = o.netloc.split('.')[0]
key = o.path if o.path[0] != '/' else o.path[1:]
return {'bucket': bucket, 'key': key}
def parse_url(url):
url_style = style(url)
if url_style == 's3-credential':
return parse_s3_credential_url(url)
if url_style == 's3':
return parse_s3_url(url)
if url_style == 'bucket-in-path':
return parse_bucket_in_path_url(url)
if url_style == 'bucket-in-netloc':
return parse_bucket_in_netloc_url(url)