120 lines
3.4 KiB
Python
120 lines
3.4 KiB
Python
import re
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
def style(url):
    """ Classify an S3 url by the way it addresses the bucket

    Returns one of 's3', 's3-credential', 'bucket-in-path' or
    'bucket-in-netloc'; raises ValueError when none of them applies.

    >>> style("s3://my-bucket/my-key/")
    's3'

    >>> style("s3://user@my-bucket/my-key/")
    's3-credential'

    >>> style("https://my-bucket.s3.amazonaws.com/my-key/")
    'bucket-in-netloc'

    >>> style("https://s3.amazonaws.com/my-bucket/my-key/")
    'bucket-in-path'
    """
    parsed = urlparse(url)
    host = parsed.netloc

    if parsed.scheme == 's3':
        # An '@' in the host part means a credential name precedes the bucket.
        return 's3-credential' if '@' in host else 's3'

    # Checked in order: a host that *starts* with "s3" carries the bucket in
    # the path; a host with ".s3" further along carries it as a prefix.
    host_patterns = (
        (r'^s3[.-](\w{2}-\w{4,9}-\d\.)?amazonaws\.com', 'bucket-in-path'),
        (r'\.s3[.-](\w{2}-\w{4,9}-\d\.)?amazonaws\.com', 'bucket-in-netloc'),
    )
    for pattern, label in host_patterns:
        if re.search(pattern, host):
            return label

    raise ValueError(f'Unknown url style: {url}')
|
|
|
|
|
|
def build_url(url_type, bucket, key=None, region=None, credential_name=None):
    """ Construct an S3 URL

    Args:
        url_type: one of 's3', 's3-credential', 'bucket-in-path', 'bucket-in-netloc'
        bucket: S3 bucket name
        key: Key within bucket (optional); None or omitted gives an empty key
        region: S3 region name (optional); only used by 'bucket-in-path'
        credential_name: user/credential name to use in S3 scheme url
            (optional for 's3', required for 's3-credential')

    Returns
        (string) S3 URL

    Raises:
        ValueError: if url_type is not one of the supported values, or if
            url_type is 's3-credential' and no credential_name is given
    """
    # A missing key must render as an empty string, never as the text "None".
    key = key or ""

    if url_type == 's3-credential' and not credential_name:
        raise ValueError("credential_name is required for url_type 's3-credential'")

    # 's3' and 's3-credential' share one layout; the credential prefix is
    # simply left out when no credential_name was supplied.
    if url_type in ('s3', 's3-credential'):
        credential = f'{credential_name}@' if credential_name else ""
        return f's3://{credential}{bucket}/{key}'

    if url_type == 'bucket-in-path':
        region_suffix = f'-{region}' if region else ""
        return f'https://s3{region_suffix}.amazonaws.com/{bucket}/{key}'

    if url_type == 'bucket-in-netloc':
        return f'https://{bucket}.s3.amazonaws.com/{key}'

    raise ValueError(f'Invalid url_type: {url_type}')
|
|
|
|
|
|
def parse_s3_credential_url(url):
    """ Parse S3 scheme url containing a user/credential name

    >>> parse_s3_credential_url("s3://user@my-bucket/my-key/")
    {'bucket': 'my-bucket', 'key': 'my-key/', 'credential_name': 'user'}

    >>> parse_s3_credential_url("s3://user@my-bucket")
    {'bucket': 'my-bucket', 'key': '', 'credential_name': 'user'}

    Raises:
        ValueError: if the url has no '@' separating credential name and bucket
    """
    o = urlparse(url)
    # Split on the last '@' so the bucket is always the part right before the path.
    cred_name, separator, bucket = o.netloc.rpartition('@')
    if not separator:
        raise ValueError(f'No credential name in url: {url}')
    # Drop only the single leading '/'; startswith() is safe on an empty path,
    # where indexing path[0] would raise IndexError.
    key = o.path[1:] if o.path.startswith('/') else o.path
    return {'bucket': bucket, 'key': key, 'credential_name': cred_name}
|
|
|
|
|
|
def parse_s3_url(url):
    """ Parse S3 scheme url

    >>> parse_s3_url("s3://my-bucket/my-key/")
    {'bucket': 'my-bucket', 'key': 'my-key/'}

    >>> parse_s3_url("s3://my-bucket")
    {'bucket': 'my-bucket', 'key': ''}
    """
    o = urlparse(url)
    bucket = o.netloc
    # Drop only the single leading '/'; startswith() is safe on an empty path,
    # where indexing path[0] would raise IndexError.
    key = o.path[1:] if o.path.startswith('/') else o.path
    return {'bucket': bucket, 'key': key}
|
|
|
|
|
|
def parse_bucket_in_path_url(url):
    """ Parse url with bucket name in the path

    The first path segment is the bucket; everything after it is the key.
    A url without any path yields an empty bucket and an empty key, the same
    result as a url whose path is just "/".

    >>> parse_bucket_in_path_url("https://s3-eu-west-1.amazonaws.com/my-bucket/my-key/")
    {'bucket': 'my-bucket', 'key': 'my-key/'}

    >>> parse_bucket_in_path_url("https://s3.amazonaws.com/my-bucket")
    {'bucket': 'my-bucket', 'key': ''}
    """
    path = urlparse(url).path
    # Discard everything up to and including the first '/', then cut the
    # remainder at the next '/'. partition() never raises, so an empty path
    # no longer triggers the IndexError that indexing a split() result did.
    _, _, remainder = path.partition('/')
    bucket, _, key = remainder.partition('/')
    return {'bucket': bucket, 'key': key}
|
|
|
|
|
|
def parse_bucket_in_netloc_url(url):
    """ Parse url with bucket name in host/netloc

    The bucket is everything in the host that precedes the S3 endpoint
    suffix, so bucket names that themselves contain dots are kept whole.

    >>> parse_bucket_in_netloc_url("https://my-bucket.s3.amazonaws.com/my-key/")
    {'bucket': 'my-bucket', 'key': 'my-key/'}

    >>> parse_bucket_in_netloc_url("https://my.dotted.bucket.s3.amazonaws.com/my-key")
    {'bucket': 'my.dotted.bucket', 'key': 'my-key'}
    """
    o = urlparse(url)
    # Locate the ".s3[.-][region.]amazonaws.com" endpoint suffix; taking only
    # the first dot-separated label would truncate dotted bucket names.
    endpoint = re.search(r'\.s3[.-](\w{2}-\w{4,9}-\d\.)?amazonaws\.com', o.netloc)
    if endpoint:
        bucket = o.netloc[:endpoint.start()]
    else:
        # Host does not look like an S3 endpoint: fall back to the first label.
        bucket = o.netloc.split('.')[0]
    # Drop only the single leading '/'; startswith() is safe on an empty path,
    # where indexing path[0] would raise IndexError.
    key = o.path[1:] if o.path.startswith('/') else o.path
    return {'bucket': bucket, 'key': key}
|
|
|
|
|
|
def parse_url(url):
    """ Parse any supported S3 url into its components

    Determines the url's style with style() and hands the url to the parser
    for that style. The result is that parser's dict, which always holds
    'bucket' and 'key' and, for credential urls, 'credential_name'.
    Errors raised by style() for unrecognised urls propagate unchanged.
    """
    parsers = {
        's3-credential': parse_s3_credential_url,
        's3': parse_s3_url,
        'bucket-in-path': parse_bucket_in_path_url,
        'bucket-in-netloc': parse_bucket_in_netloc_url,
    }
    parser = parsers.get(style(url))
    if parser is None:
        # No parser registered for this style: same implicit None as before.
        return None
    return parser(url)
|