"""
|
|
S3 client wrapper — duck-typed, lazy-imports boto3 module.
|
|
Works with any S3-compatible storage (AWS, MinIO, etc.).
|
|
|
|
Resilience features:
|
|
- Adaptive retry with exponential backoff (up to 10 attempts)
|
|
- Multipart upload/download with configurable chunk size
|
|
- Auto-reconnect on connection loss (network switch, Wi-Fi change)
|
|
- boto3 TransferConfig tuned for unstable connections
|
|
"""
|
|
|
|
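# Typical usage, a minimal sketch (the server dict below is hypothetical;
# the recognized keys follow S3Client.__init__):
#
#     client = S3Client({"ip": "minio.local", "port": 9000, "use_ssl": False,
#                        "access_key": "...", "secret_key": "...",
#                        "bucket": "backups"})
#     if client.connect():
#         client.upload_file("/tmp/dump.bin", "backups", "db/dump.bin")
#         client.disconnect()
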
import os
import time

from core.logger import log

_boto3 = None
_botocore = None

# Retry / resilience constants
_MAX_RETRIES = 10
_BASE_DELAY = 2.0  # seconds
_MAX_DELAY = 60.0  # seconds
_MULTIPART_THRESHOLD = 8 * 1024 * 1024  # 8 MB — use multipart above this
_MULTIPART_CHUNKSIZE = 8 * 1024 * 1024  # 8 MB chunks
_MAX_CONCURRENCY = 4  # parallel parts (low for unstable links)


def _get_boto3():
    global _boto3, _botocore
    if _boto3 is None:
        import boto3 as _b
        import botocore as _bc
        _boto3 = _b
        _botocore = _bc
    return _boto3


def _get_transfer_config():
    """TransferConfig tuned for unreliable connections."""
    from boto3.s3.transfer import TransferConfig
    return TransferConfig(
        multipart_threshold=_MULTIPART_THRESHOLD,
        multipart_chunksize=_MULTIPART_CHUNKSIZE,
        max_concurrency=_MAX_CONCURRENCY,
        num_download_attempts=_MAX_RETRIES,
    )


def _retry_delay(attempt: int) -> float:
    """Exponential backoff: 2, 4, 8, 16, 32, 60, 60, ..."""
    return min(_BASE_DELAY * (2 ** attempt), _MAX_DELAY)


class S3Client:
    """Manage a single S3 connection. No ABC — duck typing."""

    def __init__(self, server: dict):
        self._server = server
        self._endpoint = server.get("ip", "")
        # Bare host given: prepend the scheme (https or http per use_ssl),
        # plus the port unless it is the scheme default.
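        # Illustrative mappings: "minio.local" + port 9000 + use_ssl=False
        #   -> "http://minio.local:9000"
        # "s3.example.com" + port 443 (default) -> "https://s3.example.com"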
        if self._endpoint and not self._endpoint.startswith("http"):
            use_ssl = server.get("use_ssl", True)
            scheme = "https" if use_ssl else "http"
            port = int(server.get("port", 443))
            if (scheme == "https" and port == 443) or (scheme == "http" and port == 80):
                self._endpoint = f"{scheme}://{self._endpoint}"
            else:
                self._endpoint = f"{scheme}://{self._endpoint}:{port}"
        self._access_key = server.get("access_key", "")
        self._secret_key = server.get("secret_key", "")
        self._bucket = server.get("bucket", "")
        self._use_ssl = server.get("use_ssl", True)
        self._client = None
        self._transfer_config = None

    # -- lifecycle --------------------------------------------------------

    def connect(self) -> bool:
        try:
            b3 = _get_boto3()
            import botocore.config
            import urllib3
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
            config = botocore.config.Config(
                signature_version="s3v4",
                connect_timeout=15,
                read_timeout=60,
                retries={"max_attempts": 5, "mode": "adaptive"},
                tcp_keepalive=True,
            )
            self._client = b3.client(
                "s3",
                endpoint_url=self._endpoint,
                aws_access_key_id=self._access_key,
                aws_secret_access_key=self._secret_key,
                config=config,
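                # NOTE: TLS certificate verification is disabled (the urllib3
                # warning is silenced above), presumably to accept the
                # self-signed certs common on private MinIO deployments.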
                verify=False,
            )
            self._transfer_config = _get_transfer_config()
            # Test connection
            self._client.list_buckets()
            log.info("S3 connected %s", self._endpoint)
            return True
        except Exception as exc:
            log.error("S3 connect failed: %s", exc)
            self._client = None
            return False

    def _reconnect(self) -> bool:
        """Try to re-establish the S3 connection after a drop."""
        log.warning("S3 reconnecting to %s...", self._endpoint)
        self._client = None
        return self.connect()

    def _ensure_connected(self) -> bool:
        """Check connection, reconnect if needed."""
        if self._client is None:
            return self._reconnect()
        try:
            self._client.list_buckets()
            return True
        except Exception:
            return self._reconnect()

    def disconnect(self):
        self._client = None
        log.info("S3 disconnected")

    def check_connection(self) -> bool:
        try:
            if self._client is None:
                return False
            self._client.list_buckets()
            return True
        except Exception:
            return False

    # -- bucket operations ------------------------------------------------

    def list_buckets(self) -> list[dict]:
        """Return list of {'Name': str, 'CreationDate': datetime}."""
        if not self._ensure_connected():
            return []
        try:
            resp = self._client.list_buckets()
            return resp.get("Buckets", [])
        except Exception as exc:
            log.error("S3 list_buckets failed: %s", exc)
            return []

    # -- object operations ------------------------------------------------

    def list_objects(self, bucket: str = "", prefix: str = "",
                     delimiter: str = "/") -> tuple[list[dict], list[str]]:
        """List objects and common prefixes in a bucket/prefix.

        Returns (objects, prefixes) where:
        - objects: list of {'Key', 'Size', 'LastModified'}
        - prefixes: list of prefix strings (subdirectories)
        """
        if not self._ensure_connected():
            return [], []
        bucket = bucket or self._bucket
        if not bucket:
            return [], []
        try:
            objects = []
            prefixes = []
            paginator = self._client.get_paginator("list_objects_v2")
            kwargs = {"Bucket": bucket, "Delimiter": delimiter}
            if prefix:
                kwargs["Prefix"] = prefix
            for page in paginator.paginate(**kwargs):
                for obj in page.get("Contents", []):
                    # Skip the prefix itself
                    if obj["Key"] != prefix:
                        objects.append(obj)
                for cp in page.get("CommonPrefixes", []):
                    prefixes.append(cp["Prefix"])
            return objects, prefixes
        except Exception as exc:
            log.error("S3 list_objects failed: %s", exc)
            return [], []

    def upload_file(self, local_path: str, bucket: str, key: str,
                    progress_cb=None, status_cb=None) -> bool:
        """Upload a local file to S3 with retry and resume.

        progress_cb(bytes_transferred) — called periodically for progress bar.
        status_cb(message) — called with status messages (retry info, etc.).

        Uses multipart upload for files > 8 MB.
        On failure, retries up to 10 times with exponential backoff.
        boto3 multipart automatically resumes failed parts.
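
        Example (illustrative wiring; `client` is a connected S3Client):
            sent = []
            ok = client.upload_file("/tmp/a.bin", "bkt", "a.bin",
                                    progress_cb=lambda n: sent.append(n),
                                    status_cb=print)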
        """
        if not self._ensure_connected():
            return False
        file_size = os.path.getsize(local_path)
        for attempt in range(_MAX_RETRIES):
            try:
                self._client.upload_file(
                    local_path, bucket, key,
                    Config=self._transfer_config,
                    Callback=progress_cb,
                )
                log.info("S3 uploaded %s -> s3://%s/%s (%d bytes)",
                         local_path, bucket, key, file_size)
                return True
            except Exception as exc:
                delay = _retry_delay(attempt)
                log.warning("S3 upload attempt %d/%d failed: %s (retry in %.0fs)",
                            attempt + 1, _MAX_RETRIES, exc, delay)
                if status_cb:
                    status_cb(f"Retry {attempt + 1}/{_MAX_RETRIES} in {delay:.0f}s...")
                # Progress is not reset between retries: boto3's internal
                # counter can't easily be rewound, and the GUI tracks total
                # bytes itself, so no action is needed here.
                time.sleep(delay)
                # Reconnect before retry
                if not self._reconnect():
                    log.error("S3 reconnect failed on attempt %d", attempt + 1)
                    continue

        log.error("S3 upload failed after %d attempts: %s -> s3://%s/%s",
                  _MAX_RETRIES, local_path, bucket, key)
        return False

    def download_file(self, bucket: str, key: str, local_path: str,
                      progress_cb=None, status_cb=None) -> bool:
        """Download an S3 object to a local file with retry.

        progress_cb(bytes_transferred) — called periodically.
        status_cb(message) — called with retry info.

        boto3 TransferConfig.num_download_attempts handles part-level retries.
        This method adds full-transfer retries with reconnect.
        """
        if not self._ensure_connected():
            return False
        for attempt in range(_MAX_RETRIES):
            try:
                self._client.download_file(
                    bucket, key, local_path,
                    Config=self._transfer_config,
                    Callback=progress_cb,
                )
                log.info("S3 downloaded s3://%s/%s -> %s", bucket, key, local_path)
                return True
            except Exception as exc:
                delay = _retry_delay(attempt)
                log.warning("S3 download attempt %d/%d failed: %s (retry in %.0fs)",
                            attempt + 1, _MAX_RETRIES, exc, delay)
                if status_cb:
                    status_cb(f"Retry {attempt + 1}/{_MAX_RETRIES} in {delay:.0f}s...")
                time.sleep(delay)
                if not self._reconnect():
                    log.error("S3 reconnect failed on attempt %d", attempt + 1)
                    continue

        log.error("S3 download failed after %d attempts: s3://%s/%s -> %s",
                  _MAX_RETRIES, bucket, key, local_path)
        return False

    def delete_object(self, bucket: str, key: str) -> bool:
        """Delete an object from S3."""
        if not self._ensure_connected():
            return False
        try:
            self._client.delete_object(Bucket=bucket, Key=key)
            log.info("S3 deleted s3://%s/%s", bucket, key)
            return True
        except Exception as exc:
            log.error("S3 delete failed: %s", exc)
            return False

    def generate_presigned_url(self, bucket: str, key: str,
                               expires_in: int = 3600) -> str | None:
        """Generate a presigned download URL for an object.

        expires_in: URL lifetime in seconds (default 1 hour).
        Returns URL string or None on failure.
        """
        if not self._ensure_connected():
            return None
        try:
            url = self._client.generate_presigned_url(
                "get_object",
                Params={"Bucket": bucket, "Key": key},
                ExpiresIn=expires_in,
            )
            return url
        except Exception as exc:
            log.error("S3 presigned URL failed: %s", exc)
            return None
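        # e.g. (illustrative): generate_presigned_url("backups", "db/dump.bin",
        #     expires_in=900) returns a URL shareable for 15 minutes.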

    def get_direct_url(self, bucket: str, key: str) -> str:
        """Build a direct (permanent) URL: endpoint/bucket/key.

        Unlike a presigned URL this carries no credentials, so it only
        resolves if the bucket or object is publicly readable.
        """
        endpoint = self._endpoint.rstrip("/")
        return f"{endpoint}/{bucket}/{key}"

    def get_object_size(self, bucket: str, key: str) -> int:
        """Get size of an object in bytes."""
        if not self._ensure_connected():
            return 0
        try:
            resp = self._client.head_object(Bucket=bucket, Key=key)
            return resp.get("ContentLength", 0)
        except Exception:
            return 0