v1.9.8: S3 resumable download — Range GET with .s3part resume on disconnect

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
chrome-storm-c442
2026-03-03 08:33:48 -05:00
parent f445953a82
commit 5b4672dfe3
4 changed files with 147 additions and 9 deletions

View File

@@ -240,16 +240,145 @@ class S3Client:
def download_file(self, bucket: str, key: str, local_path: str,
                  progress_cb=None, status_cb=None) -> bool:
    """Download with resume support using S3 Range GET.

    On disconnect, keeps the .s3part file and resumes from where it
    stopped.  The ETag saved in the .s3meta sidecar is compared with
    the remote ETag to detect if the object changed on the server
    (in that case the partial file is discarded and the download
    restarts from zero).

    boto3 TransferConfig.num_download_attempts handles part-level
    retries; this method adds full-transfer retries with reconnect.

    progress_cb(bytes_delta) -- called with each chunk size (and once
        with the already-downloaded byte count when resuming, so the
        caller's progress bar starts at the right position).
    status_cb(message) -- called with retry / resume info.

    Returns True on success, False after _MAX_RETRIES failed attempts.
    """
    if not self._ensure_connected():
        return False
    # --- 1. HEAD -- get size and ETag ---
    try:
        head = self._client.head_object(Bucket=bucket, Key=key)
        total_size = head["ContentLength"]
        etag = head.get("ETag", "")
    except Exception as exc:
        log.error("S3 head_object failed: %s", exc)
        return False
    # Small files (< 1 MB) -- simple download, no resume overhead
    if total_size < 1024 * 1024:
        return self._download_file_simple(
            bucket, key, local_path, progress_cb, status_cb)
    # --- 2. Check .s3part (partial download) ---
    temp_path = local_path + ".s3part"
    meta_path = local_path + ".s3meta"
    start_byte = 0
    if os.path.exists(temp_path):
        saved_etag = ""
        if os.path.exists(meta_path):
            try:
                with open(meta_path, "r") as f:
                    saved_etag = f.read().strip()
            except Exception:
                pass
        if saved_etag == etag and etag:
            start_byte = os.path.getsize(temp_path)
            if start_byte >= total_size:
                # Already fully downloaded
                os.replace(temp_path, local_path)
                self._cleanup_meta(meta_path)
                self._last_ok = time.time()
                return True
            log.info("S3 resuming from byte %d / %d", start_byte, total_size)
            if status_cb:
                mb = start_byte / (1024 * 1024)
                status_cb(f"Resuming from {mb:.1f} MB...")
        else:
            # ETag changed -- file was modified on server, start fresh
            try:
                os.remove(temp_path)
            except OSError:
                pass
            start_byte = 0
    # Save ETag for future resume
    try:
        with open(meta_path, "w") as f:
            f.write(etag)
    except Exception:
        pass
    # Report already-downloaded bytes so progress bar is correct
    if progress_cb and start_byte > 0:
        progress_cb(start_byte)
    # --- 3. Download loop with retry ---
    chunk_size = _MULTIPART_CHUNKSIZE  # 8 MB
    for attempt in range(_MAX_RETRIES):
        try:
            # BUGFIX: previously this was `if start_byte >= total_size:
            # break`, which exited the loop and fell through to the
            # failure path, returning False even when the temp file was
            # already complete (e.g. the last retry finished the write
            # but raised before rename).  Now a complete temp file
            # skips the GET and goes straight to verify + rename.
            if start_byte < total_size:
                range_header = f"bytes={start_byte}-"
                resp = self._client.get_object(
                    Bucket=bucket, Key=key, Range=range_header)
                body = resp["Body"]
                try:
                    with open(temp_path, "ab") as f:
                        for chunk in body.iter_chunks(chunk_size=chunk_size):
                            f.write(chunk)
                            f.flush()
                            start_byte += len(chunk)
                            if progress_cb:
                                progress_cb(len(chunk))
                finally:
                    # BUGFIX: close the streaming body so the HTTP
                    # connection is released even on mid-stream errors.
                    body.close()
            # --- 4. Verify size ---
            actual = os.path.getsize(temp_path)
            if actual != total_size:
                log.warning("S3 size mismatch: got %d, expected %d",
                            actual, total_size)
                # Don't delete -- maybe we can resume next attempt
                if actual < total_size:
                    start_byte = actual
                    continue
                # actual > total_size -- corrupted, restart
                try:
                    os.remove(temp_path)
                except OSError:
                    pass
                start_byte = 0
                continue
            # --- 5. Atomic rename ---
            os.replace(temp_path, local_path)
            self._cleanup_meta(meta_path)
            self._last_ok = time.time()
            log.info("S3 downloaded s3://%s/%s -> %s (%d bytes, resumed)",
                     bucket, key, local_path, total_size)
            return True
        except Exception as exc:
            # Update start_byte from actual file size
            if os.path.exists(temp_path):
                start_byte = os.path.getsize(temp_path)
            delay = _retry_delay(attempt)
            log.warning("S3 download attempt %d/%d failed at byte %d: %s",
                        attempt + 1, _MAX_RETRIES, start_byte, exc)
            if status_cb:
                pct = (start_byte / total_size * 100) if total_size else 0
                status_cb(f"Retry {attempt+1}/{_MAX_RETRIES} at {pct:.0f}%...")
            time.sleep(delay)
            self._reconnect()
            # Adaptive chunk: reduce on repeated failures
            if attempt >= 2 and chunk_size > 1024 * 1024:
                chunk_size = 1024 * 1024  # 1 MB
                log.info("S3 reducing chunk size to 1 MB")
    log.error("S3 download failed after %d attempts: s3://%s/%s -> %s",
              _MAX_RETRIES, bucket, key, local_path)
    return False
def _download_file_simple(self, bucket: str, key: str, local_path: str,
progress_cb=None, status_cb=None) -> bool:
"""Simple download for small files (no resume overhead)."""
for attempt in range(_MAX_RETRIES):
try:
self._client.download_file(
@@ -257,6 +386,7 @@ class S3Client:
Config=self._transfer_config,
Callback=progress_cb,
)
self._last_ok = time.time()
log.info("S3 downloaded s3://%s/%s -> %s", bucket, key, local_path)
return True
except Exception as exc:
@@ -269,11 +399,18 @@ class S3Client:
if not self._reconnect():
log.error("S3 reconnect failed on attempt %d", attempt + 1)
continue
log.error("S3 download failed after %d attempts: s3://%s/%s -> %s",
_MAX_RETRIES, bucket, key, local_path)
return False
@staticmethod
def _cleanup_meta(meta_path: str):
"""Remove .s3meta file silently."""
try:
os.remove(meta_path)
except OSError:
pass
def delete_object(self, bucket: str, key: str) -> bool:
"""Delete an object from S3."""
if not self._ensure_connected():

View File

@@ -349,8 +349,9 @@ class S3Tab(ctk.CTkFrame):
def _on_transfer_status(self, message: str):
    """Called from transfer thread with retry/status info."""
    # Note: do NOT reset _transfer_bytes here -- resumable download
    # reports already-downloaded bytes via progress_cb, so resetting
    # would break the progress bar on resume.
    # Marshal the widget update onto the Tk main loop; this method is
    # invoked from the transfer worker thread.
    self.after(0, lambda: self._status_label.configure(text=message))
def _upload_files(self, paths: list[str]):

Binary file not shown.

View File

@@ -1,6 +1,6 @@
"""Version info for ServerManager."""
__version__ = "1.9.7"
__version__ = "1.9.8"
__app_name__ = "ServerManager"
__author__ = "aibot777"
__description__ = "Desktop GUI for managing remote servers"