author     Arun Isaac    2025-01-08 14:08:53 +0000
committer  Arun Isaac    2025-01-08 14:22:09 +0000
commit     82b2fa53e4b19d2bc90c2e5f879c2fa4336b6b39
tree       699e61c4c0f5ae80c86adfe3c001006457f966cc
parent     bd69a8b6c33f56d1ea9eb83f0fc612b310d0a43d
Resume interrupted downloads.
-rw-r--r--  README.md         2
-rwxr-xr-x  globus-weblinks  33
2 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/README.md b/README.md
index 5e68609..11cef40 100644
--- a/README.md
+++ b/README.md
@@ -32,4 +32,6 @@ Now, all that remains is to download your files. Do it like so:
 ```
 ./globus-weblinks YOUR-ENDPOINT-ID cookies.json
 ```
+If the download is interrupted, you can re-run the command and only incomplete files will be downloaded again.
+
 Enjoy!
diff --git a/globus-weblinks b/globus-weblinks
index 8a7d718..35ddc47 100755
--- a/globus-weblinks
+++ b/globus-weblinks
@@ -31,12 +31,29 @@ def find_files(transfer_client, endpoint_id, path=PurePath("/")):
             yield path / file["name"]
 
 def download_file(url, cookies):
+    """Download URL.
+
+    Return True if it was actually downloaded, or return False if it
+    was skipped.
+    """
     filepath = Path(urlparse(url).path).relative_to("/")
-    filepath.parent.mkdir(parents=True, exist_ok=True)
-    with open(filepath, "wb") as f:
-        for chunk in (requests.get(url, cookies=cookies, stream=True)
-                      .iter_content(chunk_size=1024*1024)):
-            f.write(chunk)
+    url_size = int(requests.head(url, cookies=cookies).headers["content-length"])
+    # If the local file is larger than the remote, something is wrong.
+    if filepath.exists() and (filepath.stat().st_size > url_size):
+        raise Exception("Local file is larger than remote. "
+                        "Something is wrong, aborting. "
+                        "Maybe your authentication cookies are invalid?")
+    # If the local file does not exist or is smaller than the remote,
+    # proceed to download.
+    elif (not filepath.exists()) or (filepath.stat().st_size < url_size):
+        filepath.parent.mkdir(parents=True, exist_ok=True)
+        with open(filepath, "wb") as f:
+            for chunk in (requests.get(url, cookies=cookies, stream=True)
+                          .iter_content(chunk_size=1024*1024)):
+                f.write(chunk)
+        return True
+    else:
+        return False
 
 parser = argparse.ArgumentParser(description="Get web links for Globus collection")
 parser.add_argument("endpoint_id", metavar="endpoint-id", help="Endpoint ID of collection")
@@ -55,6 +72,8 @@ with open(args.cookies) as f:
     cookies = json.load(f)
 
 for i, url in enumerate(urls, 1):
-    print(f"{i}/{total}: Downloading {url}")
-    download_file(url, cookies)
+    if download_file(url, cookies):
+        print(f"{i}/{total}: Downloaded {url}")
+    else:
+        print(f"{i}/{total}: Skipped {url}")
 print("Download complete!")