author     Arun Isaac    2025-01-08 14:08:53 +0000
committer  Arun Isaac    2025-01-08 14:22:09 +0000
commit     82b2fa53e4b19d2bc90c2e5f879c2fa4336b6b39
tree       699e61c4c0f5ae80c86adfe3c001006457f966cc
parent     bd69a8b6c33f56d1ea9eb83f0fc612b310d0a43d
Resume interrupted downloads.
-rw-r--r--  README.md         2
-rwxr-xr-x  globus-weblinks  33
2 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/README.md b/README.md
index 5e68609..11cef40 100644
--- a/README.md
+++ b/README.md
@@ -32,4 +32,6 @@ Now, all that remains is to download your files. Do it like so:
 ```
 ./globus-weblinks YOUR-ENDPOINT-ID cookies.json
 ```
+If the download is interrupted, you can re-run the command and only incomplete files will be downloaded again.
+
 Enjoy!
diff --git a/globus-weblinks b/globus-weblinks
index 8a7d718..35ddc47 100755
--- a/globus-weblinks
+++ b/globus-weblinks
@@ -31,12 +31,29 @@ def find_files(transfer_client, endpoint_id, path=PurePath("/")):
             yield path / file["name"]
 
 def download_file(url, cookies):
+    """Download URL.
+
+    Return True if it was actually downloaded, or return False if it
+    was skipped.
+    """
     filepath = Path(urlparse(url).path).relative_to("/")
-    filepath.parent.mkdir(parents=True, exist_ok=True)
-    with open(filepath, "wb") as f:
-        for chunk in (requests.get(url, cookies=cookies, stream=True)
-                      .iter_content(chunk_size=1024*1024)):
-            f.write(chunk)
+    url_size = int(requests.head(url, cookies=cookies).headers["content-length"])
+    # If the local file is larger than the remote, something is wrong.
+    if filepath.exists() and (filepath.stat().st_size > url_size):
+        raise Exception("Local file is larger than remote. "
+                        "Something is wrong, aborting. "
+                        "Maybe your authentication cookies are invalid?")
+    # If the local file does not exist or is smaller than the remote,
+    # proceed to download.
+    elif (not filepath.exists()) or (filepath.stat().st_size < url_size):
+        filepath.parent.mkdir(parents=True, exist_ok=True)
+        with open(filepath, "wb") as f:
+            for chunk in (requests.get(url, cookies=cookies, stream=True)
+                          .iter_content(chunk_size=1024*1024)):
+                f.write(chunk)
+        return True
+    else:
+        return False
 
 parser = argparse.ArgumentParser(description="Get web links for Globus collection")
 parser.add_argument("endpoint_id", metavar="endpoint-id", help="Endpoint ID of collection")
@@ -55,6 +72,8 @@ with open(args.cookies) as f:
     cookies = json.load(f)
 
 for i, url in enumerate(urls, 1):
-    print(f"{i}/{total}: Downloading {url}")
-    download_file(url, cookies)
+    if download_file(url, cookies):
+        print(f"{i}/{total}: Downloaded {url}")
+    else:
+        print(f"{i}/{total}: Skipped {url}")
 print("Download complete!")