aboutsummaryrefslogtreecommitdiff
path: root/globus-weblinks
diff options
context:
space:
mode:
authorArun Isaac2025-01-04 01:34:35 +0000
committerArun Isaac2025-01-04 01:39:23 +0000
commit1718ff1bf4611e05d2fb952240369e27fe1504bd (patch)
treee02862b8e3542547a022e7c6fe9c5f3f37010d46 /globus-weblinks
parent22d1c69a4921d01e19c178cbfef8d361f5f731f6 (diff)
downloadglobus-weblinks-1718ff1bf4611e05d2fb952240369e27fe1504bd.tar.gz
globus-weblinks-1718ff1bf4611e05d2fb952240369e27fe1504bd.tar.lz
globus-weblinks-1718ff1bf4611e05d2fb952240369e27fe1504bd.zip
Implement downloader in python too.
Passing cookies to wget on the command line is a security risk. On a shared machine, other users can see your full command line. Passing them in using the --load-cookies option is too tedious—the file format required is archaic and hard to replicate by hand. So, we simply implement the downloader in Python too. In any case, this makes for a more cohesive user experience.
Diffstat (limited to 'globus-weblinks')
-rwxr-xr-xglobus-weblinks28
1 files changed, 25 insertions, 3 deletions
diff --git a/globus-weblinks b/globus-weblinks
index 74fd6f0..92f598d 100755
--- a/globus-weblinks
+++ b/globus-weblinks
@@ -1,9 +1,12 @@
#! /usr/bin/env python3
import argparse
-from pathlib import PurePath
+from pathlib import Path, PurePath
+import json
+import requests
import sys
import globus_sdk
+from urllib.parse import urlparse
# This is the tutorial client ID from
# https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html.
@@ -31,12 +34,31 @@ def find_files(transfer_client, endpoint_id, path=PurePath("/")):
else:
yield path / file["name"]
+def download_file(url, cookies):
+    """Download url into the current directory, mirroring the URL path.
+
+    The local file path is the path component of url made relative, so
+    "https://host/a/b.txt" is saved as "a/b.txt". Missing parent
+    directories are created.
+
+    url -- URL of the file to download
+    cookies -- cookies to send with the request, passed through as the
+        cookies argument of requests.get
+
+    Raises requests.HTTPError if the server responds with an error
+    status; no file or directory is created in that case.
+    """
+    filepath = Path(urlparse(url).path).relative_to("/")
+    # Close the response when done so that the connection held open by
+    # the streamed request is released.
+    with requests.get(url, cookies=cookies, stream=True) as response:
+        # Fail loudly on an error status (e.g. rejected cookies) instead
+        # of saving the error body as if it were the downloaded file.
+        response.raise_for_status()
+        # Only touch the filesystem once the request is known to be good.
+        filepath.parent.mkdir(parents=True, exist_ok=True)
+        with open(filepath, "wb") as f:
+            for chunk in response.iter_content(chunk_size=1024*1024):
+                f.write(chunk)
+
parser = argparse.ArgumentParser(description="Get web links for Globus collection")
parser.add_argument("endpoint_id", metavar="endpoint-id", help="Endpoint ID of collection")
+parser.add_argument("cookies", help="JSON file with cookies from Globus web app")
args = parser.parse_args()
transfer_client = globus_sdk.TransferClient(
authorizer=globus_sdk.AccessTokenAuthorizer(get_transfer_token()))
endpoint = transfer_client.get_endpoint(args.endpoint_id)
-for path in find_files(transfer_client, args.endpoint_id):
- print(endpoint["https_server"] + str(path))
+# Read the cookies before enumerating the files so that a missing or
+# malformed cookies file is reported right away. JSON is read as UTF-8
+# rather than in the locale's default encoding.
+with open(args.cookies, encoding="utf-8") as f:
+    cookies = json.load(f)
+
+# Collect the URLs into a list up front so that the total is known for
+# the progress messages below.
+urls = [endpoint["https_server"] + str(path)
+        for path in find_files(transfer_client, args.endpoint_id)]
+total = len(urls)
+print(f"Found {total} files")
+
+for i, url in enumerate(urls, 1):
+    print(f"{i}/{total}: Downloading {url}")
+    download_file(url, cookies)
+print("Download complete!")