#! /usr/bin/env python3
"""Download every file of a Globus collection over its HTTPS server.

The script lists the collection recursively through the Globus Transfer
API, then fetches each file with plain HTTPS requests authenticated by
cookies read from a JSON file.
"""

import argparse
from pathlib import Path, PurePath
import json
import requests
import globus_sdk
from urllib.parse import urlparse

# This is the tutorial client ID from
# https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html.
# Let's not bother to create our own.
CLIENT_ID = "61338d24-54d5-408f-a10d-66c06b59f6d2"

# Size of the pieces in which a response body is streamed to disk.
CHUNK_SIZE = 1024 * 1024


def get_transfer_token():
    """Run the interactive native-app login flow and return a transfer token.

    Prints the authorization URL, reads the resulting code from standard
    input, exchanges it for tokens and returns the access token issued for
    the "transfer.api.globus.org" resource server.
    """
    client = globus_sdk.NativeAppAuthClient(CLIENT_ID)
    client.oauth2_start_flow()
    authorize_url = client.oauth2_get_authorize_url()
    print(f"Please go to this URL and login:\n\n{authorize_url}\n")
    auth_code = input("Please enter the code you get after login here: ").strip()
    tokens = client.oauth2_exchange_code_for_tokens(auth_code)
    return tokens.by_resource_server["transfer.api.globus.org"]["access_token"]


def find_files(transfer_client, endpoint_id, path=PurePath("/")):
    """Yield the path of every non-directory entry below ``path``.

    Each directory listing is obtained with ``operation_ls``; entries whose
    type is "dir" are descended into recursively, every other entry is
    yielded as ``path / name``.
    """
    listing = transfer_client.operation_ls(endpoint_id, path=str(path))
    for entry in listing["DATA"]:
        entry_path = path / entry["name"]
        if entry["type"] == "dir":
            yield from find_files(transfer_client, endpoint_id, entry_path)
        else:
            yield entry_path


def download_file(url, cookies):
    """Download URL.

    The file is stored under the current working directory at the URL's
    path with the leading "/" removed. The remote size is taken from the
    Content-Length header of a HEAD request and compared with the size of
    the local file, if any.

    Return True if it was actually downloaded, or return False if it
    was skipped (a local file of the same size already exists).

    Raises:
        requests.HTTPError: if the HEAD or GET request returns a 4xx/5xx
            status.
        Exception: if the local file is larger than the remote one.
    """
    filepath = Path(urlparse(url).path).relative_to("/")

    head = requests.head(url, cookies=cookies)
    # Fail loudly on an error status instead of trusting the size of an
    # error page.
    head.raise_for_status()
    url_size = int(head.headers['content-length'])

    local_size = filepath.stat().st_size if filepath.exists() else None

    # If local file is larger than remote, something is wrong.
    if local_size is not None and local_size > url_size:
        raise Exception("Local file is larger than remote. "
                        "Something is wrong, aborting. "
                        "Maybe your autentication cookies are invalid?")

    # Same size: assume the file has already been downloaded completely.
    if local_size == url_size:
        return False

    # Local file does not exist or is smaller than remote: download it
    # (again) from the beginning.
    filepath.parent.mkdir(parents=True, exist_ok=True)
    # The context manager releases the connection even if writing fails.
    with requests.get(url, cookies=cookies, stream=True) as response:
        # Check the status before opening the file, so that a failed
        # request neither truncates an existing partial file nor stores an
        # error body as data.
        response.raise_for_status()
        with open(filepath, "wb") as f:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)
    return True


def main():
    """Parse the command line, list the collection and download its files."""
    parser = argparse.ArgumentParser(
        description="Get web links for Globus collection")
    parser.add_argument("endpoint_id", metavar="endpoint-id",
                        help="Endpoint ID of collection")
    parser.add_argument("cookies",
                        help="JSON file with cookies from Globus web app")
    args = parser.parse_args()

    transfer_client = globus_sdk.TransferClient(
        authorizer=globus_sdk.AccessTokenAuthorizer(get_transfer_token()))
    endpoint = transfer_client.get_endpoint(args.endpoint_id)
    # Collect all URLs up front so that the total is known for the
    # progress messages.
    urls = [endpoint["https_server"] + str(path)
            for path in find_files(transfer_client, args.endpoint_id)]
    total = len(urls)
    print(f"Found {total} files")

    with open(args.cookies) as f:
        cookies = json.load(f)

    for i, url in enumerate(urls, 1):
        if download_file(url, cookies):
            print(f"{i}/{total}: Downloaded {url}")
        else:
            print(f"{i}/{total}: Skipped {url}")
    print("Download complete!")


if __name__ == "__main__":
    main()