aboutsummaryrefslogtreecommitdiff
#! /usr/bin/env python3

import argparse
from pathlib import Path, PurePath
import json
import requests
import globus_sdk
from urllib.parse import urlparse

# OAuth2 client ID passed to globus_sdk.NativeAppAuthClient for the
# login flow. This is the tutorial client ID from
# https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html,
# reused here rather than registering a dedicated one.
CLIENT_ID = "61338d24-54d5-408f-a10d-66c06b59f6d2"

def get_transfer_token():
    """Interactively obtain an access token for the Globus Transfer API.

    Start an OAuth2 flow for CLIENT_ID, print the authorization URL,
    read the code the user obtains after logging in from standard
    input, and exchange that code for tokens.

    Return the access token listed under the
    "transfer.api.globus.org" resource server.
    """
    auth_client = globus_sdk.NativeAppAuthClient(CLIENT_ID)
    auth_client.oauth2_start_flow()

    authorize_url = auth_client.oauth2_get_authorize_url()
    print(f"Please go to this URL and login:\n\n{authorize_url}\n")

    entered_code = input("Please enter the code you get after login here: ")
    token_response = auth_client.oauth2_exchange_code_for_tokens(
        entered_code.strip())
    transfer_tokens = token_response.by_resource_server["transfer.api.globus.org"]
    return transfer_tokens["access_token"]

def find_files(transfer_client, endpoint_id, path=PurePath("/")):
    """Yield the path of every non-directory entry at or below path.

    Entries are listed with transfer_client.operation_ls. An entry
    whose "type" is "dir" is descended into immediately, so results
    come out depth-first in listing order. Directories themselves are
    never yielded.
    """
    listing = transfer_client.operation_ls(endpoint_id, path=str(path))
    for entry in listing["DATA"]:
        child = path / entry["name"]
        if entry["type"] != "dir":
            yield child
            continue
        yield from find_files(transfer_client, endpoint_id, child)

def download_file(url, cookies):
    """Download URL to a local path mirroring the path part of URL.

    The file is stored relative to the current working directory, at
    the URL's path with the leading "/" removed (https://host/a/b.dat
    is saved as a/b.dat). Missing parent directories are created.

    A HEAD request determines the remote size. The download is
    skipped when a local file of exactly that size already exists. A
    smaller local file is overwritten from scratch.

    Return True if it was actually downloaded, or return False if it
    was skipped.

    Raise requests.HTTPError if the server answers the HEAD or GET
    request with an error status, KeyError if the HEAD response has
    no Content-Length header, and Exception if the local file is
    larger than the remote one.
    """
    filepath = Path(urlparse(url).path).relative_to("/")

    head = requests.head(url, cookies=cookies)
    # An error response carries a Content-Length of its own, which
    # would make the size comparison below meaningless.
    head.raise_for_status()
    url_size = int(head.headers['content-length'])

    # Stat the local file once; None stands for "no local file yet".
    local_size = filepath.stat().st_size if filepath.exists() else None

    # If local file is larger than remote, something is wrong.
    if local_size is not None and local_size > url_size:
        raise Exception("Local file is larger than remote. "
                        "Something is wrong, aborting. "
                        "Maybe your authentication cookies are invalid?")
    if local_size == url_size:
        return False

    # Local file does not exist or is smaller than remote: download.
    filepath.parent.mkdir(parents=True, exist_ok=True)
    # The context manager releases the streamed connection even if
    # writing fails part-way through.
    with requests.get(url, cookies=cookies, stream=True) as response:
        # Check the status before opening the output file, so that an
        # error page is never saved as data and a failed request does
        # not truncate an existing partial file.
        response.raise_for_status()
        with open(filepath, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024*1024):
                f.write(chunk)
    return True

# Script body: parse arguments, list every file in the collection,
# then download each one over the collection's HTTPS server.
parser = argparse.ArgumentParser(description="Get web links for Globus collection")
parser.add_argument("endpoint_id", metavar="endpoint-id", help="Endpoint ID of collection")
parser.add_argument("cookies", help="JSON file with cookies from Globus web app")
args = parser.parse_args()

# Read the cookies first: a missing or malformed file should fail
# before the interactive login and the directory listing, not after.
with open(args.cookies) as f:
    cookies = json.load(f)

transfer_client = globus_sdk.TransferClient(
    authorizer=globus_sdk.AccessTokenAuthorizer(get_transfer_token()))
endpoint = transfer_client.get_endpoint(args.endpoint_id)
# Without an HTTPS server there is no base URL to download from;
# concatenating None below would only produce an opaque TypeError.
if not endpoint["https_server"]:
    raise SystemExit(f"Endpoint {args.endpoint_id} has no HTTPS server; "
                     "its files cannot be downloaded over HTTPS.")
# find_files yields absolute paths, so plain concatenation with the
# server's base URL gives the download URL.
urls = [endpoint["https_server"] + str(path)
        for path in find_files(transfer_client, args.endpoint_id)]
total = len(urls)
print(f"Found {total} files")

for i, url in enumerate(urls, 1):
    if download_file(url, cookies):
        print(f"{i}/{total}: Downloaded {url}")
    else:
        print(f"{i}/{total}: Skipped {url}")
print("Download complete!")