From 82b2fa53e4b19d2bc90c2e5f879c2fa4336b6b39 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Wed, 8 Jan 2025 14:08:53 +0000
Subject: Resume interrupted downloads.

---
Review notes (v2): check the HTTP status of both the HEAD and the GET
request, close the streamed response, stat the local file only once,
and fix the "autentication" typo. Line breaks and indentation were
reconstructed from a whitespace-collapsed copy of the patch; the blob
hashes on the index line are those of v1.

 globus-weblinks | 39 +++++++++++++++++++++++++++++++++------
 1 file changed, 33 insertions(+), 6 deletions(-)

(limited to 'globus-weblinks')

diff --git a/globus-weblinks b/globus-weblinks
index 8a7d718..35ddc47 100755
--- a/globus-weblinks
+++ b/globus-weblinks
@@ -31,12 +31,37 @@ def find_files(transfer_client, endpoint_id, path=PurePath("/")):
             yield path / file["name"]
 
 def download_file(url, cookies):
+    """Download URL unless a complete local copy already exists.
+
+    A partial local file is downloaded again from the start.
+
+    Return True if it was actually downloaded, or return False if it
+    was skipped.
+    """
     filepath = Path(urlparse(url).path).relative_to("/")
+    head = requests.head(url, cookies=cookies)
+    head.raise_for_status()
+    url_size = int(head.headers['content-length'])
+    local_size = filepath.stat().st_size if filepath.exists() else None
+    # If local file is larger than remote, something is wrong.
+    if local_size is not None and local_size > url_size:
+        raise RuntimeError("Local file is larger than remote. "
+                           "Something is wrong, aborting. "
+                           "Maybe your authentication cookies are invalid?")
+    # If local file is as large as remote, it is already complete.
+    if local_size == url_size:
+        return False
+    # Local file does not exist or is smaller than remote, proceed
+    # to download.
     filepath.parent.mkdir(parents=True, exist_ok=True)
-    with open(filepath, "wb") as f:
-        for chunk in (requests.get(url, cookies=cookies, stream=True)
-                      .iter_content(chunk_size=1024*1024)):
-            f.write(chunk)
+    with requests.get(url, cookies=cookies, stream=True) as response:
+        # Check the status before opening the file, so that an error
+        # page never overwrites a partial download.
+        response.raise_for_status()
+        with open(filepath, "wb") as f:
+            for chunk in response.iter_content(chunk_size=1024*1024):
+                f.write(chunk)
+    return True
 
 parser = argparse.ArgumentParser(description="Get web links for Globus collection")
 parser.add_argument("endpoint_id", metavar="endpoint-id", help="Endpoint ID of collection")
@@ -55,6 +80,8 @@ with open(args.cookies) as f:
     cookies = json.load(f)
 
 for i, url in enumerate(urls, 1):
-    print(f"{i}/{total}: Downloading {url}")
-    download_file(url, cookies)
+    if download_file(url, cookies):
+        print(f"{i}/{total}: Downloaded {url}")
+    else:
+        print(f"{i}/{total}: Skipped {url}")
 print("Download complete!")
-- 
cgit v1.2.3