diff options
Diffstat (limited to 'workflows/tools/pubseq-fetch-data.py')
-rwxr-xr-x | workflows/tools/pubseq-fetch-data.py | 55 |
1 files changed, 0 insertions, 55 deletions
#!/usr/bin/env python3
"""Fetch PubSeq sample metadata (JSON) and, optionally, FASTA records.

Recovered from the diff that deleted workflows/tools/pubseq-fetch-data.py.

For every requested id the script writes ``<out>/<id>.json`` and, with
``--fasta``, ``<out>/<id>.fa``.  Files that already exist are left alone,
so an interrupted run can simply be restarted.
"""

import argparse
import json
import os
import requests
import sys
import time

# Sample lookup endpoint; "/<id>.json" is appended for each sample.
SAMPLE_API = "http://covid19.genenetwork.org/api/sample"
MAX_RETRIES = 10      # extra lookups attempted after the first one fails
RETRY_DELAY = 15      # seconds to wait between lookups
REQUEST_TIMEOUT = 60  # seconds before a single HTTP request is abandoned


def _build_parser():
    """Return the command-line parser for this tool."""
    parser = argparse.ArgumentParser(description="""

Fetch metadata (JSON) from PubSeq and optionally the FASTA files. IDs
can be passed in on the command line or in a file.

""")
    parser.add_argument('--fasta', action='store_true',
                        help='Also fetch FASTA records')
    parser.add_argument('--out', type=str, help='Directory to write to',
                        required=True)
    parser.add_argument('--ids', type=str, help='File with ids',
                        required=False)
    parser.add_argument('id', nargs='*', help='id(s)')
    return parser


def _load_ids(args, parser):
    """Return the ids to fetch: positional ids win, else the --ids file.

    Exits with a usage error when neither source was supplied, instead of
    failing later on ``open(None)``.
    """
    if args.id:
        return args.id
    if not args.ids:
        parser.error("no ids given: pass id(s) on the command line "
                     "or a file with --ids")
    print(f"Reading {args.ids}")
    with open(args.ids) as f:
        # Blank lines would otherwise turn into requests for "/.json".
        return [line.strip() for line in f if line.strip()]


def _fetch_sample_record(sample_id):
    """Look up *sample_id* and return the first record of the JSON reply.

    The lookup is attempted once and then retried up to MAX_RETRIES times,
    sleeping RETRY_DELAY seconds before each retry.

    Raises:
        Exception: when no usable record was obtained.
    """
    url = f"{SAMPLE_API}/{sample_id}.json"
    last_error = None
    for attempt in range(MAX_RETRIES + 1):
        if attempt:
            time.sleep(RETRY_DELAY)
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            # Connection problems and timeouts count as a failed attempt.
            last_error = err
            continue
        if response.ok:
            records = response.json()
            if not records:
                # The service answered, but knows no such sample.
                break
            return records[0]
    raise Exception(f"Can not find record for {sample_id}") from last_error


def _download(url, path):
    """Save the body of *url* to *path* as text.

    Raises:
        requests.HTTPError: on an error status, so that an error page is
            never stored as data (an existing file suppresses re-fetching).
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(path, "w") as outf:
        outf.write(response.text)


def main(argv=None):
    """Parse *argv* (default: ``sys.argv[1:]``) and fetch what is missing."""
    parser = _build_parser()
    args = parser.parse_args(argv)

    out_dir = args.out
    if not os.path.exists(out_dir):
        raise Exception(f"Directory {out_dir} does not exist")

    for sample_id in _load_ids(args, parser):
        print(sample_id)
        json_path = os.path.join(out_dir, sample_id + ".json")
        fasta_path = os.path.join(out_dir, sample_id + ".fa")
        need_json = not os.path.exists(json_path)
        need_fasta = args.fasta and not os.path.exists(fasta_path)
        if not (need_json or need_fasta):
            continue
        # One lookup serves both downloads, and it happens whenever either
        # file is missing, so the FASTA step never depends on the JSON step.
        record = _fetch_sample_record(sample_id)
        if need_json:
            _download(record['metadata'], json_path)
        if need_fasta:
            _download(record['fasta'], fasta_path)


if __name__ == "__main__":
    main()