about summary refs log tree commit diff
path: root/workflows/tools/pubseq-fetch-data.py
diff options
context:
space:
mode:
author Pjotr Prins 2021-01-28 18:45:52 +0000
committer Pjotr Prins 2021-01-28 18:45:52 +0000
commit 8a7e79d6daa06da4d8ca2a391bae0a00124a2ed3 (patch)
tree 3d17dd32522df3cfa808e8df6ebf722a70cc01d3 /workflows/tools/pubseq-fetch-data.py
parent 90470bc795a17a6ddf6dca156f507d02cb056ec3 (diff)
download bh20-seq-resource-8a7e79d6daa06da4d8ca2a391bae0a00124a2ed3.tar.gz
bh20-seq-resource-8a7e79d6daa06da4d8ca2a391bae0a00124a2ed3.tar.lz
bh20-seq-resource-8a7e79d6daa06da4d8ca2a391bae0a00124a2ed3.zip
Moving tools out of submodules (sorry!)
Diffstat (limited to 'workflows/tools/pubseq-fetch-data.py')
-rwxr-xr-x workflows/tools/pubseq-fetch-data.py 55
1 files changed, 0 insertions, 55 deletions
diff --git a/workflows/tools/pubseq-fetch-data.py b/workflows/tools/pubseq-fetch-data.py
deleted file mode 100755
index ef4edde..0000000
--- a/workflows/tools/pubseq-fetch-data.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import json
-import os
-import requests
-import sys
-import time
-
-parser = argparse.ArgumentParser(description="""
-
-Fetch metadata (JSON) from PubSeq and optionally the FASTA files.  IDs
-can be passed in on the command line or in a file.
-
-""")
-parser.add_argument('--fasta', action='store_true', help='Also fetch FASTA records')
-parser.add_argument('--out', type=str, help='Directory to write to',
-required=True)
-parser.add_argument('--ids', type=str, help='File with ids', required=False)
-parser.add_argument('id', nargs='*', help='id(s)')
-args = parser.parse_args()
-
-dir = args.out
-if not os.path.exists(dir):
-    raise Exception(f"Directory {dir} does not exist")
-
-ids = args.id
-if (len(ids)==0):
-    print(f"Reading {args.ids}")
-    with open(args.ids) as f:
-        ids = [ l.strip() for l in f.readlines() ]
-
-for id in ids:
-    print(id)
-    jsonfn = dir+"/"+id+".json"
-    if not os.path.exists(jsonfn):
-        count = 0
-        r = requests.get(f"http://covid19.genenetwork.org/api/sample/{id}.json")
-        while not r:
-            count += 1
-            if count>10: raise Exception(f"Can not find record for {id}")
-            time.sleep(15)
-            r = requests.get(f"http://covid19.genenetwork.org/api/sample/{id}.json")
-        m_url = r.json()[0]['metadata']
-        mr = requests.get(m_url)
-        with open(dir+"/"+id+".json","w") as outf:
-            outf.write(mr.text)
-        if args.fasta:
-            fastafn = dir+"/"+id+".fa"
-            if os.path.exists(fastafn): continue
-            fa_url = r.json()[0]['fasta']
-            fr = requests.get(fa_url)
-            with open(fastafn,"w") as outf:
-                outf.write(fr.text)
-