aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pjotr Prins, 2021-01-05 12:07:39 +0000
committer: Pjotr Prins, 2021-01-05 12:07:39 +0000
commit: 1187fa716cacde2b50566b67b5d619b8f12894f9 (patch)
tree: e2fd8b4749d0fc222ac39ebe4d4d9d7da0fce872
parent: bcc2ea8521d0366753115546b30824a01757b570 (diff)
downloadbh20-seq-resource-1187fa716cacde2b50566b67b5d619b8f12894f9.tar.gz
bh20-seq-resource-1187fa716cacde2b50566b67b5d619b8f12894f9.tar.lz
bh20-seq-resource-1187fa716cacde2b50566b67b5d619b8f12894f9.zip
fetches original metadata from PubSeq/Arvados
-rwxr-xr-x workflows/tools/pubseq-fetch-data.py 41
1 files changed, 41 insertions, 0 deletions
diff --git a/workflows/tools/pubseq-fetch-data.py b/workflows/tools/pubseq-fetch-data.py
new file mode 100755
index 0000000..c22d754
--- /dev/null
+++ b/workflows/tools/pubseq-fetch-data.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import os
+import requests
+import sys
+
+# Download the metadata JSON of each requested PubSeq sample to <out>/<id>.json.
+parser = argparse.ArgumentParser(description="""
+
+Fetch metadata (JSON) from PubSeq and optionally the FASTA files. IDs
+can be passed in on the command line or in a file.
+
+""")
+parser.add_argument('--out', type=str, help='Directory to write to', required=True)
+parser.add_argument('--ids', type=str, help='File with ids', required=False)
+parser.add_argument('id', nargs='*', help='id(s)')
+args = parser.parse_args()
+
+if not os.path.isdir(args.out):  # a regular file at this path is not usable either
+    raise Exception(f"Directory {args.out} does not exist")
+
+ids = args.id  # ids given on the command line win over the --ids file
+if not ids:
+    if args.ids is None:  # neither source given: exit with usage, not a TypeError
+        parser.error("no ids given: pass id(s) or --ids FILE")
+    print(f"Reading {args.ids}")
+    with open(args.ids) as f:
+        ids = [line.strip() for line in f if line.strip()]  # drop blank lines
+
+for sample_id in ids:  # process every id that was supplied
+    print(sample_id)
+    r = requests.get(f"http://covid19.genenetwork.org/api/sample/{sample_id}.json", timeout=60)
+    records = r.json() if r else []  # a Response is falsy when status >= 400
+    if not records:
+        raise Exception(f"Can not find record for {sample_id}")
+    mr = requests.get(records[0]['metadata'], timeout=60)  # follow the metadata link
+    mr.raise_for_status()  # never save an HTTP error body as metadata
+    with open(os.path.join(args.out, sample_id + ".json"), "w") as outf:
+        json.dump(mr.json(), outf, indent=4)