diff options
author | Pjotr Prins | 2021-01-05 12:07:39 +0000 |
---|---|---|
committer | Pjotr Prins | 2021-01-05 12:07:39 +0000 |
commit | 1187fa716cacde2b50566b67b5d619b8f12894f9 (patch) | |
tree | e2fd8b4749d0fc222ac39ebe4d4d9d7da0fce872 | |
parent | bcc2ea8521d0366753115546b30824a01757b570 (diff) | |
download | bh20-seq-resource-1187fa716cacde2b50566b67b5d619b8f12894f9.tar.gz bh20-seq-resource-1187fa716cacde2b50566b67b5d619b8f12894f9.tar.lz bh20-seq-resource-1187fa716cacde2b50566b67b5d619b8f12894f9.zip |
fetches original metadata from PubSeq/Arvados
-rwxr-xr-x | workflows/tools/pubseq-fetch-data.py | 41 |
1 files changed, 41 insertions, 0 deletions
#!/usr/bin/env python3
# workflows/tools/pubseq-fetch-data.py
"""Fetch the original sample metadata (JSON) from PubSeq.

For every sample id the PubSeq sample API is queried, the ``metadata``
URL of the first returned record is downloaded, and the resulting JSON
document is written to ``<out>/<id>.json``.

Ids are taken from the command line; when none are given there they are
read from the file named by ``--ids`` (one id per line).

NOTE: the command line description also mentions FASTA files, but this
script only fetches the metadata.
"""

import argparse
import json
import os
import sys

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 60


def build_parser():
    """Return the command line parser for this tool."""
    parser = argparse.ArgumentParser(description="""

Fetch metadata (JSON) from PubSeq and optionally the FASTA files. IDs
can be passed in on the command line or in a file.

""")
    parser.add_argument('--out', type=str, help='Directory to write to',
                        required=True)
    parser.add_argument('--ids', type=str, help='File with ids',
                        required=False)
    parser.add_argument('id', nargs='*', help='id(s)')
    return parser


def read_ids(path):
    """Return the ids listed in the file *path*, one per line.

    Surrounding whitespace is stripped and blank lines are skipped, so a
    trailing newline or an empty line does not produce an empty id.
    """
    with open(path, encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [sample_id for sample_id in stripped if sample_id]


def fetch_metadata(sample_id):
    """Download and return the metadata document for *sample_id*.

    Raises:
        Exception: if the sample API has no record for *sample_id*.
        requests.HTTPError: if the metadata URL itself cannot be fetched.
    """
    # Imported here rather than at module level so that the argument and
    # id-file helpers stay usable without the third-party dependency.
    import requests

    r = requests.get(
        f"http://covid19.genenetwork.org/api/sample/{sample_id}.json",
        timeout=REQUEST_TIMEOUT)
    # An HTTP error status and an empty result list both mean "no record".
    records = r.json() if r.ok else []
    if not records:
        raise Exception(f"Can not find record for {sample_id}")

    mr = requests.get(records[0]['metadata'], timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of saving an error page as metadata.
    mr.raise_for_status()
    return mr.json()


def save_metadata(ids, out_dir, fetch=fetch_metadata):
    """Fetch the metadata for every id in *ids* and store it in *out_dir*.

    Args:
        ids: iterable of sample ids; all of them are processed.
        out_dir: existing directory that receives one ``<id>.json`` each.
        fetch: callable mapping an id to a JSON-serialisable object;
            defaults to :func:`fetch_metadata`.

    Returns:
        The list of file paths written, in the order of *ids*.
    """
    written = []
    for sample_id in ids:
        print(sample_id)
        meta = fetch(sample_id)
        path = os.path.join(out_dir, sample_id + ".json")
        with open(path, "w", encoding="utf-8") as outf:
            json.dump(meta, outf, indent=4)
        written.append(path)
    return written


def main(argv=None):
    """Run the tool with *argv* (defaults to ``sys.argv[1:]``)."""
    parser = build_parser()
    args = parser.parse_args(argv)

    out_dir = args.out
    if not os.path.isdir(out_dir):
        raise Exception(f"Directory {out_dir} does not exist")

    ids = args.id
    if not ids:
        if args.ids is None:
            # Neither source of ids was supplied: report a usage error
            # instead of failing on open(None).
            parser.error("no ids given: pass id(s) on the command line "
                         "or a file with --ids")
        print(f"Reading {args.ids}")
        ids = read_ids(args.ids)

    save_metadata(ids, out_dir)
    return 0


if __name__ == "__main__":
    sys.exit(main())