aboutsummaryrefslogtreecommitdiff
path: root/workflows/pangenome-generate/collect-seqs.py
diff options
context:
space:
mode:
author: Peter Amstutz, 2020-11-09 17:32:05 -0500
committer: GitHub, 2020-11-09 17:32:05 -0500
commit: 74d46196ae69c0c557f64593910ada48d84b3654 (patch)
tree: 57c7df946e1aa1f2054eb49379439ebee4d4c0e6 /workflows/pangenome-generate/collect-seqs.py
parent: b311e2ec0f1d02cf16152855dd8bdd760ed4578b (diff)
parent: 98a80bd64d8a495b8fddffdef6e07e5a3fbea1e3 (diff)
download: bh20-seq-resource-74d46196ae69c0c557f64593910ada48d84b3654.tar.gz
bh20-seq-resource-74d46196ae69c0c557f64593910ada48d84b3654.tar.lz
bh20-seq-resource-74d46196ae69c0c557f64593910ada48d84b3654.zip
Merge pull request #105 from arvados/fasta-subset-from-query
Extract subset of the all-sequences fasta by running a sparql query. Also includes some tweaks to the permalinks.
Diffstat (limited to 'workflows/pangenome-generate/collect-seqs.py')
-rw-r--r-- workflows/pangenome-generate/collect-seqs.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/workflows/pangenome-generate/collect-seqs.py b/workflows/pangenome-generate/collect-seqs.py
index af4a0dc..1a0807c 100644
--- a/workflows/pangenome-generate/collect-seqs.py
+++ b/workflows/pangenome-generate/collect-seqs.py
@@ -38,7 +38,7 @@ for item in validated:
pdh = item["portable_data_hash"]
with arvados.collection.CollectionReader(pdh, api_client=api, keep_client=keepclient) as col:
with col.open("sequence.fasta", "rt") as fa:
- subject = "http://collections.lugli.arvadosapi.com/c=%s/sequence.fasta" % pdh
+ subject = "http://covid19.genenetwork.org/resource/%s" % pdh
label = fa.readline().strip()
merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/original_fasta_label> \"%s\" .\n" % (subject, label[1:].replace('"', '\\"')))
skip = (subject in blacklist or label[1:] in blacklist)