From b5143c79de268b844f3a6a63d92c6389b047f35e Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 9 Nov 2020 16:55:33 -0500 Subject: Make it so "pangenome analysis" only runs collect-seqs. Will ensure that metadata is kept up to date. GFA isn't being generated. Will introduce new workflow that uses from_sparql to analyze a subset. Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- bh20simplewebuploader/templates/resource.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'bh20simplewebuploader') diff --git a/bh20simplewebuploader/templates/resource.html b/bh20simplewebuploader/templates/resource.html index fc52f13..4c50fb9 100644 --- a/bh20simplewebuploader/templates/resource.html +++ b/bh20simplewebuploader/templates/resource.html @@ -10,8 +10,8 @@

All sequences project

All sequences (FASTA) relabeled and deduplicated

Metadata (RDF) for all sequences

-

All sequences in Graphical Fragment Assembly (GFA) - More about GFA

-

All sequences in Optimized Dynamic Genome/Graph Implementation (ODGI) - More about ODGI

+ + -- cgit v1.2.3 From 1082b907d816f5da52aba6233073737632d0242f Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 9 Nov 2020 17:20:27 -0500 Subject: Make resource link work for both portable data hashes and sample id Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- bh20simplewebuploader/main.py | 14 ++++++++++++++ workflows/pangenome-generate/collect-seqs.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'bh20simplewebuploader') diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index 73503b4..405544c 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -675,13 +675,27 @@ sparqlURL='http://sparql.genenetwork.org/sparql/' @app.route('/resource/') def resource(id): """Get a COVID19 resource using identifier""" + query=f""" PREFIX pubseq: PREFIX sio: select distinct ?sample ?geoname ?date ?source ?geo ?sampletype ?institute ?sequenceuri +where {{ {{ ?sample sio:SIO_000115 "{id}" . ?sequenceuri pubseq:sample ?sample . +}} +union +{{ + pubseq:sample ?sample . + ?sequenceuri pubseq:sample ?sample . +}} +union +{{ + pubseq:sample ?sample . + ?sequenceuri pubseq:sample ?sample . +}} + ?sample ?geo . ?geo rdfs:label ?geoname . ?sample ?date . 
diff --git a/workflows/pangenome-generate/collect-seqs.py b/workflows/pangenome-generate/collect-seqs.py index af4a0dc..1a0807c 100644 --- a/workflows/pangenome-generate/collect-seqs.py +++ b/workflows/pangenome-generate/collect-seqs.py @@ -38,7 +38,7 @@ for item in validated: pdh = item["portable_data_hash"] with arvados.collection.CollectionReader(pdh, api_client=api, keep_client=keepclient) as col: with col.open("sequence.fasta", "rt") as fa: - subject = "http://collections.lugli.arvadosapi.com/c=%s/sequence.fasta" % pdh + subject = "http://covid19.genenetwork.org/resource/%s" % pdh label = fa.readline().strip() merged_metadata.write("<%s> \"%s\" .\n" % (subject, label[1:].replace('"', '\\"'))) skip = (subject in blacklist or label[1:] in blacklist) -- cgit v1.2.3 From 98a80bd64d8a495b8fddffdef6e07e5a3fbea1e3 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 9 Nov 2020 17:30:16 -0500 Subject: Extract PDH from result and construct URIs from that Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- bh20simplewebuploader/main.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'bh20simplewebuploader') diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index 405544c..51048a4 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -713,8 +713,9 @@ union logging.info("^^^^^^^^^^^^^^^^^^^^^^^^^^^^") # return jsonify({'sequences': int(result[0]["num"]["value"])}) sequenceuri=sample['sequenceuri']['value'] - collectionuri=sequenceuri.split('sequence.fasta')[0] - metauri=collectionuri+'metadata.yaml' + m = re.match(r"http://collections.lugli.arvadosapi.com/c=([^/]*)/sequence.fasta|http://covid19.genenetwork.org/resource/(.*)", sequenceuri) + fastauri = "http://collections.lugli.arvadosapi.com/c=%s/sequence.fasta" % m.group(1) + metauri = "http://collections.lugli.arvadosapi.com/c=%s/metadata.yaml" % m.group(1) locationuri=sample['geo']['value'] location=sample['geoname']['value'] 
date=sample['date']['value'] @@ -729,7 +730,18 @@ union institute='' if 'institute' in sample: institute=sample['institute']['value'] - return render_template('permalink.html',id=id,menu='',uri=f"http://covid19.genenetwork.org/resource/{id}",sequenceuri=sequenceuri,locationuri=locationuri,location=location,date=date,source=source,sampletype=sampletype,institute=institute,collectionuri=collectionuri,metauri=metauri) + return render_template('permalink.html', + id=id, + menu='', + uri=f"http://covid19.genenetwork.org/resource/{id}", + sequenceuri=fastauri, + locationuri=locationuri, + location=location, + date=date, + source=source, + sampletype=sampletype, + institute=institute, + metauri=metauri) # http://covid19.genenetwork.org/location?label=http://www.wikidata.org/entity/Q114 # http://localhost:5067/location?label=http://www.wikidata.org/entity/Q114 -- cgit v1.2.3