From 2d201e156d530e5e912252c4300245da382b846e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 16 May 2020 10:14:13 -0500 Subject: Counting number of sequences --- bh20simplewebuploader/main.py | 17 +++++++++++++++++ bh20simplewebuploader/static/main.js | 5 ++++- bh20simplewebuploader/templates/form.html | 21 +++++++++++++++++---- doc/blog/using-covid-19-pubseq-part1.org | 31 +++++++++++++++++++++++++++---- 4 files changed, 65 insertions(+), 9 deletions(-) diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index 1dff207..0ba43fd 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -424,6 +424,23 @@ def receive_files(): ## but most likley you don't want to touch the queries, Cheers. baseURL='http://sparql.genenetwork.org/sparql/' +@app.route('/api/getCount', methods=['GET']) +def getCount(): + query=""" +PREFIX pubseq: +select (COUNT(distinct ?dataset) as ?num) +{ + ?dataset pubseq:submitter ?id . + ?id ?p ?submitter +} +""" + payload = {'query': query, 'format': 'json'} + r = requests.get(baseURL, params=payload) + result = r.json()['results']['bindings'] + # [{'num': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '1352'}}] + # print(result, file=sys.stderr) + return jsonify({'sequences': int(result[0]["num"]["value"])}) + @app.route('/api/getAllaccessions', methods=['GET']) def getAllaccessions(): query="""SELECT DISTINCT ?fasta ?value WHERE {?fasta ?x[ ?value ]}""" diff --git a/bh20simplewebuploader/static/main.js b/bh20simplewebuploader/static/main.js index 7084e1f..bf95832 100644 --- a/bh20simplewebuploader/static/main.js +++ b/bh20simplewebuploader/static/main.js @@ -18,6 +18,10 @@ let search = () => { fetchAPI(scriptRoot + "/api/getDetailsForSeq?seq=" + encodeURIComponent(m)); } +let fetchCount = () => { + fetchAPI("/api/getCount"); +} + let fetchSEQBySpecimen = () => { fetchAPI("/api/getSEQCountbySpecimenSource"); } @@ -160,7 +164,6 @@ for (let button of 
document.getElementsByClassName('remove-field')) { } // Change the submit button after hitting - function on_submit_button() { var elem = document.getElementById("submit"); elem.value = "Submitting..."; diff --git a/bh20simplewebuploader/templates/form.html b/bh20simplewebuploader/templates/form.html index 7d7cef8..2cbaf62 100644 --- a/bh20simplewebuploader/templates/form.html +++ b/bh20simplewebuploader/templates/form.html @@ -6,13 +6,14 @@ Web uploader for Public SARS-CoV-2 Sequence Resource +

Web uploader for Public SARS-CoV-2 Sequence Resource

-Disabled until we got everything wired up +

Database contains public sequences!

@@ -209,9 +210,21 @@ - - + diff --git a/doc/blog/using-covid-19-pubseq-part1.org b/doc/blog/using-covid-19-pubseq-part1.org index 617a01d..4b7ddc6 100644 --- a/doc/blog/using-covid-19-pubseq-part1.org +++ b/doc/blog/using-covid-19-pubseq-part1.org @@ -75,8 +75,10 @@ these identifiers throughout. * Predicates -Lets look at all the predicates in the dataset by pasting -the following in a SPARQL end point http://sparql.genenetwork.org/sparql/ +To explore an RDF dataset, the first query we can do is open and gets +us a list. Lets look at all the predicates in the dataset by pasting +the following in a SPARQL end point +http://sparql.genenetwork.org/sparql/ #+begin_src sql select distinct ?p @@ -86,9 +88,19 @@ select distinct ?p #+end_src you can ignore the openlink and w3 ones. To reduce results to a named -graph set the default graph to +graph set the default graph. +To get a [[http://sparql.genenetwork.org/sparql/?default-graph-uri=&query=select+distinct+%3Fg%0D%0A%7B%0D%0A++++GRAPH+%3Fg+%7B%3Fs+%3Fp+%3Fo%7D%0D%0A%7D&format=text%2Fhtml&timeout=0&debug=on&run=+Run+Query+][list of graphs]] in the dataset, first do + +#+begin_src sql +select distinct ?g +{ + GRAPH ?g {?s ?p ?o} +} +#+end_src + +Limiting search to metadata add http://covid-19.genenetwork.org/graph/metadata.ttl in the top input -box. There you can find a predicate for submitter that looks like +box. Now you can find a [[http://sparql.genenetwork.org/sparql/?default-graph-uri=http%3A%2F%2Fcovid-19.genenetwork.org%2Fgraph%2Fmetadata.ttl&query=select+distinct+%3Fp%0D%0A%7B%0D%0A+++%3Fo+%3Fp+%3Fs%0D%0A%7D&format=text%2Fhtml&timeout=0&debug=on&run=+Run+Query+][predicate]] for submitter that looks like http://biohackathon.org/bh20-seq-schema#MainSchema/submitter. To list all submitters, try @@ -131,6 +143,17 @@ by University of Washington is is http://arvados.org/keep:00fede2c6f52b053a14edca01cfa02b7+126/sequence.fasta (note the ID may have changed so pick one with above query). 
+Now we got this far, lets [[http://sparql.genenetwork.org/sparql/?default-graph-uri=http%3A%2F%2Fcovid-19.genenetwork.org%2Fgraph%2Fmetadata.ttl&query=PREFIX+pubseq%3A+%3Chttp%3A%2F%2Fbiohackathon.org%2Fbh20-seq-schema%23MainSchema%2F%3E%0D%0Aselect+%28COUNT%28distinct+%3Fdataset%29+as+%3Fnum%29%0D%0A%7B%0D%0A+++%3Fdataset+pubseq%3Asubmitter+%3Fid+.%0D%0A+++%3Fid+%3Fp+%3Fsubmitter%0D%0A%7D+&format=text%2Fhtml&timeout=0&debug=on&run=+Run+Query+][count the datasets]] submitted with + +#+begin_src sql +PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> +select (COUNT(distinct ?dataset) as ?num) +{ + ?dataset pubseq:submitter ?id . + ?id ?p ?submitter +} +#+end_src + * Fetch submitter info and other metadata -- cgit v1.2.3