diff options
author | Pjotr Prins | 2020-12-31 14:25:33 +0000 |
---|---|---|
committer | Pjotr Prins | 2020-12-31 14:25:33 +0000 |
commit | 5216cf3149024052a3e87f4491d2bb7d9b06a237 (patch) | |
tree | 4ef4481c5c34ad8bd3442ca9ba6f8b869a117447 /workflows/pull-data/genbank | |
parent | 3541089aa8af5d229e669eb38d3735cd2b0b8a05 (diff) | |
download | bh20-seq-resource-5216cf3149024052a3e87f4491d2bb7d9b06a237.tar.gz bh20-seq-resource-5216cf3149024052a3e87f4491d2bb7d9b06a237.tar.lz bh20-seq-resource-5216cf3149024052a3e87f4491d2bb7d9b06a237.zip |
Improve SPARQL query and comments
Diffstat (limited to 'workflows/pull-data/genbank')
-rwxr-xr-x | workflows/pull-data/genbank/sparql-fetch-ids | 18 |
1 files changed, 13 insertions, 5 deletions
```diff
diff --git a/workflows/pull-data/genbank/sparql-fetch-ids b/workflows/pull-data/genbank/sparql-fetch-ids
index 9a8b8ee..683044c 100755
--- a/workflows/pull-data/genbank/sparql-fetch-ids
+++ b/workflows/pull-data/genbank/sparql-fetch-ids
@@ -1,4 +1,9 @@
 #!/usr/bin/env ruby
+#
+# Use a SPARQL query to fetch all IDs in the PubSeq database
+#
+# sparql-fetch-ids > pubseq_ids.txt
+#
 
 require 'net/http'
 require 'json'
@@ -13,7 +18,8 @@ prefix schema: <https://schema.org/>
 PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
 "
 
-# Build a SPARQL query, submit and return results. Apply transform lambda
+# Build a SPARQL query, submit and return results. Apply transform
+# lambda when passed in
 def sparql q, transform = nil
   q = SPARQL_HEADER+q
 
@@ -42,16 +48,18 @@ start = 0
 num = MAX
 begin
   query = "
-select distinct ?id where {
+SELECT DISTINCT ?id
+FROM <http://covid-19.genenetwork.org/graph/metadata.ttl>
+WHERE {
 
-?arvid <http://biohackathon.org/bh20-seq-schema/original_fasta_label> ?id .
+  ?arvid <http://biohackathon.org/bh20-seq-schema/original_fasta_label> ?id .
 
-} limit #{num} offset #{start}
+} LIMIT #{num} OFFSET #{start}
 "
   list = sparql(query, lambda { |rec| rec[:id] })
   list.each do | l |
     print(l,"\n")
   end
+  $stderr.print("#{start}-#{start+list.size}:#{list.first}\n") # show progress
   start += num
-  $stderr.print(start,":",list.first,"\n")
 end while list.size == MAX
```