From 5216cf3149024052a3e87f4491d2bb7d9b06a237 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 31 Dec 2020 14:25:33 +0000 Subject: Improve SPARQL query and comments --- workflows/pull-data/genbank/sparql-fetch-ids | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/workflows/pull-data/genbank/sparql-fetch-ids b/workflows/pull-data/genbank/sparql-fetch-ids index 9a8b8ee..683044c 100755 --- a/workflows/pull-data/genbank/sparql-fetch-ids +++ b/workflows/pull-data/genbank/sparql-fetch-ids @@ -1,4 +1,9 @@ #!/usr/bin/env ruby +# +# Use a SPARQL query to fetch all IDs in the PubSeq database +# +# sparql-fetch-ids > pubseq_ids.txt +# require 'net/http' require 'json' @@ -13,7 +18,8 @@ prefix schema: PREFIX pubseq: " -# Build a SPARQL query, submit and return results. Apply transform lambda +# Build a SPARQL query, submit and return results. Apply transform +# lambda when passed in def sparql q, transform = nil q = SPARQL_HEADER+q @@ -42,16 +48,18 @@ start = 0 num = MAX begin query = " -select distinct ?id where { +SELECT DISTINCT ?id +FROM +WHERE { -?arvid ?id . + ?arvid ?id . -} limit #{num} offset #{start} +} LIMIT #{num} OFFSET #{start} " list = sparql(query, lambda { |rec| rec[:id] }) list.each do | l | print(l,"\n") end + $stderr.print("#{start}-#{start+list.size}:#{list.first}\n") # show progress start += num - $stderr.print(start,":",list.first,"\n") end while list.size == MAX -- cgit v1.2.3