From 5216cf3149024052a3e87f4491d2bb7d9b06a237 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 31 Dec 2020 14:25:33 +0000
Subject: Improve SPARQL query and comments
---
workflows/pull-data/genbank/sparql-fetch-ids | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
(limited to 'workflows/pull-data')
diff --git a/workflows/pull-data/genbank/sparql-fetch-ids b/workflows/pull-data/genbank/sparql-fetch-ids
index 9a8b8ee..683044c 100755
--- a/workflows/pull-data/genbank/sparql-fetch-ids
+++ b/workflows/pull-data/genbank/sparql-fetch-ids
@@ -1,4 +1,9 @@
#!/usr/bin/env ruby
+#
+# Use a SPARQL query to fetch all IDs in the PubSeq database
+#
+# sparql-fetch-ids > pubseq_ids.txt
+#
require 'net/http'
require 'json'
@@ -13,7 +18,8 @@ prefix schema:
PREFIX pubseq:
"
-# Build a SPARQL query, submit and return results. Apply transform lambda
+# Build a SPARQL query, submit and return results. Apply transform
+# lambda when passed in
def sparql q, transform = nil
q = SPARQL_HEADER+q
@@ -42,16 +48,18 @@ start = 0
num = MAX
begin
query = "
-select distinct ?id where {
+SELECT DISTINCT ?id
+FROM
+WHERE {
-?arvid ?id .
+ ?arvid ?id .
-} limit #{num} offset #{start}
+} LIMIT #{num} OFFSET #{start}
"
list = sparql(query, lambda { |rec| rec[:id] })
list.each do | l |
print(l,"\n")
end
+ $stderr.print("#{start}-#{start+list.size}:#{list.first}\n") # show progress
start += num
- $stderr.print(start,":",list.first,"\n")
end while list.size == MAX
--
cgit v1.2.3