blob: f5920eca4956208ffa86420b2ef6ab2d7e2c4075 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
#!/usr/bin/env ruby
#
# Use a SPARQL query to fetch all IDs in the PubSeq database
#
# pubseq-fetch-ids > pubseq_ids.txt
#
# Note: requires Ruby 3.x because the script uses rightward pattern
# matching (`expr => pattern`); older Rubies report a syntax error.
require 'net/http'
require 'json'
require 'ostruct'
require 'erb'
require 'pp'
# Page size: the number of rows requested from the endpoint per query.
MAX = 5_000

# Prefix declarations prepended to every query sent by `sparql`.
# Frozen so the shared constant cannot be mutated in place by accident;
# `SPARQL_HEADER + query` still works because `+` builds a new String.
SPARQL_HEADER = "
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix dc: <http://purl.org/dc/terms/>
prefix schema: <https://schema.org/>
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
".freeze
# Submit a SPARQL query to the GeneNetwork endpoint and return its rows.
#
# SPARQL_HEADER (the shared prefix declarations) is prepended to +query+,
# the text is URL-encoded into a GET request, and the JSON reply is turned
# into one Hash per solution, keyed by variable name (Symbol) and holding
# the binding's plain :value string.
#
# @param query [String] SPARQL query text, without prefix declarations
# @param transform [#call, nil] optional callable applied to each row Hash;
#   whatever it returns (a scalar, an Array, an OpenStruct, ...) takes the
#   row's place in the result
# @return [Array] row Hashes, or the transform's return value for each row
# @raise [RuntimeError] when the endpoint does not answer with a 2xx status
# @raise [JSON::ParserError] when the response body is not valid JSON
# @raise [NoMatchingPatternError] when the JSON lacks head.vars or
#   results.bindings
def sparql(query, transform = nil)
  api_url = "http://sparql.genenetwork.org/sparql/?default-graph-uri=&format=application%2Fsparql-results%2Bjson&timeout=0&debug=on&run=+Run+Query+&query=#{ERB::Util.url_encode(SPARQL_HEADER + query)}"
  response = Net::HTTP.get_response(URI.parse(api_url))
  # Fail with the HTTP status instead of letting an error page reach the
  # JSON parser and surface as an unrelated parse error.
  unless response.is_a?(Net::HTTPSuccess)
    raise "SPARQL request failed: HTTP #{response.code} #{response.message}"
  end

  data = JSON.parse(response.body, symbolize_names: true)
  data => { head: { vars: }, results: { bindings: } }
  names = vars.map(&:to_sym)
  bindings.map do |solution|
    # A variable that is unbound in a solution is simply absent from its
    # binding object, so use dig to yield nil rather than raise on nil[:value].
    row = names.to_h { |name| [name, solution.dig(name, :value)] }
    transform ? transform.call(row) : row
  end
end
# Page through every distinct original_fasta_label in the metadata graph,
# `num` rows at a time. Each ID goes to stdout on its own line; one progress
# line per page goes to stderr so that redirecting stdout to a file still
# captures only IDs.
#
# NOTE(review): the query has no ORDER BY, and SPARQL does not promise a
# stable solution order without one, so successive LIMIT/OFFSET pages are
# not guaranteed to be disjoint or complete. Confirm the endpoint returns a
# stable order before relying on the output being exhaustive.
start = 0
num = MAX
loop do
  # The literal is kept exactly as it was; only LIMIT/OFFSET vary per page.
  query = "
SELECT DISTINCT ?id
FROM <http://covid-19.genenetwork.org/graph/metadata.ttl>
WHERE {
?arvid <http://biohackathon.org/bh20-seq-schema/original_fasta_label> ?id .
} LIMIT #{num} OFFSET #{start}
"
  list = sparql(query, ->(rec) { rec[:id] })
  list.each { |id| print(id, "\n") }
  $stderr.print("#{start}-#{start + list.size}:#{list.first}\n") # show progress
  start += num
  # A page shorter than the requested LIMIT means the result set is exhausted.
  break unless list.size == num
end
|