blob: 9a8b8eeedde6d08e4f90b03f301783f6d6dfe219 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
#!/usr/bin/env ruby
require 'net/http'
require 'json'
require 'ostruct'
require 'erb'
# Prefix declarations that `sparql` prepends to every query it submits.
# Frozen so the shared constant cannot be mutated in place by accident.
SPARQL_HEADER = "
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix dc: <http://purl.org/dc/terms/>
prefix schema: <https://schema.org/>
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
".freeze
# Build a SPARQL query, submit it and return the results.
#
# SPARQL_HEADER is prepended to +q+, the combined query is URL-encoded and sent
# with an HTTP GET, and the JSON reply is parsed. Each entry of
# results.bindings becomes a Hash keyed by the symbolized names in head.vars.
#
# @param q [String] query text (without the prefix header)
# @param transform [#call, nil] optional callable applied to every row Hash;
#   its return value replaces the row in the result
# @return [Array] one element per binding: the row Hash, or whatever
#   +transform+ returned for it
# @raise [RuntimeError] when the server answers with a non-2xx status
# @raise [JSON::ParserError] when the response body is not valid JSON
def sparql(q, transform = nil)
  query = SPARQL_HEADER + q
  api_url = "http://sparql.genenetwork.org/sparql/?default-graph-uri=&format=application%2Fsparql-results%2Bjson&timeout=0&debug=on&run=+Run+Query+&query=#{ERB::Util.url_encode(query)}"
  response = Net::HTTP.get_response(URI.parse(api_url))
  # Fail with the HTTP status instead of letting an error page reach JSON.parse.
  unless response.is_a?(Net::HTTPSuccess)
    raise "SPARQL request failed: HTTP #{response.code} #{response.message}"
  end

  data = JSON.parse(response.body)
  vars = data['head']['vars']
  data['results']['bindings'].map do |rec|
    row = vars.each_with_object({}) do |name, acc|
      # A binding omits variables that are unbound for that solution; dig
      # yields nil for those instead of raising NoMethodError on nil.
      acc[name.to_sym] = rec.dig(name, 'value')
    end
    transform ? transform.call(row) : row
  end
end
# Number of ids requested per SPARQL call.
MAX = 5_000

# Page through every distinct original_fasta_label value, printing one id per
# line on stdout and a "<next offset>:<first id of page>" progress line on
# stderr after each page. Paging stops after the first page that does not
# contain exactly MAX ids.
# NOTE(review): the query has no ORDER BY, so page boundaries rely on the
# server returning rows in a stable order between requests — confirm.
offset = 0
page_size = MAX
loop do
  query = "
select distinct ?id where {
?arvid <http://biohackathon.org/bh20-seq-schema/original_fasta_label> ?id .
} limit #{page_size} offset #{offset}
"
  ids = sparql(query, ->(rec) { rec[:id] })
  ids.each { |id| print(id, "\n") }
  offset += page_size
  $stderr.print(offset, ":", ids.first, "\n")
  break unless ids.size == MAX
end
|