diff options
Diffstat (limited to 'workflows/tools/pubseq-fetch-ids')
-rwxr-xr-x | workflows/tools/pubseq-fetch-ids | 67 |
1 files changed, 0 insertions, 67 deletions
diff --git a/workflows/tools/pubseq-fetch-ids b/workflows/tools/pubseq-fetch-ids deleted file mode 100755 index f5920ec..0000000 --- a/workflows/tools/pubseq-fetch-ids +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env ruby -# -# Use a SPARQL query to fetch all IDs in the PubSeq database -# -# pubseq-fetch-ids > pubseq_ids.txt -# -# Note: requires Ruby 3.x. Older Ruby gives a syntax error - -require 'net/http' -require 'json' -require 'ostruct' -require 'erb' -require 'pp' - -MAX=5_000 - -SPARQL_HEADER=" -prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> -prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -prefix dc: <http://purl.org/dc/terms/> -prefix schema: <https://schema.org/> -PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> -" - -# Build a SPARQL query, submit and return results. Apply transform -# lambda when passed in -def sparql query, transform = nil - api_url = "http://sparql.genenetwork.org/sparql/?default-graph-uri=&format=application%2Fsparql-results%2Bjson&timeout=0&debug=on&run=+Run+Query+&query=#{ERB::Util.url_encode(SPARQL_HEADER + query)}" - - response = Net::HTTP.get_response(URI.parse(api_url)) - data = JSON.parse(response.body,symbolize_names: true) - data => { head: { vars: }, results: { bindings: results} } - vars = vars.map { |v| v.to_sym } - results.map { |rec| - # return results after transforming to a Hash and applying the - # optional transform lambda. Note the transform can not only - # reduce results, or create an array, but also may transform into - # an OpenStruct. - res = {} - vars.each { |name| res[name] = rec[name][:value] } - if transform - transform.call(res) - else - res - end - } -end - -start = 0 -num = MAX -begin - query = " -SELECT DISTINCT ?id -FROM <http://covid-19.genenetwork.org/graph/metadata.ttl> -WHERE { - - ?arvid <http://biohackathon.org/bh20-seq-schema/original_fasta_label> ?id . - -} LIMIT #{num} OFFSET #{start} -" - list = sparql(query, lambda { |rec| rec[:id] }) - list.each do | l | - print(l,"\n") - end - $stderr.print("#{start}-#{start+list.size}:#{list.first}\n") # show progress - start += num -end while list.size == MAX |