1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
# This script by @LLTommy queries the main SPARQL end point to find what
# collections are missing country information for GPS coordinates, such as:
#
# <http://www.wikidata.org/entity/Q657004> rdfs:label "Canterbury Region" ;
# ns1:P17 <http://www.wikidata.org/entity/Q664> ;
# ns1:P625 "Point(172.0 -43.6)" .
#
# See also the ./readme.md
import requests
import csv
from rdflib import Graph, Literal, RDF, URIRef
import time
# HTTP headers attached to the SPARQL requests made by this script; the
# User-Agent value names the crawler and carries a contact address.
sparqlHeaders = {
    'User-Agent': (
        'genenetworkCrawler/1.0 '
        '(covid-19.genenetwork.org; pjotr.public821@thebird.nl) '
        'genenetworkCrawler/1.0'
    ),
}
def callSPARQL(query):
    """Send *query* to the Wikidata SPARQL endpoint and return its bindings.

    The request asks for JSON output and returns the value found under
    ``results -> bindings`` in the decoded response.

    If the response carries a ``Retry-After`` header the function waits
    (for the number of seconds given by the header when it is a plain
    integer, otherwise 45 seconds) and repeats the request once.

    Args:
        query: SPARQL query text.

    Returns:
        The ``bindings`` value of the JSON response.

    Raises:
        Exception: whatever the HTTP request or JSON decoding raised; the
            original exception is re-raised after diagnostics are printed.
    """
    endpoint = 'https://query.wikidata.org/sparql'
    payload = {'query': query, 'format': 'json'}
    # Remains None when the very first request raises, so the error handler
    # below can tell whether there is a response to report on.
    r = None
    try:
        r = requests.get(endpoint, params=payload, headers=sparqlHeaders)
        # Pause after every request to keep the request rate low.
        time.sleep(1)
        # The HTTP header is spelled "Retry-After" (hyphen, not underscore).
        retry_after = r.headers.get('Retry-After')
        if retry_after is not None:
            # Slow the process down in case we sent too many requests:
            # sleep, then try again once.
            print(r.headers)
            # The header may also be an HTTP date; only a plain number of
            # seconds is honoured, anything else falls back to 45 seconds.
            if retry_after.strip().isdigit():
                delay = int(retry_after)
            else:
                delay = 45
            time.sleep(delay)
            r = requests.get(endpoint, params=payload, headers=sparqlHeaders)
        result = r.json()['results']['bindings']
    except Exception:
        print("Error during SPARQL call. We abort the process and have to investigate")
        if r is not None:
            print(r)
            print(r.content)
            print(r.url)
        # Bare raise keeps the original exception type and traceback.
        raise
    return result
# Graph that collects every triple returned by the CONSTRUCT queries; it is
# serialized once all input rows have been processed.
g = Graph()
# SELECT variant of the lookup, meant for manual use in the Wikidata query
# UI (see the comment inside the string). "XXX" is a placeholder that gets
# replaced by "<entity URI>". Coordinates and continent are OPTIONAL here.
# In this script the resulting text is only printed, never sent.
test_query="""
# Use with https://query.wikidata.org/
SELECT DISTINCT ?a ?label ?country ?continent ?coor WHERE {
BIND (XXX as ?a) .
OPTIONAL {
?a wdt:P625 ?coor.
}
?a rdfs:label ?label .
?a wdt:P17 ?country.
?country rdfs:label ?country_label .
OPTIONAL {
?country wdt:P30 ?continent.
?continent rdfs:label ?continent_label
FILTER (lang(?continent_label)='en')
}
FILTER (lang(?country_label)='en')
FILTER (lang(?label)='en')
}
"""
# CONSTRUCT query that is actually sent to the endpoint. "XXX" is the same
# placeholder as above. Properties used: wdt:P625 are GEO coordinates,
# wdt:P17 links to ?country and wdt:P30 links the country to ?continent.
# None of the patterns is OPTIONAL, and all three labels are filtered to
# English, so an entity missing any of them produces no triples.
# NOTE: the literal is closed by `"""` followed by an empty `""` literal;
# adjacent string literals are concatenated, so the value is unaffected.
query = """
construct {
?a wdt:P625 ?c.
?a rdfs:label ?label .
?a wdt:P17 ?country.
?country rdfs:label ?country_label .
?country wdt:P30 ?continent .
?continent rdfs:label ?continent_label .
} WHERE
{
BIND (XXX as ?a) .
?a wdt:P625 ?c.
?a rdfs:label ?label .
?a wdt:P17 ?country.
?country rdfs:label ?country_label .
?country wdt:P30 ?continent.
?continent rdfs:label ?continent_label
FILTER (lang(?continent_label)='en')
FILTER (lang(?country_label)='en')
FILTER (lang(?label)='en')
}
"""""
# Despite its name this is the INPUT file: a CSV that is opened for reading
# and whose first column supplies the entity URI for each query.
outputFile = 'input_location.csv'
# Main script body: for every entity URI in the first column of the input
# CSV, run the CONSTRUCT query and add the returned triples to the graph `g`,
# then print the graph as N3 and write it to 'enriched_output.txt' as Turtle.
#
# newline='' is what the csv module recommends for files handed to csv.reader.
with open(outputFile, 'r', newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
    counter = 0
    for csv_row in spamreader:
        counter = counter + 1
        # Blank lines come back as an empty list (or an empty first cell);
        # there is no URI to query, so skip them instead of failing on [0].
        if not csv_row or not csv_row[0].strip():
            continue
        try:
            entity = "<" + csv_row[0] + ">"
            # The SELECT variant is only printed, for pasting into the
            # Wikidata query UI when a row needs manual investigation.
            testq = test_query.replace("XXX", entity)
            print(testq)
            tmpquery = query.replace("XXX", entity)
            print(tmpquery)
            returnReply = callSPARQL(tmpquery)
            print(returnReply)
            # Each binding of the CONSTRUCT result describes one triple
            # through its 'subject', 'predicate' and 'object' entries.
            # A distinct loop name avoids shadowing the CSV row.
            for triple in returnReply:
                print(triple)
                sub = URIRef(triple['subject']['value'])
                pred = URIRef(triple['predicate']['value'])
                obj_type = triple['object']['type']
                if obj_type == 'uri':
                    obj = URIRef(triple['object']['value'])
                elif obj_type in ('literal', 'typed-literal'):
                    # Literals are stored as plain strings, as before.
                    obj = Literal(triple['object']['value'])
                else:
                    # Previously an unknown type left `obj` unbound, or
                    # silently reused the object of the previous triple.
                    print("Skipping triple with unsupported object type: " + str(obj_type))
                    continue
                g.add((sub, pred, obj))
        except Exception as e:
            print(e)
            raise

# Graph.serialize() returns bytes on older rdflib releases and str on newer
# ones; decode only when bytes were returned.
n3_output = g.serialize(format='n3')
if isinstance(n3_output, bytes):
    n3_output = n3_output.decode("utf-8")
print(n3_output)
g.serialize(destination='enriched_output.txt', format='turtle')
|