about summary refs log tree commit diff
path: root/scripts/db_enrichment/country_enrichment.py
diff options
context:
space:
mode:
authorPjotr Prins2020-11-06 10:13:05 +0000
committerPjotr Prins2020-11-06 10:13:05 +0000
commitd75f1c74fbf86652b02520de6ed46c981cf27e50 (patch)
tree6c2744b7830453c20292b83d9aa0e9245abee4c2 /scripts/db_enrichment/country_enrichment.py
parent43d7264dda8061a024befbc9ca0a89d7159b1e40 (diff)
downloadbh20-seq-resource-d75f1c74fbf86652b02520de6ed46c981cf27e50.tar.gz
bh20-seq-resource-d75f1c74fbf86652b02520de6ed46c981cf27e50.tar.lz
bh20-seq-resource-d75f1c74fbf86652b02520de6ed46c981cf27e50.zip
Adding Tennessee items
Diffstat (limited to 'scripts/db_enrichment/country_enrichment.py')
-rw-r--r--scripts/db_enrichment/country_enrichment.py43
1 files changed, 25 insertions, 18 deletions
diff --git a/scripts/db_enrichment/country_enrichment.py b/scripts/db_enrichment/country_enrichment.py
index 8dcf5f2..1f99d42 100644
--- a/scripts/db_enrichment/country_enrichment.py
+++ b/scripts/db_enrichment/country_enrichment.py
@@ -1,3 +1,12 @@
+# This script by @LLTommy queries the main SPARQL endpoint to find which
+# collections are missing country information for GPS coordinates, such as:
+#
+# <http://www.wikidata.org/entity/Q657004> rdfs:label "Canterbury Region" ;
+#    ns1:P17 <http://www.wikidata.org/entity/Q664> ;
+#    ns1:P625 "Point(172.0 -43.6)" .
+#
+# See also the ./readme.md
+
 import requests
 import csv
 from rdflib import Graph, Literal, RDF, URIRef
@@ -30,30 +39,28 @@ def callSPARQL(query):
 
 g = Graph()
 
-
-
 query = """
 construct {
-    ?a wdt:P625 ?c. 
+    ?a wdt:P625 ?c.
     ?a rdfs:label ?label .
-    ?a wdt:P17 ?country.      
-    ?country rdfs:label ?country_label . 
-    ?country wdt:P30 ?continent. 
-    ?continent rdfs:label ?continent_label   
-} WHERE 
-{ 
-    BIND (XXX as ?a) . 
-    ?a wdt:P625 ?c. 
+    ?a wdt:P17 ?country.
+    ?country rdfs:label ?country_label .
+    ?country wdt:P30 ?continent.
+    ?continent rdfs:label ?continent_label
+} WHERE
+{
+    BIND (XXX as ?a) .
+    ?a wdt:P625 ?c.
     ?a rdfs:label ?label .
-    ?a wdt:P17 ?country.      
-    ?country rdfs:label ?country_label .    
-    ?country wdt:P30 ?continent. 
+    ?a wdt:P17 ?country.
+    ?country rdfs:label ?country_label .
+    ?country wdt:P30 ?continent.
     ?continent rdfs:label ?continent_label
-    FILTER (lang(?continent_label)='en')           
+    FILTER (lang(?continent_label)='en')
     FILTER (lang(?country_label)='en')
-    FILTER (lang(?label)='en') 
+    FILTER (lang(?label)='en')
 
-}  
+}
 """""
 
 outputFile = 'input_location.csv'
@@ -88,4 +95,4 @@ with open(outputFile, 'r') as csvfile:
             raise
 
 print(g.serialize(format='n3').decode("utf-8"))
-g.serialize(destination='enriched_ouput.txt', format='turtle')
\ No newline at end of file
+g.serialize(destination='enriched_output.txt', format='turtle')