aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPjotr Prins2020-11-06 12:48:00 +0000
committerPjotr Prins2020-11-06 12:48:00 +0000
commit7c74a20b90ca647ca387eff2ed830c22f5ba1282 (patch)
tree8583e915b945901a04907db4c9f924ee24be5bad
parent5fdfece97fb2d50a10eab5004a6467ec0097ece8 (diff)
downloadbh20-seq-resource-7c74a20b90ca647ca387eff2ed830c22f5ba1282.tar.gz
bh20-seq-resource-7c74a20b90ca647ca387eff2ed830c22f5ba1282.tar.lz
bh20-seq-resource-7c74a20b90ca647ca387eff2ed830c22f5ba1282.zip
Country troubleshooting
-rw-r--r--doc/INSTALL.md1
-rw-r--r--scripts/db_enrichment/country_enrichment.py29
-rw-r--r--scripts/db_enrichment/input_location.csv10
-rw-r--r--scripts/db_enrichment/readme.md12
4 files changed, 34 insertions, 18 deletions
diff --git a/doc/INSTALL.md b/doc/INSTALL.md
index 45aca0f..367b452 100644
--- a/doc/INSTALL.md
+++ b/doc/INSTALL.md
@@ -77,6 +77,7 @@ Note: see above on GUIX_PACKAGE_PATH.
## Run country semantic enrichment script
cd bh20-seq-resource/scripts/db_enrichment
+ edit input_location.csv
guix environment guix --ad-hoc git python nss-certs python-rdflib -- python3 country_enrichment.py
## Run the tests
diff --git a/scripts/db_enrichment/country_enrichment.py b/scripts/db_enrichment/country_enrichment.py
index 1f99d42..f62a64e 100644
--- a/scripts/db_enrichment/country_enrichment.py
+++ b/scripts/db_enrichment/country_enrichment.py
@@ -39,14 +39,36 @@ def callSPARQL(query):
g = Graph()
+test_query="""
+# Use with https://query.wikidata.org/
+SELECT DISTINCT ?a ?label ?country ?continent ?coor WHERE {
+ BIND (XXX as ?a) .
+ OPTIONAL {
+ ?a wdt:P625 ?coor.
+ }
+ ?a rdfs:label ?label .
+ ?a wdt:P17 ?country.
+ ?country rdfs:label ?country_label .
+ OPTIONAL {
+ ?country wdt:P30 ?continent.
+ ?continent rdfs:label ?continent_label
+ FILTER (lang(?continent_label)='en')
+ }
+ FILTER (lang(?country_label)='en')
+ FILTER (lang(?label)='en')
+}
+"""
+
+# wdt:P625 is the Wikidata "coordinate location" property (geographic coordinates)
+
query = """
construct {
?a wdt:P625 ?c.
?a rdfs:label ?label .
?a wdt:P17 ?country.
?country rdfs:label ?country_label .
- ?country wdt:P30 ?continent.
- ?continent rdfs:label ?continent_label
+ ?country wdt:P30 ?continent .
+ ?continent rdfs:label ?continent_label .
} WHERE
{
BIND (XXX as ?a) .
@@ -59,7 +81,6 @@ construct {
FILTER (lang(?continent_label)='en')
FILTER (lang(?country_label)='en')
FILTER (lang(?label)='en')
-
}
"""""
@@ -72,6 +93,8 @@ with open(outputFile, 'r') as csvfile:
counter=counter+1
try:
+ testq = test_query.replace("XXX", "<"+row[0]+">")
+ print(testq)
tmpquery=query.replace("XXX", "<"+row[0]+">")
print(tmpquery)
diff --git a/scripts/db_enrichment/input_location.csv b/scripts/db_enrichment/input_location.csv
index a4246cd..8c3308f 100644
--- a/scripts/db_enrichment/input_location.csv
+++ b/scripts/db_enrichment/input_location.csv
@@ -1,16 +1,6 @@
http://www.wikidata.org/entity/Q7960498
http://www.wikidata.org/entity/Q692895
-http://www.wikidata.org/entity/Q928
http://www.wikidata.org/entity/Q2722074
http://www.wikidata.org/entity/Q25622187
http://www.wikidata.org/entity/Q27684996
http://www.wikidata.org/entity/Q2757125
-http://www.wikidata.org/entity/Q1922283
-http://www.wikidata.org/entity/Q490
-http://www.wikidata.org/entity/Q677037
-http://www.wikidata.org/entity/Q3037
-http://www.wikidata.org/entity/Q843
-http://www.wikidata.org/entity/Q183
-http://www.wikidata.org/entity/Q29
-http://www.wikidata.org/entity/Q17
-http://www.wikidata.org/entity/Q810
diff --git a/scripts/db_enrichment/readme.md b/scripts/db_enrichment/readme.md
index 88e8be5..7539104 100644
--- a/scripts/db_enrichment/readme.md
+++ b/scripts/db_enrichment/readme.md
@@ -11,11 +11,13 @@ File containing information about the countries in our database. Additional info
This SPARQL query (http://sparql.genenetwork.org/sparql/) retrieves all countries (ids) from our database that do not have a label yet:
->SELECT DISTINCT ?geoLocation WHERE
->{
->?fasta ?x [ <<http://purl.obolibrary.org/obo/GAZ_00000448>> ?geoLocation] .
->FILTER NOT EXISTS {?geoLocation <<http://www.w3.org/2000/01/rdf-schema#label>> ?geoLocation_tmp_label}
->}
+```sparql
+SELECT DISTINCT ?geoLocation WHERE
+{
+ ?fasta ?x [ <http://purl.obolibrary.org/obo/GAZ_00000448> ?geoLocation] .
+ FILTER NOT EXISTS {?geoLocation <http://www.w3.org/2000/01/rdf-schema#label> ?geoLocation_tmp_label}
+}
+```
[Run query](http://sparql.genenetwork.org/sparql/?default-graph-uri=&query=%0D%0ASELECT+DISTINCT+%3FgeoLocation++WHERE%0D%0A%7B%0D%0A++%3Ffasta+%3Fx+%5B+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FGAZ_00000448%3E+%3FgeoLocation%5D+.%0D%0A++FILTER+NOT+EXISTS+%7B%3FgeoLocation+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23label%3E+%3FgeoLocation_tmp_label%7D%0D%0A%7D&format=text%2Fhtml&timeout=0&debug=on&run=+Run+Query+)