From c69046ee9a5e24eadcd8cb885633328b0fd88011 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 17 Jul 2020 11:06:33 +0100 Subject: Update generated docs --- doc/blog/using-covid-19-pubseq-part1.html | 192 +++++++++++++++--------------- 1 file changed, 99 insertions(+), 93 deletions(-) (limited to 'doc/blog/using-covid-19-pubseq-part1.html') diff --git a/doc/blog/using-covid-19-pubseq-part1.html b/doc/blog/using-covid-19-pubseq-part1.html index 1959fac..0e6136c 100644 --- a/doc/blog/using-covid-19-pubseq-part1.html +++ b/doc/blog/using-covid-19-pubseq-part1.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
- +This means that when someone uploads a SARS-CoV-2 sequence using one @@ -274,24 +274,24 @@ expressed in a 9606 for Homo sapiens + doc: Host species as defined in NCBITaxon, e.g. http://purl.obolibrary.org/obo/NCBITaxon_9606 for Homo sapiens type: string jsonldPredicate: - _id: http://www.ebi.ac.uk/efo/EFO_0000532 - _type: "@id" - noLinkCheck: true + _id: http://www.ebi.ac.uk/efo/EFO_0000532 + _type: "@id" + noLinkCheck: true host_sex: - doc: Sex of the host as defined in PATO, expect male () or female () + doc: Sex of the host as defined in PATO, expect male () or female () type: string? jsonldPredicate: - _id: http://purl.obolibrary.org/obo/PATO_0000047 - _type: "@id" - noLinkCheck: true + _id: http://purl.obolibrary.org/obo/PATO_0000047 + _type: "@id" + noLinkCheck: true host_age: - doc: Age of the host as number (e.g. 50) + doc: Age of the host as number (e.g. 50) type: int? jsonldPredicate: - _id: http://purl.obolibrary.org/obo/PATO_0000011 + _id: http://purl.obolibrary.org/obo/PATO_0000011
The latest run of the pipeline can be viewed here. Each of these @@ -339,8 +339,8 @@ these identifiers throughout.
To explore an RDF dataset, the first query we can do is open and gets @@ -350,10 +350,10 @@ the following in a SPARQL end point
select distinct ?p
-{
+select distinct ?p
+{
?o ?p ?s
-}
+}
select distinct ?g -{ - GRAPH ?g {?s ?p ?o} -} +select distinct ?g +{ + GRAPH ?g {?s ?p ?o} +}
select distinct ?s -{ - ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?s -} +select distinct ?s +{ + ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?s +}
select distinct ?s
-{
- ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?id .
+select distinct ?s
+{
+ ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?id .
?id ?p ?s
-}
+}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?dataset ?submitter
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?dataset ?submitter
+{
?dataset pubseq:submitter ?id .
?id ?p ?submitter
-}
+}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select (COUNT(distinct ?dataset) as ?num)
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select (COUNT(distinct ?dataset) as ?num)
+{
?dataset pubseq:submitter ?id .
?id ?p ?submitter
-}
+}
To get datasets with submitters we can do the above
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?dataset ?p ?submitter
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?dataset ?p ?submitter
+{
?dataset pubseq:submitter ?id .
?id ?p ?submitter
-}
+}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> -select distinct ?dataset ?submitter -{ +PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> +select distinct ?dataset ?submitter +{ ?dataset pubseq:submitter ?id . ?id ?p ?submitter . - FILTER(CONTAINS(?submitter,"Roychoudhury")) . -} + FILTER(CONTAINS(?submitter,"Roychoudhury")) . +}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?p
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?p
+{
?dataset ?p ?o .
?dataset pubseq:submitter ?id .
-}
+}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> -select distinct ?sid ?sample ?p1 ?dataset ?submitter -{ +PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/> +select distinct ?sid ?sample ?p1 ?dataset ?submitter +{ ?dataset pubseq:submitter ?id . ?id ?p ?submitter . - FILTER(CONTAINS(?submitter,"Roychoudhury")) . + FILTER(CONTAINS(?submitter,"Roychoudhury")) . ?dataset pubseq:sample ?sid . ?sid ?p1 ?sample -} +}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-PREFIX sio: <http://semanticscience.org/resource/>
-select distinct ?sample ?p ?o
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+PREFIX sio: <http://semanticscience.org/resource/>
+select distinct ?sample ?p ?o
+{
?sample sio:SIO_000115 "MT326090.1" .
?sample ?p ?o .
-}
+}
Now we know how to get at the origin we can do it the other way round @@ -570,15 +565,11 @@ and fetch all sequences referring to Washington state
-select ?seq ?sample -{ - ?seq <http://biohackathon.org/bh20-seq-schema#MainSchema/sample> ?sample . - ?sample <http://purl.obolibrary.org/obo/GAZ_00000448> <http://www.wikidata.org/entity/Q1223> -} +select ?seq ?sample +{ + ?seq <http://biohackathon.org/bh20-seq-schema#MainSchema/sample> ?sample . + ?sample <http://purl.obolibrary.org/obo/GAZ_00000448> <http://www.wikidata.org/entity/Q1223> +}
+Likewise, to list all sequences from Turkey, we can use its wikidata +entity, which is Q43: +
+ +select ?seq ?sample +{ + ?seq <http://biohackathon.org/bh20-seq-schema#MainSchema/sample> ?sample . + ?sample <http://purl.obolibrary.org/obo/GAZ_00000448> <http://www.wikidata.org/entity/Q43> +} +
The public sequence uploader collects sequences, raw data and @@ -601,8 +607,8 @@ referenced in publications and origins are citeable.
The overall effort was due to magnificent freely donated input by a @@ -617,7 +623,7 @@ Garrison this initiative would not have existed!