From c69046ee9a5e24eadcd8cb885633328b0fd88011 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 17 Jul 2020 11:06:33 +0100 Subject: Update generated docs --- doc/blog/using-covid-19-pubseq-part1.html | 192 +++++++++++++++--------------- 1 file changed, 99 insertions(+), 93 deletions(-) (limited to 'doc/blog/using-covid-19-pubseq-part1.html') diff --git a/doc/blog/using-covid-19-pubseq-part1.html b/doc/blog/using-covid-19-pubseq-part1.html index 1959fac..0e6136c 100644 --- a/doc/blog/using-covid-19-pubseq-part1.html +++ b/doc/blog/using-covid-19-pubseq-part1.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
- +This means that when someone uploads a SARS-CoV-2 sequence using one @@ -274,24 +274,24 @@ expressed in a 9606 for Homo sapiens + doc: Host species as defined in NCBITaxon, e.g. http://purl.obolibrary.org/obo/NCBITaxon_9606 for Homo sapiens type: string jsonldPredicate: - _id: http://www.ebi.ac.uk/efo/EFO_0000532 - _type: "@id" - noLinkCheck: true + _id: http://www.ebi.ac.uk/efo/EFO_0000532 + _type: "@id" + noLinkCheck: true host_sex: - doc: Sex of the host as defined in PATO, expect male () or female () + doc: Sex of the host as defined in PATO, expect male () or female () type: string? jsonldPredicate: - _id: http://purl.obolibrary.org/obo/PATO_0000047 - _type: "@id" - noLinkCheck: true + _id: http://purl.obolibrary.org/obo/PATO_0000047 + _type: "@id" + noLinkCheck: true host_age: - doc: Age of the host as number (e.g. 50) + doc: Age of the host as number (e.g. 50) type: int? jsonldPredicate: - _id: http://purl.obolibrary.org/obo/PATO_0000011 + _id: http://purl.obolibrary.org/obo/PATO_0000011
The latest run of the pipeline can be viewed here. Each of these @@ -339,8 +339,8 @@ these identifiers throughout.
To explore an RDF dataset, the first query we can do is open and gets @@ -350,10 +350,10 @@ the following in a SPARQL end point
select distinct ?p
-{
+select distinct ?p
+{
?o ?p ?s
-}
+}
select distinct ?g
-{
- GRAPH ?g {?s ?p ?o}
-}
+select distinct ?g
+{
+ GRAPH ?g {?s ?p ?o}
+}
select distinct ?s
-{
- ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?s
-}
+select distinct ?s
+{
+ ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?s
+}
select distinct ?s
-{
- ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?id .
+select distinct ?s
+{
+ ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?id .
?id ?p ?s
-}
+}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?dataset ?submitter
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?dataset ?submitter
+{
?dataset pubseq:submitter ?id .
?id ?p ?submitter
-}
+}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select (COUNT(distinct ?dataset) as ?num)
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select (COUNT(distinct ?dataset) as ?num)
+{
?dataset pubseq:submitter ?id .
?id ?p ?submitter
-}
+}
To get datasets with submitters we can do the above
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?dataset ?p ?submitter
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?dataset ?p ?submitter
+{
?dataset pubseq:submitter ?id .
?id ?p ?submitter
-}
+}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?dataset ?submitter
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?dataset ?submitter
+{
?dataset pubseq:submitter ?id .
?id ?p ?submitter .
- FILTER(CONTAINS(?submitter,"Roychoudhury")) .
-}
+ FILTER(CONTAINS(?submitter,"Roychoudhury")) .
+}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?p
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?p
+{
?dataset ?p ?o .
?dataset pubseq:submitter ?id .
-}
+}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?sid ?sample ?p1 ?dataset ?submitter
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?sid ?sample ?p1 ?dataset ?submitter
+{
?dataset pubseq:submitter ?id .
?id ?p ?submitter .
- FILTER(CONTAINS(?submitter,"Roychoudhury")) .
+ FILTER(CONTAINS(?submitter,"Roychoudhury")) .
?dataset pubseq:sample ?sid .
?sid ?p1 ?sample
-}
+}
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-PREFIX sio: <http://semanticscience.org/resource/>
-select distinct ?sample ?p ?o
-{
+PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+PREFIX sio: <http://semanticscience.org/resource/>
+select distinct ?sample ?p ?o
+{
?sample sio:SIO_000115 "MT326090.1" .
?sample ?p ?o .
-}
+}
Now we know how to get at the origin we can do it the other way round @@ -570,15 +565,11 @@ and fetch all sequences referring to Washington state
-select ?seq ?sample
-{
- ?seq <http://biohackathon.org/bh20-seq-schema#MainSchema/sample> ?sample .
- ?sample <http://purl.obolibrary.org/obo/GAZ_00000448> <http://www.wikidata.org/entity/Q1223>
-}
+select ?seq ?sample
+{
+ ?seq <http://biohackathon.org/bh20-seq-schema#MainSchema/sample> ?sample .
+ ?sample <http://purl.obolibrary.org/obo/GAZ_00000448> <http://www.wikidata.org/entity/Q1223>
+}
+Likewise, to list all sequences from Turkey, we first look up its wikidata +entity, which is Q43: +
+ +select ?seq ?sample
+{
+ ?seq <http://biohackathon.org/bh20-seq-schema#MainSchema/sample> ?sample .
+ ?sample <http://purl.obolibrary.org/obo/GAZ_00000448> <http://www.wikidata.org/entity/Q43>
+}
+
The public sequence uploader collects sequences, raw data and @@ -601,8 +607,8 @@ referenced in publications and origins are citeable.
The overall effort was due to magnificent freely donated input by a @@ -617,7 +623,7 @@ Garrison this initiative would not have existed!