From c69046ee9a5e24eadcd8cb885633328b0fd88011 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 17 Jul 2020 11:06:33 +0100 Subject: Update generated docs --- doc/blog/using-covid-19-pubseq-part1.html | 192 +++++++++++++++--------------- 1 file changed, 99 insertions(+), 93 deletions(-) (limited to 'doc/blog/using-covid-19-pubseq-part1.html') diff --git a/doc/blog/using-covid-19-pubseq-part1.html b/doc/blog/using-covid-19-pubseq-part1.html index 1959fac..0e6136c 100644 --- a/doc/blog/using-covid-19-pubseq-part1.html +++ b/doc/blog/using-covid-19-pubseq-part1.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - + COVID-19 PubSeq (part 1) @@ -248,20 +248,20 @@ for the JavaScript code in this tag.

Table of Contents

-
-

1 What does this mean?

+
+

1 What does this mean?

@@ -314,8 +314,8 @@ initiative!
-
-

2 Fetch sequence data

+
+

2 Fetch sequence data

The latest run of the pipeline can be viewed here. Each of these @@ -339,8 +339,8 @@ these identifiers throughout.

-
-

3 Predicates

+
+

3 Predicates

To explore an RDF dataset, the first query we can do is open and gets @@ -350,10 +350,10 @@ the following in a SPARQL end point

-
select distinct ?p
-{
+
select distinct ?p
+{
    ?o ?p ?s
-}
+}
 
@@ -364,10 +364,10 @@ To get a -
select distinct ?g
-{
-    GRAPH ?g {?s ?p ?o}
-}
+
select distinct ?g
+{
+    GRAPH ?g {?s ?p ?o}
+}
 
@@ -383,10 +383,10 @@ To list all submitters, try

-
select distinct ?s
-{
-   ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?s
-}
+
select distinct ?s
+{
+   ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?s
+}
 
@@ -397,11 +397,11 @@ and by

-
select distinct ?s
-{
-   ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?id .
+
select distinct ?s
+{
+   ?o <http://biohackathon.org/bh20-seq-schema#MainSchema/submitter> ?id .
    ?id ?p ?s
-}
+}
 
@@ -415,12 +415,12 @@ To lift the full URL out of the query you can use a header like

-
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?dataset ?submitter
-{
+
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?dataset ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter
-}
+}
 
@@ -438,32 +438,32 @@ Now we got this far, lets -
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select (COUNT(distinct ?dataset) as ?num)
-{
+
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select (COUNT(distinct ?dataset) as ?num)
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter
-}
+}
 
-
-

4 Fetch submitter info and other metadata

+
+

4 Fetch submitter info and other metadata

To get datasets with submitters we can do the above

-
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?dataset ?p ?submitter
-{
+
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?dataset ?p ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter
-}
+}
 
@@ -480,13 +480,13 @@ Let's focus on one sample with

-
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?dataset ?submitter
-{
+
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?dataset ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter .
-   FILTER(CONTAINS(?submitter,"Roychoudhury")) .
-}
+   FILTER(CONTAINS(?submitter,"Roychoudhury")) .
+}
 
@@ -496,12 +496,12 @@ see if we can get a sample ID by listing sample predicates

-
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?p
-{
+
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?p
+{
    ?dataset ?p ?o .
    ?dataset pubseq:submitter ?id .
-}
+}
 
@@ -513,15 +513,15 @@ Let's zoom in on those of Roychoudhury with
-
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-select distinct ?sid ?sample ?p1 ?dataset ?submitter
-{
+
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+select distinct ?sid ?sample ?p1 ?dataset ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter .
-   FILTER(CONTAINS(?submitter,"Roychoudhury")) .
+   FILTER(CONTAINS(?submitter,"Roychoudhury")) .
    ?dataset pubseq:sample ?sid .
    ?sid ?p1 ?sample
-}
+}
 
@@ -532,18 +532,13 @@ this database. Let's focus on one sample "MT326090.1" with predicate

-
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
-PREFIX sio: <http://semanticscience.org/resource/>
-select distinct ?sample ?p ?o
-{
+
PREFIX pubseq: <http://biohackathon.org/bh20-seq-schema#MainSchema/>
+PREFIX sio: <http://semanticscience.org/resource/>
+select distinct ?sample ?p ?o
+{
    ?sample sio:SIO_000115 "MT326090.1" .
    ?sample ?p ?o .
-}
+}
 
@@ -561,8 +556,8 @@ to view/query the database.
-
-

5 Fetch all sequences from Washington state

+
+

5 Fetch all sequences from Washington state

Now we know how to get at the origin we can do it the other way round @@ -570,15 +565,11 @@ and fetch all sequences referring to Washington state

-
-select ?seq ?sample
-{
-    ?seq <http://biohackathon.org/bh20-seq-schema#MainSchema/sample> ?sample .
-    ?sample <http://purl.obolibrary.org/obo/GAZ_00000448> <http://www.wikidata.org/entity/Q1223>
-}
+
select ?seq ?sample
+{
+    ?seq <http://biohackathon.org/bh20-seq-schema#MainSchema/sample> ?sample .
+    ?sample <http://purl.obolibrary.org/obo/GAZ_00000448> <http://www.wikidata.org/entity/Q1223>
+}
 
@@ -586,11 +577,26 @@ and fetch all sequences referring to Washington state which lists 300 sequences originating from Washington state! Which is almost half of the set coming out of GenBank.

+ +

+Likewise to list all sequences from Turkey we can find the wikidata +entity is Q43: +

+ +
+
select ?seq ?sample
+{
+    ?seq <http://biohackathon.org/bh20-seq-schema#MainSchema/sample> ?sample .
+    ?sample <http://purl.obolibrary.org/obo/GAZ_00000448> <http://www.wikidata.org/entity/Q43>
+}
+
+
+ -
-

6 Discussion

+
+

6 Discussion

The public sequence uploader collects sequences, raw data and @@ -601,8 +607,8 @@ referenced in publications and origins are citeable.

-
-

7 Acknowledgements

+
+

7 Acknowledgements

The overall effort was due to magnificent freely donated input by a @@ -617,7 +623,7 @@ Garrison this initiative would not have existed!

-
Created by Pjotr Prins (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!
Modified 2020-05-29 Fri 12:06
. +
Created by Pjotr Prins (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!
Modified 2020-07-17 Fri 05:02
.
-- cgit v1.2.3