aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bh20simplewebuploader/api.py34
-rw-r--r--test/rest-api.org29
-rw-r--r--workflows/pull-data/genbank/README.md12
-rwxr-xr-xworkflows/tools/pubseq-fetch-ids2
4 files changed, 72 insertions, 5 deletions
diff --git a/bh20simplewebuploader/api.py b/bh20simplewebuploader/api.py
index b1b505f..11c74f2 100644
--- a/bh20simplewebuploader/api.py
+++ b/bh20simplewebuploader/api.py
@@ -7,6 +7,9 @@ import sys
from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify
from bh20simplewebuploader.main import app, sparqlURL
+PUBSEQ="http://covid19.genenetwork.org"
+ARVADOS="https://collections.lugli.arvadosapi.com/c="
+
# Helper functions
def fetch_sample_metadata(id):
@@ -42,13 +45,40 @@ def version():
@app.route('/api/sample/<id>.json')
def sample(id):
+ """
+
+The sample API returns a record pointing to other resources,
+notably the permalink, the original metadata record and the
+FASTA data.
+
+curl http://localhost:5067/api/sample/MT533203.1.json
+[
+ {
+ "collection": "http://covid19.genenetwork.org/resource/lugli-4zz18-uovend31hdwa5ks",
+ "date": "2020-04-27",
+ "fasta": "https://collections.lugli.arvadosapi.com/c=lugli-4zz18-uovend31hdwa5ks/sequence.fasta",
+ "id": "MT533203.1",
+ "info": "http://identifiers.org/insdc/MT533203.1#sequence",
+ "mapper": "minimap v. 2.17",
+ "metadata": "https://collections.lugli.arvadosapi.com/c=lugli-4zz18-uovend31hdwa5ks/metadata.yaml",
+ "permalink": "http://covid19.genenetwork.org/resource/MT533203.1",
+ "sequencer": "http://www.ebi.ac.uk/efo/EFO_0008632",
+ "specimen": "http://purl.obolibrary.org/obo/NCIT_C155831"
+ }
+]
+
+
+"""
# metadata = file.name(seq)+"/metadata.yaml"
meta = fetch_sample_metadata(id)
print(meta)
+ # http://collections.lugli.arvadosapi.com/c=lugli-4zz18-uovend31hdwa5ks/metadata.yaml
return jsonify([{
'id': x['id']['value'],
- 'fasta': x['seq']['value'],
- 'collection': os.path.dirname(x['seq']['value']),
+ 'collection': x['seq']['value'],
+ 'permalink': PUBSEQ+'/resource/'+x['id']['value'],
+ 'fasta': ARVADOS+os.path.basename(x['seq']['value'])+'/sequence.fasta',
+ 'metadata': ARVADOS+os.path.basename(x['seq']['value'])+'/metadata.yaml',
'date': x['date']['value'],
'info': x['info']['value'],
'specimen': x['specimen']['value'],
diff --git a/test/rest-api.org b/test/rest-api.org
index 66639c3..2ea2b11 100644
--- a/test/rest-api.org
+++ b/test/rest-api.org
@@ -36,6 +36,35 @@ curl http://covid19.genenetwork.org/api/version
}
#+end_src
+The current API can fetch data:
+
+#+begin_src js
+curl http://covid19.genenetwork.org/api/search?s=MT533203.1
+[
+ {
+ "collection": "http://covid19.genenetwork.org/resource",
+ "fasta": "http://covid19.genenetwork.org/resource/lugli-4zz18-uovend31hdwa5ks",
+ "id": "MT533203.1",
+ "info": "http://identifiers.org/insdc/MT533203.1#sequence"
+ }
+]
+
+curl http://covid19.genenetwork.org/api/sample/MT533203.1.json
+[
+ {
+ "collection": "http://covid19.genenetwork.org/resource",
+ "date": "2020-04-27",
+ "fasta": "http://covid19.genenetwork.org/resource/lugli-4zz18-uovend31hdwa5ks",
+ "id": "MT533203.1",
+ "info": "http://identifiers.org/insdc/MT533203.1#sequence",
+ "mapper": "minimap v. 2.17",
+ "sequencer": "http://www.ebi.ac.uk/efo/EFO_0008632",
+ "specimen": "http://purl.obolibrary.org/obo/NCIT_C155831"
+ }
+]
+#+end_src
+
+
The Python3 version is
#+begin_src python :session :exports both
diff --git a/workflows/pull-data/genbank/README.md b/workflows/pull-data/genbank/README.md
index 5464d1d..188ff6f 100644
--- a/workflows/pull-data/genbank/README.md
+++ b/workflows/pull-data/genbank/README.md
@@ -11,7 +11,8 @@ The following workflow sends GenBank data into PubSeq
```sh
# --- get list of IDs already in PubSeq
-../../tools/sparql-fetch-ids > pubseq_ids.txt
+../../tools/pubseq-fetch-ids > pubseq_ids.txt
+
# --- get list of missing genbank IDs
python3 genbank-fetch-ids.py --skip pubseq_ids.txt > genbank_ids.txt
@@ -26,6 +27,13 @@ python3 ../../workflows/tools/normalize-yamlfa.py -s ~/tmp/yamlfa/state.json --s
```
+## Validate GenBank data
+
+To pull the data from PubSeq, use the list of PubSeq IDs generated
+above.
+
+
+
# TODO
-- [ ] Add id for GenBank accession - i.e. how can we tell a record is from GenBank
+- [X] Add id for GenBank accession - i.e. how can we tell a record is from GenBank
diff --git a/workflows/tools/pubseq-fetch-ids b/workflows/tools/pubseq-fetch-ids
index 19b2d82..f5920ec 100755
--- a/workflows/tools/pubseq-fetch-ids
+++ b/workflows/tools/pubseq-fetch-ids
@@ -2,7 +2,7 @@
#
# Use a SPARQL query to fetch all IDs in the PubSeq database
#
-# sparql-fetch-ids > pubseq_ids.txt
+# pubseq-fetch-ids > pubseq_ids.txt
#
# Note: requires Ruby 3.x. Older Ruby gives a syntax error