From 6ade3c1d47a83f0ed52a4efe287017b6a0fcc44e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 17 Jul 2020 12:05:53 +0100 Subject: Preparing for EBI submission --- bh20simplewebuploader/templates/blog.html | 8 + doc/blog/using-covid-19-pubseq-part6.html | 393 ++++++++++++++++++++++ doc/blog/using-covid-19-pubseq-part6.org | 7 +- scripts/submit_ebi/example/project-submission.xml | 3 +- scripts/submit_ebi/example/project.xml | 3 +- scripts/submit_ebi/example/sample-submission.xml | 8 + scripts/submit_ebi/example/sample.xml | 68 ++++ 7 files changed, 486 insertions(+), 4 deletions(-) create mode 100644 doc/blog/using-covid-19-pubseq-part6.html create mode 100644 scripts/submit_ebi/example/sample-submission.xml create mode 100644 scripts/submit_ebi/example/sample.xml diff --git a/bh20simplewebuploader/templates/blog.html b/bh20simplewebuploader/templates/blog.html index 823f8a1..f4c2a85 100644 --- a/bh20simplewebuploader/templates/blog.html +++ b/bh20simplewebuploader/templates/blog.html @@ -63,6 +63,14 @@ We explore the Arvados command line and API +
+
+ Prepare for uploading to EBI/ENA +
+
+ Generate the files needed for uploading to EBI/ENA +
+
diff --git a/doc/blog/using-covid-19-pubseq-part6.html b/doc/blog/using-covid-19-pubseq-part6.html new file mode 100644 index 0000000..278abe8 --- /dev/null +++ b/doc/blog/using-covid-19-pubseq-part6.html @@ -0,0 +1,393 @@ + + + + + + + +COVID-19 PubSeq (part 6) + + + + + + + +
+

COVID-19 PubSeq (part 6)

+
+

Table of Contents

+ +
+ + +
+

1 Generating output for EBI

+
+

+Would it not be great if an uploader to PubSeq could also export samples +to, say, EBI? That is what we discuss in this section. The submission +process is somewhat laborious, and once you have submitted to PubSeq, +why not export the same data to EBI too with the least amount of effort? +

+ +

+COVID-19 PubSeq is a data source - both sequence data and metadata - +that can be used to push data to other sources, such as EBI. You can +register samples programmatically with a specific XML interface. Note +that (at this point) if you want to submit a sequence (FASTA) it can +only be done through the Webin-CLI. Raw data (FASTQ) can go through +the XML interface. +

+ +

+EBI sequence resources are presented through ENA. For example +Sequence: MT394864.1. +

+ +

+EBI has XML Formats for +

+ +
    +
  • SUBMISSION
  • +
  • STUDY
  • +
  • SAMPLE
  • +
  • EXPERIMENT
  • +
  • RUN
  • +
  • ANALYSIS
  • +
  • DAC
  • +
  • POLICY
  • +
  • DATASET
  • +
  • PROJECT
  • +
+ +

+with the schemas listed here. Since we are submitting sequences we +should follow submitting full genome assembly guidelines and +ENA guidelines. The first step is to define the study, next the sample +and finally the sequence (assembly). +

+
+
+ +
+

2 Defining the EBI study

+
+

+A study is defined here and looks like +

+ +
+
<PROJECT_SET>
+   <PROJECT alias="COVID-19 Washington DC">
+      <TITLE>Sequencing SARS-CoV-2 in the Washington DC area</TITLE>
+      <DESCRIPTION>This study collects samples from COVID-19 patients in the Washington DC area</DESCRIPTION>
+      <SUBMISSION_PROJECT>
+         <SEQUENCING_PROJECT/>
+      </SUBMISSION_PROJECT>
+   </PROJECT>
+</PROJECT_SET>
+
+
+ +

+In addition, a submission 'command' is required, which looks like +

+ +
+
<SUBMISSION>
+   <ACTIONS>
+      <ACTION>
+         <ADD/>
+      </ACTION>
+      <ACTION>
+         <HOLD HoldUntilDate="TODO: release date"/>
+      </ACTION>
+   </ACTIONS>
+</SUBMISSION>
+
+
+
+ +

+The Webin system accepts such sources using a command like +

+ +
+curl -u username:password -F "SUBMISSION=@submission.xml" \
+  -F "PROJECT=@project.xml" "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/"
+
+ + +

+as described here. Note that this is the test server. For the final +version use www.ebi.ac.uk instead of wwwdev.ebi.ac.uk. You may also +need the --insecure switch to circumvent certificate checking. +

+ +

+work in progress (WIP) +

+
+
+ +
+

3 Define the EBI sample

+
+

+work in progress (WIP) +

+
+
+ +
+

4 Define the EBI sequence

+
+

+work in progress (WIP) +

+
+
+
+
+
Created by Pjotr Prins (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!
Modified 2020-07-17 Fri 06:05
. +
+ + diff --git a/doc/blog/using-covid-19-pubseq-part6.org b/doc/blog/using-covid-19-pubseq-part6.org index 2d1c5e0..8964700 100644 --- a/doc/blog/using-covid-19-pubseq-part6.org +++ b/doc/blog/using-covid-19-pubseq-part6.org @@ -83,9 +83,12 @@ also a submission 'command' is required looking like The webin system accepts such sources using a command like -: curl -u username:password -F "SUBMISSION=@submission.xml" -F "PROJECT=@project.xml" "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/" +: curl -u username:password -F "SUBMISSION=@submission.xml" \ +: -F "PROJECT=@project.xml" "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/" -as described [[https://ena-docs.readthedocs.io/en/latest/submit/study/programmatic.html#submit-the-xmls-using-curl][here]]. +as described [[https://ena-docs.readthedocs.io/en/latest/submit/study/programmatic.html#submit-the-xmls-using-curl][here]]. Note that this is the test server. For the final +version use www.ebi.ac.uk instead of wwwdev.ebi.ac.uk. You may also +need the --insecure switch to circumvent certificate checking. 
/work in progress (WIP)/ diff --git a/scripts/submit_ebi/example/project-submission.xml b/scripts/submit_ebi/example/project-submission.xml index 2d3ddc1..1abb827 100644 --- a/scripts/submit_ebi/example/project-submission.xml +++ b/scripts/submit_ebi/example/project-submission.xml @@ -1,3 +1,4 @@ + @@ -6,6 +7,6 @@ - + diff --git a/scripts/submit_ebi/example/project.xml b/scripts/submit_ebi/example/project.xml index 90704ab..6a817e7 100644 --- a/scripts/submit_ebi/example/project.xml +++ b/scripts/submit_ebi/example/project.xml @@ -1,7 +1,8 @@ + Testing PubSeq Sample uploads - This study aimed to allow for uploading sequences from PubSeq + This is a test to allow for uploading sequences from PubSeq diff --git a/scripts/submit_ebi/example/sample-submission.xml b/scripts/submit_ebi/example/sample-submission.xml new file mode 100644 index 0000000..9d13512 --- /dev/null +++ b/scripts/submit_ebi/example/sample-submission.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/scripts/submit_ebi/example/sample.xml b/scripts/submit_ebi/example/sample.xml new file mode 100644 index 0000000..694c471 --- /dev/null +++ b/scripts/submit_ebi/example/sample.xml @@ -0,0 +1,68 @@ + + + + human gastric microbiota, mucosal + + 1284369 + stomach metagenome + + + + + investigation type + mimarks-survey + + + sequencing method + pyrosequencing + + + collection date + 2010 + + + host body site + Mucosa of stomach + + + human-associated environmental package + human-associated + + + geographic location (latitude) + 1.81 + DD + + + geographic location (longitude) + -78.76 + DD + + + geographic location (country and/or sea) + Colombia + + + geographic location (region and locality) + Tumaco + + + environment (biome) + coast + + + environment (feature) + human-associated habitat + + + environment (material) + gastric biopsy + + + ENA-CHECKLIST + ERC000011 + + + + + -- cgit v1.2.3