diff options
author | Pjotr Prins | 2020-07-17 12:05:53 +0100 |
---|---|---|
committer | Pjotr Prins | 2020-07-17 12:08:43 +0100 |
commit | 6ade3c1d47a83f0ed52a4efe287017b6a0fcc44e (patch) | |
tree | 00f28ae6d9e973098aaee08534aa8414d54a957b | |
parent | 4f3469e2d5395526c0376688ded4340a4834677d (diff) | |
download | bh20-seq-resource-6ade3c1d47a83f0ed52a4efe287017b6a0fcc44e.tar.gz bh20-seq-resource-6ade3c1d47a83f0ed52a4efe287017b6a0fcc44e.tar.lz bh20-seq-resource-6ade3c1d47a83f0ed52a4efe287017b6a0fcc44e.zip |
Preparing for EBI submission
-rw-r--r-- | bh20simplewebuploader/templates/blog.html | 8 | ||||
-rw-r--r-- | doc/blog/using-covid-19-pubseq-part6.html | 393 | ||||
-rw-r--r-- | doc/blog/using-covid-19-pubseq-part6.org | 7 | ||||
-rw-r--r-- | scripts/submit_ebi/example/project-submission.xml | 3 | ||||
-rw-r--r-- | scripts/submit_ebi/example/project.xml | 3 | ||||
-rw-r--r-- | scripts/submit_ebi/example/sample-submission.xml | 8 | ||||
-rw-r--r-- | scripts/submit_ebi/example/sample.xml | 68 |
7 files changed, 486 insertions, 4 deletions
diff --git a/bh20simplewebuploader/templates/blog.html b/bh20simplewebuploader/templates/blog.html index 823f8a1..f4c2a85 100644 --- a/bh20simplewebuploader/templates/blog.html +++ b/bh20simplewebuploader/templates/blog.html @@ -63,6 +63,14 @@ We explore the Arvados command line and API </div> </div> + <div class="blog-table-row"> + <div class="blog-table-cell"> + <a href="/blog?id=using-covid-19-pubseq-part6">Prepare for uploading to EBI/ENA</a> + </div> + <div class="blog-table-cell"> + Generate the files needed for uploading to EBI/ENA + </div> + </div> </div> </div> </section> diff --git a/doc/blog/using-covid-19-pubseq-part6.html b/doc/blog/using-covid-19-pubseq-part6.html new file mode 100644 index 0000000..278abe8 --- /dev/null +++ b/doc/blog/using-covid-19-pubseq-part6.html @@ -0,0 +1,393 @@ +<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> +<head> +<!-- 2020-07-17 Fri 06:05 --> +<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> +<meta name="viewport" content="width=device-width, initial-scale=1" /> +<title>COVID-19 PubSeq (part 6)</title> +<meta name="generator" content="Org mode" /> +<meta name="author" content="Pjotr Prins" /> +<style type="text/css"> + <!--/*--><![CDATA[/*><!--*/ + .title { text-align: center; + margin-bottom: .2em; } + .subtitle { text-align: center; + font-size: medium; + font-weight: bold; + margin-top:0; } + .todo { font-family: monospace; color: red; } + .done { font-family: monospace; color: green; } + .priority { font-family: monospace; color: orange; } + .tag { background-color: #eee; font-family: monospace; + padding: 2px; font-size: 80%; font-weight: normal; } + .timestamp { color: #bebebe; } + .timestamp-kwd { color: #5f9ea0; } + .org-right { margin-left: auto; margin-right: 0px; text-align: right; } + .org-left { margin-left: 0px; 
margin-right: auto; text-align: left; } + .org-center { margin-left: auto; margin-right: auto; text-align: center; } + .underline { text-decoration: underline; } + #postamble p, #preamble p { font-size: 90%; margin: .2em; } + p.verse { margin-left: 3%; } + pre { + border: 1px solid #ccc; + box-shadow: 3px 3px 3px #eee; + padding: 8pt; + font-family: monospace; + overflow: auto; + margin: 1.2em; + } + pre.src { + position: relative; + overflow: visible; + padding-top: 1.2em; + } + pre.src:before { + display: none; + position: absolute; + background-color: white; + top: -10px; + right: 10px; + padding: 3px; + border: 1px solid black; + } + pre.src:hover:before { display: inline;} + /* Languages per Org manual */ + pre.src-asymptote:before { content: 'Asymptote'; } + pre.src-awk:before { content: 'Awk'; } + pre.src-C:before { content: 'C'; } + /* pre.src-C++ doesn't work in CSS */ + pre.src-clojure:before { content: 'Clojure'; } + pre.src-css:before { content: 'CSS'; } + pre.src-D:before { content: 'D'; } + pre.src-ditaa:before { content: 'ditaa'; } + pre.src-dot:before { content: 'Graphviz'; } + pre.src-calc:before { content: 'Emacs Calc'; } + pre.src-emacs-lisp:before { content: 'Emacs Lisp'; } + pre.src-fortran:before { content: 'Fortran'; } + pre.src-gnuplot:before { content: 'gnuplot'; } + pre.src-haskell:before { content: 'Haskell'; } + pre.src-hledger:before { content: 'hledger'; } + pre.src-java:before { content: 'Java'; } + pre.src-js:before { content: 'Javascript'; } + pre.src-latex:before { content: 'LaTeX'; } + pre.src-ledger:before { content: 'Ledger'; } + pre.src-lisp:before { content: 'Lisp'; } + pre.src-lilypond:before { content: 'Lilypond'; } + pre.src-lua:before { content: 'Lua'; } + pre.src-matlab:before { content: 'MATLAB'; } + pre.src-mscgen:before { content: 'Mscgen'; } + pre.src-ocaml:before { content: 'Objective Caml'; } + pre.src-octave:before { content: 'Octave'; } + pre.src-org:before { content: 'Org mode'; } + pre.src-oz:before { content: 
'OZ'; } + pre.src-plantuml:before { content: 'Plantuml'; } + pre.src-processing:before { content: 'Processing.js'; } + pre.src-python:before { content: 'Python'; } + pre.src-R:before { content: 'R'; } + pre.src-ruby:before { content: 'Ruby'; } + pre.src-sass:before { content: 'Sass'; } + pre.src-scheme:before { content: 'Scheme'; } + pre.src-screen:before { content: 'Gnu Screen'; } + pre.src-sed:before { content: 'Sed'; } + pre.src-sh:before { content: 'shell'; } + pre.src-sql:before { content: 'SQL'; } + pre.src-sqlite:before { content: 'SQLite'; } + /* additional languages in org.el's org-babel-load-languages alist */ + pre.src-forth:before { content: 'Forth'; } + pre.src-io:before { content: 'IO'; } + pre.src-J:before { content: 'J'; } + pre.src-makefile:before { content: 'Makefile'; } + pre.src-maxima:before { content: 'Maxima'; } + pre.src-perl:before { content: 'Perl'; } + pre.src-picolisp:before { content: 'Pico Lisp'; } + pre.src-scala:before { content: 'Scala'; } + pre.src-shell:before { content: 'Shell Script'; } + pre.src-ebnf2ps:before { content: 'ebfn2ps'; } + /* additional language identifiers per "defun org-babel-execute" + in ob-*.el */ + pre.src-cpp:before { content: 'C++'; } + pre.src-abc:before { content: 'ABC'; } + pre.src-coq:before { content: 'Coq'; } + pre.src-groovy:before { content: 'Groovy'; } + /* additional language identifiers from org-babel-shell-names in + ob-shell.el: ob-shell is the only babel language using a lambda to put + the execution function name together. 
*/ + pre.src-bash:before { content: 'bash'; } + pre.src-csh:before { content: 'csh'; } + pre.src-ash:before { content: 'ash'; } + pre.src-dash:before { content: 'dash'; } + pre.src-ksh:before { content: 'ksh'; } + pre.src-mksh:before { content: 'mksh'; } + pre.src-posh:before { content: 'posh'; } + /* Additional Emacs modes also supported by the LaTeX listings package */ + pre.src-ada:before { content: 'Ada'; } + pre.src-asm:before { content: 'Assembler'; } + pre.src-caml:before { content: 'Caml'; } + pre.src-delphi:before { content: 'Delphi'; } + pre.src-html:before { content: 'HTML'; } + pre.src-idl:before { content: 'IDL'; } + pre.src-mercury:before { content: 'Mercury'; } + pre.src-metapost:before { content: 'MetaPost'; } + pre.src-modula-2:before { content: 'Modula-2'; } + pre.src-pascal:before { content: 'Pascal'; } + pre.src-ps:before { content: 'PostScript'; } + pre.src-prolog:before { content: 'Prolog'; } + pre.src-simula:before { content: 'Simula'; } + pre.src-tcl:before { content: 'tcl'; } + pre.src-tex:before { content: 'TeX'; } + pre.src-plain-tex:before { content: 'Plain TeX'; } + pre.src-verilog:before { content: 'Verilog'; } + pre.src-vhdl:before { content: 'VHDL'; } + pre.src-xml:before { content: 'XML'; } + pre.src-nxml:before { content: 'XML'; } + /* add a generic configuration mode; LaTeX export needs an additional + (add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */ + pre.src-conf:before { content: 'Configuration File'; } + + table { border-collapse:collapse; } + caption.t-above { caption-side: top; } + caption.t-bottom { caption-side: bottom; } + td, th { vertical-align:top; } + th.org-right { text-align: center; } + th.org-left { text-align: center; } + th.org-center { text-align: center; } + td.org-right { text-align: right; } + td.org-left { text-align: left; } + td.org-center { text-align: center; } + dt { font-weight: bold; } + .footpara { display: inline; } + .footdef { margin-bottom: 1em; } + .figure { padding: 1em; } + 
.figure p { text-align: center; } + .equation-container { + display: table; + text-align: center; + width: 100%; + } + .equation { + vertical-align: middle; + } + .equation-label { + display: table-cell; + text-align: right; + vertical-align: middle; + } + .inlinetask { + padding: 10px; + border: 2px solid gray; + margin: 10px; + background: #ffffcc; + } + #org-div-home-and-up + { text-align: right; font-size: 70%; white-space: nowrap; } + textarea { overflow-x: auto; } + .linenr { font-size: smaller } + .code-highlighted { background-color: #ffff00; } + .org-info-js_info-navigation { border-style: none; } + #org-info-js_console-label + { font-size: 10px; font-weight: bold; white-space: nowrap; } + .org-info-js_search-highlight + { background-color: #ffff00; color: #000000; font-weight: bold; } + .org-svg { width: 90%; } + /*]]>*/--> +</style> +<link rel="Blog stylesheet" type="text/css" href="blog.css" /> +<script type="text/javascript"> +/* +@licstart The following is the entire license notice for the +JavaScript code in this tag. + +Copyright (C) 2012-2020 Free Software Foundation, Inc. + +The JavaScript code in this tag is free software: you can +redistribute it and/or modify it under the terms of the GNU +General Public License (GNU GPL) as published by the Free Software +Foundation, either version 3 of the License, or (at your option) +any later version. The code is distributed WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU GPL for more details. + +As additional permission under GNU GPL version 3 section 7, you +may distribute non-source (e.g., minimized or compacted) forms of +that code without the copy of the GNU GPL normally required by +section 4, provided you include this license notice and a URL +through which recipients can access the Corresponding Source. + + +@licend The above is the entire license notice +for the JavaScript code in this tag. 
+*/ +<!--/*--><![CDATA[/*><!--*/ + function CodeHighlightOn(elem, id) + { + var target = document.getElementById(id); + if(null != target) { + elem.cacheClassElem = elem.className; + elem.cacheClassTarget = target.className; + target.className = "code-highlighted"; + elem.className = "code-highlighted"; + } + } + function CodeHighlightOff(elem, id) + { + var target = document.getElementById(id); + if(elem.cacheClassElem) + elem.className = elem.cacheClassElem; + if(elem.cacheClassTarget) + target.className = elem.cacheClassTarget; + } +/*]]>*///--> +</script> +</head> +<body> +<div id="content"> +<h1 class="title">COVID-19 PubSeq (part 6)</h1> +<div id="table-of-contents"> +<h2>Table of Contents</h2> +<div id="text-table-of-contents"> +<ul> +<li><a href="#orge6aea9e">1. Generating output for EBI</a></li> +<li><a href="#org95e5e17">2. Defining the EBI study</a></li> +<li><a href="#org9181a73">3. Define the EBI sample</a></li> +<li><a href="#orga29cad0">4. Define the EBI sequence</a></li> +</ul> +</div> +</div> + + +<div id="outline-container-orge6aea9e" class="outline-2"> +<h2 id="orge6aea9e"><span class="section-number-2">1</span> Generating output for EBI</h2> +<div class="outline-text-2" id="text-1"> +<p> +Would it not be great if an uploader to PubSeq could also export samples +to, say, EBI? That is what we discuss in this section. The submission +process is somewhat laborious and when you have submitted to PubSeq +why not export the same to EBI too with the least amount of effort? +</p> + +<p> +COVID-19 PubSeq is a data source - both sequence data and metadata - +that can be used to push data to other sources, such as EBI. You can +register <a href="https://ena-docs.readthedocs.io/en/latest/submit/samples/programmatic.html">samples programmatically</a> with a specific XML interface. 
Note +that (at this point) if you want to submit a sequence (FASTA) it can +only be done through the <a href="https://ena-docs.readthedocs.io/en/latest/submit/general-guide/webin-cli.html">Webin-CLI</a>. Raw data (FASTQ) can go through +the XML interface. +</p> + +<p> +EBI sequence resources are presented through ENA. For example +<a href="https://www.ebi.ac.uk/ena/browser/view/MT394864">Sequence: MT394864.1</a>. +</p> + +<p> +EBI has XML Formats for +</p> + +<ul class="org-ul"> +<li>SUBMISSION</li> +<li>STUDY</li> +<li>SAMPLE</li> +<li>EXPERIMENT</li> +<li>RUN</li> +<li>ANALYSIS</li> +<li>DAC</li> +<li>POLICY</li> +<li>DATASET</li> +<li>PROJECT</li> +</ul> + +<p> +with the schemas listed <a href="ftp://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/">here</a>. Since we are submitting sequences we +should follow submitting <a href="https://ena-docs.readthedocs.io/en/latest/submit/assembly.html">full genome assembly guidelines</a> and +<a href="https://ena-docs.readthedocs.io/en/latest/submit/general-guide/programmatic.html">ENA guidelines</a>. The first step is to define the study, next the sample +and finally the sequence (assembly). 
+</p> +</div> +</div> + +<div id="outline-container-org95e5e17" class="outline-2"> +<h2 id="org95e5e17"><span class="section-number-2">2</span> Defining the EBI study</h2> +<div class="outline-text-2" id="text-2"> +<p> +A study is defined <a href="https://ena-docs.readthedocs.io/en/latest/submit/study/programmatic.html">here</a> and looks like +</p> + +<div class="org-src-container"> +<pre class="src src-xml"><PROJECT_SET> + <PROJECT alias="COVID-19 Washington DC"> + <TITLE>Sequencing SARS-CoV-2 in the Washington DC area</TITLE> + <DESCRIPTION>This study collects samples from COVID-19 patients in the Washington DC area</DESCRIPTION> + <SUBMISSION_PROJECT> + <SEQUENCING_PROJECT/> + </SUBMISSION_PROJECT> + </PROJECT> +</PROJECT_SET> +</pre> +</div> + +<p> +also a submission 'command' is required looking like +</p> + +<div class="org-src-container"> +<pre class="src src-xml"><SUBMISSION> + <ACTIONS> + <ACTION> + <ADD/> + </ACTION> + <ACTION> + <HOLD HoldUntilDate="TODO: release date"/> + </ACTION> + </ACTIONS> +</SUBMISSION> + +</pre> +</div> + +<p> +The webin system accepts such sources using a command like +</p> + +<pre class="example"> +curl -u username:password -F "SUBMISSION=@submission.xml" \ + -F "PROJECT=@project.xml" "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/" +</pre> + + +<p> +as described <a href="https://ena-docs.readthedocs.io/en/latest/submit/study/programmatic.html#submit-the-xmls-using-curl">here</a>. Note that this is the test server. For the final +version use www.ebi.ac.uk instead of wwwdev.ebi.ac.uk. You may also +need the --insecure switch to circumvent certificate checking. 
+</p> + +<p> +<i>work in progress (WIP)</i> +</p> +</div> +</div> + +<div id="outline-container-org9181a73" class="outline-2"> +<h2 id="org9181a73"><span class="section-number-2">3</span> Define the EBI sample</h2> +<div class="outline-text-2" id="text-3"> +<p> +<i>work in progress (WIP)</i> +</p> +</div> +</div> + +<div id="outline-container-orga29cad0" class="outline-2"> +<h2 id="orga29cad0"><span class="section-number-2">4</span> Define the EBI sequence</h2> +<div class="outline-text-2" id="text-4"> +<p> +<i>work in progress (WIP)</i> +</p> +</div> +</div> +</div> +<div id="postamble" class="status"> +<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-17 Fri 06:05</small>. +</div> +</body> +</html> diff --git a/doc/blog/using-covid-19-pubseq-part6.org b/doc/blog/using-covid-19-pubseq-part6.org index 2d1c5e0..8964700 100644 --- a/doc/blog/using-covid-19-pubseq-part6.org +++ b/doc/blog/using-covid-19-pubseq-part6.org @@ -83,9 +83,12 @@ also a submission 'command' is required looking like The webin system accepts such sources using a command like -: curl -u username:password -F "SUBMISSION=@submission.xml" -F "PROJECT=@project.xml" "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/" +: curl -u username:password -F "SUBMISSION=@submission.xml" \ +: -F "PROJECT=@project.xml" "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/" -as described [[https://ena-docs.readthedocs.io/en/latest/submit/study/programmatic.html#submit-the-xmls-using-curl][here]]. +as described [[https://ena-docs.readthedocs.io/en/latest/submit/study/programmatic.html#submit-the-xmls-using-curl][here]]. Note that this is the test server. For the final +version use www.ebi.ac.uk instead of wwwdev.ebi.ac.uk. You may also +need the --insecure switch to circumvent certificate checking. 
/work in progress (WIP)/ diff --git a/scripts/submit_ebi/example/project-submission.xml b/scripts/submit_ebi/example/project-submission.xml index 2d3ddc1..1abb827 100644 --- a/scripts/submit_ebi/example/project-submission.xml +++ b/scripts/submit_ebi/example/project-submission.xml @@ -1,3 +1,4 @@ +<?xml version="1.0" encoding="UTF-8"?> <SUBMISSION> <ACTIONS> <ACTION> @@ -6,6 +7,6 @@ <ACTION> <HOLD HoldUntilDate="2020-10-10"/> </ACTION> - </ACTIONS> + </ACTIONS> </SUBMISSION> diff --git a/scripts/submit_ebi/example/project.xml b/scripts/submit_ebi/example/project.xml index 90704ab..6a817e7 100644 --- a/scripts/submit_ebi/example/project.xml +++ b/scripts/submit_ebi/example/project.xml @@ -1,7 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> <PROJECT_SET> <PROJECT alias="PubSeq01"> <TITLE>Testing PubSeq Sample uploads</TITLE> - <DESCRIPTION>This study aimed to allow for uploading sequences from PubSeq</DESCRIPTION> + <DESCRIPTION>This is a test to allow for uploading sequences from PubSeq</DESCRIPTION> <SUBMISSION_PROJECT> <SEQUENCING_PROJECT/> </SUBMISSION_PROJECT> diff --git a/scripts/submit_ebi/example/sample-submission.xml b/scripts/submit_ebi/example/sample-submission.xml new file mode 100644 index 0000000..9d13512 --- /dev/null +++ b/scripts/submit_ebi/example/sample-submission.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<SUBMISSION> + <ACTIONS> + <ACTION> + <ADD/> + </ACTION> + </ACTIONS> +</SUBMISSION> diff --git a/scripts/submit_ebi/example/sample.xml b/scripts/submit_ebi/example/sample.xml new file mode 100644 index 0000000..694c471 --- /dev/null +++ b/scripts/submit_ebi/example/sample.xml @@ -0,0 +1,68 @@ +<?xml version="1.0" encoding="UTF-8"?> +<SAMPLE_SET> + <SAMPLE alias="PubSeqSam0001" center_name="PubSeq01"> + <TITLE>human gastric microbiota, mucosal</TITLE> + <SAMPLE_NAME> + <TAXON_ID>1284369</TAXON_ID> + <SCIENTIFIC_NAME>stomach metagenome</SCIENTIFIC_NAME> + <COMMON_NAME></COMMON_NAME> + </SAMPLE_NAME> + <SAMPLE_ATTRIBUTES> + 
<SAMPLE_ATTRIBUTE> + <TAG>investigation type</TAG> + <VALUE>mimarks-survey</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>sequencing method</TAG> + <VALUE>pyrosequencing</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>collection date</TAG> + <VALUE>2010</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>host body site</TAG> + <VALUE>Mucosa of stomach</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>human-associated environmental package</TAG> + <VALUE>human-associated</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>geographic location (latitude)</TAG> + <VALUE>1.81</VALUE> + <UNITS>DD</UNITS> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>geographic location (longitude)</TAG> + <VALUE>-78.76</VALUE> + <UNITS>DD</UNITS> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>geographic location (country and/or sea)</TAG> + <VALUE>Colombia</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>geographic location (region and locality)</TAG> + <VALUE>Tumaco</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>environment (biome)</TAG> + <VALUE>coast</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>environment (feature)</TAG> + <VALUE>human-associated habitat</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>environment (material)</TAG> + <VALUE>gastric biopsy</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>ENA-CHECKLIST</TAG> + <VALUE>ERC000011</VALUE> + </SAMPLE_ATTRIBUTE> + </SAMPLE_ATTRIBUTES> + </SAMPLE> +</SAMPLE_SET> + |