From 1d8ec14213c922c398bebba7dda44394ea7c1596 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2020 14:41:15 -0500 Subject: Remove backup file --- bh20sequploader/main.py~ | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 bh20sequploader/main.py~ diff --git a/bh20sequploader/main.py~ b/bh20sequploader/main.py~ deleted file mode 100644 index 336e825..0000000 --- a/bh20sequploader/main.py~ +++ /dev/null @@ -1,2 +0,0 @@ -def main(): - pass -- cgit v1.2.3 From 9d4c695cd7001a4e5d5dd7970694c7f9418d947d Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2020 14:41:39 -0500 Subject: ignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3ce9a6e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.py~ -- cgit v1.2.3 From c138b367229b5c0323984668315f369151668e32 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2020 16:41:12 -0500 Subject: - Added some output and run main() by default - Disabled ssl checking which does not work if you lack a certificate --- bh20sequploader/main.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py index 9de51f4..17ad492 100644 --- a/bh20sequploader/main.py +++ b/bh20sequploader/main.py @@ -17,18 +17,22 @@ def main(): parser.add_argument('metadata', type=argparse.FileType('r'), help='sequence metadata json') args = parser.parse_args() - api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN) + api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True) col = arvados.collection.Collection(api_client=api) + print("Reading FASTA") with col.open("sequence.fasta", "w") as f: r = args.sequence.read(65536) + print(r[0:20]) while r: f.write(r) r = args.sequence.read(65536) + print("Reading JSONLD") with col.open("metadata.jsonld", "w") as f: r = args.metadata.read(65536) + print(r[0:20]) while r: f.write(r) r = args.metadata.read(65536) @@ -44,3 +48,5 @@ def main(): col.save_new(owner_uuid=UPLOAD_PROJECT, name="Uploaded by %s from %s" % (properties['upload_user'], properties['upload_ip']), properties=properties, ensure_unique_name=True) + +main() -- cgit v1.2.3 From e8eb3d7eba3fdaa925b45bd914bf6b5d65682d7f Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2020 16:42:00 -0500 Subject: README and example --- .gitignore | 1 + README.md | 35 ++++++++ README.rst | 3 - example/metadata.json | 0 example/sequence.fasta | 214 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 250 insertions(+), 3 deletions(-) create mode 100644 README.md delete mode 100644 README.rst create mode 100644 example/metadata.json create mode 100644 example/sequence.fasta diff --git a/.gitignore b/.gitignore index 3ce9a6e..8e8c01b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.py~ +build/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..749121c --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +# Sequence uploader + +This repository provides a sequence uploader for the + +# Run + +Run the uploader with a FASTA file and accompanying metadata: + + python3 bh20sequploader/main.py example/sequence.fasta example/metadata.json + +# Installation + +This tool requires the arvados Python module which can be installed +using .deb or .rpm packages through +https://doc.arvados.org/v2.0/sdk/python/sdk-python.html. The actual +code lives [here](https://github.com/arvados/arvados/tree/master/sdk/python) and +suggests a local install using + + apt-get install libcurl4-openssl-dev libssl1.0-dev + pip3 install --user arvados-python-client + +Next update + + export PATH=$PATH:$HOME/.local/bin + +## Install with GNU Guix + +Set up a container: + + ~/opt/guix/bin/guix environment -C guix --ad-hoc python openssl python-pycurl nss-certs + pip3 install --user arvados-python-client + +Pip installed the following modules + + arvados-python-client-2.0.1 ciso8601-2.1.3 future-0.18.2 google-api-python-client-1.6.7 httplib2-0.17.1 oauth2client-4.1.3 pyasn1-0.4.8 pyasn1-modules-0.2.8 rsa-4.0 ruamel.yaml-0.15.77 six-1.14.0 uritemplate-3.0.1 ws4py-0.5.1 diff --git a/README.rst b/README.rst deleted file mode 100644 index 71ec67e..0000000 --- a/README.rst +++ /dev/null @@ -1,3 +0,0 @@ -========================================== -Sequence uploader -========================================== diff --git a/example/metadata.json b/example/metadata.json new file mode 100644 index 0000000..e69de29 diff --git a/example/sequence.fasta b/example/sequence.fasta new file mode 100644 index 0000000..3c4c0ef --- /dev/null +++ b/example/sequence.fasta @@ -0,0 +1,214 @@ +>AF324493.2 HIV-1 vector pNL4-3, complete sequence +TGGAAGGGCTAATTTGGTCCCAAAAAAGACAAGAGATCCTTGATCTGTGGATCTACCACACACAAGGCTA +CTTCCCTGATTGGCAGAACTACACACCAGGGCCAGGGATCAGATATCCACTGACCTTTGGATGGTGCTTC +AAGTTAGTACCAGTTGAACCAGAGCAAGTAGAAGAGGCCAATGAAGGAGAGAACAACAGCTTGTTACACC +CTATGAGCCAGCATGGGATGGAGGACCCGGAGGGAGAAGTATTAGTGTGGAAGTTTGACAGCCTCCTAGC +ATTTCGTCACATGGCCCGAGAGCTGCATCCGGAGTACTACAAAGACTGCTGACATCGAGCTTTCTACAAG +GGACTTTCCGCTGGGGACTTTCCAGGGAGGTGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGAT +GCTACATATAAGCAGCTGCTTTTTGCCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGC +TCTCTGGCTAACTAGGGAACCCACTGCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTCAAAGTAGTGTG +TGCCCGTCTGTTGTGTGACTCTGGTAACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCT +AGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGTAAAGCCAGAGGAGATCTCTCGACGCAGGACTCG +GCTTGCTGAAGCGCGCACGGCAAGAGGCGAGGGGCGGCGACTGGTGAGTACGCCAAAAATTTTGACTAGC +GGAGGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCGGTATTAAGCGGGGGAGAATTAGATAAATGGGAA +AAAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAACTAAAACATATAGTATGGGCAAGCAGGGAGC +TAGAACGATTCGCAGTTAATCCTGGCCTTTTAGAGACATCAGAAGGCTGTAGACAAATACTGGGACAGCT +ACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGT +GTGCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCCTTAGATAAGATAGAGGAAGAGCAAAACAAAA +GTAAGAAAAAGGCACAGCAAGCAGCAGCTGACACAGGAAACAACAGCCAGGTCAGCCAAAATTACCCTAT +AGTGCAGAACCTCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAA +GTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCC +CACAAGATTTAAATACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGAC +CATCAATGAGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAG +ATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGA +CACATAATCCACCTATCCCAGTAGGAGAAATCTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGT +AAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTA +GACCGATTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAAGAGGTAAAAAATTGGATGACAGAAACCT +TGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGGAGCGACACTAGA +AGAAATGATGACAGCATGTCAGGGAGTGGGGGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATG +AGCCAAGTAACAAATCCAGCTACCATAATGATACAGAAAGGCAATTTTAGGAACCAAAGAAAGACTGTTA +AGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAAAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTG +GAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATC +TGGCCTTCCCACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAG +AGAGCTTCAGGTTTGGGGAAGAGACAACAACTCCCTCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTA +TCCTTTAGCTTCCCTCAGATCACTCTTTGGCAGCGACCCCTCGTCACAATAAAGATAGGGGGGCAATTAA +AGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAA +ACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATC +TGCGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGT +TGACTCAGATTGGCTGCACTTTAAATTTTCCCATTAGTCCTATTGAGACTGTACCAGTAAAATTAAAGCC +AGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATT +TGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTAT +TTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAAC +TCAAGATTTCTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAACAGAAAAAATCAGTAACA +GTACTGGATGTGGGCGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTA +CCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAA +AGGATCACCAGCAATATTCCAGTGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGAC +ATAGTCATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAA +AAATAGAGGAACTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGA +ACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCA +GAAAAGGACAGCTGGACTGTCAATGACATACAGAAATTAGTGGGAAAATTGAATTGGGCAAGTCAGATTT +ATGCAGGGATTAAAGTAAGGCAATTATGTAAACTTCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACC +ACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCGGTACATGGAGTG +TATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAA +TTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAGTATGCAAGAATGAAGGGTGCCCACACTAATGA +TGTGAAACAATTAACAGAGGCAGTACAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACTCCT +AAATTTAAATTACCCATACAAAAGGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGA +TTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAGTTATGGTACCAGTTAGAGAAAGAACCCAT +AATAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCCAATAGGGAAACTAAATTAGGAAAAGCAGGATAT +GTAACTGACAGAGGAAGACAAAAAGTTGTCCCCCTAACGGACACAACAAATCAGAAGACTGAGTTACAAG +CAATTCATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTGACAGACTCACAATATGCATTGGG +AATCATTCAAGCACAACCAGATAAGAGTGAATCAGAGTTAGTCAGTCAAATAATAGAGCAGTTAATAAAA +AAGGAAAAAGTCTACCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAAT +TGGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGGCCCAAGAAGAACATGAGAA +ATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTACCACCTGTAGTAGCAAAAGAAATAGTA +GCCAGCTGTGATAAATGTCAGCTAAAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGCCCAGGAATAT +GGCAGCTAGATTGTACACATTTAGAAGGAAAAGTTATCTTGGTAGCAGTTCATGTAGCCAGTGGATATAT +AGAAGCAGAAGTAATTCCAGCAGAGACAGGGCAAGAAACAGCATACTTCCTCTTAAAATTAGCAGGAAGA +TGGCCAGTAAAAACAGTACATACAGACAATGGCAGCAATTTCACCAGTACTACAGTTAAGGCCGCCTGTT +GGTGGGCGGGGATCAAGCAGGAATTTGGCATTCCCTACAATCCCCAAAGTCAAGGAGTAATAGAATCTAT +GAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAA +ATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAG +TAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCG +GGTTTATTACAGGGACAGCAGAGATCCAGTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGG +GCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATCAGGGATT +ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAACACATGGAAAAGAT +TAGTAAAACACCATATGTATATTTCAAGGAAAGCTAAGGACTGGTTTTATAGACATCACTATGAAAGTAC +TAATCCAAAAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAAATTAGTAATAACAACATATTGG +GGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGAT +ATAGCACACAAGTAGACCCTGACCTAGCAGACCAACTAATTCATCTGCACTATTTTGATTGTTTTTCAGA +ATCTGCTATAAGAAATACCATATTAGGACGTATAGTTAGTCCTAGGTGTGAATATCAAGCAGGACATAAC +AAGGTAGGATCTCTACAGTACTTGGCACTAGCAGCATTAATAAAACCAAAACAGATAAAGCCACCTTTGC +CTAGTGTTAGGAAACTGACAGAGGACAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCA +TACAATGAATGGACACTAGAGCTTTTAGAGGAACTTAAGAGTGAAGCTGTTAGACATTTTCCTAGGATAT +GGCTCCATAACTTAGGACAACATATCTATGAAACTTACGGGGATACTTGGGCAGGAGTGGAAGCCATAAT +AAGAATTCTGCAACAACTGCTGTTTATCCATTTCAGAATTGGGTGTCGACATAGCAGAATAGGCGTTACT +CGACAGAGGAGAGCAAGAAATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAG +CCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATGACAA +AAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGAC +TCATCAAGCTTCTCTATCAAAGCAGTAAGTAGTACATGTAATGCAACCTATAATAGTAGCAATAGTAGCA +TTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAA +GACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGA +AGGAGAAGTATCAGCACTTGTGGAGATGGGGGTGGAAATGGGGCACCATGCTCCTTGGGATATTGATGAT +CTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACC +ACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCT +GTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAA +AAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTA +AAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTA +GCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGA +TAAGGTGCAGAAAGAATATGCATTCTTTTATAAACTTGATATAGTACCAATAGATAATACCAGCTATAGG +TTGATAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATAC +ATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATG +TACAAATGTCAGCACAGTACAATGTACACATGGAATCAGGCCAGTAGTATCAACTCAACTGCTGTTAAAT +GGCAGTCTAGCAGAAGAAGATGTAGTAATTAGATCTGCCAATTTCACAGACAATGCTAAAACCATAATAG +TACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGTATCCGTAT +CCAGAGGGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAAC +ATTAGTAGAGCAAAATGGAATGCCACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATA +ATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGG +AGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTTAATAGTACTTGGAGTACT +GAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACACTCCCATGCAGAATAAAACAATTTATAAACA +TGTGGCAGGAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATAT +TACTGGGCTGCTATTAACAAGAGATGGTGGTAATAACAACAATGGGTCCGAGATCTTCAGACCTGGAGGA +GGCGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAG +TAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTT +CCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCGTCAATGACGCTGACGGTACAGGCCAGA +CAATTATTGTCTGATATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGT +TGCAACTCACAGTCTGGGGCATCAAACAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGA +TCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCT +AGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATAACATGACCTGGATGGAGTGGGACAGAGAAATTA +ACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGA +ATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATA +AAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGA +ATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAATCCCGAGGGGACCCGACAG +GCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCC +TTAGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCT +TGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCT +CCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAACTTGCTCAATGCCACAGCCATAGCAGTA +GCTGAGGGGACAGATAGGGTTATAGAAGTATTACAAGCAGCTTATAGAGCTATTCGCCACATACCTAGAA +GAATAAGACAGGGCTTGGAAAGGATTTTGCTATAAGATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTG +GATGGCCTGCTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATGGGGTGGGAGCAGTATCTCG +AGACCTAGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTAACAATGCTGCTTGTGCCTGGCTA +GAAGCACAAGAGGAGGAAGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACA +AGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAG +AAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGATTGGCAGAACTACACA +CCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATA +AGGTAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGAATGGATGA +CCCTGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTG +CATCCGGAGTACTTCAAGAACTGCTGACATCGAGCTTGCTACAAGGGACTTTCCGCTGGGGACTTTCCAG +GGAGGCGTGGCCTGGGCGGGACTGGGGAGTGGCGAGCCCTCAGATGCTGCATATAAGCAGCTGCTTTTTG +CCTGTACTGGGTCTCTCTGGTTAGACCAGATCTGAGCCTGGGAGCTCTCTGGCTAACTAGGGAACCCACT +GCTTAAGCCTCAATAAAGCTTGCCTTGAGTGCTTCAAGTAGTGTGTGCCCGTCTGTTGTGTGACTCTGGT +AACTAGAGATCCCTCAGACCCTTTTAGTCAGTGTGGAAAATCTCTAGCACCCAGGAGGTAGAGGTTGCAG +TGAGCCAAGATCGCGCCACTGCATTCCAGCCTGGGCAAGAAAACAAGACTGTCTAAAATAATAATAATAA +GTTAAGGGTATTAAATATATTTATACATGGAGGTCATAAAAATATATATATTTGGGCTGGGCGCAGTGGC +TCACACCTGCGCCCGGCCCTTTGGGAGGCCGAGGCAGGTGGATCACCTGAGTTTGGGAGTTCCAGACCAG +CCTGACCAACATGGAGAAACCCCTTCTCTGTGTATTTTTAGTAGATTTTATTTTATGTGTATTTTATTCA +CAGGTATTTCTGGAAAACTGAAACTGTTTTTCCTCTACTCTGATACCACAAGAATCATCAGCACAGAGGA +AGACTTCTGTGATCAAATGTGGTGGGAGAGGGAGGTTTTCACCAGCACATGAGCAGTCAGTTCTGCCGCA +GACTCGGCGGGTGTCCTTCGGTTCAGTTCCAACACCGCCTGCCTGGAGAGAGGTCAGACCACAGGGTGAG +GGCTCAGTCCCCAAGACATAAACACCCAAGACATAAACACCCAACAGGTCCACCCCGCCTGCTGCCCAGG +CAGAGCCGATTCACCAAGACGGGAATTAGGATAGAGAAAGAGTAAGTCACACAGAGCCGGCTGTGCGGGA +GAACGGAGTTCTATTATGACTCAAATCAGTCTCCCCAAGCATTCGGGGATCAGAGTTTTTAAGGATAACT +TAGTGTGTAGGGGGCCAGTGAGTTGGAGATGAAAGCGTAGGGAGTCGAAGGTGTCCTTTTGCGCCGAGTC +AGTTCCTGGGTGGGGGCCACAAGATCGGATGAGCCAGTTTATCAATCCGGGGGTGCCAGCTGATCCATGG +AGTGCAGGGTCTGCAAAATATCTCAAGCACTGATTGATCTTAGGTTTTACAATAGTGATGTTACCCCAGG +AACAATTTGGGGAAGGTCAGAATCTTGTAGCCTGTAGCTGCATGACTCCTAAACCATAATTTCTTTTTTG +TTTTTTTTTTTTTATTTTTGAGACAGGGTCTCACTCTGTCACCTAGGCTGGAGTGCAGTGGTGCAATCAC +AGCTCACTGCAGCCTCAACGTCGTAAGCTCAAGCGATCCTCCCACCTCAGCCTGCCTGGTAGCTGAGACT +ACAAGCGACGCCCCAGTTAATTTTTGTATTTTTGGTAGAGGCAGCGTTTTGCCGTGTGGCCCTGGCTGGT +CTCGAACTCCTGGGCTCAAGTGATCCAGCCTCAGCCTCCCAAAGTGCTGGGACAACCGGGGCCAGTCACT +GCACCTGGCCCTAAACCATAATTTCTAATCTTTTGGCTAATTTGTTAGTCCTACAAAGGCAGTCTAGTCC +CCAGGCAAAAAGGGGGTTTGTTTCGGGAAAGGGCTGTTACTGTCTTTGTTTCAAACTATAAACTAAGTTC +CTCCTAAACTTAGTTCGGCCTACACCCAGGAATGAACAAGGAGAGCTTGGAGGTTAGAAGCACGATGGAA +TTGGTTAGGTCAGATCTCTTTCACTGTCTGAGTTATAATTTTGCAATGGTGGTTCAAAGACTGCCCGCTT +CTGACACCAGTCGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCTCT +TCCGCTTCCTCGCTCACTGACTCGCTGCGCTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAA +AGGCGGTAATACGGTTATCCACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCA +AAAGGCCAGGAACCGTAAAAAGGCCGCGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCAT +CACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCC +CTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCC +TTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCC +AAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTG +AGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAG +GTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTT +GGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAA +CCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGA +AGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTC +ATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAA +GTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTG +TCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCA +TCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACC +AGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTG +TTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGC +ATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTA +CATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTT +GGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGA +TGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCT +CTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAA +ACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGT +GCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAA +ATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTA +TTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAA +ATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCCACCTGACGTCTAAGAAACCATTATTATCATGACAT +TAACCTATAAAAATAGGCGTATCACGAGGCCCTTTCGTCTCGCGCGTTTCGGTGATGACGGTGAAAACCT +CTGACACATGCAGCTCCCGGAGACGGTCACAGCTTGTCTGTAAGCGGATGCCGGGAGCAGACAAGCCCGT +CAGGGCGCGTCAGCGGGTGTTGGCGGGTGTCGGGGCTGGCTTAACTATGCGGCATCAGAGCAGATTGTAC +TGAGAGTGCACCATATGCGGTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCA +TTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGGGG +AGGCAGAGATTGCAGTAAGCTGAGATCGCAGCACTGCACTCCAGCCTGGGCGACAGAGTAAGACTCTGTC +TCAAAAATAAAATAAATAAATCAATCAGATATTCCAATCTTTTCCTTTATTTATTTATTTATTTTCTATT +TTGGAAACACAGTCCTTCCTTATTCCAGAATTACACATATATTCTATTTTTCTTTATATGCTCCAGTTTT +TTTTAGACCTTCACCTGAAATGTGTGTATACAAAATCTAGGCCAGTCCAGCAGAGCCTAAAGGTAAAAAA +TAAAATAATAAAAAATAAATAAAATCTAGCTCACTCCTTCACATCAAAATGGAGATACAGCTGTTAGCAT +TAAATACCAAATAACCCATCTTGTCCTCAATAATTTTAAGCGCCTCTCTCCACCACATCTAACTCCTGTC +AAAGGCATGTGCCCCTTCCGGGCGCTCTGCTGTGCTGCCAACCAACTGGCATGTGGACTCTGCAGGGTCC +CTAACTGCCAAGCCCCACAGTGTGCCCTGAGGCTGCCCCTTCCTTCTAGCGGCTGCCCCCACTCGGCTTT +GCTTTCCCTAGTTTCAGTTACTTGCGTTCAGCCAAGGTCTGAAACTAGGTGCGCACAGAGCGGTAAGACT +GCGAGAGAAAGAGACCAGCTTTACAGGGGGTTTATCACAGTGCACCCTGACAGTCGTCAGCCTCACAGGG +GGTTTATCACATTGCACCCTGACAGTCGTCAGCCTCACAGGGGGTTTATCACAGTGCACCCTTACAATCA +TTCCATTTGATTCACAATTTTTTTAGTCTCTACTGTGCCTAACTTGTAAGTTAAATTTGATCAGAGGTGT +GTTCCCAGAGGGGAAAACAGTATATACAGGGTTCAGTACTATCGCATTTCAGGCCTCCACCTGGGTCTTG +GAATGTGTCCCCCGAGGGGTGATGACTACCTCAGTTGGATCTCCACAGGTCACAGTGACACAAGATAACC +AAGACACCTCCCAAGGCTACCACAATGGGCCGCCCTCCACGTGCACATGGCCGGAGGAACTGCCATGTCG +GAGGTGCAAGCACACCTGCGCATCAGAGTCCTTGGTGTGGAGGGAGGGACCAGCGCAGCTTCCAGCCATC +CACCTGATGAACAGAACCTAGGGAAAGCCCCAGTTCTACTTACACCAGGAAAGGC + -- cgit v1.2.3 From 762b770eda79fd4006b767e2dd768746232995ba Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2020 16:49:19 -0500 Subject: Add note on workflow --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 749121c..ec9afb1 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,19 @@ Run the uploader with a FASTA file and accompanying metadata: python3 bh20sequploader/main.py example/sequence.fasta example/metadata.json +# Add a workflow + +get your SARS-CoV-2 sequences from GenBank in seqs.fa + +```sh +minimap2 -cx asm20 -X seqs.fa seqs.fa >seqs.paf +seqwish -s seqs.fa -p seqs.paf -g seqs.gfa +odgi build -g seqs.gfa -s -o seqs.odgi +odgi viz -i seqs.odgi -o seqs.png -x 4000 -y 500 -R -P 5 +``` + +from https://github.com/virtual-biohackathons/covid-19-bh20/wiki/Pangenome#pangenome-model-from-available-genomes + # Installation This tool requires the arvados Python module which can be installed -- cgit v1.2.3