about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--.gitignore16
-rw-r--r--README.md10
-rw-r--r--bh20seqanalyzer/main.py2
-rw-r--r--bh20sequploader/SARS-CoV-2-reference.fasta430
-rw-r--r--bh20sequploader/bh20seq-options.yml30
-rw-r--r--bh20sequploader/bh20seq-schema.yml65
-rw-r--r--bh20sequploader/bh20seq-shex.rdf12
-rw-r--r--bh20sequploader/main.py7
-rw-r--r--bh20sequploader/qc_fasta.py63
-rw-r--r--bh20simplewebuploader/__init__.py0
-rw-r--r--bh20simplewebuploader/main.py127
-rw-r--r--bh20simplewebuploader/static/main.css269
-rw-r--r--bh20simplewebuploader/static/main.js47
-rw-r--r--bh20simplewebuploader/templates/form.html325
-rw-r--r--example/maximum_metadata_example.yaml46
-rw-r--r--example/metadata.yaml43
-rw-r--r--example/minimal_metadata_example.yaml (renamed from example/minimal_example.yaml)6
-rw-r--r--scripts/dict_ontology_standardization/ncbi_countries.csv9
-rw-r--r--scripts/dict_ontology_standardization/ncbi_speciesman_source.csv1
-rw-r--r--scripts/docker/Dockerfile10
-rwxr-xr-xscripts/from_genbank_to_fasta_and_yaml.py96
-rw-r--r--scripts/import.cwl30
-rw-r--r--scripts/import_to_arvados.py14
-rw-r--r--scripts/sequences.acc297
-rw-r--r--semantic_enrichment/countries.ttl279
-rw-r--r--semantic_enrichment/labels.ttl24
-rw-r--r--setup.py8
-rw-r--r--workflows/pangenome-generate/minimap2.cwl2
-rw-r--r--workflows/pangenome-generate/odgi_to_rdf.cwl4
29 files changed, 1862 insertions, 410 deletions
diff --git a/.gitignore b/.gitignore
index 925698c..9057a4f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,20 @@
 *.py~
+
+# Distribution / packaging
 build/
 cache.txt
 metadata.ttl
+__pycache__/
+eggs/
+.eggs/
+*.egg-info/
+*.egg
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
diff --git a/README.md b/README.md
index 7588bbc..e8896a0 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,14 @@ Note that you will need to repeat the `. venv/bin/activate` step from this direc
 
 3. **Install the tool.** Once in your virtualenv, install this project:
 
+Install from PyPI:
+
+```sh
+pip3 install bh20-seq-uploader
+```
+
+Install from git:
+
 ```sh
 pip3 install git+https://github.com/arvados/bh20-seq-resource.git@master
 ```
@@ -166,7 +174,7 @@ To run it locally:
 ```
 virtualenv --python python3 venv
 . venv/bin/activate
-pip install -e .[web]
+pip install -e ".[web]"
 env FLASK_APP=bh20simplewebuploader/main.py flask run
 ```
 
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 8d0f562..07e5f69 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -90,7 +90,6 @@ def run_workflow(api, parent_project, workflow_uuid, name, inputobj):
         cmd = ["arvados-cwl-runner",
                "--submit",
                "--no-wait",
-               "--debug",
                "--project-uuid=%s" % project["uuid"],
                "arvwf:%s" % workflow_uuid,
                tmp.name]
@@ -137,6 +136,7 @@ def start_pangenome_analysis(api,
             "location": schema_ref
         }
     }
+    validated.sort(key=lambda v: v["portable_data_hash"])
     for v in validated:
         inputobj["inputReads"].append({
             "class": "File",
diff --git a/bh20sequploader/SARS-CoV-2-reference.fasta b/bh20sequploader/SARS-CoV-2-reference.fasta
new file mode 100644
index 0000000..b364687
--- /dev/null
+++ b/bh20sequploader/SARS-CoV-2-reference.fasta
@@ -0,0 +1,430 @@
+>NC_045512.2 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome
+ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAA
+CGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAAC
+TAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTG
+TTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTC
+CCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC
+GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG
+CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGAT
+GCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTC
+GTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCT
+TCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA
+GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTG
+TTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG
+CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTG
+TCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTG
+CTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA
+ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAA
+CCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCAC
+CAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA
+GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACT
+ACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG
+GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCG
+CACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCA
+CGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACA
+ACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA
+GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT
+TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAG
+GTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCG
+TGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCC
+GCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTG
+ATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG
+GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTT
+AAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAA
+TTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGT
+AAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTA
+GGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC
+TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT
+AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAA
+GCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGT
+ACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAA
+GGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT
+GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAA
+ATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC
+ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAA
+TTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAG
+AAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT
+TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAA
+CAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTC
+AACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT
+AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACA
+GTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA
+CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAG
+TTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGT
+GAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTAT
+TATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAA
+TGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT
+GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTA
+AACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAAC
+TCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCA
+GATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTG
+ATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAAT
+GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAAT
+GGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTA
+TTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATGC
+AGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAA
+TATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA
+CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTA
+TGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTT
+TCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAAG
+AACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAATCTACACA
+ACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC
+CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTA
+AGGTGTTTACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACA
+ACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGT
+AAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTG
+ATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA
+TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAA
+ATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTA
+ACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAAT
+GAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGT
+GGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT
+TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTC
+ACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGT
+GAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAG
+ACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAG
+TTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG
+TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTGTACCAAACCAAC
+CATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAA
+CCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGT
+GATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAAC
+CTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG
+TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGA
+ATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGA
+AAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAA
+TAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTT
+ACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG
+CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC
+AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTA
+TTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAG
+CAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAA
+TTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC
+TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAG
+GCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT
+TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAA
+TGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCT
+ATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC
+TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATC
+TTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTT
+GTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG
+GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGT
+GATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA
+GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCA
+TCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGAC
+AACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAAT
+GTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT
+AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT
+AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTG
+AACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGT
+TGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTT
+ACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTG
+GTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT
+ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAG
+AATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAG
+CACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTTT
+TGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAA
+ATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA
+ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCC
+ATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGC
+ACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACAC
+CATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTT
+TAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT
+GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACC
+TTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATC
+AGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCA
+GGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTACACCACTAATTCAACCTATTG
+GTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTA
+CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTC
+CTTATGTCATTCACTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTT
+ACTTGTACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTT
+CACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGG
+TTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG
+CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTCTTAC
+GCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGC
+TACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTC
+TTTACCAACCACCACAAACCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCC
+ATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT
+GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTTAACCCTAATTATGAAG
+ATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGG
+ACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAG
+TTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTT
+ACCAATGTGCTATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG
+TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCAT
+GCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTA
+CGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAGGTGGTT
+TCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTA
+ACACAAGACCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG
+CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGA
+TGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACA
+ATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGAGTACTC
+AATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATGTC
+TGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC
+ACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATA
+TGGTTGATACTAGTTTGTCTGGTTTTAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACT
+AATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTG
+ACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCT
+CTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTG
+TGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTC
+TTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTG
+GTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAA
+GAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTA
+GCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAAC
+TCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCTCTTAGCTAA
+AGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTA
+GACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTA
+GTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGA
+TTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCA
+GCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTG
+AGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAA
+TGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACA
+ACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACAT
+TTACTTATGCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAG
+TGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCT
+GCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTA
+CACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACT
+TGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATC
+TATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTAT
+ACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCT
+ACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGAT
+GCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGT
+GTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGG
+TGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTA
+AAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAG
+TCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCA
+GTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCA
+CAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAA
+ATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTT
+GTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTC
+CAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCA
+ACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGAC
+ACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATG
+ATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTT
+AAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTAGATAATCAA
+GATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTG
+TAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGT
+TGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTA
+AAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATG
+ACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACCTACAAGTTTTGG
+ACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAG
+CTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGT
+ATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTC
+AGTAGCTGCACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTAT
+GACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTC
+AGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAG
+ACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCT
+AACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGAC
+TTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCC
+TACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTC
+TCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAG
+GAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAG
+TGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTT
+AGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATA
+GATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTTAAACC
+AGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTC
+ACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTAC
+AACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGC
+ATATTTGCGTAAACATTTCTCAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTAT
+GCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTA
+TGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATAC
+AATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCC
+GGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTA
+TAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATA
+CATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTTACTAATGAT
+AACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTG
+TTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTT
+ATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTAT
+GTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATT
+GTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAA
+TACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGT
+GATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTG
+AGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCT
+TTCATGGGAAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT
+AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACC
+GAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATT
+AAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATC
+TCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGG
+GACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGT
+GTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGAT
+AAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACAT
+TAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGA
+AATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTACGTGCTAAGCACTATGTGTAC
+ATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATT
+TCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCC
+TGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCA
+GCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCAC
+AAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTA
+TAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGC
+TCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTA
+ATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTT
+GCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTC
+TTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACA
+CTAAATTCAAAACTGAAGGTTTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGACT
+CATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATCACCCGCGAA
+GAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTG
+TTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTA
+TGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAA
+CACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAA
+GTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATC
+TATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTT
+TCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTA
+TGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCA
+TGGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTT
+AAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAA
+AGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAA
+CCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGT
+GACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTG
+TATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAG
+AGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCAC
+ACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTC
+CATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTAT
+AACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTATCTCGATGCT
+TATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGA
+ACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGG
+ACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTA
+GAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTA
+AACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGA
+CTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAA
+CCAACTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTAT
+TTAGAAATGCCCGTAATGGTGTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCC
+CAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAG
+AAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTA
+AACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATT
+AGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGTTTACATCTA
+CTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTA
+CAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTT
+ATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTG
+ACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAA
+AATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCT
+ATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTC
+GCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTA
+TACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTAC
+GGGTACGCTGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGAT
+TGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAA
+ATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCT
+AGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATG
+GGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTG
+GATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTG
+GAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTA
+AGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAG
+GTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAA
+CAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCA
+ATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCA
+GTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATG
+TCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGC
+TTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCC
+CTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCAT
+TTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGC
+GAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTC
+AAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTA
+TTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTAT
+TAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCA
+GGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATA
+ATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTT
+GAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATT
+GTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTG
+TTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATC
+ATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTAT
+GCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTG
+ATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTC
+TAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGA
+GATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACT
+TTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACT
+TTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAAC
+AAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTC
+TGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGA
+GATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAAC
+CAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTA
+CTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGC
+TGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACT
+CAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTG
+GTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTAC
+CACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCA
+ACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAA
+TAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACC
+AATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCA
+TTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATT
+GCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACC
+TTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGG
+ACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTG
+GAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAA
+AATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCA
+CAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATA
+TCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAG
+TTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCT
+ACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTA
+TGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAA
+GAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTT
+TCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACA
+CATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACC
+TGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTA
+GGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTG
+CCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCC
+ATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGT
+ATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACG
+ACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGA
+ATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTC
+GCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCT
+TGCTGTTTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGT
+GTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTG
+GCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAAT
+AATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTT
+CTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTA
+CTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATG
+GGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCA
+ACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGC
+CTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAAT
+TTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTAC
+TCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTAT
+TCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGT
+GAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT
+CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGA
+TTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTC
+CTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTA
+AGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAAT
+AAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTC
+ATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC
+TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGT
+GATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAA
+GAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTG
+ACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAG
+CAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTAC
+TATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA
+AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAAC
+CAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGA
+GCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACA
+TACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAAT
+TTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACT
+GTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT
+ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTG
+CTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAA
+GATCATAATGAAACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTG
+TAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC
+GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAA
+TTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT
+GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGA
+AGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTG
+ATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAG
+TAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACT
+GCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTC
+CAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG
+TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT
+GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAA
+AAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAAC
+ATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGT
+AGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCA
+ATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG
+TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGG
+CAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCC
+AAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACA
+ATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACG
+TGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC
+TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC
+TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGAT
+TTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATG
+CAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTT
+GTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT
+TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTAC
+GATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAAT
+TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAA
+
diff --git a/bh20sequploader/bh20seq-options.yml b/bh20sequploader/bh20seq-options.yml
index 104ed6c..c553f41 100644
--- a/bh20sequploader/bh20seq-options.yml
+++ b/bh20sequploader/bh20seq-options.yml
@@ -35,38 +35,8 @@ sample_sequencing_technology:
   Oxford Nanopore Sequencing: http://purl.obolibrary.org/obo/NCIT_C146818
   Sanger dideoxy sequencing: http://purl.obolibrary.org/obo/NCIT_C19641
 
-sample_sequencing_technology2:
-  Illumina NextSeq 500: http://www.ebi.ac.uk/efo/EFO_0009173
-  Illumina NextSeq 550: http://www.ebi.ac.uk/efo/EFO_0008566
-  Illumina HiSeq X: http://www.ebi.ac.uk/efo/EFO_0008567
-  Illumina MiSeq: http://www.ebi.ac.uk/efo/EFO_0004205
-  Illumina: http://purl.obolibrary.org/obo/OBI_0000759
-  IonTorrent: http://purl.obolibrary.org/obo/NCIT_C125894
-  Ion Semiconductor Sequencing: http://purl.obolibraryorg/obo/NCIT_C125894
-  Oxford Nanopore MinION: http://www.ebi.ac.uk/efo/EFO_0008632
-  Oxford Nanopore Sequencing: http://purl.obolibrary.org/obo/NCIT_C146818
-  Sanger dideoxy sequencing: http://purl.obolibrary.org/obo/NCIT_C19641
-
-sample_sequencing_technology3:
-  Illumina NextSeq 500: http://www.ebi.ac.uk/efo/EFO_0009173
-  Illumina NextSeq 550: http://www.ebi.ac.uk/efo/EFO_0008566
-  Illumina HiSeq X: http://www.ebi.ac.uk/efo/EFO_0008567
-  Illumina MiSeq: http://www.ebi.ac.uk/efo/EFO_0004205
-  Illumina: http://purl.obolibrary.org/obo/OBI_0000759
-  IonTorrent: http://purl.obolibrary.org/obo/NCIT_C125894
-  Ion Semiconductor Sequencing: http://purl.obolibraryorg/obo/NCIT_C125894
-  Oxford Nanopore MinION: http://www.ebi.ac.uk/efo/EFO_0008632
-  Oxford Nanopore Sequencing: http://purl.obolibrary.org/obo/NCIT_C146818
-  Sanger dideoxy sequencing: http://purl.obolibrary.org/obo/NCIT_C19641
-
 specimen_source:
   nasopharyngeal swab: http://purl.obolibrary.org/obo/NCIT_C155831
   oropharyngeal swab: http://purl.obolibrary.org/obo/NCIT_C155835
   sputum: http://purl.obolibrary.org/obo/NCIT_C13278
   bronchoalveolar lavage fluid: http://purl.obolibrary.org/obo/NCIT_C13195
-
-specimen_source2:
-  nasopharyngeal swab: http://purl.obolibrary.org/obo/NCIT_C155831
-  oropharyngeal swab: http://purl.obolibrary.org/obo/NCIT_C155835
-  sputum: http://purl.obolibrary.org/obo/NCIT_C13278
-  bronchoalveolar lavage fluid: http://purl.obolibrary.org/obo/NCIT_C13195
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ea813a0..99e1a11 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -48,6 +48,8 @@ $graph:
         type: string?
         jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/NCIT_C25688
+          _type: "@id"
+          noLinkCheck: true
     host_treatment:
       doc: Process in which the act is intended to modify or alter host status
       type: string?
@@ -55,9 +57,16 @@ $graph:
           _id: http://www.ebi.ac.uk/efo/EFO_0000727
     host_vaccination:
       doc: List of vaccines given to the host
-      type: string?
+      type: string[]?
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/VO_0000002
+    ethnicity:
+      doc: Ethnicity of the host e.g. http://purl.obolibrary.org/obo/HANCESTRO_0010
+      type: string?
+      jsonldPredicate:
+          _id: http://semanticscience.org/resource/SIO_001014
+          _type: "@id"
+          noLinkCheck: true
     additional_host_information:
       doc: Field for additional host information
       type: string?
@@ -90,20 +99,13 @@ $graph:
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/OBI_0001895
     collecting_institution:
-      doc: Institute that was responsible of sampeling
+      doc: Institute that was responsible for sampling
       type: string?
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/NCIT_C41206
     specimen_source:
       doc: Method how the specimen was derived as NCIT IRI, e.g. http://purl.obolibrary.org/obo/NCIT_C155831 (=nasopharyngeal swab)
-      type: string?
-      jsonldPredicate:
-          _id: http://purl.obolibrary.org/obo/OBI_0001479
-          _type: "@id"
-          noLinkCheck: true
-    specimen_source2:
-      doc: Method how the specimen was derived as NCIT IRI, e.g. http://purl.obolibrary.org/obo/NCIT_C155835 (=throat swabb)
-      type: string?
+      type: string[]?
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/OBI_0001479
           _type: "@id"
@@ -119,16 +121,18 @@ $graph:
       jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_001167
     source_database_accession:
-      doc: If data is deposit at a public resource (e.g. Genbank, ENA) enter the Accession Id here
-      type: string?
+      doc: If data is deposited at a public resource (e.g. Genbank, ENA) enter the Accession Id here. Please use a resolvable URL (e.g. http://identifiers.org/insdc/LC522350.1#sequence)
+      type: string[]?
       jsonldPredicate:
           _id: http://edamontology.org/data_2091
+          _type: "@id"
+          noLinkCheck: true
 
 - name: virusSchema
   type: record
   fields:
     virus_species:
-      doc: The name of a taxon from the NCBI taxonomy database
+      doc: The name of virus species from the NCBI taxonomy database, e.g. http://purl.obolibrary.org/obo/NCBITaxon_2697049 for Severe acute respiratory syndrome coronavirus 2
       type: string
       jsonldPredicate:
           _id: http://edamontology.org/data_1875
@@ -145,21 +149,7 @@ $graph:
   fields:
     sample_sequencing_technology:
       doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION)
-      type: string?
-      jsonldPredicate:
-        _id: http://purl.obolibrary.org/obo/OBI_0600047
-        _type: "@id"
-        noLinkCheck: true
-    sample_sequencing_technology2:
-      doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION)
-      type: string?
-      jsonldPredicate:
-        _id: http://purl.obolibrary.org/obo/OBI_0600047
-        _type: "@id"
-        noLinkCheck: true
-    sample_sequencing_technology3:
-      doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION)
-      type: string?
+      type: string[]?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/OBI_0600047
         _type: "@id"
@@ -170,13 +160,8 @@ $graph:
       jsonldPredicate:
         _id: http://www.ebi.ac.uk/efo/EFO_0002699
     sequencing_coverage:
-      doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. 100x)
-      type: float?
-      jsonldPredicate:
-        _id: http://purl.obolibrary.org/obo/FLU_0000848
-    sequencing_coverage2:
-      doc: If a second sequence technology was used you can submit its coverage here
-      type: float?
+      doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. [100]) - if multiple technologies were used multiple float values can be submitted e.g. [100, 20]
+      type: double[]?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/FLU_0000848
     additional_technology_information:
@@ -189,13 +174,13 @@ $graph:
   type: record
   fields:
     authors:
-      doc: Name of the author(s)
-      type: string
+      doc: Name(s) of the author(s)
+      type: string[]
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/NCIT_C42781
     submitter_name:
-      doc: Name of the submitter
-      type: string?
+      doc: Name of the submitter(s)
+      type: string[]?
       jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_000116
     submitter_address:
@@ -228,7 +213,7 @@ $graph:
         _id: http://purl.obolibrary.org/obo/NCIT_C19026
     submitter_orcid:
       doc: ORCID of the submitter as a full URI, e.g. https://orcid.org/0000-0002-1825-0097
-      type: string?
+      type: string[]?
       jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_000115
           _type: "@id"
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index c3b0ae1..cdf2296 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -25,8 +25,9 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
     obo:NCIT_C42574 [ obo:UO_~ ] ?;
 	obo:NCIT_C25688 [obo:NCIT_C115935 obo:NCIT_C3833 obo:NCIT_C25269 obo:GENEPIO_0002020 obo:GENEPIO_0001849 obo:NCIT_C28554 obo:NCIT_C37987 ] ? ;
     efo:EFO_0000727 xsd:string ?;
-    obo:VO_0000002 xsd:string ?;
+    obo:VO_0000002 xsd:string {0,10};
     sio:SIO_001167 xsd:string ?;
+    sio:SIO_001014 [ obo:HANCESTRO_~ ] ? ; #ethnicity
 }
 
 :sampleShape  {
@@ -38,25 +39,26 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
     obo:OBI_0001479 IRI {0,2};
     obo:OBI_0001472 xsd:string ?;
     sio:SIO_001167 xsd:string ?;
+	edam:data_2091 IRI {0,3};
 }
 
 :submitterShape {
-    obo:NCIT_C42781 xsd:string ;
-    sio:SIO_000116 xsd:string ?;
+    obo:NCIT_C42781 xsd:string + ;
+    sio:SIO_000116 xsd:string *;
     sio:SIO_000172 xsd:string ?;
     obo:NCIT_C37984 xsd:string ?;
     obo:OBI_0600047 xsd:string ?;
   	obo:NCIT_C37900 xsd:string ?;
     efo:EFO_0001741 xsd:string ?;
     obo:NCIT_C19026 xsd:string ?;
-    sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?;
+    sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/ {0,10};
     sio:SIO_001167 xsd:string ?;
 }
 
 :technologyShape {
     obo:OBI_0600047 IRI {0,3} ;
     efo:EFO_0002699 xsd:string ?;
-    obo:FLU_0000848 xsd:double {0,2};
+    obo:FLU_0000848 xsd:double OR xsd:integer {0,3};
     sio:SIO_001167 xsd:string ?;
 }
 
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 4c4711d..10d1029 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -62,11 +62,16 @@ def main():
 
     external_ip = urllib.request.urlopen('https://ident.me').read().decode('utf8')
 
+    try:
+        username = getpass.getuser()
+    except KeyError:
+        username = "unknown"
+
     properties = {
         "sequence_label": seqlabel,
         "upload_app": "bh20-seq-uploader",
         "upload_ip": external_ip,
-        "upload_user": "%s@%s" % (getpass.getuser(), socket.gethostname())
+        "upload_user": "%s@%s" % (username, socket.gethostname())
     }
 
     col.save_new(owner_uuid=UPLOAD_PROJECT, name="%s uploaded by %s from %s" %
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index e47d66b..16cf2c9 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -1,6 +1,25 @@
 import pkg_resources
 import tempfile
 import magic
+import subprocess
+import tempfile
+import logging
+import re
+
+def read_fasta(sequence):
+    """Return (label, bases) for a single-record FASTA: the '>' header line (None if absent)
+    and the list of all other lines, unmodified. Raises ValueError on a second '>' header."""
+    label = None
+    bases = []
+    for line in sequence:
+        if not line.startswith(">"):
+            bases.append(line)
+        elif label is None:
+            label = line
+        else:
+            # a second header means the file holds more than one record
+            raise ValueError("FASTA file contains multiple entries")
+    return label, bases
 
 def qc_fasta(sequence):
     schema_resource = pkg_resources.resource_stream(__name__, "validation/formats")
@@ -13,16 +32,44 @@ def qc_fasta(sequence):
     sequence.seek(0)
     if seq_type == "text/fasta":
         # ensure that contains only one entry
-        entries = 0
-        for line in sequence:
-            if line.startswith(">"):
-                entries += 1
-            if entries > 1:
-                raise ValueError("FASTA file contains multiple entries")
-                break
+        submitlabel, submitseq = read_fasta(sequence)
         sequence.seek(0)
+
+        with tempfile.NamedTemporaryFile() as tmp1:
+            refstring = pkg_resources.resource_string(__name__, "SARS-CoV-2-reference.fasta")
+            tmp1.write(refstring)
+            tmp1.write(submitlabel.encode("utf8"))
+            tmp1.write(("".join(submitseq)).encode("utf8"))
+            tmp1.flush()
+            try:
+                cmd = ["clustalw", "-infile="+tmp1.name,
+                       "-quicktree", "-iteration=none", "-type=DNA"]
+                print("QC checking similarity to reference")
+                print(" ".join(cmd))
+                result = subprocess.run(cmd, stdout=subprocess.PIPE)
+                res = result.stdout.decode("utf-8")
+                g1 = re.search(r"^Sequence 1: [^ ]+ +(\d+) bp$", res, flags=re.MULTILINE)
+                refbp = float(g1.group(1))
+                g2 = re.search(r"^Sequence 2: [^ ]+ +(\d+) bp$", res, flags=re.MULTILINE)
+                subbp = float(g2.group(1))
+                g3 = re.search(r"^Sequences \(1:2\) Aligned\. Score: (\d+(\.\d+)?)$", res, flags=re.MULTILINE)
+                similarity = float(g3.group(1))
+
+                print(g1.group(0))
+                print(g2.group(0))
+                print(g3.group(0))
+            except Exception as e:
+                logging.warn("Error trying to QC against reference sequence using 'clustalw': %s", e)
+
+            if (subbp/refbp) < .7:
+                raise ValueError("QC fail: submit sequence length is shorter than 70% reference")
+            if (subbp/refbp) > 1.3:
+                raise ValueError("QC fail: submit sequence length is greater than 130% reference")
+            if similarity < 70.0:
+                raise ValueError("QC fail: submit similarity is less than 70%")
+
         return "sequence.fasta"
     elif seq_type == "text/fastq":
         return "reads.fastq"
     else:
-        raise ValueError("Sequence file does not look like FASTA or FASTQ")
+        raise ValueError("Sequence file does not look like a DNA FASTA or FASTQ")
diff --git a/bh20simplewebuploader/__init__.py b/bh20simplewebuploader/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/bh20simplewebuploader/__init__.py
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index e88eb4c..126b8dd 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -8,7 +8,7 @@ import re
 import string
 import yaml
 import pkg_resources
-from flask import Flask, request, redirect, send_file, send_from_directory, render_template
+from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify
 import os.path
 import requests
 
@@ -197,6 +197,14 @@ def generate_form(schema, options):
                     record['type'] = 'number'
                     # Choose a reasonable precision for the control
                     record['step'] = '0.0001'
+
+                ### This is to fix the homepage for the moment ## needs more love though
+                # implementation of the [] stuff instead of just text fields
+                ## ToDo - implement lists
+                elif field_type == 'string[]':
+                    record['type'] = 'text'
+                elif field_type == 'float[]':
+                    record['type'] = 'text'
                 else:
                     raise NotImplementedError('Unimplemented field type {} in {} in metadata schema'.format(field_type, type_name))
                 yield record
@@ -358,7 +366,8 @@ def getAllaccessions():
     payload = {'query': query, 'format': 'json'}
     r = requests.get(baseURL, params=payload)
     result = r.json()['results']['bindings']
-    return str(result)
+    return jsonify([{'uri': x['fasta']['value'],
+                     'value': x['value']['value']} for x in result])
 
 
 # parameter must be encoded e.g. http://arvados.org/keep:6e6276698ed8b0e6cd21f523e4f91179+123/sequence.fasta must become
@@ -368,26 +377,69 @@ def getDetailsForSeq():
     seq_id = request.args.get('seq')
     query="""SELECT DISTINCT ?key ?value WHERE {<placeholder> ?x [?key ?value]}"""
     query=query.replace("placeholder", seq_id)
-    print(query)
+    payload = {'query': query, 'format': 'json'}
+    r = requests.get(baseURL, params=payload)
+    result = r.json()['results']['bindings']
+    return jsonify([{'uri': x['key']['value'],
+                     'value': x['value']['value']} for x in result])
+
+
+@app.route('/api/getSEQCountbytech', methods=['GET'])
+def getSEQCountbytech():
+    """Return, as JSON, the number of FASTA entries per sequencing technology, largest count first."""
+    sparql = """SELECT ?tech ?tech_label (count(?fasta) as ?fastaCount) WHERE 
+    {?fasta ?x [<http://purl.obolibrary.org/obo/OBI_0600047>  ?tech] 
+    BIND (concat(?tech,"_label") as ?tech_label)}
+    GROUP BY ?tech ?tech_label ORDER BY DESC (?fastaCount)
+    """
+    response = requests.get(baseURL, params={'query': sparql, 'format': 'json'})
+    # Flatten each SPARQL JSON binding to the plain values the front end displays.
+    rows = [{'Fasta Count': b['fastaCount']['value'], 'tech': b['tech']['value'], 'Label': b['tech_label']['value']}
+            for b in response.json()['results']['bindings']]
+    return jsonify(rows)
+
+## Is this one really necessary or should we just use getSEQCountbytech instead?
+@app.route('/api/getAvailableTech', methods=['GET'])
+def getAvailableTech():
+    query="""SELECT distinct ?tech ?tech_label WHERE 
+    {?fasta ?x [<http://purl.obolibrary.org/obo/OBI_0600047> ?tech] 
+     BIND (concat(?tech,"_label") as ?tech_label)
+    } """
     payload = {'query': query, 'format': 'json'}
     r = requests.get(baseURL, params=payload)
     result = r.json()['results']['bindings']
     return str(result)
 
+## List all Sequences/submissions by a given tech, as example e.g. http://purl.obolibrary.org/obo/OBI_0000759
+## Has to be encoded again so should be --> http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FOBI_0000759
 @app.route('/api/getSEQbytech', methods=['GET'])
 def getSEQbytech():
-    query="""SELECT ?specimen_source ?specimen_source_label (count(?fasta) as ?fastaCount) WHERE 
-    {?fasta ?x [<http://purl.obolibrary.org/obo/OBI_0600047>  ?specimen_source] 
-    BIND (concat(?specimen_source,"_label") as ?specimen_source_label)}
-    GROUP BY ?specimen_source ?specimen_source_label ORDER BY DESC (?fastaCount)
+    query="""SELECT ?fasta WHERE 
+    {?fasta ?x [<http://purl.obolibrary.org/obo/OBI_0600047>  <placeholder>] }
     """
+    tech = request.args.get('tech')
+    query=query.replace("placeholder", tech)
     payload = {'query': query, 'format': 'json'}
     r = requests.get(baseURL, params=payload)
     result = r.json()['results']['bindings']
     return str(result)
 
+
+## Example location, encoded http%3A%2F%2Fwww.wikidata.org%2Fentity%2FQ1223
 @app.route('/api/getSEQbyLocation', methods=['GET'])
 def getSEQbyLocation():
+    query="""SELECT ?fasta WHERE {?fasta ?x[ <http://purl.obolibrary.org/obo/GAZ_00000448> <placeholder>]}"""
+    location=request.args.get('location')
+    query=query.replace("placeholder", location)
+    print(query)
+    payload = {'query': query, 'format': 'json'}
+    r = requests.get(baseURL, params=payload)
+    result = r.json()['results']['bindings']
+    return str(result)
+
+
+@app.route('/api/getSEQCountbyLocation', methods=['GET'])
+def getSEQCountbyLocation():
     query="""SELECT ?geoLocation ?geoLocation_label (count(?fasta) as ?fastaCount)  WHERE
     {?fasta ?x [<http://purl.obolibrary.org/obo/GAZ_00000448> ?geoLocation]
     BIND (concat(?geoLocation,"_label") as ?geoLocation_label)}
@@ -396,10 +448,13 @@ def getSEQbyLocation():
     payload = {'query': query, 'format': 'json'}
     r = requests.get(baseURL, params=payload)
     result = r.json()['results']['bindings']
-    return str(result)
+    return jsonify([{'Fasta Count': x['fastaCount']['value'],
+                     'GeoLocation': x['geoLocation']['value'],
+                     'GeoLocation Label': x['geoLocation_label']['value']} for x in result])
 
-@app.route('/api/getSEQbySpecimenSource', methods=['GET'])
-def getSEQbySpecimenSource():
+
+@app.route('/api/getSEQCountbySpecimenSource', methods=['GET'])
+def getSEQCountbySpecimenSource():
     query="""SELECT ?specimen_source ?specimen_source_label (count(?fasta) as ?fastaCount)  WHERE
     {?fasta ?x [<http://purl.obolibrary.org/obo/OBI_0001479>  ?specimen_source]
     BIND (concat(?specimen_source,"_label") as ?specimen_source_label)}
@@ -409,11 +464,27 @@ def getSEQbySpecimenSource():
     payload = {'query': query, 'format': 'json'}
     r = requests.get(baseURL, params=payload)
     result = r.json()['results']['bindings']
+    return jsonify([{'Fasta Count': x['fastaCount']['value'],
+                     'Specimen Source': x['specimen_source']['value'],
+                     'Label': x['specimen_source_label']['value']} for x in result])
+
+# Example specimen http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FNCIT_C155831
+@app.route('/api/getSEQbySpecimenSource', methods=['GET'])
+def getSEQBySpecimenSource():
+    """List FASTA entries whose specimen source (OBI_0001479) is the IRI passed as ?specimen=; 400 if it is missing/invalid."""
+    specimen = request.args.get('specimen', '')
+    # Reject a missing value and characters illegal in a SPARQL IRIREF, so the value cannot escape the <...>.
+    if not specimen or re.search(r'[\x00-\x20<>"{}|^`\\]', specimen):
+        return "Parameter 'specimen' must be an IRI", 400
+    query="""SELECT ?fasta ?specimen_source ?specimen_source_label  WHERE
+    {?fasta ?x [<http://purl.obolibrary.org/obo/OBI_0001479> <placeholder>]
+    BIND (concat(?specimen_source,"_label") as ?specimen_source_label)}
+    """.replace("placeholder", specimen)
+    return str(requests.get(baseURL, params={'query': query, 'format': 'json'}).json()['results']['bindings'])
 
 #No data for this atm
-@app.route('/api/getSEQbyHostHealthStatus', methods=['GET'])
-def getSEQbyHostHealthStatus():
+@app.route('/api/getSEQCountbyHostHealthStatus', methods=['GET'])
+def getSEQCountbyHostHealthStatus():
     query="""SELECT ?health_status ?health_status_label (count(?fasta) as ?fastaCount)  WHERE
     {?fasta ?x [<http://purl.obolibrary.org/obo/NCIT_C25688> ?health_status]
     BIND (concat(?health_status,"_label") as ?health_status_label)}
@@ -423,4 +494,36 @@ def getSEQbyHostHealthStatus():
     payload = {'query': query, 'format': 'json'}
     r = requests.get(baseURL, params=payload)
     result = r.json()['results']['bindings']
+    return str(result)
+
+@app.route('/api/getSEQbyLocationAndTech', methods=['GET'])
+def getSEQbyLocationAndTech():
+    """List FASTA entries matching both the location IRI (?location=) and technology IRI (?tech=); 400 if either is invalid."""
+    location = request.args.get('location', '')
+    tech = request.args.get('tech', '')
+    # Reject missing values and characters illegal in a SPARQL IRIREF, so a parameter cannot break out of <...>.
+    if not all(v and not re.search(r'[\x00-\x20<>"{}|^`\\]', v) for v in (location, tech)):
+        return "Parameters 'location' and 'tech' must be IRIs", 400
+    query = ("SELECT ?fasta WHERE { ?fasta ?x [ <http://purl.obolibrary.org/obo/GAZ_00000448> <%s>; "
+             "<http://purl.obolibrary.org/obo/OBI_0600047> <%s> ]}") % (location, tech)
+    payload = {'query': query, 'format': 'json'}
+    result = requests.get(baseURL, params=payload).json()['results']['bindings']
+    return str(result)
+
+
+# Example Location http%3A%2F%2Fwww.wikidata.org%2Fentity%2FQ1223
+# Example specimen http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FNCIT_C155831
+@app.route('/api/getSEQbyLocationAndSpecimenSource', methods=['GET'])
+def getSEQbyLocationAndSpecimenSource():
+    query="""SELECT ?fasta WHERE { ?fasta ?x [ 
+        <http://purl.obolibrary.org/obo/GAZ_00000448> <placeholderLoc>; <http://purl.obolibrary.org/obo/OBI_0001479>  <placeholderSpecimen> ]}
+    """
+    location = request.args.get('location')
+    specimen = request.args.get('specimen')
+    query = query.replace("placeholderLoc", location)
+    query = query.replace("placeholderSpecimen", specimen)
+    print(query)
+    payload = {'query': query, 'format': 'json'}
+    r = requests.get(baseURL, params=payload)
+    result = r.json()['results']['bindings']
     return str(result)
\ No newline at end of file
diff --git a/bh20simplewebuploader/static/main.css b/bh20simplewebuploader/static/main.css
new file mode 100644
index 0000000..57e29ef
--- /dev/null
+++ b/bh20simplewebuploader/static/main.css
@@ -0,0 +1,269 @@
+hr {
+    margin: auto 0;
+}
+
+body {
+    color: #101010;
+    background-color: #F5FFFF;
+    margin: 0;
+}
+
+h1, h2, h3, h4 {
+    font-family: 'Inter', sans-serif;
+    color: #0ED1CD;
+}
+
+h1 {
+    text-align: center;
+}
+
+.intro {
+    color: #505050;
+    font-weight: 300;
+}
+
+.header {
+    background-color: white;
+    margin: 0 auto;
+    padding: 20px;
+    text-align: center;
+    height: 150px;
+}
+
+h2 > svg {
+    position: relative;
+    top: 8px;
+}
+
+.logo {
+    float: right;
+}
+
+p, form, .about, .footer {
+    font-family: 'Inter', sans-serif;
+    line-height: 1.5;
+}
+
+form h4 {
+    text-transform: uppercase; /* keyword value; the quoted form 'uppercase' is invalid and was ignored */
+}
+
+.intro, form, .search {
+    padding: 20px;
+}
+
+.intro {
+    background-color: inherit;
+    margin: 0 auto;
+    padding: 20px;
+}
+
+.about {
+    background-color: #B2F8F8;
+    margin: 30px auto;
+    padding: 20px;
+    width: 95%;
+    border-radius: 20px;
+}
+
+.button {
+    border-radius: 5px;
+    background: #0ED1CD;
+    margin: 0.3em auto;
+    padding: 0.4em;
+}
+
+.footer {
+    background: #058280;;
+    margin: 0 auto;
+    color: #fff;
+}
+
+.footer a {
+    color: #fff;
+}
+
+span.dropt {border-bottom: thin dotted; background: #ffeedd;}
+span.dropt:hover {text-decoration: none; background: #ffffff; z-index: 6; }
+
+.grid-container {
+    display: grid;
+    grid-template-columns: repeat(4, 1fr);
+    grid-template-rows: auto;
+    row-gap:5px;
+    grid-template-areas:
+        "a a b b"
+            "a a c c"
+            "a a d d"
+            "e e e e"
+            "f f f f";
+    grid-auto-flow: column;
+}
+
+.about {
+    display: grid;
+    grid-template-columns: repeat(2, 1fr);
+    grid-auto-flow: row;
+}
+
+.about h1 {
+    text-align: left;
+}
+
+.about p {
+    font-weight: 300;
+    color: #505050;
+}
+
+.intro {
+    grid-area: a;
+}
+
+.fasta-file-select {
+    padding: 1em;
+    grid-area: b;
+}
+
+a {
+    color: #40DBD8;
+    font-weight: 700;
+}
+
+.fasta-file-select label, .metadata label {
+    font-weight: 600;
+}
+
+.metadata {
+    padding: 1em;
+    grid-area: c;
+}
+.metadata_upload_form {
+    padding: 1em;
+    grid-area: c;
+}
+
+#metadata_upload_form_spot {
+    grid-area: d;
+}
+
+#metadata_fill_form_spot {
+    grid-area: e;
+}
+
+#metadata_fill_form {
+    column-count: 4;
+    margin-top: 0.5em;
+    column-width: 250px;
+}
+
+pre code {
+    background-color: #eee;
+    display: flex;
+    width: max-content;
+    margin: 0 auto;
+    overflow-y: scroll;
+    max-height: 300px;
+    padding: 10px;
+    border: solid 1px black;
+}
+
+.record {
+    display: flex;
+    flex-direction: column;
+    border: solid 1px #808080;
+    padding: 1em;
+    background: #F8F8F8;
+    margin-bottom: 1em;
+    -webkit-column-break-inside: avoid; /* Chrome, Safari, Opera */
+    page-break-inside: avoid; /* Firefox */
+    break-inside: avoid;
+}
+
+.record label {
+    font-size: small;
+    margin-top: 10px;
+}
+
+.search-section {
+    display: flex;
+    justify-content: space-between;
+}
+
+.search-section .filter-options {
+    display: flex;
+    flex-direction: column;
+    width: max-content;
+    padding: 20px;
+}
+
+.search-section p {
+    margin: 0;
+}
+
+.submit {
+    grid-area: f;
+    width: 17em;
+    justify-self: center;
+}
+
+footer {
+    display: block;
+    width: 100%;
+}
+
+.sponsors {
+    width: inherit;
+    display: flex;
+    flex-direction: row;
+    flex-wrap: wrap;
+    justify-content: space-evenly;
+    align-content: space-evenly;
+}
+
+.sponsors a {
+    flex-grow: 4;
+    height: 200px;
+    margin: 10px;
+    background: white;
+    display: flex;
+    flex-direction: column;
+    justify-content: center;
+}
+.sponsors img {
+    width: 100%;
+}
+.metadata input#metadata_upload:checked ~ #metadata_upload_form_spot {
+    display: block;
+}
+
+.metadata input#metadata_upload ~ #metadata_upload_form_spot {
+    display: none;
+}
+
+.loader {
+    display: block;
+    border: 5px solid #f3f3f3; /* Light grey */
+    border-top: 5px solid #3498db; /* Blue */
+    border-radius: 50%;
+    width: 20px;
+    height: 20px;
+    margin-right: auto;
+    margin-left: auto;
+    animation: spin 1.5s linear infinite;
+}
+
+.invisible {
+    display: none;
+}
+
+@keyframes spin {
+    0% { transform: rotate(0deg); }
+    100% { transform: rotate(360deg); }
+}
+
+@media only screen and (max-device-width: 480px) {
+    .grid-container {
+        display: flex;
+        flex-direction: column;
+    }
+}
diff --git a/bh20simplewebuploader/static/main.js b/bh20simplewebuploader/static/main.js
new file mode 100644
index 0000000..96199a0
--- /dev/null
+++ b/bh20simplewebuploader/static/main.js
@@ -0,0 +1,47 @@
+// Fetch JSON from an API path (relative to scriptRoot) and show it in #json, toggling the spinner.
+function fetchAPI(apiEndPoint) {
+  const results = document.getElementById("results");
+  const loader = document.getElementById("loader");
+  results.classList.add("invisible");
+  loader.classList.remove("invisible");
+  fetch(scriptRoot + apiEndPoint)
+    .then(response => { if (!response.ok) throw new Error("HTTP " + response.status); return response.json(); })
+    .then(data => JSON.stringify(data, undefined, 2))
+    // On failure (network error, HTTP error, non-JSON body) show the error instead of spinning forever.
+    .catch(error => "Request failed: " + error.message)
+    .then(text => { document.getElementById("json").textContent = text; })
+    .then(() => { results.classList.remove("invisible"); loader.classList.add("invisible"); });
+}
+
+// Look up the details for the value typed into #search-input; fetchAPI prepends scriptRoot itself.
+let search = () => {
+  fetchAPI("/api/getDetailsForSeq?seq=" + encodeURIComponent(document.getElementById('search-input').value));
+}
+
+// Load /api/getSEQCountbySpecimenSource into the result pane.
+let fetchSEQBySpecimen = () =>
+  fetchAPI("/api/getSEQCountbySpecimenSource");
+
+// Load /api/getSEQCountbyLocation into the result pane.
+let fetchSEQByLocation = () =>
+  fetchAPI("/api/getSEQCountbyLocation");
+
+// Load /api/getSEQCountbytech into the result pane.
+let fetchSEQByTech = () =>
+  fetchAPI("/api/getSEQCountbytech");
+
+// Load /api/getAllaccessions into the result pane.
+let fetchAllaccessions = () =>
+  fetchAPI("/api/getAllaccessions");
+
+/**
+ * Show the fill-in metadata form only while the #metadata_form control is checked.
+ * Hiding is done by adding the "invisible" class, showing by removing it.
+ * The target element is looked up once, when this script is evaluated.
+ */
+let fillFormSpot = document.getElementById('metadata_fill_form_spot');
+function displayForm() {
+  const formRequested = document.getElementById('metadata_form').checked;
+  // toggle(name, force): force === true adds the class, force === false removes it.
+  fillFormSpot.classList.toggle("invisible", !formRequested);
+}
diff --git a/bh20simplewebuploader/templates/form.html b/bh20simplewebuploader/templates/form.html
index 02ae84d..ffd4158 100644
--- a/bh20simplewebuploader/templates/form.html
+++ b/bh20simplewebuploader/templates/form.html
@@ -1,152 +1,9 @@
 <!DOCTYPE html>
 <html>
-    <style>
-     hr {
-         margin: auto 0;
-     }
-
-     body {
-         color: #101010;
-         background-color: #F9EDE1;
-     }
-
-     h1, h2, h3, h4 {
-         font-family: 'Roboto Slab', serif;
-         color: darkblue;
-     }
-
-     h1 {
-         text-align: center;
-     }
-
-     p {
-         color: #505050;
-         font-style: italic;
-     }
-     .header {
-         background-color: white;
-         margin: 0 auto;
-         padding: 20px;
-         text-align: center;
-         height: 150px;
-     }
-
-     .logo {
-         float: right;
-     }
-
-     p, form, .about, .footer {
-         font-family: 'Raleway', sans-serif;
-         line-height: 1.5;
-     }
-
-     form h4 {
-         text-transform: 'uppercase';
-     }
-
-     .intro, form {
-         padding: 20px;
-     }
-
-     .intro {
-         background-color: lightgrey;
-         margin: 0 auto;
-         padding: 20px;
-     }
-
-     .about {
-         background-color: lightgrey;
-         margin: 0 auto;
-         padding: 20px;
-     }
-     .footer {
-         background-color: white;
-         margin: 0 auto;
-     }
-
-     span.dropt {border-bottom: thin dotted; background: #ffeedd;}
-     span.dropt:hover {text-decoration: none; background: #ffffff; z-index: 6; }
-
-     .grid-container {
-         display: grid;
-         grid-template-columns: repeat(4, 1fr);
-         grid-template-rows: auto;
-         row-gap:5px;
-         grid-template-areas:
-             "a a b b"
-             "a a c c"
-             "a a d d"
-             "e e e e"
-             "f f f f";
-         grid-auto-flow: column;
-     }
-
-     .intro {
-         grid-area: a;
-     }
-
-     .fasta-file-select {
-         padding: 1em;
-         grid-area: b;
-     }
-
-     .metadata {
-         padding: 1em;
-         grid-area: c;
-     }
-     .metadata_upload_form {
-         padding: 1em;
-         grid-area: c;
-     }
-
-     #metadata_upload_form_spot {
-         grid-area: d;
-     }
-
-     #metadata_fill_form_spot {
-         grid-area: e;
-     }
-
-     #metadata_fill_form {
-         column-count: 4;
-         margin-top: 0.5em;
-         column-width: 250px;
-     }
-
-     .record {
-         display: flex;
-         flex-direction: column;
-         border: solid 1px #808080;
-         padding: 1em;
-         background: #F8F8F8;
-         margin-bottom: 1em;
-         -webkit-column-break-inside: avoid; /* Chrome, Safari, Opera */
-         page-break-inside: avoid; /* Firefox */
-         break-inside: avoid;
-     }
-
-     .record label {
-         font-size: small;
-         margin-top: 10px;
-     }
-
-     .submit {
-         grid-area: f;
-         width: 17em;
-         justify-self: center;
-     }
-
-     @media only screen and (max-device-width: 480px) {
-         .grid-container {
-             display: flex;
-             flex-direction: column;
-         }
-     }
-    </style>
-
     <head>
         <meta charset="UTF-8">
-        <link href="https://fonts.googleapis.com/css2?family=Raleway:wght@500&family=Roboto+Slab&display=swap" rel="stylesheet">
+        <link href="https://fonts.googleapis.com/css2?family=Inter:wght@100;200;300;400;500;600;700;800;900&display=swap" rel="stylesheet"> 
+        <link href="/static/main.css" rel="stylesheet" type="text/css">
         <meta name="viewport" content="width=device-width, initial-scale=1">
         <title>Web uploader for Public SARS-CoV-2 Sequence Resource</title>
     </head>
@@ -158,28 +15,67 @@
 <small>Disabled until we got everything wired up</small>
 
       </section>
-        <hr>
+
+      <section class="search-section">
+          <div class="filter-options">
+              <p>[Demo] Display content sequences by: </p>
+
+              <div>
+                  <button class="button" onclick="fetchSEQBySpecimen()">Specimen Source</button>
+                  <button class="button" onclick="fetchSEQByLocation()">Location</button>
+                  <button class="button" onclick="fetchSEQByTech()">Tech</button>
+                  <button class="button" onclick="fetchAllaccessions()">Allaccessions</button>
+              </div>
+
+          </div>
+
+          <div class="search">
+              <input id="search-input" type="search" placeholder="FASTA uri" required>
+              <button class="button search-button" type="submit" onclick="search()">
+		  <span class="icon ion-search">
+		      <span class="sr-only">Search</span>
+		  </span>
+	      </button>
+          </div>
+      </section>
+
+      <div id="loader" class="loader invisible"></div>
+
+      <section id="results" class="invisible">
+          <pre><code id="json"></code></pre>
+      </section>
 
         <section>
             <form action="/submit" method="POST" enctype="multipart/form-data" id="main_form" class="grid-container">
-              <p class="intro">
-                Upload your SARS-CoV-2 sequence (FASTA or FASTQ formats) with metadata (JSONLD) to the <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">public sequence resource</a>. The upload will trigger a
-                recompute with all available sequences into a Pangenome
-                available for
-                <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">download</a>!
-                Your uploaded sequence will automatically be processed
-                and incorporated into the public pangenome with
-                metadata using worklows from the High Performance Open Biology Lab defined <a href="https://github.com/hpobio-lab/viral-analysis/tree/master/cwl/pangenome-generate">here</a>. All data is published under
-                a <a href="https://creativecommons.org/licenses/by/4.0/">Creative
-                Commons 4.0 attribution license</a> (CC-BY-4.0). You
-                can take the published (GFA/RDF/FASTA) data and store it in
-                a triple store for further processing. We also plan to
-                combine identifiers with clinical data stored securely at <a href="https://redcap-covid19.elixir-luxembourg.org/redcap/">REDCap</a>.
-                A free command line version of the uploader can be
-                installed from <a href="https://github.com/arvados/bh20-seq-resource">source</a>.
-              </p>
+                <div class="intro">
+                    <p>
+                        Upload your SARS-CoV-2 sequence (FASTA or FASTQ formats) with metadata (JSONLD) to the <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">public sequence resource</a>. The upload will trigger a
+                        recompute with all available sequences into a Pangenome
+                        available for
+                        <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">download</a>!
+                    </p>
+                    <p>
+                        Your uploaded sequence will automatically be processed
+                        and incorporated into the public pangenome with
+                        metadata using workflows from the High Performance Open Biology Lab defined <a href="https://github.com/hpobio-lab/viral-analysis/tree/master/cwl/pangenome-generate">here</a>. All data is published under
+                        a <a href="https://creativecommons.org/licenses/by/4.0/">Creative
+                        Commons 4.0 attribution license</a> (CC-BY-4.0). You
+                        can take the published (GFA/RDF/FASTA) data and store it in
+                        a triple store for further processing. We also plan to
+                        combine identifiers with clinical data stored securely at <a href="https://redcap-covid19.elixir-luxembourg.org/redcap/">REDCap</a>.
+                        A free command line version of the uploader can be
+                        installed from <a href="https://github.com/arvados/bh20-seq-resource">source</a>.
+                    </p>
+
+                </div>
 
                 <div class="fasta-file-select">
+                    <h2><svg class="bi bi-cloud-upload" width="1.2em" height="1.2em" viewBox="0 0 16 16" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
+                        <path d="M4.887 6.2l-.964-.165A2.5 2.5 0 103.5 11H6v1H3.5a3.5 3.5 0 11.59-6.95 5.002 5.002 0 119.804 1.98A2.501 2.501 0 0113.5 12H10v-1h3.5a1.5 1.5 0 00.237-2.981L12.7 7.854l.216-1.028a4 4 0 10-7.843-1.587l-.185.96z"/>
+                        <path fill-rule="evenodd" d="M5 8.854a.5.5 0 00.707 0L8 6.56l2.293 2.293A.5.5 0 1011 8.146L8.354 5.5a.5.5 0 00-.708 0L5 8.146a.5.5 0 000 .708z" clip-rule="evenodd"/>
+                        <path fill-rule="evenodd" d="M8 6a.5.5 0 01.5.5v8a.5.5 0 01-1 0v-8A.5.5 0 018 6z" clip-rule="evenodd"/>
+                    </svg> Upload SARS-CoV-2 Sequence</h2>
+                    
                     <label for="fasta">Select FASTA file of assembled genome (max 50K), or FASTQ of reads (<span class="dropt" title="For a larger fastq file you'll need to use a CLI uploader">max 150MB<span style="width:500px;"></span></span>) : </label>
                     <br>
                     <input type="file" id="fasta" name="fasta" accept=".fa,.fasta,.fna,.fq" required>
@@ -189,16 +85,16 @@
                 </div>
 
                 <div class="metadata">
-                  <label>Select metadata submission method:</label>
-                  <br>
-                  <input type="radio" id="metadata_form" name="metadata_type" value="fill" onchange="setMode()" checked required>
-                  <label for="metadata_form">Fill in metadata manually</label>
-                  <input type="radio" id="metadata_upload" name="metadata_type" value="upload" onchange="setMode()" required>
-                  <label for="metadata_upload">Upload metadata file</label>
-                  <br>
-                  <small>Make sure the metadata has submitter attribution details.</small>
+                    <label>Select metadata submission method:</label>
+                    <br>
+                    <input type="radio" id="metadata_form" name="metadata_type" value="fill" checked  onchange="displayForm()" required>
+                    <label for="metadata_form">Fill in metadata manually</label>
+                    <input type="radio" id="metadata_upload" name="metadata_type" value="upload" onchange="displayForm()" required>
+                    <label for="metadata_upload">Upload metadata file</label>
+                    <br>
+                    <small>Make sure the metadata has submitter attribution details.</small>
 
-                  <div id="metadata_upload_form_spot">
+                    <div id="metadata_upload_form_spot">
                     <div id="metadata_upload_form">
                       <br>
                       <label for="metadata">Select JSON or YAML metadata file following <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml" target="_blank">this schema</a> and <a href="https://github.com/arvados/bh20-seq-resource/blob/master/example/metadata.yaml" target="_blank">example</a> (max 50K):</label>
@@ -206,9 +102,9 @@
                       <input type="file" id="metadata" name="metadata" accept=".json,.yml,.yaml" required>
                       <br>
                     </div>
-                  </div>
-                </div>
+                    </div>
 
+                </div>
                 <div id="metadata_fill_form_spot">
                     <div id="metadata_fill_form">
                         {% for record in fields %}
@@ -246,40 +142,44 @@
                     {% endif %}
                     {% endfor %}
                 </div>
+
                 </div>
 
 
 <input class="submit" type="submit" value="Add to Pangenome">
             </form>
         </section>
-<hr>
 <br>
 <div class="about">
-  <h3>ABOUT</h3>
-  <p>
-  This a public repository created at the COVID-19 BioHackathon
-    that has a low barrier to entry for uploading sequence data using
-    best practices. I.e., data is published with a creative commons
-    4.0 (CC-4.0) license with metadata using state-of-the art
-    standards and, perhaps most importantly, providing standardized
-    workflows that get triggered on upload, so that results are
-    immediately available in standardized data formats. The repository
-    will be maintained and expanded for the duration of the
-    pandemic. To contribute data simply upload it! To contribute code
-    and/or workflows see
-    the <a href="https://github.com/arvados/bh20-seq-resource">project
-    repository</a>. For more information see the <a href="https://github.com/arvados/bh20-seq-resource/blob/master/paper/paper.md">paper</a> (WIP).
-  </p>
-                    <br>
+    <div>
+        <h1>ABOUT</h1>
+        <p>
+            This is a public repository created at the COVID-19 BioHackathon
+            that has a low barrier to entry for uploading sequence data using
+            best practices. I.e., data is published with a Creative Commons
+            4.0 (CC-BY-4.0) license with metadata using state-of-the-art
+            standards and, perhaps most importantly, providing standardized
+            workflows that get triggered on upload, so that results are
+            immediately available in standardized data formats. The repository
+            will be maintained and expanded for the duration of the
+            pandemic. To contribute data simply upload it! To contribute code
+            and/or workflows see
+            the <a href="https://github.com/arvados/bh20-seq-resource">project
+            repository</a>. For more information see the <a href="https://github.com/arvados/bh20-seq-resource/blob/master/paper/paper.md">paper</a> (WIP).
+        </p>
+
+    </div>
+  <div class="sponsors">
+      <a href="https://arvados.org/"><img src="static/image/arvados-logo.png"></a>
+      <a href="https://www.commonwl.org/"><img src="static/image/CWL-Logo-Header.png"></a>
+
+      <a href="https://github.com/virtual-biohackathons/covid-19-bh20">
+          <img src="static/image/covid19biohackathon.png"></a>
+  </div>
 </div>
-
-                    <hr>
 <div class="footer">
-                    <a href="https://arvados.org/"><img src="static/image/arvados-logo.png" align="top"></a>
-                    <a href="https://www.commonwl.org/"><img src="static/image/CWL-Logo-Header.png" height="70"></a>
+    <!-- Sponsors -->
 
-        <a href="https://github.com/virtual-biohackathons/covid-19-bh20">
-        <img src="static/image/covid19biohackathon.png" align="right" height="70"></a>
 
         <center>
         <small><a href="https://github.com/arvados/bh20-seq-resource">Source code</a> &middot; Powered by <a href="https://www.commonwl.org/">Common Workflow Language</a> &amp; <a href="https://arvados.org/">Arvados</a>; Made for <a href="https://github.com/virtual-biohackathons/covid-19-bh20">COVID-19-BH20</a>
@@ -289,35 +189,10 @@
 
 </div>
 
-        <script type="text/javascript">
-         let uploadForm = document.getElementById('metadata_upload_form')
-         let uploadFormSpot = document.getElementById('metadata_upload_form_spot')
-         let fillForm = document.getElementById('metadata_fill_form')
-         let fillFormSpot = document.getElementById('metadata_fill_form_spot')
-
-         function setUploadMode() {
-             // Make the upload form the one in use
-             uploadFormSpot.appendChild(uploadForm)
-             fillFormSpot.removeChild(fillForm)
-         }
-
-         function setFillMode() {
-             // Make the fillable form the one in use
-             uploadFormSpot.removeChild(uploadForm)
-             fillFormSpot.appendChild(fillForm)
-         }
-
-         function setMode() {
-             // Pick mode based on radio
-             if (document.getElementById('metadata_upload').checked) {
-                 setUploadMode()
-             } else {
-                 setFillMode()
-             }
-         }
+<script type="text/javascript">
+ let scriptRoot = {{ request.script_root|tojson|safe }};
+</script>
 
-         // Start in mode appropriate to selected form item
-         setMode()
-        </script>
+<script type="text/javascript" src="/static/main.js"></script>
     </body>
 </html>
diff --git a/example/maximum_metadata_example.yaml b/example/maximum_metadata_example.yaml
new file mode 100644
index 0000000..1bc70d7
--- /dev/null
+++ b/example/maximum_metadata_example.yaml
@@ -0,0 +1,46 @@
+id: placeholder
+
+host:
+    host_id: XX1
+    host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
+    host_sex: http://purl.obolibrary.org/obo/PATO_0000384
+    host_age: 20
+    host_age_unit: http://purl.obolibrary.org/obo/UO_0000036
+    host_health_status: http://purl.obolibrary.org/obo/NCIT_C25269
+    host_treatment: Process in which the act is intended to modify or alter host status (Compounds)
+    host_vaccination: [vaccine1,vaccine2]
+    ethnicity: http://purl.obolibrary.org/obo/HANCESTRO_0010
+    additional_host_information: Optional free text field for additional information
+
+sample:
+    sample_id: Id of the sample as defined by the submitter 
+    collector_name: Name of the person that took the sample
+    collecting_institution: Institute that was responsible of sampling  
+    specimen_source: [http://purl.obolibrary.org/obo/NCIT_C155831,http://purl.obolibrary.org/obo/NCIT_C155835]
+    collection_date: "2020-01-01"
+    collection_location: http://www.wikidata.org/entity/Q148
+    sample_storage_conditions: frozen specimen
+    source_database_accession: [http://identifiers.org/insdc/LC522350.1#sequence]
+    additional_collection_information: Optional free text field for additional information
+
+virus:
+    virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
+    virus_strain: SARS-CoV-2/human/CHN/HS_8/2020
+
+technology:
+    sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0009173,http://www.ebi.ac.uk/efo/EFO_0009173]
+    sequence_assembly_method: Protocol used for assembly
+    sequencing_coverage: [70.0, 100.0]
+    additional_technology_information: Optional free text field for additional information
+
+submitter:
+    authors: [John Doe, Joe Boe, Jonny Oe]
+    submitter_name: [John Doe]
+    submitter_address: John Doe's address
+    originating_lab: John Doe kitchen
+    lab_address: John Doe's address
+    provider_sample_id: XXX1
+    submitter_sample_id: XXX2
+    publication: PMID00001113
+    submitter_orcid: [https://orcid.org/0000-0000-0000-0000,https://orcid.org/0000-0000-0000-0001]
+    additional_submitter_information: Optional free text field for additional information
\ No newline at end of file
diff --git a/example/metadata.yaml b/example/metadata.yaml
deleted file mode 100644
index a76616c..0000000
--- a/example/metadata.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-id: placeholder
-
-host:
-    host_id: XX1
-    host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606
-    host_sex: http://purl.obolibrary.org/obo/NCIT_C27993
-    host_age: 20
-    host_age_unit: http://purl.obolibrary.org/obo/UO_0000036
-    host_health_status: http://purl.obolibrary.org/obo/NCIT_C25269
-    host_treatment: Process in which the act is intended to modify or alter host status (Compounds)
-    host_vaccination: List of vaccines given to the host (RRIDs?)
-    additional_host_information: Field for additional host information
-
-sample:
-    sample_id: Id of the sample as defined by the submitter 
-    collector_name: Name of the person that took the sample
-    collecting_institution: Institute that was responsible of sampling  
-    specimen_source: http://purl.obolibrary.org/obo/NCIT_C155831
-    specimen_source2: http://purl.obolibrary.org/obo/NCIT_C155835
-    collection_date: "2020-01-01"
-    collection_location: http://www.wikidata.org/entity/Q148
-    sample_storage_conditions: XXX
-    additional_collection_information: XXX
-
-virus:
-    virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
-    virus_strain: SARS-CoV-2/human/CHN/HS_8/2020
-
-technology:
-    sample_sequencing_technology: http://www.ebi.ac.uk/efo/EFO_0009173
-    sample_sequencing_technology2: http://www.ebi.ac.uk/efo/EFO_0009173
-    sequence_assembly_method: Protocol used for assembly
-    sequencing_coverage: 70
-
-submitter:
-    submitter_name: John Doe
-    submitter_address: John Doe's adress
-    originating_lab: John Doe kitchen
-    lab_address: John Doe's address
-    provider_sample_id: HmX
-    submitter_sample_id: xXx
-    authors: John Doe et all
-    submitter_orcid: https://orcid.org/0000-0000-0000-0000
\ No newline at end of file
diff --git a/example/minimal_example.yaml b/example/minimal_metadata_example.yaml
index 0e36a25..51f8a87 100644
--- a/example/minimal_example.yaml
+++ b/example/minimal_metadata_example.yaml
@@ -5,14 +5,14 @@ host:
 
 sample:
     sample_id: XX
-    collection_date: 2020-01
+    collection_date: "2020-01-01"
     collection_location: http://www.wikidata.org/entity/Q148
 
 virus:
     virus_species: http://purl.obolibrary.org/obo/NCBITaxon_2697049
 
 technology:
-    sample_sequencing_technology: http://www.ebi.ac.uk/efo/EFO_0008632
+    sample_sequencing_technology: [http://www.ebi.ac.uk/efo/EFO_0008632]
 
 submitter:
-    authors: John Doe
\ No newline at end of file
+    authors: [John Doe]
\ No newline at end of file
diff --git a/scripts/dict_ontology_standardization/ncbi_countries.csv b/scripts/dict_ontology_standardization/ncbi_countries.csv
index 20e8a9b..6b43137 100644
--- a/scripts/dict_ontology_standardization/ncbi_countries.csv
+++ b/scripts/dict_ontology_standardization/ncbi_countries.csv
@@ -39,6 +39,7 @@ Chad,http://www.wikidata.org/entity/Q657
 Chile,http://www.wikidata.org/entity/Q298
 China,http://www.wikidata.org/entity/Q148
 China: Anhui,http://www.wikidata.org/entity/Q40956
+"China: Anhui, Fuyang",http://www.wikidata.org/entity/Q360584
 China: Beijing,http://www.wikidata.org/entity/Q956
 China: Chongqing,http://www.wikidata.org/entity/Q11725
 China: Fujian,http://www.wikidata.org/entity/Q41705
@@ -48,6 +49,7 @@ China: Guangdong,http://www.wikidata.org/entity/Q15175
 China: Guangxi Zhuang Autonomous Region,http://www.wikidata.org/entity/Q15176
 China: Guangzhou,http://www.wikidata.org/entity/Q16572
 China: Guizhou,http://www.wikidata.org/entity/Q47097
+China: Hangzhou,http://www.wikidata.org/entity/Q4970
 China: Hainan,http://www.wikidata.org/entity/Q42200
 China: Hebei,http://www.wikidata.org/entity/Q21208
 China: Heilongjiang,http://www.wikidata.org/entity/Q19206
@@ -109,6 +111,7 @@ France,http://www.wikidata.org/entity/Q142
 Gabon,http://www.wikidata.org/entity/Q1000
 Georgia,http://www.wikidata.org/entity/Q230
 Germany,http://www.wikidata.org/entity/Q183
+Germany: Dusseldorf,http://www.wikidata.org/entity/Q1718
 Ghana,http://www.wikidata.org/entity/Q117
 Greece,http://www.wikidata.org/entity/Q41
 Grenada,http://www.wikidata.org/entity/Q769
@@ -123,6 +126,7 @@ Iceland,http://www.wikidata.org/entity/Q189
 Icelandic Commonwealth,http://www.wikidata.org/entity/Q62389
 India,http://www.wikidata.org/entity/Q668
 India: Kerala State,http://www.wikidata.org/entity/Q1186
+India: Rajkot,http://www.wikidata.org/entity/Q1815245
 Indonesia,http://www.wikidata.org/entity/Q252
 Iran,http://www.wikidata.org/entity/Q794
 Iran: Qum,http://www.wikidata.org/entity/Q131664
@@ -172,6 +176,7 @@ Mozambique,http://www.wikidata.org/entity/Q1029
 Myanmar,http://www.wikidata.org/entity/Q836
 Namibia,http://www.wikidata.org/entity/Q1030
 Nauru,http://www.wikidata.org/entity/Q697
+Netherlands: Milheeze,http://www.wikidata.org/entity/Q3314115
 Nepal,http://www.wikidata.org/entity/Q837
 New Zealand,http://www.wikidata.org/entity/Q664
 Nicaragua,http://www.wikidata.org/entity/Q811
@@ -263,6 +268,7 @@ USA: CA,http://www.wikidata.org/entity/Q99
 "USA: CA, San Diego County",http://www.wikidata.org/entity/Q108143
 USA: CO,http://www.wikidata.org/entity/Q1261
 USA: CT,http://www.wikidata.org/entity/Q779
+USA: DC,http://www.wikidata.org/entity/Q3551781
 USA: DE,http://www.wikidata.org/entity/Q1393
 USA: FL,http://www.wikidata.org/entity/Q812
 USA: GA,http://www.wikidata.org/entity/Q1428
@@ -293,6 +299,7 @@ USA: NM,http://www.wikidata.org/entity/Q1522
 USA: North Carolina,http://www.wikidata.org/entity/Q1454
 USA: NV,http://www.wikidata.org/entity/Q1227
 USA: NY,http://www.wikidata.org/entity/Q1384
+USA: New York,http://www.wikidata.org/entity/Q1384
 USA: OH,http://www.wikidata.org/entity/Q1397
 USA: OK,http://www.wikidata.org/entity/Q1649
 USA: OR,http://www.wikidata.org/entity/Q824
@@ -321,4 +328,4 @@ Viet Nam: Ho Chi Minh city,http://www.wikidata.org/entity/Q1854
 Vietnam,http://www.wikidata.org/entity/Q881
 Yemen,http://www.wikidata.org/entity/Q805
 Zambia,http://www.wikidata.org/entity/Q953
-Zimbabwe,http://www.wikidata.org/entity/Q954
+Zimbabwe,http://www.wikidata.org/entity/Q954
\ No newline at end of file
diff --git a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
index f5aeaae..7fa67f8 100644
--- a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
+++ b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
@@ -1,5 +1,6 @@
 nasopharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831
 nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
+nasopharyngeal,http://purl.obolibrary.org/obo/NCIT_C155831
 respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831
 naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
 nasopharyngeal aspirate,http://purl.obolibrary.org/obo/NCIT_C155831
diff --git a/scripts/docker/Dockerfile b/scripts/docker/Dockerfile
new file mode 100644
index 0000000..5bd38dd
--- /dev/null
+++ b/scripts/docker/Dockerfile
@@ -0,0 +1,10 @@
+FROM debian:10
+
+RUN apt-get update && \
+    apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \
+    python3 python3-pip python3-setuptools python3-dev python-pycurl \
+    clustalw python3-biopython libcurl4-openssl-dev build-essential \
+    libssl-dev && \
+    apt-get clean
+
+RUN pip3 install bh20-seq-uploader
\ No newline at end of file
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index 5257bd1..6f046ea 100755
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -7,6 +7,8 @@ import xml.etree.ElementTree as ET
 import json
 import os
 
+from dateutil import parser
+
 num_ids_for_request = 100
 
 dir_metadata = 'metadata_from_nuccore'
@@ -37,20 +39,19 @@ if not os.path.exists(dir_metadata):
         tmp_list = [x.split('.')[0] for x in tmp_list]
 
         print(term, len(tmp_list))
-        tmp_list=tmp_list
-    #    tmp_list = tmp_list[0:2] # restricting to small run
+        #tmp_list = tmp_list[0:2] # restricting to small run
 
         id_set.update([x.split('.')[0] for x in tmp_list])
 
     print(term_list, len(id_set))
 
-    with open(path_ncbi_virus_accession) as f:
-        tmp_list = [line.strip('\n') for line in f]
-
-    print('NCBI Virus', len(tmp_list))
-    id_set.update(tmp_list)
-
-    print(term_list + ['NCBI Virus'], len(id_set))
+    if os.path.exists(path_ncbi_virus_accession):
+        with open(path_ncbi_virus_accession) as f:
+            tmp_list = [line.strip('\n') for line in f]
+        print('NCBI Virus', len(tmp_list))
+        id_set.update(tmp_list)
+        term_list.append('NCBI Virus')
+        print(term_list, len(id_set))
 
     for i, id_x_list in enumerate(chunks(list(id_set), num_ids_for_request)):
         path_metadata_xxx_xml = os.path.join(dir_metadata, 'metadata_{}.xml'.format(i))
@@ -86,7 +87,7 @@ if not os.path.exists(dir_fasta_and_yaml):
     os.makedirs(dir_fasta_and_yaml)
 
 missing_value_list = []
-    
+
 for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml) for name_metadata_xxx_xml in os.listdir(dir_metadata) if name_metadata_xxx_xml.endswith('.xml')]:
     tree = ET.parse(path_metadata_xxx_xml)
     GBSet = tree.getroot()
@@ -110,23 +111,23 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
             'submitter': {}
         }
 
-        
+
         info_for_yaml_dict['sample']['sample_id'] = accession_version
-        info_for_yaml_dict['sample']['source_database_accession'] = accession_version
-        
-        
+        info_for_yaml_dict['sample']['source_database_accession'] = ["http://identifiers.org/insdc/"+accession_version+"#sequence"] #accession is turned into resolvable URL/URI now
+
+
         # submitter info
         GBSeq_references = GBSeq.find('GBSeq_references')
         if GBSeq_references is not None:
-            info_for_yaml_dict['submitter']['authors'] = ';'.join([x.text for x in GBSeq_references.iter('GBAuthor')])
-            
+            info_for_yaml_dict['submitter']['authors'] = ["{}".format(x.text) for x in GBSeq_references.iter('GBAuthor')]
+
             GBReference = GBSeq_references.find('GBReference')
             if GBReference is not None:
                 GBReference_journal = GBReference.find('GBReference_journal')
-                
+
                 if GBReference_journal is not None and GBReference_journal.text != 'Unpublished':
                     if 'Submitted' in GBReference_journal.text:
-                        info_for_yaml_dict['submitter']['submitter_name'] = GBReference_journal.text.split(') ')[1].split(',')[0].strip()
+                        info_for_yaml_dict['submitter']['submitter_name'] = ["{}".format(GBReference_journal.text.split(') ')[1].split(',')[0].strip())]
                         info_for_yaml_dict['submitter']['submitter_address'] = ','.join(GBReference_journal.text.split(') ')[1].split(',')[1:]).strip()
                     else:
                         info_for_yaml_dict['submitter']['additional_submitter_information'] = GBReference_journal.text
@@ -146,8 +147,9 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
                     if field_in_yaml == 'sequencing_coverage':
                         # A regular expression would be better!
                         try:
-                            info_for_yaml_dict['technology'][field_in_yaml] = float(
-                                tech_info_to_parse.strip('(average)').strip("reads/nt").strip('(average for 6 sequences)').replace(',', '.').strip(' xX>'))
+                            info_for_yaml_dict['technology'][field_in_yaml] = [
+                                float(tech_info_to_parse.strip('(average)').strip("reads/nt").strip('(average for 6 sequences)').replace(',', '.').strip(' xX>'))
+                            ]
                         except ValueError:
                             print(accession_version, "Couldn't make sense of Coverage '%s'" % tech_info_to_parse)
                             pass
@@ -162,8 +164,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
 
                             new_seq_tec_list.append(seq_tec)
 
-                        for n, seq_tec in enumerate(new_seq_tec_list):
-                            info_for_yaml_dict['technology'][field_in_yaml + ('' if n == 0 else str(n + 1))] = seq_tec
+                        info_for_yaml_dict['technology']['sample_sequencing_technology'] = [x for x in new_seq_tec_list]
                     else:
                         info_for_yaml_dict['technology'][field_in_yaml] = tech_info_to_parse
 
@@ -199,7 +200,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
 
                         if 'age' in GBQualifier_value_text:
                             info_for_yaml_dict['host']['host_age'] = int(GBQualifier_value_text_list[2].split('age ')[1])
-                            info_for_yaml_dict['host']['host_age_unit'] = 'year'
+                            info_for_yaml_dict['host']['host_age_unit'] = 'http://purl.obolibrary.org/obo/UO_0000036'
                 elif GBQualifier_name_text == 'collected_by':
                     if any([x in GBQualifier_value_text.lower() for x in ['institute', 'hospital', 'city', 'center']]):
                         info_for_yaml_dict['sample']['collecting_institution'] = GBQualifier_value_text
@@ -208,24 +209,46 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
                 elif GBQualifier_name_text == 'isolation_source':
                     if GBQualifier_value_text.upper() in term_to_uri_dict:
                         GBQualifier_value_text = GBQualifier_value_text.upper() # For example, in case of 'usa: wa'
-                    
+
                     if GBQualifier_value_text in term_to_uri_dict:
-                        info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict[GBQualifier_value_text]
+                        info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict[GBQualifier_value_text]]
                     else:
                         if GBQualifier_value_text in ['NP/OP swab', 'nasopharyngeal and oropharyngeal swab', 'nasopharyngeal/oropharyngeal swab', 'np/np swab', 'np/op']:
-                            info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict['nasopharyngeal swab']
-                            info_for_yaml_dict['sample']['specimen_source2'] = term_to_uri_dict['oropharyngeal swab']
-                        elif GBQualifier_value_text in ['nasopharyngeal swab/throat swab']:
-                            info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict['nasopharyngeal swab']
-                            info_for_yaml_dict['sample']['specimen_source2'] = term_to_uri_dict['throat swab']
+                            info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict['nasopharyngeal swab'], term_to_uri_dict['oropharyngeal swab']]
+                        elif GBQualifier_value_text in ['nasopharyngeal swab/throat swab', 'nasopharyngeal/throat swab']:
+                            info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict['nasopharyngeal swab'], term_to_uri_dict['throat swab']]
                         elif GBQualifier_value_text in ['nasopharyngeal aspirate/throat swab']:
-                            info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict['nasopharyngeal aspirate']
-                            info_for_yaml_dict['sample']['specimen_source2'] = term_to_uri_dict['throat swab']
+                            info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict['nasopharyngeal aspirate'], term_to_uri_dict['throat swab']]
                         else:
                             missing_value_list.append('\t'.join([accession_version, 'specimen_source', GBQualifier_value_text]))
                 elif GBQualifier_name_text == 'collection_date':
                     # TO_DO: which format we will use?
-                    info_for_yaml_dict['sample']['collection_date'] = GBQualifier_value_text
+                    date_to_write = GBQualifier_value_text
+                    
+                    if len(GBQualifier_value_text.split('-')) == 1:
+                        if int(GBQualifier_value_text) < 2020:
+                            date_to_write = "15 12 {}".format(GBQualifier_value_text)
+                        else:
+                            date_to_write = "15 01 {}".format(GBQualifier_value_text)
+
+                        if 'additional_collection_information' in info_for_yaml_dict['sample']:
+                            info_for_yaml_dict['sample']['additional_collection_information'] += "; The 'collection_date' is estimated (the original date was: {})".format(GBQualifier_value_text)
+                        else:
+                            info_for_yaml_dict['sample']['additional_collection_information'] = "The 'collection_date' is estimated (the original date was: {})".format(GBQualifier_value_text)
+                    elif len(GBQualifier_value_text.split('-')) == 2:
+                        date_to_write += '-15'
+                        
+                        if 'additional_collection_information' in info_for_yaml_dict['sample']:
+                            info_for_yaml_dict['sample']['additional_collection_information'] += "; The 'collection_date' is estimated (the original date was: {})".format(GBQualifier_value_text)
+                        else:
+                            info_for_yaml_dict['sample']['additional_collection_information'] = "The 'collection_date' is estimated (the original date was: {})".format(GBQualifier_value_text)
+                    elif len(GBQualifier_value_text.split('-')) == 3:
+                        GBQualifier_value_text_list = GBQualifier_value_text.split('-')
+
+                        if GBQualifier_value_text_list[1].isalpha():
+                            date_to_write = GBQualifier_value_text_list[1] + ' ' + GBQualifier_value_text_list[0] + ' ' + GBQualifier_value_text_list[2]
+
+                    info_for_yaml_dict['sample']['collection_date'] = date_to_write
                 elif GBQualifier_name_text in ['lat_lon', 'country']:
                     if GBQualifier_value_text == 'Hong Kong':
                         GBQualifier_value_text = 'China: Hong Kong'
@@ -237,7 +260,10 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
 
                     info_for_yaml_dict['sample']['collection_location'] = GBQualifier_value_text
                 elif GBQualifier_name_text == 'note':
-                    info_for_yaml_dict['sample']['additional_collection_information'] = GBQualifier_value_text
+                    if 'additional_collection_information' in info_for_yaml_dict['sample']:
+                        info_for_yaml_dict['sample']['additional_collection_information'] += '; ' + GBQualifier_value_text
+                    else:
+                        info_for_yaml_dict['sample']['additional_collection_information'] = GBQualifier_value_text
                 elif GBQualifier_name_text == 'isolate':
                     info_for_yaml_dict['virus']['virus_strain'] = GBQualifier_value_text
                 elif GBQualifier_name_text == 'db_xref':
@@ -254,7 +280,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
         with open(os.path.join(dir_fasta_and_yaml, '{}.yaml'.format(accession_version)), 'w') as fw:
             json.dump(info_for_yaml_dict, fw, indent=2)
 
-        
+
 if len(missing_value_list) > 0:
     with open('missing_terms.tsv', 'w') as fw:
         fw.write('\n'.join(missing_value_list))
diff --git a/scripts/import.cwl b/scripts/import.cwl
new file mode 100644
index 0000000..d84516b
--- /dev/null
+++ b/scripts/import.cwl
@@ -0,0 +1,30 @@
+# CWL tool that runs import_to_arvados.py under python3, passing it
+# from_genbank_to_fasta_and_yaml.py and the dict_ontology_standardization
+# directory as command-line arguments (all three are overridable inputs).
+cwlVersion: v1.1
+class: CommandLineTool
+baseCommand: python3
+inputs:
+  # Driver script; position 1 places it directly after `python3`.
+  scripts:
+    type: File
+    default:
+      class: File
+      location: import_to_arvados.py
+    inputBinding: {position: 1}
+  # Position 2: becomes the driver's first argument (sys.argv[1]).
+  importScript:
+    type: File
+    default:
+      class: File
+      location: from_genbank_to_fasta_and_yaml.py
+    inputBinding: {position: 2}
+  # Position 3: becomes the driver's second argument (sys.argv[2]).
+  dict:
+    type: Directory
+    default:
+      class: Directory
+      location: dict_ontology_standardization
+    inputBinding: {position: 3}
+# No outputs are collected from this tool.
+outputs: []
+requirements:
+  DockerRequirement:
+    dockerPull: bh20-seq-uploader/import
+  # Outbound network access is enabled for the tool.
+  NetworkAccess:
+    networkAccess: true
+  # Reuse of previous results is disabled, so every invocation actually runs.
+  WorkReuse:
+    enableReuse: false
diff --git a/scripts/import_to_arvados.py b/scripts/import_to_arvados.py
new file mode 100644
index 0000000..78cd13d
--- /dev/null
+++ b/scripts/import_to_arvados.py
@@ -0,0 +1,14 @@
+"""Run an import script, then upload every FASTA/YAML pair found afterwards.
+
+Usage: import_to_arvados.py IMPORT_SCRIPT DICT_DIR
+
+IMPORT_SCRIPT is executed from inside $TMPDIR; DICT_DIR is symlinked there as
+"dict_ontology_standardization" before the script runs.
+"""
+import os
+import subprocess
+import glob
+import sys
+
+# Resolve both arguments BEFORE changing directory: a relative path would
+# otherwise point at the wrong place (or dangle, for the symlink target).
+import_script = os.path.abspath(sys.argv[1])
+dict_dir = os.path.abspath(sys.argv[2])
+
+os.chdir(os.environ["TMPDIR"])
+os.symlink(dict_dir, "dict_ontology_standardization")
+# check=True: abort with the script's failure instead of silently going on
+# to upload whatever partial output happens to exist.
+subprocess.run(import_script, check=True)
+
+# NOTE(review): "fasta_and_yaml" is presumably created by the import script.
+os.chdir("fasta_and_yaml")
+fasta_files = glob.glob("*.fasta")
+
+for f in fasta_files:
+    # <name>.fasta is uploaded together with its sibling <name>.yaml.
+    # The uploader's exit status is deliberately not checked, so one rejected
+    # sequence does not stop the remaining uploads.
+    subprocess.run(["bh20-seq-uploader", f, "%s.yaml" % os.path.splitext(f)[0]])
diff --git a/scripts/sequences.acc b/scripts/sequences.acc
index a99c4e6..697d868 100644
--- a/scripts/sequences.acc
+++ b/scripts/sequences.acc
@@ -1,4 +1,299 @@
 NC_045512
+MT394528
+MT394529
+MT394530
+MT394531
+MT394864
+MT396241
+MT396242
+MT396243
+MT396244
+MT396245
+MT396246
+MT396247
+MT396248
+MT396266
+MT380726
+MT380727
+MT380728
+MT380729
+MT380730
+MT380731
+MT380732
+MT380733
+MT380734
+MT385414
+MT385415
+MT385416
+MT385417
+MT385418
+MT385419
+MT385420
+MT385421
+MT385422
+MT385423
+MT385424
+MT385425
+MT385426
+MT385427
+MT385428
+MT385429
+MT385430
+MT385431
+MT385432
+MT385433
+MT385434
+MT385435
+MT385436
+MT385437
+MT385438
+MT385439
+MT385440
+MT385441
+MT385442
+MT385443
+MT385444
+MT385445
+MT385446
+MT385447
+MT385448
+MT385449
+MT385450
+MT385451
+MT385452
+MT385453
+MT385454
+MT385455
+MT385456
+MT385457
+MT385458
+MT385459
+MT385460
+MT385461
+MT385462
+MT385463
+MT385464
+MT385465
+MT385466
+MT385467
+MT385468
+MT385469
+MT385470
+MT385471
+MT385472
+MT385473
+MT385474
+MT385475
+MT385476
+MT385477
+MT385478
+MT385479
+MT385480
+MT385481
+MT385482
+MT385483
+MT385484
+MT385485
+MT385486
+MT385487
+MT385488
+MT385489
+MT385490
+MT385491
+MT385492
+MT385493
+MT385494
+MT385495
+MT385496
+MT385497
+MT186683
+MT252677
+MT252678
+MT252679
+MT252680
+MT252681
+MT252682
+MT252683
+MT252684
+MT252685
+MT252686
+MT252687
+MT252688
+MT252689
+MT252690
+MT252691
+MT252692
+MT252693
+MT252694
+MT252695
+MT252696
+MT252697
+MT252698
+MT252699
+MT252700
+MT252701
+MT252702
+MT252703
+MT252704
+MT252705
+MT252706
+MT252707
+MT252708
+MT252709
+MT252710
+MT252711
+MT252712
+MT252713
+MT252715
+MT252716
+MT252717
+MT252719
+MT252721
+MT252723
+MT252725
+MT252726
+MT252728
+MT252729
+MT252730
+MT252733
+MT252734
+MT252735
+MT252736
+MT252737
+MT252738
+MT252739
+MT252740
+MT252741
+MT252742
+MT252745
+MT252746
+MT252747
+MT252748
+MT252749
+MT252756
+MT252757
+MT252758
+MT252761
+MT252763
+MT252764
+MT252765
+MT252766
+MT252767
+MT252768
+MT252769
+MT252770
+MT252771
+MT252772
+MT252773
+MT252774
+MT252775
+MT252778
+MT252779
+MT252780
+MT252781
+MT252782
+MT252783
+MT252784
+MT252785
+MT252787
+MT252788
+MT252792
+MT252793
+MT252794
+MT252795
+MT252797
+MT252798
+MT252799
+MT252800
+MT252801
+MT252802
+MT252803
+MT252804
+MT252805
+MT252806
+MT252807
+MT252808
+MT252809
+MT252810
+MT252811
+MT252821
+MT252822
+MT252823
+MT252824
+MT339043
+MT365033
+MT374101
+MT374102
+MT374103
+MT374104
+MT374105
+MT374106
+MT374107
+MT374108
+MT374109
+MT374110
+MT374111
+MT374112
+MT374113
+MT374114
+MT374115
+MT374116
+MT375428
+MT375429
+MT375430
+MT375431
+MT375432
+MT375433
+MT375434
+MT375435
+MT375436
+MT375437
+MT375438
+MT375439
+MT375440
+MT375441
+MT375442
+MT375443
+MT375444
+MT375445
+MT375446
+MT375447
+MT375448
+MT375449
+MT375450
+MT375451
+MT375452
+MT375453
+MT375454
+MT375455
+MT375456
+MT375457
+MT375458
+MT375459
+MT375460
+MT375461
+MT375462
+MT375463
+MT375464
+MT375465
+MT375466
+MT375467
+MT375468
+MT375469
+MT375470
+MT375471
+MT375472
+MT375473
+MT375474
+MT375475
+MT375476
+MT375477
+MT375478
+MT375479
+MT375480
+MT375481
+MT375482
+MT375483
 MT370516
 MT370517
 MT370518
@@ -225,6 +520,8 @@ MT372480
 MT372481
 MT372482
 MT372483
+7BV2_P
+7BV2_T
 LC542976
 LC542809
 MT114412
diff --git a/semantic_enrichment/countries.ttl b/semantic_enrichment/countries.ttl
new file mode 100644
index 0000000..0f364fc
--- /dev/null
+++ b/semantic_enrichment/countries.ttl
@@ -0,0 +1,279 @@
+@prefix ns1: <http://www.wikidata.org/prop/direct/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+<http://www.wikidata.org/entity/Q108143> rdfs:label "San Diego County" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-116.77 33.02)" .
+
+<http://www.wikidata.org/entity/Q110403> rdfs:label "Snohomish County" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-121.71 48.04)" .
+
+<http://www.wikidata.org/entity/Q1166> rdfs:label "Michigan" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-85.58 44.34)" .
+
+<http://www.wikidata.org/entity/Q11746> rdfs:label "Wuhan" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q148> ;
+    ns1:P625 "Point(114.288055555 30.587222222)" .
+
+<http://www.wikidata.org/entity/Q1186> rdfs:label "Kerala" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q668> ;
+    ns1:P625 "Point(76.972 8.5074)" .
+
+<http://www.wikidata.org/entity/Q1204> rdfs:label "Illinois" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-89.0 40.0)" .
+
+<http://www.wikidata.org/entity/Q1221> rdfs:label "Idaho" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-114.0 45.0)" .
+
+<http://www.wikidata.org/entity/Q1223> rdfs:label "Washington" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-120.5 47.5)" .
+
+<http://www.wikidata.org/entity/Q1227> rdfs:label "Nevada" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-117.0 39.0)" .
+
+<http://www.wikidata.org/entity/Q123304> rdfs:label "Antioquia Department" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q739> ;
+    ns1:P625 "Point(-75.566666666 6.216666666)" .
+
+<http://www.wikidata.org/entity/Q1370> rdfs:label "Virginia" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-79.0 37.5)" .
+
+<http://www.wikidata.org/entity/Q1384> rdfs:label "New York" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-75.0 43.0)" .
+
+<http://www.wikidata.org/entity/Q1387> rdfs:label "Rhode Island" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-71.5 41.7)" .
+
+<http://www.wikidata.org/entity/Q1391> rdfs:label "Maryland" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-76.7 39.0)" .
+
+<http://www.wikidata.org/entity/Q1397> rdfs:label "Ohio" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-82.5 40.5)" .
+
+<http://www.wikidata.org/entity/Q1400> rdfs:label "Pennsylvania" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-77.5 41.0)" .
+
+<http://www.wikidata.org/entity/Q1408> rdfs:label "New Jersey" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-74.5 40.0)" .
+
+<http://www.wikidata.org/entity/Q1415> rdfs:label "Indiana" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-86.216666666 39.933333333)" .
+
+<http://www.wikidata.org/entity/Q1428> rdfs:label "Georgia" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-83.5 33.0)" .
+
+<http://www.wikidata.org/entity/Q1439> rdfs:label "Texas" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-100.0 31.0)" .
+
+<http://www.wikidata.org/entity/Q1454> rdfs:label "North Carolina" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-80.0 35.5)" .
+
+<http://www.wikidata.org/entity/Q1456> rdfs:label "South Carolina" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-81.0 34.0)" .
+
+<http://www.wikidata.org/entity/Q15174> rdfs:label "Shenzhen" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q148> ;
+    ns1:P625 "Point(114.054 22.535)" .
+
+<http://www.wikidata.org/entity/Q1527> rdfs:label "Minnesota" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-94.0 46.0)" .
+
+<http://www.wikidata.org/entity/Q1537> rdfs:label "Wisconsin" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-89.5 44.5)" .
+
+<http://www.wikidata.org/entity/Q1546> rdfs:label "Iowa" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-93.0 42.0)" .
+
+<http://www.wikidata.org/entity/Q1553> rdfs:label "Nebraska" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-100.0 41.5)" .
+
+<http://www.wikidata.org/entity/Q1558> rdfs:label "Kansas" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-98.0 38.5)" .
+
+<http://www.wikidata.org/entity/Q1581> rdfs:label "Missouri" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-92.5 38.5)" .
+
+<http://www.wikidata.org/entity/Q1588> rdfs:label "Louisiana" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-92.0 31.0)" .
+
+<http://www.wikidata.org/entity/Q16572> rdfs:label "Guangzhou" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q148> ;
+    ns1:P625 "Point(113.258976 23.128795)" .
+
+<http://www.wikidata.org/entity/Q1854> rdfs:label "Ho Chi Minh City" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q881> ;
+    ns1:P625 "Point(106.62965 10.82302)",
+        "Point(106.633333333 10.816666666)" .
+
+<http://www.wikidata.org/entity/Q198244> rdfs:label "Zhuozhou" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q148> ;
+    ns1:P625 "Point(115.99176 39.48873)" .
+
+<http://www.wikidata.org/entity/Q36687> rdfs:label "Victoria" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q408> ;
+    ns1:P625 "Point(144.0 -37.0)" .
+
+<http://www.wikidata.org/entity/Q43194> rdfs:label "Yunnan" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q148> ;
+    ns1:P625 "Point(101.5 24.5)" .
+
+<http://www.wikidata.org/entity/Q4970> rdfs:label "Hangzhou" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q148> ;
+    ns1:P625 "Point(120.1675 30.25)" .
+
+<http://www.wikidata.org/entity/Q62> rdfs:label "San Francisco" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-122.416388888 37.7775)" .
+
+<http://www.wikidata.org/entity/Q759> rdfs:label "New Hampshire" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-71.5 44.0)" .
+
+<http://www.wikidata.org/entity/Q771> rdfs:label "Massachusetts" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-71.8 42.3)" .
+
+<http://www.wikidata.org/entity/Q779> rdfs:label "Connecticut" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-72.7 41.6)" .
+
+<http://www.wikidata.org/entity/Q782> rdfs:label "Hawaii" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-157.796388888 21.311388888)" .
+
+<http://www.wikidata.org/entity/Q812> rdfs:label "Florida" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-81.631666666 28.133333333)" .
+
+<http://www.wikidata.org/entity/Q816> rdfs:label "Arizona" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-111.656944 34.286667)" .
+
+<http://www.wikidata.org/entity/Q81725> rdfs:label "KwaZulu-Natal" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q258> ;
+    ns1:P625 "Point(31.0 -29.0)" .
+
+<http://www.wikidata.org/entity/Q824> rdfs:label "Oregon" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-120.575 43.935833)" .
+
+<http://www.wikidata.org/entity/Q829> rdfs:label "Utah" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-111.5 39.5)" .
+
+<http://www.wikidata.org/entity/Q8686> rdfs:label "Shanghai" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q148> ;
+    ns1:P625 "Point(121.466666666 31.166666666)" .
+
+<http://www.wikidata.org/entity/Q8818> rdfs:label "Valencia" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q29> ;
+    ns1:P625 "Point(-0.375 39.466666666)" .
+
+<http://www.wikidata.org/entity/Q956> rdfs:label "Beijing" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q148> ;
+    ns1:P625 "Point(116.391388888 39.905)" .
+
+<http://www.wikidata.org/entity/Q99> rdfs:label "California" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-120.0 37.0)" .
+
+<http://www.wikidata.org/entity/Q142> rdfs:label "France" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q142> ;
+    ns1:P625 "Point(2.0 47.0)" .
+
+<http://www.wikidata.org/entity/Q155> rdfs:label "Brazil" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q155> ;
+    ns1:P625 "Point(-53.0 -14.0)" .
+
+<http://www.wikidata.org/entity/Q258> rdfs:label "South Africa" .
+
+<http://www.wikidata.org/entity/Q29> rdfs:label "Spain" .
+
+<http://www.wikidata.org/entity/Q33> rdfs:label "Finland" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q33> ;
+    ns1:P625 "Point(27.0 65.0)" .
+
+<http://www.wikidata.org/entity/Q34> rdfs:label "Sweden" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q34> ;
+    ns1:P625 "Point(15.0 61.0)" .
+
+<http://www.wikidata.org/entity/Q38> rdfs:label "Italy" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q38> ;
+    ns1:P625 "Point(12.5 42.5)" .
+
+<http://www.wikidata.org/entity/Q408> rdfs:label "Australia" .
+
+<http://www.wikidata.org/entity/Q41> rdfs:label "Greece" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q41> ;
+    ns1:P625 "Point(23.0 38.5)" .
+
+<http://www.wikidata.org/entity/Q419> rdfs:label "Peru" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q419> ;
+    ns1:P625 "Point(-76.0 -9.4)" .
+
+<http://www.wikidata.org/entity/Q43> rdfs:label "Turkey" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q43> ;
+    ns1:P625 "Point(36.0 39.0)" .
+
+<http://www.wikidata.org/entity/Q668> rdfs:label "India" .
+
+<http://www.wikidata.org/entity/Q739> rdfs:label "Colombia" .
+
+<http://www.wikidata.org/entity/Q794> rdfs:label "Iran" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q794> ;
+    ns1:P625 "Point(53.0 32.0)" .
+
+<http://www.wikidata.org/entity/Q801> rdfs:label "Israel" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q801> ;
+    ns1:P625 "Point(35.0 31.0)" .
+
+<http://www.wikidata.org/entity/Q837> rdfs:label "Nepal" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q837> ;
+    ns1:P625 "Point(84.0 28.0)" .
+
+<http://www.wikidata.org/entity/Q865> rdfs:label "Taiwan" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q865> ;
+    ns1:P625 "Point(121.0 24.0)" .
+
+<http://www.wikidata.org/entity/Q881> rdfs:label "Vietnam" .
+
+<http://www.wikidata.org/entity/Q884> rdfs:label "South Korea" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q884> ;
+    ns1:P625 "Point(128.0 36.0)" .
+
+<http://www.wikidata.org/entity/Q148> rdfs:label "People's Republic of China" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q148> ;
+    ns1:P625 "Point(103.0 35.0)" .
+
+<http://www.wikidata.org/entity/Q30> rdfs:label "United States of America" ;
+    ns1:P17 <http://www.wikidata.org/entity/Q30> ;
+    ns1:P625 "Point(-77.036666666 38.895)" .
+
diff --git a/semantic_enrichment/labels.ttl b/semantic_enrichment/labels.ttl
new file mode 100644
index 0000000..b4e5d1f
--- /dev/null
+++ b/semantic_enrichment/labels.ttl
@@ -0,0 +1,24 @@
+<http://edamontology.org/data_1875> <http://www.w3.org/2000/01/rdf-schema#label> "NCBI taxon" .
+<http://purl.obolibrary.org/obo/GAZ_00000448> <http://www.w3.org/2000/01/rdf-schema#label> "geographic location" .
+<http://purl.obolibrary.org/obo/FLU_0000848> <http://www.w3.org/2000/01/rdf-schema#label> "sequence coverage" .
+<http://purl.obolibrary.org/obo/NCIT_C41206> <http://www.w3.org/2000/01/rdf-schema#label> "Institution" .
+<http://purl.obolibrary.org/obo/NCIT_C42781> <http://www.w3.org/2000/01/rdf-schema#label> "Author" .
+<http://purl.obolibrary.org/obo/OBI_0001479> <http://www.w3.org/2000/01/rdf-schema#label> "specimen from organism" .
+<http://purl.obolibrary.org/obo/OBI_0600047> <http://www.w3.org/2000/01/rdf-schema#label> "sequencing assay" .
+<http://semanticscience.org/resource/SIO_000115> <http://www.w3.org/2000/01/rdf-schema#label> "identifier" .
+<http://www.ebi.ac.uk/efo/EFO_0000532> <http://www.w3.org/2000/01/rdf-schema#label> "host" .
+<http://semanticscience.org/resource/SIO_001167> <http://www.w3.org/2000/01/rdf-schema#label> "comment" .
+<http://www.ebi.ac.uk/efo/EFO_0002699> <http://www.w3.org/2000/01/rdf-schema#label> "high throughput sequencer" .
+<http://semanticscience.org/resource/SIO_010055> <http://www.w3.org/2000/01/rdf-schema#label> "strain" .
+<http://purl.obolibrary.org/obo/OBI_0001895> <http://www.w3.org/2000/01/rdf-schema#label> "specimen collector" .
+<http://edamontology.org/data_2091> <http://www.w3.org/2000/01/rdf-schema#label> "Accession" .
+<http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164> <http://www.w3.org/2000/01/rdf-schema#label> "collection date" .
+<http://purl.obolibrary.org/obo/NCIT_C3833> <http://www.w3.org/2000/01/rdf-schema#label> "Asymptomatic" .
+<http://purl.obolibrary.org/obo/NCIT_C25269> <http://www.w3.org/2000/01/rdf-schema#label> "Symptomatic" .
+<http://purl.obolibrary.org/obo/GENEPIO_0002020> <http://www.w3.org/2000/01/rdf-schema#label> "admitted to hospital" .
+<http://purl.obolibrary.org/obo/GENEPIO_0001849> <http://www.w3.org/2000/01/rdf-schema#label> "discharged from hospital" .
+<http://purl.obolibrary.org/obo/NCIT_C28554> <http://www.w3.org/2000/01/rdf-schema#label> "Dead" .
+<http://purl.obolibrary.org/obo/NCIT_C37987> <http://www.w3.org/2000/01/rdf-schema#label> "Alive" .
+<http://purl.obolibrary.org/obo/NCIT_C115935> <http://www.w3.org/2000/01/rdf-schema#label> "Healthy" .
+<http://purl.obolibrary.org/obo/PATO_0000384> <http://www.w3.org/2000/01/rdf-schema#label> "male" .
+<http://purl.obolibrary.org/obo/PATO_0000383> <http://www.w3.org/2000/01/rdf-schema#label> "female" .
diff --git a/setup.py b/setup.py
index 4ab6329..412c103 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,8 @@ try:
 except ImportError:
     tagger = egg_info_cmd.egg_info
 
-install_requires = ["arvados-python-client", "schema-salad", "python-magic", "pyshex"]
+# "python-dateutil" is the canonical PyPI distribution of the `dateutil` module.
+install_requires = ["arvados-python-client", "schema-salad",
+                    "python-magic", "pyshex", "python-dateutil"]
 web_requires = ["flask", "pyyaml"]
 
 needs_pytest = {"pytest", "test", "ptr"}.intersection(sys.argv)
@@ -26,7 +27,7 @@ setup(
     version="1.0",
     description="Biohackathon sequence uploader",
     long_description=open(README).read(),
-    long_description_content_type="text/x-rst",
+    long_description_content_type="text/markdown",
     author="Peter Amstutz",
     author_email="peter.amstutz@curii.com",
     license="Apache 2.0",
@@ -34,7 +35,8 @@ setup(
     package_data={"bh20sequploader": ["bh20seq-schema.yml",
                                       "bh20seq-options.yml",
                                       "bh20seq-shex.rdf",
-                                      "validation/formats"],
+                                      "validation/formats",
+                                      "SARS-CoV-2-reference.fasta",],
     },
     install_requires=install_requires,
     extras_require={
diff --git a/workflows/pangenome-generate/minimap2.cwl b/workflows/pangenome-generate/minimap2.cwl
index 42d1dce..bf8eb4c 100644
--- a/workflows/pangenome-generate/minimap2.cwl
+++ b/workflows/pangenome-generate/minimap2.cwl
@@ -12,7 +12,7 @@ hints:
   ResourceRequirement:
     coresMin: 8
     coresMax: 32
-    ramMin: $(9 * 1024)
+    ramMin: $(15 * 1024)
     outdirMin: $(Math.ceil(inputs.readsFA.size/(1024*1024*1024) + 20))
 stdout: $(inputs.readsFA.nameroot).paf
 baseCommand: minimap2
diff --git a/workflows/pangenome-generate/odgi_to_rdf.cwl b/workflows/pangenome-generate/odgi_to_rdf.cwl
index 079d6fb..e6a279b 100644
--- a/workflows/pangenome-generate/odgi_to_rdf.cwl
+++ b/workflows/pangenome-generate/odgi_to_rdf.cwl
@@ -3,10 +3,12 @@ class: CommandLineTool
 cwlVersion: v1.1
 hints:
   DockerRequirement:
-    dockerPull: spodgi/spodgi
+    dockerPull: jerven/spodgi:0.0.6
 requirements:
   InlineJavascriptRequirement: {}
   ShellCommandRequirement: {}
+  ResourceRequirement:
+    ramMin: $((2 * 1024) + 1)
 inputs:
   - id: odgi
     type: File