34 files changed, 1656 insertions, 724 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 0b52e6b..b3a439d 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -16,277 +16,308 @@ logging.basicConfig(format="[%(asctime)s] %(levelname)s %(message)s", datefmt="%
                     level=logging.INFO)
 logging.getLogger("googleapiclient.discovery").setLevel(logging.WARN)
 
-def validate_upload(api, collection, validated_project,
-                    fastq_project, fastq_workflow_uuid,
-                    revalidate):
-    col = arvados.collection.Collection(collection["uuid"])
-
-    if not revalidate and collection["properties"].get("status") in ("validated", "rejected"):
-        return False
-
-    # validate the collection here.  Check metadata, etc.
-    logging.info("Validating upload '%s' (%s)" % (collection["name"], collection["uuid"]))
-
-    errors = []
-
-    if collection["owner_uuid"] != validated_project:
-        dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
-                                              ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
-        if dup["items"]:
-            # This exact collection has been uploaded before.
-            errors.append("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
-
-    if not errors:
-        if "metadata.yaml" not in col:
-            errors.append("Missing metadata.yaml", collection["name"])
-        else:
+class SeqAnalyzer:
+
+    def __init__(self, api, keepclient,
+                 uploader_project,
+                 pangenome_analysis_project,
+                 fastq_project,
+                 validated_project,
+                 workflow_def_project,
+                 pangenome_workflow_uuid,
+                 fastq_workflow_uuid,
+                 exclude_list,
+                 latest_result_collection):
+        self.api = api
+        self.keepclient = keepclient
+        self.uploader_project = uploader_project
+        self.pangenome_analysis_project = pangenome_analysis_project
+        self.fastq_project = fastq_project
+        self.validated_project = validated_project
+        self.workflow_def_project = workflow_def_project
+        self.pangenome_workflow_uuid = pangenome_workflow_uuid
+        self.fastq_workflow_uuid = fastq_workflow_uuid
+        self.exclude_list = exclude_list
+        self.latest_result_uuid = latest_result_collection
+        self.schema_ref = None
+
+    def validate_upload(self, collection, revalidate):
+        col = arvados.collection.Collection(collection["uuid"], api_client=self.api, keep_client=self.keepclient)
+
+        if not revalidate and collection["properties"].get("status") in ("validated", "rejected"):
+            return False
+
+        # validate the collection here.  Check metadata, etc.
+        logging.info("Validating upload '%s' (%s)" % (collection["name"], collection["uuid"]))
+
+        errors = []
+
+        if collection["owner_uuid"] != self.validated_project:
+            dup = self.api.collections().list(filters=[["owner_uuid", "=", self.validated_project],
+                                                  ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
+            if dup["items"]:
+                # This exact collection has been uploaded before.
+                errors.append("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
+
+        if not errors:
+            if "metadata.yaml" not in col:
+                errors.append("Missing metadata.yaml in '%s'" % collection["name"])
+            else:
+                try:
+                    with col.open("metadata.yaml") as md:
+                        metadata_content = ruamel.yaml.round_trip_load(md)
+                    metadata_content["id"] = "http://arvados.org/keep:%s/metadata.yaml" % collection["portable_data_hash"]
+                    sample_id = metadata_content["sample"]["sample_id"]
+                    add_lc_filename(metadata_content, metadata_content["id"])
+                    valid = qc_metadata(metadata_content)
+                    if not valid:
+                        errors.append("Failed metadata qc")
+                except Exception as e:
+                    errors.append(str(e))
+
+        if not errors:
             try:
-                metadata_content = ruamel.yaml.round_trip_load(col.open("metadata.yaml"))
-                metadata_content["id"] = "http://arvados.org/keep:%s/metadata.yaml" % collection["portable_data_hash"]
-                sample_id = metadata_content["sample"]["sample_id"]
-                add_lc_filename(metadata_content, metadata_content["id"])
-                valid = qc_metadata(metadata_content)
-                if not valid:
-                    errors.append("Failed metadata qc")
-            except Exception as e:
-                errors.append(str(e))
-
-    if not errors:
-        try:
-            tgt = None
-            paired = {"reads_1.fastq": "reads.fastq", "reads_1.fastq.gz": "reads.fastq.gz"}
-            for n in ("sequence.fasta", "reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
-                if n not in col:
-                    continue
-                with col.open(n, 'rb') as qf:
-                    tgt = qc_fasta(qf)[0]
-                    if tgt != n and tgt != paired.get(n):
-                        errors.append("Expected %s but magic says it should be %s", n, tgt)
-                    elif tgt in ("reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
-                        start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid, n, sample_id)
-                        return False
-            if tgt is None:
-                errors.append("Upload '%s' does not contain sequence.fasta, reads.fastq or reads_1.fastq", collection["name"])
-        except Exception as v:
-            errors.append(str(v))
-
-
-    if not errors:
-        # Move it to the "validated" project to be included in the next analysis
-        if "errors" in collection["properties"]:
-            del collection["properties"]["errors"]
-        collection["properties"]["status"] = "validated"
-        api.collections().update(uuid=collection["uuid"], body={
-            "owner_uuid": validated_project,
-            "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime())),
-            "properties": collection["properties"]}).execute()
-        logging.info("Added '%s' to validated sequences" % collection["name"])
-        return True
-    else:
-        # It is invalid
-        logging.warn("'%s' (%s) has validation errors: %s" % (
-            collection["name"], collection["uuid"], "\n".join(errors)))
-        collection["properties"]["status"] = "rejected"
-        collection["properties"]["errors"] = errors
-        api.collections().update(uuid=collection["uuid"], body={"properties": collection["properties"]}).execute()
-        return False
-
-
-def run_workflow(api, parent_project, workflow_uuid, name, inputobj):
-    project = api.groups().create(body={
-        "group_class": "project",
-        "name": name,
-        "owner_uuid": parent_project,
-    }, ensure_unique_name=True).execute()
-
-    with tempfile.NamedTemporaryFile() as tmp:
-        tmp.write(json.dumps(inputobj, indent=2).encode('utf-8'))
-        tmp.flush()
-        cmd = ["arvados-cwl-runner",
-               "--submit",
-               "--no-wait",
-               "--project-uuid=%s" % project["uuid"],
-               "arvwf:%s" % workflow_uuid,
-               tmp.name]
-        logging.info("Running %s" % ' '.join(cmd))
-        comp = subprocess.run(cmd, capture_output=True)
-    logging.info("Submitted %s", comp.stdout)
-    if comp.returncode != 0:
-        logging.error(comp.stderr.decode('utf-8'))
-
-    return project
-
-
-def start_fastq_to_fasta(api, collection,
-                         analysis_project,
-                         fastq_workflow_uuid,
-                         tgt,
-                         sample_id):
-
-    params = {
-        "metadata": {
-            "class": "File",
-            "location": "keep:%s/metadata.yaml" % collection["portable_data_hash"]
-        },
-        "ref_fasta": {
-            "class": "File",
-            "location": "keep:ffef6a3b77e5e04f8f62a7b6f67264d1+556/SARS-CoV2-NC_045512.2.fasta"
-        },
-        "sample_id": sample_id
-    }
-
-    if tgt.startswith("reads.fastq"):
-        params["fastq_forward"] = {
-            "class": "File",
-            "location": "keep:%s/%s" % (collection["portable_data_hash"], tgt)
-        }
-    elif tgt.startswith("reads_1.fastq"):
-        params["fastq_forward"] = {
-            "class": "File",
-            "location": "keep:%s/reads_1.%s" % (collection["portable_data_hash"], tgt[8:])
-        }
-        params["fastq_reverse"] = {
-            "class": "File",
-            "location": "keep:%s/reads_2.%s" % (collection["portable_data_hash"], tgt[8:])
+                tgt = None
+                paired = {"reads_1.fastq": "reads.fastq", "reads_1.fastq.gz": "reads.fastq.gz"}
+                for n in ("sequence.fasta", "reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
+                    if n not in col:
+                        continue
+                    with col.open(n, 'rb') as qf:
+                        tgt = qc_fasta(qf)[0]
+                        if tgt != n and tgt != paired.get(n):
+                            errors.append("Expected %s but magic says it should be %s" % (n, tgt))
+                        elif tgt in ("reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
+                            self.start_fastq_to_fasta(collection, n, sample_id)
+                            return False
+                if tgt is None:
+                    errors.append("Upload '%s' does not contain sequence.fasta, reads.fastq or reads_1.fastq" % collection["name"])
+            except Exception as v:
+                errors.append(str(v))
+
+
+        if not errors:
+            # Move it to the "validated" project to be included in the next analysis
+            if "errors" in collection["properties"]:
+                del collection["properties"]["errors"]
+            collection["properties"]["status"] = "validated"
+            self.api.collections().update(uuid=collection["uuid"], body={
+                "owner_uuid": self.validated_project,
+                "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime())),
+                "properties": collection["properties"]}).execute()
+            logging.info("Added '%s' to validated sequences" % collection["name"])
+            return True
+        else:
+            # It is invalid
+            logging.warning("'%s' (%s) has validation errors: %s" % (
+                collection["name"], collection["uuid"], "\n".join(errors)))
+            collection["properties"]["status"] = "rejected"
+            collection["properties"]["errors"] = errors
+            self.api.collections().update(uuid=collection["uuid"], body={"properties": collection["properties"]}).execute()
+            return False
+
+
+    def run_workflow(self, parent_project, workflow_uuid, name, inputobj):
+        project = self.api.groups().create(body={
+            "group_class": "project",
+            "name": name,
+            "owner_uuid": parent_project,
+        }, ensure_unique_name=True).execute()
+
+        with tempfile.NamedTemporaryFile() as tmp:
+            tmp.write(json.dumps(inputobj, indent=2).encode('utf-8'))
+            tmp.flush()
+            cmd = ["arvados-cwl-runner",
+                   "--submit",
+                   "--no-wait",
+                   "--project-uuid=%s" % project["uuid"],
+                   "arvwf:%s" % workflow_uuid,
+                   tmp.name]
+            logging.info("Running %s" % ' '.join(cmd))
+            comp = subprocess.run(cmd, capture_output=True)
+        logging.info("Submitted %s", comp.stdout)
+        if comp.returncode != 0:
+            logging.error(comp.stderr.decode('utf-8'))
+
+        return project
+
+
+    def start_fastq_to_fasta(self, collection,
+                             tgt,
+                             sample_id):
+
+        params = {
+            "metadata": {
+                "class": "File",
+                "location": "keep:%s/metadata.yaml" % collection["portable_data_hash"]
+            },
+            "ref_fasta": {
+                "class": "File",
+                "location": "keep:ffef6a3b77e5e04f8f62a7b6f67264d1+556/SARS-CoV2-NC_045512.2.fasta"
+            },
+            "sample_id": sample_id
         }
 
-    newproject = run_workflow(api, analysis_project, fastq_workflow_uuid, "FASTQ to FASTA", params)
-    api.collections().update(uuid=collection["uuid"],
-                             body={"owner_uuid": newproject["uuid"]}).execute()
-
-def start_pangenome_analysis(api,
-                             analysis_project,
-                             pangenome_workflow_uuid,
-                             validated_project,
-                             schema_ref,
-                             exclude_list):
-    validated = arvados.util.list_all(api.collections().list, filters=[
-        ["owner_uuid", "=", validated_project],
-        ["properties.status", "=", "validated"]])
-    inputobj = {
-        "inputReads": [],
-        "metadata": [],
-        "subjects": [],
-        "metadataSchema": {
-            "class": "File",
-            "location": schema_ref
-        },
-        "exclude": {
-            "class": "File",
-            "location": exclude_list
+        if tgt.startswith("reads.fastq"):
+            params["fastq_forward"] = {
+                "class": "File",
+                "location": "keep:%s/%s" % (collection["portable_data_hash"], tgt)
+            }
+        elif tgt.startswith("reads_1.fastq"):
+            params["fastq_forward"] = {
+                "class": "File",
+                "location": "keep:%s/reads_1.%s" % (collection["portable_data_hash"], tgt[8:])
+            }
+            params["fastq_reverse"] = {
+                "class": "File",
+                "location": "keep:%s/reads_2.%s" % (collection["portable_data_hash"], tgt[8:])
+            }
+
+        newproject = self.run_workflow(self.fastq_project, self.fastq_workflow_uuid, "FASTQ to FASTA", params)
+        self.api.collections().update(uuid=collection["uuid"],
+                                 body={"owner_uuid": newproject["uuid"]}).execute()
+
+    def start_pangenome_analysis(self):
+
+        if self.schema_ref is None:
+            self.upload_schema()
+
+        validated = arvados.util.list_all(self.api.collections().list, filters=[
+            ["owner_uuid", "=", self.validated_project],
+            ["properties.status", "=", "validated"]])
+        inputobj = {
+            "inputReads": [],
+            "metadata": [],
+            "subjects": [],
+            "metadataSchema": {
+                "class": "File",
+                "location": self.schema_ref
+            },
+            "exclude": {
+                "class": "File",
+                "location": self.exclude_list
+            }
         }
-    }
-    validated.sort(key=lambda v: v["portable_data_hash"])
-    for v in validated:
-        inputobj["inputReads"].append({
-            "class": "File",
-            "location": "keep:%s/sequence.fasta" % v["portable_data_hash"]
-        })
-        inputobj["metadata"].append({
-            "class": "File",
-            "location": "keep:%s/metadata.yaml" % v["portable_data_hash"]
-        })
-        inputobj["subjects"].append("http://collections.lugli.arvadosapi.com/c=%s/sequence.fasta" % v["portable_data_hash"])
-    run_workflow(api, analysis_project, pangenome_workflow_uuid, "Pangenome analysis", inputobj)
-
-
-def get_workflow_output_from_project(api, uuid):
-    cr = api.container_requests().list(filters=[['owner_uuid', '=', uuid],
-                                                ["requesting_container_uuid", "=", None]]).execute()
-    if cr["items"] and cr["items"][0]["output_uuid"]:
-        container = api.containers().get(uuid=cr["items"][0]["container_uuid"]).execute()
-        if container["state"] == "Complete" and container["exit_code"] == 0:
-            return cr["items"][0]
-    return None
-
-
-def copy_most_recent_result(api, analysis_project, latest_result_uuid):
-    most_recent_analysis = api.groups().list(filters=[['owner_uuid', '=', analysis_project]],
-                                                  order="created_at desc").execute()
-    for m in most_recent_analysis["items"]:
-        wf = get_workflow_output_from_project(api, m["uuid"])
-        if wf:
-            src = api.collections().get(uuid=wf["output_uuid"]).execute()
-            dst = api.collections().get(uuid=latest_result_uuid).execute()
-            if src["portable_data_hash"] != dst["portable_data_hash"]:
-                logging.info("Copying latest result from '%s' to %s", m["name"], latest_result_uuid)
-                api.collections().update(uuid=latest_result_uuid,
-                                         body={"manifest_text": src["manifest_text"],
-                                               "description": "Result from %s %s" % (m["name"], wf["uuid"])}).execute()
-            break
-
+        validated.sort(key=lambda v: v["portable_data_hash"])
+        for v in validated:
+            inputobj["inputReads"].append({
+                "class": "File",
+                "location": "keep:%s/sequence.fasta" % v["portable_data_hash"]
+            })
+            inputobj["metadata"].append({
+                "class": "File",
+                "location": "keep:%s/metadata.yaml" % v["portable_data_hash"]
+            })
+            inputobj["subjects"].append("http://collections.lugli.arvadosapi.com/c=%s/sequence.fasta" % v["portable_data_hash"])
+        self.run_workflow(self.pangenome_analysis_project, self.pangenome_workflow_uuid, "Pangenome analysis", inputobj)
+
+
+    def get_workflow_output_from_project(self, uuid):
+        cr = self.api.container_requests().list(filters=[['owner_uuid', '=', uuid],
+                                                    ["requesting_container_uuid", "=", None]]).execute()
+        if cr["items"] and cr["items"][0]["output_uuid"]:
+            container = self.api.containers().get(uuid=cr["items"][0]["container_uuid"]).execute()
+            if container["state"] == "Complete" and container["exit_code"] == 0:
+                return cr["items"][0]
+        return None
+
+
+    def copy_most_recent_result(self):
+        most_recent_analysis = self.api.groups().list(filters=[['owner_uuid', '=', self.pangenome_analysis_project]],
+                                                      order="created_at desc").execute()
+        for m in most_recent_analysis["items"]:
+            wf = self.get_workflow_output_from_project(m["uuid"])
+            if wf:
+                src = self.api.collections().get(uuid=wf["output_uuid"]).execute()
+                dst = self.api.collections().get(uuid=self.latest_result_uuid).execute()
+                if src["portable_data_hash"] != dst["portable_data_hash"]:
+                    logging.info("Copying latest result from '%s' to %s", m["name"], self.latest_result_uuid)
+                    self.api.collections().update(uuid=self.latest_result_uuid,
+                                             body={"manifest_text": src["manifest_text"],
+                                                   "description": "Result from %s %s" % (m["name"], wf["uuid"])}).execute()
+                break
+
+
+    def move_fastq_to_fasta_results(self):
+        projects = self.api.groups().list(filters=[['owner_uuid', '=', self.fastq_project],
+                                              ["properties.moved_output", "!=", True]],
+                                     order="created_at asc",).execute()
+        for p in projects["items"]:
+            wf = self.get_workflow_output_from_project(p["uuid"])
+            if not wf:
+                continue
 
-def move_fastq_to_fasta_results(api, analysis_project, uploader_project):
-    projects = api.groups().list(filters=[['owner_uuid', '=', analysis_project],
-                                          ["properties.moved_output", "!=", True]],
-                                 order="created_at desc",).execute()
-    for p in projects["items"]:
-        wf = get_workflow_output_from_project(api, p["uuid"])
-        if wf:
             logging.info("Moving completed fastq2fasta result %s back to uploader project", wf["output_uuid"])
-            api.collections().update(uuid=wf["output_uuid"],
-                                     body={"owner_uuid": uploader_project}).execute()
-            p["properties"]["moved_output"] = True
-            api.groups().update(uuid=p["uuid"], body={"properties": p["properties"]}).execute()
-            break
 
+            col = arvados.collection.Collection(wf["output_uuid"], api_client=self.api, keep_client=self.keepclient)
+            with col.open("metadata.yaml") as md:
+                metadata_content = ruamel.yaml.round_trip_load(md)
+
+            colprop = col.get_properties()
+            colprop["sequence_label"] = metadata_content["sample"]["sample_id"]
+            self.api.collections().update(uuid=wf["output_uuid"],
+                                     body={"owner_uuid": self.uploader_project,
+                                           "properties": colprop}).execute()
 
-def upload_schema(api, workflow_def_project):
-    schema_resource = pkg_resources.resource_stream('bh20sequploader.qc_metadata', "bh20seq-schema.yml")
-    c = arvados.collection.Collection()
-    with c.open("schema.yml", "wb") as f:
-        f.write(schema_resource.read())
-    pdh = c.portable_data_hash()
-    wd = api.collections().list(filters=[["owner_uuid", "=", workflow_def_project],
-                                         ["portable_data_hash", "=", pdh]]).execute()
-    if len(wd["items"]) == 0:
-        c.save_new(owner_uuid=workflow_def_project, name="Metadata schema", ensure_unique_name=True)
-    return "keep:%s/schema.yml" % pdh
-
-
-def print_status(api, uploader_project, fmt):
-    pending = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", uploader_project]])
-    out = []
-    status = {}
-    for p in pending:
-        prop = p["properties"]
-        out.append(prop)
-        if "status" not in prop:
-            prop["status"] = "pending"
-        prop["created_at"] = p["created_at"]
-        prop["uuid"] = p["uuid"]
-        status[prop["status"]] = status.get(prop["status"], 0) + 1
-    if fmt == "html":
-        print(
-"""
-<html>
-<body>
-""")
-        print("<p>Total collections in upload project %s</p>" % len(out))
-        print("<p>Status %s</p>" % status)
-        print(
-"""
-<table>
-<tr><th>Collection</th>
-<th>Sequence label</th>
-<th>Status</th>
-<th>Errors</th></tr>
-""")
-        for r in out:
-            print("<tr valign='top'>")
-            print("<td><a href='https://workbench.lugli.arvadosapi.com/collections/%s'>%s</a></td>" % (r["uuid"], r["uuid"]))
-            print("<td>%s</td>" % r["sequence_label"])
-            print("<td>%s</td>" % r["status"])
-            print("<td><pre>%s</pre></td>" % "\n".join(r.get("errors", [])))
-            print("</tr>")
-        print(
-"""
-</table>
-</body>
-</html>
-""")
-    else:
-        print(json.dumps(out, indent=2))
+            p["properties"]["moved_output"] = True
+            self.api.groups().update(uuid=p["uuid"], body={"properties": p["properties"]}).execute()
+
+
+    def upload_schema(self):
+        schema_resource = pkg_resources.resource_stream('bh20sequploader.qc_metadata', "bh20seq-schema.yml")
+        c = arvados.collection.Collection(api_client=self.api, keep_client=self.keepclient)
+        with c.open("schema.yml", "wb") as f:
+            f.write(schema_resource.read())
+        pdh = c.portable_data_hash()
+        wd = self.api.collections().list(filters=[["owner_uuid", "=", self.workflow_def_project],
+                                             ["portable_data_hash", "=", pdh]]).execute()
+        if len(wd["items"]) == 0:
+            c.save_new(owner_uuid=self.workflow_def_project, name="Metadata schema", ensure_unique_name=True)
+        self.schema_ref = "keep:%s/schema.yml" % pdh
+
+
+    def print_status(self, fmt):
+        pending = arvados.util.list_all(self.api.collections().list, filters=[["owner_uuid", "=", self.uploader_project]])
+        out = []
+        status = {}
+        for p in pending:
+            prop = p["properties"]
+            out.append(prop)
+            if "status" not in prop:
+                prop["status"] = "pending"
+            prop["created_at"] = p["created_at"]
+            prop["uuid"] = p["uuid"]
+            status[prop["status"]] = status.get(prop["status"], 0) + 1
+        if fmt == "html":
+            print(
+    """
+    <html>
+    <body>
+    """)
+            print("<p>Total collections in upload project %s</p>" % len(out))
+            print("<p>Status %s</p>" % status)
+            print(
+    """
+    <table>
+    <tr><th>Collection</th>
+    <th>Sequence label</th>
+    <th>Status</th>
+    <th>Errors</th></tr>
+    """)
+            for r in out:
+                print("<tr valign='top'>")
+                print("<td><a href='https://workbench.lugli.arvadosapi.com/collections/%s'>%s</a></td>" % (r["uuid"], r["uuid"]))
+                print("<td>%s</td>" % r["sequence_label"])
+                print("<td>%s</td>" % r["status"])
+                print("<td><pre>%s</pre></td>" % "\n".join(r.get("errors", [])))
+                print("</tr>")
+            print(
+    """
+    </table>
+    </body>
+    </html>
+    """)
+        else:
+            print(json.dumps(out, indent=2))
 
 def main():
     parser = argparse.ArgumentParser(description='Analyze collections uploaded to a project')
@@ -310,50 +341,45 @@ def main():
     args = parser.parse_args()
 
     api = arvados.api()
-
-
-
-    schema_ref = upload_schema(api, args.workflow_def_project)
+    keepclient = arvados.keep.KeepClient(api_client=api)
+
+    seqanalyzer = SeqAnalyzer(api, keepclient,
+                              args.uploader_project,
+                              args.pangenome_analysis_project,
+                              args.fastq_project,
+                              args.validated_project,
+                              args.workflow_def_project,
+                              args.pangenome_workflow_uuid,
+                              args.fastq_workflow_uuid,
+                              args.exclude_list,
+                              args.latest_result_collection)
 
     if args.kickoff:
         logging.info("Starting a single analysis run")
-        start_pangenome_analysis(api,
-                                 args.pangenome_analysis_project,
-                                 args.pangenome_workflow_uuid,
-                                 args.validated_project,
-                                 schema_ref,
-                                 args.exclude_list)
+        seqanalyzer.start_pangenome_analysis()
         return
 
     if args.print_status:
-        print_status(api, args.uploader_project, args.print_status)
+        seqanalyzer.print_status(args.print_status)
         exit(0)
 
     logging.info("Starting up, monitoring %s for uploads" % (args.uploader_project))
 
     while True:
-        move_fastq_to_fasta_results(api, args.fastq_project, args.uploader_project)
-
-        new_collections = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", args.uploader_project]])
-        at_least_one_new_valid_seq = False
-        for c in new_collections:
-            at_least_one_new_valid_seq = validate_upload(api, c,
-                                                         args.validated_project,
-                                                         args.fastq_project,
-                                                         args.fastq_workflow_uuid,
-                                                         args.revalidate) or at_least_one_new_valid_seq
-
-        if at_least_one_new_valid_seq and not args.no_start_analysis:
-            start_pangenome_analysis(api,
-                                     args.pangenome_analysis_project,
-                                     args.pangenome_workflow_uuid,
-                                     args.validated_project,
-                                     schema_ref,
-                                     args.exclude_list)
-
-        copy_most_recent_result(api,
-                                args.pangenome_analysis_project,
-                                args.latest_result_collection)
+        try:
+            seqanalyzer.move_fastq_to_fasta_results()
+
+            new_collections = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", args.uploader_project]])
+            at_least_one_new_valid_seq = False
+            for c in new_collections:
+                at_least_one_new_valid_seq = seqanalyzer.validate_upload(c, args.revalidate) or at_least_one_new_valid_seq
+
+            if at_least_one_new_valid_seq and not args.no_start_analysis:
+                seqanalyzer.start_pangenome_analysis()
+
+            seqanalyzer.copy_most_recent_result()
+        except Exception as e:
+            logging.exception("Error in main loop")
 
         if args.once:
             break
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ee852fa..0aead3b 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -16,7 +16,7 @@ $graph:
   fields:
     license_type:
       doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
-      type: string?
+      type: string
       jsonldPredicate:
           _id: https://creativecommons.org/ns#License
     title:
@@ -264,7 +264,7 @@ $graph:
     virus: virusSchema
     technology: technologySchema
     submitter: submitterSchema
-    license: licenseSchema
+    license: ["null", licenseSchema]
     id:
       doc: The subject (eg the fasta/fastq file) that the metadata describes
       type: string
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 7331e86..bbc7309 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -17,7 +17,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
   MainSchema:submitter @:submitterShape ;
   MainSchema:technology @:technologyShape ;
   MainSchema:virus @:virusShape;
-  MainSchema:license @:licenseShape;
+  MainSchema:license @:licenseShape ?;
 }
 
 :hostShape  {
@@ -71,7 +71,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
 }
 
 :licenseShape{
-    cc:License xsd:string ?;
+    cc:License xsd:string ;
     dc:Title xsd:string ?;
     cc:attributionName xsd:string ?;
     cc:attributionURL xsd:string ?;
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index f744a8c..6049bf9 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -29,11 +29,10 @@ def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
     try:
         log.debug("Checking metadata" if do_qc else "Skipping metadata check")
         if do_qc and not qc_metadata(metadata.name):
-            log.warning("Failed metadata qc")
+            log.warning("Failed metadata QC")
             failed = True
     except Exception as e:
-        log.debug(e)
-        print(e)
+        log.exception("Failed metadata QC")
         failed = True
 
     target = []
@@ -45,8 +44,7 @@ def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
             target[0] = ("reads_1."+target[0][0][6:], target[0][1])
             target[1] = ("reads_2."+target[1][0][6:], target[0][1])
     except Exception as e:
-        log.debug(e)
-        print(e)
+        log.exception("Failed sequence QC")
         failed = True
 
     if failed:
@@ -82,7 +80,7 @@ def main():
     seqlabel = target[0][1]
 
     if args.validate:
-        print("Valid")
+        log.info("Valid")
         exit(0)
 
     col = arvados.collection.Collection(api_client=api)
@@ -91,10 +89,10 @@ def main():
     if args.sequence_p2:
         upload_sequence(col, target[1], args.sequence_p2)
 
-    print("Reading metadata")
+    log.info("Reading metadata")
     with col.open("metadata.yaml", "w") as f:
         r = args.metadata.read(65536)
-        print(r[0:20])
+        log.info(r[0:20])
         while r:
             f.write(r)
             r = args.metadata.read(65536)
@@ -118,7 +116,7 @@ def main():
                                            ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
     if dup["items"]:
         # This exact collection has been uploaded before.
-        print("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
+        log.error("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
         exit(1)
 
     if args.trusted:
@@ -131,9 +129,9 @@ def main():
                  (seqlabel, properties['upload_user'], properties['upload_ip']),
                  properties=properties, ensure_unique_name=True)
 
-    print("Saved to %s" % col.manifest_locator())
-
-    print("Done")
+    log.info("Saved to %s" % col.manifest_locator())
+    log.info("Done")
+    exit(0)
 
 if __name__ == "__main__":
     main()
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index 37eb4e8..0c7e16d 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -84,10 +84,8 @@ def qc_fasta(arg_sequence, check_with_clustalw=True):
                 except Exception as e:
                     logging.warn("QC against reference sequence using 'minimap2': %s", e, exc_info=e)
 
-                if similarity and similarity < 70.0:
+                if similarity < 70.0:
                     raise ValueError("QC fail: alignment to reference was less than 70%% (was %2.2f%%)" % (similarity))
-                if similarity == 0:
-                    raise ValueError("QC fail")
 
         return ("sequence.fasta"+gz, seqlabel)
     elif seq_type == "text/fastq":
diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py
index 77b3832..206f884 100644
--- a/bh20simplewebuploader/main.py
+++ b/bh20simplewebuploader/main.py
@@ -8,7 +8,7 @@ import os
 import sys
 import re
 import string
-import yaml
+import ruamel.yaml as yaml
 import pkg_resources
 from flask import Flask, request, redirect, send_file, send_from_directory, render_template, jsonify
 import os.path
@@ -16,6 +16,9 @@ import requests
 import io
 import arvados
 from markupsafe import Markup
+from schema_salad.sourceline import add_lc_filename
+from schema_salad.schema import shortname
+from typing import MutableSequence, MutableMapping
 
 ARVADOS_API = 'lugli.arvadosapi.com'
 ANONYMOUS_TOKEN = '5o42qdxpxp5cj15jqjf7vnxx5xduhm4ret703suuoa3ivfglfh'
@@ -47,6 +50,8 @@ def type_to_heading(type_name):
     Turn a type name like "sampleSchema" from the metadata schema into a human-readable heading.
     """
 
+    type_name = shortname(type_name)
+
     print(type_name,file=sys.stderr)
     # Remove camel case
     decamel = re.sub('([A-Z])', r' \1', type_name)
@@ -78,7 +83,7 @@ def is_iri(string):
 
     return string.startswith('http')
 
-def generate_form(schema, options):
+def generate_form(components, options):
     """
     Linearize the schema into a list of dicts.
 
@@ -101,9 +106,6 @@ def generate_form(schema, options):
     IRI.
     """
 
-    # Get the list of form components, one of which is the root
-    components = schema.get('$graph', [])
-
     # Find the root
     root_name = None
     # And also index components by type name
@@ -131,55 +133,54 @@ def generate_form(schema, options):
             # First make a heading, if we aren't the very root of the form
             yield {'heading': type_to_heading(type_name)}
 
-        for field_name, field_type in by_name.get(type_name, {}).get('fields', {}).items():
+        for field in by_name.get(type_name, {}).get('fields', []):
+            field_name = shortname(field["name"])
+            field_type = field["type"]
             # For each field
 
             ref_iri = None
             docstring = None
-            if not isinstance(field_type, str):
-                # If the type isn't a string
-
-                # It may have documentation
-                docstring = field_type.get('doc', None)
-
-                # See if it has a more info/what goes here URL
-                predicate = field_type.get('jsonldPredicate', {})
-                # Predicate may be a URL, a dict with a URL in _id, maybe a
-                # dict with a URL in _type, or a dict with _id and _type but no
-                # URLs anywhere. Some of these may not technically be allowed
-                # by the format, but if they occur, we might as well try to
-                # handle them.
-                if isinstance(predicate, str):
-                    if is_iri(predicate):
-                        ref_iri = predicate
-                else:
-                    # Assume it's a dict. Look at the fields we know about.
-                    for field in ['_id', 'type']:
-                        field_value = predicate.get(field, None)
-                        if isinstance(field_value, str) and is_iri(field_value) and ref_iri is None:
-                            # Take the first URL-looking thing we find
-                            ref_iri = field_value
-                            break
 
-
-                # Now overwrite the field type with the actual type string
-                field_type = field_type.get('type', '')
-
-            # Decide if the field is optional (type ends in ?)
             optional = False
-            if field_type.endswith('?'):
-                # It's optional
-                optional = True
-                # Drop the ?
-                field_type = field_type[:-1]
-
-            # Decide if the field is a list (type ends in [])
             is_list = False
-            if field_type.endswith('[]'):
-                # It's a list
-                is_list = True
-                # Reduce to the normal type
-                field_type = field_type[:-2]
+
+            # It may have documentation
+            docstring = field.get('doc', None)
+
+            # See if it has a more info/what goes here URL
+            predicate = field.get('jsonldPredicate', {})
+            # Predicate may be a URL, a dict with a URL in _id, maybe a
+            # dict with a URL in _type, or a dict with _id and _type but no
+            # URLs anywhere. Some of these may not technically be allowed
+            # by the format, but if they occur, we might as well try to
+            # handle them.
+            if isinstance(predicate, str):
+                if is_iri(predicate):
+                    ref_iri = predicate
+            else:
+                # Assume it's a dict. Look at the fields we know about.
+                for field in ['_id', 'type']:
+                    field_value = predicate.get(field, None)
+                    if isinstance(field_value, str) and is_iri(field_value) and ref_iri is None:
+                        # Take the first URL-looking thing we find
+                        ref_iri = field_value
+                        break
+
+            if isinstance(field_type, MutableSequence):
+                if field_type[0] == "null" and len(field_type) == 2:
+                    optional = True
+                    field_type = field_type[1]
+                else:
+                    raise Exception("Can't handle it")
+
+            if isinstance(field_type, MutableMapping):
+                if field_type["type"] == "array":
+                    # Now replace the field type with the actual type string
+                    is_list = True
+                    field_type = field_type.get('items', '')
+                else:
+                    field_type = field_type.get('type', '')
+                    pass
 
             if field_type in by_name:
                 # This is a subrecord. We need to recurse
@@ -227,15 +228,24 @@ def generate_form(schema, options):
     return list(walk_fields(root_name))
 
 
-# At startup, we need to load the metadata schema from the uploader module, so we can make a form for it
-if os.path.isfile("bh20sequploader/bh20seq-schema.yml"):
-    METADATA_SCHEMA = yaml.safe_load(open("bh20sequploader/bh20seq-schema.yml","r").read())
-    METADATA_OPTION_DEFINITIONS = yaml.safe_load(open("bh20sequploader/bh20seq-options.yml","r").read())
-else:
-    METADATA_SCHEMA = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-schema.yml"))
-    METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-options.yml"))
-# print(METADATA_SCHEMA,file=sys.stderr)
-FORM_ITEMS = generate_form(METADATA_SCHEMA, METADATA_OPTION_DEFINITIONS)
+import schema_salad.schema
+def load_schema_generate_form():
+    # At startup, we need to load the metadata schema from the uploader module, so we can make a form for it
+    if os.path.isfile("bh20sequploader/bh20seq-schema.yml"):
+        METADATA_SCHEMA = yaml.round_trip_load(open("bh20sequploader/bh20seq-schema.yml","r").read())
+        METADATA_OPTION_DEFINITIONS = yaml.safe_load(open("bh20sequploader/bh20seq-options.yml","r").read())
+    else:
+        METADATA_SCHEMA = yaml.round_trip_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-schema.yml"))
+        METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-options.yml"))
+
+    METADATA_SCHEMA["name"] = "bh20seq-schema.yml"
+    add_lc_filename(METADATA_SCHEMA, "bh20seq-schema.yml")
+    metaschema_names, _metaschema_doc, metaschema_loader = schema_salad.schema.get_metaschema()
+    schema_doc, schema_metadata = metaschema_loader.resolve_ref(METADATA_SCHEMA, "")
+
+    return generate_form(schema_doc, METADATA_OPTION_DEFINITIONS)
+
+FORM_ITEMS = load_schema_generate_form()
 
 @app.route('/')
 def send_home():
@@ -243,7 +253,7 @@ def send_home():
     Send the front page.
     """
 
-    return render_template('home.html', menu='HOME')
+    return render_template('home.html', menu='HOME', load_map=True)
 
 
 @app.route('/upload')
@@ -435,12 +445,12 @@ def receive_files():
 
         if result.returncode != 0:
             # It didn't work. Complain.
-            error_message="Uploader returned value {} and said:".format(result.returncode) + str(result.stderr.decode('utf-8'))
+            error_message="Uploader returned value {} and said:\n".format(result.returncode) + str(result.stderr.decode('utf-8'))
             print(error_message, file=sys.stderr)
             return (render_template('error.html', error_message=error_message), 403)
         else:
             # It worked. Say so.
-            return render_template('success.html', log=result.stdout.decode('utf-8', errors='replace'))
+            return render_template('success.html', log=result.stderr.decode('utf-8', errors='replace'))
     finally:
         shutil.rmtree(dest_dir)
 
@@ -479,10 +489,13 @@ def pending_table(output, items):
     for r in items:
         if r["status"] != "pending":
             continue
-        output.write("<tr>")
-        output.write("<td><a href='https://workbench.lugli.arvadosapi.com/collections/%s'>%s</a></td>" % (r["uuid"], r["uuid"]))
-        output.write("<td>%s</td>" % Markup.escape(r["sequence_label"]))
-        output.write("</tr>")
+        try:
+            output.write("<tr>")
+            output.write("<td><a href='https://workbench.lugli.arvadosapi.com/collections/%s'>%s</a></td>" % (r["uuid"], r["uuid"]))
+            output.write("<td>%s</td>" % Markup.escape(r.get("sequence_label")))
+            output.write("</tr>")
+        except:
+            pass
     output.write(
 """
 </table>
@@ -497,18 +510,69 @@ def rejected_table(output, items):
 <th>Errors</th></tr>
 """)
     for r in items:
-        if r["status"] != "rejected":
-            continue
+        try:
+            if r["status"] != "rejected":
+                continue
+            output.write("<tr>")
+            output.write("<td><a href='https://workbench.lugli.arvadosapi.com/collections/%s'>%s</a></td>" % (r["uuid"], r["uuid"]))
+            output.write("<td>%s</td>" % Markup.escape(r.get("sequence_label")))
+            output.write("<td><pre>%s</pre></td>" % Markup.escape("\n".join(r.get("errors", []))))
+            output.write("</tr>")
+        except:
+            pass
+    output.write(
+"""
+</table>
+""")
+
+def workflows_table(output, items):
+    output.write(
+"""
+<table>
+<tr>
+<th>Name</th>
+<th>Sample id</th>
+<th>Started</th>
+<th>Container request</th>
+</tr>
+""")
+    for r in items:
         output.write("<tr>")
-        output.write("<td><a href='https://workbench.lugli.arvadosapi.com/collections/%s'>%s</a></td>" % (r["uuid"], r["uuid"]))
-        output.write("<td>%s</td>" % Markup.escape(r["sequence_label"]))
-        output.write("<td><pre>%s</pre></td>" % Markup.escape("\n".join(r.get("errors", []))))
+        try:
+            sid = r["mounts"]["/var/lib/cwl/cwl.input.json"]["content"]["sample_id"]
+            output.write("<td>%s</td>" % Markup.escape(r["name"]))
+            output.write("<td>%s</td>" % Markup.escape(sid))
+            output.write("<td>%s</td>" % Markup.escape(r["created_at"]))
+            output.write("<td><a href='https://workbench.lugli.arvadosapi.com/container_requests/%s'>%s</a></td>" % (r["uuid"], r["uuid"]))
+        except:
+            pass
         output.write("</tr>")
     output.write(
 """
 </table>
 """)
 
+def validated_table(output, items):
+    output.write(
+"""
+<table>
+<tr>
+<th>Collection</th>
+<th>Sequence label</th>
+</tr>
+""")
+    for r in items:
+        try:
+            output.write("<tr>")
+            output.write("<td><a href='https://workbench.lugli.arvadosapi.com/collections/%s'>%s</a></td>" % (r["uuid"], r["uuid"]))
+            output.write("<td>%s</td>" % Markup.escape(r["properties"].get("sequence_label")))
+            output.write("</tr>")
+        except:
+            pass
+    output.write(
+"""
+</table>
+""")
 
 @app.route('/status')
 def status_page():
@@ -529,25 +593,42 @@ def status_page():
         prop["uuid"] = p["uuid"]
         status[prop["status"]] = status.get(prop["status"], 0) + 1
 
+    workflows = arvados.util.list_all(api.container_requests().list,
+                                      filters=[["name", "in", ["fastq2fasta.cwl"]], ["state", "=", "Committed"]],
+                                      order="created_at asc")
+
     output = io.StringIO()
 
     validated = api.collections().list(filters=[["owner_uuid", "=", VALIDATED_PROJECT]], limit=1).execute()
     status["passed"] = validated["items_available"]
 
-    for s in (("passed", "/download"), ("pending", "#pending"), ("rejected", "#rejected")):
+    for s in (("passed", "/validated"), ("pending", "#pending"), ("rejected", "#rejected")):
         output.write("<p><a href='%s'>%s sequences QC %s</a></p>" % (s[1], status.get(s[0], 0), s[0]))
 
-    output.write("<a id='pending'><h1>Pending</h1>")
+    output.write("<p><a href='%s'>%s analysis workflows running</a></p>" % ('#workflows', len(workflows)))
+
+    output.write("<a id='pending'><h1>Pending</h1></a>")
     pending_table(output, out)
 
-    output.write("<a id='rejected'><h1>Rejected</h1>")
+    output.write("<a id='rejected'><h1>Rejected</h1></a>")
     rejected_table(output, out)
 
+    output.write("<a id='workflows'><h1>Running Workflows</h1></a>")
+    workflows_table(output, workflows)
+
     return render_template('status.html', table=Markup(output.getvalue()), menu='STATUS')
 
+@app.route('/validated')
+def validated_page():
+    api = arvados.api(host=ARVADOS_API, token=ANONYMOUS_TOKEN, insecure=True)
+    output = io.StringIO()
+    validated = arvados.util.list_all(api.collections().list, filters=[["owner_uuid", "=", VALIDATED_PROJECT]])
+    validated_table(output, validated)
+    return render_template('validated.html', table=Markup(output.getvalue()), menu='STATUS')
+
 @app.route('/demo')
 def demo_page():
-    return render_template('demo.html',menu='DEMO')
+    return render_template('demo.html',menu='DEMO',load_map=True)
 
 @app.route('/blog',methods=['GET'])
 def blog_page():
@@ -563,12 +644,6 @@ def about_page():
     buf = get_html_body('doc/web/about.html','https://github.com/arvados/bh20-seq-resource/blob/master/doc/web/about.org')
     return render_template('about.html',menu='ABOUT',embed=buf)
 
-##
-@app.route('/map')
-def map_page():
-    return render_template('map.html',menu='DEMO')
-
-
 
 ## Dynamic API functions starting here
 ## This is quick and dirty for now, just to get something out and demonstrate the queries
diff --git a/bh20simplewebuploader/static/image/curii.logo.ai.png b/bh20simplewebuploader/static/image/curii.logo.ai.png
new file mode 100644
index 0000000..401afad
--- /dev/null
+++ b/bh20simplewebuploader/static/image/curii.logo.ai.png
Binary files differdiff --git a/bh20simplewebuploader/static/image/curii.logo.ai.svg b/bh20simplewebuploader/static/image/curii.logo.ai.svg
new file mode 100644
index 0000000..e87ea05
--- /dev/null
+++ b/bh20simplewebuploader/static/image/curii.logo.ai.svg
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg width="1333.3" height="1333.3" version="1.1" viewBox="0 0 1333.3 1333.3" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:cc="http://creativecommons.org/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><metadata><rdf:RDF><cc:Work rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/><dc:title/></cc:Work></rdf:RDF></metadata><defs><clipPath id="clipPath18"><path d="m0 1e3h1e3v-1e3h-1e3z"/></clipPath></defs><g transform="matrix(1.3333 0 0 -1.3333 0 1333.3)"><g clip-path="url(#clipPath18)"><g transform="translate(473.7 553.52)"><path d="m0 0c-23.91 0-41.063-17.932-41.063-44.182 0-26.251 17.153-44.179 41.063-44.179 17.414 0 31.448 5.714 40.805 16.63l16.63-16.63c-12.476-16.377-32.488-25.735-58.734-25.735-41.063 0-69.653 28.329-69.653 69.914 0 41.843 28.59 70.428 69.653 70.428 24.95 0 44.439-8.053 57.174-22.611l-16.37-19.229c-9.619 9.876-22.875 15.851-39.505 15.594" fill="#008a82"/></g><g transform="translate(681.34 578.73)"><path d="m0 0v-138.26h-29.889v27.029c-9.355-19.232-26.765-27.806-49.377-28.07-31.707 0-50.676 20.276-50.676 53.281v86.022h29.884v-77.707c0-20.792 12.216-33.526 32.228-33.526 23.911 0.523 37.941 18.972 37.941 43.143v68.09z" fill="#008a82"/></g><g transform="translate(796.73 579.77)"><path d="m0 0v-28.585c-27.809 1.556-45.481-14.815-47.818-38.468v-72.248h-29.887v138.26h29.887v-27.546c9.356 18.71 25.99 28.585 47.818 28.585" fill="#008a82"/></g><g transform="translate(847.15 618.24)"><path d="m0 0c0-10.136-7.54-17.673-17.412-17.673-9.877 0-17.413 7.537-17.413 17.673 0 10.396 7.536 17.933 17.413 17.933 9.872 0 17.412-7.537 17.412-17.933" fill="#f15a29"/></g><path d="m844.54 440.47h-29.625v138.26h29.625z" fill="#008a82"/><g transform="translate(914.46 618.24)"><path d="m0 0c0-10.136-7.541-17.673-17.413-17.673-9.876 0-17.413 7.537-17.413 17.673 0 10.396 7.537 17.933 17.413 17.933 9.872 0 17.413-7.537 17.413-17.933" 
fill="#f15a29"/></g><path d="m911.85 440.47h-29.625v138.26h29.625z" fill="#008a82"/><g transform="translate(131.67 492.85)"><path d="m0 0c0.628 4.633 3.756 8.256 7.824 9.843l-9.454 65.551c-0.213 0.021-0.426-0.01-0.638 0.021-2.008 0.27-3.807 1.046-5.367 2.126l-57.718-52.611c2.31-21.875 9.31-42.198 19.929-60.052l46.065 29.321c-0.641 1.805-0.924 3.766-0.641 5.801" fill="#f15a29"/></g><g transform="translate(135.54 569.04)"><path d="m0 0 9.451-65.545c0.223-0.018 0.432 0.01 0.648-0.02 0.975-0.131 1.887-0.395 2.767-0.737l38.262 56.827-44.955 14.619c-1.414-2.373-3.608-4.149-6.173-5.144" fill="#f15a29"/></g><g transform="translate(280.25 609.22)"><path d="m0 0-48.729-0.574c-1.211-5.586-6.039-9.562-11.655-9.744l-24.468-36.342 53.617-17.44c0 0.809-0.074 1.62-0.051 2.43 0.912 25.152 12.634 47.22 31.286 61.67" fill="#f15a29"/></g><g transform="translate(217.14 623.42)"><path d="m0 0 2.255 58.382c-4.958 0.183-9.984 0.112-15.047-0.205-15.084-0.972-29.49-4.186-42.934-9.259l51.399-50.633c1.31 0.834 2.768 1.411 4.327 1.715" fill="#f15a29"/></g><g transform="translate(132.75 592.86)"><path d="m0 0c6.582-0.901 11.22-6.828 10.639-13.386l46.505-15.128 24.123 35.815c-4.675 2.295-7.615 7.341-6.872 12.785 0.239 1.772 0.867 3.403 1.748 4.824l-53.093 52.29c-12.31-5.236-23.718-12.06-34.013-20.225l9.958-56.937c0.33-0.028 0.661 6e-3 1.005-0.038" fill="#f15a29"/></g><g transform="translate(135.28 482.35)"><path d="m0 0-46.12-29.351c14.73-22.781 35.474-41.144 59.841-53.023l-6.781 78.911c-2.738 0.383-5.104 1.664-6.94 3.463" fill="#008a82"/></g><g transform="translate(118.77 582.23)"><path d="m0 0c0.611 4.509 3.588 8.073 7.483 9.721l-9.4 53.816c-33.716-29.174-53.94-73.126-51.126-121.01l54.559 49.732c-1.283 2.272-1.897 4.952-1.516 7.74" fill="#f15a29"/></g><g transform="translate(361.67 546.89)"><path d="m0 0c-7.048-4.637-14.905-8.13-23.401-10.048-23.806-5.372-49.043-2.852-73.21-5.508 3.301-19.489 10.484-39.441 11.517-59.119 1.522-29.071-4.033-57.766-26.257-78.143-2.545-2.344-5.248-4.393-8.036-6.28 
72.262 13.991 124.59 79.812 119.73 155.21-0.078 1.306-0.227 2.592-0.345 3.888" fill="#008a82"/></g><g transform="translate(250.67 529.01)"><path d="m0 0c-8.863-1.971-17.477-5.104-25.76-9.947 7.733-9.046 15.469-18.091 23.188-27.143 4.901-5.729-4.405-12.76-9.275-7.059-7.844 9.184-15.692 18.355-23.533 27.543-3.244-2.674-6.116-5.496-8.563-8.494 13.566-16.281 27.111-32.576 40.666-48.876 4.826-5.793-4.486-12.83-9.286-7.061-12.67 15.225-25.348 30.468-38.019 45.701-2.14-4.209-3.767-8.604-4.84-13.168 8.495-9.946 16.998-19.892 25.493-29.843 4.898-5.729-4.411-12.758-9.281-7.061-6.019 7.032-12.026 14.071-18.048 21.114 0 0-4.766-16.237-0.865-52.291-1.66 0.746-3.085 2.109-3.847 4.189-9.88 27.309-16.654 62.017 0.85 88.074 12.678 18.854 32.288 30.459 53.735 36.456l-51.115 16.627-39.174-58.167c2.461-2.63 3.791-6.261 3.274-10.104-0.661-4.881-4.104-8.68-8.509-10.1l7.041-82.087c11.054-4.806 22.783-8.293 34.971-10.297 31.35 2.055 62.177 14.151 70.26 48.559 4.169 17.775 3.976 36.163 0.456 54.069-2.558 13.035-7.277 26.098-9.819 39.366" fill="#008a82"/></g><g transform="translate(312.28 562.6)"><path d="m0 0c-9.515 0-17.227 7.709-17.227 17.228 0 9.514 7.712 17.227 17.227 17.227 9.519 0 17.228-7.713 17.228-17.227 0-9.519-7.709-17.228-17.228-17.228m-87.342 118.87-2.248-58.267c4.425-1.198 7.753-4.685 8.799-9.009l56.965 0.679c7.689 4.574 16.285 7.993 25.578 9.991 2.66 0.576 4.931-0.281 6.548-1.786l-2.677 1.573c-28.575-13.342-42.238-34.715-48.462-48.807-2.258-4.6-3.79-9.265-4.739-13.983-0.01-0.048-0.03-0.179-0.03-0.179-1.08-5.452-1.428-10.967-1.175-16.536 30.725 3.518 70.847-3.875 96.101 16.526-12.633 65.686-68.086 114.92-134.66 119.8" fill="#f15a29"/></g></g></g></svg>
\ No newline at end of file
diff --git a/bh20simplewebuploader/static/main.css b/bh20simplewebuploader/static/main.css
index 47fb408..6e651a4 100644
--- a/bh20simplewebuploader/static/main.css
+++ b/bh20simplewebuploader/static/main.css
@@ -47,7 +47,7 @@ h2 > svg {
     float: right;
 }
 
-#map {
+#mapid {
     width: 800px;
     height: 440px;
     border: 1px solid #AAA;
@@ -178,7 +178,7 @@ span.dropt:hover {text-decoration: none; background: #ffffff; z-index: 6; }
 
 .about {
     display: grid;
-    grid-template-columns: repeat(2, 1fr);
+    grid-template-columns: 1fr 1fr;
     grid-auto-flow: row;
 }
 
@@ -229,7 +229,7 @@ a {
 #metadata_fill_form {
     column-count: 4;
     margin-top: 0.5em;
-    column-width: 250px;
+    column-width: 15em;
 }
 
 .record, .record .field-group, .record .field-group .field {
@@ -238,6 +238,8 @@ a {
     -webkit-column-break-inside: avoid; /* Chrome, Safari, Opera */
     page-break-inside: avoid; /* Firefox */
     break-inside: avoid;
+    display: block;
+    width: 90%;
 }
 
 .record {
@@ -258,6 +260,10 @@ a {
     width: max-content;
 }
 
+.control {
+    width: 100%;
+}
+
 .filter-options {
     width: 100%;
 }
@@ -304,9 +310,10 @@ footer {
 }
 
 .sponsors img {
-    width: 80%;
-    display:block;
-    margin:auto;
+    width: auto;
+    display: block;
+    margin: auto;
+    height: 4em;
 }
 
 .loader {
@@ -378,6 +385,11 @@ div.status {
     border-bottom: 1px solid #ddd;
 }
 
+.map {
+    padding: 20px 32px;
+    /* display: inline-block; */
+}
+
 .editbutton {
     float: right;
     text-align: right;
diff --git a/bh20simplewebuploader/static/main.js b/bh20simplewebuploader/static/main.js
index 4703047..1633c25 100644
--- a/bh20simplewebuploader/static/main.js
+++ b/bh20simplewebuploader/static/main.js
@@ -13,70 +13,41 @@ function myFunction() {
     }
 }
 
-let map = L.map( 'map', {
-  center: [37.0902, -95.7129],  // Default to U.S.A
-  minZoom: 3,
-  zoom: 0
-});
-L.tileLayer( 'http://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {
-  attribution: '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a>',
-  subdomains: ['a','b','c']
-}).addTo( map );
-
-let markers = L.markerClusterGroup().addTo(map)
-
-
 function fetchAPI(apiEndPoint) {
-  fetch(scriptRoot + apiEndPoint)
-    .then(response => {
-      return response.json();
-    })
-    .then(data => {
-        console.log(data);
-      markers.clearLayers();
-      document.getElementById("results").classList.remove("invisible");
-      document.getElementById("loader").classList.add("invisible");
-      if (!(apiEndPoint === "/api/getAllaccessions")) {
-        for (let i = 0; i < data.length; i++) {
-          let {"count": fastaCount, GPS, LocationLabel: label } = data[i];
-          let coordinates = GPS.split(" ");
-          if (!(coordinates == null)) {
-            let lat, lon;
-            [lon, lat] = coordinates.map(parseFloat);
-            let point = L.point()
-            let marker = L.marker([lat, lon]);
-            marker.bindPopup("<b>" + label + "</b><br/>" + "FastaCount: " +fastaCount);
-            markers.addLayer(marker)
-          }}
-      }
-      // Reload the map
-      map.invalidateSize();
-    });
-  document.getElementById("results").classList.add("invisible");
-  document.getElementById("loader").classList.remove("invisible");
-
-}
-
-// Copy from function above but now added as table instead of plain json
-function fetchAPIV2(apiEndPoint) {
-  fetch(scriptRoot + apiEndPoint)
-    .then(response => {
-      return response.json();
-    })
-    .then(data => {
-      console.log(data)
-       htmlString="<table>"
-
-       // Depending on what we want to explore we'd have to call a different function ....? But how to Include that?
-       for (var i=0; i<data.length;i++) {
-            htmlString=htmlString+"<tr><td><a href='#' onclick='fetchSEQByLocation(\""+data[i]["key"]+"\");'>"+data[i]["label"]+"</a></td><td>"+data[i]["count"]+"<td></tr>"
-       }
-       htmlString=htmlString+"</table>"
-
-      document.getElementById("table").innerHTML = htmlString
-    });
-
-  document.getElementById("results").classList.add("invisible");
+    fetch(scriptRoot + apiEndPoint)
+        .then(response => {
+            return response.json();
+        })
+        .then(data => {
+            console.log(data);
+        });
+    document.getElementById("map_view").classList.add("invisible");
+    document.getElementById("loader").classList.remove("invisible");
+}
+
+// Copy from function above but now output HTML table instead of plain json
+function fetchHTMLTable(apiEndPoint) {
+    fetch(scriptRoot + apiEndPoint)
+        .then(response => {
+            return response.json();
+        })
+        .then(data => {
+            console.log(data)
+            htmlString="<table>"
+
+            // Depending on what we want to explore we'd have to call a different function ....? But how to Include that?
+            /*
+            for (var i=0; i<data.length;i++) {
+                htmlString=htmlString+"<tr><td><a href='#' onclick='fetchSEQByLocation(\""+data[i]["key"]+"\");'>"+data[i]["label"]+"</a></td><td>"+data[i]["count"]+"<td></tr>"
+            }
+*/
+            for (var i=0; i<data.length;i++) {
+                htmlString=htmlString+"<tr><td>"+data[i]["label"]+"</td><td>"+data[i]["count"]+"<td></tr>"
+            }
+            htmlString=htmlString+"</table>"
+
+            document.getElementById("table").innerHTML = htmlString
+        });
 }
 
 
@@ -85,36 +56,39 @@ let search = () => {
   fetchAPI(scriptRoot + "/api/getDetailsForSeq?seq=" + encodeURIComponent(m));
 }
 
+// Get count from Arvados
 let fetchCount = () => {
   fetchAPI("/api/getCount");
 }
 
+// Get count from Virtuoso
 let fetchCountDB = () => {
   fetchAPI("/api/getCountDB");
 }
 
 let fetchSEQCountBySpecimen = () => {
-  fetchAPIV2("/api/getSEQCountbySpecimenSource");
+  fetchHTMLTable("/api/getSEQCountbySpecimenSource");
 }
 
 let fetchSEQCountByLocation = () => {
-  fetchAPIV2("/api/getSEQCountbyLocation");
+  fetchHTMLTable("/api/getSEQCountbyLocation");
 }
 
 let fetchSEQCountByTech = () => {
-  fetchAPIV2("/api/getSEQCountbytech");
+  fetchHTMLTable("/api/getSEQCountbytech");
 }
 
 let fetchAllaccessions = () => {
-  fetchAPI("/api/getAllaccessions");
+  fetchHTMLTable("/api/getAllaccessions");
 };
 
-let fetchCountByGPS = () => {
-  fetchAPI("/api/getCountByGPS");
+let fetchMap = () => {
+    fetchAPI("/api/getCountByGPS");
+    updateMapMarkers();
 };
 
 let fetchSEQCountbyLocation = () => {
-  fetchAPIV2("/api/getSEQCountbyLocation");
+  fetchHTMLTable("/api/getSEQCountbyLocation");
 };
 
 let fetchSEQByLocation = () => {
@@ -122,7 +96,7 @@ let fetchSEQByLocation = () => {
 };
 
 let fetchSEQCountbyContinent = () => {
-  fetchAPIV2("/api/getSEQCountbyContinent");
+  fetchHTMLTable("/api/getSEQCountbyContinent");
 }
 
 
@@ -252,36 +226,3 @@ function on_submit_button() {
         return false;
     }
 }
-
-
-
-//
-
-function drawMap(){
-
-// initialize the map on the "map" div with a given center and zoom
-var mymap = L.map('mapid').setView([51.505, -0.09], 1);
-
-L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {
-    attribution: '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors'
-}).addTo(mymap);
-
-fetch(scriptRoot + "api/getCountByGPS")
-    .then(response => {
-    console.log(response)
-      return response.json();
-    })
-    .then(data => {
-
-   for (var i=0; i<data.length;i++) {
-   gps=data[i]["GPS"].split(" ")
-    var circle = L.circle([gps[1], gps[0]], {
-    color: 'red',
-    fillColor: '#f03',
-    fillOpacity: 0.5,
-    radius: parseInt(data[i]["count"])  //not working for whatever reason
-        }).addTo(mymap);
-      }
-
-      });
-}
diff --git a/bh20simplewebuploader/static/map.js b/bh20simplewebuploader/static/map.js
new file mode 100644
index 0000000..1003f7d
--- /dev/null
+++ b/bh20simplewebuploader/static/map.js
@@ -0,0 +1,50 @@
+
+var map = L.map( 'mapid', {
+    center: [51.505, -0.09],  // Default view centre: London, UK (lat, lon)
+    minZoom: 2,
+    zoom: 0
+});
+
+L.tileLayer( 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {  // HTTPS: plain-http tiles are blocked as mixed content on https pages
+    attribution: '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> | <a href="http://covid19.genenetwork.org/">COVID-19 PubSeq</a>',
+    subdomains: ['a','b','c']
+}).addTo(map);
+
+
+function drawMap(){
+    // Fetch the per-location sequence counts and plot them on the shared `map`.
+    fetch(scriptRoot + "api/getCountByGPS")
+        .then(response => {
+            if (!response.ok) {
+                throw new Error("getCountByGPS failed: HTTP " + response.status);
+            }
+            return response.json();
+        })
+        .then(data => updateMapMarkers(data))
+        .catch(error => console.error(error));  // report instead of an unhandled rejection
+    // Reveal the map section, then let Leaflet re-measure its container.
+    document.getElementById("map_view").classList.remove("invisible");
+    map.invalidateSize();
+}
+
+
+
+/* Add one clustered marker per location record to the shared `map`.
+ * Each record is read for "count", "GPS" (a "<lon> <lat>" string) and "LocationLabel".
+ */
+function updateMapMarkers(data) {
+    if (!Array.isArray(data)) return;  // nothing to plot (e.g. called without data)
+    let markers = L.markerClusterGroup();
+    for (let i = 0; i < data.length; i++) {
+        let {"count": fastaCount, GPS, LocationLabel: label } = data[i];
+        // GPS is "<lon> <lat>"; skip records whose coordinates are missing or do not parse.
+        let [lon, lat] = String(GPS).trim().split(/\s+/).map(parseFloat);
+        if (Number.isFinite(lat) && Number.isFinite(lon)) {
+            // Keep the marker local to the loop body rather than an implicit global.
+            let marker = L.marker([lat, lon]);
+            marker.bindPopup("<b>" + label + "</b><br/>" + "SARS-CoV-2<br/>sequences: " +fastaCount);
+            markers.addLayer(marker);
+        }
+    }
+    map.addLayer(markers);
+}
diff --git a/bh20simplewebuploader/templates/blog.html b/bh20simplewebuploader/templates/blog.html
index 823f8a1..f4c2a85 100644
--- a/bh20simplewebuploader/templates/blog.html
+++ b/bh20simplewebuploader/templates/blog.html
@@ -63,6 +63,14 @@
               We explore the Arvados command line and API
             </div>
           </div>
+          <div class="blog-table-row">
+            <div class="blog-table-cell">
+              <a href="/blog?id=using-covid-19-pubseq-part6">Prepare for uploading to EBI/ENA</a>
+            </div>
+            <div class="blog-table-cell">
+              Generate the files needed for uploading to EBI/ENA
+            </div>
+          </div>
         </div>
       </div>
     </section>
diff --git a/bh20simplewebuploader/templates/demo-run.html b/bh20simplewebuploader/templates/demo-run.html
deleted file mode 100644
index a8f9edc..0000000
--- a/bh20simplewebuploader/templates/demo-run.html
+++ /dev/null
@@ -1,26 +0,0 @@
-<section class="search-section">
-  <div class="filter-options" action="#">
-    <p>[Demo] Display content sequences by: </p>
-    <div>
-      <button class="button" onclick="fetchSEQCountBySpecimen()">Count by Specimen source</button>
-      <button class="button" onclick="fetchSEQCountByLocation()">Count by Location</button>
-      <button class="button" onclick="fetchSEQCountByTech()">Count by Sequencer</button>
-      <button class="button" onclick="fetchAllaccessions()">Show All accessions</button>
-      <button class="button" onclick="fetchSEQCountbyContinent()">Count by Continent</button>
-      <button class="button" onclick="fetchCountByGPS()">Map</button>
-
-    </div>
-
-  </div>
-
-</section>
-<div id="loader" class="loader invisible">
-</div>
-
-<section id="results" class="invisible">
-    <div id="map"></div>
-</section>
-
- <section>
-    <div id="table"></div>
- </section>
diff --git a/bh20simplewebuploader/templates/demo.html b/bh20simplewebuploader/templates/demo.html
index 44aded0..75bc0e2 100644
--- a/bh20simplewebuploader/templates/demo.html
+++ b/bh20simplewebuploader/templates/demo.html
@@ -1,13 +1,51 @@
 <!DOCTYPE html>
 <html>
   {% include 'header.html' %}
+  {% include 'mapheader.html' %}
   <body>
     {% include 'banner.html' %}
     {% include 'menu.html' %}
-    {% include 'search.html' %}
-      <p>The Virtuoso database contains <span id="CounterDB"></span> public sequences!</p>
-    {% include 'demo-run.html' %}
-    {% include 'footer.html' %}
+
+    <p>The Virtuoso database contains <span id="CounterDB"></span> public sequences!</p>
+
+    <!--
+    <div class="search">
+      <input id="search-input" type="search" placeholder="FASTA uri" required>
+      <button class="button search-button" type="submit" onclick="search()">
+        <span class="icon ion-search">
+          <span class="sr-only">Search</span>
+        </span>
+      </button>
+      <span class="dropt" title="http://collections.lugli.arvadosapi.com/c=00fede2c6f52b053a14edca01cfa02b7+126/sequence.fasta">(example)<span style="width:500px;"></span></span>
+    </div>
+    -->
+
+    <section class="search-section">
+      <div class="filter-options" action="#">
+        <p>[Demo] Display content sequences by: </p>
+        <div>
+          <button class="button" onclick="fetchSEQCountBySpecimen()">Count by Specimen source</button>
+          <button class="button" onclick="fetchSEQCountByLocation()">Count by Location</button>
+          <button class="button" onclick="fetchSEQCountByTech()">Count by Sequencer</button>
+          <!-- <button class="button" onclick="fetchAllaccessions()">Show All accessions</button> -->
+          <button class="button" onclick="fetchSEQCountbyContinent()">Count by Continent</button>
+        </div>
+
+      </div>
+
+    </section>
+    <div id="loader" class="loader invisible">
+    </div>
+
+    <section id="map_view" class="map">
+      <div id="mapid"></div>
+    </section>
+
+    <section>
+      <div id="table"></div>
+    </section>
+
+ {% include 'footer.html' %}
 
     <script type="text/javascript">
       let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
@@ -24,7 +62,10 @@
               });
       });
 
+     drawMap()
+
     </script>
+
   </body>
 
 </html>
diff --git a/bh20simplewebuploader/templates/error.html b/bh20simplewebuploader/templates/error.html
index b1d9402..fc08aed 100644
--- a/bh20simplewebuploader/templates/error.html
+++ b/bh20simplewebuploader/templates/error.html
@@ -15,7 +15,7 @@
           </pre>
         </p>
         <p>
-            <a href="/">Click here to try again.</a>
+            <a href="/upload">Click here to try again.</a>
         </p>
         <hr>
     </body>
diff --git a/bh20simplewebuploader/templates/footer.html b/bh20simplewebuploader/templates/footer.html
index 37a6b64..f84cef5 100644
--- a/bh20simplewebuploader/templates/footer.html
+++ b/bh20simplewebuploader/templates/footer.html
@@ -21,12 +21,15 @@
           <img src="static/image/covid19biohackathon.png"></a>
       </div>
       <div class="sponsorimg">
-        <a href="https://www.commonwl.org/"><img src="static/image/CWL.png"></a>
+        <a href="https://www.curii.com/"><img src="static/image/curii.logo.ai.png"></a>
       </div>
       <div class="sponsorimg">
         <a href="https://arvados.org/"><img src="static/image/arvados-logo.png"></a>
       </div>
       <div class="sponsorimg">
+        <a href="https://www.commonwl.org/"><img src="static/image/CWL.png"></a>
+      </div>
+      <div class="sponsorimg">
         <a href="https://uthsc.edu/"><img src="static/image/UTHSC-primary-stacked-logo-4c.png"></a>
       </div>
       <div class="sponsorimg">
@@ -44,6 +47,9 @@
     </center>
   </div>
 </section>
+{% if load_map %}
+<script type="text/javascript" src="/static/map.js"></script>
+{% endif %}
 <script type="text/javascript" src="/static/main.js"></script>
 
 <script type="text/javascript">
diff --git a/bh20simplewebuploader/templates/header.html b/bh20simplewebuploader/templates/header.html
index 0ac5157..1d66590 100644
--- a/bh20simplewebuploader/templates/header.html
+++ b/bh20simplewebuploader/templates/header.html
@@ -6,22 +6,4 @@
         {% if blog %}
         <link rel="Blog stylesheet" type="text/css" href="/static/blog.css" />
         {% endif %}
-        <link rel="stylesheet" href="https://unpkg.com/leaflet@1.6.0/dist/leaflet.css"
-              integrity="sha512-xwE/Az9zrjBIphAcBb3F6JVqxf46+CDLwfLMHloNu6KEQCAWi6HcDUbeOfBIptF7tcCzusKFjFw2yuvEpDL9wQ=="
-              crossorigin=""/>
-        <link rel="stylesheet" href="https://unpkg.com/leaflet.markercluster@1.4.1/dist/MarkerCluster.css"
-              integrity="sha512-RLEjtaFGdC4iQMJDbMzim/dOvAu+8Qp9sw7QE4wIMYcg2goVoivzwgSZq9CsIxp4xKAZPKh5J2f2lOko2Ze6FQ=="
-              crossorigin=""/>
-
-        <link rel="stylesheet" href="https://unpkg.com/leaflet.markercluster@1.4.1/dist/MarkerCluster.Default.css"
-              integrity="sha512-BBToHPBStgMiw0lD4AtkRIZmdndhB6aQbXpX7omcrXeG2PauGBl2lzq2xUZTxaLxYz5IDHlmneCZ1IJ+P3kYtQ=="
-              crossorigin=""/>
-
-        <script src="https://unpkg.com/leaflet@1.6.0/dist/leaflet.js"
-                integrity="sha512-gZwIG9x3wUXg2hdXF6+rVkLF/0Vi9U8D2Ntg4Ga5I5BZpVkVxlJWbSQtXPSiUTtC0TjtGOmxa1AJPuV0CPthew=="
-                crossorigin=""></script>
-
-        <script src="https://unpkg.com/leaflet.markercluster@1.4.1/dist/leaflet.markercluster.js"
-                integrity="sha512-MQlyPV+ol2lp4KodaU/Xmrn+txc1TP15pOBF/2Sfre7MRsA/pB4Vy58bEqe9u7a7DczMLtU5wT8n7OblJepKbg=="
-                crossorigin=""></script>
     </head>
diff --git a/bh20simplewebuploader/templates/home.html b/bh20simplewebuploader/templates/home.html
index b90a18d..bede611 100644
--- a/bh20simplewebuploader/templates/home.html
+++ b/bh20simplewebuploader/templates/home.html
@@ -1,6 +1,7 @@
 <!DOCTYPE html>
 <html>
   {% include 'header.html' %}
+  {% include 'mapheader.html' %}
     <body>
       {% include 'banner.html' %}
       {% include 'menu.html' %}
@@ -44,7 +45,19 @@
                 </div>
         </section>
 
-{% include 'footer.html' %}
+      <section id="map_view" class="map">
+        <div id="mapid"></div>
+      </section>
+
+      {% include 'footer.html' %}
+
+
+      <script type="text/javascript">
+        let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
+
+        drawMap()
+
+      </script>
 
    </body>
 </html>
diff --git a/bh20simplewebuploader/templates/map.html b/bh20simplewebuploader/templates/map.html
deleted file mode 100644
index 595af0c..0000000
--- a/bh20simplewebuploader/templates/map.html
+++ /dev/null
@@ -1,33 +0,0 @@
-<!DOCTYPE html>
-<html>
-  {% include 'header.html' %}
-<link rel="stylesheet" href="https://unpkg.com/leaflet@1.6.0/dist/leaflet.css"
-   integrity="sha512-xwE/Az9zrjBIphAcBb3F6JVqxf46+CDLwfLMHloNu6KEQCAWi6HcDUbeOfBIptF7tcCzusKFjFw2yuvEpDL9wQ=="
-   crossorigin=""/>
-
-    {% include 'banner.html' %}
-    {% include 'menu.html' %}
-    <div id="mapid" style="height: 500px;"></div>
-
-    {% include 'footer.html' %}
-
-
-
-
-       <script type="text/javascript">
-        let scriptRoot = {{ request.script_root|tojson|safe }}; // examples
-      </script>
-
-<!-- Make sure you put this AFTER Leaflet's CSS -->
- <script src="https://unpkg.com/leaflet@1.6.0/dist/leaflet.js"
-   integrity="sha512-gZwIG9x3wUXg2hdXF6+rVkLF/0Vi9U8D2Ntg4Ga5I5BZpVkVxlJWbSQtXPSiUTtC0TjtGOmxa1AJPuV0CPthew=="
-   crossorigin=""></script>
-
-  <script>
-     //drawMap
-     drawMap()
-  </script>
-
-  </body>
-
-</html>
diff --git a/bh20simplewebuploader/templates/mapheader.html b/bh20simplewebuploader/templates/mapheader.html
new file mode 100644
index 0000000..ca62051
--- /dev/null
+++ b/bh20simplewebuploader/templates/mapheader.html
@@ -0,0 +1,16 @@
+  <link rel="stylesheet" href="https://unpkg.com/leaflet@1.6.0/dist/leaflet.css"
+        integrity="sha512-xwE/Az9zrjBIphAcBb3F6JVqxf46+CDLwfLMHloNu6KEQCAWi6HcDUbeOfBIptF7tcCzusKFjFw2yuvEpDL9wQ=="
+        crossorigin=""/>
+  <link rel="stylesheet" href="https://unpkg.com/leaflet.markercluster@1.4.1/dist/MarkerCluster.css"
+        integrity="sha512-RLEjtaFGdC4iQMJDbMzim/dOvAu+8Qp9sw7QE4wIMYcg2goVoivzwgSZq9CsIxp4xKAZPKh5J2f2lOko2Ze6FQ=="
+        crossorigin=""/>
+  <link rel="stylesheet" href="https://unpkg.com/leaflet.markercluster@1.4.1/dist/MarkerCluster.Default.css"
+        integrity="sha512-BBToHPBStgMiw0lD4AtkRIZmdndhB6aQbXpX7omcrXeG2PauGBl2lzq2xUZTxaLxYz5IDHlmneCZ1IJ+P3kYtQ=="
+        crossorigin=""/>
+
+  <script src="https://unpkg.com/leaflet@1.6.0/dist/leaflet.js"
+          integrity="sha512-gZwIG9x3wUXg2hdXF6+rVkLF/0Vi9U8D2Ntg4Ga5I5BZpVkVxlJWbSQtXPSiUTtC0TjtGOmxa1AJPuV0CPthew=="
+          crossorigin=""></script>
+  <script src="https://unpkg.com/leaflet.markercluster@1.4.1/dist/leaflet.markercluster.js"
+          integrity="sha512-MQlyPV+ol2lp4KodaU/Xmrn+txc1TP15pOBF/2Sfre7MRsA/pB4Vy58bEqe9u7a7DczMLtU5wT8n7OblJepKbg=="
+          crossorigin=""></script>
diff --git a/bh20simplewebuploader/templates/search.html b/bh20simplewebuploader/templates/search.html
index dbdca90..e69de29 100644
--- a/bh20simplewebuploader/templates/search.html
+++ b/bh20simplewebuploader/templates/search.html
@@ -1,10 +0,0 @@
-<div class="search">
-  <input id="search-input" type="search" placeholder="FASTA uri" required>
-  <button class="button search-button" type="submit" onclick="search()">
-    <span class="icon ion-search">
-      <span class="sr-only">Search</span>
-    </span>
-  </button>
-  <span class="dropt" title="http://collections.lugli.arvadosapi.com/c=00fede2c6f52b053a14edca01cfa02b7+126/sequence.fasta">(example)<span style="width:500px;"></span></span>
-</div>
-
diff --git a/bh20simplewebuploader/templates/status.html b/bh20simplewebuploader/templates/status.html
index a1cf28f..e89437e 100644
--- a/bh20simplewebuploader/templates/status.html
+++ b/bh20simplewebuploader/templates/status.html
@@ -7,7 +7,8 @@
 
       <h1>Sequence upload processing status</h1>
 
-        <div class="status">
+      <div class="status">
+
 	  {{ table }}
         </div>
 
diff --git a/bh20simplewebuploader/templates/success.html b/bh20simplewebuploader/templates/success.html
index 9f0987c..c2302fa 100644
--- a/bh20simplewebuploader/templates/success.html
+++ b/bh20simplewebuploader/templates/success.html
@@ -9,7 +9,7 @@
         <h1>Upload Successful</h1>
         <hr>
         <p>
-            Your files have been uploaded. They should soon appear as output of the <a href="/download">Public SARS-CoV-2 Sequence Resource</a>.
+            Your files have been uploaded. You can track their <a href="/status">QC status</a>; once validated, they will be part of the <a href="/download">Public SARS-CoV-2 Sequence Resource</a>.
         </p>
         <p>
             The upload log was:
diff --git a/bh20simplewebuploader/templates/validated.html b/bh20simplewebuploader/templates/validated.html
new file mode 100644
index 0000000..cee94bd
--- /dev/null
+++ b/bh20simplewebuploader/templates/validated.html
@@ -0,0 +1,17 @@
+<!DOCTYPE html>
+<html>
+  {% include 'header.html' %}
+    <body>
+      {% include 'banner.html' %}
+      {% include 'menu.html' %}
+
+      <h1>Validated sequences</h1>
+
+        <div class="status">
+	  {{ table }}
+        </div>
+
+{% include 'footer.html' %}
+
+   </body>
+</html>
diff --git a/doc/blog/using-covid-19-pubseq-part1.html b/doc/blog/using-covid-19-pubseq-part1.html
index 1959fac..0e6136c 100644
--- a/doc/blog/using-covid-19-pubseq-part1.html
+++ b/doc/blog/using-covid-19-pubseq-part1.html
@@ -3,7 +3,7 @@
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
 <head>
-<!-- 2020-05-29 Fri 12:06 -->
+<!-- 2020-07-17 Fri 05:05 -->
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1" />
 <title>COVID-19 PubSeq (part 1)</title>
@@ -248,20 +248,20 @@ for the JavaScript code in this tag.
 <h2>Table of Contents</h2>
 <div id="text-table-of-contents">
 <ul>
-<li><a href="#org9afe6ab">1. What does this mean?</a></li>
-<li><a href="#orgf4bc3d4">2. Fetch sequence data</a></li>
-<li><a href="#org9d7d482">3. Predicates</a></li>
-<li><a href="#orgc6046bb">4. Fetch submitter info and other metadata</a></li>
-<li><a href="#orgdcb216b">5. Fetch all sequences from Washington state</a></li>
-<li><a href="#org7060f51">6. Discussion</a></li>
-<li><a href="#orgdc51ccc">7. Acknowledgements</a></li>
+<li><a href="#org0db5db0">1. What does this mean?</a></li>
+<li><a href="#orge5267fd">2. Fetch sequence data</a></li>
+<li><a href="#orgfbd3adc">3. Predicates</a></li>
+<li><a href="#org08e70e1">4. Fetch submitter info and other metadata</a></li>
+<li><a href="#org9194557">5. Fetch all sequences from Washington state</a></li>
+<li><a href="#org76317ad">6. Discussion</a></li>
+<li><a href="#orgeb871a1">7. Acknowledgements</a></li>
 </ul>
 </div>
 </div>
 
 
-<div id="outline-container-org9afe6ab" class="outline-2">
-<h2 id="org9afe6ab"><span class="section-number-2">1</span> What does this mean?</h2>
+<div id="outline-container-org0db5db0" class="outline-2">
+<h2 id="org0db5db0"><span class="section-number-2">1</span> What does this mean?</h2>
 <div class="outline-text-2" id="text-1">
 <p>
 This means that when someone uploads a SARS-CoV-2 sequence using one
@@ -274,24 +274,24 @@ expressed in a <a href="https://github.com/arvados/bh20-seq-resource/blob/master
   type: record
   fields:
     host_species:
-        doc: Host species as defined in NCBITaxon, e.g. http://purl.obolibrary.org/obo/NCBITaxon_<span style="color: #8bc34a;">9606</span> for Homo sapiens
+        doc: Host species as defined in NCBITaxon, e.g. http://purl.obolibrary.org/obo/NCBITaxon_9606 for Homo sapiens
         type: string
         jsonldPredicate:
-          _id: http://www.ebi.ac.uk/efo/EFO_<span style="color: #8bc34a;">0000532</span>
-          _type: <span style="color: #9ccc65;">"@id"</span>
-          noLinkCheck: <span style="color: #8bc34a;">true</span>
+          _id: http://www.ebi.ac.uk/efo/EFO_0000532
+          _type: "@id"
+          noLinkCheck: true
     host_sex:
-        doc: Sex of the host as defined in PATO, expect male <span style="color: #e91e63;">()</span> or female <span style="color: #e91e63;">()</span>
+        doc: Sex of the host as defined in PATO, expect male () or female ()
         type: string?
         jsonldPredicate:
-          _id: http://purl.obolibrary.org/obo/PATO_<span style="color: #8bc34a;">0000047</span>
-          _type: <span style="color: #9ccc65;">"@id"</span>
-          noLinkCheck: <span style="color: #8bc34a;">true</span>
+          _id: http://purl.obolibrary.org/obo/PATO_0000047
+          _type: "@id"
+          noLinkCheck: true
     host_age:
-        doc: Age of the host as number <span style="color: #e91e63;">(</span>e.g. <span style="color: #8bc34a;">50</span><span style="color: #e91e63;">)</span>
+        doc: Age of the host as number (e.g. 50)
         type: int?
         jsonldPredicate:
-          _id: http://purl.obolibrary.org/obo/PATO_<span style="color: #8bc34a;">0000011</span>
+          _id: http://purl.obolibrary.org/obo/PATO_0000011
 </pre>
 </div>
 
@@ -314,8 +314,8 @@ initiative!
 </div>
 
 
-<div id="outline-container-orgf4bc3d4" class="outline-2">
-<h2 id="orgf4bc3d4"><span class="section-number-2">2</span> Fetch sequence data</h2>
+<div id="outline-container-orge5267fd" class="outline-2">
+<h2 id="orge5267fd"><span class="section-number-2">2</span> Fetch sequence data</h2>
 <div class="outline-text-2" id="text-2">
 <p>
 The latest run of the pipeline can be viewed <a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca">here</a>. Each of these
@@ -339,8 +339,8 @@ these identifiers throughout.
 </div>
 </div>
 
-<div id="outline-container-org9d7d482" class="outline-2">
-<h2 id="org9d7d482"><span class="section-number-2">3</span> Predicates</h2>
+<div id="outline-container-orgfbd3adc" class="outline-2">
+<h2 id="orgfbd3adc"><span class="section-number-2">3</span> Predicates</h2>
 <div class="outline-text-2" id="text-3">
 <p>
 To explore an RDF dataset, the first query we can do is open and gets
@@ -350,10 +350,10 @@ the following in a SPARQL end point
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?p
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">select distinct ?p
+{
    ?o ?p ?s
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -364,10 +364,10 @@ To get a <a href="http://sparql.genenetwork.org/sparql/?default-graph-uri=&amp;q
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?g
-<span style="color: #e91e63;">{</span>
-    GRAPH ?g <span style="color: #2196F3;">{</span>?s ?p ?o<span style="color: #2196F3;">}</span>
-<span style="color: #e91e63;">}</span>
+<pre class="src src-sql">select distinct ?g
+{
+    GRAPH ?g {?s ?p ?o}
+}
 </pre>
 </div>
 
@@ -383,10 +383,10 @@ To list all submitters, try
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?s
-<span style="color: #e91e63;">{</span>
-   ?o <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">#MainSchema/submitter&gt;</a> ?s
-<span style="color: #e91e63;">}</span>
+<pre class="src src-sql">select distinct ?s
+{
+   ?o &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/submitter&gt; ?s
+}
 </pre>
 </div>
 
@@ -397,11 +397,11 @@ and by
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?s
-<span style="color: #e91e63;">{</span>
-   ?o <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/submitter">#MainSchema/submitter&gt;</a> ?id .
+<pre class="src src-sql">select distinct ?s
+{
+   ?o &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/submitter&gt; ?id .
    ?id ?p ?s
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -415,12 +415,12 @@ To lift the full URL out of the query you can use a header like
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?dataset ?submitter
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select distinct ?dataset ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -438,32 +438,32 @@ Now we got this far, lets <a href="http://sparql.genenetwork.org/sparql/?default
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #e91e63;">(</span><span style="color: #ff8A65;">COUNT</span><span style="color: #2196F3;">(</span><span style="color: #fff59d;">distinct</span> ?dataset<span style="color: #2196F3;">)</span> <span style="color: #fff59d;">as</span> ?num<span style="color: #e91e63;">)</span>
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select (COUNT(distinct ?dataset) as ?num)
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 </div>
 </div>
 
 
-<div id="outline-container-orgc6046bb" class="outline-2">
-<h2 id="orgc6046bb"><span class="section-number-2">4</span> Fetch submitter info and other metadata</h2>
+<div id="outline-container-org08e70e1" class="outline-2">
+<h2 id="org08e70e1"><span class="section-number-2">4</span> Fetch submitter info and other metadata</h2>
 <div class="outline-text-2" id="text-4">
 <p>
 To get dataests with submitters we can do the above
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?dataset ?p ?submitter
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select distinct ?dataset ?p ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -480,13 +480,13 @@ Let's focus on one sample with
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?dataset ?submitter
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select distinct ?dataset ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter .
-   FILTER<span style="color: #2196F3;">(</span><span style="color: #fff59d;">CONTAINS</span><span style="color: #EF6C00;">(</span>?submitter,"Roychoudhury"<span style="color: #EF6C00;">)</span><span style="color: #2196F3;">)</span> .
-<span style="color: #e91e63;">}</span>
+   FILTER(CONTAINS(?submitter,"Roychoudhury")) .
+}
 </pre>
 </div>
 
@@ -496,12 +496,12 @@ see if we can get a sample ID by listing sample predicates
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?p
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select distinct ?p
+{
    ?dataset ?p ?o .
    ?dataset pubseq:submitter ?id .
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -513,15 +513,15 @@ Let's zoom in on those of Roychoudhury with
 
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/">#MainSchema/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?sid ?sample ?p1 ?dataset ?submitter
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+select distinct ?sid ?sample ?p1 ?dataset ?submitter
+{
    ?dataset pubseq:submitter ?id .
    ?id ?p ?submitter .
-   FILTER<span style="color: #2196F3;">(</span><span style="color: #fff59d;">CONTAINS</span><span style="color: #EF6C00;">(</span>?submitter,"Roychoudhury"<span style="color: #EF6C00;">)</span><span style="color: #2196F3;">)</span> .
+   FILTER(CONTAINS(?submitter,"Roychoudhury")) .
    ?dataset pubseq:sample ?sid .
    ?sid ?p1 ?sample
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -532,18 +532,13 @@ this database. Let's focus on one sample "MT326090.1" with predicate
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql"><span style="color: #fff59d;">PREFIX</span> pubseq: <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
-PREFIX sio: &lt;http://semanticscience.org/resource/">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
-PREFIX sio: &lt;http://semanticscience.org/resource/">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
-PREFIX sio: &lt;http://semanticscience.org/resource/">#MainSchema/&gt;
-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
-PREFIX sio: &lt;http://semanticscience.org/resource/">PREFIX</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
-PREFIX sio: &lt;http://semanticscience.org/resource/"> sio: &lt;http://semanticscience.org/resource/&gt;</a>
-<span style="color: #fff59d;">select</span> <span style="color: #fff59d;">distinct</span> ?sample ?p ?o
-<span style="color: #e91e63;">{</span>
+<pre class="src src-sql">PREFIX pubseq: &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/&gt;
+PREFIX sio: &lt;http://semanticscience.org/resource/&gt;
+select distinct ?sample ?p ?o
+{
    ?sample sio:SIO_000115 "MT326090.1" .
    ?sample ?p ?o .
-<span style="color: #e91e63;">}</span>
+}
 </pre>
 </div>
 
@@ -561,8 +556,8 @@ to view/query the database.
 </div>
 </div>
 
-<div id="outline-container-orgdcb216b" class="outline-2">
-<h2 id="orgdcb216b"><span class="section-number-2">5</span> Fetch all sequences from Washington state</h2>
+<div id="outline-container-org9194557" class="outline-2">
+<h2 id="org9194557"><span class="section-number-2">5</span> Fetch all sequences from Washington state</h2>
 <div class="outline-text-2" id="text-5">
 <p>
 Now we know how to get at the origin we can do it the other way round
@@ -570,15 +565,11 @@ and fetch all sequences referring to Washington state
 </p>
 
 <div class="org-src-container">
-<pre class="src src-sql">
-<span style="color: #fff59d;">select</span> ?seq ?sample
-<span style="color: #e91e63;">{</span>
-    ?seq <a href="http://biohackathon.org/bh20-seq-schema#MainSchema/sample&gt; ?sample .
-    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q1223">&lt;http://biohackathon.org/bh20-seq-</a><span style="color: #fff59d;"><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/sample&gt; ?sample .
-    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q1223">schema</a></span><a href="http://biohackathon.org/bh20-seq-schema#MainSchema/sample&gt; ?sample .
-    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q1223">#MainSchema/sample&gt; ?sample .
-    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q1223&gt;</a>
-<span style="color: #e91e63;">}</span>
+<pre class="src src-sql">select ?seq ?sample
+{
+    ?seq &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/sample&gt; ?sample .
+    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q1223&gt;
+}
 </pre>
 </div>
 
@@ -586,11 +577,26 @@ and fetch all sequences referring to Washington state
 which lists 300 sequences originating from Washington state! Which is almost
 half of the set coming out of GenBank.
 </p>
+
+<p>
+Likewise, to list all sequences from Turkey, we first look up its wikidata
+entity, which is <a href="https://www.wikidata.org/wiki/Q43">Q43</a>:
+</p>
+
+<div class="org-src-container">
+<pre class="src src-sql">select ?seq ?sample
+{
+    ?seq &lt;http://biohackathon.org/bh20-seq-schema#MainSchema/sample&gt; ?sample .
+    ?sample &lt;http://purl.obolibrary.org/obo/GAZ_00000448&gt; &lt;http://www.wikidata.org/entity/Q43&gt;
+}
+</pre>
 </div>
 </div>
+</div>
+
 
-<div id="outline-container-org7060f51" class="outline-2">
-<h2 id="org7060f51"><span class="section-number-2">6</span> Discussion</h2>
+<div id="outline-container-org76317ad" class="outline-2">
+<h2 id="org76317ad"><span class="section-number-2">6</span> Discussion</h2>
 <div class="outline-text-2" id="text-6">
 <p>
 The public sequence uploader collects sequences, raw data and
@@ -601,8 +607,8 @@ referenced in publications and origins are citeable.
 </div>
 </div>
 
-<div id="outline-container-orgdc51ccc" class="outline-2">
-<h2 id="orgdc51ccc"><span class="section-number-2">7</span> Acknowledgements</h2>
+<div id="outline-container-orgeb871a1" class="outline-2">
+<h2 id="orgeb871a1"><span class="section-number-2">7</span> Acknowledgements</h2>
 <div class="outline-text-2" id="text-7">
 <p>
 The overall effort was due to magnificent freely donated input by a
@@ -617,7 +623,7 @@ Garrison this initiative would not have existed!
 </div>
 </div>
 <div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-05-29 Fri 12:06</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-17 Fri 05:02</small>.
 </div>
 </body>
 </html>
diff --git a/doc/blog/using-covid-19-pubseq-part4.html b/doc/blog/using-covid-19-pubseq-part4.html
index b5a05ca..c975c21 100644
--- a/doc/blog/using-covid-19-pubseq-part4.html
+++ b/doc/blog/using-covid-19-pubseq-part4.html
@@ -3,7 +3,7 @@
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
 <head>
-<!-- 2020-07-12 Sun 06:24 -->
+<!-- 2020-07-17 Fri 05:04 -->
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1" />
 <title>COVID-19 PubSeq (part 4)</title>
@@ -161,6 +161,19 @@
   .footdef  { margin-bottom: 1em; }
   .figure { padding: 1em; }
   .figure p { text-align: center; }
+  .equation-container {
+    display: table;
+    text-align: center;
+    width: 100%;
+  }
+  .equation {
+    vertical-align: middle;
+  }
+  .equation-label {
+    display: table-cell;
+    text-align: right;
+    vertical-align: middle;
+  }
   .inlinetask {
     padding: 10px;
     border: 2px solid gray;
@@ -186,7 +199,7 @@
 @licstart  The following is the entire license notice for the
 JavaScript code in this tag.
 
-Copyright (C) 2012-2018 Free Software Foundation, Inc.
+Copyright (C) 2012-2020 Free Software Foundation, Inc.
 
 The JavaScript code in this tag is free software: you can
 redistribute it and/or modify it under the terms of the GNU
@@ -235,15 +248,16 @@ for the JavaScript code in this tag.
 <h2>Table of Contents</h2>
 <div id="text-table-of-contents">
 <ul>
-<li><a href="#org8f8b64a">1. What does this mean?</a></li>
-<li><a href="#orgcc7a403">2. Modify Workflow</a></li>
+<li><a href="#orgc2ee09f">1. What does this mean?</a></li>
+<li><a href="#org0d37881">2. Where can I find the workflows?</a></li>
+<li><a href="#orgddb0531">3. Modify Workflow</a></li>
 </ul>
 </div>
 </div>
 
 
-<div id="outline-container-org8f8b64a" class="outline-2">
-<h2 id="org8f8b64a"><span class="section-number-2">1</span> What does this mean?</h2>
+<div id="outline-container-orgc2ee09f" class="outline-2">
+<h2 id="orgc2ee09f"><span class="section-number-2">1</span> What does this mean?</h2>
 <div class="outline-text-2" id="text-1">
 <p>
 This means that when someone uploads a SARS-CoV-2 sequence using one
@@ -253,18 +267,28 @@ which triggers a rerun of our workflows.
 </div>
 </div>
 
-
-<div id="outline-container-orgcc7a403" class="outline-2">
-<h2 id="orgcc7a403"><span class="section-number-2">2</span> Modify Workflow</h2>
+<div id="outline-container-org0d37881" class="outline-2">
+<h2 id="org0d37881"><span class="section-number-2">2</span> Where can I find the workflows?</h2>
 <div class="outline-text-2" id="text-2">
 <p>
+Workflows are written in the common workflow language (CWL) and listed
+on <a href="https://github.com/arvados/bh20-seq-resource/tree/master/workflows">github</a>. PubSeq being an open project these workflows can be studied
+and modified!
+</p>
+</div>
+</div>
+
+<div id="outline-container-orgddb0531" class="outline-2">
+<h2 id="orgddb0531"><span class="section-number-2">3</span> Modify Workflow</h2>
+<div class="outline-text-2" id="text-3">
+<p>
 <i>Work in progress!</i>
 </p>
 </div>
 </div>
 </div>
 <div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 06:24</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-17 Fri 01:47</small>.
 </div>
 </body>
 </html>
diff --git a/doc/blog/using-covid-19-pubseq-part5.html b/doc/blog/using-covid-19-pubseq-part5.html
index 80bf559..4caa5ac 100644
--- a/doc/blog/using-covid-19-pubseq-part5.html
+++ b/doc/blog/using-covid-19-pubseq-part5.html
@@ -3,7 +3,7 @@
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
 <head>
-<!-- 2020-07-12 Sun 06:24 -->
+<!-- 2020-07-17 Fri 05:03 -->
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1" />
 <title>COVID-19 PubSeq (part 4)</title>
@@ -161,6 +161,19 @@
   .footdef  { margin-bottom: 1em; }
   .figure { padding: 1em; }
   .figure p { text-align: center; }
+  .equation-container {
+    display: table;
+    text-align: center;
+    width: 100%;
+  }
+  .equation {
+    vertical-align: middle;
+  }
+  .equation-label {
+    display: table-cell;
+    text-align: right;
+    vertical-align: middle;
+  }
   .inlinetask {
     padding: 10px;
     border: 2px solid gray;
@@ -186,7 +199,7 @@
 @licstart  The following is the entire license notice for the
 JavaScript code in this tag.
 
-Copyright (C) 2012-2018 Free Software Foundation, Inc.
+Copyright (C) 2012-2020 Free Software Foundation, Inc.
 
 The JavaScript code in this tag is free software: you can
 redistribute it and/or modify it under the terms of the GNU
@@ -235,38 +248,40 @@ for the JavaScript code in this tag.
 <h2>Table of Contents</h2>
 <div id="text-table-of-contents">
 <ul>
-<li><a href="#org871ad58">1. Modify Metadata</a></li>
-<li><a href="#org07e8755">2. What is the schema?</a></li>
-<li><a href="#org4857280">3. How is the website generated?</a></li>
-<li><a href="#orge709ae2">4. Modifying the schema</a></li>
+<li><a href="#org758b923">1. Modify Metadata</a></li>
+<li><a href="#orgec32c13">2. What is the schema?</a></li>
+<li><a href="#org2e487b2">3. How is the website generated?</a></li>
+<li><a href="#orge4dfe84">4. Modifying the schema</a></li>
+<li><a href="#org564a7a8">5. Adding fields to the form</a></li>
+<li><a href="#org633781a">6. <span class="todo TODO">TODO</span> Testing the license fields</a></li>
 </ul>
 </div>
 </div>
 
 
-<div id="outline-container-org871ad58" class="outline-2">
-<h2 id="org871ad58"><span class="section-number-2">1</span> Modify Metadata</h2>
+<div id="outline-container-org758b923" class="outline-2">
+<h2 id="org758b923"><span class="section-number-2">1</span> Modify Metadata</h2>
 <div class="outline-text-2" id="text-1">
 <p>
 The public sequence resource uses multiple data formats listed on the
-<a href="./download">DOWNLOAD</a> page. One of the most exciting features is the full support
+<a href="http://covid19.genenetwork.org/download">download</a> page. One of the most exciting features is the full support
 for RDF and semantic web/linked data ontologies. This technology
 allows for querying data in unprescribed ways - that is, you can
 formulate your own queries without dealing with a preset model of that
 data (so typical of CSV files and SQL tables). Examples of exploring
-data are listed <a href="./blog?id=using-covid-19-pubseq-part1">here</a>.
+data are listed <a href="http://covid19.genenetwork.org/blog?id=using-covid-19-pubseq-part1">here</a>.
 </p>
 
 <p>
 In this BLOG we are going to look at the metadata entered on the
-<a href="./">COVID-19 PubSeq</a> website (or command line client). It is important to
+COVID-19 PubSeq website (or command line client). It is important to
 understand that anyone, including you, can change that information!
 </p>
 </div>
 </div>
 
-<div id="outline-container-org07e8755" class="outline-2">
-<h2 id="org07e8755"><span class="section-number-2">2</span> What is the schema?</h2>
+<div id="outline-container-orgec32c13" class="outline-2">
+<h2 id="orgec32c13"><span class="section-number-2">2</span> What is the schema?</h2>
 <div class="outline-text-2" id="text-2">
 <p>
 The default metadata schema is listed <a href="https://github.com/arvados/bh20-seq-resource/blob/master/bh20sequploader/bh20seq-schema.yml">here</a>.
@@ -274,8 +289,8 @@ The default metadata schema is listed <a href="https://github.com/arvados/bh20-s
 </div>
 </div>
 
-<div id="outline-container-org4857280" class="outline-2">
-<h2 id="org4857280"><span class="section-number-2">3</span> How is the website generated?</h2>
+<div id="outline-container-org2e487b2" class="outline-2">
+<h2 id="org2e487b2"><span class="section-number-2">3</span> How is the website generated?</h2>
 <div class="outline-text-2" id="text-3">
 <p>
 Using the schema we use <a href="https://pypi.org/project/PyShEx/">pyshex</a> shex expressions and <a href="https://github.com/common-workflow-language/schema_salad">schema salad</a> to
@@ -285,13 +300,13 @@ All from that one metadata schema.
 </div>
 </div>
 
-<div id="outline-container-orge709ae2" class="outline-2">
-<h2 id="orge709ae2"><span class="section-number-2">4</span> Modifying the schema</h2>
+<div id="outline-container-orge4dfe84" class="outline-2">
+<h2 id="orge4dfe84"><span class="section-number-2">4</span> Modifying the schema</h2>
 <div class="outline-text-2" id="text-4">
 <p>
-One of the first things we wanted to do is to add a field for the data
-license. Initially we only support CC-4.0 as a license by default, but
-now we want to give uploaders the option to make it an even more
+One of the first things we wanted to do was to add a field for the data
+license. Initially we only supported CC-4.0 as a license, but
+we wanted to give uploaders the option to use an even more
 liberal CC0 license. The first step is to find a good ontology term
 for the field. Searching for `creative commons cc0 rdf' rendered this
 useful <a href="https://creativecommons.org/ns">page</a>.  We also find an <a href="https://wiki.creativecommons.org/wiki/CC_License_Rdf_Overview">overview</a> where CC0 is represented as URI
@@ -302,13 +317,148 @@ attributionName and attributionURL.
 </p>
 
 <p>
-<i>Note: work in progress</i>
+A minimal triple should be
+</p>
+
+<pre class="example">
+id  xhtml:license  &lt;http://creativecommons.org/licenses/by/4.0/&gt; .
+</pre>
+
+
+<p>
+Other suggestions are
+</p>
+
+<pre class="example">
+id  dc:title "Description" .
+id  cc:attributionName "Your Name" .
+id  cc:attributionURL &lt;http://resource.org/id&gt;
+</pre>
+
+
+<p>
+and 'dc:source' which indicates the original source of any modified
+work, specified as a URI.
+The prefix 'cc:' is an abbreviation for <a href="http://creativecommons.org/ns#">http://creativecommons.org/ns#</a>.
+</p>
+
+<p>
+Going back to the schema, where does it fit? Under host, sample,
+virus, technology or submitter block? It could fit under sample, but
+actually the license concerns the whole metadata block and sequence,
+so I think we can fit it under its own license tag. For example
+</p>
+
+
+<p>
+id: placeholder
+</p>
+
+<pre class="example">
+license:
+    license_type: http://creativecommons.org/licenses/by/4.0/
+    attribution_title: "Sample ID"
+    attribution_name: "John doe, Joe Boe, Jonny Oe"
+    attribution_url: http://covid19.genenetwork.org/id
+    attribution_source: https://www.ncbi.nlm.nih.gov/pubmed/323088888
+</pre>
+
+
+<p>
+So, let's update the example. Notice the license info is optional - if it is missing
+we just assume the default CC-4.0.
+</p>
+
+<p>
+One thing that is interesting is that in the name space <a href="https://creativecommons.org/ns">https://creativecommons.org/ns</a> there
+is no mention of a title. I think it is useful, however, because we have no such field.
+So, we'll add it simply as a title field. Now the draft schema is
 </p>
+
+<div class="org-src-container">
+<pre class="src src-js">- name: licenseSchema
+  type: record
+  fields:
+    license_type:
+      doc: License types as refined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#License
+    title:
+      doc: Attribution title related to license
+      type: string?
+      jsonldPredicate:
+          _id: http://semanticscience.org/resource/SIO_001167
+    attribution_url:
+      doc: Attribution URL related to license
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#Work
+    attribution_source:
+      doc: Attribution source URL
+      type: string?
+      jsonldPredicate:
+          _id: https://creativecommons.org/ns#Work
+</pre>
+</div>
+
+<p>
+Now, we are no ontology experts, right? So, next we submit a patch to
+our source tree and ask for feedback before wiring it up in the data
+entry form. The pull request was submitted <a href="https://github.com/arvados/bh20-seq-resource/pull/97">here</a> and reviewed on the
+gitter channel and I merged it.
+</p>
+</div>
 </div>
+
+<div id="outline-container-org564a7a8" class="outline-2">
+<h2 id="org564a7a8"><span class="section-number-2">5</span> Adding fields to the form</h2>
+<div class="outline-text-2" id="text-5">
+<p>
+To add the new fields to the form we have to modify it a little. If we
+go to the upload form we need to add the license box. The schema is
+loaded in <a href="https://github.com/arvados/bh20-seq-resource/blob/a0c8ebd57b875f265e8b0efec4abfaf892eb6c45/bh20simplewebuploader/main.py#L229">main.py</a> in the 'generate_form' function.
+</p>
+
+<p>
+With this <a href="https://github.com/arvados/bh20-seq-resource/commit/b9691c7deae30bd6422fb7b0681572b7b6f78ae3">patch</a> the website adds the license input fields on the form.
+</p>
+
+<p>
+Finally, to make RDF output work we need to add expressions to bh20seq-shex.rdf. This
+was done with this <a href="https://github.com/arvados/bh20-seq-resource/commit/f4ed46dae20abe5147871495ede2d6ac2b0854bc">patch</a>. In the end we decided to use the Dublin core title,
+<a href="http://purl.org/metadata/dublin_core_elements#Title">http://purl.org/metadata/dublin_core_elements#Title</a>:
+</p>
+
+<div class="org-src-container">
+<pre class="src src-js">:licenseShape{
+    cc:License xsd:string;
+    dc:Title xsd:string ?;
+    cc:attributionName xsd:string ?;
+    cc:attributionURL xsd:string ?;
+    cc:attributionSource xsd:string ?;
+}
+</pre>
+</div>
+
+<p>
+Note that cc:attributionSource is not really defined in the cc standard.
+</p>
+
+<p>
+When pushing the license info we discovered the workflow broke because
+the existing data had no licensing info. So we changed the license
+field to be optional - a missing license is assumed to be CC-BY-4.0.
+</p>
+</div>
+</div>
+
+<div id="outline-container-org633781a" class="outline-2">
+<h2 id="org633781a"><span class="section-number-2">6</span> <span class="todo TODO">TODO</span> Testing the license fields</h2>
 </div>
 </div>
 <div id="postamble" class="status">
-<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-12 Sun 06:24</small>.
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-16 Thu 03:27</small>.
 </div>
 </body>
 </html>
diff --git a/doc/blog/using-covid-19-pubseq-part6.html b/doc/blog/using-covid-19-pubseq-part6.html
new file mode 100644
index 0000000..278abe8
--- /dev/null
+++ b/doc/blog/using-covid-19-pubseq-part6.html
@@ -0,0 +1,393 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+<head>
+<!-- 2020-07-17 Fri 06:05 -->
+<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+<title>COVID-19 PubSeq (part 6)</title>
+<meta name="generator" content="Org mode" />
+<meta name="author" content="Pjotr Prins" />
+<style type="text/css">
+ <!--/*--><![CDATA[/*><!--*/
+  .title  { text-align: center;
+             margin-bottom: .2em; }
+  .subtitle { text-align: center;
+              font-size: medium;
+              font-weight: bold;
+              margin-top:0; }
+  .todo   { font-family: monospace; color: red; }
+  .done   { font-family: monospace; color: green; }
+  .priority { font-family: monospace; color: orange; }
+  .tag    { background-color: #eee; font-family: monospace;
+            padding: 2px; font-size: 80%; font-weight: normal; }
+  .timestamp { color: #bebebe; }
+  .timestamp-kwd { color: #5f9ea0; }
+  .org-right  { margin-left: auto; margin-right: 0px;  text-align: right; }
+  .org-left   { margin-left: 0px;  margin-right: auto; text-align: left; }
+  .org-center { margin-left: auto; margin-right: auto; text-align: center; }
+  .underline { text-decoration: underline; }
+  #postamble p, #preamble p { font-size: 90%; margin: .2em; }
+  p.verse { margin-left: 3%; }
+  pre {
+    border: 1px solid #ccc;
+    box-shadow: 3px 3px 3px #eee;
+    padding: 8pt;
+    font-family: monospace;
+    overflow: auto;
+    margin: 1.2em;
+  }
+  pre.src {
+    position: relative;
+    overflow: visible;
+    padding-top: 1.2em;
+  }
+  pre.src:before {
+    display: none;
+    position: absolute;
+    background-color: white;
+    top: -10px;
+    right: 10px;
+    padding: 3px;
+    border: 1px solid black;
+  }
+  pre.src:hover:before { display: inline;}
+  /* Languages per Org manual */
+  pre.src-asymptote:before { content: 'Asymptote'; }
+  pre.src-awk:before { content: 'Awk'; }
+  pre.src-C:before { content: 'C'; }
+  /* pre.src-C++ doesn't work in CSS */
+  pre.src-clojure:before { content: 'Clojure'; }
+  pre.src-css:before { content: 'CSS'; }
+  pre.src-D:before { content: 'D'; }
+  pre.src-ditaa:before { content: 'ditaa'; }
+  pre.src-dot:before { content: 'Graphviz'; }
+  pre.src-calc:before { content: 'Emacs Calc'; }
+  pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
+  pre.src-fortran:before { content: 'Fortran'; }
+  pre.src-gnuplot:before { content: 'gnuplot'; }
+  pre.src-haskell:before { content: 'Haskell'; }
+  pre.src-hledger:before { content: 'hledger'; }
+  pre.src-java:before { content: 'Java'; }
+  pre.src-js:before { content: 'Javascript'; }
+  pre.src-latex:before { content: 'LaTeX'; }
+  pre.src-ledger:before { content: 'Ledger'; }
+  pre.src-lisp:before { content: 'Lisp'; }
+  pre.src-lilypond:before { content: 'Lilypond'; }
+  pre.src-lua:before { content: 'Lua'; }
+  pre.src-matlab:before { content: 'MATLAB'; }
+  pre.src-mscgen:before { content: 'Mscgen'; }
+  pre.src-ocaml:before { content: 'Objective Caml'; }
+  pre.src-octave:before { content: 'Octave'; }
+  pre.src-org:before { content: 'Org mode'; }
+  pre.src-oz:before { content: 'OZ'; }
+  pre.src-plantuml:before { content: 'Plantuml'; }
+  pre.src-processing:before { content: 'Processing.js'; }
+  pre.src-python:before { content: 'Python'; }
+  pre.src-R:before { content: 'R'; }
+  pre.src-ruby:before { content: 'Ruby'; }
+  pre.src-sass:before { content: 'Sass'; }
+  pre.src-scheme:before { content: 'Scheme'; }
+  pre.src-screen:before { content: 'Gnu Screen'; }
+  pre.src-sed:before { content: 'Sed'; }
+  pre.src-sh:before { content: 'shell'; }
+  pre.src-sql:before { content: 'SQL'; }
+  pre.src-sqlite:before { content: 'SQLite'; }
+  /* additional languages in org.el's org-babel-load-languages alist */
+  pre.src-forth:before { content: 'Forth'; }
+  pre.src-io:before { content: 'IO'; }
+  pre.src-J:before { content: 'J'; }
+  pre.src-makefile:before { content: 'Makefile'; }
+  pre.src-maxima:before { content: 'Maxima'; }
+  pre.src-perl:before { content: 'Perl'; }
+  pre.src-picolisp:before { content: 'Pico Lisp'; }
+  pre.src-scala:before { content: 'Scala'; }
+  pre.src-shell:before { content: 'Shell Script'; }
+  pre.src-ebnf2ps:before { content: 'ebfn2ps'; }
+  /* additional language identifiers per "defun org-babel-execute"
+       in ob-*.el */
+  pre.src-cpp:before  { content: 'C++'; }
+  pre.src-abc:before  { content: 'ABC'; }
+  pre.src-coq:before  { content: 'Coq'; }
+  pre.src-groovy:before  { content: 'Groovy'; }
+  /* additional language identifiers from org-babel-shell-names in
+     ob-shell.el: ob-shell is the only babel language using a lambda to put
+     the execution function name together. */
+  pre.src-bash:before  { content: 'bash'; }
+  pre.src-csh:before  { content: 'csh'; }
+  pre.src-ash:before  { content: 'ash'; }
+  pre.src-dash:before  { content: 'dash'; }
+  pre.src-ksh:before  { content: 'ksh'; }
+  pre.src-mksh:before  { content: 'mksh'; }
+  pre.src-posh:before  { content: 'posh'; }
+  /* Additional Emacs modes also supported by the LaTeX listings package */
+  pre.src-ada:before { content: 'Ada'; }
+  pre.src-asm:before { content: 'Assembler'; }
+  pre.src-caml:before { content: 'Caml'; }
+  pre.src-delphi:before { content: 'Delphi'; }
+  pre.src-html:before { content: 'HTML'; }
+  pre.src-idl:before { content: 'IDL'; }
+  pre.src-mercury:before { content: 'Mercury'; }
+  pre.src-metapost:before { content: 'MetaPost'; }
+  pre.src-modula-2:before { content: 'Modula-2'; }
+  pre.src-pascal:before { content: 'Pascal'; }
+  pre.src-ps:before { content: 'PostScript'; }
+  pre.src-prolog:before { content: 'Prolog'; }
+  pre.src-simula:before { content: 'Simula'; }
+  pre.src-tcl:before { content: 'tcl'; }
+  pre.src-tex:before { content: 'TeX'; }
+  pre.src-plain-tex:before { content: 'Plain TeX'; }
+  pre.src-verilog:before { content: 'Verilog'; }
+  pre.src-vhdl:before { content: 'VHDL'; }
+  pre.src-xml:before { content: 'XML'; }
+  pre.src-nxml:before { content: 'XML'; }
+  /* add a generic configuration mode; LaTeX export needs an additional
+     (add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
+  pre.src-conf:before { content: 'Configuration File'; }
+
+  table { border-collapse:collapse; }
+  caption.t-above { caption-side: top; }
+  caption.t-bottom { caption-side: bottom; }
+  td, th { vertical-align:top;  }
+  th.org-right  { text-align: center;  }
+  th.org-left   { text-align: center;   }
+  th.org-center { text-align: center; }
+  td.org-right  { text-align: right;  }
+  td.org-left   { text-align: left;   }
+  td.org-center { text-align: center; }
+  dt { font-weight: bold; }
+  .footpara { display: inline; }
+  .footdef  { margin-bottom: 1em; }
+  .figure { padding: 1em; }
+  .figure p { text-align: center; }
+  .equation-container {
+    display: table;
+    text-align: center;
+    width: 100%;
+  }
+  .equation {
+    vertical-align: middle;
+  }
+  .equation-label {
+    display: table-cell;
+    text-align: right;
+    vertical-align: middle;
+  }
+  .inlinetask {
+    padding: 10px;
+    border: 2px solid gray;
+    margin: 10px;
+    background: #ffffcc;
+  }
+  #org-div-home-and-up
+   { text-align: right; font-size: 70%; white-space: nowrap; }
+  textarea { overflow-x: auto; }
+  .linenr { font-size: smaller }
+  .code-highlighted { background-color: #ffff00; }
+  .org-info-js_info-navigation { border-style: none; }
+  #org-info-js_console-label
+    { font-size: 10px; font-weight: bold; white-space: nowrap; }
+  .org-info-js_search-highlight
+    { background-color: #ffff00; color: #000000; font-weight: bold; }
+  .org-svg { width: 90%; }
+  /*]]>*/-->
+</style>
+<link rel="Blog stylesheet" type="text/css" href="blog.css" />
+<script type="text/javascript">
+/*
+@licstart  The following is the entire license notice for the
+JavaScript code in this tag.
+
+Copyright (C) 2012-2020 Free Software Foundation, Inc.
+
+The JavaScript code in this tag is free software: you can
+redistribute it and/or modify it under the terms of the GNU
+General Public License (GNU GPL) as published by the Free Software
+Foundation, either version 3 of the License, or (at your option)
+any later version.  The code is distributed WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU GPL for more details.
+
+As additional permission under GNU GPL version 3 section 7, you
+may distribute non-source (e.g., minimized or compacted) forms of
+that code without the copy of the GNU GPL normally required by
+section 4, provided you include this license notice and a URL
+through which recipients can access the Corresponding Source.
+
+
+@licend  The above is the entire license notice
+for the JavaScript code in this tag.
+*/
+<!--/*--><![CDATA[/*><!--*/
+ function CodeHighlightOn(elem, id)
+ {
+   var target = document.getElementById(id);
+   if(null != target) {
+     elem.cacheClassElem = elem.className;
+     elem.cacheClassTarget = target.className;
+     target.className = "code-highlighted";
+     elem.className   = "code-highlighted";
+   }
+ }
+ function CodeHighlightOff(elem, id)
+ {
+   var target = document.getElementById(id);
+   if(elem.cacheClassElem)
+     elem.className = elem.cacheClassElem;
+   if(elem.cacheClassTarget)
+     target.className = elem.cacheClassTarget;
+ }
+/*]]>*///-->
+</script>
+</head>
+<body>
+<div id="content">
+<h1 class="title">COVID-19 PubSeq (part 6)</h1>
+<div id="table-of-contents">
+<h2>Table of Contents</h2>
+<div id="text-table-of-contents">
+<ul>
+<li><a href="#orge6aea9e">1. Generating output for EBI</a></li>
+<li><a href="#org95e5e17">2. Defining the EBI study</a></li>
+<li><a href="#org9181a73">3. Define the EBI sample</a></li>
+<li><a href="#orga29cad0">4. Define the EBI sequence</a></li>
+</ul>
+</div>
+</div>
+
+
+<div id="outline-container-orge6aea9e" class="outline-2">
+<h2 id="orge6aea9e"><span class="section-number-2">1</span> Generating output for EBI</h2>
+<div class="outline-text-2" id="text-1">
+<p>
+Would it not be great if an uploader to PubSeq could also export samples
+to, say, EBI? That is what we discuss in this section. The submission
+process is somewhat laborious and when you have submitted to PubSeq
+why not export the same to EBI too with the least amount of effort?
+</p>
+
+<p>
+COVID-19 PubSeq is a data source - both sequence data and metadata -
+that can be used to push data to other sources, such as EBI. You can
+register <a href="https://ena-docs.readthedocs.io/en/latest/submit/samples/programmatic.html">samples programmatically</a> with a specific XML interface.  Note
+that (at this point) if you want to submit a sequence (FASTA) it can
+only be done through the <a href="https://ena-docs.readthedocs.io/en/latest/submit/general-guide/webin-cli.html">Webin-CLI</a>. Raw data (FASTQ) can go through
+the XML interface.
+</p>
+
+<p>
+EBI sequence resources are presented through ENA. For example
+<a href="https://www.ebi.ac.uk/ena/browser/view/MT394864">Sequence: MT394864.1</a>.
+</p>
+
+<p>
+EBI has XML Formats for
+</p>
+
+<ul class="org-ul">
+<li>SUBMISSION</li>
+<li>STUDY</li>
+<li>SAMPLE</li>
+<li>EXPERIMENT</li>
+<li>RUN</li>
+<li>ANALYSIS</li>
+<li>DAC</li>
+<li>POLICY</li>
+<li>DATASET</li>
+<li>PROJECT</li>
+</ul>
+
+<p>
+with the schemas listed <a href="ftp://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/">here</a>.  Since we are submitting sequences we
+should follow the <a href="https://ena-docs.readthedocs.io/en/latest/submit/assembly.html">full genome assembly submission guidelines</a> and
+<a href="https://ena-docs.readthedocs.io/en/latest/submit/general-guide/programmatic.html">ENA guidelines</a>. The first step is to define the study, next the sample
+and finally the sequence (assembly).
+</p>
+</div>
+</div>
+
+<div id="outline-container-org95e5e17" class="outline-2">
+<h2 id="org95e5e17"><span class="section-number-2">2</span> Defining the EBI study</h2>
+<div class="outline-text-2" id="text-2">
+<p>
+A study is defined <a href="https://ena-docs.readthedocs.io/en/latest/submit/study/programmatic.html">here</a> and looks like
+</p>
+
+<div class="org-src-container">
+<pre class="src src-xml">&lt;PROJECT_SET&gt;
+   &lt;PROJECT alias="COVID-19 Washington DC"&gt;
+      &lt;TITLE&gt;Sequencing SARS-CoV-2 in the Washington DC area&lt;/TITLE&gt;
+      &lt;DESCRIPTION&gt;This study collects samples from COVID-19 patients in the Washington DC area&lt;/DESCRIPTION&gt;
+      &lt;SUBMISSION_PROJECT&gt;
+         &lt;SEQUENCING_PROJECT/&gt;
+      &lt;/SUBMISSION_PROJECT&gt;
+   &lt;/PROJECT&gt;
+&lt;/PROJECT_SET&gt;
+</pre>
+</div>
+
+<p>
+A submission 'command' is also required, which looks like
+</p>
+
+<div class="org-src-container">
+<pre class="src src-xml">&lt;SUBMISSION&gt;
+   &lt;ACTIONS&gt;
+      &lt;ACTION&gt;
+         &lt;ADD/&gt;
+      &lt;/ACTION&gt;
+      &lt;ACTION&gt;
+         &lt;HOLD HoldUntilDate="TODO: release date"/&gt;
+      &lt;/ACTION&gt;
+   &lt;/ACTIONS&gt;
+&lt;/SUBMISSION&gt;
+
+</pre>
+</div>
+
+<p>
+The webin system accepts such sources using a command like
+</p>
+
+<pre class="example">
+curl -u username:password -F "SUBMISSION=@submission.xml" \
+  -F "PROJECT=@project.xml" "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/"
+</pre>
+
+
+<p>
+as described <a href="https://ena-docs.readthedocs.io/en/latest/submit/study/programmatic.html#submit-the-xmls-using-curl">here</a>. Note that this is the test server. For the final
+version use www.ebi.ac.uk instead of wwwdev.ebi.ac.uk.  You may also
+need the <code>--insecure</code> switch to circumvent certificate checking.
+</p>
+
+<p>
+<i>work in progress (WIP)</i>
+</p>
+</div>
+</div>
+
+<div id="outline-container-org9181a73" class="outline-2">
+<h2 id="org9181a73"><span class="section-number-2">3</span> Define the EBI sample</h2>
+<div class="outline-text-2" id="text-3">
+<p>
+<i>work in progress (WIP)</i>
+</p>
+</div>
+</div>
+
+<div id="outline-container-orga29cad0" class="outline-2">
+<h2 id="orga29cad0"><span class="section-number-2">4</span> Define the EBI sequence</h2>
+<div class="outline-text-2" id="text-4">
+<p>
+<i>work in progress (WIP)</i>
+</p>
+</div>
+</div>
+</div>
+<div id="postamble" class="status">
+<hr><small>Created by <a href="http://thebird.nl/">Pjotr Prins</a> (pjotr.public768 at thebird 'dot' nl) using Emacs org-mode and a healthy dose of Lisp!<br />Modified 2020-07-17 Fri 06:05</small>.
+</div>
+</body>
+</html>
diff --git a/doc/blog/using-covid-19-pubseq-part6.org b/doc/blog/using-covid-19-pubseq-part6.org
new file mode 100644
index 0000000..8964700
--- /dev/null
+++ b/doc/blog/using-covid-19-pubseq-part6.org
@@ -0,0 +1,102 @@
+#+TITLE: COVID-19 PubSeq (part 6)
+#+AUTHOR: Pjotr Prins
+# C-c C-e h h   publish
+# C-c !         insert date (use . for active agenda, C-u C-c ! for date, C-u C-c . for time)
+# C-c C-t       task rotate
+# RSS_IMAGE_URL: http://xxxx.xxxx.free.fr/rss_icon.png
+
+#+HTML_HEAD: <link rel="Blog stylesheet" type="text/css" href="blog.css" />
+
+
+* Table of Contents                                                     :TOC:noexport:
+ - [[#generating-output-for-ebi][Generating output for EBI]]
+ - [[#defining-the-ebi-study][Defining the EBI study]]
+ - [[#define-the-ebi-sample][Define the EBI sample]]
+ - [[#define-the-ebi-sequence][Define the EBI sequence]]
+
+* Generating output for EBI
+
+Would it not be great if an uploader to PubSeq could also export samples
+to, say, EBI? That is what we discuss in this section. The submission
+process is somewhat laborious, and once you have submitted to PubSeq,
+why not export the same data to EBI with the least amount of effort?
+
+COVID-19 PubSeq is a data source - both sequence data and metadata -
+that can be used to push data to other sources, such as EBI. You can
+register [[https://ena-docs.readthedocs.io/en/latest/submit/samples/programmatic.html][samples programmatically]] with a specific XML interface.  Note
+that (at this point) if you want to submit a sequence (FASTA) it can
+only be done through the [[https://ena-docs.readthedocs.io/en/latest/submit/general-guide/webin-cli.html][Webin-CLI]]. Raw data (FASTQ) can go through
+the XML interface.
+
+EBI sequence resources are presented through ENA. For example
+[[https://www.ebi.ac.uk/ena/browser/view/MT394864][Sequence: MT394864.1]].
+
+EBI has XML Formats for
+
+- SUBMISSION
+- STUDY
+- SAMPLE
+- EXPERIMENT
+- RUN
+- ANALYSIS
+- DAC
+- POLICY
+- DATASET
+- PROJECT
+
+with the schemas listed [[ftp://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/][here]].  Since we are submitting sequences we
+should follow the [[https://ena-docs.readthedocs.io/en/latest/submit/assembly.html][full genome assembly submission guidelines]] and
+[[https://ena-docs.readthedocs.io/en/latest/submit/general-guide/programmatic.html][ENA guidelines]]. The first step is to define the study, next the sample
+and finally the sequence (assembly).
+
+* Defining the EBI study
+
+A study is defined [[https://ena-docs.readthedocs.io/en/latest/submit/study/programmatic.html][here]] and looks like
+
+#+BEGIN_SRC xml
+<PROJECT_SET>
+   <PROJECT alias="COVID-19 Washington DC">
+      <TITLE>Sequencing SARS-CoV-2 in the Washington DC area</TITLE>
+      <DESCRIPTION>This study collects samples from COVID-19 patients in the Washington DC area</DESCRIPTION>
+      <SUBMISSION_PROJECT>
+         <SEQUENCING_PROJECT/>
+      </SUBMISSION_PROJECT>
+   </PROJECT>
+</PROJECT_SET>
+#+END_SRC
+
+A submission 'command' is also required; it looks like
+
+#+BEGIN_SRC xml
+<SUBMISSION>
+   <ACTIONS>
+      <ACTION>
+         <ADD/>
+      </ACTION>
+      <ACTION>
+         <HOLD HoldUntilDate="TODO: release date"/>
+      </ACTION>
+   </ACTIONS>
+</SUBMISSION>
+
+#+END_SRC
+
+The webin system accepts such sources using a command like
+
+: curl -u username:password -F "SUBMISSION=@submission.xml" \
+:   -F "PROJECT=@project.xml" "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/"
+
+as described [[https://ena-docs.readthedocs.io/en/latest/submit/study/programmatic.html#submit-the-xmls-using-curl][here]]. Note that this is the test server. For the final
+version use www.ebi.ac.uk instead of wwwdev.ebi.ac.uk.  You may also
+need the =--insecure= switch to circumvent certificate checking.
+
+/work in progress (WIP)/
+
+* Define the EBI sample
+
+
+/work in progress (WIP)/
+
+* Define the EBI sequence
+
+/work in progress (WIP)/
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
new file mode 100644
index 0000000..78f34c8
--- /dev/null
+++ b/scripts/cleanup.py
@@ -0,0 +1,41 @@
+import arvados
+import arvados.util
+
+api = arvados.api()
+
+delete_patterns = [
+    "%missing%`collection_location`%",
+    "%missing%`technology`%",
+    "%missing%`host_species`%",
+    "%QC fail: alignment%",
+    "%does not look like a valid URI%",
+    "%Duplicate of%",
+    "%No matching triples found for predicate obo:NCIT_C42781%",
+    "%does not look like a valid URI%"
+    ]
+
+revalidate_patterns = [
+    "%missing%`license`%",
+    "%QC fail%"
+]
+
+for p in delete_patterns:
+    c = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+        ["properties.errors", "like", p]])
+    for i in c:
+        print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        api.collections().delete(uuid=i["uuid"]).execute()
+
+for p in revalidate_patterns:
+    c = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+        ["properties.errors", "like", p]])
+    for i in c:
+        print("clearing status %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        pr = i["properties"]
+        if "status" in pr:
+            del pr["status"]
+        if "errors" in pr:
+            del pr["errors"]
+        api.collections().update(uuid=i["uuid"], body={"properties": pr}).execute()
diff --git a/scripts/submit_ebi/example/project-submission.xml b/scripts/submit_ebi/example/project-submission.xml
new file mode 100644
index 0000000..1abb827
--- /dev/null
+++ b/scripts/submit_ebi/example/project-submission.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<SUBMISSION>
+   <ACTIONS>
+      <ACTION>
+         <ADD/>
+      </ACTION>
+      <ACTION>
+         <HOLD HoldUntilDate="2020-10-10"/>
+      </ACTION>
+  </ACTIONS>
+</SUBMISSION>
+
diff --git a/scripts/submit_ebi/example/project.xml b/scripts/submit_ebi/example/project.xml
new file mode 100644
index 0000000..6a817e7
--- /dev/null
+++ b/scripts/submit_ebi/example/project.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<PROJECT_SET>
+   <PROJECT alias="PubSeq01">
+      <TITLE>Testing PubSeq Sample uploads</TITLE>
+      <DESCRIPTION>This is a test to allow for uploading sequences from PubSeq</DESCRIPTION>
+      <SUBMISSION_PROJECT>
+         <SEQUENCING_PROJECT/>
+      </SUBMISSION_PROJECT>
+   </PROJECT>
+</PROJECT_SET>
diff --git a/scripts/submit_ebi/example/sample-submission.xml b/scripts/submit_ebi/example/sample-submission.xml
new file mode 100644
index 0000000..9d13512
--- /dev/null
+++ b/scripts/submit_ebi/example/sample-submission.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<SUBMISSION>
+   <ACTIONS>
+      <ACTION>
+         <ADD/>
+      </ACTION>
+   </ACTIONS>
+</SUBMISSION>
diff --git a/scripts/submit_ebi/example/sample.xml b/scripts/submit_ebi/example/sample.xml
new file mode 100644
index 0000000..694c471
--- /dev/null
+++ b/scripts/submit_ebi/example/sample.xml
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<SAMPLE_SET>
+  <SAMPLE alias="PubSeqSam0001" center_name="PubSeq01">
+    <TITLE>human gastric microbiota, mucosal</TITLE>
+    <SAMPLE_NAME>
+      <TAXON_ID>1284369</TAXON_ID>
+      <SCIENTIFIC_NAME>stomach metagenome</SCIENTIFIC_NAME>
+      <COMMON_NAME></COMMON_NAME>
+    </SAMPLE_NAME>
+    <SAMPLE_ATTRIBUTES>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>investigation type</TAG>
+        <VALUE>mimarks-survey</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>sequencing method</TAG>
+        <VALUE>pyrosequencing</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>collection date</TAG>
+        <VALUE>2010</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>host body site</TAG>
+        <VALUE>Mucosa of stomach</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>human-associated environmental package</TAG>
+        <VALUE>human-associated</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>geographic location (latitude)</TAG>
+        <VALUE>1.81</VALUE>
+     <UNITS>DD</UNITS>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>geographic location (longitude)</TAG>
+        <VALUE>-78.76</VALUE>
+     <UNITS>DD</UNITS>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+     <TAG>geographic location (country and/or sea)</TAG>
+     <VALUE>Colombia</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>geographic location (region and locality)</TAG>
+        <VALUE>Tumaco</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>environment (biome)</TAG>
+        <VALUE>coast</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>environment (feature)</TAG>
+        <VALUE>human-associated habitat</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>environment (material)</TAG>
+        <VALUE>gastric biopsy</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>ENA-CHECKLIST</TAG>
+        <VALUE>ERC000011</VALUE>
+      </SAMPLE_ATTRIBUTE>
+    </SAMPLE_ATTRIBUTES>
+  </SAMPLE>
+</SAMPLE_SET>
+