about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- bh20simplewebuploader/templates/resource.html | 6
-rw-r--r-- scripts/cleanup.py | 23
2 files changed, 24 insertions, 5 deletions
diff --git a/bh20simplewebuploader/templates/resource.html b/bh20simplewebuploader/templates/resource.html
index 91b6c20..9a905c2 100644
--- a/bh20simplewebuploader/templates/resource.html
+++ b/bh20simplewebuploader/templates/resource.html
@@ -6,13 +6,13 @@
{% include 'menu.html' %}
<div class="status">
- <p><img src="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.png" height="300px"></p>
+ <!-- <p><img src="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.png" height="300px"></p> -->
<p><a href="https://workbench.lugli.arvadosapi.com/projects/lugli-j7d0g-5ct8p1i1wrgyjvp#Data_collections">All sequences project</a></p>
<p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup.fasta">All sequences (FASTA) relabled and deduplicated</a></p>
<p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/mergedmetadata.ttl">Metadata (RDF) for all sequences</a></p>
- <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.gfa">All sequences in Graphical Fragment Assembly (GFA)</a> - <a href="https://github.com/GFA-spec/GFA-spec">More about GFA</a></p>
+ <!-- <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.gfa">All sequences in Graphical Fragment Assembly (GFA)</a> - <a href="https://github.com/GFA-spec/GFA-spec">More about GFA</a></p>
<p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.gfa">All sequences in Optimized Dynamic Genome/Graph Implementation (ODGI)</a> - <a href="https://github.com/vgteam/odgi">More about ODGI</a></p>
- <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.ttl.xz">All sequences in RDF using spodgi</a> - <a href="https://github.com/pangenome/spodgi">More about spodgi</a></p>
+ <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.ttl.xz">All sequences in RDF using spodgi</a> - <a href="https://github.com/pangenome/spodgi">More about spodgi</a></p> -->
<p><a href="http://sparql.genenetwork.org/sparql/">SPARQL endpoint</a> - <a href="http://sparql.genenetwork.org/sparql/?default-graph-uri=&query=SELECT+DISTINCT+%3Ffasta+%3Fvalue+WHERE+%7B%3Ffasta+%3Fx%5B+%3Chttp%3A%2F%2Fedamontology.org%2Fdata_2091%3E+%3Fvalue+%5D%7D%0D%0A&format=text%2Fhtml&timeout=0&debug=on&run=+Run+Query+">Sample query for accessions</a>
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
index 78f34c8..6919305 100644
--- a/scripts/cleanup.py
+++ b/scripts/cleanup.py
@@ -1,7 +1,13 @@
import arvados
import arvados.util
+import arvados.keep
+import ruamel.yaml
api = arvados.api()
+keepclient = arvados.keep.KeepClient(api_client=api)
+
+UPLOADER_PROJECT = 'lugli-j7d0g-n5clictpuvwk8aa'
+VALIDATED_PROJECT = 'lugli-j7d0g-5ct8p1i1wrgyjvp'
delete_patterns = [
"%missing%`collection_location`%",
@@ -21,7 +27,7 @@ revalidate_patterns = [
for p in delete_patterns:
c = arvados.util.list_all(api.collections().list, filters=[
- ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+ ["owner_uuid", "=", UPLOADER_PROJECT],
["properties.errors", "like", p]])
for i in c:
print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
@@ -29,7 +35,7 @@ for p in delete_patterns:
for p in revalidate_patterns:
c = arvados.util.list_all(api.collections().list, filters=[
- ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+ ["owner_uuid", "=", UPLOADER_PROJECT],
["properties.errors", "like", p]])
for i in c:
print("clearing status %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
@@ -39,3 +45,16 @@ for p in revalidate_patterns:
if "errors" in pr:
del pr["errors"]
api.collections().update(uuid=i["uuid"], body={"properties": pr}).execute()
+
+c = arvados.util.list_all(api.collections().list, filters=[
+ ["owner_uuid", "=", VALIDATED_PROJECT],
+ ["properties.sequence_label", "exists", False]])
+for i in c:
+ col = arvados.collection.Collection(i["uuid"], api_client=api, keep_client=keepclient)
+ with col.open("metadata.yaml") as md:
+ metadata_content = ruamel.yaml.round_trip_load(md)
+ colprop = col.get_properties()
+ colprop["sequence_label"] = metadata_content["sample"]["sample_id"]
+
+ print("fixing sequence label %s %s" % (i["uuid"], colprop.get("sequence_label")))
+ api.collections().update(uuid=i["uuid"], body={"properties": colprop}).execute()