diff options
-rw-r--r-- | bh20simplewebuploader/templates/resource.html | 6 | ||||
-rw-r--r-- | scripts/cleanup.py | 23 |
2 files changed, 24 insertions, 5 deletions
diff --git a/bh20simplewebuploader/templates/resource.html b/bh20simplewebuploader/templates/resource.html index 91b6c20..9a905c2 100644 --- a/bh20simplewebuploader/templates/resource.html +++ b/bh20simplewebuploader/templates/resource.html @@ -6,13 +6,13 @@ {% include 'menu.html' %} <div class="status"> - <p><img src="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.png" height="300px"></p> + <!-- <p><img src="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.png" height="300px"></p> --> <p><a href="https://workbench.lugli.arvadosapi.com/projects/lugli-j7d0g-5ct8p1i1wrgyjvp#Data_collections">All sequences project</a></p> <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup.fasta">All sequences (FASTA) relabled and deduplicated</a></p> <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/mergedmetadata.ttl">Metadata (RDF) for all sequences</a></p> - <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.gfa">All sequences in Graphical Fragment Assembly (GFA)</a> - <a href="https://github.com/GFA-spec/GFA-spec">More about GFA</a></p> + <!-- <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.gfa">All sequences in Graphical Fragment Assembly (GFA)</a> - <a href="https://github.com/GFA-spec/GFA-spec">More about GFA</a></p> <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.gfa">All sequences in Optimized Dynamic Genome/Graph Implementation (ODGI)</a> - <a href="https://github.com/vgteam/odgi">More about ODGI</a></p> - <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.ttl.xz">All sequences in RDF using spodgi</a> - <a href="https://github.com/pangenome/spodgi">More about spodgi</a></p> + <p><a href="https://workbench.lugli.arvadosapi.com/collections/lugli-4zz18-z513nlpqm03hpca/relabeledSeqs_dedup_relabeledSeqs_dedup.ttl.xz">All sequences in RDF using spodgi</a> - <a href="https://github.com/pangenome/spodgi">More about spodgi</a></p> --> <p><a href="http://sparql.genenetwork.org/sparql/">SPARQL endpoint</a> - <a href="http://sparql.genenetwork.org/sparql/?default-graph-uri=&query=SELECT+DISTINCT+%3Ffasta+%3Fvalue+WHERE+%7B%3Ffasta+%3Fx%5B+%3Chttp%3A%2F%2Fedamontology.org%2Fdata_2091%3E+%3Fvalue+%5D%7D%0D%0A&format=text%2Fhtml&timeout=0&debug=on&run=+Run+Query+">Sample query for accessions</a> diff --git a/scripts/cleanup.py b/scripts/cleanup.py index 78f34c8..6919305 100644 --- a/scripts/cleanup.py +++ b/scripts/cleanup.py @@ -1,7 +1,13 @@ import arvados import arvados.util +import arvados.keep +import ruamel.yaml api = arvados.api() +keepclient = arvados.keep.KeepClient(api_client=api) + +UPLOADER_PROJECT = 'lugli-j7d0g-n5clictpuvwk8aa' +VALIDATED_PROJECT = 'lugli-j7d0g-5ct8p1i1wrgyjvp' delete_patterns = [ "%missing%`collection_location`%", @@ -21,7 +27,7 @@ revalidate_patterns = [ for p in delete_patterns: c = arvados.util.list_all(api.collections().list, filters=[ - ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"], + ["owner_uuid", "=", UPLOADER_PROJECT], ["properties.errors", "like", p]]) for i in c: print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label"))) @@ -29,7 +35,7 @@ for p in delete_patterns: for p in revalidate_patterns: c = arvados.util.list_all(api.collections().list, filters=[ - ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"], + ["owner_uuid", "=", UPLOADER_PROJECT], ["properties.errors", "like", p]]) for i in c: print("clearing status %s %s" % (i["uuid"], i["properties"].get("sequence_label"))) @@ -39,3 +45,16 @@ for p in revalidate_patterns: if "errors" in pr: del pr["errors"] api.collections().update(uuid=i["uuid"], body={"properties": pr}).execute() + +c = arvados.util.list_all(api.collections().list, filters=[ + ["owner_uuid", "=", VALIDATED_PROJECT], + ["properties.sequence_label", "exists", False]]) +for i in c: + col = arvados.collection.Collection(i["uuid"], api_client=api, keep_client=keepclient) + with col.open("metadata.yaml") as md: + metadata_content = ruamel.yaml.round_trip_load(md) + colprop = col.get_properties() + colprop["sequence_label"] = metadata_content["sample"]["sample_id"] + + print("fixing sequence label %s %s" % (i["uuid"], colprop.get("sequence_label"))) + api.collections().update(uuid=i["uuid"], body={"properties": colprop}).execute() |