diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/cleanup.py | 41 | ||||
-rw-r--r-- | scripts/submit_ebi/example/project-submission.xml | 12 | ||||
-rw-r--r-- | scripts/submit_ebi/example/project.xml | 10 | ||||
-rw-r--r-- | scripts/submit_ebi/example/sample-submission.xml | 8 | ||||
-rw-r--r-- | scripts/submit_ebi/example/sample.xml | 68 |
5 files changed, 139 insertions, 0 deletions
diff --git a/scripts/cleanup.py b/scripts/cleanup.py new file mode 100644 index 0000000..78f34c8 --- /dev/null +++ b/scripts/cleanup.py @@ -0,0 +1,41 @@ +import arvados +import arvados.util + +api = arvados.api() + +delete_patterns = [ + "%missing%`collection_location`%", + "%missing%`technology`%", + "%missing%`host_species`%", + "%QC fail: alignment%", + "%does not look like a valid URI%", + "%Duplicate of%", + "%No matching triples found for predicate obo:NCIT_C42781%", + "%does not look like a valid URI%" + ] + +revalidate_patterns = [ + "%missing%`license`%", + "%QC fail%" +] + +for p in delete_patterns: + c = arvados.util.list_all(api.collections().list, filters=[ + ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"], + ["properties.errors", "like", p]]) + for i in c: + print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label"))) + api.collections().delete(uuid=i["uuid"]).execute() + +for p in revalidate_patterns: + c = arvados.util.list_all(api.collections().list, filters=[ + ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"], + ["properties.errors", "like", p]]) + for i in c: + print("clearing status %s %s" % (i["uuid"], i["properties"].get("sequence_label"))) + pr = i["properties"] + if "status" in pr: + del pr["status"] + if "errors" in pr: + del pr["errors"] + api.collections().update(uuid=i["uuid"], body={"properties": pr}).execute() diff --git a/scripts/submit_ebi/example/project-submission.xml b/scripts/submit_ebi/example/project-submission.xml new file mode 100644 index 0000000..1abb827 --- /dev/null +++ b/scripts/submit_ebi/example/project-submission.xml @@ -0,0 +1,12 @@ +<?xml version="1.0" encoding="UTF-8"?> +<SUBMISSION> + <ACTIONS> + <ACTION> + <ADD/> + </ACTION> + <ACTION> + <HOLD HoldUntilDate="2020-10-10"/> + </ACTION> + </ACTIONS> +</SUBMISSION> + diff --git a/scripts/submit_ebi/example/project.xml b/scripts/submit_ebi/example/project.xml new file mode 100644 index 0000000..6a817e7 --- /dev/null +++ b/scripts/submit_ebi/example/project.xml @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="UTF-8"?> +<PROJECT_SET> + <PROJECT alias="PubSeq01"> + <TITLE>Testing PubSeq Sample uploads</TITLE> + <DESCRIPTION>This is a test to allow for uploading sequences from PubSeq</DESCRIPTION> + <SUBMISSION_PROJECT> + <SEQUENCING_PROJECT/> + </SUBMISSION_PROJECT> + </PROJECT> +</PROJECT_SET> diff --git a/scripts/submit_ebi/example/sample-submission.xml b/scripts/submit_ebi/example/sample-submission.xml new file mode 100644 index 0000000..9d13512 --- /dev/null +++ b/scripts/submit_ebi/example/sample-submission.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<SUBMISSION> + <ACTIONS> + <ACTION> + <ADD/> + </ACTION> + </ACTIONS> +</SUBMISSION> diff --git a/scripts/submit_ebi/example/sample.xml b/scripts/submit_ebi/example/sample.xml new file mode 100644 index 0000000..694c471 --- /dev/null +++ b/scripts/submit_ebi/example/sample.xml @@ -0,0 +1,68 @@ +<?xml version="1.0" encoding="UTF-8"?> +<SAMPLE_SET> + <SAMPLE alias="PubSeqSam0001" center_name="PubSeq01"> + <TITLE>human gastric microbiota, mucosal</TITLE> + <SAMPLE_NAME> + <TAXON_ID>1284369</TAXON_ID> + <SCIENTIFIC_NAME>stomach metagenome</SCIENTIFIC_NAME> + <COMMON_NAME></COMMON_NAME> + </SAMPLE_NAME> + <SAMPLE_ATTRIBUTES> + <SAMPLE_ATTRIBUTE> + <TAG>investigation type</TAG> + <VALUE>mimarks-survey</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>sequencing method</TAG> + <VALUE>pyrosequencing</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>collection date</TAG> + <VALUE>2010</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>host body site</TAG> + <VALUE>Mucosa of stomach</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>human-associated environmental package</TAG> + <VALUE>human-associated</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>geographic location (latitude)</TAG> + <VALUE>1.81</VALUE> + <UNITS>DD</UNITS> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>geographic location (longitude)</TAG> + <VALUE>-78.76</VALUE> + <UNITS>DD</UNITS> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>geographic location (country and/or sea)</TAG> + <VALUE>Colombia</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>geographic location (region and locality)</TAG> + <VALUE>Tumaco</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>environment (biome)</TAG> + <VALUE>coast</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>environment (feature)</TAG> + <VALUE>human-associated habitat</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>environment (material)</TAG> + <VALUE>gastric biopsy</VALUE> + </SAMPLE_ATTRIBUTE> + <SAMPLE_ATTRIBUTE> + <TAG>ENA-CHECKLIST</TAG> + <VALUE>ERC000011</VALUE> + </SAMPLE_ATTRIBUTE> + </SAMPLE_ATTRIBUTES> + </SAMPLE> +</SAMPLE_SET> + |