about | summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/cleanup.py 41
-rw-r--r-- scripts/submit_ebi/example/project-submission.xml 12
-rw-r--r-- scripts/submit_ebi/example/project.xml 10
-rw-r--r-- scripts/submit_ebi/example/sample-submission.xml 8
-rw-r--r-- scripts/submit_ebi/example/sample.xml 68
5 files changed, 139 insertions, 0 deletions
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
new file mode 100644
index 0000000..78f34c8
--- /dev/null
+++ b/scripts/cleanup.py
@@ -0,0 +1,41 @@
+import arvados
+import arvados.util
+
+# API client shared by every request below.
+api = arvados.api()
+
+# Owner UUID that both collection queries are restricted to.
+OWNER_UUID = "lugli-j7d0g-n5clictpuvwk8aa"
+
+# `like` patterns matched against properties.errors; every collection that
+# matches one of them is deleted by the first loop below.
+delete_patterns = [
+    "%missing%`collection_location`%",
+    "%missing%`technology`%",
+    "%missing%`host_species`%",
+    "%QC fail: alignment%",
+    "%does not look like a valid URI%",
+    "%Duplicate of%",
+    "%No matching triples found for predicate obo:NCIT_C42781%",
+]
+
+# `like` patterns matched against properties.errors; every collection that
+# matches one of them has its "status" and "errors" properties removed by
+# the second loop below.
+# NOTE(review): "%QC fail%" also matches the "%QC fail: alignment%" delete
+# pattern; this presumably relies on the delete loop running first -- confirm.
+revalidate_patterns = [
+    "%missing%`license`%",
+    "%QC fail%",
+]
+
+
+def _collections_with_error(pattern):
+    """Return every collection owned by OWNER_UUID whose errors match *pattern*.
+
+    *pattern* is passed unchanged as the operand of a `like` filter on
+    ``properties.errors``.
+    """
+    return arvados.util.list_all(
+        api.collections().list,
+        filters=[
+            ["owner_uuid", "=", OWNER_UUID],
+            ["properties.errors", "like", pattern],
+        ],
+    )
+
+
+# Pass 1: delete collections whose errors match a delete pattern.
+for p in delete_patterns:
+    for i in _collections_with_error(p):
+        print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        api.collections().delete(uuid=i["uuid"]).execute()
+
+# Pass 2: strip "status" and "errors" from collections matching a revalidate
+# pattern, then write the remaining properties back.
+for p in revalidate_patterns:
+    for i in _collections_with_error(p):
+        print("clearing status %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        pr = i["properties"]
+        # pop() with a default removes the key only when it is present.
+        pr.pop("status", None)
+        pr.pop("errors", None)
+        api.collections().update(uuid=i["uuid"], body={"properties": pr}).execute()
diff --git a/scripts/submit_ebi/example/project-submission.xml b/scripts/submit_ebi/example/project-submission.xml
new file mode 100644
index 0000000..1abb827
--- /dev/null
+++ b/scripts/submit_ebi/example/project-submission.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<SUBMISSION>
+ <ACTIONS>
+ <ACTION>
+ <ADD/>
+ </ACTION>
+ <ACTION>
+ <HOLD HoldUntilDate="2020-10-10"/>
+ </ACTION>
+ </ACTIONS>
+</SUBMISSION>
+
diff --git a/scripts/submit_ebi/example/project.xml b/scripts/submit_ebi/example/project.xml
new file mode 100644
index 0000000..6a817e7
--- /dev/null
+++ b/scripts/submit_ebi/example/project.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<PROJECT_SET>
+ <PROJECT alias="PubSeq01">
+ <TITLE>Testing PubSeq Sample uploads</TITLE>
+ <DESCRIPTION>This is a test to allow for uploading sequences from PubSeq</DESCRIPTION>
+ <SUBMISSION_PROJECT>
+ <SEQUENCING_PROJECT/>
+ </SUBMISSION_PROJECT>
+ </PROJECT>
+</PROJECT_SET>
diff --git a/scripts/submit_ebi/example/sample-submission.xml b/scripts/submit_ebi/example/sample-submission.xml
new file mode 100644
index 0000000..9d13512
--- /dev/null
+++ b/scripts/submit_ebi/example/sample-submission.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<SUBMISSION>
+ <ACTIONS>
+ <ACTION>
+ <ADD/>
+ </ACTION>
+ </ACTIONS>
+</SUBMISSION>
diff --git a/scripts/submit_ebi/example/sample.xml b/scripts/submit_ebi/example/sample.xml
new file mode 100644
index 0000000..694c471
--- /dev/null
+++ b/scripts/submit_ebi/example/sample.xml
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<SAMPLE_SET>
+ <SAMPLE alias="PubSeqSam0001" center_name="PubSeq01">
+ <TITLE>human gastric microbiota, mucosal</TITLE>
+ <SAMPLE_NAME>
+ <TAXON_ID>1284369</TAXON_ID>
+ <SCIENTIFIC_NAME>stomach metagenome</SCIENTIFIC_NAME>
+ <COMMON_NAME></COMMON_NAME>
+ </SAMPLE_NAME>
+ <SAMPLE_ATTRIBUTES>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>investigation type</TAG>
+ <VALUE>mimarks-survey</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>sequencing method</TAG>
+ <VALUE>pyrosequencing</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>collection date</TAG>
+ <VALUE>2010</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>host body site</TAG>
+ <VALUE>Mucosa of stomach</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>human-associated environmental package</TAG>
+ <VALUE>human-associated</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>geographic location (latitude)</TAG>
+ <VALUE>1.81</VALUE>
+ <UNITS>DD</UNITS>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>geographic location (longitude)</TAG>
+ <VALUE>-78.76</VALUE>
+ <UNITS>DD</UNITS>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>geographic location (country and/or sea)</TAG>
+ <VALUE>Colombia</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>geographic location (region and locality)</TAG>
+ <VALUE>Tumaco</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>environment (biome)</TAG>
+ <VALUE>coast</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>environment (feature)</TAG>
+ <VALUE>human-associated habitat</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>environment (material)</TAG>
+ <VALUE>gastric biopsy</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ <SAMPLE_ATTRIBUTE>
+ <TAG>ENA-CHECKLIST</TAG>
+ <VALUE>ERC000011</VALUE>
+ </SAMPLE_ATTRIBUTE>
+ </SAMPLE_ATTRIBUTES>
+ </SAMPLE>
+</SAMPLE_SET>
+