about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/cleanup.py 41
-rw-r--r-- scripts/submit_ebi/example/project-submission.xml 12
-rw-r--r-- scripts/submit_ebi/example/project.xml 10
-rw-r--r-- scripts/submit_ebi/example/sample-submission.xml 8
-rw-r--r-- scripts/submit_ebi/example/sample.xml 68
5 files changed, 139 insertions, 0 deletions
diff --git a/scripts/cleanup.py b/scripts/cleanup.py
new file mode 100644
index 0000000..78f34c8
--- /dev/null
+++ b/scripts/cleanup.py
@@ -0,0 +1,41 @@
+import arvados
+import arvados.util
+
+api = arvados.api()
+
+delete_patterns = [
+    "%missing%`collection_location`%",
+    "%missing%`technology`%",
+    "%missing%`host_species`%",
+    "%QC fail: alignment%",
+    "%does not look like a valid URI%",
+    "%Duplicate of%",
+    "%No matching triples found for predicate obo:NCIT_C42781%",
+    "%does not look like a valid URI%"
+    ]
+
+revalidate_patterns = [
+    "%missing%`license`%",
+    "%QC fail%"
+]
+
+for p in delete_patterns:
+    c = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+        ["properties.errors", "like", p]])
+    for i in c:
+        print("trashing %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        api.collections().delete(uuid=i["uuid"]).execute()
+
+for p in revalidate_patterns:
+    c = arvados.util.list_all(api.collections().list, filters=[
+        ["owner_uuid", "=", "lugli-j7d0g-n5clictpuvwk8aa"],
+        ["properties.errors", "like", p]])
+    for i in c:
+        print("clearing status %s %s" % (i["uuid"], i["properties"].get("sequence_label")))
+        pr = i["properties"]
+        if "status" in pr:
+            del pr["status"]
+        if "errors" in pr:
+            del pr["errors"]
+        api.collections().update(uuid=i["uuid"], body={"properties": pr}).execute()
diff --git a/scripts/submit_ebi/example/project-submission.xml b/scripts/submit_ebi/example/project-submission.xml
new file mode 100644
index 0000000..1abb827
--- /dev/null
+++ b/scripts/submit_ebi/example/project-submission.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<SUBMISSION>
+   <ACTIONS>
+      <ACTION>
+         <ADD/>
+      </ACTION>
+      <ACTION>
+         <HOLD HoldUntilDate="2020-10-10"/>
+      </ACTION>
+   </ACTIONS>
+</SUBMISSION>
+
diff --git a/scripts/submit_ebi/example/project.xml b/scripts/submit_ebi/example/project.xml
new file mode 100644
index 0000000..6a817e7
--- /dev/null
+++ b/scripts/submit_ebi/example/project.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<PROJECT_SET>
+   <PROJECT alias="PubSeq01">
+      <TITLE>Testing PubSeq Sample uploads</TITLE>
+      <DESCRIPTION>This is a test to allow for uploading sequences from PubSeq</DESCRIPTION>
+      <SUBMISSION_PROJECT>
+         <SEQUENCING_PROJECT/>
+      </SUBMISSION_PROJECT>
+   </PROJECT>
+</PROJECT_SET>
diff --git a/scripts/submit_ebi/example/sample-submission.xml b/scripts/submit_ebi/example/sample-submission.xml
new file mode 100644
index 0000000..9d13512
--- /dev/null
+++ b/scripts/submit_ebi/example/sample-submission.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<SUBMISSION>
+   <ACTIONS>
+      <ACTION>
+         <ADD/>
+      </ACTION>
+   </ACTIONS>
+</SUBMISSION>
diff --git a/scripts/submit_ebi/example/sample.xml b/scripts/submit_ebi/example/sample.xml
new file mode 100644
index 0000000..694c471
--- /dev/null
+++ b/scripts/submit_ebi/example/sample.xml
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<SAMPLE_SET>
+  <SAMPLE alias="PubSeqSam0001" center_name="PubSeq01">
+    <TITLE>human gastric microbiota, mucosal</TITLE>
+    <SAMPLE_NAME>
+      <TAXON_ID>1284369</TAXON_ID>
+      <SCIENTIFIC_NAME>stomach metagenome</SCIENTIFIC_NAME>
+      <COMMON_NAME></COMMON_NAME>
+    </SAMPLE_NAME>
+    <SAMPLE_ATTRIBUTES>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>investigation type</TAG>
+        <VALUE>mimarks-survey</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>sequencing method</TAG>
+        <VALUE>pyrosequencing</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>collection date</TAG>
+        <VALUE>2010</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>host body site</TAG>
+        <VALUE>Mucosa of stomach</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>human-associated environmental package</TAG>
+        <VALUE>human-associated</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>geographic location (latitude)</TAG>
+        <VALUE>1.81</VALUE>
+        <UNITS>DD</UNITS>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>geographic location (longitude)</TAG>
+        <VALUE>-78.76</VALUE>
+        <UNITS>DD</UNITS>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>geographic location (country and/or sea)</TAG>
+        <VALUE>Colombia</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>geographic location (region and locality)</TAG>
+        <VALUE>Tumaco</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>environment (biome)</TAG>
+        <VALUE>coast</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>environment (feature)</TAG>
+        <VALUE>human-associated habitat</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>environment (material)</TAG>
+        <VALUE>gastric biopsy</VALUE>
+      </SAMPLE_ATTRIBUTE>
+      <SAMPLE_ATTRIBUTE>
+        <TAG>ENA-CHECKLIST</TAG>
+        <VALUE>ERC000011</VALUE>
+      </SAMPLE_ATTRIBUTE>
+    </SAMPLE_ATTRIBUTES>
+  </SAMPLE>
+</SAMPLE_SET>
+