aboutsummaryrefslogtreecommitdiff
path: root/workflows/pull-data/genbank
diff options
context:
space:
mode:
Diffstat (limited to 'workflows/pull-data/genbank')
-rw-r--r--workflows/pull-data/genbank/README.md13
-rwxr-xr-xworkflows/pull-data/genbank/genbank-fetch-ids.py0
-rwxr-xr-xworkflows/pull-data/genbank/sparql-fetch-ids.py0
-rwxr-xr-xworkflows/pull-data/genbank/transform-genbank-xml2yamlfa.py (renamed from workflows/pull-data/genbank/transform_genbank2yamlfa.py)0
-rwxr-xr-xworkflows/pull-data/genbank/update-from-genbank.py (renamed from workflows/pull-data/genbank/genbank_pull.py)0
5 files changed, 13 insertions, 0 deletions
diff --git a/workflows/pull-data/genbank/README.md b/workflows/pull-data/genbank/README.md
new file mode 100644
index 0000000..ee67e70
--- /dev/null
+++ b/workflows/pull-data/genbank/README.md
@@ -0,0 +1,13 @@
+Pipeline:
+
+```sh
+# --- get list of IDs already in PubSeq
+sparql-fetch-ids > pubseq_ids.txt
+# --- fetch XML
+update-from-genbank --skip pubseq_ids.txt --max 100 --outdir ~/tmp/genbank
+# --- get new IDs
+genbank-fetch-ids --dir ~/tmp/genbank > genbank_ids.txt
+# --- loop through IDs (pseudo code)
+for id in genbank_ids.txt:
+ transform-genbank-xml2yamlfa --dir ~/tmp/genbank id --outdir ~/tmp/pubseq
+```
diff --git a/workflows/pull-data/genbank/genbank-fetch-ids.py b/workflows/pull-data/genbank/genbank-fetch-ids.py
new file mode 100755
index 0000000..e69de29
--- /dev/null
+++ b/workflows/pull-data/genbank/genbank-fetch-ids.py
diff --git a/workflows/pull-data/genbank/sparql-fetch-ids.py b/workflows/pull-data/genbank/sparql-fetch-ids.py
new file mode 100755
index 0000000..e69de29
--- /dev/null
+++ b/workflows/pull-data/genbank/sparql-fetch-ids.py
diff --git a/workflows/pull-data/genbank/transform_genbank2yamlfa.py b/workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py
index 8b970b6..8b970b6 100755
--- a/workflows/pull-data/genbank/transform_genbank2yamlfa.py
+++ b/workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py
diff --git a/workflows/pull-data/genbank/genbank_pull.py b/workflows/pull-data/genbank/update-from-genbank.py
index 07bb15d..07bb15d 100755
--- a/workflows/pull-data/genbank/genbank_pull.py
+++ b/workflows/pull-data/genbank/update-from-genbank.py