aboutsummaryrefslogtreecommitdiff
path: root/workflows/pull-data/genbank
diff options
context:
space:
mode:
author Pjotr Prins 2021-01-01 12:24:44 +0000
committer Pjotr Prins 2021-01-01 12:24:44 +0000
commit 2c6181c7eb86c0285928a434a37401d6680f9f79 (patch)
tree 90606b0276edf4a929ef639c5117e3afd2e094d4 /workflows/pull-data/genbank
parent dd9c8df418040093f2116de6592fc6add0c6a2ce (diff)
download bh20-seq-resource-2c6181c7eb86c0285928a434a37401d6680f9f79.tar.gz
bh20-seq-resource-2c6181c7eb86c0285928a434a37401d6680f9f79.tar.lz
bh20-seq-resource-2c6181c7eb86c0285928a434a37401d6680f9f79.zip
genbank-fetch-ids
Diffstat (limited to 'workflows/pull-data/genbank')
-rw-r--r-- workflows/pull-data/genbank/README.md 4
-rwxr-xr-x workflows/pull-data/genbank/genbank-fetch-ids.py (renamed from workflows/pull-data/genbank/update-from-genbank.py) 0
2 files changed, 3 insertions, 1 deletions
diff --git a/workflows/pull-data/genbank/README.md b/workflows/pull-data/genbank/README.md
index c235be7..f442b5d 100644
--- a/workflows/pull-data/genbank/README.md
+++ b/workflows/pull-data/genbank/README.md
@@ -3,8 +3,10 @@
```sh
# --- get list of IDs already in PubSeq
sparql-fetch-ids > pubseq_ids.txt
+# --- get list of missing genbank IDs
+genbank-fetch-ids --skip pubseq_ids.txt > genbank_ids.txt
# --- fetch XML
-update-from-genbank.py --skip pubseq_ids.txt --outdir ~/tmp/genbank
+update-from-genbank.py --ids genbank_ids.txt --outdir ~/tmp/genbank
# --- Transform to YAML and FASTA
transform-genbank-xml2yamlfa --dir ~/tmp/genbank id --outdir ~/tmp/pubseq
```
diff --git a/workflows/pull-data/genbank/update-from-genbank.py b/workflows/pull-data/genbank/genbank-fetch-ids.py
index e62a611..e62a611 100755
--- a/workflows/pull-data/genbank/update-from-genbank.py
+++ b/workflows/pull-data/genbank/genbank-fetch-ids.py