diff options
author | Pjotr Prins | 2020-12-31 10:25:26 +0000 |
---|---|---|
committer | Pjotr Prins | 2020-12-31 10:25:26 +0000 |
commit | e33ce088225b9975a84a6724ab8e3a9f82964ec6 (patch) | |
tree | 338f687aaf75755158352ef7524d0af3d716cfdb | |
parent | 63b75e9cf095737fa8d700d1fd1e4d90e17999f5 (diff) | |
download | bh20-seq-resource-e33ce088225b9975a84a6724ab8e3a9f82964ec6.tar.gz bh20-seq-resource-e33ce088225b9975a84a6724ab8e3a9f82964ec6.tar.lz bh20-seq-resource-e33ce088225b9975a84a6724ab8e3a9f82964ec6.zip |
genbank: pseudo workflow
-rw-r--r-- | workflows/pull-data/genbank/README.md | 13 | ||||
-rwxr-xr-x | workflows/pull-data/genbank/genbank-fetch-ids.py | 0 | ||||
-rwxr-xr-x | workflows/pull-data/genbank/sparql-fetch-ids.py | 0 | ||||
-rwxr-xr-x | workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py (renamed from workflows/pull-data/genbank/transform_genbank2yamlfa.py) | 0 | ||||
-rwxr-xr-x | workflows/pull-data/genbank/update-from-genbank.py (renamed from workflows/pull-data/genbank/genbank_pull.py) | 0 |
5 files changed, 13 insertions, 0 deletions
diff --git a/workflows/pull-data/genbank/README.md b/workflows/pull-data/genbank/README.md new file mode 100644 index 0000000..ee67e70 --- /dev/null +++ b/workflows/pull-data/genbank/README.md @@ -0,0 +1,13 @@ +Pipeline: + +```sh +# --- get list of IDs already in PubSeq +sparql-fetch-ids > pubseq_ids.txt +# --- fetch XML +update-from-genbank --skip pubseq_ids.txt --max 100 --outdir ~/tmp/genbank +# --- get new IDs +genbank-fetch-ids --dir ~/tmp/pubseq > genbank_ids.txt +# --- loop through IDs (pseudo code) +for id in genbank_ids.txt: + transform-genbank-xml2yamlfa --dir ~/tmp/genbank id --outdir ~/tmp/pubseq +``` diff --git a/workflows/pull-data/genbank/genbank-fetch-ids.py b/workflows/pull-data/genbank/genbank-fetch-ids.py new file mode 100755 index 0000000..e69de29 --- /dev/null +++ b/workflows/pull-data/genbank/genbank-fetch-ids.py diff --git a/workflows/pull-data/genbank/sparql-fetch-ids.py b/workflows/pull-data/genbank/sparql-fetch-ids.py new file mode 100755 index 0000000..e69de29 --- /dev/null +++ b/workflows/pull-data/genbank/sparql-fetch-ids.py diff --git a/workflows/pull-data/genbank/transform_genbank2yamlfa.py b/workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py index 8b970b6..8b970b6 100755 --- a/workflows/pull-data/genbank/transform_genbank2yamlfa.py +++ b/workflows/pull-data/genbank/transform-genbank-xml2yamlfa.py diff --git a/workflows/pull-data/genbank/genbank_pull.py b/workflows/pull-data/genbank/update-from-genbank.py index 07bb15d..07bb15d 100755 --- a/workflows/pull-data/genbank/genbank_pull.py +++ b/workflows/pull-data/genbank/update-from-genbank.py |