From e33ce088225b9975a84a6724ab8e3a9f82964ec6 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 31 Dec 2020 10:25:26 +0000
Subject: genbank: pseudo workflow

---
 workflows/pull-data/genbank/README.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 workflows/pull-data/genbank/README.md

(limited to 'workflows/pull-data/genbank/README.md')

diff --git a/workflows/pull-data/genbank/README.md b/workflows/pull-data/genbank/README.md
new file mode 100644
index 0000000..ee67e70
--- /dev/null
+++ b/workflows/pull-data/genbank/README.md
@@ -0,0 +1,13 @@
+Pipeline:
+
+```sh
+# --- get list of IDs already in PubSeq
+sparql-fetch-ids > pubseq_ids.txt
+# --- fetch XML
+update-from-genbank --skip pubseq_ids.txt --max 100 --outdir ~/tmp/genbank
+# --- get new IDs
+genbank-fetch-ids --dir ~/tmp/pubseq > genbank_ids.txt
+# --- loop through IDs (pseudo code)
+for id in genbank_ids.txt:
+  transform-genbank-xml2yamlfa --dir ~/tmp/genbank id --outdir ~/tmp/pubseq
+```
-- 
cgit v1.2.3