diff options
-rw-r--r-- | workflows/pull-data/genbank/README.md | 2 | ||||
-rwxr-xr-x | workflows/pull-data/genbank/genbank-fetch-ids | 6 | ||||
-rwxr-xr-x | workflows/pull-data/genbank/update-from-genbank.py | 2 |
3 files changed, 6 insertions, 4 deletions
diff --git a/workflows/pull-data/genbank/README.md b/workflows/pull-data/genbank/README.md
index 0204dd0..22dd920 100644
--- a/workflows/pull-data/genbank/README.md
+++ b/workflows/pull-data/genbank/README.md
@@ -6,7 +6,7 @@ sparql-fetch-ids > pubseq_ids.txt
 # --- fetch XML
 update-from-genbank --skip pubseq_ids.txt --max 100 --outdir ~/tmp/genbank
 # --- get new IDs
-genbank-fetch-ids --dir ~/tmp/pubseq > genbank_ids.txt
+genbank-fetch-ids > genbank_ids.txt
 # --- loop through IDs (pseudo code)
 for id in genbank_ids.txt:
 transform-genbank-xml2yamlfa --dir ~/tmp/genbank id --outdir ~/tmp/pubseq
diff --git a/workflows/pull-data/genbank/genbank-fetch-ids b/workflows/pull-data/genbank/genbank-fetch-ids
index 00f693f..24fe4c7 100755
--- a/workflows/pull-data/genbank/genbank-fetch-ids
+++ b/workflows/pull-data/genbank/genbank-fetch-ids
@@ -1,3 +1,5 @@
-#!/usr/bin/env ruby
-#
+#!/bin/sh
 #
+# genbank-fetch-ids > genbank_ids.tx
+
+curl 'https://www.ncbi.nlm.nih.gov/genomes/VirusVariation/vvsearch2/?q=*:*&fq=%7B!tag=SeqType_s%7DSeqType_s:(%22Nucleotide%22)&fq=VirusLineageId_ss:(2697049)&cmd=download&sort=SourceDB_s%20desc,CreateDate_dt%20desc,id%20asc&dlfmt=acc&fl=id'
diff --git a/workflows/pull-data/genbank/update-from-genbank.py b/workflows/pull-data/genbank/update-from-genbank.py
index 07bb15d..132f553 100755
--- a/workflows/pull-data/genbank/update-from-genbank.py
+++ b/workflows/pull-data/genbank/update-from-genbank.py
@@ -3,7 +3,7 @@
 # - bulk download genbank data and matadata, preparing the FASTA and
 # the YAML files
 #
-# See .guix-run python3 from_genbank_to_fasta_and_yaml.py
+# See .guix-run
 
 import argparse
 parser = argparse.ArgumentParser()