aboutsummaryrefslogtreecommitdiff
path: root/workflows/pull-data/genbank
diff options
context:
space:
mode:
author Pjotr Prins 2021-01-01 09:53:15 +0000
committer Pjotr Prins 2021-01-01 09:53:15 +0000
commit acdd66f36e1596f8337195f22fdd7dd8a85c2b70 (patch)
tree 889464ae7af6505ff4717d82be178e0df2fa9638 /workflows/pull-data/genbank
parent fc28fdbbb5e121eb94bb06334a30e9b5395e003b (diff)
download bh20-seq-resource-acdd66f36e1596f8337195f22fdd7dd8a85c2b70.tar.gz
bh20-seq-resource-acdd66f36e1596f8337195f22fdd7dd8a85c2b70.tar.lz
bh20-seq-resource-acdd66f36e1596f8337195f22fdd7dd8a85c2b70.zip
genbank-fetch-ids simple call
Diffstat (limited to 'workflows/pull-data/genbank')
-rw-r--r-- workflows/pull-data/genbank/README.md 2
-rwxr-xr-x workflows/pull-data/genbank/genbank-fetch-ids 6
-rwxr-xr-x workflows/pull-data/genbank/update-from-genbank.py 2
3 files changed, 6 insertions, 4 deletions
diff --git a/workflows/pull-data/genbank/README.md b/workflows/pull-data/genbank/README.md
index 0204dd0..22dd920 100644
--- a/workflows/pull-data/genbank/README.md
+++ b/workflows/pull-data/genbank/README.md
@@ -6,7 +6,7 @@ sparql-fetch-ids > pubseq_ids.txt
# --- fetch XML
update-from-genbank --skip pubseq_ids.txt --max 100 --outdir ~/tmp/genbank
# --- get new IDs
-genbank-fetch-ids --dir ~/tmp/pubseq > genbank_ids.txt
+genbank-fetch-ids > genbank_ids.txt
# --- loop through IDs (pseudo code)
for id in genbank_ids.txt:
transform-genbank-xml2yamlfa --dir ~/tmp/genbank id --outdir ~/tmp/pubseq
diff --git a/workflows/pull-data/genbank/genbank-fetch-ids b/workflows/pull-data/genbank/genbank-fetch-ids
index 00f693f..24fe4c7 100755
--- a/workflows/pull-data/genbank/genbank-fetch-ids
+++ b/workflows/pull-data/genbank/genbank-fetch-ids
@@ -1,3 +1,5 @@
-#!/usr/bin/env ruby
-#
+#!/bin/sh
#
+# genbank-fetch-ids > genbank_ids.txt
+
+curl 'https://www.ncbi.nlm.nih.gov/genomes/VirusVariation/vvsearch2/?q=*:*&fq=%7B!tag=SeqType_s%7DSeqType_s:(%22Nucleotide%22)&fq=VirusLineageId_ss:(2697049)&cmd=download&sort=SourceDB_s%20desc,CreateDate_dt%20desc,id%20asc&dlfmt=acc&fl=id'
diff --git a/workflows/pull-data/genbank/update-from-genbank.py b/workflows/pull-data/genbank/update-from-genbank.py
index 07bb15d..132f553 100755
--- a/workflows/pull-data/genbank/update-from-genbank.py
+++ b/workflows/pull-data/genbank/update-from-genbank.py
@@ -3,7 +3,7 @@
# - bulk download genbank data and metadata, preparing the FASTA and
# the YAML files
#
-# See .guix-run python3 from_genbank_to_fasta_and_yaml.py
+# See .guix-run
import argparse
parser = argparse.ArgumentParser()