aboutsummaryrefslogtreecommitdiff
path: root/bh20seqanalyzer
diff options
context:
space:
mode:
authorPeter Amstutz2020-06-22 15:06:10 +0000
committerPeter Amstutz2020-06-22 15:06:10 +0000
commit7daa9ff2cdba742a811db00c924ccde25fa2c9b6 (patch)
treef6f001d224c82825c08b091f221f85897f60d40d /bh20seqanalyzer
parent1554fb6b4daf263f034d46a5f5b26ebcc3e69d22 (diff)
downloadbh20-seq-resource-7daa9ff2cdba742a811db00c924ccde25fa2c9b6.tar.gz
bh20-seq-resource-7daa9ff2cdba742a811db00c924ccde25fa2c9b6.tar.lz
bh20-seq-resource-7daa9ff2cdba742a811db00c924ccde25fa2c9b6.zip
Handle upload & assembly of gzipped, paired-end fastq
Diffstat (limited to 'bh20seqanalyzer')
-rw-r--r--bh20seqanalyzer/main.py48
1 files changed, 34 insertions, 14 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 31ad4c4..794ce27 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -40,20 +40,21 @@ def validate_upload(api, collection, validated_project,
if valid:
tgt = None
- for n in ("sequence.fasta", "reads.fastq"):
+ paired = {"reads_1.fastq": "reads.fastq", "reads_1.fastq.gz": "reads.fastq.gz"}
+ for n in ("sequence.fasta", "reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
if n not in col:
continue
- with col.open(n) as qf:
- tgt = qc_fasta(qf)
- if tgt != n:
+ with col.open(n, 'rb') as qf:
+ tgt = qc_fasta(qf)[0]
+ if tgt != n and tgt != paired.get(n):
logging.info("Expected %s but magic says it should be %s", n, tgt)
valid = False
- elif tgt == "reads.fastq":
- start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid)
+ elif tgt in ("reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
+ start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid, n)
return False
if tgt is None:
valid = False
- logging.warn("Upload '%s' does not contain sequence.fasta or reads.fastq", collection["name"])
+ logging.warn("Upload '%s' does not contain sequence.fasta, reads.fastq or reads_1.fastq", collection["name"])
dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
["portable_data_hash", "=", col.portable_data_hash()]]).execute()
@@ -95,6 +96,7 @@ def run_workflow(api, parent_project, workflow_uuid, name, inputobj):
tmp.name]
logging.info("Running %s" % ' '.join(cmd))
comp = subprocess.run(cmd, capture_output=True)
+ logging.info("Submitted %s", comp.stdout)
if comp.returncode != 0:
logging.error(comp.stderr.decode('utf-8'))
@@ -103,12 +105,10 @@ def run_workflow(api, parent_project, workflow_uuid, name, inputobj):
def start_fastq_to_fasta(api, collection,
analysis_project,
- fastq_workflow_uuid):
- newproject = run_workflow(api, analysis_project, fastq_workflow_uuid, "FASTQ to FASTA", {
- "fastq_forward": {
- "class": "File",
- "location": "keep:%s/reads.fastq" % collection["portable_data_hash"]
- },
+ fastq_workflow_uuid,
+ tgt):
+
+ params = {
"metadata": {
"class": "File",
"location": "keep:%s/metadata.yaml" % collection["portable_data_hash"]
@@ -117,7 +117,24 @@ def start_fastq_to_fasta(api, collection,
"class": "File",
"location": "keep:ffef6a3b77e5e04f8f62a7b6f67264d1+556/SARS-CoV2-NC_045512.2.fasta"
}
- })
+ }
+
+ if tgt.startswith("reads.fastq"):
+ params["fastq_forward"] = {
+ "class": "File",
+ "location": "keep:%s/%s" % (collection["portable_data_hash"], tgt)
+ }
+ elif tgt.startswith("reads_1.fastq"):
+ params["fastq_forward"] = {
+ "class": "File",
+ "location": "keep:%s/reads_1.%s" % (collection["portable_data_hash"], tgt[8:])
+ }
+ params["fastq_reverse"] = {
+ "class": "File",
+ "location": "keep:%s/reads_2.%s" % (collection["portable_data_hash"], tgt[8:])
+ }
+
+ newproject = run_workflow(api, analysis_project, fastq_workflow_uuid, "FASTQ to FASTA", params)
api.collections().update(uuid=collection["uuid"],
body={"owner_uuid": newproject["uuid"]}).execute()
@@ -222,6 +239,7 @@ def main():
parser.add_argument('--latest-result-collection', type=str, default='lugli-4zz18-z513nlpqm03hpca', help='')
parser.add_argument('--kickoff', action="store_true")
+ parser.add_argument('--once', action="store_true")
args = parser.parse_args()
api = arvados.api()
@@ -265,4 +283,6 @@ def main():
args.pangenome_analysis_project,
args.latest_result_collection)
+ if args.once:
+ break
time.sleep(15)