about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Amstutz 2020-06-22 18:23:40 +0000
committer Peter Amstutz 2020-06-22 18:25:45 +0000
commit 76883d02e6073f990ea980dad1f8cf21121255ff (patch)
tree 0fe93dadf69cb5c0c09ff9b98fe18e5baacdd6c4
parent fcf300d0df032f569f21b3ef75a78bb163f55da0 (diff)
download bh20-seq-resource-76883d02e6073f990ea980dad1f8cf21121255ff.tar.gz
bh20-seq-resource-76883d02e6073f990ea980dad1f8cf21121255ff.tar.lz
bh20-seq-resource-76883d02e6073f990ea980dad1f8cf21121255ff.zip
Adjust QC filter and relabel output sequence with sample_id
-rw-r--r-- bh20seqanalyzer/main.py 9
-rw-r--r-- workflows/fastq2fasta/bam2fasta.cwl 2
-rw-r--r-- workflows/fastq2fasta/bcftools-consensus.cwl 9
-rw-r--r-- workflows/fastq2fasta/bcftools-view-qc.cwl 2
-rw-r--r-- workflows/fastq2fasta/fastq2fasta.cwl 2
5 files changed, 19 insertions, 5 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 9164190..9a36cae 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -30,6 +30,7 @@ def validate_upload(api, collection, validated_project,
try:
metadata_content = ruamel.yaml.round_trip_load(col.open("metadata.yaml"))
metadata_content["id"] = "http://arvados.org/keep:%s/metadata.yaml" % collection["portable_data_hash"]
+ sample_id = metadata_content["sample"]["sample_id"]
add_lc_filename(metadata_content, metadata_content["id"])
valid = qc_metadata(metadata_content) and valid
except Exception as e:
@@ -51,7 +52,7 @@ def validate_upload(api, collection, validated_project,
logging.info("Expected %s but magic says it should be %s", n, tgt)
valid = False
elif tgt in ("reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
- start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid, n)
+ start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid, n, sample_id)
return False
if tgt is None:
valid = False
@@ -108,7 +109,8 @@ def run_workflow(api, parent_project, workflow_uuid, name, inputobj):
def start_fastq_to_fasta(api, collection,
analysis_project,
fastq_workflow_uuid,
- tgt):
+ tgt,
+ sample_id):
params = {
"metadata": {
@@ -118,7 +120,8 @@ def start_fastq_to_fasta(api, collection,
"ref_fasta": {
"class": "File",
"location": "keep:ffef6a3b77e5e04f8f62a7b6f67264d1+556/SARS-CoV2-NC_045512.2.fasta"
- }
+ },
+ "sample_id": sample_id
}
if tgt.startswith("reads.fastq"):
diff --git a/workflows/fastq2fasta/bam2fasta.cwl b/workflows/fastq2fasta/bam2fasta.cwl
index efe580f..dd4020b 100644
--- a/workflows/fastq2fasta/bam2fasta.cwl
+++ b/workflows/fastq2fasta/bam2fasta.cwl
@@ -15,6 +15,7 @@ inputs:
threads:
type: int
default: 4
+ sample_id: string
outputs:
out_fasta:
@@ -61,5 +62,6 @@ steps:
in:
ref_fasta: fasta
vcf: bcftools_index_after_qc/indexed
+ sample_id: sample_id
out: [out_fasta]
run: bcftools-consensus.cwl
diff --git a/workflows/fastq2fasta/bcftools-consensus.cwl b/workflows/fastq2fasta/bcftools-consensus.cwl
index c111792..dffdbe3 100644
--- a/workflows/fastq2fasta/bcftools-consensus.cwl
+++ b/workflows/fastq2fasta/bcftools-consensus.cwl
@@ -4,20 +4,27 @@ cwlVersion: v1.1
hints:
DockerRequirement:
dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+ ShellCommandRequirement: {}
baseCommand: bcftools
arguments:
- consensus
- - -i'QUAL > 1 && GT="A"'
+ - -i
+ - 'QUAL > 1 && GT="a"'
- -Hla
- -f
- $(inputs.ref_fasta)
- $(inputs.vcf)
+ - {shellQuote: false, valueFrom: "|"}
+ - sed
+ - "s/^>.*/>$(inputs.sample_id)/g"
inputs:
- id: ref_fasta
type: File
- id: vcf
type: File
secondaryFiles: [.csi]
+ - id: sample_id
+ type: string
outputs:
- id: out_fasta
type: stdout
diff --git a/workflows/fastq2fasta/bcftools-view-qc.cwl b/workflows/fastq2fasta/bcftools-view-qc.cwl
index 477c596..336f455 100644
--- a/workflows/fastq2fasta/bcftools-view-qc.cwl
+++ b/workflows/fastq2fasta/bcftools-view-qc.cwl
@@ -8,7 +8,7 @@ baseCommand: bcftools
arguments:
- view
- -i
- - 'QUAL>1 && (GT="AA" || GT="Aa")'
+ - 'QUAL > 1 && GT="a"'
- -Oz
- --threads=$(inputs.threads)
- $(inputs.bcf)
diff --git a/workflows/fastq2fasta/fastq2fasta.cwl b/workflows/fastq2fasta/fastq2fasta.cwl
index 0cf5c48..d529d99 100644
--- a/workflows/fastq2fasta/fastq2fasta.cwl
+++ b/workflows/fastq2fasta/fastq2fasta.cwl
@@ -22,6 +22,7 @@ inputs:
type: int
default: 4
metadata: File?
+ sample_id: string
outputs:
out_fasta:
@@ -57,5 +58,6 @@ steps:
bam: samtools-sort/sorted_bam
fasta: ref_fasta
threads: threads
+ sample_id: sample_id
out: [out_fasta]
run: bam2fasta.cwl