From 76883d02e6073f990ea980dad1f8cf21121255ff Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 22 Jun 2020 18:23:40 +0000 Subject: Adjust QC filter and relabel output sequence with sample_id --- bh20seqanalyzer/main.py | 9 ++++++--- workflows/fastq2fasta/bam2fasta.cwl | 2 ++ workflows/fastq2fasta/bcftools-consensus.cwl | 9 ++++++++- workflows/fastq2fasta/bcftools-view-qc.cwl | 2 +- workflows/fastq2fasta/fastq2fasta.cwl | 2 ++ 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py index 9164190..9a36cae 100644 --- a/bh20seqanalyzer/main.py +++ b/bh20seqanalyzer/main.py @@ -30,6 +30,7 @@ def validate_upload(api, collection, validated_project, try: metadata_content = ruamel.yaml.round_trip_load(col.open("metadata.yaml")) metadata_content["id"] = "http://arvados.org/keep:%s/metadata.yaml" % collection["portable_data_hash"] + sample_id = metadata_content["sample"]["sample_id"] add_lc_filename(metadata_content, metadata_content["id"]) valid = qc_metadata(metadata_content) and valid except Exception as e: @@ -51,7 +52,7 @@ def validate_upload(api, collection, validated_project, logging.info("Expected %s but magic says it should be %s", n, tgt) valid = False elif tgt in ("reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"): - start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid, n) + start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid, n, sample_id) return False if tgt is None: valid = False @@ -108,7 +109,8 @@ def run_workflow(api, parent_project, workflow_uuid, name, inputobj): def start_fastq_to_fasta(api, collection, analysis_project, fastq_workflow_uuid, - tgt): + tgt, + sample_id): params = { "metadata": { @@ -118,7 +120,8 @@ def start_fastq_to_fasta(api, collection, "ref_fasta": { "class": "File", "location": "keep:ffef6a3b77e5e04f8f62a7b6f67264d1+556/SARS-CoV2-NC_045512.2.fasta" - } + }, + "sample_id": sample_id } if tgt.startswith("reads.fastq"): diff --git a/workflows/fastq2fasta/bam2fasta.cwl b/workflows/fastq2fasta/bam2fasta.cwl index efe580f..dd4020b 100644 --- a/workflows/fastq2fasta/bam2fasta.cwl +++ b/workflows/fastq2fasta/bam2fasta.cwl @@ -15,6 +15,7 @@ inputs: threads: type: int default: 4 + sample_id: string outputs: out_fasta: @@ -61,5 +62,6 @@ steps: in: ref_fasta: fasta vcf: bcftools_index_after_qc/indexed + sample_id: sample_id out: [out_fasta] run: bcftools-consensus.cwl diff --git a/workflows/fastq2fasta/bcftools-consensus.cwl b/workflows/fastq2fasta/bcftools-consensus.cwl index c111792..dffdbe3 100644 --- a/workflows/fastq2fasta/bcftools-consensus.cwl +++ b/workflows/fastq2fasta/bcftools-consensus.cwl @@ -4,20 +4,27 @@ cwlVersion: v1.1 hints: DockerRequirement: dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0" + ShellCommandRequirement: {} baseCommand: bcftools arguments: - consensus - - -i'QUAL > 1 && GT="A"' + - -i + - 'QUAL > 1 && GT="a"' - -Hla - -f - $(inputs.ref_fasta) - $(inputs.vcf) + - {shellQuote: false, valueFrom: "|"} + - sed + - "s/^>.*/>$(inputs.sample_id)/g" inputs: - id: ref_fasta type: File - id: vcf type: File secondaryFiles: [.csi] + - id: sample_id + type: string outputs: - id: out_fasta type: stdout diff --git a/workflows/fastq2fasta/bcftools-view-qc.cwl b/workflows/fastq2fasta/bcftools-view-qc.cwl index 477c596..336f455 100644 --- a/workflows/fastq2fasta/bcftools-view-qc.cwl +++ b/workflows/fastq2fasta/bcftools-view-qc.cwl @@ -8,7 +8,7 @@ baseCommand: bcftools arguments: - view - -i - - 'QUAL>1 && (GT="AA" || GT="Aa")' + - 'QUAL > 1 && GT="a"' - -Oz - --threads=$(inputs.threads) - $(inputs.bcf) diff --git a/workflows/fastq2fasta/fastq2fasta.cwl b/workflows/fastq2fasta/fastq2fasta.cwl index 0cf5c48..d529d99 100644 --- a/workflows/fastq2fasta/fastq2fasta.cwl +++ b/workflows/fastq2fasta/fastq2fasta.cwl @@ -22,6 +22,7 @@ inputs: type: int default: 4 metadata: File? + sample_id: string outputs: out_fasta: @@ -57,5 +58,6 @@ steps: bam: samtools-sort/sorted_bam fasta: ref_fasta threads: threads + sample_id: sample_id out: [out_fasta] run: bam2fasta.cwl -- cgit v1.2.3