-rw-r--r-- | bh20seqanalyzer/main.py                                               | 71
-rw-r--r-- | bh20sequploader/main.py                                               | 58
-rw-r--r-- | bh20sequploader/qc_fasta.py                                           | 35
-rw-r--r-- | scripts/dict_ontology_standardization/ncbi_countries.csv              | 32
-rw-r--r-- | scripts/dict_ontology_standardization/ncbi_host_species.csv           |  2
-rw-r--r-- | scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv  |  7
-rw-r--r-- | scripts/dict_ontology_standardization/ncbi_speciesman_source.csv      |  1
-rw-r--r-- | scripts/docker/Dockerfile                                             |  4
-rw-r--r-- | workflows/fastq2fasta/bam2fasta.cwl                                   |  2
-rw-r--r-- | workflows/fastq2fasta/bcftools-consensus.cwl                          |  9
-rw-r--r-- | workflows/fastq2fasta/bcftools-view-qc.cwl                            |  2
-rw-r--r-- | workflows/fastq2fasta/fastq2fasta.cwl                                 |  2
12 files changed, 161 insertions, 64 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 31ad4c4..9a36cae 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -30,6 +30,7 @@ def validate_upload(api, collection, validated_project,
     try:
         metadata_content = ruamel.yaml.round_trip_load(col.open("metadata.yaml"))
         metadata_content["id"] = "http://arvados.org/keep:%s/metadata.yaml" % collection["portable_data_hash"]
+        sample_id = metadata_content["sample"]["sample_id"]
         add_lc_filename(metadata_content, metadata_content["id"])
         valid = qc_metadata(metadata_content) and valid
     except Exception as e:
@@ -39,21 +40,25 @@
         logging.warn("Failed metadata qc")
 
     if valid:
-        tgt = None
-        for n in ("sequence.fasta", "reads.fastq"):
-            if n not in col:
-                continue
-            with col.open(n) as qf:
-                tgt = qc_fasta(qf)
-                if tgt != n:
-                    logging.info("Expected %s but magic says it should be %s", n, tgt)
-                    valid = False
-                elif tgt == "reads.fastq":
-                    start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid)
-                    return False
-        if tgt is None:
+        try:
+            tgt = None
+            paired = {"reads_1.fastq": "reads.fastq", "reads_1.fastq.gz": "reads.fastq.gz"}
+            for n in ("sequence.fasta", "reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
+                if n not in col:
+                    continue
+                with col.open(n, 'rb') as qf:
+                    tgt = qc_fasta(qf)[0]
+                    if tgt != n and tgt != paired.get(n):
+                        logging.info("Expected %s but magic says it should be %s", n, tgt)
+                        valid = False
+                    elif tgt in ("reads.fastq", "reads.fastq.gz", "reads_1.fastq", "reads_1.fastq.gz"):
+                        start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid, n, sample_id)
+                        return False
+            if tgt is None:
+                valid = False
+                logging.warn("Upload '%s' does not contain sequence.fasta, reads.fastq or reads_1.fastq", collection["name"])
+        except ValueError as v:
             valid = False
-            logging.warn("Upload '%s' does not contain sequence.fasta or reads.fastq", collection["name"])
 
     dup = api.collections().list(filters=[["owner_uuid", "=", validated_project],
                                           ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
@@ -69,9 +74,8 @@ def validate_upload(api, collection, validated_project,
                 "owner_uuid": validated_project,
                 "name": "%s (%s)" % (collection["name"], time.asctime(time.gmtime()))}).execute()
     else:
-        pass
         # It is invalid, delete it.
-        #logging.warn("Deleting '%s'" % collection["name"])
+        logging.warn("Suggest deleting '%s'" % collection["name"])
         #api.collections().delete(uuid=collection["uuid"]).execute()
 
     return valid
@@ -95,6 +99,7 @@ def run_workflow(api, parent_project, workflow_uuid, name, inputobj):
                tmp.name]
         logging.info("Running %s" % ' '.join(cmd))
         comp = subprocess.run(cmd, capture_output=True)
+        logging.info("Submitted %s", comp.stdout)
     if comp.returncode != 0:
         logging.error(comp.stderr.decode('utf-8'))
 
@@ -103,12 +108,11 @@
 
 def start_fastq_to_fasta(api, collection,
                          analysis_project,
-                         fastq_workflow_uuid):
-    newproject = run_workflow(api, analysis_project, fastq_workflow_uuid, "FASTQ to FASTA", {
-        "fastq_forward": {
-            "class": "File",
-            "location": "keep:%s/reads.fastq" % collection["portable_data_hash"]
-        },
+                         fastq_workflow_uuid,
+                         tgt,
+                         sample_id):
+
+    params = {
         "metadata": {
             "class": "File",
             "location": "keep:%s/metadata.yaml" % collection["portable_data_hash"]
@@ -116,8 +120,26 @@ def start_fastq_to_fasta(api, collection,
         "ref_fasta": {
            "class": "File",
            "location": "keep:ffef6a3b77e5e04f8f62a7b6f67264d1+556/SARS-CoV2-NC_045512.2.fasta"
+        },
+        "sample_id": sample_id
+    }
+
+    if tgt.startswith("reads.fastq"):
+        params["fastq_forward"] = {
+            "class": "File",
+            "location": "keep:%s/%s" % (collection["portable_data_hash"], tgt)
+        }
+    elif tgt.startswith("reads_1.fastq"):
+        params["fastq_forward"] = {
+            "class": "File",
+            "location": "keep:%s/reads_1.%s" % (collection["portable_data_hash"], tgt[8:])
         }
-    })
+        params["fastq_reverse"] = {
+            "class": "File",
+            "location": "keep:%s/reads_2.%s" % (collection["portable_data_hash"], tgt[8:])
+        }
+
+    newproject = run_workflow(api, analysis_project, fastq_workflow_uuid, "FASTQ to FASTA", params)
 
     api.collections().update(uuid=collection["uuid"],
                              body={"owner_uuid": newproject["uuid"]}).execute()
@@ -222,6 +244,7 @@ def main():
     parser.add_argument('--latest-result-collection', type=str, default='lugli-4zz18-z513nlpqm03hpca', help='')
     parser.add_argument('--kickoff', action="store_true")
+    parser.add_argument('--once', action="store_true")
     args = parser.parse_args()
 
     api = arvados.api()
 
@@ -265,4 +288,6 @@ def main():
                                      args.pangenome_analysis_project,
                                      args.latest_result_collection)
+        if args.once:
+            break
         time.sleep(15)
 
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index a2e62fa..c442af0 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -22,18 +22,10 @@ ARVADOS_API_HOST='lugli.arvadosapi.com'
 ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462'
 UPLOAD_PROJECT='lugli-j7d0g-n5clictpuvwk8aa'
 
-def main():
-    parser = argparse.ArgumentParser(description='Upload SARS-CoV-19 sequences for analysis')
-    parser.add_argument('sequence', type=argparse.FileType('r'), help='sequence FASTA/FASTQ')
-    parser.add_argument('metadata', type=argparse.FileType('r'), help='sequence metadata json')
-    parser.add_argument("--validate", action="store_true", help="Dry run, validate only")
-    args = parser.parse_args()
-
-    api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
-
+def qa_stuff(metadata, sequence_p1, sequence_p2):
     try:
         log.debug("Checking metadata")
-        if not qc_metadata(args.metadata.name):
+        if not qc_metadata(metadata.name):
             log.warning("Failed metadata qc")
             exit(1)
     except ValueError as e:
@@ -42,29 +34,52 @@ def main():
         print(e)
         exit(1)
 
+    target = []
     try:
-        log.debug("Checking FASTA QC")
-        target = qc_fasta(args.sequence)
+        log.debug("Checking FASTA/FASTQ QC")
+        target.append(qc_fasta(sequence_p1))
+        if sequence_p2:
+            target.append(qc_fasta(sequence_p2))
+            target[0] = ("reads_1."+target[0][0][6:], target[0][1])
+            target[1] = ("reads_2."+target[1][0][6:], target[0][1])
     except ValueError as e:
         log.debug(e)
         log.debug("Failed FASTA qc")
         print(e)
         exit(1)
 
+    return target
+
+def upload_sequence(col, target, sequence):
+    with col.open(target[0], "wb") as f:
+        r = sequence.read(65536)
+        while r:
+            f.write(r)
+            r = sequence.read(65536)
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Upload SARS-CoV-19 sequences for analysis')
+    parser.add_argument('metadata', type=argparse.FileType('r'), help='sequence metadata json')
+    parser.add_argument('sequence_p1', type=argparse.FileType('rb'), help='sequence FASTA/FASTQ')
+    parser.add_argument('sequence_p2', type=argparse.FileType('rb'), default=None, help='sequence FASTQ pair')
+    parser.add_argument("--validate", action="store_true", help="Dry run, validate only")
+    args = parser.parse_args()
+
+    api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
+
+    target = qa_stuff(args.metadata, args.sequence_p1, args.sequence_p2)
+    seqlabel = target[0][1]
+
     if args.validate:
         print("Valid")
         exit(0)
 
     col = arvados.collection.Collection(api_client=api)
 
-    with col.open(target, "w") as f:
-        r = args.sequence.read(65536)
-        seqlabel = r[1:r.index("\n")]
-        print(seqlabel)
-        while r:
-            f.write(r)
-            r = args.sequence.read(65536)
-    args.sequence.close()
+    upload_sequence(col, target[0], args.sequence_p1)
+    if args.sequence_p2:
+        upload_sequence(col, target[1], args.sequence_p2)
 
     print("Reading metadata")
     with col.open("metadata.yaml", "w") as f:
@@ -73,7 +88,6 @@ def main():
         while r:
             f.write(r)
             r = args.metadata.read(65536)
-    args.metadata.close()
 
     external_ip = urllib.request.urlopen('https://ident.me').read().decode('utf8')
 
@@ -93,6 +107,8 @@ def main():
                  (seqlabel, properties['upload_user'], properties['upload_ip']),
                  properties=properties, ensure_unique_name=True)
+    print("Saved to %s" % col.manifest_locator())
+
     print("Done")
 
 if __name__ == "__main__":
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index 5c8cf3a..e198430 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -5,6 +5,8 @@ import subprocess
 import tempfile
 import logging
 import re
+import io
+import gzip
 
 log = logging.getLogger(__name__ )
 
@@ -23,7 +25,7 @@ def read_fasta(sequence):
             raise ValueError("FASTA file contains multiple entries")
     return label, bases
 
-def qc_fasta(sequence):
+def qc_fasta(arg_sequence):
     log.debug("Starting qc_fasta")
     schema_resource = pkg_resources.resource_stream(__name__, "validation/formats")
     with tempfile.NamedTemporaryFile() as tmp:
@@ -31,12 +33,24 @@ def qc_fasta(sequence):
         tmp.flush()
         val = magic.Magic(magic_file=tmp.name, uncompress=False, mime=True)
-        seq_type = val.from_buffer(sequence.read(4096)).lower()
+
+    gz = ""
+    if arg_sequence.name.endswith(".gz"):
+        sequence = gzip.GzipFile(fileobj=arg_sequence, mode='rb')
+        gz = ".gz"
+    else:
+        sequence = arg_sequence
+
+    sequence = io.TextIOWrapper(sequence)
+    r = sequence.read(4096)
     sequence.seek(0)
+
+    seqlabel = r[1:r.index("\n")]
+    seq_type = val.from_buffer(r).lower()
+
     if seq_type == "text/fasta":
         # ensure that contains only one entry
         submitlabel, submitseq = read_fasta(sequence)
-        sequence.seek(0)
 
         with tempfile.NamedTemporaryFile() as tmp1:
             refstring = pkg_resources.resource_string(__name__, "SARS-CoV-2-reference.fasta")
@@ -44,6 +58,9 @@ def qc_fasta(sequence):
             tmp1.write(submitlabel.encode("utf8"))
             tmp1.write(("".join(submitseq)).encode("utf8"))
             tmp1.flush()
+            subbp = 0
+            refbp = 0
+            similarity = 0
             try:
                 cmd = ["clustalw", "-infile="+tmp1.name,
                        "-quicktree", "-iteration=none", "-type=DNA"]
@@ -64,15 +81,17 @@ def qc_fasta(sequence):
             except Exception as e:
                 logging.warn("Error trying to QC against reference sequence using 'clustalw': %s", e)
 
-            if (subbp/refbp) < .7:
+            if refbp and (subbp/refbp) < .7:
                 raise ValueError("QC fail: submit sequence length is shorter than 70% reference")
-            if (subbp/refbp) > 1.3:
+            if refbp and (subbp/refbp) > 1.3:
                 raise ValueError("QC fail: submit sequence length is greater than 130% reference")
-            if similarity < 70.0:
+            if similarity and similarity < 70.0:
                 raise ValueError("QC fail: submit similarity is less than 70%")
+            if refbp == 0 or similarity == 0:
+                raise ValueError("QC fail")
 
-        return "sequence.fasta"
+        return ("sequence.fasta"+gz, seqlabel)
     elif seq_type == "text/fastq":
-        return "reads.fastq"
+        return ("reads.fastq"+gz, seqlabel)
     else:
         raise ValueError("Sequence file does not look like a DNA FASTA or FASTQ")
diff --git a/scripts/dict_ontology_standardization/ncbi_countries.csv b/scripts/dict_ontology_standardization/ncbi_countries.csv
index 7e83564..85d4e8a 100644
--- a/scripts/dict_ontology_standardization/ncbi_countries.csv
+++ b/scripts/dict_ontology_standardization/ncbi_countries.csv
@@ -12,6 +12,9 @@ Armenia,http://www.wikidata.org/entity/Q399
 Australia,http://www.wikidata.org/entity/Q408
 Australia: Queensland,http://www.wikidata.org/entity/Q36074
 Australia: Victoria,http://www.wikidata.org/entity/Q36687
+"Australia: Melbourne, Victoria",http://www.wikidata.org/entity/Q3141
+Australia: Northern Territory,http://www.wikidata.org/entity/Q3235
+Australia: NSW,http://www.wikidata.org/entity/Q3224
 Austria,http://www.wikidata.org/entity/Q40
 Azerbaijan,http://www.wikidata.org/entity/Q227
 Bahrain,http://www.wikidata.org/entity/Q398
@@ -31,8 +34,10 @@ Bulgaria,http://www.wikidata.org/entity/Q219
 Burkina Faso,http://www.wikidata.org/entity/Q965
 Burundi,http://www.wikidata.org/entity/Q967
 Cambodia,http://www.wikidata.org/entity/Q424
+Cambodia:Sihanoukville,http://www.wikidata.org/entity/Q18207676
 Cameroon,http://www.wikidata.org/entity/Q1009
 Canada,http://www.wikidata.org/entity/Q16
+Canada: Toronto,http://www.wikidata.org/entity/Q172
 Cape Verde,http://www.wikidata.org/entity/Q1011
 Central African Republic,http://www.wikidata.org/entity/Q929
 Chad,http://www.wikidata.org/entity/Q657
@@ -65,12 +70,13 @@ China: Jiangxi,http://www.wikidata.org/entity/Q57052
 China: Jilin,http://www.wikidata.org/entity/Q45208
 China: Liaoning,http://www.wikidata.org/entity/Q43934
 China: Macau,http://www.wikidata.org/entity/Q14773
-China: Nanchang,https://www.wikidata.org/wiki/Q171943
+China: Nanchang,http://www.wikidata.org/entity/Q171943
 China: Ningxia Hui Autonomous Region,http://www.wikidata.org/entity/Q57448
 China: Qinghai,http://www.wikidata.org/entity/Q45833
 China: Shaanxi,http://www.wikidata.org/entity/Q47974
 China: Shandong,http://www.wikidata.org/entity/Q43407
 China: Shanghai,http://www.wikidata.org/entity/Q8686
+China:Shanghai,http://www.wikidata.org/entity/Q8686
 China: Shanxi,http://www.wikidata.org/entity/Q46913
 China: Shenzhen,http://www.wikidata.org/entity/Q15174
 China: Sichuan,http://www.wikidata.org/entity/Q19770
@@ -111,11 +117,13 @@ France,http://www.wikidata.org/entity/Q142
 Gabon,http://www.wikidata.org/entity/Q1000
 Georgia,http://www.wikidata.org/entity/Q230
 Germany,http://www.wikidata.org/entity/Q183
-Germany: Bavaria,https://www.wikidata.org/wiki/Q980
-Germany: Dusseldorf,https://www.wikidata.org/wiki/Q1718
+Germany: Bavaria,http://www.wikidata.org/entity/Q980
+Germany: Dusseldorf,http://www.wikidata.org/entity/Q1718
+Germany: Heinsberg,http://www.wikidata.org/entity/Q14833
+Germany: Starnberg,http://www.wikidata.org/entity/Q61936
 Ghana,http://www.wikidata.org/entity/Q117
 Greece,http://www.wikidata.org/entity/Q41
-Greece: Athens,https://www.wikidata.org/wiki/Q1524
+Greece: Athens,http://www.wikidata.org/entity/Q1524
 Grenada,http://www.wikidata.org/entity/Q769
 Guatemala,http://www.wikidata.org/entity/Q774
 Guinea,http://www.wikidata.org/entity/Q1006
@@ -139,8 +147,8 @@ Ireland,http://www.wikidata.org/entity/Q27
 Israel,http://www.wikidata.org/entity/Q801
 Italy,http://www.wikidata.org/entity/Q38
 Italy: Cagliari,http://www.wikidata.org/entity/Q1897
-Italy: Lazio,https://www.wikidata.org/wiki/Q1282
-Italy: Palermo,https://www.wikidata.org/wiki/Q2656
+Italy: Lazio,http://www.wikidata.org/entity/Q1282
+Italy: Palermo,http://www.wikidata.org/entity/Q2656
 Italy: Rome,http://www.wikidata.org/entity/Q220
 Ivory Coast,http://www.wikidata.org/entity/Q1008
 Jamaica,http://www.wikidata.org/entity/Q766
@@ -181,7 +189,7 @@ Mozambique,http://www.wikidata.org/entity/Q1029
 Myanmar,http://www.wikidata.org/entity/Q836
 Namibia,http://www.wikidata.org/entity/Q1030
 Nauru,http://www.wikidata.org/entity/Q697
-Netherlands: Milheeze,https://www.wikidata.org/wiki/Q3314115
+Netherlands: Milheeze,http://www.wikidata.org/entity/Q3314115
 Nepal,http://www.wikidata.org/entity/Q837
 New Zealand,http://www.wikidata.org/entity/Q664
 Nicaragua,http://www.wikidata.org/entity/Q811
@@ -269,10 +277,14 @@ USA: AK,http://www.wikidata.org/entity/Q797
 USA: AL,http://www.wikidata.org/entity/Q173
 USA: AR,http://www.wikidata.org/entity/Q1612
 USA: AZ,http://www.wikidata.org/entity/Q816
+USA: Arizona,http://www.wikidata.org/entity/Q816
 USA: CA,http://www.wikidata.org/entity/Q99
+USA: California,http://www.wikidata.org/entity/Q99
+USA:California,http://www.wikidata.org/entity/Q99
 "USA: CA, San Diego County",http://www.wikidata.org/entity/Q108143
 USA: CO,http://www.wikidata.org/entity/Q1261
 USA: CT,http://www.wikidata.org/entity/Q779
+USA: Connecticut,http://www.wikidata.org/entity/Q779
 USA: DC,http://www.wikidata.org/entity/Q3551781
 USA: DE,http://www.wikidata.org/entity/Q1393
 USA: FL,http://www.wikidata.org/entity/Q812
@@ -287,7 +299,7 @@ USA: IN,http://www.wikidata.org/entity/Q1415
 USA: KS,http://www.wikidata.org/entity/Q1558
 USA: KY,http://www.wikidata.org/entity/Q1603
 USA: LA,http://www.wikidata.org/entity/Q1588
-"USA: New Orleans, LA",https://www.wikidata.org/wiki/Q34404
+"USA: New Orleans, LA",http://www.wikidata.org/entity/Q34404
 USA: MA,http://www.wikidata.org/entity/Q771
 USA: Massachusetts,http://www.wikidata.org/entity/Q771
 USA: MD,http://www.wikidata.org/entity/Q1391
@@ -295,6 +307,8 @@ USA: ME,http://www.wikidata.org/entity/Q724
 USA: MI,http://www.wikidata.org/entity/Q1166
 USA: Michigan,http://www.wikidata.org/entity/Q1166
 USA: MN,http://www.wikidata.org/entity/Q1527
+USA:Minnesota,http://www.wikidata.org/entity/Q1527
+USA: Minnesota,http://www.wikidata.org/entity/Q1527
 USA: MO,http://www.wikidata.org/entity/Q1581
 USA: MS,http://www.wikidata.org/entity/Q1494
 USA: MT,http://www.wikidata.org/entity/Q1212
@@ -323,8 +337,10 @@ USA: TX,http://www.wikidata.org/entity/Q1439
 USA: UT,http://www.wikidata.org/entity/Q829
 USA: VA,http://www.wikidata.org/entity/Q1370
 USA: Virginia,http://www.wikidata.org/entity/Q1370
+USA:Virginia,http://www.wikidata.org/entity/Q1370
 USA: VT,http://www.wikidata.org/entity/Q16551
 USA: WA,http://www.wikidata.org/entity/Q1223
+USA: Washington,http://www.wikidata.org/entity/Q1223
 USA: WI,http://www.wikidata.org/entity/Q1537
 USA: Wisconsin,http://www.wikidata.org/entity/Q1537
 USA: WV,http://www.wikidata.org/entity/Q1371
diff --git a/scripts/dict_ontology_standardization/ncbi_host_species.csv b/scripts/dict_ontology_standardization/ncbi_host_species.csv
index 0d2120c..102d458 100644
--- a/scripts/dict_ontology_standardization/ncbi_host_species.csv
+++ b/scripts/dict_ontology_standardization/ncbi_host_species.csv
@@ -1,4 +1,6 @@
 Homo sapiens,http://purl.obolibrary.org/obo/NCBITaxon_9606
+human,http://purl.obolibrary.org/obo/NCBITaxon_9606
+Human,http://purl.obolibrary.org/obo/NCBITaxon_9606
 Mustela lutreola,http://purl.obolibrary.org/obo/NCBITaxon_9666
 Manis javanica,http://purl.obolibrary.org/obo/NCBITaxon_9974
 Felis catus,http://purl.obolibrary.org/obo/NCBITaxon_9685
diff --git a/scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv b/scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv
index 49cb6b7..110e90b 100644
--- a/scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv
+++ b/scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv
@@ -1,6 +1,13 @@
 Illumian NextSeq 500,http://www.ebi.ac.uk/efo/EFO_0009173
 Illumina NextSeq 500,http://www.ebi.ac.uk/efo/EFO_0009173
 NextSeq500,http://www.ebi.ac.uk/efo/EFO_0009173
+NextSeq 500,http://www.ebi.ac.uk/efo/EFO_0009173
+Illumian NextSeq 550,http://www.ebi.ac.uk/efo/EFO_0008566
+Illumina NextSeq 550,http://www.ebi.ac.uk/efo/EFO_0008566
+NextSeq550,http://www.ebi.ac.uk/efo/EFO_0008566
+NextSeq 550,http://www.ebi.ac.uk/efo/EFO_0008566
+Illumina MiniSeq,http://www.ebi.ac.uk/efo/EFO_0008636
+Illumina NovaSeq 6000,http://www.ebi.ac.uk/efo/EFO_0008637
 Nanopore MinION,http://www.ebi.ac.uk/efo/EFO_0008632
 Oxford Nanopore MinION,http://www.ebi.ac.uk/efo/EFO_0008632
 ONT (Oxford Nanopore Technologies),http://purl.obolibrary.org/obo/NCIT_C146818
diff --git a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
index 18b986c..0fa2219 100644
--- a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
+++ b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
@@ -14,6 +14,7 @@ oropharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155835
 throat swab,http://purl.obolibrary.org/obo/NCIT_C155835
 oro-pharyngeal,http://purl.obolibrary.org/obo/NCIT_C155835
 Oropharyngal,http://purl.obolibrary.org/obo/NCIT_C155835
+oralpharyngeal,http://purl.obolibrary.org/obo/NCIT_C155835
 Oral-pharyngeal,http://purl.obolibrary.org/obo/NCIT_C155835
 Oro-pharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155835
 Oropharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155835
diff --git a/scripts/docker/Dockerfile b/scripts/docker/Dockerfile
index 5bd38dd..9fb33d5 100644
--- a/scripts/docker/Dockerfile
+++ b/scripts/docker/Dockerfile
@@ -4,7 +4,7 @@ RUN apt-get update && \
     apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \
     python3 python3-pip python3-setuptools python3-dev python-pycurl \
     clustalw python3-biopython libcurl4-openssl-dev build-essential \
-    libssl-dev && \
+    libssl-dev libmagic-dev python3-magic && \
     apt-get clean
 
-RUN pip3 install bh20-seq-uploader
\ No newline at end of file
+RUN pip3 install bh20-seq-uploader
diff --git a/workflows/fastq2fasta/bam2fasta.cwl b/workflows/fastq2fasta/bam2fasta.cwl
index efe580f..dd4020b 100644
--- a/workflows/fastq2fasta/bam2fasta.cwl
+++ b/workflows/fastq2fasta/bam2fasta.cwl
@@ -15,6 +15,7 @@ inputs:
   threads:
     type: int
     default: 4
+  sample_id: string
 
 outputs:
   out_fasta:
@@ -61,5 +62,6 @@ steps:
     in:
       ref_fasta: fasta
      vcf: bcftools_index_after_qc/indexed
+      sample_id: sample_id
     out: [out_fasta]
     run: bcftools-consensus.cwl
diff --git a/workflows/fastq2fasta/bcftools-consensus.cwl b/workflows/fastq2fasta/bcftools-consensus.cwl
index c111792..dffdbe3 100644
--- a/workflows/fastq2fasta/bcftools-consensus.cwl
+++ b/workflows/fastq2fasta/bcftools-consensus.cwl
@@ -4,20 +4,27 @@ cwlVersion: v1.1
 hints:
   DockerRequirement:
     dockerPull: "quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0"
+  ShellCommandRequirement: {}
 baseCommand: bcftools
 arguments:
   - consensus
-  - -i'QUAL > 1 && GT="A"'
+  - -i
+  - 'QUAL > 1 && GT="a"'
   - -Hla
   - -f
   - $(inputs.ref_fasta)
   - $(inputs.vcf)
+  - {shellQuote: false, valueFrom: "|"}
+  - sed
+  - "s/^>.*/>$(inputs.sample_id)/g"
 inputs:
   - id: ref_fasta
     type: File
   - id: vcf
     type: File
     secondaryFiles: [.csi]
+  - id: sample_id
+    type: string
 outputs:
   - id: out_fasta
     type: stdout
diff --git a/workflows/fastq2fasta/bcftools-view-qc.cwl b/workflows/fastq2fasta/bcftools-view-qc.cwl
index 477c596..336f455 100644
--- a/workflows/fastq2fasta/bcftools-view-qc.cwl
+++ b/workflows/fastq2fasta/bcftools-view-qc.cwl
@@ -8,7 +8,7 @@ baseCommand: bcftools
 arguments:
   - view
   - -i
-  - 'QUAL>1 && (GT="AA" || GT="Aa")'
+  - 'QUAL > 1 && GT="a"'
   - -Oz
   - --threads=$(inputs.threads)
   - $(inputs.bcf)
diff --git a/workflows/fastq2fasta/fastq2fasta.cwl b/workflows/fastq2fasta/fastq2fasta.cwl
index 0cf5c48..d529d99 100644
--- a/workflows/fastq2fasta/fastq2fasta.cwl
+++ b/workflows/fastq2fasta/fastq2fasta.cwl
@@ -22,6 +22,7 @@ inputs:
     type: int
     default: 4
   metadata: File?
+  sample_id: string
 
 outputs:
   out_fasta:
@@ -57,5 +58,6 @@ steps:
       bam: samtools-sort/sorted_bam
       fasta: ref_fasta
      threads: threads
+      sample_id: sample_id
     out: [out_fasta]
     run: bam2fasta.cwl
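For reference, a minimal sketch (not part of the commit) of how the changed qc_fasta() contract is consumed: per the diff above it now takes a binary file object, transparently handles .gz input, and returns a (target_name, label) tuple instead of a bare filename. The input file names and the module-style invocation in the comment are assumptions based on the package layout shown in this diff, not something the commit itself documents.

```python
# Usage sketch, assuming the bh20sequploader package layout from this diff.
# "reads_1.fastq.gz" / "reads_2.fastq.gz" are hypothetical example files.
#
# Paired-end upload with the new CLI (argument order per the argparse changes above):
#   python3 -m bh20sequploader.main metadata.yaml reads_1.fastq.gz reads_2.fastq.gz
from bh20sequploader.qc_fasta import qc_fasta

with open("reads_1.fastq.gz", "rb") as seq:
    # qc_fasta() returns (target_name, label), e.g. ("reads.fastq.gz", "<first header line>")
    target_name, label = qc_fasta(seq)
    print(target_name, label)
```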