From 2cb5faa2b088cf36c8c41633db137fc020a34529 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Wed, 11 Nov 2020 17:38:16 -0500
Subject: Support uploading new metadata only

Arvados-DCO-1.1-Signed-off-by: Peter Amstutz
---
 bh20sequploader/main.py | 48 ++++++++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 22 deletions(-)

(limited to 'bh20sequploader/main.py')

diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index ea0fa70..e2f089f 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -52,23 +52,23 @@ sequence for enough overlap with the reference genome
         failed = True # continue with the FASTA checker
 
     target = []
-    try:
-        log.debug("FASTA/FASTQ QC" if do_qc else "Limited FASTA/FASTQ QC")
-        target.append(qc_fasta(sequence_p1, check_with_mimimap2=do_qc))
-        if sequence_p2:
-            if target[0][2] == 'text/fasta':
-                raise ValueError("It is possible to upload just one FASTA file at a time")
-            target.append(qc_fasta(sequence_p2))
-
-            target[0] = ("reads_1."+target[0][0][6:], target[0][1], target[0][2])
-            target[1] = ("reads_2."+target[1][0][6:], target[1][1], target[1][2])
-
-        if do_qc and target[0][2] == 'text/fasta' and sample_id != target[0][1]:
-            raise ValueError(f"The sample_id field in the metadata ({sample_id}) must be the same as the FASTA header ({target[0][1]})")
-
-    except Exception as e:
-        log.exception("Failed sequence QC")
-        failed = True
+    if sequence_p1:
+        try:
+            log.debug("FASTA/FASTQ QC" if do_qc else "Limited FASTA/FASTQ QC")
+            target.append(qc_fasta(sequence_p1, check_with_mimimap2=do_qc))
+            if sequence_p2:
+                if target[0][2] == 'text/fasta':
+                    raise ValueError("It is possible to upload just one FASTA file at a time")
+                target.append(qc_fasta(sequence_p2))
+
+                target[0] = ("reads_1."+target[0][0][6:], target[0][1], target[0][2])
+                target[1] = ("reads_2."+target[1][0][6:], target[1][1], target[1][2])
+
+            if do_qc and target[0][2] == 'text/fasta' and sample_id != target[0][1]:
+                raise ValueError(f"The sample_id field in the metadata ({sample_id}) must be the same as the FASTA header ({target[0][1]})")
+        except Exception as e:
+            log.exception("Failed sequence QC")
+            failed = True
 
     if failed:
         log.debug("Bailing out!")
@@ -87,7 +87,7 @@ def upload_sequence(col, target, sequence):
 def main():
     parser = argparse.ArgumentParser(description='Upload SARS-CoV-19 sequences for analysis')
     parser.add_argument('metadata', type=argparse.FileType('r'), help='sequence metadata json')
-    parser.add_argument('sequence_p1', type=argparse.FileType('rb'), help='sequence FASTA/FASTQ')
+    parser.add_argument('sequence_p1', type=argparse.FileType('rb'), default=None, nargs='?', help='sequence FASTA/FASTQ')
     parser.add_argument('sequence_p2', type=argparse.FileType('rb'), default=None, nargs='?', help='sequence FASTQ pair')
     parser.add_argument("--validate", action="store_true", help="Dry run, validate only")
     parser.add_argument("--skip-qc", action="store_true", help="Skip local qc check")
@@ -102,7 +102,10 @@ def main():
 
     # ---- First the QC
     target = qc_stuff(args.metadata, args.sequence_p1, args.sequence_p2, not args.skip_qc)
-    seqlabel = target[0][1]
+    if target:
+        seqlabel = target[0][1]
+    else:
+        seqlabel = ""
 
     if args.validate:
         log.info("Valid")
@@ -111,9 +114,10 @@ def main():
     col = arvados.collection.Collection(api_client=api)
 
     # ---- Upload the sequence to Arvados
-    upload_sequence(col, target[0], args.sequence_p1)
-    if args.sequence_p2:
-        upload_sequence(col, target[1], args.sequence_p2)
+    if args.sequence_p1:
+        upload_sequence(col, target[0], args.sequence_p1)
+        if args.sequence_p2:
+            upload_sequence(col, target[1], args.sequence_p2)
 
     # ---- Make sure the metadata YAML is valid
     log.info("Reading metadata")
-- 
cgit v1.2.3