about summary refs log tree commit diff
path: root/bh20sequploader
diff options
context:
space:
mode:
author: Peter Amstutz, 2020-11-11 17:38:16 -0500
committer: Peter Amstutz, 2020-11-11 17:38:47 -0500
commit: 2cb5faa2b088cf36c8c41633db137fc020a34529 (patch)
tree: 6a9f39513e020517f2bac609aaf2e15128411a70 /bh20sequploader
parent: c5c730cdeb9f2e9a02e30c2215dfe2b6ae787b07 (diff)
download: bh20-seq-resource-2cb5faa2b088cf36c8c41633db137fc020a34529.tar.gz
bh20-seq-resource-2cb5faa2b088cf36c8c41633db137fc020a34529.tar.lz
bh20-seq-resource-2cb5faa2b088cf36c8c41633db137fc020a34529.zip
Support uploading new metadata only
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- bh20sequploader/main.py 48
1 files changed, 26 insertions, 22 deletions
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index ea0fa70..e2f089f 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -52,23 +52,23 @@ sequence for enough overlap with the reference genome
         failed = True # continue with the FASTA checker
 
     target = []
-    try:
-        log.debug("FASTA/FASTQ QC" if do_qc else "Limited FASTA/FASTQ QC")
-        target.append(qc_fasta(sequence_p1, check_with_mimimap2=do_qc))
-        if sequence_p2:
-            if target[0][2] == 'text/fasta':
-                raise ValueError("It is possible to upload just one FASTA file at a time")
-            target.append(qc_fasta(sequence_p2))
-
-            target[0] = ("reads_1."+target[0][0][6:], target[0][1], target[0][2])
-            target[1] = ("reads_2."+target[1][0][6:], target[1][1], target[1][2])
-
-        if do_qc and target[0][2] == 'text/fasta' and sample_id != target[0][1]:
-            raise ValueError(f"The sample_id field in the metadata ({sample_id}) must be the same as the FASTA header ({target[0][1]})")
-
-    except Exception as e:
-        log.exception("Failed sequence QC")
-        failed = True
+    if sequence_p1:
+        try:
+            log.debug("FASTA/FASTQ QC" if do_qc else "Limited FASTA/FASTQ QC")
+            target.append(qc_fasta(sequence_p1, check_with_mimimap2=do_qc))
+            if sequence_p2:
+                if target[0][2] == 'text/fasta':
+                    raise ValueError("It is possible to upload just one FASTA file at a time")
+                target.append(qc_fasta(sequence_p2))
+
+                target[0] = ("reads_1."+target[0][0][6:], target[0][1], target[0][2])
+                target[1] = ("reads_2."+target[1][0][6:], target[1][1], target[1][2])
+
+            if do_qc and target[0][2] == 'text/fasta' and sample_id != target[0][1]:
+                raise ValueError(f"The sample_id field in the metadata ({sample_id}) must be the same as the FASTA header ({target[0][1]})")
+        except Exception as e:
+            log.exception("Failed sequence QC")
+            failed = True
 
     if failed:
         log.debug("Bailing out!")
@@ -87,7 +87,7 @@ def upload_sequence(col, target, sequence):
 def main():
     parser = argparse.ArgumentParser(description='Upload SARS-CoV-19 sequences for analysis')
     parser.add_argument('metadata', type=argparse.FileType('r'), help='sequence metadata json')
-    parser.add_argument('sequence_p1', type=argparse.FileType('rb'), help='sequence FASTA/FASTQ')
+    parser.add_argument('sequence_p1', type=argparse.FileType('rb'), default=None, nargs='?', help='sequence FASTA/FASTQ')
     parser.add_argument('sequence_p2', type=argparse.FileType('rb'), default=None, nargs='?', help='sequence FASTQ pair')
     parser.add_argument("--validate", action="store_true", help="Dry run, validate only")
     parser.add_argument("--skip-qc", action="store_true", help="Skip local qc check")
@@ -102,7 +102,10 @@ def main():
 
     # ---- First the QC
     target = qc_stuff(args.metadata, args.sequence_p1, args.sequence_p2, not args.skip_qc)
-    seqlabel = target[0][1]
+    if target:
+        seqlabel = target[0][1]
+    else:
+        seqlabel = ""
 
     if args.validate:
         log.info("Valid")
@@ -111,9 +114,10 @@ def main():
     col = arvados.collection.Collection(api_client=api)
 
     # ---- Upload the sequence to Arvados
-    upload_sequence(col, target[0], args.sequence_p1)
-    if args.sequence_p2:
-        upload_sequence(col, target[1], args.sequence_p2)
+    if args.sequence_p1:
+        upload_sequence(col, target[0], args.sequence_p1)
+        if args.sequence_p2:
+            upload_sequence(col, target[1], args.sequence_p2)
 
     # ---- Make sure the metadata YAML is valid
     log.info("Reading metadata")