about summary refs log tree commit diff
path: root/bh20sequploader
diff options
context:
space:
mode:
author Peter Amstutz 2020-04-10 15:52:37 -0400
committer Peter Amstutz 2020-04-10 15:52:37 -0400
commit 925058d0b3db70803d322cc2a33801240899a20a (patch)
tree 41e4552336d7d3bf17be8b1df6cbc280ef86eb6a /bh20sequploader
parent a6ba9a5203a568611a94c043fd13e2ec50f071da (diff)
download bh20-seq-resource-925058d0b3db70803d322cc2a33801240899a20a.tar.gz
bh20-seq-resource-925058d0b3db70803d322cc2a33801240899a20a.tar.lz
bh20-seq-resource-925058d0b3db70803d322cc2a33801240899a20a.zip
Fix up fasta/fastq validation
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- bh20sequploader/main.py 29
1 files changed, 5 insertions, 24 deletions
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 2032508..4a225f6 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -8,7 +8,8 @@ from pathlib import Path
 import urllib.request
 import socket
 import getpass
-from qc_metadata import qc_metadata
+from .qc_metadata import qc_metadata
+from .qc_fasta import qc_fasta
 
 ARVADOS_API_HOST='lugli.arvadosapi.com'
 ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462'
@@ -22,34 +23,14 @@ def main():
 
     api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
 
-    if not bh20sequploader.qc_metadata.qc_metadata(args.metadata.name):
+    target = qc_fasta(args.sequence)
+
+    if not qc_metadata(args.metadata.name):
         print("Failed metadata qc")
         exit(1)
 
     col = arvados.collection.Collection(api_client=api)
 
-    magic_file = Path(__file__).parent / "validation" / "formats.mgc"
-    val = magic.Magic(magic_file=magic_file.resolve().as_posix(),
-                      uncompress=False, mime=True)
-    seq_type = val.from_file(args.sequence.name).lower()
-    print(f"Sequence type: {seq_type}")
-    if seq_type == "text/fasta":
-        # ensure that contains only one entry
-        entries = 0
-        for line in args.sequence:
-            if line.startswith(">"):
-                entries += 1
-            if entries > 1:
-                raise ValueError("FASTA file contains multiple entries")
-                break
-        args.sequence.close()
-        args.sequence = open(args.sequence.name, "r")
-        target = "reads.fastq"
-    elif seq_type == "text/fastq":
-        target = "sequence.fasta"
-    else:
-        raise ValueError("Sequence file does not look like FASTA or FASTQ")
-
     with col.open(target, "w") as f:
         r = args.sequence.read(65536)
         print(r[0:20])