about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bh20seqanalyzer/main.py 9
-rw-r--r-- bh20sequploader/main.py 29
-rw-r--r-- setup.py 5
3 files changed, 16 insertions, 27 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 1fb51b5..c05b402 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -8,6 +8,7 @@ import json
 import logging
 import ruamel.yaml
 from bh20sequploader.qc_metadata import qc_metadata
+from bh20sequploader.qc_fasta import qc_fasta
 import pkg_resources
 from schema_salad.sourceline import add_lc_filename
 
@@ -38,7 +39,13 @@ def validate_upload(api, collection, validated_project,
             logging.warn("Failed metadata qc")
 
     if valid:
-        if "sequence.fasta" not in col:
+        if "sequence.fasta" in col:
+            try:
+                qc_fasta(col.open("sequence.fasta"))
+            except Exception as e:
+                logging.warn(e)
+                valid = False
+        else:
             if "reads.fastq" in col:
                 start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid)
                 return False
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 2032508..4a225f6 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -8,7 +8,8 @@ from pathlib import Path
 import urllib.request
 import socket
 import getpass
-from qc_metadata import qc_metadata
+from .qc_metadata import qc_metadata
+from .qc_fasta import qc_fasta
 
 ARVADOS_API_HOST='lugli.arvadosapi.com'
 ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462'
@@ -22,34 +23,14 @@ def main():
 
     api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
 
-    if not bh20sequploader.qc_metadata.qc_metadata(args.metadata.name):
+    target = qc_fasta(args.sequence)
+
+    if not qc_metadata(args.metadata.name):
         print("Failed metadata qc")
         exit(1)
 
     col = arvados.collection.Collection(api_client=api)
 
-    magic_file = Path(__file__).parent / "validation" / "formats.mgc"
-    val = magic.Magic(magic_file=magic_file.resolve().as_posix(),
-                      uncompress=False, mime=True)
-    seq_type = val.from_file(args.sequence.name).lower()
-    print(f"Sequence type: {seq_type}")
-    if seq_type == "text/fasta":
-        # ensure that contains only one entry
-        entries = 0
-        for line in args.sequence:
-            if line.startswith(">"):
-                entries += 1
-            if entries > 1:
-                raise ValueError("FASTA file contains multiple entries")
-                break
-        args.sequence.close()
-        args.sequence = open(args.sequence.name, "r")
-        target = "reads.fastq"
-    elif seq_type == "text/fastq":
-        target = "sequence.fasta"
-    else:
-        raise ValueError("Sequence file does not look like FASTA or FASTQ")
-
     with col.open(target, "w") as f:
         r = args.sequence.read(65536)
         print(r[0:20])
diff --git a/setup.py b/setup.py
index 41ace7b..18e858e 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@ try:
 except ImportError:
     tagger = egg_info_cmd.egg_info
 
-install_requires = ["arvados-python-client", "schema-salad"]
+install_requires = ["arvados-python-client", "schema-salad", "python-magic"]
 web_requires = ["flask", "pyyaml"]
 
 needs_pytest = {"pytest", "test", "ptr"}.intersection(sys.argv)
@@ -31,7 +31,8 @@ setup(
     author_email="peter.amstutz@curii.com",
     license="Apache 2.0",
     packages=["bh20sequploader", "bh20seqanalyzer", "bh20simplewebuploader"],
-    package_data={"bh20sequploader": ["bh20seq-schema.yml"]},
+    package_data={"bh20sequploader": ["bh20seq-schema.yml", "validation/formats"],
+    },
     install_requires=install_requires,
     extras_require={
         'web': web_requires