about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bh20seqanalyzer/main.py 9
-rw-r--r-- bh20sequploader/main.py 29
-rw-r--r-- setup.py 5
3 files changed, 16 insertions, 27 deletions
diff --git a/bh20seqanalyzer/main.py b/bh20seqanalyzer/main.py
index 1fb51b5..c05b402 100644
--- a/bh20seqanalyzer/main.py
+++ b/bh20seqanalyzer/main.py
@@ -8,6 +8,7 @@ import json
import logging
import ruamel.yaml
from bh20sequploader.qc_metadata import qc_metadata
+from bh20sequploader.qc_fasta import qc_fasta
import pkg_resources
from schema_salad.sourceline import add_lc_filename
@@ -38,7 +39,13 @@ def validate_upload(api, collection, validated_project,
logging.warn("Failed metadata qc")
if valid:
- if "sequence.fasta" not in col:
+ if "sequence.fasta" in col:
+ try:
+ qc_fasta(col.open("sequence.fasta"))
+ except Exception as e:
+ logging.warn(e)
+ valid = False
+ else:
if "reads.fastq" in col:
start_fastq_to_fasta(api, collection, fastq_project, fastq_workflow_uuid)
return False
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 2032508..4a225f6 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -8,7 +8,8 @@ from pathlib import Path
import urllib.request
import socket
import getpass
-from qc_metadata import qc_metadata
+from .qc_metadata import qc_metadata
+from .qc_fasta import qc_fasta
ARVADOS_API_HOST='lugli.arvadosapi.com'
ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462'
@@ -22,34 +23,14 @@ def main():
api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
- if not bh20sequploader.qc_metadata.qc_metadata(args.metadata.name):
+ target = qc_fasta(args.sequence)
+
+ if not qc_metadata(args.metadata.name):
print("Failed metadata qc")
exit(1)
col = arvados.collection.Collection(api_client=api)
- magic_file = Path(__file__).parent / "validation" / "formats.mgc"
- val = magic.Magic(magic_file=magic_file.resolve().as_posix(),
- uncompress=False, mime=True)
- seq_type = val.from_file(args.sequence.name).lower()
- print(f"Sequence type: {seq_type}")
- if seq_type == "text/fasta":
- # ensure that contains only one entry
- entries = 0
- for line in args.sequence:
- if line.startswith(">"):
- entries += 1
- if entries > 1:
- raise ValueError("FASTA file contains multiple entries")
- break
- args.sequence.close()
- args.sequence = open(args.sequence.name, "r")
- target = "reads.fastq"
- elif seq_type == "text/fastq":
- target = "sequence.fasta"
- else:
- raise ValueError("Sequence file does not look like FASTA or FASTQ")
-
with col.open(target, "w") as f:
r = args.sequence.read(65536)
print(r[0:20])
diff --git a/setup.py b/setup.py
index 41ace7b..18e858e 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@ try:
except ImportError:
tagger = egg_info_cmd.egg_info
-install_requires = ["arvados-python-client", "schema-salad"]
+install_requires = ["arvados-python-client", "schema-salad", "python-magic"]
web_requires = ["flask", "pyyaml"]
needs_pytest = {"pytest", "test", "ptr"}.intersection(sys.argv)
@@ -31,7 +31,8 @@ setup(
author_email="peter.amstutz@curii.com",
license="Apache 2.0",
packages=["bh20sequploader", "bh20seqanalyzer", "bh20simplewebuploader"],
- package_data={"bh20sequploader": ["bh20seq-schema.yml"]},
+ package_data={"bh20sequploader": ["bh20seq-schema.yml", "validation/formats"],
+ },
install_requires=install_requires,
extras_require={
'web': web_requires