From 925058d0b3db70803d322cc2a33801240899a20a Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 10 Apr 2020 15:52:37 -0400 Subject: Fix up fasta/fastq validation --- bh20sequploader/main.py | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) (limited to 'bh20sequploader') diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py index 2032508..4a225f6 100644 --- a/bh20sequploader/main.py +++ b/bh20sequploader/main.py @@ -8,7 +8,8 @@ from pathlib import Path import urllib.request import socket import getpass -from qc_metadata import qc_metadata +from .qc_metadata import qc_metadata +from .qc_fasta import qc_fasta ARVADOS_API_HOST='lugli.arvadosapi.com' ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462' @@ -22,34 +23,14 @@ def main(): api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True) - if not bh20sequploader.qc_metadata.qc_metadata(args.metadata.name): + target = qc_fasta(args.sequence) + + if not qc_metadata(args.metadata.name): print("Failed metadata qc") exit(1) col = arvados.collection.Collection(api_client=api) - magic_file = Path(__file__).parent / "validation" / "formats.mgc" - val = magic.Magic(magic_file=magic_file.resolve().as_posix(), - uncompress=False, mime=True) - seq_type = val.from_file(args.sequence.name).lower() - print(f"Sequence type: {seq_type}") - if seq_type == "text/fasta": - # ensure that contains only one entry - entries = 0 - for line in args.sequence: - if line.startswith(">"): - entries += 1 - if entries > 1: - raise ValueError("FASTA file contains multiple entries") - break - args.sequence.close() - args.sequence = open(args.sequence.name, "r") - target = "reads.fastq" - elif seq_type == "text/fastq": - target = "sequence.fasta" - else: - raise ValueError("Sequence file does not look like FASTA or FASTQ") - with col.open(target, "w") as f: r = args.sequence.read(65536) print(r[0:20]) -- cgit v1.2.3