diff options
author | Peter Amstutz | 2020-04-10 15:52:37 -0400 |
---|---|---|
committer | Peter Amstutz | 2020-04-10 15:52:37 -0400 |
commit | 925058d0b3db70803d322cc2a33801240899a20a (patch) | |
tree | 41e4552336d7d3bf17be8b1df6cbc280ef86eb6a /bh20sequploader | |
parent | a6ba9a5203a568611a94c043fd13e2ec50f071da (diff) | |
download | bh20-seq-resource-925058d0b3db70803d322cc2a33801240899a20a.tar.gz bh20-seq-resource-925058d0b3db70803d322cc2a33801240899a20a.tar.lz bh20-seq-resource-925058d0b3db70803d322cc2a33801240899a20a.zip |
Fix up fasta/fastq validation
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- | bh20sequploader/main.py | 29 |
1 files changed, 5 insertions, 24 deletions
```diff
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 2032508..4a225f6 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -8,7 +8,8 @@ from pathlib import Path
 import urllib.request
 import socket
 import getpass
-from qc_metadata import qc_metadata
+from .qc_metadata import qc_metadata
+from .qc_fasta import qc_fasta
 
 ARVADOS_API_HOST='lugli.arvadosapi.com'
 ARVADOS_API_TOKEN='2fbebpmbo3rw3x05ueu2i6nx70zhrsb1p22ycu3ry34m4x4462'
@@ -22,34 +23,14 @@ def main():
 
     api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
 
-    if not bh20sequploader.qc_metadata.qc_metadata(args.metadata.name):
+    target = qc_fasta(args.sequence)
+
+    if not qc_metadata(args.metadata.name):
         print("Failed metadata qc")
         exit(1)
 
     col = arvados.collection.Collection(api_client=api)
 
-    magic_file = Path(__file__).parent / "validation" / "formats.mgc"
-    val = magic.Magic(magic_file=magic_file.resolve().as_posix(),
-                      uncompress=False, mime=True)
-    seq_type = val.from_file(args.sequence.name).lower()
-    print(f"Sequence type: {seq_type}")
-    if seq_type == "text/fasta":
-        # ensure that contains only one entry
-        entries = 0
-        for line in args.sequence:
-            if line.startswith(">"):
-                entries += 1
-            if entries > 1:
-                raise ValueError("FASTA file contains multiple entries")
-                break
-        args.sequence.close()
-        args.sequence = open(args.sequence.name, "r")
-        target = "reads.fastq"
-    elif seq_type == "text/fastq":
-        target = "sequence.fasta"
-    else:
-        raise ValueError("Sequence file does not look like FASTA or FASTQ")
-
     with col.open(target, "w") as f:
         r = args.sequence.read(65536)
         print(r[0:20])
```