From 1f66b8270a7bf06f98e2a336385bc84b778ead66 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 10 Apr 2020 15:53:58 -0400 Subject: Add qc_fasta --- bh20sequploader/qc_fasta.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 bh20sequploader/qc_fasta.py diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py new file mode 100644 index 0000000..e3d4fe7 --- /dev/null +++ b/bh20sequploader/qc_fasta.py @@ -0,0 +1,28 @@ +import pkg_resources +import tempfile +import magic + +def qc_fasta(sequence): + schema_resource = pkg_resources.resource_stream(__name__, "validation/formats") + with tempfile.NamedTemporaryFile() as tmp: + tmp.write(schema_resource.read()) + tmp.flush() + val = magic.Magic(magic_file=tmp.name, + uncompress=False, mime=True) + seq_type = val.from_buffer(sequence.read(4096)).lower() + sequence.seek(0) + if seq_type == "text/fasta": + # ensure that contains only one entry + entries = 0 + for line in sequence: + if line.startswith(">"): + entries += 1 + if entries > 1: + raise ValueError("FASTA file contains multiple entries") + break + sequence.seek(0) + return "reads.fastq" + elif seq_type == "text/fastq": + return "sequence.fasta" + else: + raise ValueError("Sequence file does not look like FASTA or FASTQ") -- cgit v1.2.3