diff options
Diffstat (limited to 'bh20sequploader/qc_fasta.py')
-rw-r--r-- | bh20sequploader/qc_fasta.py | 28 |
1 files changed, 28 insertions, 0 deletions
"""Format detection and basic validation for sequence files (FASTA/FASTQ)."""

import tempfile

import magic
import pkg_resources


def qc_fasta(sequence):
    """Identify the format of a sequence file and return its target filename.

    The first 4096 bytes/characters of ``sequence`` are matched against the
    libmagic rules shipped with this package as the ``validation/formats``
    resource.  A FASTA file is additionally scanned to make sure it holds a
    single entry (a single line starting with ``>``).

    Args:
        sequence: Seekable file-like object opened in text or binary mode.
            It is rewound to offset 0 after type detection and again after a
            successful FASTA scan.

    Returns:
        ``"sequence.fasta"`` for a single-entry FASTA file, or
        ``"reads.fastq"`` for a FASTQ file.

    Raises:
        ValueError: If a FASTA file contains more than one entry, or if the
            content is detected as neither FASTA nor FASTQ.
    """
    # resource_string returns the bytes directly, so no resource stream is
    # left open (the previous resource_stream handle was never closed).
    magic_rules = pkg_resources.resource_string(__name__, "validation/formats")

    # libmagic needs the rules as a file on disk; run the detection while the
    # temporary file still exists.
    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write(magic_rules)
        tmp.flush()
        detector = magic.Magic(magic_file=tmp.name,
                               uncompress=False, mime=True)
        seq_type = detector.from_buffer(sequence.read(4096)).lower()
    sequence.seek(0)

    if seq_type == "text/fasta":
        # Ensure that the file contains only one entry.
        entries = 0
        for line in sequence:
            # Support both text-mode (str) and binary-mode (bytes) streams.
            header_marker = b">" if isinstance(line, bytes) else ">"
            if line.startswith(header_marker):
                entries += 1
                if entries > 1:
                    raise ValueError("FASTA file contains multiple entries")
        sequence.seek(0)
        return "sequence.fasta"

    if seq_type == "text/fastq":
        return "reads.fastq"

    raise ValueError("Sequence file does not look like FASTA or FASTQ")