blob: e47d66bd7056afd78869d5491ddf81df3bf3b02b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
import pkg_resources
import tempfile
import magic
def qc_fasta(sequence):
    """Classify a sequence file as FASTA or FASTQ and name it accordingly.

    The first 4096 characters/bytes read from *sequence* are matched against
    the ``validation/formats`` resource bundled with this package, which is
    handed to libmagic as its definitions file. A FASTA input is then
    scanned line by line to confirm it holds no more than one record.

    Args:
        sequence: Seekable file-like object that can be iterated by line,
            opened in either text or binary mode. It is rewound to offset 0
            before a successful return.

    Returns:
        ``"sequence.fasta"`` when the content is detected as FASTA,
        ``"reads.fastq"`` when it is detected as FASTQ.

    Raises:
        ValueError: If the detected type is neither FASTA nor FASTQ, or if
            a FASTA input contains more than one ``>`` header line.
    """
    # magic.Magic takes its definitions as a file path, so the packaged
    # resource is copied into a named temporary file. Both the resource
    # stream and the temporary file are closed when the blocks exit.
    with pkg_resources.resource_stream(__name__, "validation/formats") as formats:
        with tempfile.NamedTemporaryFile() as tmp:
            tmp.write(formats.read())
            tmp.flush()
            detector = magic.Magic(magic_file=tmp.name,
                                   uncompress=False, mime=True)
            # Sniff while the definitions file is still guaranteed to exist.
            seq_type = detector.from_buffer(sequence.read(4096)).lower()
    sequence.seek(0)

    if seq_type == "text/fasta":
        # Ensure that the file contains only one entry.
        entries = 0
        for line in sequence:
            # Pick a marker of the same type as the line so that binary-mode
            # inputs do not fail with TypeError in str.startswith.
            header_marker = b">" if isinstance(line, bytes) else ">"
            if line.startswith(header_marker):
                entries += 1
                if entries > 1:
                    raise ValueError("FASTA file contains multiple entries")
        # Leave the stream at the start for whoever consumes it next.
        sequence.seek(0)
        return "sequence.fasta"

    if seq_type == "text/fastq":
        return "reads.fastq"

    raise ValueError("Sequence file does not look like FASTA or FASTQ")
|