diff options
author | Peter Amstutz | 2020-04-10 15:53:58 -0400 |
---|---|---|
committer | Peter Amstutz | 2020-04-10 15:53:58 -0400 |
commit | 1f66b8270a7bf06f98e2a336385bc84b778ead66 (patch) | |
tree | 552afd813d8f5a5c54a7641e1fb9c7227d7ebb74 | |
parent | 925058d0b3db70803d322cc2a33801240899a20a (diff) | |
download | bh20-seq-resource-1f66b8270a7bf06f98e2a336385bc84b778ead66.tar.gz bh20-seq-resource-1f66b8270a7bf06f98e2a336385bc84b778ead66.tar.lz bh20-seq-resource-1f66b8270a7bf06f98e2a336385bc84b778ead66.zip |
Add qc_fasta
-rw-r--r-- | bh20sequploader/qc_fasta.py | 28 |
1 files changed, 28 insertions, 0 deletions
# bh20sequploader/qc_fasta.py
import pkg_resources
import tempfile
import magic


def qc_fasta(sequence):
    """Check that *sequence* looks like FASTA or FASTQ and name its upload target.

    The first 4096 units read from *sequence* are classified with libmagic
    using the magic definitions shipped in this package under
    ``validation/formats``.  The stream is rewound to offset 0 before
    returning.

    Args:
        sequence: A seekable file-like object holding the sequence data.
            Both text-mode and binary-mode handles are accepted by the
            FASTA record scan.

    Returns:
        ``"sequence.fasta"`` when the data is detected as FASTA, or
        ``"reads.fastq"`` when it is detected as FASTQ.

    Raises:
        ValueError: If a FASTA input contains more than one ``>`` header
            line, or if the data is detected as neither FASTA nor FASTQ.
    """
    # resource_string returns the bytes directly, so no resource handle is
    # left open (the previous resource_stream handle was never closed).
    magic_definitions = pkg_resources.resource_string(__name__, "validation/formats")

    # libmagic is given a file path, so the packaged definitions are spilled
    # to a temporary file; detection runs while that file still exists.
    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write(magic_definitions)
        tmp.flush()
        val = magic.Magic(magic_file=tmp.name,
                          uncompress=False, mime=True)
        seq_type = val.from_buffer(sequence.read(4096)).lower()
    sequence.seek(0)

    if seq_type == "text/fasta":
        # Ensure that the file contains only one entry.
        entries = 0
        for line in sequence:
            # Match the marker type to the stream mode so a binary handle
            # does not raise TypeError on str/bytes mismatch.
            header_marker = b">" if isinstance(line, bytes) else ">"
            if line.startswith(header_marker):
                entries += 1
                if entries > 1:
                    raise ValueError("FASTA file contains multiple entries")
        sequence.seek(0)
        # FASTA input maps to the FASTA target name (previously swapped
        # with the FASTQ one).
        return "sequence.fasta"
    elif seq_type == "text/fastq":
        return "reads.fastq"
    else:
        raise ValueError("Sequence file does not look like FASTA or FASTQ")