aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Amstutz2020-04-10 15:53:58 -0400
committerPeter Amstutz2020-04-10 15:53:58 -0400
commit1f66b8270a7bf06f98e2a336385bc84b778ead66 (patch)
tree552afd813d8f5a5c54a7641e1fb9c7227d7ebb74
parent925058d0b3db70803d322cc2a33801240899a20a (diff)
downloadbh20-seq-resource-1f66b8270a7bf06f98e2a336385bc84b778ead66.tar.gz
bh20-seq-resource-1f66b8270a7bf06f98e2a336385bc84b778ead66.tar.lz
bh20-seq-resource-1f66b8270a7bf06f98e2a336385bc84b778ead66.zip
Add qc_fasta
-rw-r--r--bh20sequploader/qc_fasta.py28
1 files changed, 28 insertions, 0 deletions
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
new file mode 100644
index 0000000..e3d4fe7
--- /dev/null
+++ b/bh20sequploader/qc_fasta.py
@@ -0,0 +1,28 @@
+import pkg_resources
+import tempfile
+import magic
+
def qc_fasta(sequence):
    """Check that *sequence* is a single-entry FASTA file or a FASTQ file.

    The content type is sniffed with libmagic from the first 4096 units read
    from *sequence*, using the magic definitions bundled with this package as
    the ``validation/formats`` resource.

    Args:
        sequence: Seekable file-like object holding the sequence data. It is
            rewound to the start after sniffing and again after the FASTA
            entry scan.

    Returns:
        ``"sequence.fasta"`` when the input is detected as FASTA, or
        ``"reads.fastq"`` when it is detected as FASTQ.
        NOTE(review): presumably the name the upload is stored under --
        confirm against the caller.

    Raises:
        ValueError: If a FASTA input contains more than one ``>`` header
            line, or if the input is detected as neither FASTA nor FASTQ.
    """
    # resource_string returns the bytes directly, so no stream is left open.
    magic_rules = pkg_resources.resource_string(__name__, "validation/formats")

    with tempfile.NamedTemporaryFile() as tmp:
        # libmagic is given the definitions by path, so write the packaged
        # resource out to a real file first.
        tmp.write(magic_rules)
        tmp.flush()
        detector = magic.Magic(magic_file=tmp.name,
                               uncompress=False, mime=True)
        seq_type = detector.from_buffer(sequence.read(4096)).lower()
    sequence.seek(0)

    if seq_type == "text/fasta":
        # Ensure that the file contains only one entry; every entry starts
        # with a '>' header line, so scan all lines and count them.
        entries = 0
        for line in sequence:
            # Accept handles opened in either text or binary mode.
            header_marker = b">" if isinstance(line, bytes) else ">"
            if line.startswith(header_marker):
                entries += 1
                if entries > 1:
                    raise ValueError("FASTA file contains multiple entries")
        sequence.seek(0)
        return "sequence.fasta"

    if seq_type == "text/fastq":
        return "reads.fastq"

    raise ValueError("Sequence file does not look like FASTA or FASTQ")