about summary refs log tree commit diff
path: root/bh20sequploader
diff options
context:
space:
mode:
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- bh20sequploader/qc_fasta.py 28
1 files changed, 28 insertions, 0 deletions
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
new file mode 100644
index 0000000..e3d4fe7
--- /dev/null
+++ b/bh20sequploader/qc_fasta.py
@@ -0,0 +1,28 @@
+import pkg_resources
+import tempfile
+import magic
+
+def qc_fasta(sequence):
+    """Detect whether `sequence` is FASTA or FASTQ via the packaged magic rules.
+
+    Returns "sequence.fasta" or "reads.fastq" and rewinds `sequence`; raises
+    ValueError for other content or a FASTA with more than one ">" header."""
+    schema_resource = pkg_resources.resource_stream(__name__, "validation/formats")
+    with tempfile.NamedTemporaryFile() as tmp:
+        tmp.write(schema_resource.read())
+        tmp.flush()
+        val = magic.Magic(magic_file=tmp.name, uncompress=False, mime=True)
+    seq_type = val.from_buffer(sequence.read(4096)).lower()
+    sequence.seek(0)
+    if seq_type == "text/fasta":
+        entries = 0
+        for line in sequence:
+            if line.startswith(">"):
+                entries += 1
+                if entries > 1:
+                    raise ValueError("FASTA file contains multiple entries")
+        sequence.seek(0)
+        return "sequence.fasta"
+    if seq_type == "text/fastq":
+        return "reads.fastq"
+    raise ValueError("Sequence file does not look like FASTA or FASTQ")