aboutsummaryrefslogtreecommitdiff
path: root/bh20sequploader/qc_fasta.py
diff options
context:
space:
mode:
author Pjotr Prins 2020-04-12 12:49:06 -0500
committer GitHub 2020-04-12 12:49:06 -0500
commit 23722a63682fdffe51efca55b40573fa27370973 (patch)
tree 7628ccda2671fe9a3e9ff204918c965d4bb9e257 /bh20sequploader/qc_fasta.py
parent 10ccb97cab69cb704c154387d544a74cd38d3cdf (diff)
parent 92e1608b2d8b21f2001d7bf480301d314337fdc0 (diff)
download bh20-seq-resource-23722a63682fdffe51efca55b40573fa27370973.tar.gz
bh20-seq-resource-23722a63682fdffe51efca55b40573fa27370973.tar.lz
bh20-seq-resource-23722a63682fdffe51efca55b40573fa27370973.zip
Merge branch 'master' into patch-1
Diffstat (limited to 'bh20sequploader/qc_fasta.py')
-rw-r--r-- bh20sequploader/qc_fasta.py 28
1 files changed, 28 insertions, 0 deletions
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
new file mode 100644
index 0000000..e47d66b
--- /dev/null
+++ b/bh20sequploader/qc_fasta.py
@@ -0,0 +1,28 @@
+import pkg_resources
+import tempfile
+import magic
+
def qc_fasta(sequence):
    """Classify a sequence file as FASTA or FASTQ and sanity-check it.

    Up to 4096 units are read from ``sequence`` and classified with
    libmagic, using the magic database shipped as the package resource
    ``validation/formats``.  A file detected as FASTA is then scanned
    line by line to make sure it does not hold more than one entry.

    Args:
        sequence: A readable, seekable file-like object.  Lines may be
            ``str`` or ``bytes``.  The stream is rewound to offset 0
            after type detection and again before a successful FASTA
            return.

    Returns:
        ``"sequence.fasta"`` when the content is detected as FASTA, or
        ``"reads.fastq"`` when it is detected as FASTQ.

    Raises:
        ValueError: If a FASTA file contains more than one ``>`` header
            line, or if the content is detected as neither FASTA nor
            FASTQ.
    """
    # python-magic takes its database as a path (``magic_file=``), so the
    # packaged resource is copied into a named temporary file first.
    with tempfile.NamedTemporaryFile() as tmp:
        # Close the resource stream as soon as its bytes have been copied.
        with pkg_resources.resource_stream(
                __name__, "validation/formats") as schema_resource:
            tmp.write(schema_resource.read())
        tmp.flush()
        val = magic.Magic(magic_file=tmp.name,
                          uncompress=False, mime=True)
        # Detect while the temporary database file is still on disk.
        seq_type = val.from_buffer(sequence.read(4096)).lower()
    sequence.seek(0)

    if seq_type == "text/fasta":
        # Ensure that the file contains only one entry.
        entries = 0
        for line in sequence:
            # Match the marker to the stream's mode so that binary
            # streams do not raise TypeError in startswith().
            marker = b">" if isinstance(line, bytes) else ">"
            if line.startswith(marker):
                entries += 1
                if entries > 1:
                    raise ValueError("FASTA file contains multiple entries")
        sequence.seek(0)
        return "sequence.fasta"

    if seq_type == "text/fastq":
        return "reads.fastq"

    raise ValueError("Sequence file does not look like FASTA or FASTQ")