aboutsummaryrefslogtreecommitdiff
path: root/workflows/yamlfa2ttl/check_format.py
blob: 4472b189dde2fdca8c030608c716f7285974eddd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import gzip
import tempfile
import magic
import io
import sys

# Number of leading characters of the input handed to libmagic for detection.
HEAD_SIZE = 4096


def build_detector(path_valid_formats):
    """Return a ``magic.Magic`` MIME-type detector loaded from a magic file.

    The definitions in *path_valid_formats* are copied into a temporary file
    and the detector is constructed from that copy while it still exists.
    The detector is returned, and used by the caller, after the copy has
    been removed.

    NOTE(review): the reason for loading from a copy rather than from
    *path_valid_formats* directly is not visible here; the copy is kept so
    that libmagic sees exactly the same kind of path as before.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        with open(path_valid_formats, "rb") as src:
            tmp.write(src.read())
        # The detector opens the copy by name, so the bytes must be flushed
        # out of Python's buffer before it is constructed.
        tmp.flush()
        return magic.Magic(magic_file=tmp.name, uncompress=False, mime=True)


def read_head(path, size=HEAD_SIZE):
    """Return up to *size* leading characters of the file at *path*.

    A path ending in ``.gz`` is read through gzip decompression; any other
    path is read as is.  The handle is always closed before returning.
    """
    opener = gzip.open if path.endswith(".gz") else open
    # Decode explicitly as UTF-8 with replacement: the result then no longer
    # depends on the process locale, and a non-text input is reported as a
    # format mismatch by the caller instead of dying with UnicodeDecodeError.
    with opener(path, "rt", encoding="utf-8", errors="replace") as handle:
        return handle.read(size)


def main(argv=None):
    """Verify that an input file is detected as the expected MIME type.

    Args:
        argv: Argument vector laid out as
            ``[prog, path_fasta, format_to_check, path_valid_formats]``.
            Defaults to ``sys.argv``.  Extra trailing arguments are ignored.

    Raises:
        SystemExit: If fewer than three arguments were supplied.
        ValueError: If the detected type differs from ``format_to_check``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 4:
        sys.exit(
            "usage: check_format.py <path_fasta> <format_to_check> "
            "<path_valid_formats>"
        )
    path_fasta, format_to_check, path_valid_formats = argv[1:4]

    check_format = build_detector(path_valid_formats)
    seq_type = check_format.from_buffer(read_head(path_fasta)).lower()

    # The detected type is lower-cased, so normalise the expected one too;
    # otherwise an expected type containing upper case could never match.
    if seq_type != format_to_check.lower():
        raise ValueError(f"Input file ({path_fasta}) does not look like a {format_to_check}")


if __name__ == "__main__":
    main()