diff options
author | AndreaGuarracino | 2021-01-05 14:18:26 +0100 |
---|---|---|
committer | AndreaGuarracino | 2021-01-05 14:18:26 +0100 |
commit | e9f73388c810c678a69ce23c925d7914ba99a251 (patch) | |
tree | 04731e150e44a2d102f49b431527b73fb1caad2f /workflows/yamlfa2ttl/check_format.py | |
parent | 491b39273b54ca89861febf5c27d28f032ba1c49 (diff) | |
download | bh20-seq-resource-e9f73388c810c678a69ce23c925d7914ba99a251.tar.gz bh20-seq-resource-e9f73388c810c678a69ce23c925d7914ba99a251.tar.lz bh20-seq-resource-e9f73388c810c678a69ce23c925d7914ba99a251.zip |
added check_format workflow and script
Diffstat (limited to 'workflows/yamlfa2ttl/check_format.py')
-rw-r--r-- | workflows/yamlfa2ttl/check_format.py | 35 |
1 files changed, 35 insertions, 0 deletions
"""Check that a sequence file is detected as the expected format.

Usage:
    check_format.py PATH_FASTA FORMAT_TO_CHECK PATH_VALID_FORMATS

PATH_FASTA          file to inspect; a name ending in ".gz" is read through gzip.
FORMAT_TO_CHECK     type string the file is expected to be reported as
                    (compared case-insensitively).
PATH_VALID_FORMATS  libmagic definitions file, loaded through pkg_resources,
                    e.g. ../../bh20sequploader/validation/formats

Raises ValueError when the detected type differs from FORMAT_TO_CHECK.
"""
import gzip
import io
import sys
import tempfile

import magic
import pkg_resources

# Only the head of the input is handed to libmagic.
SNIFF_SIZE = 4096


def build_format_checker(path_valid_formats):
    """Return a ``magic.Magic`` detector loaded from *path_valid_formats*.

    The resource is copied to a named temporary file because ``magic.Magic``
    takes a file name (``magic_file=``), not a stream. The detector is built
    while the temporary file still exists.
    """
    # NOTE(review): pkg_resources presumably resolves the path relative to
    # this module's location, not the current directory -- confirm.
    with pkg_resources.resource_stream(__name__, path_valid_formats) as schema_resource:
        with tempfile.NamedTemporaryFile() as tmp:
            tmp.write(schema_resource.read())
            tmp.flush()  # make sure the bytes are on disk before libmagic reads them
            return magic.Magic(magic_file=tmp.name, uncompress=False, mime=True)


def detect_format(path_fasta, check_format):
    """Return the lower-cased type *check_format* reports for *path_fasta*.

    Only the first ``SNIFF_SIZE`` characters are examined. The file is decoded
    with ``io.TextIOWrapper``'s default encoding, as in the original script.
    """
    with open(path_fasta, "rb") as raw:
        is_gzipped = path_fasta.endswith(".gz")
        stream = gzip.GzipFile(fileobj=raw, mode="rb") if is_gzipped else raw
        text = io.TextIOWrapper(stream)
        try:
            head = text.read(SNIFF_SIZE)
        finally:
            # Detach so the text wrapper does not close the underlying stream;
            # the enclosing ``with`` owns the raw file.
            text.detach()
            if is_gzipped:
                stream.close()
    return check_format.from_buffer(head).lower()


def main(argv):
    """Run the check for the command line *argv* (``sys.argv`` layout).

    Exits with a usage message when fewer than three arguments are given and
    raises ValueError when the file is not detected as the requested format.
    """
    if len(argv) < 4:
        sys.exit("usage: check_format.py PATH_FASTA FORMAT_TO_CHECK PATH_VALID_FORMATS")
    path_fasta, format_to_check, path_valid_formats = argv[1:4]

    check_format = build_format_checker(path_valid_formats)
    seq_type = detect_format(path_fasta, check_format)

    # The detected type is lower-cased, so normalise the expected one as well.
    if seq_type != format_to_check.lower():
        raise ValueError(f"Input file ({path_fasta}) does not look like a {format_to_check}")


if __name__ == "__main__":
    main(sys.argv)