about summary refs log tree commit diff
path: root/workflows/yamlfa2ttl
diff options
context:
space:
mode:
author: Andrea Guarracino, 2021-01-05 14:18:26 +0100
committer: Andrea Guarracino, 2021-01-05 14:18:26 +0100
commit e9f73388c810c678a69ce23c925d7914ba99a251 (patch)
tree 04731e150e44a2d102f49b431527b73fb1caad2f /workflows/yamlfa2ttl
parent 491b39273b54ca89861febf5c27d28f032ba1c49 (diff)
download bh20-seq-resource-e9f73388c810c678a69ce23c925d7914ba99a251.tar.gz
bh20-seq-resource-e9f73388c810c678a69ce23c925d7914ba99a251.tar.lz
bh20-seq-resource-e9f73388c810c678a69ce23c925d7914ba99a251.zip
added check_format workflow and script
Diffstat (limited to 'workflows/yamlfa2ttl')
-rw-r--r-- workflows/yamlfa2ttl/check_format.cwl 22
-rw-r--r-- workflows/yamlfa2ttl/check_format.py 35
-rw-r--r-- workflows/yamlfa2ttl/yamlfa2ttl.cwl 31
3 files changed, 69 insertions, 19 deletions
diff --git a/workflows/yamlfa2ttl/check_format.cwl b/workflows/yamlfa2ttl/check_format.cwl
new file mode 100644
index 0000000..24de620
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_format.cwl
@@ -0,0 +1,22 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.1
+class: CommandLineTool
+baseCommand: python3
+inputs:
+  script:
+    type: File
+    inputBinding: {position: 1}
+    default: {class: File, location: check_format.py}
+  path_fasta:
+    type: string
+    inputBinding: {position: 2}
+  format_to_check:
+    type: string
+    inputBinding: {position: 3}
+  path_valid_formats:
+    type: File
+    inputBinding: {position: 4}
+    default: {class: File, location: ../../bh20sequploader/validation/formats}
+outputs: []
+
diff --git a/workflows/yamlfa2ttl/check_format.py b/workflows/yamlfa2ttl/check_format.py
new file mode 100644
index 0000000..a1b1f14
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_format.py
@@ -0,0 +1,35 @@
+import gzip
+import tempfile
+import pkg_resources
+import magic
+import io
+
+import sys
+
+path_fasta = sys.argv[1]
+format_to_check = sys.argv[2]
+path_valid_formats = sys.argv[3]
+
+
+# ../../bh20sequploader/validation/formats
+
+schema_resource = pkg_resources.resource_stream(__name__, path_valid_formats)
+with tempfile.NamedTemporaryFile() as tmp:
+    tmp.write(schema_resource.read())
+    tmp.flush()
+    check_format = magic.Magic(magic_file=tmp.name, uncompress=False, mime=True)
+
+with open(path_fasta, "rb") as f:
+    gz = ""
+    if path_fasta.endswith(".gz"):
+        gz = ".gz"
+        f = gzip.GzipFile(fileobj=f, mode='rb')
+
+    f = io.TextIOWrapper(f)
+
+    buffer = f.read(4096)
+    seq_type = check_format.from_buffer(buffer).lower()
+    f.detach()
+
+    if seq_type != format_to_check:
+        raise ValueError(f"Input file ({path_fasta}) does not look like a {format_to_check}")
diff --git a/workflows/yamlfa2ttl/yamlfa2ttl.cwl b/workflows/yamlfa2ttl/yamlfa2ttl.cwl
index 2580794..1dce9ca 100644
--- a/workflows/yamlfa2ttl/yamlfa2ttl.cwl
+++ b/workflows/yamlfa2ttl/yamlfa2ttl.cwl
@@ -1,31 +1,24 @@
 #!/usr/bin/env cwl-runner
 
 cwlVersion: v1.1
-class: CommandLineTool
+class: Workflow
 doc: "Workflow to go from YAML (metadata) + FASTA (sequence) to TTL (metadata)"
 
 inputs:
   path_fasta:
-    type: File
-    inputBinding:
-      position: 1
-  path_yaml:
-    type: File
-    inputBinding:
-      position: 2
+    type: string
+    doc: input fasta to validate
+
+  format_to_check:
+    type: string
+    default: text/fasta
 
 steps:
   check_format:
-    in: {path_fasta: path_fasta, path_valid_formats: '../../bh20sequploader/validation/formats', format_to_check: 'text/fasta'}
-    #out: true/false or nothing and it has to block the execution if the format is wrong
+    in:
+      path_fasta: path_fasta
+      format_to_check: format_to_check
+    out: []
     run: check_format.cwl
-  check_metadata:
-    # input and output
-    # run: check_metadata.cwl
-  check_header:
-    # id_fasta has to be equal to id_yaml
-    # run: check_header.cwl
-  check_sequence:
-    # The sequence has to be similar to the reference
-    # run: check_sequence.cwl
 
+outputs: []