aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: AndreaGuarracino, 2021-01-05 14:18:26 +0100
committer: AndreaGuarracino, 2021-01-05 14:18:26 +0100
commit: e9f73388c810c678a69ce23c925d7914ba99a251 (patch)
tree: 04731e150e44a2d102f49b431527b73fb1caad2f
parent: 491b39273b54ca89861febf5c27d28f032ba1c49 (diff)
download: bh20-seq-resource-e9f73388c810c678a69ce23c925d7914ba99a251.tar.gz
bh20-seq-resource-e9f73388c810c678a69ce23c925d7914ba99a251.tar.lz
bh20-seq-resource-e9f73388c810c678a69ce23c925d7914ba99a251.zip
added check_format workflow and script
-rw-r--r-- workflows/yamlfa2ttl/check_format.cwl 22
-rw-r--r-- workflows/yamlfa2ttl/check_format.py 35
-rw-r--r-- workflows/yamlfa2ttl/yamlfa2ttl.cwl 31
3 files changed, 69 insertions, 19 deletions
diff --git a/workflows/yamlfa2ttl/check_format.cwl b/workflows/yamlfa2ttl/check_format.cwl
new file mode 100644
index 0000000..24de620
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_format.cwl
@@ -0,0 +1,22 @@
+#!/usr/bin/env cwl-runner
+# Tool wrapper: runs `python3 <script> <path_fasta> <format_to_check> <path_valid_formats>`.
+cwlVersion: v1.1
+class: CommandLineTool
+baseCommand: python3
+inputs:
+ script:  # the Python file handed to python3 (position 1); defaults to check_format.py
+ type: File
+ inputBinding: {position: 1}
+ default: {class: File, location: check_format.py}
+ path_fasta:  # declared as a plain string, not as a CWL File
+ type: string
+ inputBinding: {position: 2}
+ format_to_check:  # expected type string; the script compares it with the detected type
+ type: string
+ inputBinding: {position: 3}
+ path_valid_formats:  # format rules file, last positional argument; has a default location
+ type: File
+ inputBinding: {position: 4}
+ default: {class: File, location: ../../bh20sequploader/validation/formats}
+outputs: []  # no outputs are declared
+
diff --git a/workflows/yamlfa2ttl/check_format.py b/workflows/yamlfa2ttl/check_format.py
new file mode 100644
index 0000000..a1b1f14
--- /dev/null
+++ b/workflows/yamlfa2ttl/check_format.py
@@ -0,0 +1,35 @@
+import gzip
+import tempfile
+import pkg_resources
+import magic
+import io
+
+import sys
+
+path_fasta = sys.argv[1]
+format_to_check = sys.argv[2]
+path_valid_formats = sys.argv[3]
+
+
+# ../../bh20sequploader/validation/formats
+
+schema_resource = pkg_resources.resource_stream(__name__, path_valid_formats)
+with tempfile.NamedTemporaryFile() as tmp:
+ tmp.write(schema_resource.read())
+ tmp.flush()
+ check_format = magic.Magic(magic_file=tmp.name, uncompress=False, mime=True)
+
+with open(path_fasta, "rb") as f:
+ gz = ""
+ if path_fasta.endswith(".gz"):
+ gz = ".gz"
+ f = gzip.GzipFile(fileobj=f, mode='rb')
+
+ f = io.TextIOWrapper(f)
+
+ buffer = f.read(4096)
+ seq_type = check_format.from_buffer(buffer).lower()
+ f.detach()
+
+ if seq_type != format_to_check:
+ raise ValueError(f"Input file ({path_fasta}) does not look like a {format_to_check}")
diff --git a/workflows/yamlfa2ttl/yamlfa2ttl.cwl b/workflows/yamlfa2ttl/yamlfa2ttl.cwl
index 2580794..1dce9ca 100644
--- a/workflows/yamlfa2ttl/yamlfa2ttl.cwl
+++ b/workflows/yamlfa2ttl/yamlfa2ttl.cwl
@@ -1,31 +1,24 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.1
-class: CommandLineTool
+class: Workflow  # replaces the CommandLineTool class; the file now defines steps to run
doc: "Workflow to go from YAML (metadata) + FASTA (sequence) to TTL (metadata)"
inputs:
path_fasta:
- type: File
- inputBinding:
- position: 1
- path_yaml:
- type: File
- inputBinding:
- position: 2
+ type: string  # matches the string-typed path_fasta input of check_format.cwl
+ doc: input fasta to validate
+
+ format_to_check:  # new workflow input forwarded to the check_format step
+ type: string
+ default: text/fasta  # used when the caller does not set format_to_check
steps:
check_format:
- in: {path_fasta: path_fasta, path_valid_formats: '../../bh20sequploader/validation/formats', format_to_check: 'text/fasta'}
- #out: true/false or nothing and it has to block the execution if the format is wrong
+ in:  # script and path_valid_formats are not wired here, so the defaults in check_format.cwl apply
+ path_fasta: path_fasta  # workflow input passed to the step input of the same name
+ format_to_check: format_to_check  # workflow input passed to the step input of the same name
+ out: []  # the step exposes no outputs
run: check_format.cwl
- check_metadata:
- # input and output
- # run: check_metadata.cwl
- check_header:
- # id_fasta has to be equal to id_yaml
- # run: check_header.cwl
- check_sequence:
- # The sequence has to be similar to the reference
- # run: check_sequence.cwl
+outputs: []  # the workflow declares no outputs