about summary refs log tree commit diff
path: root/bh20sequploader
diff options
context:
space:
mode:
author Peter Amstutz 2020-04-08 17:41:19 -0400
committer Peter Amstutz 2020-04-08 17:46:12 -0400
commit 9458ed33da08c787c4bb20af7b4108c93334b351 (patch)
tree e4db1d5dbc8d653e05d612dc1229cef181bf68b3 /bh20sequploader
parent 7140367bd832294c1756fe2369e91e7acc305083 (diff)
download bh20-seq-resource-9458ed33da08c787c4bb20af7b4108c93334b351.tar.gz
bh20-seq-resource-9458ed33da08c787c4bb20af7b4108c93334b351.tar.lz
bh20-seq-resource-9458ed33da08c787c4bb20af7b4108c93334b351.zip
Fastq now runs through fastq2fasta pipeline
then gets added to pangenome analysis.
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- bh20sequploader/main.py 14
-rw-r--r-- bh20sequploader/qc_metadata.py 6
2 files changed, 15 insertions, 5 deletions
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 8b8fefe..56cbe22 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -20,12 +20,18 @@ def main():
api = arvados.api(host=ARVADOS_API_HOST, token=ARVADOS_API_TOKEN, insecure=True)
- qc_metadata(args.metadata.name)
+ if not qc_metadata(args.metadata.name):
+ print("Failed metadata qc")
+ exit(1)
col = arvados.collection.Collection(api_client=api)
- print("Reading FASTA")
- with col.open("sequence.fasta", "w") as f:
+ if args.sequence.name.endswith("fasta") or args.sequence.name.endswith("fa"):
+ target = "sequence.fasta"
+ elif args.sequence.name.endswith("fastq") or args.sequence.name.endswith("fq"):
+ target = "reads.fastq"
+
+ with col.open(target, "w") as f:
r = args.sequence.read(65536)
print(r[0:20])
while r:
@@ -52,5 +58,7 @@ def main():
(properties['upload_user'], properties['upload_ip']),
properties=properties, ensure_unique_name=True)
+ print("Done")
+
if __name__ == "__main__":
main()
diff --git a/bh20sequploader/qc_metadata.py b/bh20sequploader/qc_metadata.py
index 78b31b2..ebe4dfc 100644
--- a/bh20sequploader/qc_metadata.py
+++ b/bh20sequploader/qc_metadata.py
@@ -1,6 +1,7 @@
import schema_salad.schema
import logging
import pkg_resources
+import logging
def qc_metadata(metadatafile):
schema_resource = pkg_resources.resource_stream(__name__, "bh20seq-schema.yml")
@@ -17,5 +18,6 @@ def qc_metadata(metadatafile):
try:
doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, metadatafile, True)
return True
- except:
- return False
+ except Exception as e:
+ logging.warn(e)
+ return False