about summary refs log tree commit diff
path: root/bh20sequploader
diff options
context:
space:
mode:
Diffstat (limited to 'bh20sequploader')
-rw-r--r--bh20sequploader/bh20seq-schema.yml4
-rw-r--r--bh20sequploader/bh20seq-shex.rdf4
-rw-r--r--bh20sequploader/main.py22
-rw-r--r--bh20sequploader/qc_fasta.py4
4 files changed, 15 insertions, 19 deletions
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ee852fa..0aead3b 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -16,7 +16,7 @@ $graph:
   fields:
     license_type:
       doc: License types as defined in https://wiki.creativecommons.org/images/d/d6/Ccrel-1.0.pdf
-      type: string?
+      type: string
       jsonldPredicate:
           _id: https://creativecommons.org/ns#License
     title:
@@ -264,7 +264,7 @@ $graph:
     virus: virusSchema
     technology: technologySchema
     submitter: submitterSchema
-    license: licenseSchema
+    license: ["null", licenseSchema]
     id:
       doc: The subject (eg the fasta/fastq file) that the metadata describes
       type: string
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 7331e86..bbc7309 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -17,7 +17,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
   MainSchema:submitter @:submitterShape ;
   MainSchema:technology @:technologyShape ;
   MainSchema:virus @:virusShape;
-  MainSchema:license @:licenseShape;
+  MainSchema:license @:licenseShape ?;
 }
 
 :hostShape  {
@@ -71,7 +71,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
 }
 
 :licenseShape{
-    cc:License xsd:string ?;
+    cc:License xsd:string ;
     dc:Title xsd:string ?;
     cc:attributionName xsd:string ?;
     cc:attributionURL xsd:string ?;
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index f744a8c..6049bf9 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -29,11 +29,10 @@ def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
     try:
         log.debug("Checking metadata" if do_qc else "Skipping metadata check")
         if do_qc and not qc_metadata(metadata.name):
-            log.warning("Failed metadata qc")
+            log.warning("Failed metadata QC")
             failed = True
     except Exception as e:
-        log.debug(e)
-        print(e)
+        log.exception("Failed metadata QC")
         failed = True
 
     target = []
@@ -45,8 +44,7 @@ def qc_stuff(metadata, sequence_p1, sequence_p2, do_qc=True):
             target[0] = ("reads_1."+target[0][0][6:], target[0][1])
             target[1] = ("reads_2."+target[1][0][6:], target[0][1])
     except Exception as e:
-        log.debug(e)
-        print(e)
+        log.exception("Failed sequence QC")
         failed = True
 
     if failed:
@@ -82,7 +80,7 @@ def main():
     seqlabel = target[0][1]
 
     if args.validate:
-        print("Valid")
+        log.info("Valid")
         exit(0)
 
     col = arvados.collection.Collection(api_client=api)
@@ -91,10 +89,10 @@ def main():
     if args.sequence_p2:
         upload_sequence(col, target[1], args.sequence_p2)
 
-    print("Reading metadata")
+    log.info("Reading metadata")
     with col.open("metadata.yaml", "w") as f:
         r = args.metadata.read(65536)
-        print(r[0:20])
+        log.info(r[0:20])
         while r:
             f.write(r)
             r = args.metadata.read(65536)
@@ -118,7 +116,7 @@ def main():
                                            ["portable_data_hash", "=", col.portable_data_hash()]]).execute()
     if dup["items"]:
         # This exact collection has been uploaded before.
-        print("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
+        log.error("Duplicate of %s" % ([d["uuid"] for d in dup["items"]]))
         exit(1)
 
     if args.trusted:
@@ -131,9 +129,9 @@ def main():
                  (seqlabel, properties['upload_user'], properties['upload_ip']),
                  properties=properties, ensure_unique_name=True)
 
-    print("Saved to %s" % col.manifest_locator())
-
-    print("Done")
+    log.info("Saved to %s" % col.manifest_locator())
+    log.info("Done")
+    exit(0)
 
 if __name__ == "__main__":
     main()
diff --git a/bh20sequploader/qc_fasta.py b/bh20sequploader/qc_fasta.py
index 37eb4e8..0c7e16d 100644
--- a/bh20sequploader/qc_fasta.py
+++ b/bh20sequploader/qc_fasta.py
@@ -84,10 +84,8 @@ def qc_fasta(arg_sequence, check_with_clustalw=True):
                 except Exception as e:
                     logging.warn("QC against reference sequence using 'minimap2': %s", e, exc_info=e)
 
-                if similarity and similarity < 70.0:
+                if similarity < 70.0:
                     raise ValueError("QC fail: alignment to reference was less than 70%% (was %2.2f%%)" % (similarity))
-                if similarity == 0:
-                    raise ValueError("QC fail")
 
         return ("sequence.fasta"+gz, seqlabel)
     elif seq_type == "text/fastq":