about summary refs log tree commit diff
path: root/bh20sequploader
diff options
context:
space:
mode:
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- bh20sequploader/bh20seq-options.yml 9
-rw-r--r-- bh20sequploader/bh20seq-schema.yml 76
-rw-r--r-- bh20sequploader/bh20seq-shex.rdf 40
-rw-r--r-- bh20sequploader/main.py 8
4 files changed, 80 insertions, 53 deletions
diff --git a/bh20sequploader/bh20seq-options.yml b/bh20sequploader/bh20seq-options.yml
index 7320ecf..4c1d15c 100644
--- a/bh20sequploader/bh20seq-options.yml
+++ b/bh20sequploader/bh20seq-options.yml
@@ -15,6 +15,15 @@ host_sex:
   Female: http://purl.obolibrary.org/obo/PATO_0000383
   Intersex: http://purl.obolibrary.org/obo/PATO_0001340
 
+host_health_status:
+  healthy: http://purl.obolibrary.org/obo/NCIT_C115935
+  asymptomatic: http://purl.obolibrary.org/obo/NCIT_C3833
+  symptomatic: http://purl.obolibrary.org/obo/NCIT_C25269
+  admitted to hospital: http://purl.obolibrary.org/obo/GENEPIO_0002020
+  discharged from hospital: http://purl.obolibrary.org/obo/GENEPIO_0001849
+  dead: http://purl.obolibrary.org/obo/NCIT_C28554
+  alive: http://purl.obolibrary.org/obo/NCIT_C37987
+
 sample_sequencing_technology:
   Illumina NextSeq 500: http://www.ebi.ac.uk/efo/EFO_0009173
   Illumina NextSeq 550: http://www.ebi.ac.uk/efo/EFO_0008566
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index bbcafc8..9a89979 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -18,17 +18,19 @@ $graph:
         jsonldPredicate:
           _id: http://www.ebi.ac.uk/efo/EFO_0000532
           _type: "@id"
+          noLinkCheck: true
     host_id:
         doc: Identifer for the host. If you submit multiple samples from the same host, use the same host_id for those samples
-        type: string
+        type: string?
         jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_000115
     host_sex:
-        doc: Sex of the host, IRI expected
+        doc: Sex of the host as defined in PATO, expect male (http://purl.obolibrary.org/obo/PATO_0000384) or female (http://purl.obolibrary.org/obo/PATO_0000383)
         type: string?
         jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/PATO_0000047
           _type: "@id"
+          noLinkCheck: true
     host_age:
         doc: Age of the host as number (e.g. 50)
         type: int?
@@ -40,10 +42,12 @@ $graph:
         jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/NCIT_C42574
           _type: "@id"
+          noLinkCheck: true
     host_health_status:
-        doc: A condition or state at a particular time
+        doc: A condition or state at a particular time, must be one of the following (obo:NCIT_C115935 obo:NCIT_C3833 obo:NCIT_C25269 obo:GENEPIO_0002020 obo:GENEPIO_0001849 obo:NCIT_C28554 obo:NCIT_C37987)
         type: string?
-        jsonldPredicate: http://purl.obolibrary.org/obo/NCIT_C25688
+        jsonldPredicate:
+          _id: http://purl.obolibrary.org/obo/NCIT_C25688
     host_treatment:
       doc: Process in which the act is intended to modify or alter host status
       type: string?
@@ -63,39 +67,47 @@ $graph:
 - name: sampleSchema
   type: record
   fields:
+    sample_id:
+      doc: Id of the sample as defined by the submitter
+      type: string
+      jsonldPredicate:
+        _id: http://semanticscience.org/resource/SIO_000115
+    collection_date:
+      doc: Date when the sample was taken
+      type: string
+      jsonldPredicate:
+        _id: http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164
+    collection_location:
+      doc: Geographical location where the sample was collected as wikidata reference, e.g. http://www.wikidata.org/entity/Q148 (China)
+      type: string
+      jsonldPredicate:
+        _id: http://purl.obolibrary.org/obo/GAZ_00000448
+        _type: "@id"
+        noLinkCheck: true
     collector_name:
       doc: Name of the person that took the sample
-      type: string
+      type: string?
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/OBI_0001895
     collecting_institution:
       doc: Institute that was responsible of sampeling
-      type: string
+      type: string?
       jsonldPredicate:
-          _id: http://semanticscience.org/resource/SIO_001167
+          _id: http://purl.obolibrary.org/obo/NCIT_C41206
     specimen_source:
       doc: Method how the specimen was derived as NCIT IRI, e.g. http://purl.obolibrary.org/obo/NCIT_C155831 (=nasopharyngeal swab)
       type: string?
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/OBI_0001479
           _type: "@id"
+          noLinkCheck: true
     specimen_source2:
       doc: Method how the specimen was derived as NCIT IRI, e.g. http://purl.obolibrary.org/obo/NCIT_C155835 (=throat swabb)
       type: string?
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/OBI_0001479
           _type: "@id"
-    collection_date:
-      doc: Date when the sample was taken
-      type: string
-      jsonldPredicate:
-          _id: http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164
-    collection_location:
-      doc: Geographical location where the sample was collected as wikidata reference, e.g. http://www.wikidata.org/entity/Q148 (China)
-      type: string
-      jsonldPredicate:
-        _id: http://purl.obolibrary.org/obo/GAZ_00000448
-        _type: "@id"
+          noLinkCheck: true
     sample_storage_conditions:
       doc: Information about storage of a specified type, e.g.  frozen specimen, paraffin, fresh ....
       type: string?
@@ -106,11 +118,6 @@ $graph:
       type: string?
       jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_001167
-    sample_id:
-      doc: Id of the sample as defined by the submitter
-      type: string
-      jsonldPredicate:
-          _id: http://semanticscience.org/resource/SIO_000115
     source_database_accession:
       doc: If data is deposit at a public resource (e.g. Genbank, ENA) enter the Accession Id here
       type: string?
@@ -126,6 +133,7 @@ $graph:
       jsonldPredicate:
           _id: http://edamontology.org/data_1875
           _type: "@id"
+          noLinkCheck: true
     virus_strain:
       doc: Name of the virus strain
       type: string?
@@ -137,16 +145,18 @@ $graph:
   fields:
     sample_sequencing_technology:
       doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION)
-      type: string
+      type: string?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/OBI_0600047
         _type: "@id"
+        noLinkCheck: true
     sample_sequencing_technology2:
       doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION)
       type: string?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/OBI_0600047
         _type: "@id"
+        noLinkCheck: true
     sequence_assembly_method:
       doc: Protocol which provides instructions on the alignment of sequencing reads to reference genome
       type: string?
@@ -158,7 +168,7 @@ $graph:
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/FLU_0000848
     sequencing_coverage2:
-      doc: If a second sequence technology was use you can submit its coverage here
+      doc: If a second sequence technology was used you can submit its coverage here
       type: float?
       jsonldPredicate:
         _id: http://purl.obolibrary.org/obo/FLU_0000848
@@ -171,9 +181,14 @@ $graph:
 - name: submitterSchema
   type: record
   fields:
+    authors:
+      doc: Name of the author(s)
+      type: string
+      jsonldPredicate:
+          _id: http://purl.obolibrary.org/obo/NCIT_C42781
     submitter_name:
       doc: Name of the submitter
-      type: string
+      type: string?
       jsonldPredicate:
           _id: http://semanticscience.org/resource/SIO_000116
     submitter_address:
@@ -183,7 +198,7 @@ $graph:
           _id: http://semanticscience.org/resource/SIO_000172
     originating_lab:
       doc: Name of the laboratory that took the sample
-      type: string
+      type: string?
       jsonldPredicate:
           _id: http://purl.obolibrary.org/obo/NCIT_C37984
     lab_address:
@@ -199,11 +214,6 @@ $graph:
       type: string?
       jsonldPredicate:
           _id: http://www.ebi.ac.uk/efo/EFO_0001741
-    authors:
-      doc: Name of the author(s)
-      type: string?
-      jsonldPredicate:
-          _id: http://purl.obolibrary.org/obo/NCIT_C42781
     publication:
       doc: Reference to publication of this sample (e.g. DOI, pubmed ID, ...)
       type: string?
@@ -223,7 +233,7 @@ $graph:
   fields:
     host: hostSchema
     sample: sampleSchema
-    virus: virusSchema?
+    virus: virusSchema
     technology: technologySchema
     submitter: submitterSchema
     id:
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 8d3f5fc..246fd57 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -7,6 +7,7 @@ PREFIX sio: <http://semanticscience.org/resource/>
 PREFIX efo: <http://www.ebi.ac.uk/efo/>
 PREFIX evs: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#>
 PREFIX edam: <http://edamontology.org/>
+PREFIX wikidata: <http://www.wikidata.org/entity/>
 
 :submissionShape {
   MainSchema:host   @:hostShape ;
@@ -18,39 +19,44 @@ PREFIX edam: <http://edamontology.org/>
 
 :hostShape  {
   	efo:EFO_0000532 [ obo:NCBITaxon_~ ] ;
-    obo:PATO_0000047 [ obo:NCIT_C20197  obo:NCIT_C27993  obo:NCIT_C17998 ] ;
-    sio:SIO_000115 xsd:string ;
+    sio:SIO_000115 xsd:string ?;
+    obo:PATO_0000047 [ obo:PATO_0000384 obo:PATO_0000383 ] ?;
     obo:PATO_0000011 xsd:integer ?;
     obo:NCIT_C42574 [ obo:UO_~ ] ?;
-    sio:SIO_001167 xsd:string ?;
+	obo:NCIT_C25688 [obo:NCIT_C115935 obo:NCIT_C3833 obo:NCIT_C25269 obo:GENEPIO_0002020 obo:GENEPIO_0001849 obo:NCIT_C28554 obo:NCIT_C37987 ] ? ;
     efo:EFO_0000727 xsd:string ?;
+    obo:VO_0000002 xsd:string ?;
+    sio:SIO_001167 xsd:string ?;
 }
 
 :sampleShape  {
-    obo:OBI_0001895 xsd:string ;
-    sio:SIO_000115 xsd:string ;
-    sio:SIO_001167 xsd:string ;
-	evs:C25164 xsd:string ?;
-    obo:GAZ_00000448 [obo:GAZ_~] ?;
+    sio:SIO_000115 xsd:string;
+	evs:C25164 xsd:string;
+	obo:GAZ_00000448 [wikidata:~] ;
+    obo:OBI_0001895 xsd:string ?;
+    obo:NCIT_C41206 xsd:string ?;
+    obo:OBI_0001479 IRI {0,2};
     obo:OBI_0001472 xsd:string ?;
-    obo:OBI_0001479 xsd:string ?;
+    sio:SIO_001167 xsd:string ?;
 }
 
 :submitterShape {
-    sio:SIO_000116 xsd:string ;
-	obo:NCIT_C37984 xsd:string ;
-	obo:NCIT_C37900 xsd:string ?;
-    obo:NCIT_C42781 xsd:string ?;
-    obo:OBI_0600047 xsd:string ?;
-    sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?;
+    obo:NCIT_C42781 xsd:string ;
+    sio:SIO_000116 xsd:string ?;
     sio:SIO_000172 xsd:string ?;
+    obo:NCIT_C37984 xsd:string ?;
+    obo:OBI_0600047 xsd:string ?;
+  	obo:NCIT_C37900 xsd:string ?;
     efo:EFO_0001741 xsd:string ?;
+    obo:NCIT_C19026 xsd:string ?;
+    sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?;
 }
 
 :technologyShape {
-    obo:OBI_0600047 xsd:string ;
-    obo:FLU_0000848 xsd:integer ?;
+    obo:OBI_0600047 IRI {0,2} ;
     efo:EFO_0002699 xsd:string ?;
+    obo:FLU_0000848 xsd:double {0,2};
+    sio:SIO_001167 xsd:string ?;
 }
 
 :virusShape{
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 49d012d..4c4711d 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -44,7 +44,8 @@ def main():
 
     with col.open(target, "w") as f:
         r = args.sequence.read(65536)
-        print(r[0:20])
+        seqlabel = r[1:r.index("\n")]
+        print(seqlabel)
         while r:
             f.write(r)
             r = args.sequence.read(65536)
@@ -62,13 +63,14 @@ def main():
     external_ip = urllib.request.urlopen('https://ident.me').read().decode('utf8')
 
     properties = {
+        "sequence_label": seqlabel,
         "upload_app": "bh20-seq-uploader",
         "upload_ip": external_ip,
         "upload_user": "%s@%s" % (getpass.getuser(), socket.gethostname())
     }
 
-    col.save_new(owner_uuid=UPLOAD_PROJECT, name="Uploaded by %s from %s" %
-                 (properties['upload_user'], properties['upload_ip']),
+    col.save_new(owner_uuid=UPLOAD_PROJECT, name="%s uploaded by %s from %s" %
+                 (seqlabel, properties['upload_user'], properties['upload_ip']),
                  properties=properties, ensure_unique_name=True)
 
     print("Done")