diff options
Diffstat (limited to 'bh20sequploader')
-rw-r--r-- | bh20sequploader/bh20seq-options.yml | 9 | ||||
-rw-r--r-- | bh20sequploader/bh20seq-schema.yml | 76 | ||||
-rw-r--r-- | bh20sequploader/bh20seq-shex.rdf | 40 | ||||
-rw-r--r-- | bh20sequploader/main.py | 8 |
4 files changed, 80 insertions, 53 deletions
diff --git a/bh20sequploader/bh20seq-options.yml b/bh20sequploader/bh20seq-options.yml index 7320ecf..4c1d15c 100644 --- a/bh20sequploader/bh20seq-options.yml +++ b/bh20sequploader/bh20seq-options.yml @@ -15,6 +15,15 @@ host_sex: Female: http://purl.obolibrary.org/obo/PATO_0000383 Intersex: http://purl.obolibrary.org/obo/PATO_0001340 +host_health_status: + healthy: http://purl.obolibrary.org/obo/NCIT_C115935 + asymptomatic: http://purl.obolibrary.org/obo/NCIT_C3833 + symptomatic: http://purl.obolibrary.org/obo/NCIT_C25269 + admitted to hospital: http://purl.obolibrary.org/obo/GENEPIO_0002020 + discharged from hospital: http://purl.obolibrary.org/obo/GENEPIO_0001849 + dead: http://purl.obolibrary.org/obo/NCIT_C28554 + alive: http://purl.obolibrary.org/obo/NCIT_C37987 + sample_sequencing_technology: Illumina NextSeq 500: http://www.ebi.ac.uk/efo/EFO_0009173 Illumina NextSeq 550: http://www.ebi.ac.uk/efo/EFO_0008566 diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml index bbcafc8..9a89979 100644 --- a/bh20sequploader/bh20seq-schema.yml +++ b/bh20sequploader/bh20seq-schema.yml @@ -18,17 +18,19 @@ $graph: jsonldPredicate: _id: http://www.ebi.ac.uk/efo/EFO_0000532 _type: "@id" + noLinkCheck: true host_id: doc: Identifer for the host. If you submit multiple samples from the same host, use the same host_id for those samples - type: string + type: string? jsonldPredicate: _id: http://semanticscience.org/resource/SIO_000115 host_sex: - doc: Sex of the host, IRI expected + doc: Sex of the host as defined in PATO, expect male (http://purl.obolibrary.org/obo/PATO_0000384) or female (http://purl.obolibrary.org/obo/PATO_0000383) type: string? jsonldPredicate: _id: http://purl.obolibrary.org/obo/PATO_0000047 _type: "@id" + noLinkCheck: true host_age: doc: Age of the host as number (e.g. 50) type: int? 
@@ -40,10 +42,12 @@ $graph: jsonldPredicate: _id: http://purl.obolibrary.org/obo/NCIT_C42574 _type: "@id" + noLinkCheck: true host_health_status: - doc: A condition or state at a particular time + doc: A condition or state at a particular time, must be one of the following (obo:NCIT_C115935 obo:NCIT_C3833 obo:NCIT_C25269 obo:GENEPIO_0002020 obo:GENEPIO_0001849 obo:NCIT_C28554 obo:NCIT_C37987) type: string? - jsonldPredicate: http://purl.obolibrary.org/obo/NCIT_C25688 + jsonldPredicate: + _id: http://purl.obolibrary.org/obo/NCIT_C25688 host_treatment: doc: Process in which the act is intended to modify or alter host status type: string? @@ -63,39 +67,47 @@ $graph: - name: sampleSchema type: record fields: + sample_id: + doc: Id of the sample as defined by the submitter + type: string + jsonldPredicate: + _id: http://semanticscience.org/resource/SIO_000115 + collection_date: + doc: Date when the sample was taken + type: string + jsonldPredicate: + _id: http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164 + collection_location: + doc: Geographical location where the sample was collected as wikidata reference, e.g. http://www.wikidata.org/entity/Q148 (China) + type: string + jsonldPredicate: + _id: http://purl.obolibrary.org/obo/GAZ_00000448 + _type: "@id" + noLinkCheck: true collector_name: doc: Name of the person that took the sample - type: string + type: string? jsonldPredicate: _id: http://purl.obolibrary.org/obo/OBI_0001895 collecting_institution: doc: Institute that was responsible of sampeling - type: string + type: string? jsonldPredicate: - _id: http://semanticscience.org/resource/SIO_001167 + _id: http://purl.obolibrary.org/obo/NCIT_C41206 specimen_source: doc: Method how the specimen was derived as NCIT IRI, e.g. http://purl.obolibrary.org/obo/NCIT_C155831 (=nasopharyngeal swab) type: string? 
jsonldPredicate: _id: http://purl.obolibrary.org/obo/OBI_0001479 _type: "@id" + noLinkCheck: true specimen_source2: doc: Method how the specimen was derived as NCIT IRI, e.g. http://purl.obolibrary.org/obo/NCIT_C155835 (=throat swabb) type: string? jsonldPredicate: _id: http://purl.obolibrary.org/obo/OBI_0001479 _type: "@id" - collection_date: - doc: Date when the sample was taken - type: string - jsonldPredicate: - _id: http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164 - collection_location: - doc: Geographical location where the sample was collected as wikidata reference, e.g. http://www.wikidata.org/entity/Q148 (China) - type: string - jsonldPredicate: - _id: http://purl.obolibrary.org/obo/GAZ_00000448 - _type: "@id" + noLinkCheck: true sample_storage_conditions: doc: Information about storage of a specified type, e.g. frozen specimen, paraffin, fresh .... type: string? @@ -106,11 +118,6 @@ $graph: type: string? jsonldPredicate: _id: http://semanticscience.org/resource/SIO_001167 - sample_id: - doc: Id of the sample as defined by the submitter - type: string - jsonldPredicate: - _id: http://semanticscience.org/resource/SIO_000115 source_database_accession: doc: If data is deposit at a public resource (e.g. Genbank, ENA) enter the Accession Id here type: string? @@ -126,6 +133,7 @@ $graph: jsonldPredicate: _id: http://edamontology.org/data_1875 _type: "@id" + noLinkCheck: true virus_strain: doc: Name of the virus strain type: string? @@ -137,16 +145,18 @@ $graph: fields: sample_sequencing_technology: doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION) - type: string + type: string? jsonldPredicate: _id: http://purl.obolibrary.org/obo/OBI_0600047 _type: "@id" + noLinkCheck: true sample_sequencing_technology2: doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION) type: string? 
jsonldPredicate: _id: http://purl.obolibrary.org/obo/OBI_0600047 _type: "@id" + noLinkCheck: true sequence_assembly_method: doc: Protocol which provides instructions on the alignment of sequencing reads to reference genome type: string? @@ -158,7 +168,7 @@ $graph: jsonldPredicate: _id: http://purl.obolibrary.org/obo/FLU_0000848 sequencing_coverage2: - doc: If a second sequence technology was use you can submit its coverage here + doc: If a second sequence technology was used you can submit its coverage here type: float? jsonldPredicate: _id: http://purl.obolibrary.org/obo/FLU_0000848 @@ -171,9 +181,14 @@ $graph: - name: submitterSchema type: record fields: + authors: + doc: Name of the author(s) + type: string + jsonldPredicate: + _id: http://purl.obolibrary.org/obo/NCIT_C42781 submitter_name: doc: Name of the submitter - type: string + type: string? jsonldPredicate: _id: http://semanticscience.org/resource/SIO_000116 submitter_address: @@ -183,7 +198,7 @@ $graph: _id: http://semanticscience.org/resource/SIO_000172 originating_lab: doc: Name of the laboratory that took the sample - type: string + type: string? jsonldPredicate: _id: http://purl.obolibrary.org/obo/NCIT_C37984 lab_address: @@ -199,11 +214,6 @@ $graph: type: string? jsonldPredicate: _id: http://www.ebi.ac.uk/efo/EFO_0001741 - authors: - doc: Name of the author(s) - type: string? - jsonldPredicate: - _id: http://purl.obolibrary.org/obo/NCIT_C42781 publication: doc: Reference to publication of this sample (e.g. DOI, pubmed ID, ...) type: string? @@ -223,7 +233,7 @@ $graph: fields: host: hostSchema sample: sampleSchema - virus: virusSchema? 
+ virus: virusSchema technology: technologySchema submitter: submitterSchema id: diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf index 8d3f5fc..246fd57 100644 --- a/bh20sequploader/bh20seq-shex.rdf +++ b/bh20sequploader/bh20seq-shex.rdf @@ -7,6 +7,7 @@ PREFIX sio: <http://semanticscience.org/resource/> PREFIX efo: <http://www.ebi.ac.uk/efo/> PREFIX evs: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#> PREFIX edam: <http://edamontology.org/> +PREFIX wikidata: <http://www.wikidata.org/entity/> :submissionShape { MainSchema:host @:hostShape ; @@ -18,39 +19,44 @@ PREFIX edam: <http://edamontology.org/> :hostShape { efo:EFO_0000532 [ obo:NCBITaxon_~ ] ; - obo:PATO_0000047 [ obo:NCIT_C20197 obo:NCIT_C27993 obo:NCIT_C17998 ] ; - sio:SIO_000115 xsd:string ; + sio:SIO_000115 xsd:string ?; + obo:PATO_0000047 [ obo:PATO_0000384 obo:PATO_0000383 ] ?; obo:PATO_0000011 xsd:integer ?; obo:NCIT_C42574 [ obo:UO_~ ] ?; - sio:SIO_001167 xsd:string ?; + obo:NCIT_C25688 [obo:NCIT_C115935 obo:NCIT_C3833 obo:NCIT_C25269 obo:GENEPIO_0002020 obo:GENEPIO_0001849 obo:NCIT_C28554 obo:NCIT_C37987 ] ? 
; efo:EFO_0000727 xsd:string ?; + obo:VO_0000002 xsd:string ?; + sio:SIO_001167 xsd:string ?; } :sampleShape { - obo:OBI_0001895 xsd:string ; - sio:SIO_000115 xsd:string ; - sio:SIO_001167 xsd:string ; - evs:C25164 xsd:string ?; - obo:GAZ_00000448 [obo:GAZ_~] ?; + sio:SIO_000115 xsd:string; + evs:C25164 xsd:string; + obo:GAZ_00000448 [wikidata:~] ; + obo:OBI_0001895 xsd:string ?; + obo:NCIT_C41206 xsd:string ?; + obo:OBI_0001479 IRI {0,2}; obo:OBI_0001472 xsd:string ?; - obo:OBI_0001479 xsd:string ?; + sio:SIO_001167 xsd:string ?; } :submitterShape { - sio:SIO_000116 xsd:string ; - obo:NCIT_C37984 xsd:string ; - obo:NCIT_C37900 xsd:string ?; - obo:NCIT_C42781 xsd:string ?; - obo:OBI_0600047 xsd:string ?; - sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?; + obo:NCIT_C42781 xsd:string ; + sio:SIO_000116 xsd:string ?; sio:SIO_000172 xsd:string ?; + obo:NCIT_C37984 xsd:string ?; + obo:OBI_0600047 xsd:string ?; + obo:NCIT_C37900 xsd:string ?; efo:EFO_0001741 xsd:string ?; + obo:NCIT_C19026 xsd:string ?; + sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?; } :technologyShape { - obo:OBI_0600047 xsd:string ; - obo:FLU_0000848 xsd:integer ?; + obo:OBI_0600047 IRI {0,2} ; efo:EFO_0002699 xsd:string ?; + obo:FLU_0000848 xsd:double {0,2}; + sio:SIO_001167 xsd:string ?; } :virusShape{ diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py index 49d012d..4c4711d 100644 --- a/bh20sequploader/main.py +++ b/bh20sequploader/main.py @@ -44,7 +44,8 @@ def main(): with col.open(target, "w") as f: r = args.sequence.read(65536) - print(r[0:20]) + seqlabel = r[1:r.index("\n")] + print(seqlabel) while r: f.write(r) r = args.sequence.read(65536) @@ -62,13 +63,14 @@ def main(): external_ip = urllib.request.urlopen('https://ident.me').read().decode('utf8') properties = { + "sequence_label": seqlabel, "upload_app": "bh20-seq-uploader", "upload_ip": external_ip, "upload_user": "%s@%s" % (getpass.getuser(), socket.gethostname()) } - 
col.save_new(owner_uuid=UPLOAD_PROJECT, name="Uploaded by %s from %s" % - (properties['upload_user'], properties['upload_ip']), + col.save_new(owner_uuid=UPLOAD_PROJECT, name="%s uploaded by %s from %s" % + (seqlabel, properties['upload_user'], properties['upload_ip']), properties=properties, ensure_unique_name=True) print("Done") |