aboutsummaryrefslogtreecommitdiff
path: root/bh20sequploader
diff options
context:
space:
mode:
Diffstat (limited to 'bh20sequploader')
-rw-r--r--bh20sequploader/bh20seq-options.yml9
-rw-r--r--bh20sequploader/bh20seq-schema.yml76
-rw-r--r--bh20sequploader/bh20seq-shex.rdf40
-rw-r--r--bh20sequploader/main.py8
4 files changed, 80 insertions, 53 deletions
diff --git a/bh20sequploader/bh20seq-options.yml b/bh20sequploader/bh20seq-options.yml
index 7320ecf..4c1d15c 100644
--- a/bh20sequploader/bh20seq-options.yml
+++ b/bh20sequploader/bh20seq-options.yml
@@ -15,6 +15,15 @@ host_sex:
Female: http://purl.obolibrary.org/obo/PATO_0000383
Intersex: http://purl.obolibrary.org/obo/PATO_0001340
+host_health_status:
+ healthy: http://purl.obolibrary.org/obo/NCIT_C115935
+ asymptomatic: http://purl.obolibrary.org/obo/NCIT_C3833
+ sympotmatic: http://purl.obolibrary.org/obo/NCIT_C25269
+ admitted to hospital: http://purl.obolibrary.org/obo/GENEPIO_0002020
+ discharged from hospital: http://purl.obolibrary.org/obo/GENEPIO_0001849
+ dead: http://purl.obolibrary.org/obo/NCIT_C28554
+ alive: http://purl.obolibrary.org/obo/NCIT_C37987
+
sample_sequencing_technology:
Illumina NextSeq 500: http://www.ebi.ac.uk/efo/EFO_0009173
Illumina NextSeq 550: http://www.ebi.ac.uk/efo/EFO_0008566
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index bbcafc8..9a89979 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -18,17 +18,19 @@ $graph:
jsonldPredicate:
_id: http://www.ebi.ac.uk/efo/EFO_0000532
_type: "@id"
+ noLinkCheck: true
host_id:
doc: Identifier for the host. If you submit multiple samples from the same host, use the same host_id for those samples
- type: string
+ type: string?
jsonldPredicate:
_id: http://semanticscience.org/resource/SIO_000115
host_sex:
- doc: Sex of the host, IRI expected
+ doc: Sex of the host as defined in PATO, expect male (<http://purl.obolibrary.org/obo/PATO_0000384>) or female (<http://purl.obolibrary.org/obo/PATO_0000383>)
type: string?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/PATO_0000047
_type: "@id"
+ noLinkCheck: true
host_age:
doc: Age of the host as number (e.g. 50)
type: int?
@@ -40,10 +42,12 @@ $graph:
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/NCIT_C42574
_type: "@id"
+ noLinkCheck: true
host_health_status:
- doc: A condition or state at a particular time
+ doc: A condition or state at a particular time, must be one of the following (obo:NCIT_C115935 obo:NCIT_C3833 obo:NCIT_C25269 obo:GENEPIO_0002020 obo:GENEPIO_0001849 obo:NCIT_C28554 obo:NCIT_C37987)
type: string?
- jsonldPredicate: http://purl.obolibrary.org/obo/NCIT_C25688
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/NCIT_C25688
host_treatment:
doc: Process in which the act is intended to modify or alter host status
type: string?
@@ -63,39 +67,47 @@ $graph:
- name: sampleSchema
type: record
fields:
+ sample_id:
+ doc: Id of the sample as defined by the submitter
+ type: string
+ jsonldPredicate:
+ _id: http://semanticscience.org/resource/SIO_000115
+ collection_date:
+ doc: Date when the sample was taken
+ type: string
+ jsonldPredicate:
+ _id: http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164
+ collection_location:
+ doc: Geographical location where the sample was collected as wikidata reference, e.g. http://www.wikidata.org/entity/Q148 (China)
+ type: string
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/GAZ_00000448
+ _type: "@id"
+ noLinkCheck: true
collector_name:
doc: Name of the person that took the sample
- type: string
+ type: string?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/OBI_0001895
collecting_institution:
doc: Institute that was responsible for sampling
- type: string
+ type: string?
jsonldPredicate:
- _id: http://semanticscience.org/resource/SIO_001167
+ _id: http://purl.obolibrary.org/obo/NCIT_C41206
specimen_source:
doc: Method how the specimen was derived as NCIT IRI, e.g. http://purl.obolibrary.org/obo/NCIT_C155831 (=nasopharyngeal swab)
type: string?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/OBI_0001479
_type: "@id"
+ noLinkCheck: true
specimen_source2:
doc: Method how the specimen was derived as NCIT IRI, e.g. http://purl.obolibrary.org/obo/NCIT_C155835 (=throat swab)
type: string?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/OBI_0001479
_type: "@id"
- collection_date:
- doc: Date when the sample was taken
- type: string
- jsonldPredicate:
- _id: http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C25164
- collection_location:
- doc: Geographical location where the sample was collected as wikidata reference, e.g. http://www.wikidata.org/entity/Q148 (China)
- type: string
- jsonldPredicate:
- _id: http://purl.obolibrary.org/obo/GAZ_00000448
- _type: "@id"
+ noLinkCheck: true
sample_storage_conditions:
doc: Information about storage of a specified type, e.g. frozen specimen, paraffin, fresh ....
type: string?
@@ -106,11 +118,6 @@ $graph:
type: string?
jsonldPredicate:
_id: http://semanticscience.org/resource/SIO_001167
- sample_id:
- doc: Id of the sample as defined by the submitter
- type: string
- jsonldPredicate:
- _id: http://semanticscience.org/resource/SIO_000115
source_database_accession:
doc: If data is deposit at a public resource (e.g. Genbank, ENA) enter the Accession Id here
type: string?
@@ -126,6 +133,7 @@ $graph:
jsonldPredicate:
_id: http://edamontology.org/data_1875
_type: "@id"
+ noLinkCheck: true
virus_strain:
doc: Name of the virus strain
type: string?
@@ -137,16 +145,18 @@ $graph:
fields:
sample_sequencing_technology:
doc: Technology that was used to sequence this sample (e.g. Sanger, Nanopore MinION)
- type: string
+ type: string?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/OBI_0600047
_type: "@id"
+ noLinkCheck: true
sample_sequencing_technology2:
doc: Technology that was used to sequence this sample (e.g. Sanger, Nanopore MinION)
type: string?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/OBI_0600047
_type: "@id"
+ noLinkCheck: true
sequence_assembly_method:
doc: Protocol which provides instructions on the alignment of sequencing reads to reference genome
type: string?
@@ -158,7 +168,7 @@ $graph:
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/FLU_0000848
sequencing_coverage2:
- doc: If a second sequence technology was use you can submit its coverage here
+ doc: If a second sequence technology was used you can submit its coverage here
type: float?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/FLU_0000848
@@ -171,9 +181,14 @@ $graph:
- name: submitterSchema
type: record
fields:
+ authors:
+ doc: Name of the author(s)
+ type: string
+ jsonldPredicate:
+ _id: http://purl.obolibrary.org/obo/NCIT_C42781
submitter_name:
doc: Name of the submitter
- type: string
+ type: string?
jsonldPredicate:
_id: http://semanticscience.org/resource/SIO_000116
submitter_address:
@@ -183,7 +198,7 @@ $graph:
_id: http://semanticscience.org/resource/SIO_000172
originating_lab:
doc: Name of the laboratory that took the sample
- type: string
+ type: string?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/NCIT_C37984
lab_address:
@@ -199,11 +214,6 @@ $graph:
type: string?
jsonldPredicate:
_id: http://www.ebi.ac.uk/efo/EFO_0001741
- authors:
- doc: Name of the author(s)
- type: string?
- jsonldPredicate:
- _id: http://purl.obolibrary.org/obo/NCIT_C42781
publication:
doc: Reference to publication of this sample (e.g. DOI, pubmed ID, ...)
type: string?
@@ -223,7 +233,7 @@ $graph:
fields:
host: hostSchema
sample: sampleSchema
- virus: virusSchema?
+ virus: virusSchema
technology: technologySchema
submitter: submitterSchema
id:
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 8d3f5fc..246fd57 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -7,6 +7,7 @@ PREFIX sio: <http://semanticscience.org/resource/>
PREFIX efo: <http://www.ebi.ac.uk/efo/>
PREFIX evs: <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#>
PREFIX edam: <http://edamontology.org/>
+PREFIX wikidata: <http://www.wikidata.org/entity/>
:submissionShape {
MainSchema:host @:hostShape ;
@@ -18,39 +19,44 @@ PREFIX edam: <http://edamontology.org/>
:hostShape {
efo:EFO_0000532 [ obo:NCBITaxon_~ ] ;
- obo:PATO_0000047 [ obo:NCIT_C20197 obo:NCIT_C27993 obo:NCIT_C17998 ] ;
- sio:SIO_000115 xsd:string ;
+ sio:SIO_000115 xsd:string ?;
+ obo:PATO_0000047 [ obo:PATO_0000384 obo:PATO_0000383 ] ?;
obo:PATO_0000011 xsd:integer ?;
obo:NCIT_C42574 [ obo:UO_~ ] ?;
- sio:SIO_001167 xsd:string ?;
+ obo:NCIT_C25688 [obo:NCIT_C115935 obo:NCIT_C3833 obo:NCIT_C25269 obo:GENEPIO_0002020 obo:GENEPIO_0001849 obo:NCIT_C28554 obo:NCIT_C37987 ] ? ;
efo:EFO_0000727 xsd:string ?;
+ obo:VO_0000002 xsd:string ?;
+ sio:SIO_001167 xsd:string ?;
}
:sampleShape {
- obo:OBI_0001895 xsd:string ;
- sio:SIO_000115 xsd:string ;
- sio:SIO_001167 xsd:string ;
- evs:C25164 xsd:string ?;
- obo:GAZ_00000448 [obo:GAZ_~] ?;
+ sio:SIO_000115 xsd:string;
+ evs:C25164 xsd:string;
+ obo:GAZ_00000448 [wikidata:~] ;
+ obo:OBI_0001895 xsd:string ?;
+ obo:NCIT_C41206 xsd:string ?;
+ obo:OBI_0001479 IRI {0,2};
obo:OBI_0001472 xsd:string ?;
- obo:OBI_0001479 xsd:string ?;
+ sio:SIO_001167 xsd:string ?;
}
:submitterShape {
- sio:SIO_000116 xsd:string ;
- obo:NCIT_C37984 xsd:string ;
- obo:NCIT_C37900 xsd:string ?;
- obo:NCIT_C42781 xsd:string ?;
- obo:OBI_0600047 xsd:string ?;
- sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?;
+ obo:NCIT_C42781 xsd:string ;
+ sio:SIO_000116 xsd:string ?;
sio:SIO_000172 xsd:string ?;
+ obo:NCIT_C37984 xsd:string ?;
+ obo:OBI_0600047 xsd:string ?;
+ obo:NCIT_C37900 xsd:string ?;
efo:EFO_0001741 xsd:string ?;
+ obo:NCIT_C19026 xsd:string ?;
+ sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?;
}
:technologyShape {
- obo:OBI_0600047 xsd:string ;
- obo:FLU_0000848 xsd:integer ?;
+ obo:OBI_0600047 IRI {0,2} ;
efo:EFO_0002699 xsd:string ?;
+ obo:FLU_0000848 xsd:double {0,2};
+ sio:SIO_001167 xsd:string ?;
}
:virusShape{
diff --git a/bh20sequploader/main.py b/bh20sequploader/main.py
index 49d012d..4c4711d 100644
--- a/bh20sequploader/main.py
+++ b/bh20sequploader/main.py
@@ -44,7 +44,8 @@ def main():
with col.open(target, "w") as f:
r = args.sequence.read(65536)
- print(r[0:20])
+ seqlabel = r[1:r.index("\n")]
+ print(seqlabel)
while r:
f.write(r)
r = args.sequence.read(65536)
@@ -62,13 +63,14 @@ def main():
external_ip = urllib.request.urlopen('https://ident.me').read().decode('utf8')
properties = {
+ "sequence_label": seqlabel,
"upload_app": "bh20-seq-uploader",
"upload_ip": external_ip,
"upload_user": "%s@%s" % (getpass.getuser(), socket.gethostname())
}
- col.save_new(owner_uuid=UPLOAD_PROJECT, name="Uploaded by %s from %s" %
- (properties['upload_user'], properties['upload_ip']),
+ col.save_new(owner_uuid=UPLOAD_PROJECT, name="%s uploaded by %s from %s" %
+ (seqlabel, properties['upload_user'], properties['upload_ip']),
properties=properties, ensure_unique_name=True)
print("Done")