From 85b85b676d7ecc218d9f84357b2e7ea0133eed94 Mon Sep 17 00:00:00 2001 From: lltommy Date: Tue, 21 Apr 2020 16:49:47 +0200 Subject: Updated shex and manditory fields and stuff --- example/minimal_example.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'example') diff --git a/example/minimal_example.yaml b/example/minimal_example.yaml index ed578e2..0e36a25 100644 --- a/example/minimal_example.yaml +++ b/example/minimal_example.yaml @@ -1,13 +1,10 @@ id: placeholder host: - host_id: XX1 host_species: http://purl.obolibrary.org/obo/NCBITaxon_9606 sample: sample_id: XX - collector_name: John Doe - collecting_institution: Doe university collection_date: 2020-01 collection_location: http://www.wikidata.org/entity/Q148 @@ -18,5 +15,4 @@ technology: sample_sequencing_technology: http://www.ebi.ac.uk/efo/EFO_0008632 submitter: - submitter_name: John Doe - originating_lab: John Doe's kitchen \ No newline at end of file + authors: John Doe \ No newline at end of file -- cgit v1.2.3 From 53ec1c771fea5066abc83acb54859bfa3048a606 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Tue, 21 Apr 2020 18:20:58 -0400 Subject: Update metadata.yaml --- example/metadata.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'example') diff --git a/example/metadata.yaml b/example/metadata.yaml index 1e83400..57d90b5 100644 --- a/example/metadata.yaml +++ b/example/metadata.yaml @@ -6,15 +6,15 @@ host: host_sex: http://purl.obolibrary.org/obo/NCIT_C27993 host_age: 20 host_age_unit: http://purl.obolibrary.org/obo/UO_0000036 - host_health_status: A condition or state at a particular time - host_treatment: Process in which the act is intended to modify or alter host status - host_vaccination: List of vaccines given to the host + host_health_status: A condition or state at a particular time (Disease ontology) + host_treatment: Process in which the act is intended to modify or alter host status (Compounds) + host_vaccination: List of vaccines given to the host (RRIDs?) additional_host_information: Field for additional host information sample: - sample_id: Id of the sample as defined by the submitter + sample_id: Id of the sample as defined by the submitter collector_name: Name of the person that took the sample - collecting_institution: Institute that was responsible of sampeling + collecting_institution: Institute that was responsible of sampling specimen_source: http://purl.obolibrary.org/obo/NCIT_C155831 specimen_source2: http://purl.obolibrary.org/obo/NCIT_C155835 collection_date: "2020-01-01" @@ -29,15 +29,15 @@ virus: technology: sample_sequencing_technology: http://www.ebi.ac.uk/efo/EFO_0009173 sample_sequencing_technology2: http://www.ebi.ac.uk/efo/EFO_0009173 - sequence_assembly_method: Protocol used for assembly + sequence_assembly_method: Protocol used for assembly (CWL, WDL, NF, BCO?) sequencing_coverage: 70 submitter: - submitter_name: John Doe + submitter_name: John Doe (ORCID?) submitter_address: John Doe's adress originating_lab: John Doe kitchen - lab_address: John Doe's adress + lab_address: John Doe's address provider_sample_id: HmX submitter_sample_id: xXx authors: John Doe et all - submitter_orcid: https://orcid.org/0000-0000-0000-0000 + submitter_orcid: https://orcid.org/0000-0000-0000-0000 (if this is here, others can be optional?) -- cgit v1.2.3 From a12fe94f174da766be612fbb2712b4db2ba98296 Mon Sep 17 00:00:00 2001 From: lltommy Date: Wed, 22 Apr 2020 19:41:27 +0200 Subject: Small changes all around, trying to make the importer/metadata better --- bh20sequploader/bh20seq-schema.yml | 4 ++-- bh20sequploader/bh20seq-shex.rdf | 25 +++++++++++++++---------- example/metadata.yaml | 8 ++++---- scripts/foreach.sh | 2 +- scripts/from_genbank_to_fasta_and_yaml.py | 12 ++++++++---- 5 files changed, 30 insertions(+), 21 deletions(-) (limited to 'example') diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml index 1ceebe2..80013c3 100644 --- a/bh20sequploader/bh20seq-schema.yml +++ b/bh20sequploader/bh20seq-schema.yml @@ -25,7 +25,7 @@ $graph: jsonldPredicate: _id: http://semanticscience.org/resource/SIO_000115 host_sex: - doc: Sex of the host as defined in NCIT, IRI expected (http://purl.obolibrary.org/obo/NCIT_C20197 (Male), http://purl.obolibrary.org/obo/NCIT_C27993 (Female), http://purl.obolibrary.org/obo/NCIT_C45908 (Intersex), or http://purl.obolibrary.org/obo/NCIT_C17998 (Unknown)) + doc: Sex of the host as defined in PATO, expect male () or female () type: string? jsonldPredicate: _id: http://purl.obolibrary.org/obo/PATO_0000047 @@ -144,7 +144,7 @@ $graph: fields: sample_sequencing_technology: doc: Technology that was used to sequence this sample (e.g Sanger, Nanopor MiniION) - type: string + type: string? jsonldPredicate: _id: http://purl.obolibrary.org/obo/OBI_0600047 _type: "@id" diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf index 31e714f..8d0055e 100644 --- a/bh20sequploader/bh20seq-shex.rdf +++ b/bh20sequploader/bh20seq-shex.rdf @@ -23,35 +23,40 @@ PREFIX wikidata: obo:PATO_0000047 [ obo:PATO_0000384 obo:PATO_0000383 ] ?; obo:PATO_0000011 xsd:integer ?; obo:NCIT_C42574 [ obo:UO_~ ] ?; - sio:SIO_001167 xsd:string ?; + obo:NCIT_C25688 xsd:string ? ; efo:EFO_0000727 xsd:string ?; + obo:VO_0000002 xsd:string ?; + sio:SIO_001167 xsd:string ?; } :sampleShape { sio:SIO_000115 xsd:string; - obo:GAZ_00000448 [wikidata:~] ; evs:C25164 xsd:string; + obo:GAZ_00000448 [wikidata:~] ; obo:OBI_0001895 xsd:string ?; - sio:SIO_001167 xsd:string ?; - obo:OBI_0001472 xsd:string ?; + obo:NCIT_C41206 xsd:string ?; obo:OBI_0001479 IRI {0,2}; + obo:OBI_0001472 xsd:string ?; + sio:SIO_001167 xsd:string ?; } :submitterShape { obo:NCIT_C42781 xsd:string ; - obo:NCIT_C37984 xsd:string ?; - obo:NCIT_C37900 xsd:string ?; sio:SIO_000116 xsd:string ?; - obo:OBI_0600047 xsd:string ?; - sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?; sio:SIO_000172 xsd:string ?; + obo:NCIT_C37984 xsd:string ?; + obo:OBI_0600047 xsd:string ?; + obo:NCIT_C37900 xsd:string ?; efo:EFO_0001741 xsd:string ?; + obo:NCIT_C19026 xsd:string ?; + sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?; } :technologyShape { - obo:OBI_0600047 IRI {0,2} ; - obo:FLU_0000848 xsd:double ?; + obo:OBI_0600047 IRI {0,2} ?; efo:EFO_0002699 xsd:string ?; + obo:FLU_0000848 xsd:double {0,2}; + sio:SIO_001167 xsd:string ?; } :virusShape{ diff --git a/example/metadata.yaml b/example/metadata.yaml index 57d90b5..d1b10c1 100644 --- a/example/metadata.yaml +++ b/example/metadata.yaml @@ -6,7 +6,7 @@ host: host_sex: http://purl.obolibrary.org/obo/NCIT_C27993 host_age: 20 host_age_unit: http://purl.obolibrary.org/obo/UO_0000036 - host_health_status: A condition or state at a particular time (Disease ontology) + host_health_status: A condition or state at a particular time host_treatment: Process in which the act is intended to modify or alter host status (Compounds) host_vaccination: List of vaccines given to the host (RRIDs?) additional_host_information: Field for additional host information @@ -29,15 +29,15 @@ virus: technology: sample_sequencing_technology: http://www.ebi.ac.uk/efo/EFO_0009173 sample_sequencing_technology2: http://www.ebi.ac.uk/efo/EFO_0009173 - sequence_assembly_method: Protocol used for assembly (CWL, WDL, NF, BCO?) + sequence_assembly_method: Protocol used for assembly sequencing_coverage: 70 submitter: - submitter_name: John Doe (ORCID?) + submitter_name: John Doe submitter_address: John Doe's adress originating_lab: John Doe kitchen lab_address: John Doe's address provider_sample_id: HmX submitter_sample_id: xXx authors: John Doe et all - submitter_orcid: https://orcid.org/0000-0000-0000-0000 (if this is here, others can be optional?) + submitter_orcid: https://orcid.org/0000-0000-0000-0000 \ No newline at end of file diff --git a/scripts/foreach.sh b/scripts/foreach.sh index 35b07b8..ddc9387 100755 --- a/scripts/foreach.sh +++ b/scripts/foreach.sh @@ -2,7 +2,7 @@ rm -rf validated fasta_and_yaml_* mkdir -p validated ./from_genbank_to_fasta_and_yaml.py -fasta_files=$(find fasta_and_yaml_20200421/ -name "*.fasta") +fasta_files=$(find fasta_and_yaml/ -name "*.fasta") for f in $fasta_files ; do yaml=$(echo $f | rev | cut -c7- | rev).yaml echo $f diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py index 00c0012..096a6af 100755 --- a/scripts/from_genbank_to_fasta_and_yaml.py +++ b/scripts/from_genbank_to_fasta_and_yaml.py @@ -8,10 +8,11 @@ import json import os from datetime import date -today = date.today().strftime("%Y%m%d") +#today = date.today().strftime("%Y%m%d") -dir_metadata_today = 'metadata_from_nuccore_{}'.format(today) -dir_fasta_and_yaml_today = 'fasta_and_yaml_{}'.format(today) + +dir_metadata_today = 'metadata_from_nuccore' #_{}'.format(today) +dir_fasta_and_yaml_today = 'fasta_and_yaml' #'.format(today) dir_dict_ontology_standardization = 'dict_ontology_standardization/' @@ -177,7 +178,10 @@ if not os.path.exists(dir_fasta_and_yaml_today): if len(GBQualifier_value_text_list) > 1: if GBQualifier_value_text_list[1] in ['male', 'female']: - info_for_yaml_dict['host']['host_sex'] = GBQualifier_value_text_list[1] + if GBQualifier_value_text_list[1]=='male': + info_for_yaml_dict['host']['host_sex'] = "http://purl.obolibrary.org/obo/PATO_0000384" + elif GBQualifier_value_text_list[1]=='female': + info_for_yaml_dict['host']['host_sex'] = "http://purl.obolibrary.org/obo/PATO_0000383" else: info_for_yaml_dict['host']['host_health_status'] = GBQualifier_value_text_list[1] -- cgit v1.2.3 From 9ae32dda5908d666dd6cf574769984c882cc40e6 Mon Sep 17 00:00:00 2001 From: lltommy Date: Wed, 22 Apr 2020 20:48:00 +0200 Subject: updating example with ontology term for host_health_status --- example/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'example') diff --git a/example/metadata.yaml b/example/metadata.yaml index d1b10c1..a76616c 100644 --- a/example/metadata.yaml +++ b/example/metadata.yaml @@ -6,7 +6,7 @@ host: host_sex: http://purl.obolibrary.org/obo/NCIT_C27993 host_age: 20 host_age_unit: http://purl.obolibrary.org/obo/UO_0000036 - host_health_status: A condition or state at a particular time + host_health_status: http://purl.obolibrary.org/obo/NCIT_C25269 host_treatment: Process in which the act is intended to modify or alter host status (Compounds) host_vaccination: List of vaccines given to the host (RRIDs?) additional_host_information: Field for additional host information -- cgit v1.2.3