aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlltommy2020-04-22 19:41:27 +0200
committerlltommy2020-04-22 19:41:27 +0200
commita12fe94f174da766be612fbb2712b4db2ba98296 (patch)
tree0c1a21fcfc638460d91309f7dfac3fdc967ca7c1
parentba8b5d364f0ba96f3fef5214137d30ed00a8079d (diff)
downloadbh20-seq-resource-a12fe94f174da766be612fbb2712b4db2ba98296.tar.gz
bh20-seq-resource-a12fe94f174da766be612fbb2712b4db2ba98296.tar.lz
bh20-seq-resource-a12fe94f174da766be612fbb2712b4db2ba98296.zip
Small changes all around, trying to make the importer/metadata better
-rw-r--r--bh20sequploader/bh20seq-schema.yml4
-rw-r--r--bh20sequploader/bh20seq-shex.rdf25
-rw-r--r--example/metadata.yaml8
-rwxr-xr-xscripts/foreach.sh2
-rwxr-xr-xscripts/from_genbank_to_fasta_and_yaml.py12
5 files changed, 30 insertions, 21 deletions
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index 1ceebe2..80013c3 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -25,7 +25,7 @@ $graph:
jsonldPredicate:
_id: http://semanticscience.org/resource/SIO_000115
host_sex:
- doc: Sex of the host as defined in NCIT, IRI expected (http://purl.obolibrary.org/obo/NCIT_C20197 (Male), http://purl.obolibrary.org/obo/NCIT_C27993 (Female), http://purl.obolibrary.org/obo/NCIT_C45908 (Intersex), or http://purl.obolibrary.org/obo/NCIT_C17998 (Unknown))
+ doc: Sex of the host as defined in PATO, expect male (http://purl.obolibrary.org/obo/PATO_0000384) or female (http://purl.obolibrary.org/obo/PATO_0000383)
type: string?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/PATO_0000047
@@ -144,7 +144,7 @@ $graph:
fields:
sample_sequencing_technology:
doc: Technology that was used to sequence this sample (e.g. Sanger, Nanopore MinION)
- type: string
+ type: string?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/OBI_0600047
_type: "@id"
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 31e714f..8d0055e 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -23,35 +23,40 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
obo:PATO_0000047 [ obo:PATO_0000384 obo:PATO_0000383 ] ?;
obo:PATO_0000011 xsd:integer ?;
obo:NCIT_C42574 [ obo:UO_~ ] ?;
- sio:SIO_001167 xsd:string ?;
+ obo:NCIT_C25688 xsd:string ? ;
efo:EFO_0000727 xsd:string ?;
+ obo:VO_0000002 xsd:string ?;
+ sio:SIO_001167 xsd:string ?;
}
:sampleShape {
sio:SIO_000115 xsd:string;
- obo:GAZ_00000448 [wikidata:~] ;
evs:C25164 xsd:string;
+ obo:GAZ_00000448 [wikidata:~] ;
obo:OBI_0001895 xsd:string ?;
- sio:SIO_001167 xsd:string ?;
- obo:OBI_0001472 xsd:string ?;
+ obo:NCIT_C41206 xsd:string ?;
obo:OBI_0001479 IRI {0,2};
+ obo:OBI_0001472 xsd:string ?;
+ sio:SIO_001167 xsd:string ?;
}
:submitterShape {
obo:NCIT_C42781 xsd:string ;
- obo:NCIT_C37984 xsd:string ?;
- obo:NCIT_C37900 xsd:string ?;
sio:SIO_000116 xsd:string ?;
- obo:OBI_0600047 xsd:string ?;
- sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?;
sio:SIO_000172 xsd:string ?;
+ obo:NCIT_C37984 xsd:string ?;
+ obo:OBI_0600047 xsd:string ?;
+ obo:NCIT_C37900 xsd:string ?;
efo:EFO_0001741 xsd:string ?;
+ obo:NCIT_C19026 xsd:string ?;
+ sio:SIO_000115 /https:\u002F\u002Forcid.org\u002F.{4}-.{4}-.{4}-.{4}/?;
}
:technologyShape {
- obo:OBI_0600047 IRI {0,2} ;
- obo:FLU_0000848 xsd:double ?;
+ obo:OBI_0600047 IRI {0,2} ?;
efo:EFO_0002699 xsd:string ?;
+ obo:FLU_0000848 xsd:double {0,2};
+ sio:SIO_001167 xsd:string ?;
}
:virusShape{
diff --git a/example/metadata.yaml b/example/metadata.yaml
index 57d90b5..d1b10c1 100644
--- a/example/metadata.yaml
+++ b/example/metadata.yaml
@@ -6,7 +6,7 @@ host:
host_sex: http://purl.obolibrary.org/obo/NCIT_C27993
host_age: 20
host_age_unit: http://purl.obolibrary.org/obo/UO_0000036
- host_health_status: A condition or state at a particular time (Disease ontology)
+ host_health_status: A condition or state at a particular time
host_treatment: Process in which the act is intended to modify or alter host status (Compounds)
host_vaccination: List of vaccines given to the host (RRIDs?)
additional_host_information: Field for additional host information
@@ -29,15 +29,15 @@ virus:
technology:
sample_sequencing_technology: http://www.ebi.ac.uk/efo/EFO_0009173
sample_sequencing_technology2: http://www.ebi.ac.uk/efo/EFO_0009173
- sequence_assembly_method: Protocol used for assembly (CWL, WDL, NF, BCO?)
+ sequence_assembly_method: Protocol used for assembly
sequencing_coverage: 70
submitter:
- submitter_name: John Doe (ORCID?)
+ submitter_name: John Doe
submitter_address: John Doe's adress
originating_lab: John Doe kitchen
lab_address: John Doe's address
provider_sample_id: HmX
submitter_sample_id: xXx
authors: John Doe et all
- submitter_orcid: https://orcid.org/0000-0000-0000-0000 (if this is here, others can be optional?)
+ submitter_orcid: https://orcid.org/0000-0000-0000-0000
\ No newline at end of file
diff --git a/scripts/foreach.sh b/scripts/foreach.sh
index 35b07b8..ddc9387 100755
--- a/scripts/foreach.sh
+++ b/scripts/foreach.sh
@@ -2,7 +2,7 @@
rm -rf validated fasta_and_yaml_*
mkdir -p validated
./from_genbank_to_fasta_and_yaml.py
-fasta_files=$(find fasta_and_yaml_20200421/ -name "*.fasta")
+fasta_files=$(find fasta_and_yaml/ -name "*.fasta")
for f in $fasta_files ; do
yaml=$(echo $f | rev | cut -c7- | rev).yaml
echo $f
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index 00c0012..096a6af 100755
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -8,10 +8,11 @@ import json
import os
from datetime import date
-today = date.today().strftime("%Y%m%d")
+#today = date.today().strftime("%Y%m%d")
-dir_metadata_today = 'metadata_from_nuccore_{}'.format(today)
-dir_fasta_and_yaml_today = 'fasta_and_yaml_{}'.format(today)
+
+dir_metadata_today = 'metadata_from_nuccore' #_{}'.format(today)
+dir_fasta_and_yaml_today = 'fasta_and_yaml' #'.format(today)
dir_dict_ontology_standardization = 'dict_ontology_standardization/'
@@ -177,7 +178,10 @@ if not os.path.exists(dir_fasta_and_yaml_today):
if len(GBQualifier_value_text_list) > 1:
if GBQualifier_value_text_list[1] in ['male', 'female']:
- info_for_yaml_dict['host']['host_sex'] = GBQualifier_value_text_list[1]
+ if GBQualifier_value_text_list[1]=='male':
+ info_for_yaml_dict['host']['host_sex'] = "http://purl.obolibrary.org/obo/PATO_0000384"
+ elif GBQualifier_value_text_list[1]=='female':
+ info_for_yaml_dict['host']['host_sex'] = "http://purl.obolibrary.org/obo/PATO_0000383"
else:
info_for_yaml_dict['host']['host_health_status'] = GBQualifier_value_text_list[1]