aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Amstutz2020-04-21 13:22:53 -0400
committerPeter Amstutz2020-04-21 13:22:53 -0400
commit7e085b2958d9bd4f0a2b1912cf259a05b56366bc (patch)
tree29227cc5e55264945521ed27172db70c8de46d73
parent88d81f853cf04b7f28681dd9cdee775b0422f252 (diff)
downloadbh20-seq-resource-7e085b2958d9bd4f0a2b1912cf259a05b56366bc.tar.gz
bh20-seq-resource-7e085b2958d9bd4f0a2b1912cf259a05b56366bc.tar.lz
bh20-seq-resource-7e085b2958d9bd4f0a2b1912cf259a05b56366bc.zip
Tweak handling of "coverage" also fix typo
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
-rw-r--r--bh20sequploader/bh20seq-schema.yml4
-rw-r--r--bh20sequploader/bh20seq-shex.rdf2
-rw-r--r--scripts/dict_ontology_standardization/ncbi_speciesman_source.csv2
-rwxr-xr-xscripts/from_genbank_to_fasta_and_yaml.py9
4 files changed, 10 insertions, 7 deletions
diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml
index ebca35b..75308ab 100644
--- a/bh20sequploader/bh20seq-schema.yml
+++ b/bh20sequploader/bh20seq-schema.yml
@@ -162,12 +162,12 @@ $graph:
_id: http://www.ebi.ac.uk/efo/EFO_0002699
sequencing_coverage:
doc: Sequence coverage defined as the average number of reads representing a given nucleotide (e.g. 100x)
- type: ["null", float, int]
+ type: float?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/FLU_0000848
sequencing_coverage2:
doc: If a second sequence technology was used you can submit its coverage here
- type: ["null", float, int]
+ type: float?
jsonldPredicate:
_id: http://purl.obolibrary.org/obo/FLU_0000848
additional_technology_information:
diff --git a/bh20sequploader/bh20seq-shex.rdf b/bh20sequploader/bh20seq-shex.rdf
index 59ee71b..31e714f 100644
--- a/bh20sequploader/bh20seq-shex.rdf
+++ b/bh20sequploader/bh20seq-shex.rdf
@@ -50,7 +50,7 @@ PREFIX wikidata: <http://www.wikidata.org/entity/>
:technologyShape {
obo:OBI_0600047 IRI {0,2} ;
- obo:FLU_0000848 xsd:integer ?;
+ obo:FLU_0000848 xsd:double ?;
efo:EFO_0002699 xsd:string ?;
}
diff --git a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
index 2905588..909cf37 100644
--- a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
+++ b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv
@@ -1,4 +1,4 @@
-nasopharyngeal swab, http://purl.obolibrary.org/obo/NCIT_C155831
+nasopharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831
nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831
naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index 1a12513..00c0012 100755
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -130,9 +130,12 @@ if not os.path.exists(dir_fasta_and_yaml_today):
if field_in_yaml == 'sequencing_coverage':
# A regular expression would be better!
- info_for_yaml_dict['technology'][field_in_yaml] = ';'.join(
- [x.strip('(average)').strip("reads/nt").replace(',', '.').strip(' xX>') for x in tech_info_to_parse.split(';')]
- )
+ try:
+ info_for_yaml_dict['technology'][field_in_yaml] = float(
+ tech_info_to_parse.strip('(average)').strip("reads/nt").replace(',', '.').strip(' xX>'))
+ except ValueError:
+ print(accession_version, "Couldn't make sense of Coverage '%s'" % tech_info_to_parse)
+ pass
elif field_in_yaml == 'sample_sequencing_technology':
new_seq_tec_list = []
for seq_tec in tech_info_to_parse.split(';'):