about summary refs log tree commit diff
diff options
context:
space:
mode:
author AndreaGuarracino 2020-06-06 16:02:28 +0200
committer AndreaGuarracino 2020-06-06 16:02:28 +0200
commit 278b1e7ccbc8b56e6e9b3413840b18f3ce0f36e7 (patch)
tree e2363dc0ff4327a96dc4b71726db198e415821ff
parent 9a3925dbbacf855b638b3fd058c5e793ddc20a16 (diff)
download bh20-seq-resource-278b1e7ccbc8b56e6e9b3413840b18f3ce0f36e7.tar.gz
bh20-seq-resource-278b1e7ccbc8b56e6e9b3413840b18f3ce0f36e7.tar.lz
bh20-seq-resource-278b1e7ccbc8b56e6e9b3413840b18f3ce0f36e7.zip
fixed collection-date management; updated assembly info management for new IDs
-rwxr-xr-x scripts/from_genbank_to_fasta_and_yaml.py 12
1 files changed, 8 insertions, 4 deletions
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index 060c314..fc09615 100755
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -161,7 +161,11 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
GBSeq_comment = GBSeq.find('GBSeq_comment')
if GBSeq_comment is not None and 'Assembly-Data' in GBSeq_comment.text:
- GBSeq_comment_text = GBSeq_comment.text.split('##Assembly-Data-START## ; ')[1].split(' ; ##Assembly-Data-END##')[0]
+ prefix_split_string = '##Genome-Assembly' if GBSeq_comment.text.startswith('##Genome-') else '##Assembly'
+
+ GBSeq_comment_text = GBSeq_comment.text.split(
+ '{}-Data-START## ; '.format(prefix_split_string)
+ )[1].split(' ; {}-Data-END##'.format(prefix_split_string))[0]
for info_to_check, field_in_yaml in zip(
['Assembly Method', 'Coverage', 'Sequencing Technology'],
@@ -289,9 +293,9 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
if len(GBQualifier_value_text.split('-')) == 1:
if int(GBQualifier_value_text) < 2020:
- date_to_write = "15 12 {}".format(GBQualifier_value_text)
+ date_to_write = "{}-12-15".format(GBQualifier_value_text)
else:
- date_to_write = "15 01 {}".format(GBQualifier_value_text)
+ date_to_write = "{}-01-15".format(GBQualifier_value_text)
if 'additional_collection_information' in info_for_yaml_dict['sample']:
info_for_yaml_dict['sample']['additional_collection_information'] += "; The 'collection_date' is estimated (the original date was: {})".format(GBQualifier_value_text)
@@ -308,7 +312,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
GBQualifier_value_text_list = GBQualifier_value_text.split('-')
if GBQualifier_value_text_list[1].isalpha():
- date_to_write = GBQualifier_value_text_list[1] + ' ' + GBQualifier_value_text_list[0] + ' ' + GBQualifier_value_text_list[2]
+ date_to_write = GBQualifier_value_text_list[1] + '-' + GBQualifier_value_text_list[0] + '-' + GBQualifier_value_text_list[2]
info_for_yaml_dict['sample']['collection_date'] = date_to_write
elif GBQualifier_name_text in ['lat_lon', 'country']: