diff options
author | AndreaGuarracino | 2020-06-06 16:02:28 +0200 |
---|---|---|
committer | AndreaGuarracino | 2020-06-06 16:02:28 +0200 |
commit | 278b1e7ccbc8b56e6e9b3413840b18f3ce0f36e7 (patch) | |
tree | e2363dc0ff4327a96dc4b71726db198e415821ff /scripts | |
parent | 9a3925dbbacf855b638b3fd058c5e793ddc20a16 (diff) | |
download | bh20-seq-resource-278b1e7ccbc8b56e6e9b3413840b18f3ce0f36e7.tar.gz bh20-seq-resource-278b1e7ccbc8b56e6e9b3413840b18f3ce0f36e7.tar.lz bh20-seq-resource-278b1e7ccbc8b56e6e9b3413840b18f3ce0f36e7.zip |
fixed collection-date management; updated assembly info management for new IDs
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/from_genbank_to_fasta_and_yaml.py | 12 |
1 files changed, 8 insertions, 4 deletions
```diff
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index 060c314..fc09615 100755
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -161,7 +161,11 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
         GBSeq_comment = GBSeq.find('GBSeq_comment')
         if GBSeq_comment is not None and 'Assembly-Data' in GBSeq_comment.text:
-            GBSeq_comment_text = GBSeq_comment.text.split('##Assembly-Data-START## ; ')[1].split(' ; ##Assembly-Data-END##')[0]
+            prefix_split_string = '##Genome-Assembly' if GBSeq_comment.text.startswith('##Genome-') else '##Assembly'
+
+            GBSeq_comment_text = GBSeq_comment.text.split(
+                '{}-Data-START## ; '.format(prefix_split_string)
+            )[1].split(' ; {}-Data-END##'.format(prefix_split_string))[0]
             for info_to_check, field_in_yaml in zip(
                 ['Assembly Method', 'Coverage', 'Sequencing Technology'],
@@ -289,9 +293,9 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
                     if len(GBQualifier_value_text.split('-')) == 1:
                         if int(GBQualifier_value_text) < 2020:
-                            date_to_write = "15 12 {}".format(GBQualifier_value_text)
+                            date_to_write = "{}-12-15".format(GBQualifier_value_text)
                         else:
-                            date_to_write = "15 01 {}".format(GBQualifier_value_text)
+                            date_to_write = "{}-01-15".format(GBQualifier_value_text)
                         if 'additional_collection_information' in info_for_yaml_dict['sample']:
                             info_for_yaml_dict['sample']['additional_collection_information'] += "; The 'collection_date' is estimated (the original date was: {})".format(GBQualifier_value_text)
@@ -308,7 +312,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
                         GBQualifier_value_text_list = GBQualifier_value_text.split('-')
                         if GBQualifier_value_text_list[1].isalpha():
-                            date_to_write = GBQualifier_value_text_list[1] + ' ' + GBQualifier_value_text_list[0] + ' ' + GBQualifier_value_text_list[2]
+                            date_to_write = GBQualifier_value_text_list[1] + '-' + GBQualifier_value_text_list[0] + '-' + GBQualifier_value_text_list[2]
                         info_for_yaml_dict['sample']['collection_date'] = date_to_write
                 elif GBQualifier_name_text in ['lat_lon', 'country']:
```