about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- scripts/dict_ontology_standardization/ncbi_countries.csv 357
-rwxr-xr-x scripts/from_genbank_to_fasta_and_yaml.py 30
-rw-r--r-- scripts/sequences.acc 297
3 files changed, 339 insertions, 345 deletions
diff --git a/scripts/dict_ontology_standardization/ncbi_countries.csv b/scripts/dict_ontology_standardization/ncbi_countries.csv
index c08b613..b81da36 100644
--- a/scripts/dict_ontology_standardization/ncbi_countries.csv
+++ b/scripts/dict_ontology_standardization/ncbi_countries.csv
@@ -1,328 +1,29 @@
-30.59 N 114.3 E,http://www.wikidata.org/entity/Q11746
-35.92 N 74.33 E,http://www.wikidata.org/entity/Q609024
-39.54 N 116.23 E,http://www.wikidata.org/entity/Q198244
-Afghanistan,http://www.wikidata.org/entity/Q889
-Albania,http://www.wikidata.org/entity/Q222
-Algeria,http://www.wikidata.org/entity/Q262
-Andorra,http://www.wikidata.org/entity/Q228
-Angola,http://www.wikidata.org/entity/Q916
-Antigua and Barbuda,http://www.wikidata.org/entity/Q781
-Argentina,http://www.wikidata.org/entity/Q414
-Armenia,http://www.wikidata.org/entity/Q399
-Australia,http://www.wikidata.org/entity/Q408
-Australia: Queensland,http://www.wikidata.org/entity/Q36074
-Australia: Victoria,http://www.wikidata.org/entity/Q36687
-Austria,http://www.wikidata.org/entity/Q40
-Azerbaijan,http://www.wikidata.org/entity/Q227
-Bahrain,http://www.wikidata.org/entity/Q398
-Bangladesh,http://www.wikidata.org/entity/Q902
-Barbados,http://www.wikidata.org/entity/Q244
-Belarus,http://www.wikidata.org/entity/Q184
-Belgium,http://www.wikidata.org/entity/Q31
-Belize,http://www.wikidata.org/entity/Q242
-Benin,http://www.wikidata.org/entity/Q962
-Bhutan,http://www.wikidata.org/entity/Q917
-Bolivia,http://www.wikidata.org/entity/Q750
-Bosnia and Herzegovina,http://www.wikidata.org/entity/Q225
-Botswana,http://www.wikidata.org/entity/Q963
-Brazil,http://www.wikidata.org/entity/Q155
-Brunei,http://www.wikidata.org/entity/Q921
-Bulgaria,http://www.wikidata.org/entity/Q219
-Burkina Faso,http://www.wikidata.org/entity/Q965
-Burundi,http://www.wikidata.org/entity/Q967
-Cambodia,http://www.wikidata.org/entity/Q424
-Cameroon,http://www.wikidata.org/entity/Q1009
-Canada,http://www.wikidata.org/entity/Q16
-Cape Verde,http://www.wikidata.org/entity/Q1011
-Central African Republic,http://www.wikidata.org/entity/Q929
-Chad,http://www.wikidata.org/entity/Q657
-Chile,http://www.wikidata.org/entity/Q298
-China,http://www.wikidata.org/entity/Q148
-China: Anhui,http://www.wikidata.org/entity/Q40956
-"China: Anhui, Fuyang":http://www.wikidata.org/entity/Q360584
-China: Beijing,http://www.wikidata.org/entity/Q956
-China: Chongqing,http://www.wikidata.org/entity/Q11725
-China: Fujian,http://www.wikidata.org/entity/Q41705
-China: Gansu,http://www.wikidata.org/entity/Q42392
-China: Guangdong,http://www.wikidata.org/entity/Q15175
-"China: Guangdong, Guangzhou",http://www.wikidata.org/entity/Q16572
-China: Guangxi Zhuang Autonomous Region,http://www.wikidata.org/entity/Q15176
-China: Guangzhou,http://www.wikidata.org/entity/Q16572
-China: Guizhou,http://www.wikidata.org/entity/Q47097
-China: Hangzhou,http://www.wikidata.org/entity/Q4970
-China: Hainan,http://www.wikidata.org/entity/Q42200
-China: Hebei,http://www.wikidata.org/entity/Q21208
-China: Heilongjiang,http://www.wikidata.org/entity/Q19206
-China: Henan,http://www.wikidata.org/entity/Q43684
-China: Hong Kong,http://www.wikidata.org/entity/Q8646
-China: HuaShang,http://www.wikidata.org/entity/Q148
-China: Hubei,http://www.wikidata.org/entity/Q46862
-"China: Hubei, Wuhan",http://www.wikidata.org/entity/Q11746
-China: Hunan,http://www.wikidata.org/entity/Q45761
-China: Inner Mongolia,http://www.wikidata.org/entity/Q41079
-China: Jiangsu,http://www.wikidata.org/entity/Q16963
-China: Jiangxi,http://www.wikidata.org/entity/Q57052
-China: Jilin,http://www.wikidata.org/entity/Q45208
-China: Liaoning,http://www.wikidata.org/entity/Q43934
-China: Macau,http://www.wikidata.org/entity/Q14773
-China: Nanchang,https://www.wikidata.org/wiki/Q171943
-China: Ningxia Hui Autonomous Region,http://www.wikidata.org/entity/Q57448
-China: Qinghai,http://www.wikidata.org/entity/Q45833
-China: Shaanxi,http://www.wikidata.org/entity/Q47974
-China: Shandong,http://www.wikidata.org/entity/Q43407
-China: Shanghai,http://www.wikidata.org/entity/Q8686
-China: Shanxi,http://www.wikidata.org/entity/Q46913
-China: Shenzhen,http://www.wikidata.org/entity/Q15174
-China: Sichuan,http://www.wikidata.org/entity/Q19770
-China: Tianjin,http://www.wikidata.org/entity/Q11736
-China: Tibet Autonomous Region,http://www.wikidata.org/entity/Q17269
-China: Wuhan,http://www.wikidata.org/entity/Q11746
-China:Wuhan,http://www.wikidata.org/entity/Q11746
-China: Xinjiang,http://www.wikidata.org/entity/Q34800
-China: Yunnan,http://www.wikidata.org/entity/Q43194
-China: Zhejiang,http://www.wikidata.org/entity/Q16967
-"China: Zhejiang, Hangzhou",http://www.wikidata.org/entity/Q4970
-Colombia,http://www.wikidata.org/entity/Q739
-Colombia: Antioquia,http://www.wikidata.org/entity/Q123304
-Comoros,http://www.wikidata.org/entity/Q970
-Costa Rica,http://www.wikidata.org/entity/Q800
-Croatia,http://www.wikidata.org/entity/Q224
-Cuba,http://www.wikidata.org/entity/Q241
-Czech Republic,http://www.wikidata.org/entity/Q213
-Democratic Republic of the Congo,http://www.wikidata.org/entity/Q974
-Denmark,http://www.wikidata.org/entity/Q35
-Djibouti,http://www.wikidata.org/entity/Q977
-Dominica,http://www.wikidata.org/entity/Q784
-Dominican Republic,http://www.wikidata.org/entity/Q786
-East Timor,http://www.wikidata.org/entity/Q574
-Ecuador,http://www.wikidata.org/entity/Q736
-Egypt,http://www.wikidata.org/entity/Q79
-El Salvador,http://www.wikidata.org/entity/Q792
-Equatorial Guinea,http://www.wikidata.org/entity/Q983
-Eritrea,http://www.wikidata.org/entity/Q986
-Estado Libre del Istmo,http://www.wikidata.org/entity/Q8842943
-Estonia,http://www.wikidata.org/entity/Q191
-Eswatini,http://www.wikidata.org/entity/Q1050
-Ethiopia,http://www.wikidata.org/entity/Q115
-Federated States of Micronesia,http://www.wikidata.org/entity/Q702
-Fiji,http://www.wikidata.org/entity/Q712
-Finland,http://www.wikidata.org/entity/Q33
-France,http://www.wikidata.org/entity/Q142
-Gabon,http://www.wikidata.org/entity/Q1000
-Georgia,http://www.wikidata.org/entity/Q230
-Germany,http://www.wikidata.org/entity/Q183
-Ghana,http://www.wikidata.org/entity/Q117
-Greece,http://www.wikidata.org/entity/Q41
-Grenada,http://www.wikidata.org/entity/Q769
-Guatemala,http://www.wikidata.org/entity/Q774
-Guinea,http://www.wikidata.org/entity/Q1006
-Guinea-Bissau,http://www.wikidata.org/entity/Q1007
-Guyana,http://www.wikidata.org/entity/Q734
-Haiti,http://www.wikidata.org/entity/Q790
-Honduras,http://www.wikidata.org/entity/Q783
-Hungary,http://www.wikidata.org/entity/Q28
-Iceland,http://www.wikidata.org/entity/Q189
-Icelandic Commonwealth,http://www.wikidata.org/entity/Q62389
-India,http://www.wikidata.org/entity/Q668
-India: Kerala State,http://www.wikidata.org/entity/Q1186
-India: Rajkot,http://www.wikidata.org/entity/Q1815245
-Indonesia,http://www.wikidata.org/entity/Q252
-Iran,http://www.wikidata.org/entity/Q794
-Iran: Qum,http://www.wikidata.org/entity/Q131664
-Iran: Tehran,http://www.wikidata.org/entity/Q3616
-Iraq,http://www.wikidata.org/entity/Q796
-Ireland,http://www.wikidata.org/entity/Q27
-Israel,http://www.wikidata.org/entity/Q801
-Italy,http://www.wikidata.org/entity/Q38
-Italy: Cagliari,http://www.wikidata.org/entity/Q1897
-Italy: Rome,http://www.wikidata.org/entity/Q220
-Ivory Coast,http://www.wikidata.org/entity/Q1008
-Jamaica,http://www.wikidata.org/entity/Q766
-Japan,http://www.wikidata.org/entity/Q17
-Jordan,http://www.wikidata.org/entity/Q810
-Kazakhstan,http://www.wikidata.org/entity/Q232
-Kenya,http://www.wikidata.org/entity/Q114
-Kingdom of Denmark,http://www.wikidata.org/entity/Q756617
-Kingdom of the Netherlands,http://www.wikidata.org/entity/Q29999
-Kiribati,http://www.wikidata.org/entity/Q710
-Kuwait,http://www.wikidata.org/entity/Q817
-Kyrgyzstan,http://www.wikidata.org/entity/Q813
-Laos,http://www.wikidata.org/entity/Q819
-Latvia,http://www.wikidata.org/entity/Q211
-Lebanon,http://www.wikidata.org/entity/Q822
-Lesotho,http://www.wikidata.org/entity/Q1013
-Liberia,http://www.wikidata.org/entity/Q1014
-Libya,http://www.wikidata.org/entity/Q1016
-Liechtenstein,http://www.wikidata.org/entity/Q347
-Lithuania,http://www.wikidata.org/entity/Q37
-Luxembourg,http://www.wikidata.org/entity/Q32
-Madagascar,http://www.wikidata.org/entity/Q1019
-Malawi,http://www.wikidata.org/entity/Q1020
-Malaysia,http://www.wikidata.org/entity/Q833
-Maldives,http://www.wikidata.org/entity/Q826
-Mali,http://www.wikidata.org/entity/Q912
-Malta,http://www.wikidata.org/entity/Q233
-Marshall Islands,http://www.wikidata.org/entity/Q709
-Mauritania,http://www.wikidata.org/entity/Q1025
-Mauritius,http://www.wikidata.org/entity/Q1027
-Mexico,http://www.wikidata.org/entity/Q96
-Moldova,http://www.wikidata.org/entity/Q217
-Monaco,http://www.wikidata.org/entity/Q235
-Mongolia,http://www.wikidata.org/entity/Q711
-Montenegro,http://www.wikidata.org/entity/Q236
-Morocco,http://www.wikidata.org/entity/Q1028
-Mozambique,http://www.wikidata.org/entity/Q1029
-Myanmar,http://www.wikidata.org/entity/Q836
-Namibia,http://www.wikidata.org/entity/Q1030
-Nauru,http://www.wikidata.org/entity/Q697
-Nepal,http://www.wikidata.org/entity/Q837
-New Zealand,http://www.wikidata.org/entity/Q664
-Nicaragua,http://www.wikidata.org/entity/Q811
-Niger,http://www.wikidata.org/entity/Q1032
-Nigeria,http://www.wikidata.org/entity/Q1033
-Nigeria: Lagos,http://www.wikidata.org/entity/Q8673
-North Korea,http://www.wikidata.org/entity/Q423
-North Macedonia,http://www.wikidata.org/entity/Q221
-Norway,http://www.wikidata.org/entity/Q20
-Oman,http://www.wikidata.org/entity/Q842
-Ottoman Empire,http://www.wikidata.org/entity/Q12560
-Pakistan,http://www.wikidata.org/entity/Q843
-Pakistan: Gilgit,http://www.wikidata.org/entity/Q609024
-Pakistan: KPK,http://www.wikidata.org/entity/Q183314
-Palau,http://www.wikidata.org/entity/Q695
-Panama,http://www.wikidata.org/entity/Q804
-Papua New Guinea,http://www.wikidata.org/entity/Q691
-Paraguay,http://www.wikidata.org/entity/Q733
-People's Republic of China,http://www.wikidata.org/entity/Q148
-Peru,http://www.wikidata.org/entity/Q419
-Philippines,http://www.wikidata.org/entity/Q928
-Poland,http://www.wikidata.org/entity/Q36
-Portugal,http://www.wikidata.org/entity/Q45
-Principality of Turov and Pinsk,http://www.wikidata.org/entity/Q671362
-Qatar,http://www.wikidata.org/entity/Q846
-Republic of Cyprus,http://www.wikidata.org/entity/Q229
-Republic of Geneva,http://www.wikidata.org/entity/Q23366230
-Republic of the Congo,http://www.wikidata.org/entity/Q971
-Romania,http://www.wikidata.org/entity/Q218
-Russia,http://www.wikidata.org/entity/Q159
-Rwanda,http://www.wikidata.org/entity/Q1037
-Saint Kitts and Nevis,http://www.wikidata.org/entity/Q763
-Saint Lucia,http://www.wikidata.org/entity/Q760
-Saint Vincent and the Grenadines,http://www.wikidata.org/entity/Q757
-Samoa,http://www.wikidata.org/entity/Q683
-San Marino,http://www.wikidata.org/entity/Q238
-São Tomé and Príncipe,http://www.wikidata.org/entity/Q1039
-Saudi Arabia,http://www.wikidata.org/entity/Q851
-Senegal,http://www.wikidata.org/entity/Q1041
-Serbia,http://www.wikidata.org/entity/Q403
-Seychelles,http://www.wikidata.org/entity/Q1042
-Sierra Leone,http://www.wikidata.org/entity/Q1044
-Singapore,http://www.wikidata.org/entity/Q334
-Slovakia,http://www.wikidata.org/entity/Q214
-Slovenia,http://www.wikidata.org/entity/Q215
-Solomon Islands,http://www.wikidata.org/entity/Q685
-Somalia,http://www.wikidata.org/entity/Q1045
-South Africa,http://www.wikidata.org/entity/Q258
-South Africa: KwaZulu-Natal,http://www.wikidata.org/entity/Q81725
-South African Republic,http://www.wikidata.org/entity/Q550374
-South Korea,http://www.wikidata.org/entity/Q884
-South Sudan,http://www.wikidata.org/entity/Q958
-Spain,http://www.wikidata.org/entity/Q29
-Spain: Valencia,http://www.wikidata.org/entity/Q8818
-Sri Lanka,http://www.wikidata.org/entity/Q854
-State of Los Altos,http://www.wikidata.org/entity/Q738264
-Sudan,http://www.wikidata.org/entity/Q1049
-Suriname,http://www.wikidata.org/entity/Q730
-Sweden,http://www.wikidata.org/entity/Q34
-Switzerland,http://www.wikidata.org/entity/Q39
-Syria,http://www.wikidata.org/entity/Q858
-Taiwan,http://www.wikidata.org/entity/Q865
-Tajikistan,http://www.wikidata.org/entity/Q863
-Tanzania,http://www.wikidata.org/entity/Q924
-Thailand,http://www.wikidata.org/entity/Q869
-The Bahamas,http://www.wikidata.org/entity/Q778
-The Gambia,http://www.wikidata.org/entity/Q1005
-Togo,http://www.wikidata.org/entity/Q945
-Tonga,http://www.wikidata.org/entity/Q678
-Trinidad and Tobago,http://www.wikidata.org/entity/Q754
-Tunisia,http://www.wikidata.org/entity/Q948
-Tunisia: Tunis,http://www.wikidata.org/entity/Q3572
-Turkey,http://www.wikidata.org/entity/Q43
-Turkmenistan,http://www.wikidata.org/entity/Q874
-Tuvalu,http://www.wikidata.org/entity/Q672
-Uganda,http://www.wikidata.org/entity/Q1036
-Ukraine,http://www.wikidata.org/entity/Q212
-United Arab Emirates,http://www.wikidata.org/entity/Q878
-United Arab Republic,http://www.wikidata.org/entity/Q170468
-United Kingdom,http://www.wikidata.org/entity/Q145
-United States of America,http://www.wikidata.org/entity/Q30
-Uruguay,http://www.wikidata.org/entity/Q77
-USA,http://www.wikidata.org/entity/Q30
-USA: AK,http://www.wikidata.org/entity/Q797
-USA: AL,http://www.wikidata.org/entity/Q173
-USA: AR,http://www.wikidata.org/entity/Q1612
-USA: AZ,http://www.wikidata.org/entity/Q816
-USA: CA,http://www.wikidata.org/entity/Q99
-"USA: CA, San Diego County",http://www.wikidata.org/entity/Q108143
-USA: CO,http://www.wikidata.org/entity/Q1261
-USA: CT,http://www.wikidata.org/entity/Q779
-USA: DC,http://www.wikidata.org/entity/Q3551781
-USA: DE,http://www.wikidata.org/entity/Q1393
-USA: FL,http://www.wikidata.org/entity/Q812
-USA: GA,http://www.wikidata.org/entity/Q1428
-USA: HI,http://www.wikidata.org/entity/Q782
-USA: IA,http://www.wikidata.org/entity/Q1546
-USA: ID,http://www.wikidata.org/entity/Q1221
-USA: IL,http://www.wikidata.org/entity/Q1204
-USA: Illinois,http://www.wikidata.org/entity/Q1204
-USA: IN,http://www.wikidata.org/entity/Q1415
-USA: KS,http://www.wikidata.org/entity/Q1558
-USA: KY,http://www.wikidata.org/entity/Q1603
-USA: LA,http://www.wikidata.org/entity/Q1588
-"USA: New Orleans, LA",https://www.wikidata.org/wiki/Q34404
-USA: MA,http://www.wikidata.org/entity/Q771
-USA: MD,http://www.wikidata.org/entity/Q1391
-USA: ME,http://www.wikidata.org/entity/Q724
-USA: MI,http://www.wikidata.org/entity/Q1166
-USA: MN,http://www.wikidata.org/entity/Q1527
-USA: MO,http://www.wikidata.org/entity/Q1581
-USA: MS,http://www.wikidata.org/entity/Q1494
-USA: MT,http://www.wikidata.org/entity/Q1212
-USA: NC,http://www.wikidata.org/entity/Q1454
-USA: ND,http://www.wikidata.org/entity/Q1207
-USA: NE,http://www.wikidata.org/entity/Q1553
-USA: NH,http://www.wikidata.org/entity/Q759
-USA: NJ,http://www.wikidata.org/entity/Q1408
-USA: NM,http://www.wikidata.org/entity/Q1522
-USA: North Carolina,http://www.wikidata.org/entity/Q1454
-USA: NV,http://www.wikidata.org/entity/Q1227
-USA: NY,http://www.wikidata.org/entity/Q1384
-USA: OH,http://www.wikidata.org/entity/Q1397
-USA: OK,http://www.wikidata.org/entity/Q1649
-USA: OR,http://www.wikidata.org/entity/Q824
-USA: PA,http://www.wikidata.org/entity/Q1400
-USA: RI,http://www.wikidata.org/entity/Q1387
-"USA: San Francisco, CA",http://www.wikidata.org/entity/Q62
-USA: SC,http://www.wikidata.org/entity/Q1456
-USA: SD,http://www.wikidata.org/entity/Q1211
-"USA: Snohomish County, WA",http://www.wikidata.org/entity/Q110403
-USA: TN,http://www.wikidata.org/entity/Q1509
-USA: TX,http://www.wikidata.org/entity/Q1439
-USA: UT,http://www.wikidata.org/entity/Q829
-USA: VA,http://www.wikidata.org/entity/Q1370
-USA: VT,http://www.wikidata.org/entity/Q16551
-USA: WA,http://www.wikidata.org/entity/Q1223
-USA: WI,http://www.wikidata.org/entity/Q1537
-USA: WV,http://www.wikidata.org/entity/Q1371
-USA: WY,http://www.wikidata.org/entity/Q1214
-Uzbekistan,http://www.wikidata.org/entity/Q265
-Vanuatu,http://www.wikidata.org/entity/Q686
-Vatican City,http://www.wikidata.org/entity/Q237
-Venezuela,http://www.wikidata.org/entity/Q717
-Viet nam,http://www.wikidata.org/entity/Q881
-Viet Nam,http://www.wikidata.org/entity/Q881
-Viet Nam: Ho Chi Minh city,http://www.wikidata.org/entity/Q1854
-Vietnam,http://www.wikidata.org/entity/Q881
-Yemen,http://www.wikidata.org/entity/Q805
-Zambia,http://www.wikidata.org/entity/Q953
-Zimbabwe,http://www.wikidata.org/entity/Q954
+nasopharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831
+nasopharyngeal swabs,http://purl.obolibrary.org/obo/NCIT_C155831
+nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
+nasopharyngeal,http://purl.obolibrary.org/obo/NCIT_C155831
+respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831
+naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831
+nasopharyngeal aspirate,http://purl.obolibrary.org/obo/NCIT_C155831
+nasal swab specimen,http://purl.obolibrary.org/obo/NCIT_C155831
+pharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831
+respiratory secretion,http://purl.obolibrary.org/obo/NCIT_C155831
+mid-nasal swab,http://purl.obolibrary.org/obo/NCIT_C155831
+nasopharyngeal (throat) washings,http://purl.obolibrary.org/obo/NCIT_C155831
+oropharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155835
+throat swab,http://purl.obolibrary.org/obo/NCIT_C155835
+oro-pharyngeal,http://purl.obolibrary.org/obo/NCIT_C155835
+buccal swab,http://purl.obolibrary.org/obo/NCIT_C155835
+throat washing,http://purl.obolibrary.org/obo/NCIT_C155835
+Throat Swab,http://purl.obolibrary.org/obo/NCIT_C155835
+throat (oropharyngeal) swab,http://purl.obolibrary.org/obo/NCIT_C155835
+bronchoalveolar lavage fluid,http://purl.obolibrary.org/obo/NCIT_C13195
+swab,http://purl.obolibrary.org/obo/NCIT_C13195
+oral swab,http://purl.obolibrary.org/obo/NCIT_C13195
+bronchoalveolar lavage,http://purl.obolibrary.org/obo/NCIT_C13195
+sputum,http://purl.obolibrary.org/obo/NCIT_C13278
+aspirate,http://purl.obolibrary.org/obo/NCIT_C13347
+stool,http://purl.obolibrary.org/obo/NCIT_C13234
+serum,http://purl.obolibrary.org/obo/NCIT_C13325
+saliva,http://purl.obolibrary.org/obo/NCIT_C13275
+nasal swab,http://purl.obolibrary.org/obo/NCIT_C132119
diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py
index 148a7e1..21ed3b2 100755
--- a/scripts/from_genbank_to_fasta_and_yaml.py
+++ b/scripts/from_genbank_to_fasta_and_yaml.py
@@ -37,8 +37,7 @@ if not os.path.exists(dir_metadata):
tmp_list = [x.split('.')[0] for x in tmp_list]
print(term, len(tmp_list))
- tmp_list=tmp_list
- # tmp_list = tmp_list[0:2] # restricting to small run
+ #tmp_list = tmp_list[0:2] # restricting to small run
id_set.update([x.split('.')[0] for x in tmp_list])
@@ -112,13 +111,13 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
info_for_yaml_dict['sample']['sample_id'] = accession_version
- info_for_yaml_dict['sample']['source_database_accession'] = "http://identifiers.org/insdc/"+accession_version+"#sequence" #accession is turned into resolvable URL/URI now
+ info_for_yaml_dict['sample']['source_database_accession'] = ["http://identifiers.org/insdc/"+accession_version+"#sequence"] #accession is turned into resolvable URL/URI now
# submitter info
GBSeq_references = GBSeq.find('GBSeq_references')
if GBSeq_references is not None:
- info_for_yaml_dict['submitter']['authors'] = ';'.join([x.text for x in GBSeq_references.iter('GBAuthor')])
+ info_for_yaml_dict['submitter']['authors'] = ["{}".format(x.text) for x in GBSeq_references.iter('GBAuthor')]
GBReference = GBSeq_references.find('GBReference')
if GBReference is not None:
@@ -126,7 +125,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
if GBReference_journal is not None and GBReference_journal.text != 'Unpublished':
if 'Submitted' in GBReference_journal.text:
- info_for_yaml_dict['submitter']['submitter_name'] = GBReference_journal.text.split(') ')[1].split(',')[0].strip()
+ info_for_yaml_dict['submitter']['submitter_name'] = ["{}".format(GBReference_journal.text.split(') ')[1].split(',')[0].strip())]
info_for_yaml_dict['submitter']['submitter_address'] = ','.join(GBReference_journal.text.split(') ')[1].split(',')[1:]).strip()
else:
info_for_yaml_dict['submitter']['additional_submitter_information'] = GBReference_journal.text
@@ -146,8 +145,9 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
if field_in_yaml == 'sequencing_coverage':
# A regular expression would be better!
try:
- info_for_yaml_dict['technology'][field_in_yaml] = float(
- tech_info_to_parse.strip('(average)').strip("reads/nt").strip('(average for 6 sequences)').replace(',', '.').strip(' xX>'))
+ info_for_yaml_dict['technology'][field_in_yaml] = [
+ float(tech_info_to_parse.strip('(average)').strip("reads/nt").strip('(average for 6 sequences)').replace(',', '.').strip(' xX>'))
+ ]
except ValueError:
print(accession_version, "Couldn't make sense of Coverage '%s'" % tech_info_to_parse)
pass
@@ -162,8 +162,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
new_seq_tec_list.append(seq_tec)
- for n, seq_tec in enumerate(new_seq_tec_list):
- info_for_yaml_dict['technology'][field_in_yaml + ('' if n == 0 else str(n + 1))] = seq_tec
+ info_for_yaml_dict['technology']['sample_sequencing_technology'] = [x for x in new_seq_tec_list]
else:
info_for_yaml_dict['technology'][field_in_yaml] = tech_info_to_parse
@@ -210,17 +209,14 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml)
GBQualifier_value_text = GBQualifier_value_text.upper() # For example, in case of 'usa: wa'
if GBQualifier_value_text in term_to_uri_dict:
- info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict[GBQualifier_value_text]
+ info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict[GBQualifier_value_text]]
else:
if GBQualifier_value_text in ['NP/OP swab', 'nasopharyngeal and oropharyngeal swab', 'nasopharyngeal/oropharyngeal swab', 'np/np swab', 'np/op']:
- info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict['nasopharyngeal swab']
- info_for_yaml_dict['sample']['specimen_source2'] = term_to_uri_dict['oropharyngeal swab']
- elif GBQualifier_value_text in ['nasopharyngeal swab/throat swab']:
- info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict['nasopharyngeal swab']
- info_for_yaml_dict['sample']['specimen_source2'] = term_to_uri_dict['throat swab']
+ info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict['nasopharyngeal swab'], term_to_uri_dict['oropharyngeal swab']]
+ elif GBQualifier_value_text in ['nasopharyngeal swab/throat swab', 'nasopharyngeal/throat swab']:
+ info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict['nasopharyngeal swab'], term_to_uri_dict['throat swab']]
elif GBQualifier_value_text in ['nasopharyngeal aspirate/throat swab']:
- info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict['nasopharyngeal aspirate']
- info_for_yaml_dict['sample']['specimen_source2'] = term_to_uri_dict['throat swab']
+ info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict['nasopharyngeal aspirate'], term_to_uri_dict['throat swab']]
else:
missing_value_list.append('\t'.join([accession_version, 'specimen_source', GBQualifier_value_text]))
elif GBQualifier_name_text == 'collection_date':
diff --git a/scripts/sequences.acc b/scripts/sequences.acc
index a99c4e6..697d868 100644
--- a/scripts/sequences.acc
+++ b/scripts/sequences.acc
@@ -1,4 +1,299 @@
NC_045512
+MT394528
+MT394529
+MT394530
+MT394531
+MT394864
+MT396241
+MT396242
+MT396243
+MT396244
+MT396245
+MT396246
+MT396247
+MT396248
+MT396266
+MT380726
+MT380727
+MT380728
+MT380729
+MT380730
+MT380731
+MT380732
+MT380733
+MT380734
+MT385414
+MT385415
+MT385416
+MT385417
+MT385418
+MT385419
+MT385420
+MT385421
+MT385422
+MT385423
+MT385424
+MT385425
+MT385426
+MT385427
+MT385428
+MT385429
+MT385430
+MT385431
+MT385432
+MT385433
+MT385434
+MT385435
+MT385436
+MT385437
+MT385438
+MT385439
+MT385440
+MT385441
+MT385442
+MT385443
+MT385444
+MT385445
+MT385446
+MT385447
+MT385448
+MT385449
+MT385450
+MT385451
+MT385452
+MT385453
+MT385454
+MT385455
+MT385456
+MT385457
+MT385458
+MT385459
+MT385460
+MT385461
+MT385462
+MT385463
+MT385464
+MT385465
+MT385466
+MT385467
+MT385468
+MT385469
+MT385470
+MT385471
+MT385472
+MT385473
+MT385474
+MT385475
+MT385476
+MT385477
+MT385478
+MT385479
+MT385480
+MT385481
+MT385482
+MT385483
+MT385484
+MT385485
+MT385486
+MT385487
+MT385488
+MT385489
+MT385490
+MT385491
+MT385492
+MT385493
+MT385494
+MT385495
+MT385496
+MT385497
+MT186683
+MT252677
+MT252678
+MT252679
+MT252680
+MT252681
+MT252682
+MT252683
+MT252684
+MT252685
+MT252686
+MT252687
+MT252688
+MT252689
+MT252690
+MT252691
+MT252692
+MT252693
+MT252694
+MT252695
+MT252696
+MT252697
+MT252698
+MT252699
+MT252700
+MT252701
+MT252702
+MT252703
+MT252704
+MT252705
+MT252706
+MT252707
+MT252708
+MT252709
+MT252710
+MT252711
+MT252712
+MT252713
+MT252715
+MT252716
+MT252717
+MT252719
+MT252721
+MT252723
+MT252725
+MT252726
+MT252728
+MT252729
+MT252730
+MT252733
+MT252734
+MT252735
+MT252736
+MT252737
+MT252738
+MT252739
+MT252740
+MT252741
+MT252742
+MT252745
+MT252746
+MT252747
+MT252748
+MT252749
+MT252756
+MT252757
+MT252758
+MT252761
+MT252763
+MT252764
+MT252765
+MT252766
+MT252767
+MT252768
+MT252769
+MT252770
+MT252771
+MT252772
+MT252773
+MT252774
+MT252775
+MT252778
+MT252779
+MT252780
+MT252781
+MT252782
+MT252783
+MT252784
+MT252785
+MT252787
+MT252788
+MT252792
+MT252793
+MT252794
+MT252795
+MT252797
+MT252798
+MT252799
+MT252800
+MT252801
+MT252802
+MT252803
+MT252804
+MT252805
+MT252806
+MT252807
+MT252808
+MT252809
+MT252810
+MT252811
+MT252821
+MT252822
+MT252823
+MT252824
+MT339043
+MT365033
+MT374101
+MT374102
+MT374103
+MT374104
+MT374105
+MT374106
+MT374107
+MT374108
+MT374109
+MT374110
+MT374111
+MT374112
+MT374113
+MT374114
+MT374115
+MT374116
+MT375428
+MT375429
+MT375430
+MT375431
+MT375432
+MT375433
+MT375434
+MT375435
+MT375436
+MT375437
+MT375438
+MT375439
+MT375440
+MT375441
+MT375442
+MT375443
+MT375444
+MT375445
+MT375446
+MT375447
+MT375448
+MT375449
+MT375450
+MT375451
+MT375452
+MT375453
+MT375454
+MT375455
+MT375456
+MT375457
+MT375458
+MT375459
+MT375460
+MT375461
+MT375462
+MT375463
+MT375464
+MT375465
+MT375466
+MT375467
+MT375468
+MT375469
+MT375470
+MT375471
+MT375472
+MT375473
+MT375474
+MT375475
+MT375476
+MT375477
+MT375478
+MT375479
+MT375480
+MT375481
+MT375482
+MT375483
MT370516
MT370517
MT370518
@@ -225,6 +520,8 @@ MT372480
MT372481
MT372482
MT372483
+7BV2_P
+7BV2_T
LC542976
LC542809
MT114412