diff options
-rw-r--r-- | scripts/dict_ontology_standardization/ncbi_countries.csv | 357 | ||||
-rwxr-xr-x | scripts/from_genbank_to_fasta_and_yaml.py | 30 | ||||
-rw-r--r-- | scripts/sequences.acc | 297 |
3 files changed, 339 insertions, 345 deletions
diff --git a/scripts/dict_ontology_standardization/ncbi_countries.csv b/scripts/dict_ontology_standardization/ncbi_countries.csv index c08b613..b81da36 100644 --- a/scripts/dict_ontology_standardization/ncbi_countries.csv +++ b/scripts/dict_ontology_standardization/ncbi_countries.csv @@ -1,328 +1,29 @@ -30.59 N 114.3 E,http://www.wikidata.org/entity/Q11746 -35.92 N 74.33 E,http://www.wikidata.org/entity/Q609024 -39.54 N 116.23 E,http://www.wikidata.org/entity/Q198244 -Afghanistan,http://www.wikidata.org/entity/Q889 -Albania,http://www.wikidata.org/entity/Q222 -Algeria,http://www.wikidata.org/entity/Q262 -Andorra,http://www.wikidata.org/entity/Q228 -Angola,http://www.wikidata.org/entity/Q916 -Antigua and Barbuda,http://www.wikidata.org/entity/Q781 -Argentina,http://www.wikidata.org/entity/Q414 -Armenia,http://www.wikidata.org/entity/Q399 -Australia,http://www.wikidata.org/entity/Q408 -Australia: Queensland,http://www.wikidata.org/entity/Q36074 -Australia: Victoria,http://www.wikidata.org/entity/Q36687 -Austria,http://www.wikidata.org/entity/Q40 -Azerbaijan,http://www.wikidata.org/entity/Q227 -Bahrain,http://www.wikidata.org/entity/Q398 -Bangladesh,http://www.wikidata.org/entity/Q902 -Barbados,http://www.wikidata.org/entity/Q244 -Belarus,http://www.wikidata.org/entity/Q184 -Belgium,http://www.wikidata.org/entity/Q31 -Belize,http://www.wikidata.org/entity/Q242 -Benin,http://www.wikidata.org/entity/Q962 -Bhutan,http://www.wikidata.org/entity/Q917 -Bolivia,http://www.wikidata.org/entity/Q750 -Bosnia and Herzegovina,http://www.wikidata.org/entity/Q225 -Botswana,http://www.wikidata.org/entity/Q963 -Brazil,http://www.wikidata.org/entity/Q155 -Brunei,http://www.wikidata.org/entity/Q921 -Bulgaria,http://www.wikidata.org/entity/Q219 -Burkina Faso,http://www.wikidata.org/entity/Q965 -Burundi,http://www.wikidata.org/entity/Q967 -Cambodia,http://www.wikidata.org/entity/Q424 -Cameroon,http://www.wikidata.org/entity/Q1009 -Canada,http://www.wikidata.org/entity/Q16 -Cape Verde,http://www.wikidata.org/entity/Q1011 -Central African Republic,http://www.wikidata.org/entity/Q929 -Chad,http://www.wikidata.org/entity/Q657 -Chile,http://www.wikidata.org/entity/Q298 -China,http://www.wikidata.org/entity/Q148 -China: Anhui,http://www.wikidata.org/entity/Q40956 -"China: Anhui, Fuyang":http://www.wikidata.org/entity/Q360584 -China: Beijing,http://www.wikidata.org/entity/Q956 -China: Chongqing,http://www.wikidata.org/entity/Q11725 -China: Fujian,http://www.wikidata.org/entity/Q41705 -China: Gansu,http://www.wikidata.org/entity/Q42392 -China: Guangdong,http://www.wikidata.org/entity/Q15175 -"China: Guangdong, Guangzhou",http://www.wikidata.org/entity/Q16572 -China: Guangxi Zhuang Autonomous Region,http://www.wikidata.org/entity/Q15176 -China: Guangzhou,http://www.wikidata.org/entity/Q16572 -China: Guizhou,http://www.wikidata.org/entity/Q47097 -China: Hangzhou,http://www.wikidata.org/entity/Q4970 -China: Hainan,http://www.wikidata.org/entity/Q42200 -China: Hebei,http://www.wikidata.org/entity/Q21208 -China: Heilongjiang,http://www.wikidata.org/entity/Q19206 -China: Henan,http://www.wikidata.org/entity/Q43684 -China: Hong Kong,http://www.wikidata.org/entity/Q8646 -China: HuaShang,http://www.wikidata.org/entity/Q148 -China: Hubei,http://www.wikidata.org/entity/Q46862 -"China: Hubei, Wuhan",http://www.wikidata.org/entity/Q11746 -China: Hunan,http://www.wikidata.org/entity/Q45761 -China: Inner Mongolia,http://www.wikidata.org/entity/Q41079 -China: Jiangsu,http://www.wikidata.org/entity/Q16963 -China: Jiangxi,http://www.wikidata.org/entity/Q57052 -China: Jilin,http://www.wikidata.org/entity/Q45208 -China: Liaoning,http://www.wikidata.org/entity/Q43934 -China: Macau,http://www.wikidata.org/entity/Q14773 -China: Nanchang,https://www.wikidata.org/wiki/Q171943 -China: Ningxia Hui Autonomous Region,http://www.wikidata.org/entity/Q57448 -China: Qinghai,http://www.wikidata.org/entity/Q45833 -China: Shaanxi,http://www.wikidata.org/entity/Q47974 -China: Shandong,http://www.wikidata.org/entity/Q43407 -China: Shanghai,http://www.wikidata.org/entity/Q8686 -China: Shanxi,http://www.wikidata.org/entity/Q46913 -China: Shenzhen,http://www.wikidata.org/entity/Q15174 -China: Sichuan,http://www.wikidata.org/entity/Q19770 -China: Tianjin,http://www.wikidata.org/entity/Q11736 -China: Tibet Autonomous Region,http://www.wikidata.org/entity/Q17269 -China: Wuhan,http://www.wikidata.org/entity/Q11746 -China:Wuhan,http://www.wikidata.org/entity/Q11746 -China: Xinjiang,http://www.wikidata.org/entity/Q34800 -China: Yunnan,http://www.wikidata.org/entity/Q43194 -China: Zhejiang,http://www.wikidata.org/entity/Q16967 -"China: Zhejiang, Hangzhou",http://www.wikidata.org/entity/Q4970 -Colombia,http://www.wikidata.org/entity/Q739 -Colombia: Antioquia,http://www.wikidata.org/entity/Q123304 -Comoros,http://www.wikidata.org/entity/Q970 -Costa Rica,http://www.wikidata.org/entity/Q800 -Croatia,http://www.wikidata.org/entity/Q224 -Cuba,http://www.wikidata.org/entity/Q241 -Czech Republic,http://www.wikidata.org/entity/Q213 -Democratic Republic of the Congo,http://www.wikidata.org/entity/Q974 -Denmark,http://www.wikidata.org/entity/Q35 -Djibouti,http://www.wikidata.org/entity/Q977 -Dominica,http://www.wikidata.org/entity/Q784 -Dominican Republic,http://www.wikidata.org/entity/Q786 -East Timor,http://www.wikidata.org/entity/Q574 -Ecuador,http://www.wikidata.org/entity/Q736 -Egypt,http://www.wikidata.org/entity/Q79 -El Salvador,http://www.wikidata.org/entity/Q792 -Equatorial Guinea,http://www.wikidata.org/entity/Q983 -Eritrea,http://www.wikidata.org/entity/Q986 -Estado Libre del Istmo,http://www.wikidata.org/entity/Q8842943 -Estonia,http://www.wikidata.org/entity/Q191 -Eswatini,http://www.wikidata.org/entity/Q1050 -Ethiopia,http://www.wikidata.org/entity/Q115 -Federated States of Micronesia,http://www.wikidata.org/entity/Q702 -Fiji,http://www.wikidata.org/entity/Q712 -Finland,http://www.wikidata.org/entity/Q33 -France,http://www.wikidata.org/entity/Q142 -Gabon,http://www.wikidata.org/entity/Q1000 -Georgia,http://www.wikidata.org/entity/Q230 -Germany,http://www.wikidata.org/entity/Q183 -Ghana,http://www.wikidata.org/entity/Q117 -Greece,http://www.wikidata.org/entity/Q41 -Grenada,http://www.wikidata.org/entity/Q769 -Guatemala,http://www.wikidata.org/entity/Q774 -Guinea,http://www.wikidata.org/entity/Q1006 -Guinea-Bissau,http://www.wikidata.org/entity/Q1007 -Guyana,http://www.wikidata.org/entity/Q734 -Haiti,http://www.wikidata.org/entity/Q790 -Honduras,http://www.wikidata.org/entity/Q783 -Hungary,http://www.wikidata.org/entity/Q28 -Iceland,http://www.wikidata.org/entity/Q189 -Icelandic Commonwealth,http://www.wikidata.org/entity/Q62389 -India,http://www.wikidata.org/entity/Q668 -India: Kerala State,http://www.wikidata.org/entity/Q1186 -India: Rajkot,http://www.wikidata.org/entity/Q1815245 -Indonesia,http://www.wikidata.org/entity/Q252 -Iran,http://www.wikidata.org/entity/Q794 -Iran: Qum,http://www.wikidata.org/entity/Q131664 -Iran: Tehran,http://www.wikidata.org/entity/Q3616 -Iraq,http://www.wikidata.org/entity/Q796 -Ireland,http://www.wikidata.org/entity/Q27 -Israel,http://www.wikidata.org/entity/Q801 -Italy,http://www.wikidata.org/entity/Q38 -Italy: Cagliari,http://www.wikidata.org/entity/Q1897 -Italy: Rome,http://www.wikidata.org/entity/Q220 -Ivory Coast,http://www.wikidata.org/entity/Q1008 -Jamaica,http://www.wikidata.org/entity/Q766 -Japan,http://www.wikidata.org/entity/Q17 -Jordan,http://www.wikidata.org/entity/Q810 -Kazakhstan,http://www.wikidata.org/entity/Q232 -Kenya,http://www.wikidata.org/entity/Q114 -Kingdom of Denmark,http://www.wikidata.org/entity/Q756617 -Kingdom of the Netherlands,http://www.wikidata.org/entity/Q29999 -Kiribati,http://www.wikidata.org/entity/Q710 -Kuwait,http://www.wikidata.org/entity/Q817 -Kyrgyzstan,http://www.wikidata.org/entity/Q813 -Laos,http://www.wikidata.org/entity/Q819 -Latvia,http://www.wikidata.org/entity/Q211 -Lebanon,http://www.wikidata.org/entity/Q822 -Lesotho,http://www.wikidata.org/entity/Q1013 -Liberia,http://www.wikidata.org/entity/Q1014 -Libya,http://www.wikidata.org/entity/Q1016 -Liechtenstein,http://www.wikidata.org/entity/Q347 -Lithuania,http://www.wikidata.org/entity/Q37 -Luxembourg,http://www.wikidata.org/entity/Q32 -Madagascar,http://www.wikidata.org/entity/Q1019 -Malawi,http://www.wikidata.org/entity/Q1020 -Malaysia,http://www.wikidata.org/entity/Q833 -Maldives,http://www.wikidata.org/entity/Q826 -Mali,http://www.wikidata.org/entity/Q912 -Malta,http://www.wikidata.org/entity/Q233 -Marshall Islands,http://www.wikidata.org/entity/Q709 -Mauritania,http://www.wikidata.org/entity/Q1025 -Mauritius,http://www.wikidata.org/entity/Q1027 -Mexico,http://www.wikidata.org/entity/Q96 -Moldova,http://www.wikidata.org/entity/Q217 -Monaco,http://www.wikidata.org/entity/Q235 -Mongolia,http://www.wikidata.org/entity/Q711 -Montenegro,http://www.wikidata.org/entity/Q236 -Morocco,http://www.wikidata.org/entity/Q1028 -Mozambique,http://www.wikidata.org/entity/Q1029 -Myanmar,http://www.wikidata.org/entity/Q836 -Namibia,http://www.wikidata.org/entity/Q1030 -Nauru,http://www.wikidata.org/entity/Q697 -Nepal,http://www.wikidata.org/entity/Q837 -New Zealand,http://www.wikidata.org/entity/Q664 -Nicaragua,http://www.wikidata.org/entity/Q811 -Niger,http://www.wikidata.org/entity/Q1032 -Nigeria,http://www.wikidata.org/entity/Q1033 -Nigeria: Lagos,http://www.wikidata.org/entity/Q8673 -North Korea,http://www.wikidata.org/entity/Q423 -North Macedonia,http://www.wikidata.org/entity/Q221 -Norway,http://www.wikidata.org/entity/Q20 -Oman,http://www.wikidata.org/entity/Q842 -Ottoman Empire,http://www.wikidata.org/entity/Q12560 -Pakistan,http://www.wikidata.org/entity/Q843 -Pakistan: Gilgit,http://www.wikidata.org/entity/Q609024 -Pakistan: KPK,http://www.wikidata.org/entity/Q183314 -Palau,http://www.wikidata.org/entity/Q695 -Panama,http://www.wikidata.org/entity/Q804 -Papua New Guinea,http://www.wikidata.org/entity/Q691 -Paraguay,http://www.wikidata.org/entity/Q733 -People's Republic of China,http://www.wikidata.org/entity/Q148 -Peru,http://www.wikidata.org/entity/Q419 -Philippines,http://www.wikidata.org/entity/Q928 -Poland,http://www.wikidata.org/entity/Q36 -Portugal,http://www.wikidata.org/entity/Q45 -Principality of Turov and Pinsk,http://www.wikidata.org/entity/Q671362 -Qatar,http://www.wikidata.org/entity/Q846 -Republic of Cyprus,http://www.wikidata.org/entity/Q229 -Republic of Geneva,http://www.wikidata.org/entity/Q23366230 -Republic of the Congo,http://www.wikidata.org/entity/Q971 -Romania,http://www.wikidata.org/entity/Q218 -Russia,http://www.wikidata.org/entity/Q159 -Rwanda,http://www.wikidata.org/entity/Q1037 -Saint Kitts and Nevis,http://www.wikidata.org/entity/Q763 -Saint Lucia,http://www.wikidata.org/entity/Q760 -Saint Vincent and the Grenadines,http://www.wikidata.org/entity/Q757 -Samoa,http://www.wikidata.org/entity/Q683 -San Marino,http://www.wikidata.org/entity/Q238 -São Tomé and Príncipe,http://www.wikidata.org/entity/Q1039 -Saudi Arabia,http://www.wikidata.org/entity/Q851 -Senegal,http://www.wikidata.org/entity/Q1041 -Serbia,http://www.wikidata.org/entity/Q403 -Seychelles,http://www.wikidata.org/entity/Q1042 -Sierra Leone,http://www.wikidata.org/entity/Q1044 -Singapore,http://www.wikidata.org/entity/Q334 -Slovakia,http://www.wikidata.org/entity/Q214 -Slovenia,http://www.wikidata.org/entity/Q215 -Solomon Islands,http://www.wikidata.org/entity/Q685 -Somalia,http://www.wikidata.org/entity/Q1045 -South Africa,http://www.wikidata.org/entity/Q258 -South Africa: KwaZulu-Natal,http://www.wikidata.org/entity/Q81725 -South African Republic,http://www.wikidata.org/entity/Q550374 -South Korea,http://www.wikidata.org/entity/Q884 -South Sudan,http://www.wikidata.org/entity/Q958 -Spain,http://www.wikidata.org/entity/Q29 -Spain: Valencia,http://www.wikidata.org/entity/Q8818 -Sri Lanka,http://www.wikidata.org/entity/Q854 -State of Los Altos,http://www.wikidata.org/entity/Q738264 -Sudan,http://www.wikidata.org/entity/Q1049 -Suriname,http://www.wikidata.org/entity/Q730 -Sweden,http://www.wikidata.org/entity/Q34 -Switzerland,http://www.wikidata.org/entity/Q39 -Syria,http://www.wikidata.org/entity/Q858 -Taiwan,http://www.wikidata.org/entity/Q865 -Tajikistan,http://www.wikidata.org/entity/Q863 -Tanzania,http://www.wikidata.org/entity/Q924 -Thailand,http://www.wikidata.org/entity/Q869 -The Bahamas,http://www.wikidata.org/entity/Q778 -The Gambia,http://www.wikidata.org/entity/Q1005 -Togo,http://www.wikidata.org/entity/Q945 -Tonga,http://www.wikidata.org/entity/Q678 -Trinidad and Tobago,http://www.wikidata.org/entity/Q754 -Tunisia,http://www.wikidata.org/entity/Q948 -Tunisia: Tunis,http://www.wikidata.org/entity/Q3572 -Turkey,http://www.wikidata.org/entity/Q43 -Turkmenistan,http://www.wikidata.org/entity/Q874 -Tuvalu,http://www.wikidata.org/entity/Q672 -Uganda,http://www.wikidata.org/entity/Q1036 -Ukraine,http://www.wikidata.org/entity/Q212 -United Arab Emirates,http://www.wikidata.org/entity/Q878 -United Arab Republic,http://www.wikidata.org/entity/Q170468 -United Kingdom,http://www.wikidata.org/entity/Q145 -United States of America,http://www.wikidata.org/entity/Q30 -Uruguay,http://www.wikidata.org/entity/Q77 -USA,http://www.wikidata.org/entity/Q30 -USA: AK,http://www.wikidata.org/entity/Q797 -USA: AL,http://www.wikidata.org/entity/Q173 -USA: AR,http://www.wikidata.org/entity/Q1612 -USA: AZ,http://www.wikidata.org/entity/Q816 -USA: CA,http://www.wikidata.org/entity/Q99 -"USA: CA, San Diego County",http://www.wikidata.org/entity/Q108143 -USA: CO,http://www.wikidata.org/entity/Q1261 -USA: CT,http://www.wikidata.org/entity/Q779 -USA: DC,http://www.wikidata.org/entity/Q3551781 -USA: DE,http://www.wikidata.org/entity/Q1393 -USA: FL,http://www.wikidata.org/entity/Q812 -USA: GA,http://www.wikidata.org/entity/Q1428 -USA: HI,http://www.wikidata.org/entity/Q782 -USA: IA,http://www.wikidata.org/entity/Q1546 -USA: ID,http://www.wikidata.org/entity/Q1221 -USA: IL,http://www.wikidata.org/entity/Q1204 -USA: Illinois,http://www.wikidata.org/entity/Q1204 -USA: IN,http://www.wikidata.org/entity/Q1415 -USA: KS,http://www.wikidata.org/entity/Q1558 -USA: KY,http://www.wikidata.org/entity/Q1603 -USA: LA,http://www.wikidata.org/entity/Q1588 -"USA: New Orleans, LA",https://www.wikidata.org/wiki/Q34404 -USA: MA,http://www.wikidata.org/entity/Q771 -USA: MD,http://www.wikidata.org/entity/Q1391 -USA: ME,http://www.wikidata.org/entity/Q724 -USA: MI,http://www.wikidata.org/entity/Q1166 -USA: MN,http://www.wikidata.org/entity/Q1527 -USA: MO,http://www.wikidata.org/entity/Q1581 -USA: MS,http://www.wikidata.org/entity/Q1494 -USA: MT,http://www.wikidata.org/entity/Q1212 -USA: NC,http://www.wikidata.org/entity/Q1454 -USA: ND,http://www.wikidata.org/entity/Q1207 -USA: NE,http://www.wikidata.org/entity/Q1553 -USA: NH,http://www.wikidata.org/entity/Q759 -USA: NJ,http://www.wikidata.org/entity/Q1408 -USA: NM,http://www.wikidata.org/entity/Q1522 -USA: North Carolina,http://www.wikidata.org/entity/Q1454 -USA: NV,http://www.wikidata.org/entity/Q1227 -USA: NY,http://www.wikidata.org/entity/Q1384 -USA: OH,http://www.wikidata.org/entity/Q1397 -USA: OK,http://www.wikidata.org/entity/Q1649 -USA: OR,http://www.wikidata.org/entity/Q824 -USA: PA,http://www.wikidata.org/entity/Q1400 -USA: RI,http://www.wikidata.org/entity/Q1387 -"USA: San Francisco, CA",http://www.wikidata.org/entity/Q62 -USA: SC,http://www.wikidata.org/entity/Q1456 -USA: SD,http://www.wikidata.org/entity/Q1211 -"USA: Snohomish County, WA",http://www.wikidata.org/entity/Q110403 -USA: TN,http://www.wikidata.org/entity/Q1509 -USA: TX,http://www.wikidata.org/entity/Q1439 -USA: UT,http://www.wikidata.org/entity/Q829 -USA: VA,http://www.wikidata.org/entity/Q1370 -USA: VT,http://www.wikidata.org/entity/Q16551 -USA: WA,http://www.wikidata.org/entity/Q1223 -USA: WI,http://www.wikidata.org/entity/Q1537 -USA: WV,http://www.wikidata.org/entity/Q1371 -USA: WY,http://www.wikidata.org/entity/Q1214 -Uzbekistan,http://www.wikidata.org/entity/Q265 -Vanuatu,http://www.wikidata.org/entity/Q686 -Vatican City,http://www.wikidata.org/entity/Q237 -Venezuela,http://www.wikidata.org/entity/Q717 -Viet nam,http://www.wikidata.org/entity/Q881 -Viet Nam,http://www.wikidata.org/entity/Q881 -Viet Nam: Ho Chi Minh city,http://www.wikidata.org/entity/Q1854 -Vietnam,http://www.wikidata.org/entity/Q881 -Yemen,http://www.wikidata.org/entity/Q805 -Zambia,http://www.wikidata.org/entity/Q953 -Zimbabwe,http://www.wikidata.org/entity/Q954 +nasopharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal swabs,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal,http://purl.obolibrary.org/obo/NCIT_C155831 +respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831 +naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal aspirate,http://purl.obolibrary.org/obo/NCIT_C155831 +nasal swab specimen,http://purl.obolibrary.org/obo/NCIT_C155831 +pharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831 +respiratory secretion,http://purl.obolibrary.org/obo/NCIT_C155831 +mid-nasal swab,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal (throat) washings,http://purl.obolibrary.org/obo/NCIT_C155831 +oropharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat swab,http://purl.obolibrary.org/obo/NCIT_C155835 +oro-pharyngeal,http://purl.obolibrary.org/obo/NCIT_C155835 +buccal swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat washing,http://purl.obolibrary.org/obo/NCIT_C155835 +Throat Swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat (oropharyngeal) swab,http://purl.obolibrary.org/obo/NCIT_C155835 +bronchoalveolar lavage fluid,http://purl.obolibrary.org/obo/NCIT_C13195 +swab,http://purl.obolibrary.org/obo/NCIT_C13195 +oral swab,http://purl.obolibrary.org/obo/NCIT_C13195 +bronchoalveolar lavage,http://purl.obolibrary.org/obo/NCIT_C13195 +sputum,http://purl.obolibrary.org/obo/NCIT_C13278 +aspirate,http://purl.obolibrary.org/obo/NCIT_C13347 +stool,http://purl.obolibrary.org/obo/NCIT_C13234 +serum,http://purl.obolibrary.org/obo/NCIT_C13325 +saliva,http://purl.obolibrary.org/obo/NCIT_C13275 +nasal swab,http://purl.obolibrary.org/obo/NCIT_C132119 diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py index 148a7e1..21ed3b2 100755 --- a/scripts/from_genbank_to_fasta_and_yaml.py +++ b/scripts/from_genbank_to_fasta_and_yaml.py @@ -37,8 +37,7 @@ if not os.path.exists(dir_metadata): tmp_list = [x.split('.')[0] for x in tmp_list] print(term, len(tmp_list)) - tmp_list=tmp_list - # tmp_list = tmp_list[0:2] # restricting to small run + #tmp_list = tmp_list[0:2] # restricting to small run id_set.update([x.split('.')[0] for x in tmp_list]) @@ -112,13 +111,13 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml) info_for_yaml_dict['sample']['sample_id'] = accession_version - info_for_yaml_dict['sample']['source_database_accession'] = "http://identifiers.org/insdc/"+accession_version+"#sequence" #accession is turned into resolvable URL/URI now + info_for_yaml_dict['sample']['source_database_accession'] = ["http://identifiers.org/insdc/"+accession_version+"#sequence"] #accession is turned into resolvable URL/URI now # submitter info GBSeq_references = GBSeq.find('GBSeq_references') if GBSeq_references is not None: - info_for_yaml_dict['submitter']['authors'] = ';'.join([x.text for x in GBSeq_references.iter('GBAuthor')]) + info_for_yaml_dict['submitter']['authors'] = ["{}".format(x.text) for x in GBSeq_references.iter('GBAuthor')] GBReference = GBSeq_references.find('GBReference') if GBReference is not None: @@ -126,7 +125,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml) if GBReference_journal is not None and GBReference_journal.text != 'Unpublished': if 'Submitted' in GBReference_journal.text: - info_for_yaml_dict['submitter']['submitter_name'] = GBReference_journal.text.split(') ')[1].split(',')[0].strip() + info_for_yaml_dict['submitter']['submitter_name'] = ["{}".format(GBReference_journal.text.split(') ')[1].split(',')[0].strip())] info_for_yaml_dict['submitter']['submitter_address'] = ','.join(GBReference_journal.text.split(') ')[1].split(',')[1:]).strip() else: info_for_yaml_dict['submitter']['additional_submitter_information'] = GBReference_journal.text @@ -146,8 +145,9 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml) if field_in_yaml == 'sequencing_coverage': # A regular expression would be better! try: - info_for_yaml_dict['technology'][field_in_yaml] = float( - tech_info_to_parse.strip('(average)').strip("reads/nt").strip('(average for 6 sequences)').replace(',', '.').strip(' xX>')) + info_for_yaml_dict['technology'][field_in_yaml] = [ + float(tech_info_to_parse.strip('(average)').strip("reads/nt").strip('(average for 6 sequences)').replace(',', '.').strip(' xX>')) + ] except ValueError: print(accession_version, "Couldn't make sense of Coverage '%s'" % tech_info_to_parse) pass @@ -162,8 +162,7 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml) new_seq_tec_list.append(seq_tec) - for n, seq_tec in enumerate(new_seq_tec_list): - info_for_yaml_dict['technology'][field_in_yaml + ('' if n == 0 else str(n + 1))] = seq_tec + info_for_yaml_dict['technology']['sample_sequencing_technology'] = [x for x in new_seq_tec_list] else: info_for_yaml_dict['technology'][field_in_yaml] = tech_info_to_parse @@ -210,17 +209,14 @@ for path_metadata_xxx_xml in [os.path.join(dir_metadata, name_metadata_xxx_xml) GBQualifier_value_text = GBQualifier_value_text.upper() # For example, in case of 'usa: wa' if GBQualifier_value_text in term_to_uri_dict: - info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict[GBQualifier_value_text] + info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict[GBQualifier_value_text]] else: if GBQualifier_value_text in ['NP/OP swab', 'nasopharyngeal and oropharyngeal swab', 'nasopharyngeal/oropharyngeal swab', 'np/np swab', 'np/op']: - info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict['nasopharyngeal swab'] - info_for_yaml_dict['sample']['specimen_source2'] = term_to_uri_dict['oropharyngeal swab'] - elif GBQualifier_value_text in ['nasopharyngeal swab/throat swab']: - info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict['nasopharyngeal swab'] - info_for_yaml_dict['sample']['specimen_source2'] = term_to_uri_dict['throat swab'] + info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict['nasopharyngeal swab'], term_to_uri_dict['oropharyngeal swab']] + elif GBQualifier_value_text in ['nasopharyngeal swab/throat swab', 'nasopharyngeal/throat swab']: + info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict['nasopharyngeal swab'], term_to_uri_dict['throat swab']] elif GBQualifier_value_text in ['nasopharyngeal aspirate/throat swab']: - info_for_yaml_dict['sample']['specimen_source'] = term_to_uri_dict['nasopharyngeal aspirate'] - info_for_yaml_dict['sample']['specimen_source2'] = term_to_uri_dict['throat swab'] + info_for_yaml_dict['sample']['specimen_source'] = [term_to_uri_dict['nasopharyngeal aspirate'], term_to_uri_dict['throat swab']] else: missing_value_list.append('\t'.join([accession_version, 'specimen_source', GBQualifier_value_text])) elif GBQualifier_name_text == 'collection_date': diff --git a/scripts/sequences.acc b/scripts/sequences.acc index a99c4e6..697d868 100644 --- a/scripts/sequences.acc +++ b/scripts/sequences.acc @@ -1,4 +1,299 @@ NC_045512 +MT394528 +MT394529 +MT394530 +MT394531 +MT394864 +MT396241 +MT396242 +MT396243 +MT396244 +MT396245 +MT396246 +MT396247 +MT396248 +MT396266 +MT380726 +MT380727 +MT380728 +MT380729 +MT380730 +MT380731 +MT380732 +MT380733 +MT380734 +MT385414 +MT385415 +MT385416 +MT385417 +MT385418 +MT385419 +MT385420 +MT385421 +MT385422 +MT385423 +MT385424 +MT385425 +MT385426 +MT385427 +MT385428 +MT385429 +MT385430 +MT385431 +MT385432 +MT385433 +MT385434 +MT385435 +MT385436 +MT385437 +MT385438 +MT385439 +MT385440 +MT385441 +MT385442 +MT385443 +MT385444 +MT385445 +MT385446 +MT385447 +MT385448 +MT385449 +MT385450 +MT385451 +MT385452 +MT385453 +MT385454 +MT385455 +MT385456 +MT385457 +MT385458 +MT385459 +MT385460 +MT385461 +MT385462 +MT385463 +MT385464 +MT385465 +MT385466 +MT385467 +MT385468 +MT385469 +MT385470 +MT385471 +MT385472 +MT385473 +MT385474 +MT385475 +MT385476 +MT385477 +MT385478 +MT385479 +MT385480 +MT385481 +MT385482 +MT385483 +MT385484 +MT385485 +MT385486 +MT385487 +MT385488 +MT385489 +MT385490 +MT385491 +MT385492 +MT385493 +MT385494 +MT385495 +MT385496 +MT385497 +MT186683 +MT252677 +MT252678 +MT252679 +MT252680 +MT252681 +MT252682 +MT252683 +MT252684 +MT252685 +MT252686 +MT252687 +MT252688 +MT252689 +MT252690 +MT252691 +MT252692 +MT252693 +MT252694 +MT252695 +MT252696 +MT252697 +MT252698 +MT252699 +MT252700 +MT252701 +MT252702 +MT252703 +MT252704 +MT252705 +MT252706 +MT252707 +MT252708 +MT252709 +MT252710 +MT252711 +MT252712 +MT252713 +MT252715 +MT252716 +MT252717 +MT252719 +MT252721 +MT252723 +MT252725 +MT252726 +MT252728 +MT252729 +MT252730 +MT252733 +MT252734 +MT252735 +MT252736 +MT252737 +MT252738 +MT252739 +MT252740 +MT252741 +MT252742 +MT252745 +MT252746 +MT252747 +MT252748 +MT252749 +MT252756 +MT252757 +MT252758 +MT252761 +MT252763 +MT252764 +MT252765 +MT252766 +MT252767 +MT252768 +MT252769 +MT252770 +MT252771 +MT252772 +MT252773 +MT252774 +MT252775 +MT252778 +MT252779 +MT252780 +MT252781 +MT252782 +MT252783 +MT252784 +MT252785 +MT252787 +MT252788 +MT252792 +MT252793 +MT252794 +MT252795 +MT252797 +MT252798 +MT252799 +MT252800 +MT252801 +MT252802 +MT252803 +MT252804 +MT252805 +MT252806 +MT252807 +MT252808 +MT252809 +MT252810 +MT252811 +MT252821 +MT252822 +MT252823 +MT252824 +MT339043 +MT365033 +MT374101 +MT374102 +MT374103 +MT374104 +MT374105 +MT374106 +MT374107 +MT374108 +MT374109 +MT374110 +MT374111 +MT374112 +MT374113 +MT374114 +MT374115 +MT374116 +MT375428 +MT375429 +MT375430 +MT375431 +MT375432 +MT375433 +MT375434 +MT375435 +MT375436 +MT375437 +MT375438 +MT375439 +MT375440 +MT375441 +MT375442 +MT375443 +MT375444 +MT375445 +MT375446 +MT375447 +MT375448 +MT375449 +MT375450 +MT375451 +MT375452 +MT375453 +MT375454 +MT375455 +MT375456 +MT375457 +MT375458 +MT375459 +MT375460 +MT375461 +MT375462 +MT375463 +MT375464 +MT375465 +MT375466 +MT375467 +MT375468 +MT375469 +MT375470 +MT375471 +MT375472 +MT375473 +MT375474 +MT375475 +MT375476 +MT375477 +MT375478 +MT375479 +MT375480 +MT375481 +MT375482 +MT375483 MT370516 MT370517 MT370518 @@ -225,6 +520,8 @@ MT372480 MT372481 MT372482 MT372483 +7BV2_P +7BV2_T LC542976 LC542809 MT114412 |