From bebc6de7b32f0c69636361e97faa1e184d0226cb Mon Sep 17 00:00:00 2001 From: Andrea Guarracino Date: Wed, 15 Apr 2020 15:52:28 +0200 Subject: accessions list CoV-2 from NCBI Virus 2020/04/15 --- scripts/sequences.acc | 396 ++++++++++++++++++++++++++++---------------------- 1 file changed, 224 insertions(+), 172 deletions(-) (limited to 'scripts') diff --git a/scripts/sequences.acc b/scripts/sequences.acc index 62bde2c..a420fb4 100644 --- a/scripts/sequences.acc +++ b/scripts/sequences.acc @@ -1,25 +1,159 @@ -MT325599 -MT325601 -MT325602 -MT325607 -MT325608 -MT325609 -MT325610 -MT325612 -MT325616 -MT325617 -MT325618 -MT325622 -MT325623 -MT325600 -MT325606 -MT325611 -MT325613 -MT325615 -MT325619 -MT325620 -MT325624 -MT325625 +NC_045512 +MT334522 +MT334523 +MT334524 +MT334525 +MT334526 +MT334527 +MT334528 +MT334529 +MT334530 +MT334531 +MT334532 +MT334533 +MT334534 +MT334535 +MT334536 +MT334537 +MT334538 +MT334539 +MT334540 +MT334541 +MT334542 +MT334543 +MT334544 +MT334545 +MT334546 +MT334555 +MT334547 +MT334548 +MT334549 +MT334550 +MT334551 +MT334552 +MT334553 +MT334554 +MT334556 +MT334557 +MT334558 +MT334559 +MT334560 +MT334561 +MT334562 +MT334563 +MT334564 +MT334565 +MT334566 +MT334567 +MT334568 +MT334569 +MT334570 +MT334571 +MT334572 +MT334573 +MT324062 +MT324680 +MT324684 +MT325573 +MT325574 +MT325576 +MT325577 +MT325578 +MT325580 +MT325591 +MT325592 +MT325593 +MT325595 +MT325605 +MT325627 +MT326028 +MT326029 +MT326031 +MT326048 +MT326093 +MT326092 +MT326091 +MT326090 +MT326085 +MT326084 +MT326083 +MT326082 +MT326081 +MT326080 +MT326077 +MT326067 +MT326057 +MT326024 +MT326025 +MT326032 +MT326033 +MT326035 +MT326036 +MT326037 +MT326040 +MT326041 +MT326043 +MT326044 +MT326046 +MT326049 +MT326050 +MT326052 +MT326053 +MT326055 +MT326056 +MT326059 +MT326062 +MT326063 +MT326066 +MT326069 +MT326070 +MT326071 +MT326073 +MT326074 +MT326075 +MT326088 +MT326089 +MT327745 +MT325568 +MT325572 +MT325575 +MT325583 +MT325584 +MT325604 +MT325631 +MT325632 +MT325635 +MT325636 +MT325637 +MT326095 +MT326096 +MT326103 +MT326112 +MT326113 +MT326114 +MT326115 +MT326122 +MT326131 +MT326132 +MT326133 +MT325563 +MT326164 +MT326166 +MT326167 +MT325570 +MT325579 +MT325581 +MT325582 +MT325586 +MT325594 +MT325598 +MT325626 +MT325628 +MT325633 +MT325634 +MT326030 +MT326038 +MT326058 MT325565 MT325566 MT326147 @@ -56,7 +190,6 @@ MT326121 MT326119 MT326109 MT326100 -MT325568 MT324679 MT325561 MT325571 @@ -95,31 +228,11 @@ MT326177 MT326184 MT326185 MT326187 -MT325572 -MT325575 -MT325583 -MT325584 -MT325604 -MT325631 -MT325632 -MT325635 -MT325636 -MT325637 -MT326095 -MT326096 -MT326103 -MT326112 -MT326113 -MT326114 -MT326115 -MT326122 -MT326131 -MT326132 -MT326133 -MT325563 -MT326164 -MT326166 -MT326167 +MT324681 +MT324682 +MT324683 +MT328032 +MT328035 MT325569 MT326097 MT326106 @@ -177,146 +290,83 @@ MT326101 MT326099 MT326098 MT326094 -MT326093 -MT326092 -MT326091 -MT326090 -MT326085 -MT326084 -MT326083 -MT326082 -MT326081 -MT326080 -MT326077 -MT326067 -MT326057 -MT326024 -MT326025 -MT326032 -MT326033 -MT326035 -MT326036 -MT326037 -MT326040 -MT326041 -MT326043 -MT326044 -MT326046 -MT326049 -MT326050 -MT326052 -MT326053 -MT326055 -MT326056 -MT326059 -MT326062 -MT326063 -MT326066 -MT326069 -MT326070 -MT326071 -MT326073 -MT326074 -MT326075 -MT326088 -MT326089 -MT327745 -MT324062 -MT324680 -MT324684 -MT325573 -MT325574 -MT325576 -MT325577 -MT325578 -MT325580 -MT325591 -MT325592 -MT325593 -MT325595 -MT325605 -MT325627 -MT326028 -MT326029 -MT326031 -MT326048 -MT325570 -MT325579 -MT325581 -MT325582 -MT325586 -MT325594 -MT325598 -MT325626 -MT325628 -MT325633 -MT325634 -MT326030 -MT326038 -MT326058 -MT324681 -MT324682 -MT324683 -MT328032 -MT328035 +MT325599 +MT325601 +MT325602 +MT325607 +MT325608 +MT325609 +MT325610 +MT325612 +MT325616 +MT325617 +MT325618 +MT325622 +MT325623 +MT325600 +MT325606 +MT325611 +MT325613 +MT325615 +MT325619 +MT325620 +MT325624 +MT325625 +MT322394 +MT322395 +MT322420 +MT322424 MT039874 MT077125 -MT322394 +MT322396 MT322397 -MT322398 MT322399 +MT322403 +MT322406 +MT322407 +MT322412 +MT322413 +MT322414 +MT322416 +MT322398 MT322400 MT322401 -MT322403 +MT322402 MT322404 MT322405 -MT322406 MT322408 MT322409 MT322410 MT322411 -MT322412 -MT322413 -MT322414 MT322415 -MT322416 MT322417 MT322418 MT322419 -MT322420 MT322421 MT322422 MT322423 -MT322424 -MT322396 -MT322402 -MT322395 -MT322407 -MT320538 MT320891 +MT320538 MT308692 MT308693 +MT308695 +MT308696 MT308698 MT308699 +MT308701 MT308703 MT308704 MT308694 -MT308695 -MT308696 MT308697 MT308700 -MT308701 MT308702 MT304476 MT304474 MT304475 MT293547 MT304477 -MT304483 -MT300186 MT304478 MT304479 -MT304480 MT304481 MT304482 MT304484 @@ -324,9 +374,12 @@ MT304485 MT304486 MT304487 MT304488 +MT304491 +MT304480 +MT304483 MT304489 MT304490 -MT304491 +MT300186 MT291831 MT291836 MT291834 @@ -366,7 +419,6 @@ MT293168 MT293175 MT293190 MT293191 -MT273658 MT293159 MT292582 MT293162 @@ -376,7 +428,6 @@ MT293165 MT293156 MT293157 MT293158 -MT281577 MT293171 MT293174 MT293176 @@ -426,6 +477,8 @@ MT293223 MT291826 MT291832 MT291833 +MT273658 +MT281577 MT281530 MT276331 MT276325 @@ -645,8 +698,6 @@ MT253700 MT253705 MT253709 MT253708 -MT233526 -MT246667 MT246451 MT246453 MT246454 @@ -689,6 +740,8 @@ MT246472 MT246473 MT246483 MT246484 +MT233526 +MT246667 MT240479 MT232869 MT232870 @@ -774,8 +827,8 @@ MT127113 MT127114 MT127115 MT126808 -LC528233 LC528232 +LC528233 MT123290 MT123291 MT123292 @@ -807,8 +860,8 @@ MT066159 MT066175 MT066176 LC523807 -LC523808 LC523809 +LC523808 MT044258 MT044257 MT042777 @@ -830,7 +883,6 @@ LC522350 MT027062 MT027063 MT027064 -MT020781 MT019530 MT019531 MT020881 @@ -838,13 +890,14 @@ MT019533 MT019529 MT019532 MT020880 +MT020781 LR757995 LR757996 LR757997 LR757998 MT007544 -MT008023 MT008022 +MT008023 MN996530 MN996531 MN996527 @@ -856,22 +909,21 @@ MN988668 MN988669 MN994467 MN988713 -MN938387 -MN938389 -MN975263 -MN975268 +MN938384 +MN975262 +MN985325 +MN975264 +MN975266 MN975267 +MN975268 MN938388 +MN938389 MN938390 -MN975264 -MN975265 -MN975266 -MN938386 +MN975263 MN938385 -MN938384 -MN975262 -MN985325 +MN938386 +MN938387 +MN975265 MN970003 MN970004 -NC_045512 MN908947 -- cgit v1.2.3 From addbd80878cc4fedaf785c147073bb72ef8b54b4 Mon Sep 17 00:00:00 2001 From: Andrea Guarracino Date: Wed, 15 Apr 2020 15:54:26 +0200 Subject: added type id check what is not genomic DNA is removed--- scripts/from_genbank_to_fasta_and_yaml.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/from_genbank_to_fasta_and_yaml.py b/scripts/from_genbank_to_fasta_and_yaml.py index 0cc1a57..6a55b5e 100644 --- a/scripts/from_genbank_to_fasta_and_yaml.py +++ b/scripts/from_genbank_to_fasta_and_yaml.py @@ -7,7 +7,7 @@ import os path_ncbi_virus_accession = 'sequences.acc' -date = '20200414' +date = '20200415' path_seq_fasta = 'seq_from_nuccore.{}.fasta'.format(date) path_metadata_xml = 'metadata_from_nuccore.{}.xml'.format(date) @@ -19,9 +19,15 @@ for term in term_list: tmp_list = Entrez.read( Entrez.esearch(db='nuccore', term=term, idtype='acc', retmax='10000') )['IdList'] - print(term, len(tmp_list)) - + + # Remove mRNAs, ncRNAs, Proteins, and predicted models (more information here: https://en.wikipedia.org/wiki/RefSeq) + tmp_list = [x for x in tmp_list if x[:2] not in ['NM', 'NR', 'NP', 'XM', 'XR', 'XP', 'WP']] + # Remove the version in the id + tmp_list = [x.split('.')[0] for x in tmp_list] + + print(term, len(tmp_list)) + id_set.update([x.split('.')[0] for x in tmp_list]) print(term_list, len(id_set)) -- cgit v1.2.3 From 7c5c91000661f268c4fc8bc2001918c5e86a9aee Mon Sep 17 00:00:00 2001 From: Andrea Guarracino Date: Sat, 18 Apr 2020 22:16:13 +0200 Subject: ncbi_speciesman_source mapping --- scripts/dict_ontology_standardization | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 scripts/dict_ontology_standardization (limited to 'scripts') diff --git a/scripts/dict_ontology_standardization b/scripts/dict_ontology_standardization new file mode 100644 index 0000000..fcd6c94 --- /dev/null +++ b/scripts/dict_ontology_standardization @@ -0,0 +1,22 @@ +nasopharyngeal swab, http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831 +respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831 +naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal aspirate,http://purl.obolibrary.org/obo/NCIT_C155831 +nasal swab specimen,http://purl.obolibrary.org/obo/NCIT_C155831 +pharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831 +respiratory secretion,http://purl.obolibrary.org/obo/NCIT_C155831 +mid-nasal swab,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal (throat) washings,http://purl.obolibrary.org/obo/NCIT_C155831 +oropharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat swab,http://purl.obolibrary.org/obo/NCIT_C155835 +oro-pharyngeal,http://purl.obolibrary.org/obo/NCIT_C155835 +buccal swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat washing,http://purl.obolibrary.org/obo/NCIT_C155835 +Throat Swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat (oropharyngeal) swab,http://purl.obolibrary.org/obo/NCIT_C155835 +bronchoalveolar lavage fluid,http://purl.obolibrary.org/obo/NCIT_C13195 +swab,http://purl.obolibrary.org/obo/NCIT_C13195 +oral swab,http://purl.obolibrary.org/obo/NCIT_C13195 +bronchoalveolar lavage,http://purl.obolibrary.org/obo/NCIT_C13195 +sputum,http://purl.obolibrary.org/obo/NCIT_C13278 -- cgit v1.2.3 From d0aae0bbaa1ba3a6f25bc5bb02a37af5ad1d54fc Mon Sep 17 00:00:00 2001 From: Andrea Guarracino Date: Sat, 18 Apr 2020 22:17:32 +0200 Subject: Delete dict_ontology_standardization --- scripts/dict_ontology_standardization | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 scripts/dict_ontology_standardization (limited to 'scripts') diff --git a/scripts/dict_ontology_standardization b/scripts/dict_ontology_standardization deleted file mode 100644 index fcd6c94..0000000 --- a/scripts/dict_ontology_standardization +++ /dev/null @@ -1,22 +0,0 @@ -nasopharyngeal swab, http://purl.obolibrary.org/obo/NCIT_C155831 -nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831 -respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831 -naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831 -nasopharyngeal aspirate,http://purl.obolibrary.org/obo/NCIT_C155831 -nasal swab specimen,http://purl.obolibrary.org/obo/NCIT_C155831 -pharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831 -respiratory secretion,http://purl.obolibrary.org/obo/NCIT_C155831 -mid-nasal swab,http://purl.obolibrary.org/obo/NCIT_C155831 -nasopharyngeal (throat) washings,http://purl.obolibrary.org/obo/NCIT_C155831 -oropharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155835 -throat swab,http://purl.obolibrary.org/obo/NCIT_C155835 -oro-pharyngeal,http://purl.obolibrary.org/obo/NCIT_C155835 -buccal swab,http://purl.obolibrary.org/obo/NCIT_C155835 -throat washing,http://purl.obolibrary.org/obo/NCIT_C155835 -Throat Swab,http://purl.obolibrary.org/obo/NCIT_C155835 -throat (oropharyngeal) swab,http://purl.obolibrary.org/obo/NCIT_C155835 -bronchoalveolar lavage fluid,http://purl.obolibrary.org/obo/NCIT_C13195 -swab,http://purl.obolibrary.org/obo/NCIT_C13195 -oral swab,http://purl.obolibrary.org/obo/NCIT_C13195 -bronchoalveolar lavage,http://purl.obolibrary.org/obo/NCIT_C13195 -sputum,http://purl.obolibrary.org/obo/NCIT_C13278 -- cgit v1.2.3 From 4a7f822a502ca213d405c93cea9b330b3d32ba20 Mon Sep 17 00:00:00 2001 From: Andrea Guarracino Date: Sat, 18 Apr 2020 22:18:11 +0200 Subject: ncbi_speciesman_source mapping --- .../ncbi_speciesman_source.csv | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 scripts/dict_ontology_standardization/ncbi_speciesman_source.csv (limited to 'scripts') diff --git a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv new file mode 100644 index 0000000..fcd6c94 --- /dev/null +++ b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv @@ -0,0 +1,22 @@ +nasopharyngeal swab, http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831 +respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831 +naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal aspirate,http://purl.obolibrary.org/obo/NCIT_C155831 +nasal swab specimen,http://purl.obolibrary.org/obo/NCIT_C155831 +pharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831 +respiratory secretion,http://purl.obolibrary.org/obo/NCIT_C155831 +mid-nasal swab,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal (throat) washings,http://purl.obolibrary.org/obo/NCIT_C155831 +oropharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat swab,http://purl.obolibrary.org/obo/NCIT_C155835 +oro-pharyngeal,http://purl.obolibrary.org/obo/NCIT_C155835 +buccal swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat washing,http://purl.obolibrary.org/obo/NCIT_C155835 +Throat Swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat (oropharyngeal) swab,http://purl.obolibrary.org/obo/NCIT_C155835 +bronchoalveolar lavage fluid,http://purl.obolibrary.org/obo/NCIT_C13195 +swab,http://purl.obolibrary.org/obo/NCIT_C13195 +oral swab,http://purl.obolibrary.org/obo/NCIT_C13195 +bronchoalveolar lavage,http://purl.obolibrary.org/obo/NCIT_C13195 +sputum,http://purl.obolibrary.org/obo/NCIT_C13278 -- cgit v1.2.3 From 96409bac49eafa1f797a667e662efeb0431383f7 Mon Sep 17 00:00:00 2001 From: Andrea Guarracino Date: Sat, 18 Apr 2020 22:18:30 +0200 Subject: dictionaries for mapping --- .../ncbi_countries.csv | 243 +++++++++++++++++++++ .../ncbi_sequencing_technology.csv | 15 ++ 2 files changed, 258 insertions(+) create mode 100644 scripts/dict_ontology_standardization/ncbi_countries.csv create mode 100644 scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv (limited to 'scripts') diff --git a/scripts/dict_ontology_standardization/ncbi_countries.csv b/scripts/dict_ontology_standardization/ncbi_countries.csv new file mode 100644 index 0000000..9813f52 --- /dev/null +++ b/scripts/dict_ontology_standardization/ncbi_countries.csv @@ -0,0 +1,243 @@ +USA,http://www.wikidata.org/entity/Q30 +USA: CA,http://www.wikidata.org/entity/Q99 +USA: ME,http://www.wikidata.org/entity/Q724 +USA: NH,http://www.wikidata.org/entity/Q759 +USA: AL,http://www.wikidata.org/entity/Q173 +USA: MA,http://www.wikidata.org/entity/Q771 +USA: CT,http://www.wikidata.org/entity/Q779 +USA: AK,http://www.wikidata.org/entity/Q797 +USA: HI,http://www.wikidata.org/entity/Q782 +USA: FL,http://www.wikidata.org/entity/Q812 +USA: AZ,http://www.wikidata.org/entity/Q816 +USA: OR,http://www.wikidata.org/entity/Q824 +USA: UT,http://www.wikidata.org/entity/Q829 +USA: MI,http://www.wikidata.org/entity/Q1166 +USA: IL,http://www.wikidata.org/entity/Q1204 +USA: ND,http://www.wikidata.org/entity/Q1207 +USA: SD,http://www.wikidata.org/entity/Q1211 +USA: ID,http://www.wikidata.org/entity/Q1221 +USA: MT,http://www.wikidata.org/entity/Q1212 +USA: WY,http://www.wikidata.org/entity/Q1214 +USA: WA,http://www.wikidata.org/entity/Q1223 +USA: NV,http://www.wikidata.org/entity/Q1227 +USA: CO,http://www.wikidata.org/entity/Q1261 +USA: WV,http://www.wikidata.org/entity/Q1371 +USA: VA,http://www.wikidata.org/entity/Q1370 +USA: RI,http://www.wikidata.org/entity/Q1387 +USA: NY,http://www.wikidata.org/entity/Q1384 +USA: DE,http://www.wikidata.org/entity/Q1393 +USA: OH,http://www.wikidata.org/entity/Q1397 +USA: MD,http://www.wikidata.org/entity/Q1391 +USA: NJ,http://www.wikidata.org/entity/Q1408 +USA: GA,http://www.wikidata.org/entity/Q1428 +USA: PA,http://www.wikidata.org/entity/Q1400 +USA: IN,http://www.wikidata.org/entity/Q1415 +USA: TX,http://www.wikidata.org/entity/Q1439 +USA: NC,http://www.wikidata.org/entity/Q1454 +USA: MS,http://www.wikidata.org/entity/Q1494 +USA: SC,http://www.wikidata.org/entity/Q1456 +USA: NM,http://www.wikidata.org/entity/Q1522 +USA: TN,http://www.wikidata.org/entity/Q1509 +USA: MN,http://www.wikidata.org/entity/Q1527 +USA: WI,http://www.wikidata.org/entity/Q1537 +USA: NE,http://www.wikidata.org/entity/Q1553 +USA: IA,http://www.wikidata.org/entity/Q1546 +USA: OK,http://www.wikidata.org/entity/Q1649 +USA: KS,http://www.wikidata.org/entity/Q1558 +USA: VT,http://www.wikidata.org/entity/Q16551 +USA: MO,http://www.wikidata.org/entity/Q1581 +USA: LA,http://www.wikidata.org/entity/Q1588 +USA: KY,http://www.wikidata.org/entity/Q1603 +USA: AR,http://www.wikidata.org/entity/Q1612 +China,http://www.wikidata.org/entity/Q148 +China: Beijing,http://www.wikidata.org/entity/Q956 +China: Hong Kong,http://www.wikidata.org/entity/Q8646 +China: Shanghai,http://www.wikidata.org/entity/Q8686 +China: Chongqing,http://www.wikidata.org/entity/Q11725 +China: Tianjin,http://www.wikidata.org/entity/Q11736 +China: Macau,http://www.wikidata.org/entity/Q14773 +China: Guangdong,http://www.wikidata.org/entity/Q15175 +China: Guangxi Zhuang Autonomous Region,http://www.wikidata.org/entity/Q15176 +China: Jiangsu,http://www.wikidata.org/entity/Q16963 +China: Zhejiang,http://www.wikidata.org/entity/Q16967 +China: Tibet Autonomous Region,http://www.wikidata.org/entity/Q17269 +China: Heilongjiang,http://www.wikidata.org/entity/Q19206 +China: Sichuan,http://www.wikidata.org/entity/Q19770 +China: Hebei,http://www.wikidata.org/entity/Q21208 +China: Xinjiang,http://www.wikidata.org/entity/Q34800 +China: Anhui,http://www.wikidata.org/entity/Q40956 +China: Inner Mongolia,http://www.wikidata.org/entity/Q41079 +China: Fujian,http://www.wikidata.org/entity/Q41705 +China: Hainan,http://www.wikidata.org/entity/Q42200 +China: Gansu,http://www.wikidata.org/entity/Q42392 +China: Yunnan,http://www.wikidata.org/entity/Q43194 +China: Shandong,http://www.wikidata.org/entity/Q43407 +China: Henan,http://www.wikidata.org/entity/Q43684 +China: Liaoning,http://www.wikidata.org/entity/Q43934 +China: Jilin,http://www.wikidata.org/entity/Q45208 +China: Hunan,http://www.wikidata.org/entity/Q45761 +China: Hubei,http://www.wikidata.org/entity/Q46862 +China: Qinghai,http://www.wikidata.org/entity/Q45833 +China: Shanxi,http://www.wikidata.org/entity/Q46913 +China: Shaanxi,http://www.wikidata.org/entity/Q47974 +China: Guizhou,http://www.wikidata.org/entity/Q47097 +China: Jiangxi,http://www.wikidata.org/entity/Q57052 +China: Ningxia Hui Autonomous Region,http://www.wikidata.org/entity/Q57448 +30.59 N 114.3 E,http://www.wikidata.org/entity/Q11746 +Sri Lanka,http://www.wikidata.org/entity/Q854 +Syria,http://www.wikidata.org/entity/Q858 +Tajikistan,http://www.wikidata.org/entity/Q863 +Canada,http://www.wikidata.org/entity/Q16 +Thailand,http://www.wikidata.org/entity/Q869 +Japan,http://www.wikidata.org/entity/Q17 +Turkmenistan,http://www.wikidata.org/entity/Q874 +Norway,http://www.wikidata.org/entity/Q20 +Ireland,http://www.wikidata.org/entity/Q27 +United Arab Emirates,http://www.wikidata.org/entity/Q878 +Vietnam,http://www.wikidata.org/entity/Q881 +United States of America,http://www.wikidata.org/entity/Q30 +South Korea,http://www.wikidata.org/entity/Q884 +Denmark,http://www.wikidata.org/entity/Q35 +Afghanistan,http://www.wikidata.org/entity/Q889 +Bangladesh,http://www.wikidata.org/entity/Q902 +Poland,http://www.wikidata.org/entity/Q36 +Mali,http://www.wikidata.org/entity/Q912 +Italy,http://www.wikidata.org/entity/Q38 +Angola,http://www.wikidata.org/entity/Q916 +Switzerland,http://www.wikidata.org/entity/Q39 +Bhutan,http://www.wikidata.org/entity/Q917 +Austria,http://www.wikidata.org/entity/Q40 +Brunei,http://www.wikidata.org/entity/Q921 +Tanzania,http://www.wikidata.org/entity/Q924 +Turkey,http://www.wikidata.org/entity/Q43 +Philippines,http://www.wikidata.org/entity/Q928 +Portugal,http://www.wikidata.org/entity/Q45 +Uruguay,http://www.wikidata.org/entity/Q77 +Central African Republic,http://www.wikidata.org/entity/Q929 +Togo,http://www.wikidata.org/entity/Q945 +Egypt,http://www.wikidata.org/entity/Q79 +Tunisia,http://www.wikidata.org/entity/Q948 +Zambia,http://www.wikidata.org/entity/Q953 +Mexico,http://www.wikidata.org/entity/Q96 +Zimbabwe,http://www.wikidata.org/entity/Q954 +South Sudan,http://www.wikidata.org/entity/Q958 +Kenya,http://www.wikidata.org/entity/Q114 +Benin,http://www.wikidata.org/entity/Q962 +Ethiopia,http://www.wikidata.org/entity/Q115 +Botswana,http://www.wikidata.org/entity/Q963 +Ghana,http://www.wikidata.org/entity/Q117 +Burkina Faso,http://www.wikidata.org/entity/Q965 +Burundi,http://www.wikidata.org/entity/Q967 +France,http://www.wikidata.org/entity/Q142 +Comoros,http://www.wikidata.org/entity/Q970 +United Kingdom,http://www.wikidata.org/entity/Q145 +Republic of the Congo,http://www.wikidata.org/entity/Q971 +People's Republic of China,http://www.wikidata.org/entity/Q148 +Democratic Republic of the Congo,http://www.wikidata.org/entity/Q974 +Brazil,http://www.wikidata.org/entity/Q155 +Djibouti,http://www.wikidata.org/entity/Q977 +Germany,http://www.wikidata.org/entity/Q183 +Eritrea,http://www.wikidata.org/entity/Q986 +The Gambia,http://www.wikidata.org/entity/Q1005 +Latvia,http://www.wikidata.org/entity/Q211 +Czech Republic,http://www.wikidata.org/entity/Q213 +Guinea,http://www.wikidata.org/entity/Q1006 +Guinea-Bissau,http://www.wikidata.org/entity/Q1007 +Slovakia,http://www.wikidata.org/entity/Q214 +Ivory Coast,http://www.wikidata.org/entity/Q1008 +Romania,http://www.wikidata.org/entity/Q218 +Cape Verde,http://www.wikidata.org/entity/Q1011 +Cameroon,http://www.wikidata.org/entity/Q1009 +Bulgaria,http://www.wikidata.org/entity/Q219 +Lesotho,http://www.wikidata.org/entity/Q1013 +Croatia,http://www.wikidata.org/entity/Q224 +Liberia,http://www.wikidata.org/entity/Q1014 +Libya,http://www.wikidata.org/entity/Q1016 +Kazakhstan,http://www.wikidata.org/entity/Q232 +Montenegro,http://www.wikidata.org/entity/Q236 +Madagascar,http://www.wikidata.org/entity/Q1019 +Barbados,http://www.wikidata.org/entity/Q244 +Indonesia,http://www.wikidata.org/entity/Q252 +Malawi,http://www.wikidata.org/entity/Q1020 +Mauritania,http://www.wikidata.org/entity/Q1025 +South Africa,http://www.wikidata.org/entity/Q258 +Mauritius,http://www.wikidata.org/entity/Q1027 +Algeria,http://www.wikidata.org/entity/Q262 +Morocco,http://www.wikidata.org/entity/Q1028 +Mozambique,http://www.wikidata.org/entity/Q1029 +Uzbekistan,http://www.wikidata.org/entity/Q265 +Namibia,http://www.wikidata.org/entity/Q1030 +Chile,http://www.wikidata.org/entity/Q298 +Niger,http://www.wikidata.org/entity/Q1032 +Singapore,http://www.wikidata.org/entity/Q334 +Nigeria,http://www.wikidata.org/entity/Q1033 +Bahrain,http://www.wikidata.org/entity/Q398 +Uganda,http://www.wikidata.org/entity/Q1036 +Australia,http://www.wikidata.org/entity/Q408 +Rwanda,http://www.wikidata.org/entity/Q1037 +Argentina,http://www.wikidata.org/entity/Q414 +São Tomé and Príncipe,http://www.wikidata.org/entity/Q1039 +Peru,http://www.wikidata.org/entity/Q419 +Senegal,http://www.wikidata.org/entity/Q1041 +Seychelles,http://www.wikidata.org/entity/Q1042 +North Korea,http://www.wikidata.org/entity/Q423 +Sierra Leone,http://www.wikidata.org/entity/Q1044 +Cambodia,http://www.wikidata.org/entity/Q424 +Sudan,http://www.wikidata.org/entity/Q1049 +Somalia,http://www.wikidata.org/entity/Q1045 +Eswatini,http://www.wikidata.org/entity/Q1050 +East Timor,http://www.wikidata.org/entity/Q574 +Chad,http://www.wikidata.org/entity/Q657 +New Zealand,http://www.wikidata.org/entity/Q664 +Kingdom of the Netherlands,http://www.wikidata.org/entity/Q29999 +India,http://www.wikidata.org/entity/Q668 +Tuvalu,http://www.wikidata.org/entity/Q672 +Samoa,http://www.wikidata.org/entity/Q683 +Solomon Islands,http://www.wikidata.org/entity/Q685 +Vanuatu,http://www.wikidata.org/entity/Q686 +Papua New Guinea,http://www.wikidata.org/entity/Q691 +Palau,http://www.wikidata.org/entity/Q695 +Nauru,http://www.wikidata.org/entity/Q697 +Federated States of Micronesia,http://www.wikidata.org/entity/Q702 +Marshall Islands,http://www.wikidata.org/entity/Q709 +Kiribati,http://www.wikidata.org/entity/Q710 +Mongolia,http://www.wikidata.org/entity/Q711 +Fiji,http://www.wikidata.org/entity/Q712 +Venezuela,http://www.wikidata.org/entity/Q717 +Paraguay,http://www.wikidata.org/entity/Q733 +Guyana,http://www.wikidata.org/entity/Q734 +Ecuador,http://www.wikidata.org/entity/Q736 +Colombia,http://www.wikidata.org/entity/Q739 +Bolivia,http://www.wikidata.org/entity/Q750 +Trinidad and Tobago,http://www.wikidata.org/entity/Q754 +Saint Vincent and the Grenadines,http://www.wikidata.org/entity/Q757 +Saint Lucia,http://www.wikidata.org/entity/Q760 +Saint Kitts and Nevis,http://www.wikidata.org/entity/Q763 +Jamaica,http://www.wikidata.org/entity/Q766 +Grenada,http://www.wikidata.org/entity/Q769 +Guatemala,http://www.wikidata.org/entity/Q774 +The Bahamas,http://www.wikidata.org/entity/Q778 +Antigua and Barbuda,http://www.wikidata.org/entity/Q781 +Honduras,http://www.wikidata.org/entity/Q783 +Dominica,http://www.wikidata.org/entity/Q784 +Dominican Republic,http://www.wikidata.org/entity/Q786 +Haiti,http://www.wikidata.org/entity/Q790 +El Salvador,http://www.wikidata.org/entity/Q792 +Iran,http://www.wikidata.org/entity/Q794 +Iraq,http://www.wikidata.org/entity/Q796 +Costa Rica,http://www.wikidata.org/entity/Q800 +Israel,http://www.wikidata.org/entity/Q801 +Yemen,http://www.wikidata.org/entity/Q805 +Jordan,http://www.wikidata.org/entity/Q810 +Nicaragua,http://www.wikidata.org/entity/Q811 +Kyrgyzstan,http://www.wikidata.org/entity/Q813 +Laos,http://www.wikidata.org/entity/Q819 +Lebanon,http://www.wikidata.org/entity/Q822 +Maldives,http://www.wikidata.org/entity/Q826 +Malaysia,http://www.wikidata.org/entity/Q833 +Myanmar,http://www.wikidata.org/entity/Q836 +Nepal,http://www.wikidata.org/entity/Q837 +Oman,http://www.wikidata.org/entity/Q842 +Pakistan,http://www.wikidata.org/entity/Q843 +Qatar,http://www.wikidata.org/entity/Q846 +Saudi Arabia,http://www.wikidata.org/entity/Q851 \ No newline at end of file diff --git a/scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv b/scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv new file mode 100644 index 0000000..3ec7e09 --- /dev/null +++ b/scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv @@ -0,0 +1,15 @@ +Illumian NextSeq 500,http://www.ebi.ac.uk/efo/EFO_0009173 +Illumina NextSeq 500,http://www.ebi.ac.uk/efo/EFO_0009173 +Nanopore MinION,http://www.ebi.ac.uk/efo/EFO_0008632 +Oxford Nanopore MinION,http://www.ebi.ac.uk/efo/EFO_0008632 +ONT (Oxford Nanopore Technologies),http://www.ebi.ac.uk/efo/EFO_0008632 +Oxford Nanopore technologies MinION,http://www.ebi.ac.uk/efo/EFO_0008632 +MinION Oxford Nanopore,http://www.ebi.ac.uk/efo/EFO_0008632 +Illumina MiSeq,http://www.ebi.ac.uk/efo/EFO_0004205 +Illumina,http://purl.obolibrary.org/obo/OBI_0000759 +Oxford Nanopore technology,http://purl.obolibrary.org/obo/NCIT_C146818 +Oxford Nanopore Technologies,http://purl.obolibrary.org/obo/NCIT_C146818 +Oxford Nanopore,http://purl.obolibrary.org/obo/NCIT_C146818 +IonTorrent,http://purl.obolibrary.org/obo/NCIT_C125894 +Ion Torrent X5Plus,http://purl.obolibrary.org/obo/NCIT_C125894 +Sanger dideoxy sequencing,http://purl.obolibrary.org/obo/NCIT_C19641 -- cgit v1.2.3