diff options
-rw-r--r-- | bh20sequploader/bh20seq-options.yml | 17 | ||||
-rw-r--r-- | bh20sequploader/bh20seq-schema.yml | 2 | ||||
-rw-r--r-- | bh20simplewebuploader/main.py | 86 | ||||
-rw-r--r-- | bh20simplewebuploader/templates/form.html | 19 | ||||
-rw-r--r-- | scripts/dict_ontology_standardization/ncbi_countries.csv | 243 | ||||
-rw-r--r-- | scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv | 15 | ||||
-rw-r--r-- | scripts/dict_ontology_standardization/ncbi_speciesman_source.csv | 22 | ||||
-rw-r--r-- | scripts/sequences.acc | 396 | ||||
-rw-r--r-- | setup.py | 2 |
9 files changed, 607 insertions, 195 deletions
diff --git a/bh20sequploader/bh20seq-options.yml b/bh20sequploader/bh20seq-options.yml new file mode 100644 index 0000000..d05be5a --- /dev/null +++ b/bh20sequploader/bh20seq-options.yml @@ -0,0 +1,17 @@ +# Contains suggested human-readable field values and their corresponding IRIs. +# Keyed on the field names in the types in the schema. Relies on field names +# being unique or at least using the same options in different containing +# types. + +host_age_unit: + Years: http://purl.obolibrary.org/obo/UO_0000036 + Months: http://purl.obolibrary.org/obo/UO_0000035 + Weeks: http://purl.obolibrary.org/obo/UO_0000034 + Days: http://purl.obolibrary.org/obo/UO_0000033 + Hours: http://purl.obolibrary.org/obo/UO_0000032 + +host_sex: + Male: http://purl.obolibrary.org/obo/NCIT_C20197 + Female: http://purl.obolibrary.org/obo/NCIT_C27993 + Intersex: http://purl.obolibrary.org/obo/NCIT_C45908 + Unknown: http://purl.obolibrary.org/obo/NCIT_C17998 diff --git a/bh20sequploader/bh20seq-schema.yml b/bh20sequploader/bh20seq-schema.yml index 7ffc15b..4cd0865 100644 --- a/bh20sequploader/bh20seq-schema.yml +++ b/bh20sequploader/bh20seq-schema.yml @@ -30,7 +30,7 @@ $graph: # jsonldPredicate: # _id: http://purl.obolibrary.org/obo/NOMEN_0000037 host_sex: - doc: Sex of the host as define in NCIT, IRI expected (http://purl.obolibrary.org/obo/C20197 (Male), http://purl.obolibrary.org/obo/NCIT_C27993 (Female) or unkown (http://purl.obolibrary.org/obo/NCIT_C17998)) + doc: Sex of the host as defined in NCIT, IRI expected (http://purl.obolibrary.org/obo/NCIT_C20197 (Male), http://purl.obolibrary.org/obo/NCIT_C27993 (Female), http://purl.obolibrary.org/obo/NCIT_C45908 (Intersex), or http://purl.obolibrary.org/obo/NCIT_C17998 (Unknown)) type: string jsonldPredicate: _id: http://purl.obolibrary.org/obo/PATO_0000047 diff --git a/bh20simplewebuploader/main.py b/bh20simplewebuploader/main.py index f5324a5..8c5c18c 100644 --- a/bh20simplewebuploader/main.py +++ b/bh20simplewebuploader/main.py @@ -7,7 +7,7 @@ import sys import re import string import yaml -import urllib.request +import pkg_resources from flask import Flask, request, redirect, send_file, send_from_directory, render_template import os.path @@ -25,7 +25,7 @@ app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 @app.errorhandler(413) def handle_large_file(e): return (render_template('error.html', - error_message="One of your files is too large. The maximum file size is 1 megabyte."), 413) + error_message="One of your files is too large. The maximum file size is 50 megabytes."), 413) def type_to_heading(type_name): @@ -49,12 +49,32 @@ def name_to_label(field_name): return string.capwords(field_name.replace('_', ' ')) -def generate_form(schema): +def is_iri(string): """ - Linearize the schema and send a bunch of dicts. + Return True if the given string looks like an IRI, and False otherwise. + + Used for finding type IRIs in the schema. + + Right now only supports http(s) URLs because that's all we have in our schema. + """ + + return string.startswith('http') + +def generate_form(schema, options): + """ + Linearize the schema into a list of dicts. + Each dict either has a 'heading' (in which case we put a heading for a form section in the template) or an 'id', 'label', 'type', and 'required' - (in which case we make a form field in the template). + (in which case we make a form field in the template). Non-heading dicts + with type 'select' will have an 'options' field, with a list of (name, + value) tuples, and represent a form dropdown element. Non-heading dicts may + have a human-readable 'docstring' field describing them. + + Takes the deserialized metadata schema YAML, and also a deserialized YAML + of option values. The option values are keyed on (unscoped) field name in + the schema, and each is a dict of human readable option -> corresponding + IRI. """ # Get the list of form components, one of which is the root @@ -90,16 +110,35 @@ def generate_form(schema): for field_name, field_type in by_name.get(type_name, {}).get('fields', {}).items(): # For each field - ref_url = None + ref_iri = None + docstring = None if not isinstance(field_type, str): # If the type isn't a string + + # It may have documentation + docstring = field_type.get('doc', None) + # See if it has a more info/what goes here URL predicate = field_type.get('jsonldPredicate', {}) - if not isinstance(predicate, str): - ref_url = predicate.get('_id', None) + # Predicate may be a URL, a dict with a URL in _id, maybe a + # dict with a URL in _type, or a dict with _id and _type but no + # URLs anywhere. Some of these may not technically be allowed + # by the format, but if they occur, we might as well try to + # handle them. + if isinstance(predicate, str): + if is_iri(predicate): + ref_iri = predicate else: - ref_url = predicate # not sure this is correct - # Grab out its type field + # Assume it's a dict. Look at the fields we know about. + for field in ['_id', 'type']: + field_value = predicate.get(field, None) + if isinstance(field_value, str) and is_iri(field_value) and ref_iri is None: + # Take the first URL-looking thing we find + ref_iri = field_value + break + + + # Now overwrite the field type with the actual type string field_type = field_type.get('type', '') # Decide if the field is optional (type ends in ?) @@ -115,14 +154,26 @@ def generate_form(schema): for item in walk_fields(field_type, parent_keys + [field_name], subtree_optional or optional): yield item else: - # We know how to make a string input + # This is a leaf field. We need an input for it. record = {} record['id'] = '.'.join(parent_keys + [field_name]) record['label'] = name_to_label(field_name) record['required'] = not optional and not subtree_optional - if ref_url: - record['ref_url'] = ref_url - if field_type == 'string': + if ref_iri: + record['ref_iri'] = ref_iri + if docstring: + record['docstring'] = docstring + + if field_name in options: + # The field will be a 'select' type no matter what its real + # data type is. + record['type'] = 'select' # Not a real HTML input type. It's its own tag. + # We have a set of values to present + record['options'] = [] + for name, value in options[field_name].items(): + # Make a tuple for each one + record['options'].append((name, value)) + elif field_type == 'string': record['type'] = 'text' # HTML input type elif field_type == 'int': record['type'] = 'number' @@ -133,9 +184,10 @@ def generate_form(schema): return list(walk_fields(root_name)) -# At startup, we need to load the current metadata schema so we can make a form for it -METADATA_SCHEMA = yaml.safe_load(urllib.request.urlopen('https://raw.githubusercontent.com/arvados/bh20-seq-resource/master/bh20sequploader/bh20seq-schema.yml')) -FORM_ITEMS = generate_form(METADATA_SCHEMA) +# At startup, we need to load the metadata schema from the uploader module, so we can make a form for it +METADATA_SCHEMA = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-schema.yml")) +METADATA_OPTION_DEFINITIONS = yaml.safe_load(pkg_resources.resource_stream("bh20sequploader", "bh20seq-options.yml")) +FORM_ITEMS = generate_form(METADATA_SCHEMA, METADATA_OPTION_DEFINITIONS) @app.route('/') def send_form(): diff --git a/bh20simplewebuploader/templates/form.html b/bh20simplewebuploader/templates/form.html index df66e8c..6993cf5 100644 --- a/bh20simplewebuploader/templates/form.html +++ b/bh20simplewebuploader/templates/form.html @@ -211,7 +211,6 @@ <div id="metadata_fill_form_spot"> <div id="metadata_fill_form"> - {{ record }} {% for record in fields %} {% if 'heading' in record %} @@ -221,15 +220,27 @@ <div class="record"> <h4>{{ record['heading'] }}</h4> {% else %} - <label for="{{ record['id'] }}"> + <label for="{{ record['id'] }}" title="{{ record.get('docstring', '') }}"> {{ record['label'] }} {{ "*" if record['required'] else "" }} - {% if 'ref_url' in record %} - <a href="{{ record['ref_url'] }}" title="More Info" target="_blank">?</a> + {% if 'docstring' in record %} + <a href='javascript:alert({{ record['docstring'] | tojson }})'>❓</a> + {% endif %} + {% if 'ref_iri' in record %} + <a href="{{ record['ref_iri'] }}" target="_blank" title="Ontology Link">🔗</a> {% endif %} </label> + {% if record['type'] == 'select' %} + <select id="{{ record['id'] }}" name="{{ record['id'] }}" {{ "required" if record['required'] else "" }}> + <option value="" selected>Choose one...</option> + {% for option in record['options'] %} + <option value="{{ option[1] }}">{{ option[0] }}</option> + {% endfor %} + </select> + {% else %} <input type="{{ record['type'] }}" id="{{ record['id'] }}" name="{{ record['id'] }}" {{ "required" if record['required'] else "" }}> {% endif %} + {% endif %} {% if loop.index == loop.length %} </div> {% endif %} diff --git a/scripts/dict_ontology_standardization/ncbi_countries.csv b/scripts/dict_ontology_standardization/ncbi_countries.csv new file mode 100644 index 0000000..9813f52 --- /dev/null +++ b/scripts/dict_ontology_standardization/ncbi_countries.csv @@ -0,0 +1,243 @@ +USA,http://www.wikidata.org/entity/Q30 +USA: CA,http://www.wikidata.org/entity/Q99 +USA: ME,http://www.wikidata.org/entity/Q724 +USA: NH,http://www.wikidata.org/entity/Q759 +USA: AL,http://www.wikidata.org/entity/Q173 +USA: MA,http://www.wikidata.org/entity/Q771 +USA: CT,http://www.wikidata.org/entity/Q779 +USA: AK,http://www.wikidata.org/entity/Q797 +USA: HI,http://www.wikidata.org/entity/Q782 +USA: FL,http://www.wikidata.org/entity/Q812 +USA: AZ,http://www.wikidata.org/entity/Q816 +USA: OR,http://www.wikidata.org/entity/Q824 +USA: UT,http://www.wikidata.org/entity/Q829 +USA: MI,http://www.wikidata.org/entity/Q1166 +USA: IL,http://www.wikidata.org/entity/Q1204 +USA: ND,http://www.wikidata.org/entity/Q1207 +USA: SD,http://www.wikidata.org/entity/Q1211 +USA: ID,http://www.wikidata.org/entity/Q1221 +USA: MT,http://www.wikidata.org/entity/Q1212 +USA: WY,http://www.wikidata.org/entity/Q1214 +USA: WA,http://www.wikidata.org/entity/Q1223 +USA: NV,http://www.wikidata.org/entity/Q1227 +USA: CO,http://www.wikidata.org/entity/Q1261 +USA: WV,http://www.wikidata.org/entity/Q1371 +USA: VA,http://www.wikidata.org/entity/Q1370 +USA: RI,http://www.wikidata.org/entity/Q1387 +USA: NY,http://www.wikidata.org/entity/Q1384 +USA: DE,http://www.wikidata.org/entity/Q1393 +USA: OH,http://www.wikidata.org/entity/Q1397 +USA: MD,http://www.wikidata.org/entity/Q1391 +USA: NJ,http://www.wikidata.org/entity/Q1408 +USA: GA,http://www.wikidata.org/entity/Q1428 +USA: PA,http://www.wikidata.org/entity/Q1400 +USA: IN,http://www.wikidata.org/entity/Q1415 +USA: TX,http://www.wikidata.org/entity/Q1439 +USA: NC,http://www.wikidata.org/entity/Q1454 +USA: MS,http://www.wikidata.org/entity/Q1494 +USA: SC,http://www.wikidata.org/entity/Q1456 +USA: NM,http://www.wikidata.org/entity/Q1522 +USA: TN,http://www.wikidata.org/entity/Q1509 +USA: MN,http://www.wikidata.org/entity/Q1527 +USA: WI,http://www.wikidata.org/entity/Q1537 +USA: NE,http://www.wikidata.org/entity/Q1553 +USA: IA,http://www.wikidata.org/entity/Q1546 +USA: OK,http://www.wikidata.org/entity/Q1649 +USA: KS,http://www.wikidata.org/entity/Q1558 +USA: VT,http://www.wikidata.org/entity/Q16551 +USA: MO,http://www.wikidata.org/entity/Q1581 +USA: LA,http://www.wikidata.org/entity/Q1588 +USA: KY,http://www.wikidata.org/entity/Q1603 +USA: AR,http://www.wikidata.org/entity/Q1612 +China,http://www.wikidata.org/entity/Q148 +China: Beijing,http://www.wikidata.org/entity/Q956 +China: Hong Kong,http://www.wikidata.org/entity/Q8646 +China: Shanghai,http://www.wikidata.org/entity/Q8686 +China: Chongqing,http://www.wikidata.org/entity/Q11725 +China: Tianjin,http://www.wikidata.org/entity/Q11736 +China: Macau,http://www.wikidata.org/entity/Q14773 +China: Guangdong,http://www.wikidata.org/entity/Q15175 +China: Guangxi Zhuang Autonomous Region,http://www.wikidata.org/entity/Q15176 +China: Jiangsu,http://www.wikidata.org/entity/Q16963 +China: Zhejiang,http://www.wikidata.org/entity/Q16967 +China: Tibet Autonomous Region,http://www.wikidata.org/entity/Q17269 +China: Heilongjiang,http://www.wikidata.org/entity/Q19206 +China: Sichuan,http://www.wikidata.org/entity/Q19770 +China: Hebei,http://www.wikidata.org/entity/Q21208 +China: Xinjiang,http://www.wikidata.org/entity/Q34800 +China: Anhui,http://www.wikidata.org/entity/Q40956 +China: Inner Mongolia,http://www.wikidata.org/entity/Q41079 +China: Fujian,http://www.wikidata.org/entity/Q41705 +China: Hainan,http://www.wikidata.org/entity/Q42200 +China: Gansu,http://www.wikidata.org/entity/Q42392 +China: Yunnan,http://www.wikidata.org/entity/Q43194 +China: Shandong,http://www.wikidata.org/entity/Q43407 +China: Henan,http://www.wikidata.org/entity/Q43684 +China: Liaoning,http://www.wikidata.org/entity/Q43934 +China: Jilin,http://www.wikidata.org/entity/Q45208 +China: Hunan,http://www.wikidata.org/entity/Q45761 +China: Hubei,http://www.wikidata.org/entity/Q46862 +China: Qinghai,http://www.wikidata.org/entity/Q45833 +China: Shanxi,http://www.wikidata.org/entity/Q46913 +China: Shaanxi,http://www.wikidata.org/entity/Q47974 +China: Guizhou,http://www.wikidata.org/entity/Q47097 +China: Jiangxi,http://www.wikidata.org/entity/Q57052 +China: Ningxia Hui Autonomous Region,http://www.wikidata.org/entity/Q57448 +30.59 N 114.3 E,http://www.wikidata.org/entity/Q11746 +Sri Lanka,http://www.wikidata.org/entity/Q854 +Syria,http://www.wikidata.org/entity/Q858 +Tajikistan,http://www.wikidata.org/entity/Q863 +Canada,http://www.wikidata.org/entity/Q16 +Thailand,http://www.wikidata.org/entity/Q869 +Japan,http://www.wikidata.org/entity/Q17 +Turkmenistan,http://www.wikidata.org/entity/Q874 +Norway,http://www.wikidata.org/entity/Q20 +Ireland,http://www.wikidata.org/entity/Q27 +United Arab Emirates,http://www.wikidata.org/entity/Q878 +Vietnam,http://www.wikidata.org/entity/Q881 +United States of America,http://www.wikidata.org/entity/Q30 +South Korea,http://www.wikidata.org/entity/Q884 +Denmark,http://www.wikidata.org/entity/Q35 +Afghanistan,http://www.wikidata.org/entity/Q889 +Bangladesh,http://www.wikidata.org/entity/Q902 +Poland,http://www.wikidata.org/entity/Q36 +Mali,http://www.wikidata.org/entity/Q912 +Italy,http://www.wikidata.org/entity/Q38 +Angola,http://www.wikidata.org/entity/Q916 +Switzerland,http://www.wikidata.org/entity/Q39 +Bhutan,http://www.wikidata.org/entity/Q917 +Austria,http://www.wikidata.org/entity/Q40 +Brunei,http://www.wikidata.org/entity/Q921 +Tanzania,http://www.wikidata.org/entity/Q924 +Turkey,http://www.wikidata.org/entity/Q43 +Philippines,http://www.wikidata.org/entity/Q928 +Portugal,http://www.wikidata.org/entity/Q45 +Uruguay,http://www.wikidata.org/entity/Q77 +Central African Republic,http://www.wikidata.org/entity/Q929 +Togo,http://www.wikidata.org/entity/Q945 +Egypt,http://www.wikidata.org/entity/Q79 +Tunisia,http://www.wikidata.org/entity/Q948 +Zambia,http://www.wikidata.org/entity/Q953 +Mexico,http://www.wikidata.org/entity/Q96 +Zimbabwe,http://www.wikidata.org/entity/Q954 +South Sudan,http://www.wikidata.org/entity/Q958 +Kenya,http://www.wikidata.org/entity/Q114 +Benin,http://www.wikidata.org/entity/Q962 +Ethiopia,http://www.wikidata.org/entity/Q115 +Botswana,http://www.wikidata.org/entity/Q963 +Ghana,http://www.wikidata.org/entity/Q117 +Burkina Faso,http://www.wikidata.org/entity/Q965 +Burundi,http://www.wikidata.org/entity/Q967 +France,http://www.wikidata.org/entity/Q142 +Comoros,http://www.wikidata.org/entity/Q970 +United Kingdom,http://www.wikidata.org/entity/Q145 +Republic of the Congo,http://www.wikidata.org/entity/Q971 +People's Republic of China,http://www.wikidata.org/entity/Q148 +Democratic Republic of the Congo,http://www.wikidata.org/entity/Q974 +Brazil,http://www.wikidata.org/entity/Q155 +Djibouti,http://www.wikidata.org/entity/Q977 +Germany,http://www.wikidata.org/entity/Q183 +Eritrea,http://www.wikidata.org/entity/Q986 +The Gambia,http://www.wikidata.org/entity/Q1005 +Latvia,http://www.wikidata.org/entity/Q211 +Czech Republic,http://www.wikidata.org/entity/Q213 +Guinea,http://www.wikidata.org/entity/Q1006 +Guinea-Bissau,http://www.wikidata.org/entity/Q1007 +Slovakia,http://www.wikidata.org/entity/Q214 +Ivory Coast,http://www.wikidata.org/entity/Q1008 +Romania,http://www.wikidata.org/entity/Q218 +Cape Verde,http://www.wikidata.org/entity/Q1011 +Cameroon,http://www.wikidata.org/entity/Q1009 +Bulgaria,http://www.wikidata.org/entity/Q219 +Lesotho,http://www.wikidata.org/entity/Q1013 +Croatia,http://www.wikidata.org/entity/Q224 +Liberia,http://www.wikidata.org/entity/Q1014 +Libya,http://www.wikidata.org/entity/Q1016 +Kazakhstan,http://www.wikidata.org/entity/Q232 +Montenegro,http://www.wikidata.org/entity/Q236 +Madagascar,http://www.wikidata.org/entity/Q1019 +Barbados,http://www.wikidata.org/entity/Q244 +Indonesia,http://www.wikidata.org/entity/Q252 +Malawi,http://www.wikidata.org/entity/Q1020 +Mauritania,http://www.wikidata.org/entity/Q1025 +South Africa,http://www.wikidata.org/entity/Q258 +Mauritius,http://www.wikidata.org/entity/Q1027 +Algeria,http://www.wikidata.org/entity/Q262 +Morocco,http://www.wikidata.org/entity/Q1028 +Mozambique,http://www.wikidata.org/entity/Q1029 +Uzbekistan,http://www.wikidata.org/entity/Q265 +Namibia,http://www.wikidata.org/entity/Q1030 +Chile,http://www.wikidata.org/entity/Q298 +Niger,http://www.wikidata.org/entity/Q1032 +Singapore,http://www.wikidata.org/entity/Q334 +Nigeria,http://www.wikidata.org/entity/Q1033 +Bahrain,http://www.wikidata.org/entity/Q398 +Uganda,http://www.wikidata.org/entity/Q1036 +Australia,http://www.wikidata.org/entity/Q408 +Rwanda,http://www.wikidata.org/entity/Q1037 +Argentina,http://www.wikidata.org/entity/Q414 +São Tomé and Príncipe,http://www.wikidata.org/entity/Q1039 +Peru,http://www.wikidata.org/entity/Q419 +Senegal,http://www.wikidata.org/entity/Q1041 +Seychelles,http://www.wikidata.org/entity/Q1042 +North Korea,http://www.wikidata.org/entity/Q423 +Sierra Leone,http://www.wikidata.org/entity/Q1044 +Cambodia,http://www.wikidata.org/entity/Q424 +Sudan,http://www.wikidata.org/entity/Q1049 +Somalia,http://www.wikidata.org/entity/Q1045 +Eswatini,http://www.wikidata.org/entity/Q1050 +East Timor,http://www.wikidata.org/entity/Q574 +Chad,http://www.wikidata.org/entity/Q657 +New Zealand,http://www.wikidata.org/entity/Q664 +Kingdom of the Netherlands,http://www.wikidata.org/entity/Q29999 +India,http://www.wikidata.org/entity/Q668 +Tuvalu,http://www.wikidata.org/entity/Q672 +Samoa,http://www.wikidata.org/entity/Q683 +Solomon Islands,http://www.wikidata.org/entity/Q685 +Vanuatu,http://www.wikidata.org/entity/Q686 +Papua New Guinea,http://www.wikidata.org/entity/Q691 +Palau,http://www.wikidata.org/entity/Q695 +Nauru,http://www.wikidata.org/entity/Q697 +Federated States of Micronesia,http://www.wikidata.org/entity/Q702 +Marshall Islands,http://www.wikidata.org/entity/Q709 +Kiribati,http://www.wikidata.org/entity/Q710 +Mongolia,http://www.wikidata.org/entity/Q711 +Fiji,http://www.wikidata.org/entity/Q712 +Venezuela,http://www.wikidata.org/entity/Q717 +Paraguay,http://www.wikidata.org/entity/Q733 +Guyana,http://www.wikidata.org/entity/Q734 +Ecuador,http://www.wikidata.org/entity/Q736 +Colombia,http://www.wikidata.org/entity/Q739 +Bolivia,http://www.wikidata.org/entity/Q750 +Trinidad and Tobago,http://www.wikidata.org/entity/Q754 +Saint Vincent and the Grenadines,http://www.wikidata.org/entity/Q757 +Saint Lucia,http://www.wikidata.org/entity/Q760 +Saint Kitts and Nevis,http://www.wikidata.org/entity/Q763 +Jamaica,http://www.wikidata.org/entity/Q766 +Grenada,http://www.wikidata.org/entity/Q769 +Guatemala,http://www.wikidata.org/entity/Q774 +The Bahamas,http://www.wikidata.org/entity/Q778 +Antigua and Barbuda,http://www.wikidata.org/entity/Q781 +Honduras,http://www.wikidata.org/entity/Q783 +Dominica,http://www.wikidata.org/entity/Q784 +Dominican Republic,http://www.wikidata.org/entity/Q786 +Haiti,http://www.wikidata.org/entity/Q790 +El Salvador,http://www.wikidata.org/entity/Q792 +Iran,http://www.wikidata.org/entity/Q794 +Iraq,http://www.wikidata.org/entity/Q796 +Costa Rica,http://www.wikidata.org/entity/Q800 +Israel,http://www.wikidata.org/entity/Q801 +Yemen,http://www.wikidata.org/entity/Q805 +Jordan,http://www.wikidata.org/entity/Q810 +Nicaragua,http://www.wikidata.org/entity/Q811 +Kyrgyzstan,http://www.wikidata.org/entity/Q813 +Laos,http://www.wikidata.org/entity/Q819 +Lebanon,http://www.wikidata.org/entity/Q822 +Maldives,http://www.wikidata.org/entity/Q826 +Malaysia,http://www.wikidata.org/entity/Q833 +Myanmar,http://www.wikidata.org/entity/Q836 +Nepal,http://www.wikidata.org/entity/Q837 +Oman,http://www.wikidata.org/entity/Q842 +Pakistan,http://www.wikidata.org/entity/Q843 +Qatar,http://www.wikidata.org/entity/Q846 +Saudi Arabia,http://www.wikidata.org/entity/Q851
\ No newline at end of file diff --git a/scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv b/scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv new file mode 100644 index 0000000..3ec7e09 --- /dev/null +++ b/scripts/dict_ontology_standardization/ncbi_sequencing_technology.csv @@ -0,0 +1,15 @@ +Illumian NextSeq 500,http://www.ebi.ac.uk/efo/EFO_0009173 +Illumina NextSeq 500,http://www.ebi.ac.uk/efo/EFO_0009173 +Nanopore MinION,http://www.ebi.ac.uk/efo/EFO_0008632 +Oxford Nanopore MinION,http://www.ebi.ac.uk/efo/EFO_0008632 +ONT (Oxford Nanopore Technologies),http://www.ebi.ac.uk/efo/EFO_0008632 +Oxford Nanopore technologies MinION,http://www.ebi.ac.uk/efo/EFO_0008632 +MinION Oxford Nanopore,http://www.ebi.ac.uk/efo/EFO_0008632 +Illumina MiSeq,http://www.ebi.ac.uk/efo/EFO_0004205 +Illumina,http://purl.obolibrary.org/obo/OBI_0000759 +Oxford Nanopore technology,http://purl.obolibrary.org/obo/NCIT_C146818 +Oxford Nanopore Technologies,http://purl.obolibrary.org/obo/NCIT_C146818 +Oxford Nanopore,http://purl.obolibrary.org/obo/NCIT_C146818 +IonTorrent,http://purl.obolibrary.org/obo/NCIT_C125894 +Ion Torrent X5Plus,http://purl.obolibrary.org/obo/NCIT_C125894 +Sanger dideoxy sequencing,http://purl.obolibrary.org/obo/NCIT_C19641 diff --git a/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv new file mode 100644 index 0000000..fcd6c94 --- /dev/null +++ b/scripts/dict_ontology_standardization/ncbi_speciesman_source.csv @@ -0,0 +1,22 @@ +nasopharyngeal swab, http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831 +respiratory swab,http://purl.obolibrary.org/obo/NCIT_C155831 +naso-pharyngeal exudate,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal aspirate,http://purl.obolibrary.org/obo/NCIT_C155831 +nasal swab specimen,http://purl.obolibrary.org/obo/NCIT_C155831 +pharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155831 +respiratory secretion,http://purl.obolibrary.org/obo/NCIT_C155831 +mid-nasal swab,http://purl.obolibrary.org/obo/NCIT_C155831 +nasopharyngeal (throat) washings,http://purl.obolibrary.org/obo/NCIT_C155831 +oropharyngeal swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat swab,http://purl.obolibrary.org/obo/NCIT_C155835 +oro-pharyngeal,http://purl.obolibrary.org/obo/NCIT_C155835 +buccal swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat washing,http://purl.obolibrary.org/obo/NCIT_C155835 +Throat Swab,http://purl.obolibrary.org/obo/NCIT_C155835 +throat (oropharyngeal) swab,http://purl.obolibrary.org/obo/NCIT_C155835 +bronchoalveolar lavage fluid,http://purl.obolibrary.org/obo/NCIT_C13195 +swab,http://purl.obolibrary.org/obo/NCIT_C13195 +oral swab,http://purl.obolibrary.org/obo/NCIT_C13195 +bronchoalveolar lavage,http://purl.obolibrary.org/obo/NCIT_C13195 +sputum,http://purl.obolibrary.org/obo/NCIT_C13278 diff --git a/scripts/sequences.acc b/scripts/sequences.acc index 62bde2c..a420fb4 100644 --- a/scripts/sequences.acc +++ b/scripts/sequences.acc @@ -1,25 +1,159 @@ -MT325599 -MT325601 -MT325602 -MT325607 -MT325608 -MT325609 -MT325610 -MT325612 -MT325616 -MT325617 -MT325618 -MT325622 -MT325623 -MT325600 -MT325606 -MT325611 -MT325613 -MT325615 -MT325619 -MT325620 -MT325624 -MT325625 +NC_045512 +MT334522 +MT334523 +MT334524 +MT334525 +MT334526 +MT334527 +MT334528 +MT334529 +MT334530 +MT334531 +MT334532 +MT334533 +MT334534 +MT334535 +MT334536 +MT334537 +MT334538 +MT334539 +MT334540 +MT334541 +MT334542 +MT334543 +MT334544 +MT334545 +MT334546 +MT334555 +MT334547 +MT334548 +MT334549 +MT334550 +MT334551 +MT334552 +MT334553 +MT334554 +MT334556 +MT334557 +MT334558 +MT334559 +MT334560 +MT334561 +MT334562 +MT334563 +MT334564 +MT334565 +MT334566 +MT334567 +MT334568 +MT334569 +MT334570 +MT334571 +MT334572 +MT334573 +MT324062 +MT324680 +MT324684 +MT325573 +MT325574 +MT325576 +MT325577 +MT325578 +MT325580 +MT325591 +MT325592 +MT325593 +MT325595 +MT325605 +MT325627 +MT326028 +MT326029 +MT326031 +MT326048 +MT326093 +MT326092 +MT326091 +MT326090 +MT326085 +MT326084 +MT326083 +MT326082 +MT326081 +MT326080 +MT326077 +MT326067 +MT326057 +MT326024 +MT326025 +MT326032 +MT326033 +MT326035 +MT326036 +MT326037 +MT326040 +MT326041 +MT326043 +MT326044 +MT326046 +MT326049 +MT326050 +MT326052 +MT326053 +MT326055 +MT326056 +MT326059 +MT326062 +MT326063 +MT326066 +MT326069 +MT326070 +MT326071 +MT326073 +MT326074 +MT326075 +MT326088 +MT326089 +MT327745 +MT325568 +MT325572 +MT325575 +MT325583 +MT325584 +MT325604 +MT325631 +MT325632 +MT325635 +MT325636 +MT325637 +MT326095 +MT326096 +MT326103 +MT326112 +MT326113 +MT326114 +MT326115 +MT326122 +MT326131 +MT326132 +MT326133 +MT325563 +MT326164 +MT326166 +MT326167 +MT325570 +MT325579 +MT325581 +MT325582 +MT325586 +MT325594 +MT325598 +MT325626 +MT325628 +MT325633 +MT325634 +MT326030 +MT326038 +MT326058 MT325565 MT325566 MT326147 @@ -56,7 +190,6 @@ MT326121 MT326119 MT326109 MT326100 -MT325568 MT324679 MT325561 MT325571 @@ -95,31 +228,11 @@ MT326177 MT326184 MT326185 MT326187 -MT325572 -MT325575 -MT325583 -MT325584 -MT325604 -MT325631 -MT325632 -MT325635 -MT325636 -MT325637 -MT326095 -MT326096 -MT326103 -MT326112 -MT326113 -MT326114 -MT326115 -MT326122 -MT326131 -MT326132 -MT326133 -MT325563 -MT326164 -MT326166 -MT326167 +MT324681 +MT324682 +MT324683 +MT328032 +MT328035 MT325569 MT326097 MT326106 @@ -177,146 +290,83 @@ MT326101 MT326099 MT326098 MT326094 -MT326093 -MT326092 -MT326091 -MT326090 -MT326085 -MT326084 -MT326083 -MT326082 -MT326081 -MT326080 -MT326077 -MT326067 -MT326057 -MT326024 -MT326025 -MT326032 -MT326033 -MT326035 -MT326036 -MT326037 -MT326040 -MT326041 -MT326043 -MT326044 -MT326046 -MT326049 -MT326050 -MT326052 -MT326053 -MT326055 -MT326056 -MT326059 -MT326062 -MT326063 -MT326066 -MT326069 -MT326070 -MT326071 -MT326073 -MT326074 -MT326075 -MT326088 -MT326089 -MT327745 -MT324062 -MT324680 -MT324684 -MT325573 -MT325574 -MT325576 -MT325577 -MT325578 -MT325580 -MT325591 -MT325592 -MT325593 -MT325595 -MT325605 -MT325627 -MT326028 -MT326029 -MT326031 -MT326048 -MT325570 -MT325579 -MT325581 -MT325582 -MT325586 -MT325594 -MT325598 -MT325626 -MT325628 -MT325633 -MT325634 -MT326030 -MT326038 -MT326058 -MT324681 -MT324682 -MT324683 -MT328032 -MT328035 +MT325599 +MT325601 +MT325602 +MT325607 +MT325608 +MT325609 +MT325610 +MT325612 +MT325616 +MT325617 +MT325618 +MT325622 +MT325623 +MT325600 +MT325606 +MT325611 +MT325613 +MT325615 +MT325619 +MT325620 +MT325624 +MT325625 +MT322394 +MT322395 +MT322420 +MT322424 MT039874 MT077125 -MT322394 +MT322396 MT322397 -MT322398 MT322399 +MT322403 +MT322406 +MT322407 +MT322412 +MT322413 +MT322414 +MT322416 +MT322398 MT322400 MT322401 -MT322403 +MT322402 MT322404 MT322405 -MT322406 MT322408 MT322409 MT322410 MT322411 -MT322412 -MT322413 -MT322414 MT322415 -MT322416 MT322417 MT322418 MT322419 -MT322420 MT322421 MT322422 MT322423 -MT322424 -MT322396 -MT322402 -MT322395 -MT322407 -MT320538 MT320891 +MT320538 MT308692 MT308693 +MT308695 +MT308696 MT308698 MT308699 +MT308701 MT308703 MT308704 MT308694 -MT308695 -MT308696 MT308697 MT308700 -MT308701 MT308702 MT304476 MT304474 MT304475 MT293547 MT304477 -MT304483 -MT300186 MT304478 MT304479 -MT304480 MT304481 MT304482 MT304484 @@ -324,9 +374,12 @@ MT304485 MT304486 MT304487 MT304488 +MT304491 +MT304480 +MT304483 MT304489 MT304490 -MT304491 +MT300186 MT291831 MT291836 MT291834 @@ -366,7 +419,6 @@ MT293168 MT293175 MT293190 MT293191 -MT273658 MT293159 MT292582 MT293162 @@ -376,7 +428,6 @@ MT293165 MT293156 MT293157 MT293158 -MT281577 MT293171 MT293174 MT293176 @@ -426,6 +477,8 @@ MT293223 MT291826 MT291832 MT291833 +MT273658 +MT281577 MT281530 MT276331 MT276325 @@ -645,8 +698,6 @@ MT253700 MT253705 MT253709 MT253708 -MT233526 -MT246667 MT246451 MT246453 MT246454 @@ -689,6 +740,8 @@ MT246472 MT246473 MT246483 MT246484 +MT233526 +MT246667 MT240479 MT232869 MT232870 @@ -774,8 +827,8 @@ MT127113 MT127114 MT127115 MT126808 -LC528233 LC528232 +LC528233 MT123290 MT123291 MT123292 @@ -807,8 +860,8 @@ MT066159 MT066175 MT066176 LC523807 -LC523808 LC523809 +LC523808 MT044258 MT044257 MT042777 @@ -830,7 +883,6 @@ LC522350 MT027062 MT027063 MT027064 -MT020781 MT019530 MT019531 MT020881 @@ -838,13 +890,14 @@ MT019533 MT019529 MT019532 MT020880 +MT020781 LR757995 LR757996 LR757997 LR757998 MT007544 -MT008023 MT008022 +MT008023 MN996530 MN996531 MN996527 @@ -856,22 +909,21 @@ MN988668 MN988669 MN994467 MN988713 -MN938387 -MN938389 -MN975263 -MN975268 +MN938384 +MN975262 +MN985325 +MN975264 +MN975266 MN975267 +MN975268 MN938388 +MN938389 MN938390 -MN975264 -MN975265 -MN975266 -MN938386 +MN975263 MN938385 -MN938384 -MN975262 -MN985325 +MN938386 +MN938387 +MN975265 MN970003 MN970004 -NC_045512 MN908947 @@ -31,7 +31,7 @@ setup( author_email="peter.amstutz@curii.com", license="Apache 2.0", packages=["bh20sequploader", "bh20seqanalyzer", "bh20simplewebuploader"], - package_data={"bh20sequploader": ["bh20seq-schema.yml", "validation/formats"], + package_data={"bh20sequploader": ["bh20seq-schema.yml", "bh20seq-options.yml", "validation/formats"], }, install_requires=install_requires, extras_require={ |