From 43d7264dda8061a024befbc9ca0a89d7159b1e40 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 6 Nov 2020 09:52:32 +0000 Subject: UTHSC upload info --- scripts/uthsc_samples/uthsc_samples.py | 57 ++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 scripts/uthsc_samples/uthsc_samples.py (limited to 'scripts/uthsc_samples/uthsc_samples.py') diff --git a/scripts/uthsc_samples/uthsc_samples.py b/scripts/uthsc_samples/uthsc_samples.py new file mode 100644 index 0000000..5c39398 --- /dev/null +++ b/scripts/uthsc_samples/uthsc_samples.py @@ -0,0 +1,57 @@ +import os +import pandas as pd +from string import Template +from dateutil.parser import parse +import re + +import sys + +# Metadata in tabular format in a spreadsheet(?!) +xlsx = '../../test/data/10_samples.xlsx' + +# Template in a text file +template_yaml = 'template.yaml' + +dir_output = 'yaml' + +if not os.path.exists(dir_output): + os.makedirs(dir_output) + +table = pd.read_excel(xlsx) + +print(table) + +for index, row in table.iterrows(): + sample = row['Sample ID'] + print(f"Processing sample {sample}...") + + with open(template_yaml) as f: + text = Template(f.read()) + with open(os.path.join(dir_output,f"{sample}.yaml"), 'w') as fw: + sample_id = sample + sample_name = sample + collection_date = parse(str(row['Collection Date'])).strftime('%Y-%m-%d') + locationx = row['City']+", "+row['State']+", USA" + location = "https://www.wikidata.org/wiki/Q16563" # Memphis by default + map = { + "Pegram": "https://www.wikidata.org/wiki/Q3289517", + "Alexander": "https://www.wikidata.org/wiki/Q79663", + "Smithville": "https://www.wikidata.org/wiki/Q2145339", + "Nashville": "https://www.wikidata.org/wiki/Q23197", + "Madison": "https://www.wikidata.org/wiki/Q494755" + } + + for name in map: + p = re.compile(name) + if p.match(locationx): + location = map[name] + break + + strain = f"SARS-CoV-2/human/USA/{sample}/2020" + fw.write(text.substitute(sample_id=sample_id, + sample_name=sample_name, + collection_date=collection_date, + location=location, + locationx=locationx, + strain=strain + )) -- cgit v1.2.3 From 039ad90b1627464b687adddb08cf489dca3c5fbc Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 6 Nov 2020 10:14:21 +0000 Subject: Using correct wikidata geo link --- scripts/uthsc_samples/uthsc_samples.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'scripts/uthsc_samples/uthsc_samples.py') diff --git a/scripts/uthsc_samples/uthsc_samples.py b/scripts/uthsc_samples/uthsc_samples.py index 5c39398..c18c07a 100644 --- a/scripts/uthsc_samples/uthsc_samples.py +++ b/scripts/uthsc_samples/uthsc_samples.py @@ -32,13 +32,13 @@ for index, row in table.iterrows(): sample_name = sample collection_date = parse(str(row['Collection Date'])).strftime('%Y-%m-%d') locationx = row['City']+", "+row['State']+", USA" - location = "https://www.wikidata.org/wiki/Q16563" # Memphis by default + location = "http://www.wikidata.org/enitity/Q16563" # Memphis by default map = { - "Pegram": "https://www.wikidata.org/wiki/Q3289517", - "Alexander": "https://www.wikidata.org/wiki/Q79663", - "Smithville": "https://www.wikidata.org/wiki/Q2145339", - "Nashville": "https://www.wikidata.org/wiki/Q23197", - "Madison": "https://www.wikidata.org/wiki/Q494755" + "Pegram": "http://www.wikidata.org/enitity/Q3289517", + "Alexander": "http://www.wikidata.org/enitity/Q79663", + "Smithville": "http://www.wikidata.org/enitity/Q2145339", + "Nashville": "http://www.wikidata.org/enitity/Q23197", + "Madison": "http://www.wikidata.org/enitity/Q494755" } for name in map: -- cgit v1.2.3 From 951ebe949d88cdbfed028e0a2a420ce7921c3919 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 6 Nov 2020 10:31:56 +0000 Subject: Countries --- scripts/db_enrichment/input_location.csv | 21 ++++++--- scripts/uthsc_samples/uthsc_samples.py | 12 ++--- semantic_enrichment/countries.ttl | 75 ++++++++++++++++++++++++-------- 3 files changed, 80 insertions(+), 28 deletions(-) (limited to 'scripts/uthsc_samples/uthsc_samples.py') diff --git a/scripts/db_enrichment/input_location.csv b/scripts/db_enrichment/input_location.csv index eb5322a..a4246cd 100644 --- a/scripts/db_enrichment/input_location.csv +++ b/scripts/db_enrichment/input_location.csv @@ -1,5 +1,16 @@ -http://www.wikidata.org/entity/Q3289517 -http://www.wikidata.org/entity/Q79663 -http://www.wikidata.org/entity/Q2145339 -http://www.wikidata.org/entity/Q23197 -http://www.wikidata.org/entity/Q494755 +http://www.wikidata.org/entity/Q7960498 +http://www.wikidata.org/entity/Q692895 +http://www.wikidata.org/entity/Q928 +http://www.wikidata.org/entity/Q2722074 +http://www.wikidata.org/entity/Q25622187 +http://www.wikidata.org/entity/Q27684996 +http://www.wikidata.org/entity/Q2757125 +http://www.wikidata.org/entity/Q1922283 +http://www.wikidata.org/entity/Q490 +http://www.wikidata.org/entity/Q677037 +http://www.wikidata.org/entity/Q3037 +http://www.wikidata.org/entity/Q843 +http://www.wikidata.org/entity/Q183 +http://www.wikidata.org/entity/Q29 +http://www.wikidata.org/entity/Q17 +http://www.wikidata.org/entity/Q810 diff --git a/scripts/uthsc_samples/uthsc_samples.py b/scripts/uthsc_samples/uthsc_samples.py index c18c07a..3ad2561 100644 --- a/scripts/uthsc_samples/uthsc_samples.py +++ b/scripts/uthsc_samples/uthsc_samples.py @@ -32,13 +32,13 @@ for index, row in table.iterrows(): sample_name = sample collection_date = parse(str(row['Collection Date'])).strftime('%Y-%m-%d') locationx = row['City']+", "+row['State']+", USA" - location = "http://www.wikidata.org/enitity/Q16563" # Memphis by default + location = "http://www.wikidata.org/entity/Q16563" # Memphis by default map = { - "Pegram": "http://www.wikidata.org/enitity/Q3289517", - "Alexander": "http://www.wikidata.org/enitity/Q79663", - "Smithville": "http://www.wikidata.org/enitity/Q2145339", - "Nashville": "http://www.wikidata.org/enitity/Q23197", - "Madison": "http://www.wikidata.org/enitity/Q494755" + "Pegram": "http://www.wikidata.org/entity/Q3289517", + "Alexander": "http://www.wikidata.org/entity/Q79663", + "Smithville": "http://www.wikidata.org/entity/Q2145339", + "Nashville": "http://www.wikidata.org/entity/Q23197", + "Madison": "http://www.wikidata.org/entity/Q494755" } for name in map: diff --git a/semantic_enrichment/countries.ttl b/semantic_enrichment/countries.ttl index fe50b16..728877f 100644 --- a/semantic_enrichment/countries.ttl +++ b/semantic_enrichment/countries.ttl @@ -220,7 +220,6 @@ ns1:P17 ; ns1:P625 "Point(-120.0 37.0)" . - rdfs:label "Brazil" ; ns1:P17 ; ns1:P30 ; @@ -1157,7 +1156,9 @@ ns1:P625 "Point(28.0 -14.0)" . rdfs:label "Mexico" ; - ns1:P30 . + ns1:P17 ; + ns1:P30 ; + ns1:P625 "Point(-102.0 23.0)" . rdfs:label "Morocco" ; ns1:P17 ; @@ -1170,32 +1171,17 @@ ; ns1:P625 "Point(94.25 66.416666666)" . - rdfs:label "Germany" ; - ns1:P30 . - - rdfs:label "Spain" ; - ns1:P30 . - - rdfs:label "Italy" ; - ns1:P30 . - rdfs:label "Netherlands" ; ns1:P17 ; ns1:P30 , ; ns1:P625 "Point(5.55 52.316666666)" . - rdfs:label "Jordan" ; - ns1:P30 . - rdfs:label "Sierra Leone" ; ns1:P17 ; ns1:P30 ; ns1:P625 "Point(-11.916667 8.5)" . - rdfs:label "Japan" ; - ns1:P30 . - rdfs:label "Tunisia" ; ns1:P17 ; ns1:P30 ; @@ -1348,6 +1334,61 @@ ns1:P17 ; ns1:P625 "Point(-85.936008 32.933157)" . + rdfs:label "Mehsana" ; + ns1:P17 ; + ns1:P625 "Point(72.4 23.6)" . + + rdfs:label "Kathmandu" ; + ns1:P17 ; + ns1:P625 "Point(85.366666666 27.716666666)" . + + rdfs:label "Milan" ; + ns1:P17 ; + ns1:P625 "Point(9.19 45.466944444)" . + + rdfs:label "Telangana" ; + ns1:P17 ; + ns1:P625 "Point(79.59 17.99)" . + + rdfs:label "Philippines" ; + ns1:P17 ; + ns1:P30 ; + ns1:P625 "Point(123.0 12.0)" . + + rdfs:label "Mehsana" ; + ns1:P17 ; + ns1:P625 "Point(72.4 23.6)" . + + rdfs:label "Japan" ; + ns1:P17 ; + ns1:P30 ; + ns1:P625 "Point(136.0 35.0)" . + + rdfs:label "Germany" ; + ns1:P17 ; + ns1:P30 ; + ns1:P625 "Point(10.0 51.0)" . + + rdfs:label "Spain" ; + ns1:P17 ; + ns1:P30 ; + ns1:P625 "Point(-3.5 40.2)" . + + rdfs:label "Jordan" ; + ns1:P17 ; + ns1:P30 ; + ns1:P625 "Point(36.5 31.2)" . + + rdfs:label "Pakistan" ; + ns1:P17 ; + ns1:P30 ; + ns1:P625 "Point(71.0 30.0)" . + + rdfs:label "Philippines" ; + ns1:P17 ; + ns1:P30 ; + ns1:P625 "Point(123.0 12.0)" . + rdfs:label "Oceania" . rdfs:label "North America" . rdfs:label "South America" . -- cgit v1.2.3 From b311e2ec0f1d02cf16152855dd8bdd760ed4578b Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 8 Nov 2020 10:50:24 +0000 Subject: Uploaded UTHSC sequences --- scripts/uthsc_samples/uthsc_samples.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'scripts/uthsc_samples/uthsc_samples.py') diff --git a/scripts/uthsc_samples/uthsc_samples.py b/scripts/uthsc_samples/uthsc_samples.py index 3ad2561..54c70ee 100644 --- a/scripts/uthsc_samples/uthsc_samples.py +++ b/scripts/uthsc_samples/uthsc_samples.py @@ -55,3 +55,5 @@ for index, row in table.iterrows(): locationx=locationx, strain=strain )) + + print(f"Run: python3 bh20sequploader/main.py scripts/uthsc_samples/yaml/{sample}.yaml scripts/uthsc_samples/yaml/{sample}.fa") -- cgit v1.2.3