aboutsummaryrefslogtreecommitdiff
path: root/scripts/uthsc_samples/uthsc_samples.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/uthsc_samples/uthsc_samples.py')
-rw-r--r--scripts/uthsc_samples/uthsc_samples.py57
1 files changed, 57 insertions, 0 deletions
diff --git a/scripts/uthsc_samples/uthsc_samples.py b/scripts/uthsc_samples/uthsc_samples.py
new file mode 100644
index 0000000..5c39398
--- /dev/null
+++ b/scripts/uthsc_samples/uthsc_samples.py
@@ -0,0 +1,57 @@
+import os
+import pandas as pd
+from string import Template
+from dateutil.parser import parse
+import re
+
+import sys
+
# Metadata in tabular format in a spreadsheet(?!)
xlsx = '../../test/data/10_samples.xlsx'

# Template in a text file
template_yaml = 'template.yaml'

# Directory that receives one "<Sample ID>.yaml" file per spreadsheet row.
dir_output = 'yaml'

# Wikidata entity used when a location matches none of the cities below
# (Memphis by default).
DEFAULT_LOCATION = "https://www.wikidata.org/wiki/Q16563"

# City name -> Wikidata entity URL. Entries are tried in this order.
CITY_LOCATIONS = {
    "Pegram": "https://www.wikidata.org/wiki/Q3289517",
    "Alexander": "https://www.wikidata.org/wiki/Q79663",
    "Smithville": "https://www.wikidata.org/wiki/Q2145339",
    "Nashville": "https://www.wikidata.org/wiki/Q23197",
    "Madison": "https://www.wikidata.org/wiki/Q494755",
}


def resolve_location(locationx):
    """Return the Wikidata URL for a "City, State, USA" location label.

    The label is compared by prefix against the city names in
    CITY_LOCATIONS, in listing order, and the first hit wins. A label that
    starts with none of them resolves to DEFAULT_LOCATION.

    Args:
        locationx: Location label whose leading text is the city name.

    Returns:
        The Wikidata entity URL as a string.
    """
    for city, url in CITY_LOCATIONS.items():
        # Plain prefix test: the city names contain no regex metacharacters,
        # so this is what an anchored regex match on the name amounts to.
        if locationx.startswith(city):
            return url
    return DEFAULT_LOCATION


def main():
    """Generate one YAML metadata file per row of the sample spreadsheet.

    Reads the spreadsheet at `xlsx`, fills the template at `template_yaml`
    for every row and writes the result to `dir_output/<Sample ID>.yaml`.
    The rows are expected to provide the columns 'Sample ID',
    'Collection Date', 'City' and 'State'.

    Raises:
        KeyError: If the template uses a placeholder that is not supplied
            (raised by `Template.substitute`) or a column is missing.
    """
    os.makedirs(dir_output, exist_ok=True)

    table = pd.read_excel(xlsx)

    print(table)

    # The template is the same for every sample, so load it once up front
    # instead of re-reading the file for each row.
    with open(template_yaml, encoding="utf-8") as f:
        text = Template(f.read())

    for _, row in table.iterrows():
        sample = row['Sample ID']
        print(f"Processing sample {sample}...")

        collection_date = parse(str(row['Collection Date'])).strftime('%Y-%m-%d')
        # NOTE(review): a row with an empty City or State cell will raise
        # here (non-string + str); confirm whether such rows can occur.
        locationx = row['City'] + ", " + row['State'] + ", USA"
        # NOTE(review): the year is hard-coded to 2020 regardless of the
        # collection date; confirm this is intended.
        strain = f"SARS-CoV-2/human/USA/{sample}/2020"

        # Render first and open the output afterwards, so that a bad date or
        # a missing template placeholder does not leave an empty file behind.
        rendered = text.substitute(
            sample_id=sample,
            sample_name=sample,
            collection_date=collection_date,
            location=resolve_location(locationx),
            locationx=locationx,
            strain=strain,
        )

        output_path = os.path.join(dir_output, f"{sample}.yaml")
        with open(output_path, 'w', encoding="utf-8") as fw:
            fw.write(rendered)


if __name__ == "__main__":
    main()