about summary refs log tree commit diff
path: root/scripts/uthsc_samples/uthsc_samples.py
diff options
context:
space:
mode:
author Pjotr Prins 2020-11-06 09:52:32 +0000
committer Pjotr Prins 2020-11-06 09:52:32 +0000
commit 43d7264dda8061a024befbc9ca0a89d7159b1e40 (patch)
tree 2f0792879657ff2105bdfe36b1d9ed6a5bc1f08c /scripts/uthsc_samples/uthsc_samples.py
parent fbbec51e604964d18ab72cbf0ac24b102ecc0376 (diff)
download bh20-seq-resource-43d7264dda8061a024befbc9ca0a89d7159b1e40.tar.gz
bh20-seq-resource-43d7264dda8061a024befbc9ca0a89d7159b1e40.tar.lz
bh20-seq-resource-43d7264dda8061a024befbc9ca0a89d7159b1e40.zip
UTHSC upload info
Diffstat (limited to 'scripts/uthsc_samples/uthsc_samples.py')
-rw-r--r-- scripts/uthsc_samples/uthsc_samples.py 57
1 files changed, 57 insertions, 0 deletions
diff --git a/scripts/uthsc_samples/uthsc_samples.py b/scripts/uthsc_samples/uthsc_samples.py
new file mode 100644
index 0000000..5c39398
--- /dev/null
+++ b/scripts/uthsc_samples/uthsc_samples.py
@@ -0,0 +1,57 @@
+import os
+import pandas as pd
+from string import Template
+from dateutil.parser import parse
+import re
+
+import sys
+
+# Write one YAML metadata file per spreadsheet row, filled in from a template.
+# Metadata in tabular format in a spreadsheet(?!)
+xlsx = '../../test/data/10_samples.xlsx'
+
+# Template in a text file
+template_yaml = 'template.yaml'
+
+dir_output = 'yaml'
+
+# Wikidata pages for the known sampling cities; any other city gets Memphis.
+default_location = "https://www.wikidata.org/wiki/Q16563"
+city_locations = {
+    "Pegram": "https://www.wikidata.org/wiki/Q3289517",
+    "Alexander": "https://www.wikidata.org/wiki/Q79663",
+    "Smithville": "https://www.wikidata.org/wiki/Q2145339",
+    "Nashville": "https://www.wikidata.org/wiki/Q23197",
+    "Madison": "https://www.wikidata.org/wiki/Q494755",
+}
+
+os.makedirs(dir_output, exist_ok=True)
+
+# The template is loop-invariant, so read it once instead of once per row.
+with open(template_yaml) as f:
+    text = Template(f.read())
+
+table = pd.read_excel(xlsx)
+
+print(table)
+
+for index, row in table.iterrows():
+    sample = row['Sample ID']
+    print(f"Processing sample {sample}...")
+
+    collection_date = parse(str(row['Collection Date'])).strftime('%Y-%m-%d')
+    locationx = row['City']+", "+row['State']+", USA"
+    # Exact city lookup: a regex prefix match on the location string would
+    # also send e.g. "Madisonville" to the Madison entry.
+    location = city_locations.get(str(row['City']).strip(), default_location)
+    strain = f"SARS-CoV-2/human/USA/{sample}/2020"
+
+    # Render before opening the output so a bad row leaves no empty file.
+    rendered = text.substitute(sample_id=sample,
+                               sample_name=sample,
+                               collection_date=collection_date,
+                               location=location,
+                               locationx=locationx,
+                               strain=strain)
+    with open(os.path.join(dir_output, f"{sample}.yaml"), 'w') as fw:
+        fw.write(rendered)