about summary refs log tree commit diff
diff options
context:
space:
mode:
author Arun Isaac 2025-07-25 14:43:44 +0100
committer Arun Isaac 2025-08-01 12:58:45 +0100
commit 92b69996f378f320579ca84915640aed6729f362 (patch)
tree 72fea660fd2b10a11afb31cc6a69059f89fbf831
parent abb436a5c8f379098b41872ed587c12d21396f67 (diff)
download pyhegp-92b69996f378f320579ca84915640aed6729f362.tar.gz
pyhegp-92b69996f378f320579ca84915640aed6729f362.tar.lz
pyhegp-92b69996f378f320579ca84915640aed6729f362.zip
Tab-separate data section of summary files.
* pyhegp/serialization.py (read_summary, write_summary): Use tab as
the delimiter.
* doc/file-formats.md (File formats)[summary file]: Update
documentation.
-rw-r--r-- doc/file-formats.md 2
-rw-r--r-- pyhegp/serialization.py 3
2 files changed, 3 insertions, 2 deletions
diff --git a/doc/file-formats.md b/doc/file-formats.md
index 27dfe2a..4d3bfcd 100644
--- a/doc/file-formats.md
+++ b/doc/file-formats.md
@@ -5,7 +5,7 @@ The summary file is ASCII encoded. It consists of two sections—the header and
 
 The first line of the header section MUST be `# pyhegp summary file version 1`. Subsequent lines of the header section are a list of key-value pairs. Each line MUST be `#`, optional whitespace, the key, a single space character and then the value. The key MUST NOT contain whitespace or control characters, and MUST NOT begin with a `#` character. The value MAY contain whitespace characters, but MUST NOT contain control characters.
 
-The data section is a space separated table of numbers. The first line of the data section is a vector of means—one for each SNP. The second line is a vector of standard deviations—one for each SNP.
+The data section is a tab-separated table of numbers. The first line of the data section is a vector of means—one for each SNP. The second line is a vector of standard deviations—one for each SNP.
 
 Here is an example summary file.
 `TODO: Add example.`
diff --git a/pyhegp/serialization.py b/pyhegp/serialization.py
index 269528e..657e4ec 100644
--- a/pyhegp/serialization.py
+++ b/pyhegp/serialization.py
@@ -43,13 +43,14 @@ def read_summary_headers(file):
 def read_summary(file):
     headers = read_summary_headers(file)
     return Summary(int(headers["number-of-samples"]),
-                   *np.loadtxt(file, ndmin=2))
+                   *np.loadtxt(file, ndmin=2, delimiter="\t"))
 
 def write_summary(file, summary):
     file.write(SUMMARY_HEADER)
     file.write(f"# number-of-samples {summary.n}\n".encode("ascii"))
     np.savetxt(file,
                np.row_stack((summary.mean, summary.std)),
+               delimiter="\t",
                fmt="%.8g")
 
 def read_genotype(genotype_file):