From 92b69996f378f320579ca84915640aed6729f362 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Fri, 25 Jul 2025 14:43:44 +0100 Subject: Tab-separate data section of summary files. * pyhegp/serialization.py (read_summary, write_summary): Use tab as the delimiter. * doc/file-formats.md (File formats)[summary file]: Update documentation. --- doc/file-formats.md | 2 +- pyhegp/serialization.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/file-formats.md b/doc/file-formats.md index 27dfe2a..4d3bfcd 100644 --- a/doc/file-formats.md +++ b/doc/file-formats.md @@ -5,7 +5,7 @@ The summary file is ASCII encoded. It consists of two sections—the header and The first line of the header section MUST be `# pyhegp summary file version 1`. Subsequent lines of the header section are a list of key-value pairs. Each line MUST be `#`, optional whitespace, the key, a single space character and then the value. The key MUST NOT contain whitespace or control characters, and MUST NOT begin with a `#` character. The value MAY contain whitespace characters, but MUST NOT contain control characters. -The data section is a space separated table of numbers. The first line of the data section is a vector of means—one for each SNP. The second line is a vector of standard deviations—one for each SNP. +The data section is a tab-separated table of numbers. The first line of the data section is a vector of means—one for each SNP. The second line is a vector of standard deviations—one for each SNP. Here is an example summary file. `TODO: Add example.` diff --git a/pyhegp/serialization.py b/pyhegp/serialization.py index 269528e..657e4ec 100644 --- a/pyhegp/serialization.py +++ b/pyhegp/serialization.py @@ -43,13 +43,14 @@ def read_summary_headers(file): def read_summary(file): headers = read_summary_headers(file) return Summary(int(headers["number-of-samples"]), - *np.loadtxt(file, ndmin=2)) + *np.loadtxt(file, ndmin=2, delimiter="\t")) def write_summary(file, summary): file.write(SUMMARY_HEADER) file.write(f"# number-of-samples {summary.n}\n".encode("ascii")) np.savetxt(file, np.row_stack((summary.mean, summary.std)), + delimiter="\t", fmt="%.8g") def read_genotype(genotype_file): -- cgit 1.4.1