diff options
author | Arun Isaac | 2025-07-25 14:43:44 +0100 |
---|---|---|
committer | Arun Isaac | 2025-08-01 12:58:45 +0100 |
commit | 92b69996f378f320579ca84915640aed6729f362 (patch) | |
tree | 72fea660fd2b10a11afb31cc6a69059f89fbf831 | |
parent | abb436a5c8f379098b41872ed587c12d21396f67 (diff) | |
download | pyhegp-92b69996f378f320579ca84915640aed6729f362.tar.gz pyhegp-92b69996f378f320579ca84915640aed6729f362.tar.lz pyhegp-92b69996f378f320579ca84915640aed6729f362.zip |
Tab-separate data section of summary files.
* pyhegp/serialization.py (read_summary, write_summary): Use tab as the delimiter.
* doc/file-formats.md (File formats)[summary file]: Update documentation.
-rw-r--r-- | doc/file-formats.md | 2 | ||||
-rw-r--r-- | pyhegp/serialization.py | 3 |
2 files changed, 3 insertions, 2 deletions
diff --git a/doc/file-formats.md b/doc/file-formats.md index 27dfe2a..4d3bfcd 100644 --- a/doc/file-formats.md +++ b/doc/file-formats.md @@ -5,7 +5,7 @@ The summary file is ASCII encoded. It consists of two sections—the header and The first line of the header section MUST be `# pyhegp summary file version 1`. Subsequent lines of the header section are a list of key-value pairs. Each line MUST be `#`, optional whitespace, the key, a single space character and then the value. The key MUST NOT contain whitespace or control characters, and MUST NOT begin with a `#` character. The value MAY contain whitespace characters, but MUST NOT contain control characters. -The data section is a space separated table of numbers. The first line of the data section is a vector of means—one for each SNP. The second line is a vector of standard deviations—one for each SNP. +The data section is a tab-separated table of numbers. The first line of the data section is a vector of means—one for each SNP. The second line is a vector of standard deviations—one for each SNP. Here is an example summary file. `TODO: Add example.` diff --git a/pyhegp/serialization.py b/pyhegp/serialization.py index 269528e..657e4ec 100644 --- a/pyhegp/serialization.py +++ b/pyhegp/serialization.py @@ -43,13 +43,14 @@ def read_summary_headers(file): def read_summary(file): headers = read_summary_headers(file) return Summary(int(headers["number-of-samples"]), - *np.loadtxt(file, ndmin=2)) + *np.loadtxt(file, ndmin=2, delimiter="\t")) def write_summary(file, summary): file.write(SUMMARY_HEADER) file.write(f"# number-of-samples {summary.n}\n".encode("ascii")) np.savetxt(file, np.row_stack((summary.mean, summary.std)), + delimiter="\t", fmt="%.8g") def read_genotype(genotype_file): |