Add intercept column in phenotypes file.

author: Arun Isaac 2026-01-14 20:29:33 +0000
committer: Arun Isaac 2026-01-16 23:06:32 +0000
commit: a76dedb432aa99f53681144c488ac19b9c23e660 (patch)
tree: 17ba9a6502c0f71a4355f0b8210666b65749ea15
parent: 1e6efe6ef73cbb65839a95883899442f5dc4afe1 (diff)
download: pyhegp-a76dedb432aa99f53681144c488ac19b9c23e660.tar.gz
pyhegp-a76dedb432aa99f53681144c488ac19b9c23e660.tar.lz
pyhegp-a76dedb432aa99f53681144c488ac19b9c23e660.zip
3 files changed, 22 insertions, 7 deletions
diff --git a/doc/file-formats.md b/doc/file-formats.md
index 01263e3..04fae0c 100644
--- a/doc/file-formats.md
+++ b/doc/file-formats.md
@@ -49,7 +49,11 @@ chr11	3464016	A	-0.3461	-0.334	-0.3331	0.08
 
 ## phenotype (and covariates) file
 
-The phenotype file is a tab-separated values (TSV) file. The first line MUST be a header with column labels. Each row corresponds to one individual. The column labelled `sample-id` contains the sample identifier for that individual. Other columns each contain a phenotypical trait for that individual. The headers of these columns MUST be the names of the phenotypes. Column headers are case-sensitive.
+The phenotype file is a tab-separated values (TSV) file. The first line MUST be a header with column labels. Each row corresponds to one individual.
+
+The column labelled `sample-id` contains the sample identifier for that individual. Other columns each contain a phenotypical trait for that individual. The headers of these columns MUST be the names of the phenotypes. Column headers are case-sensitive.
+
+Encryption MAY add a column labelled `intercept` containing the ciphertext corresponding to a vector of 1s.
 
 Here is an example phenotype file.
 ```
diff --git a/pyhegp/pyhegp.py b/pyhegp/pyhegp.py
index 3694ce4..2a82690 100644
--- a/pyhegp/pyhegp.py
+++ b/pyhegp/pyhegp.py
@@ -131,11 +131,14 @@ def encrypt_genotype(genotype, key, summary):
 
 def encrypt_phenotype(phenotype, key):
     phenotype_matrix = phenotype.drop(columns=["sample-id"])
-    sample_names = phenotype_matrix.columns
+    sample_names = list(phenotype_matrix.columns)
     return pd.concat((phenotype["sample-id"],
-                      pd.DataFrame(hegp_encrypt(phenotype_matrix.to_numpy(),
-                                                key),
-                                   columns=sample_names)),
+                      pd.DataFrame(
+                          hegp_encrypt(
+                              np.column_stack((np.ones(len(phenotype)),
+                                               phenotype_matrix.to_numpy())),
+                              key),
+                          columns=["intercept"] + sample_names)),
                      axis="columns")
 
 def cat_genotype(genotypes):
diff --git a/tests/helpers/strategies.py b/tests/helpers/strategies.py
index 3771ed0..171c721 100644
--- a/tests/helpers/strategies.py
+++ b/tests/helpers/strategies.py
@@ -1,5 +1,5 @@
 ### pyhegp --- Homomorphic encryption of genotypes and phenotypes
-### Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net>
+### Copyright © 2025–2026 Arun Isaac <arunisaac@systemreboot.net>
 ###
 ### This file is part of pyhegp.
 ###
@@ -104,13 +104,21 @@ phenotype_names = st.lists(tabless_printable_ascii_text
 @st.composite
 def phenotype_frames(draw,
                      number_of_samples=st.integers(min_value=0,
-                                                   max_value=10)):
+                                                   max_value=10),
+                     intercept_present=st.booleans()):
     _number_of_samples = draw(number_of_samples)
     return draw(data_frames(
         columns=([column(name="sample-id",
                          dtype="str",
                          elements=tabless_printable_ascii_text,
                          unique=True)]
+                 + ([column(name="intercept",
+                            dtype="float64",
+                            elements=st.floats(min_value=-1,
+                                               max_value=1,
+                                               allow_nan=False))]
+                    if draw(intercept_present)
+                    else [])
                  + columns(draw(phenotype_names),
                            dtype="float64",
                            elements=st.floats(min_value=-1000,
author	Arun Isaac	2026-01-14 20:29:33 +0000
committer	Arun Isaac	2026-01-16 23:06:32 +0000
commit	a76dedb432aa99f53681144c488ac19b9c23e660 (patch)
tree	17ba9a6502c0f71a4355f0b8210666b65749ea15
parent	1e6efe6ef73cbb65839a95883899442f5dc4afe1 (diff)
download	pyhegp-a76dedb432aa99f53681144c488ac19b9c23e660.tar.gz pyhegp-a76dedb432aa99f53681144c488ac19b9c23e660.tar.lz pyhegp-a76dedb432aa99f53681144c488ac19b9c23e660.zip