about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- doc/file-formats.md 6
-rw-r--r-- pyhegp/pyhegp.py 11
-rw-r--r-- tests/helpers/strategies.py 12
3 files changed, 22 insertions, 7 deletions
diff --git a/doc/file-formats.md b/doc/file-formats.md
index 01263e3..04fae0c 100644
--- a/doc/file-formats.md
+++ b/doc/file-formats.md
@@ -49,7 +49,11 @@ chr11	3464016	A	-0.3461	-0.334	-0.3331	0.08
 
 ## phenotype (and covariates) file
 
-The phenotype file is a tab-separated values (TSV) file. The first line MUST be a header with column labels. Each row corresponds to one individual. The column labelled `sample-id` contains the sample identifier for that individual. Other columns each contain a phenotypical trait for that individual. The headers of these columns MUST be the names of the phenotypes. Column headers are case-sensitive.
+The phenotype file is a tab-separated values (TSV) file. The first line MUST be a header with column labels. Each row corresponds to one individual.
+
+The column labelled `sample-id` contains the sample identifier for that individual. Other columns each contain a phenotypical trait for that individual. The headers of these columns MUST be the names of the phenotypes. Column headers are case-sensitive.
+
+Encryption MAY add a column labelled `intercept` containing the ciphertext corresponding to a vector of 1s.
 
 Here is an example phenotype file.
 ```
diff --git a/pyhegp/pyhegp.py b/pyhegp/pyhegp.py
index 3694ce4..2a82690 100644
--- a/pyhegp/pyhegp.py
+++ b/pyhegp/pyhegp.py
@@ -131,11 +131,14 @@ def encrypt_genotype(genotype, key, summary):
 
 def encrypt_phenotype(phenotype, key):
     phenotype_matrix = phenotype.drop(columns=["sample-id"])
-    sample_names = phenotype_matrix.columns
+    sample_names = list(phenotype_matrix.columns)
     return pd.concat((phenotype["sample-id"],
-                      pd.DataFrame(hegp_encrypt(phenotype_matrix.to_numpy(),
-                                                key),
-                                   columns=sample_names)),
+                      pd.DataFrame(
+                          hegp_encrypt(
+                              np.column_stack((np.ones(len(phenotype)),
+                                               phenotype_matrix.to_numpy())),
+                              key),
+                          columns=["intercept"] + sample_names)),
                      axis="columns")
 
 def cat_genotype(genotypes):
diff --git a/tests/helpers/strategies.py b/tests/helpers/strategies.py
index 3771ed0..171c721 100644
--- a/tests/helpers/strategies.py
+++ b/tests/helpers/strategies.py
@@ -1,5 +1,5 @@
 ### pyhegp --- Homomorphic encryption of genotypes and phenotypes
-### Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net>
+### Copyright © 2025–2026 Arun Isaac <arunisaac@systemreboot.net>
 ###
 ### This file is part of pyhegp.
 ###
@@ -104,13 +104,21 @@ phenotype_names = st.lists(tabless_printable_ascii_text
 @st.composite
 def phenotype_frames(draw,
                      number_of_samples=st.integers(min_value=0,
-                                                   max_value=10)):
+                                                   max_value=10),
+                     intercept_present=st.booleans()):
     _number_of_samples = draw(number_of_samples)
     return draw(data_frames(
         columns=([column(name="sample-id",
                          dtype="str",
                          elements=tabless_printable_ascii_text,
                          unique=True)]
+                 + ([column(name="intercept",
+                            dtype="float64",
+                            elements=st.floats(min_value=-1,
+                                               max_value=1,
+                                               allow_nan=False))]
+                    if draw(intercept_present)
+                    else [])
                  + columns(draw(phenotype_names),
                            dtype="float64",
                            elements=st.floats(min_value=-1000,