From 1a907c0b75080891a960e561bcca5d7a35624cac Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Thu, 15 Jan 2026 02:57:02 +0000
Subject: Separate centering from normalization.

---
 pyhegp/pyhegp.py     | 16 ++++++++++------
 tests/test_pyhegp.py | 11 +++++++++--
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/pyhegp/pyhegp.py b/pyhegp/pyhegp.py
index 2a82690..676e0b6 100644
--- a/pyhegp/pyhegp.py
+++ b/pyhegp/pyhegp.py
@@ -33,15 +33,19 @@ Stats = namedtuple("Stats", "n mean std")
 def random_key(rng, n):
     return special_ortho_group.rvs(n, random_state=rng)
 
-def standardize(matrix, mean, standard_deviation):
+def center(matrix, mean):
     m, _ = matrix.shape
-    return ((matrix - np.tile(mean, (m, 1)))
-            @ np.diag(1 / standard_deviation))
+    return matrix - np.tile(mean, (m, 1))
+
+def uncenter(matrix, mean):
+    return center(matrix, -mean)
+
+def standardize(matrix, mean, standard_deviation):
+    return center(matrix, mean) @ np.diag(1 / standard_deviation)
 
 def unstandardize(matrix, mean, standard_deviation):
-    m, _ = matrix.shape
-    return ((matrix @ np.diag(standard_deviation))
-            + np.tile(mean, (m, 1)))
+    return uncenter(matrix @ np.diag(standard_deviation),
+                    mean)
 
 def hegp_encrypt(plaintext, key):
     return key @ plaintext
diff --git a/tests/test_pyhegp.py b/tests/test_pyhegp.py
index cdf3a7f..c3cf47f 100644
--- a/tests/test_pyhegp.py
+++ b/tests/test_pyhegp.py
@@ -1,5 +1,5 @@
 ### pyhegp --- Homomorphic encryption of genotypes and phenotypes
-### Copyright © 2025 Arun Isaac
+### Copyright © 2025–2026 Arun Isaac
 ###
 ### This file is part of pyhegp.
 ###
@@ -29,7 +29,7 @@ import pandas as pd
 import pytest
 from pytest import approx
 
-from pyhegp.pyhegp import Stats, main, hegp_encrypt, hegp_decrypt, random_key, pool_stats, standardize, unstandardize, genotype_summary, encrypt_genotype, encrypt_phenotype, cat_genotype, cat_phenotype
+from pyhegp.pyhegp import Stats, main, hegp_encrypt, hegp_decrypt, random_key, pool_stats, center, uncenter, standardize, unstandardize, genotype_summary, encrypt_genotype, encrypt_phenotype, cat_genotype, cat_phenotype
 from pyhegp.serialization import Summary, read_summary, read_genotype, is_genotype_metadata_column, is_phenotype_metadata_column
 from pyhegp.utils import negate
 
@@ -93,6 +93,13 @@ def test_hegp_encryption_decryption_are_inverses(plaintext):
     key = random_key(rng, len(plaintext))
     assert hegp_decrypt(hegp_encrypt(plaintext, key), key) == approx(plaintext)
 
+@given(arrays("float64",
+              array_shapes(min_dims=2, max_dims=2),
+              elements=st.floats(min_value=0, max_value=100)))
+def test_center_uncenter_are_inverses(matrix):
+    mean = np.mean(matrix, axis=0)
+    assert uncenter(center(matrix, mean), mean) == approx(matrix)
+
 @given(arrays("float64",
               array_shapes(min_dims=2, max_dims=2),
               elements=st.floats(min_value=0, max_value=100))
-- 
cgit 1.4.1