about summary refs log tree commit diff
path: root/tests
diff options
context:
space:
mode:
author Arun Isaac 2025-07-15 17:33:34 +0100
committer Arun Isaac 2025-07-17 20:36:08 +0100
commit a78069cde91c8b9e75f4fb3141b173e4252697cc (patch)
tree 408bdce34119166ca165f14559ee3051ffcb5512 /tests
parent 69a4bafb322f7aad8ffd0c622cff70a891b03f33 (diff)
download pyhegp-a78069cde91c8b9e75f4fb3141b173e4252697cc.tar.gz
pyhegp-a78069cde91c8b9e75f4fb3141b173e4252697cc.tar.lz
pyhegp-a78069cde91c8b9e75f4fb3141b173e4252697cc.zip
Add standardization.
* pyhegp/pyhegp.py (standardize): Standardize using mean and standard
deviation, instead of the minor allele frequency.
(unstandardize): New function.
* tests/test_pyhegp.py: Import standardize and unstandardize from
pyhegp.pyhegp.
(no_column_zero_standard_deviation): New function.
(test_standardize_unstandardize_are_inverses): New test.
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_pyhegp.py 17
1 files changed, 16 insertions, 1 deletions
diff --git a/tests/test_pyhegp.py b/tests/test_pyhegp.py
index 2d3e0b8..304e74b 100644
--- a/tests/test_pyhegp.py
+++ b/tests/test_pyhegp.py
@@ -21,7 +21,7 @@ from hypothesis.extra.numpy import arrays, array_shapes
 import numpy as np
 from pytest import approx
 
-from pyhegp.pyhegp import Stats, hegp_encrypt, hegp_decrypt, random_key, pool_stats
+from pyhegp.pyhegp import Stats, hegp_encrypt, hegp_decrypt, random_key, pool_stats, standardize, unstandardize
 
 @given(st.lists(st.lists(arrays("float64",
                                 st.shared(array_shapes(min_dims=1, max_dims=1),
@@ -39,6 +39,9 @@ def test_pool_stats(pools):
             and pooled_stats.mean == approx(np.mean(combined_pool, axis=0))
             and pooled_stats.std == approx(np.std(combined_pool, axis=0, ddof=1)))
 
+def no_column_zero_standard_deviation(matrix):
+    return not np.any(np.isclose(np.std(matrix, axis=0), 0))
+
 @given(st.one_of(
     arrays("int32",
            array_shapes(min_dims=2, max_dims=2),
@@ -58,3 +61,15 @@ def test_hegp_encryption_decryption_are_inverses(plaintext):
     # FIXME: We don't use maf at the moment.
     maf = None
     assert hegp_decrypt(hegp_encrypt(plaintext, maf, key), key) == approx(plaintext)
+
+@given(arrays("float64",
+              array_shapes(min_dims=2, max_dims=2),
+              elements=st.floats(min_value=0, max_value=100))
+       # Reject matrices with zero standard deviation columns since
+       # they trigger a division by zero.
+       .filter(no_column_zero_standard_deviation))
+def test_standardize_unstandardize_are_inverses(matrix):
+    mean = np.mean(matrix, axis=0)
+    standard_deviation = np.std(matrix, axis=0)
+    assert unstandardize(standardize(matrix, mean, standard_deviation),
+                         mean, standard_deviation) == approx(matrix)