about summary refs log tree commit diff
diff options
context:
space:
mode:
authorArun Isaac2025-09-05 12:21:24 +0100
committerArun Isaac2025-09-05 12:22:05 +0100
commit7d5ed26346dc67fd829a902efeba270d0b4b4b61 (patch)
tree3222f8e38822294c78047b05e79ca422d9abd77e
parent6d4ef66d668aafb3796ad019447485c3eba61737 (diff)
downloadpyhegp-7d5ed26346dc67fd829a902efeba270d0b4b4b61.tar.gz
pyhegp-7d5ed26346dc67fd829a902efeba270d0b4b4b61.tar.lz
pyhegp-7d5ed26346dc67fd829a902efeba270d0b4b4b61.zip
Drop SNPs with a zero standard deviation.
-rw-r--r--pyhegp/pyhegp.py5
-rw-r--r--tests/test_pyhegp.py3
2 files changed, 6 insertions, 2 deletions
diff --git a/pyhegp/pyhegp.py b/pyhegp/pyhegp.py
index 6df91ec..cf9f88d 100644
--- a/pyhegp/pyhegp.py
+++ b/pyhegp/pyhegp.py
@@ -93,6 +93,11 @@ def pool_summaries(summaries):
                    pooled_summary.data.drop(columns=["reference"]))
 
 def encrypt_genotype(genotype, key, summary):
+    # Drop SNPs that have a zero standard deviation. Such SNPs have no
+    # discriminatory power in the analysis and mess with our
+    # standardization by causing a division by zero.
+    summary = summary._replace(
+        data=summary.data[~np.isclose(summary.data["std"], 0)])
     # Drop any SNPs that are not in both genotype and summary.
     common_genotype = pd.merge(genotype,
                                summary.data[["chromosome", "position"]],
diff --git a/tests/test_pyhegp.py b/tests/test_pyhegp.py
index a7c78a9..8f9e2de 100644
--- a/tests/test_pyhegp.py
+++ b/tests/test_pyhegp.py
@@ -26,7 +26,7 @@ from hypothesis import given, strategies as st
 from hypothesis.extra.numpy import arrays, array_shapes
 import numpy as np
 import pandas as pd
-from pytest import approx, mark
+from pytest import approx
 
 from pyhegp.pyhegp import Stats, main, hegp_encrypt, hegp_decrypt, random_key, pool_stats, standardize, unstandardize, genotype_summary, encrypt_genotype, encrypt_phenotype, cat_genotype, cat_phenotype
 from pyhegp.serialization import Summary, read_summary, read_genotype, is_genotype_metadata_column, is_phenotype_metadata_column
@@ -126,7 +126,6 @@ def test_conservation_of_solutions(genotype, phenotype):
             == np.linalg.solve(hegp_encrypt(genotype, key),
                                hegp_encrypt(phenotype, key)))
 
-@mark.xfail
 @given(genotype_frames(st.shared(st.integers(min_value=2, max_value=10),
                                  key="number-of-samples"),
                        reference_present=st.just(True)),