From 7d5ed26346dc67fd829a902efeba270d0b4b4b61 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Fri, 5 Sep 2025 12:21:24 +0100 Subject: Drop SNPs with a zero standard deviation. --- pyhegp/pyhegp.py | 5 +++++ tests/test_pyhegp.py | 3 +-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pyhegp/pyhegp.py b/pyhegp/pyhegp.py index 6df91ec..cf9f88d 100644 --- a/pyhegp/pyhegp.py +++ b/pyhegp/pyhegp.py @@ -93,6 +93,11 @@ def pool_summaries(summaries): pooled_summary.data.drop(columns=["reference"])) def encrypt_genotype(genotype, key, summary): + # Drop SNPs that have a zero standard deviation. Such SNPs have no + # discriminatory power in the analysis and mess with our + # standardization by causing a division by zero. + summary = summary._replace( + data=summary.data[~np.isclose(summary.data["std"], 0)]) # Drop any SNPs that are not in both genotype and summary. common_genotype = pd.merge(genotype, summary.data[["chromosome", "position"]], diff --git a/tests/test_pyhegp.py b/tests/test_pyhegp.py index a7c78a9..8f9e2de 100644 --- a/tests/test_pyhegp.py +++ b/tests/test_pyhegp.py @@ -26,7 +26,7 @@ from hypothesis import given, strategies as st from hypothesis.extra.numpy import arrays, array_shapes import numpy as np import pandas as pd -from pytest import approx, mark +from pytest import approx from pyhegp.pyhegp import Stats, main, hegp_encrypt, hegp_decrypt, random_key, pool_stats, standardize, unstandardize, genotype_summary, encrypt_genotype, encrypt_phenotype, cat_genotype, cat_phenotype from pyhegp.serialization import Summary, read_summary, read_genotype, is_genotype_metadata_column, is_phenotype_metadata_column @@ -126,7 +126,6 @@ def test_conservation_of_solutions(genotype, phenotype): == np.linalg.solve(hegp_encrypt(genotype, key), hegp_encrypt(phenotype, key))) -@mark.xfail @given(genotype_frames(st.shared(st.integers(min_value=2, max_value=10), key="number-of-samples"), reference_present=st.just(True)), -- cgit 1.4.1