diff options
author | Arun Isaac | 2025-09-05 12:21:24 +0100 |
---|---|---|
committer | Arun Isaac | 2025-09-05 12:22:05 +0100 |
commit | 7d5ed26346dc67fd829a902efeba270d0b4b4b61 (patch) | |
tree | 3222f8e38822294c78047b05e79ca422d9abd77e | |
parent | 6d4ef66d668aafb3796ad019447485c3eba61737 (diff) | |
download | pyhegp-7d5ed26346dc67fd829a902efeba270d0b4b4b61.tar.gz pyhegp-7d5ed26346dc67fd829a902efeba270d0b4b4b61.tar.lz pyhegp-7d5ed26346dc67fd829a902efeba270d0b4b4b61.zip |
Drop SNPs with a zero standard deviation.
-rw-r--r-- | pyhegp/pyhegp.py | 5 | ||||
-rw-r--r-- | tests/test_pyhegp.py | 3 |
2 files changed, 6 insertions, 2 deletions
diff --git a/pyhegp/pyhegp.py b/pyhegp/pyhegp.py index 6df91ec..cf9f88d 100644 --- a/pyhegp/pyhegp.py +++ b/pyhegp/pyhegp.py @@ -93,6 +93,11 @@ def pool_summaries(summaries): pooled_summary.data.drop(columns=["reference"])) def encrypt_genotype(genotype, key, summary): + # Drop SNPs that have a zero standard deviation. Such SNPs have no + # discriminatory power in the analysis and mess with our + # standardization by causing a division by zero. + summary = summary._replace( + data=summary.data[~np.isclose(summary.data["std"], 0)]) # Drop any SNPs that are not in both genotype and summary. common_genotype = pd.merge(genotype, summary.data[["chromosome", "position"]], diff --git a/tests/test_pyhegp.py b/tests/test_pyhegp.py index a7c78a9..8f9e2de 100644 --- a/tests/test_pyhegp.py +++ b/tests/test_pyhegp.py @@ -26,7 +26,7 @@ from hypothesis import given, strategies as st from hypothesis.extra.numpy import arrays, array_shapes import numpy as np import pandas as pd -from pytest import approx, mark +from pytest import approx from pyhegp.pyhegp import Stats, main, hegp_encrypt, hegp_decrypt, random_key, pool_stats, standardize, unstandardize, genotype_summary, encrypt_genotype, encrypt_phenotype, cat_genotype, cat_phenotype from pyhegp.serialization import Summary, read_summary, read_genotype, is_genotype_metadata_column, is_phenotype_metadata_column @@ -126,7 +126,6 @@ def test_conservation_of_solutions(genotype, phenotype): == np.linalg.solve(hegp_encrypt(genotype, key), hegp_encrypt(phenotype, key))) -@mark.xfail @given(genotype_frames(st.shared(st.integers(min_value=2, max_value=10), key="number-of-samples"), reference_present=st.just(True)), |