diff options
| author | Arun Isaac | 2026-01-28 00:23:56 +0000 |
|---|---|---|
| committer | Arun Isaac | 2026-01-28 01:18:09 +0000 |
| commit | ee0b1836f33004c62d05408c4ee2b69086652131 (patch) | |
| tree | d284558a4049c8e6b9f910f5c5b046c5e2b3a248 | |
| parent | 963e662a93291b2e5297f637e8a59947657d4325 (diff) | |
| download | pyhegp-ee0b1836f33004c62d05408c4ee2b69086652131.tar.gz pyhegp-ee0b1836f33004c62d05408c4ee2b69086652131.tar.lz pyhegp-ee0b1836f33004c62d05408c4ee2b69086652131.zip | |
Do not drop zero standard deviation SNPs with --only-center.
| -rw-r--r-- | pyhegp/pyhegp.py | 9 | ||||
| -rw-r--r-- | test-data/genotype-with-zero-stddev-snp.tsv | 4 | ||||
| -rw-r--r-- | tests/test_pyhegp.py | 13 |
3 files changed, 22 insertions, 4 deletions
diff --git a/pyhegp/pyhegp.py b/pyhegp/pyhegp.py index 48d0ea2..c9a2240 100644 --- a/pyhegp/pyhegp.py +++ b/pyhegp/pyhegp.py @@ -262,10 +262,11 @@ def encrypt_command(genotype_file, phenotype_file, summary_file, if key_output_file: write_key(key_output_file, key) - # Drop SNPs that have a zero standard deviation. Such SNPs have no - # discriminatory power in the analysis and mess with our - # standardization by causing a division by zero. - summary_subset = drop_zero_stddev_snps(summary) + # Drop SNPs that have a zero standard deviation. Such SNPs + # have no discriminatory power in the analysis and mess with + # our standardization by causing a division by zero. This is + # not a problem if we are only centering. + summary_subset = summary if only_center else drop_zero_stddev_snps(summary) if (dropped_zero_stddev_snps := len(summary.data) - len(summary_subset.data)) > 0: print(f"Dropped {dropped_zero_stddev_snps} SNP(s) with zero standard deviation") diff --git a/test-data/genotype-with-zero-stddev-snp.tsv b/test-data/genotype-with-zero-stddev-snp.tsv new file mode 100644 index 0000000..a78977c --- /dev/null +++ b/test-data/genotype-with-zero-stddev-snp.tsv @@ -0,0 +1,4 @@ +chromosome position reference sample0 sample1 sample2 +chr 0 A 2 0 2 +chr 1 A 1 0 1 +chr 2 A 1 1 1 diff --git a/tests/test_pyhegp.py b/tests/test_pyhegp.py index 569ebe5..3192bf7 100644 --- a/tests/test_pyhegp.py +++ b/tests/test_pyhegp.py @@ -178,6 +178,19 @@ def test_encrypt_genotype_does_not_produce_na(genotype, key, only_center): def test_encrypt_phenotype_does_not_produce_na(phenotype, key): assert not encrypt_phenotype(phenotype, key).isna().any(axis=None) +def test_encrypt_with_only_center_does_not_drop_snps(tmp_path): + genotype_file = Path("test-data/genotype-with-zero-stddev-snp.tsv") + shutil.copy(genotype_file, tmp_path) + result = CliRunner().invoke(main, ["encrypt", + "--only-center", + str(tmp_path / genotype_file.name)]) + assert result.exit_code == 0 + with genotype_file.open("rb") as file: + genotype = read_genotype(file) + with (tmp_path / f"{genotype_file.name}.hegp").open("rb") as file: + encrypted_genotype = read_genotype(file) + assert len(genotype) == len(encrypted_genotype) + @pytest.mark.parametrize("summary_files", [[Path("test-data/pool-test-summary1"), Path("test-data/pool-test-summary2")], |
