diff options
author | Arun Isaac | 2025-09-02 11:45:59 +0100 |
---|---|---|
committer | Arun Isaac | 2025-09-02 18:35:00 +0100 |
commit | 0a7d4eb372239a0714e17dfa32947f244503c7c0 (patch) | |
tree | e119843de196cd055494da10706fc9c621974249 | |
parent | 2b3779a6cae2ed4eaa280c2bb72b91c388152e36 (diff) | |
download | pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.tar.gz pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.tar.lz pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.zip |
Move hypothesis strategies to separate file.
These strategies may be used by other test modules as well.
-rw-r--r-- | tests/helpers/__init__.py | 0 |
-rw-r--r-- | tests/helpers/strategies.py | 88 |
-rw-r--r-- | tests/test_serialization.py | 66 |
3 files changed, 90 insertions, 64 deletions
diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/helpers/__init__.py diff --git a/tests/helpers/strategies.py b/tests/helpers/strategies.py new file mode 100644 index 0000000..fc8ae1d --- /dev/null +++ b/tests/helpers/strategies.py @@ -0,0 +1,88 @@ +### pyhegp --- Homomorphic encryption of genotypes and phenotypes +### Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net> +### +### This file is part of pyhegp. +### +### pyhegp is free software: you can redistribute it and/or modify it +### under the terms of the GNU General Public License as published by +### the Free Software Foundation, either version 3 of the License, or +### (at your option) any later version. +### +### pyhegp is distributed in the hope that it will be useful, but +### WITHOUT ANY WARRANTY; without even the implied warranty of +### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +### General Public License for more details. +### +### You should have received a copy of the GNU General Public License +### along with pyhegp. If not, see <https://www.gnu.org/licenses/>. + +from hypothesis import strategies as st +from hypothesis.extra.pandas import column, columns, data_frames + +from pyhegp.serialization import Summary +from pyhegp.utils import negate + +tabless_printable_ascii_text = st.text( + # Exclude control characters and tab. 
+ st.characters(codec="ascii", + exclude_categories=("Cc",), + exclude_characters=("\t",)), + min_size=1) + +chromosome_column = column(name="chromosome", + dtype="str", + elements=tabless_printable_ascii_text) + +position_column = column(name="position", + dtype="int") + +reference_column = column(name="reference", + dtype="str", + elements=st.text( + st.characters(codec="ascii", + categories=(), + include_characters=("A", "G", "C", "T")), + min_size=1)) + +def genotype_reserved_column_name_p(name): + return name.lower() in {"chromosome", "position", "reference"} + +sample_names = st.lists(tabless_printable_ascii_text + .filter(negate(genotype_reserved_column_name_p)), + unique=True) + +@st.composite +def summaries(draw): + return Summary(draw(st.integers()), + draw(data_frames( + columns=([chromosome_column, position_column] + + ([reference_column] if draw(st.booleans()) else []) + + columns(["mean", "std"], + dtype="float64", + elements=st.floats(allow_nan=False)))))) + +@st.composite +def genotype_frames(draw): + return draw(data_frames( + columns=([chromosome_column, position_column] + + ([reference_column] if draw(st.booleans()) else []) + + columns(draw(sample_names), + dtype="float64", + elements=st.floats(allow_nan=False))))) + +def phenotype_reserved_column_name_p(name): + return name.lower() == "sample-id" + +phenotype_names = st.lists(tabless_printable_ascii_text + .filter(negate(phenotype_reserved_column_name_p)), + unique=True) + +@st.composite +def phenotype_frames(draw): + return draw(data_frames( + columns=([column(name="sample-id", + dtype="str", + elements=tabless_printable_ascii_text)] + + columns(draw(phenotype_names), + dtype="float64", + elements=st.floats(allow_nan=False))))) diff --git a/tests/test_serialization.py b/tests/test_serialization.py index c856094..ab354e1 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -20,41 +20,12 @@ import tempfile from hypothesis import given, strategies as st from 
hypothesis.extra.numpy import arrays, array_shapes -from hypothesis.extra.pandas import column, columns, data_frames import pandas as pd from pytest import approx -from pyhegp.serialization import Summary, read_summary, write_summary, read_summary_headers, read_genotype, write_genotype, read_phenotype, write_phenotype, read_key, write_key -from pyhegp.utils import negate +from pyhegp.serialization import read_summary, write_summary, read_summary_headers, read_genotype, write_genotype, read_phenotype, write_phenotype, read_key, write_key -tabless_printable_ascii_text = st.text( - # Exclude control characters and tab. - st.characters(codec="ascii", - exclude_categories=("Cc",), - exclude_characters=("\t",)), - min_size=1) -chromosome_column = column(name="chromosome", - dtype="str", - elements=tabless_printable_ascii_text) -position_column = column(name="position", - dtype="int") -reference_column = column(name="reference", - dtype="str", - elements=st.text( - st.characters(codec="ascii", - categories=(), - include_characters=("A", "G", "C", "T")), - min_size=1)) - -@st.composite -def summaries(draw): - return Summary(draw(st.integers()), - draw(data_frames( - columns=([chromosome_column, position_column] - + ([reference_column] if draw(st.booleans()) else []) - + columns(["mean", "std"], - dtype="float64", - elements=st.floats(allow_nan=False)))))) +from helpers.strategies import * @given(summaries()) def test_read_write_summary_are_inverses(summary): @@ -95,22 +66,6 @@ def test_read_summary_headers_variable_whitespace(properties_and_whitespace): file.seek(0) assert properties == read_summary_headers(file) -def genotype_reserved_column_name_p(name): - return name.lower() in {"chromosome", "position", "reference"} - -sample_names = st.lists(tabless_printable_ascii_text - .filter(negate(genotype_reserved_column_name_p)), - unique=True) - -@st.composite -def genotype_frames(draw): - return draw(data_frames( - columns=([chromosome_column, position_column] - + 
([reference_column] if draw(st.booleans()) else []) - + columns(draw(sample_names), - dtype="float64", - elements=st.floats(allow_nan=False))))) - @given(genotype_frames()) def test_read_write_genotype_are_inverses(genotype): with tempfile.TemporaryFile() as file: @@ -118,23 +73,6 @@ def test_read_write_genotype_are_inverses(genotype): file.seek(0) pd.testing.assert_frame_equal(genotype, read_genotype(file)) -def phenotype_reserved_column_name_p(name): - return name.lower() == "sample-id" - -phenotype_names = st.lists(tabless_printable_ascii_text - .filter(negate(phenotype_reserved_column_name_p)), - unique=True) - -@st.composite -def phenotype_frames(draw): - return draw(data_frames( - columns=([column(name="sample-id", - dtype="str", - elements=tabless_printable_ascii_text)] - + columns(draw(phenotype_names), - dtype="float64", - elements=st.floats(allow_nan=False))))) - @given(phenotype_frames()) def test_read_write_phenotype_are_inverses(phenotype): with tempfile.TemporaryFile() as file: |