Move hypothesis strategies to a separate file.

These strategies may be used by other test modules as well.
author: Arun Isaac 2025-09-02 11:45:59 +0100
committer: Arun Isaac 2025-09-02 18:35:00 +0100
commit: 0a7d4eb372239a0714e17dfa32947f244503c7c0 (patch)
tree: e119843de196cd055494da10706fc9c621974249
parent: 2b3779a6cae2ed4eaa280c2bb72b91c388152e36 (diff)
download: pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.tar.gz
pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.tar.lz
pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.zip
3 files changed, 90 insertions, 64 deletions
diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/helpers/__init__.py
diff --git a/tests/helpers/strategies.py b/tests/helpers/strategies.py
new file mode 100644
index 0000000..fc8ae1d
--- /dev/null
+++ b/tests/helpers/strategies.py
@@ -0,0 +1,88 @@
+### pyhegp --- Homomorphic encryption of genotypes and phenotypes
+### Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net>
+###
+### This file is part of pyhegp.
+###
+### pyhegp is free software: you can redistribute it and/or modify it
+### under the terms of the GNU General Public License as published by
+### the Free Software Foundation, either version 3 of the License, or
+### (at your option) any later version.
+###
+### pyhegp is distributed in the hope that it will be useful, but
+### WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+### General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with pyhegp. If not, see <https://www.gnu.org/licenses/>.
+
+from hypothesis import strategies as st
+from hypothesis.extra.pandas import column, columns, data_frames
+
+from pyhegp.serialization import Summary
+from pyhegp.utils import negate
+
+tabless_printable_ascii_text = st.text(
+    # Exclude control characters and tab.
+    st.characters(codec="ascii",
+                  exclude_categories=("Cc",),
+                  exclude_characters=("\t",)),
+    min_size=1)
+
+chromosome_column = column(name="chromosome",
+                           dtype="str",
+                           elements=tabless_printable_ascii_text)
+
+position_column = column(name="position",
+                         dtype="int")
+
+reference_column = column(name="reference",
+                          dtype="str",
+                          elements=st.text(
+                              st.characters(codec="ascii",
+                                            categories=(),
+                                            include_characters=("A", "G", "C", "T")),
+                              min_size=1))
+
+def genotype_reserved_column_name_p(name):
+    return name.lower() in {"chromosome", "position", "reference"}
+
+sample_names = st.lists(tabless_printable_ascii_text
+                        .filter(negate(genotype_reserved_column_name_p)),
+                        unique=True)
+
+@st.composite
+def summaries(draw):
+    return Summary(draw(st.integers()),
+                   draw(data_frames(
+                       columns=([chromosome_column, position_column]
+                                + ([reference_column] if draw(st.booleans()) else [])
+                                + columns(["mean", "std"],
+                                          dtype="float64",
+                                          elements=st.floats(allow_nan=False))))))
+
+@st.composite
+def genotype_frames(draw):
+    return draw(data_frames(
+        columns=([chromosome_column, position_column]
+                 + ([reference_column] if draw(st.booleans()) else [])
+                 + columns(draw(sample_names),
+                           dtype="float64",
+                           elements=st.floats(allow_nan=False)))))
+
+def phenotype_reserved_column_name_p(name):
+    return name.lower() == "sample-id"
+
+phenotype_names = st.lists(tabless_printable_ascii_text
+                           .filter(negate(phenotype_reserved_column_name_p)),
+                           unique=True)
+
+@st.composite
+def phenotype_frames(draw):
+    return draw(data_frames(
+        columns=([column(name="sample-id",
+                         dtype="str",
+                         elements=tabless_printable_ascii_text)]
+                 + columns(draw(phenotype_names),
+                           dtype="float64",
+                           elements=st.floats(allow_nan=False)))))
diff --git a/tests/test_serialization.py b/tests/test_serialization.py
index c856094..ab354e1 100644
--- a/tests/test_serialization.py
+++ b/tests/test_serialization.py
@@ -20,41 +20,12 @@ import tempfile
 
 from hypothesis import given, strategies as st
 from hypothesis.extra.numpy import arrays, array_shapes
-from hypothesis.extra.pandas import column, columns, data_frames
 import pandas as pd
 from pytest import approx
 
-from pyhegp.serialization import Summary, read_summary, write_summary, read_summary_headers, read_genotype, write_genotype, read_phenotype, write_phenotype, read_key, write_key
-from pyhegp.utils import negate
+from pyhegp.serialization import read_summary, write_summary, read_summary_headers, read_genotype, write_genotype, read_phenotype, write_phenotype, read_key, write_key
 
-tabless_printable_ascii_text = st.text(
-    # Exclude control characters and tab.
-    st.characters(codec="ascii",
-                  exclude_categories=("Cc",),
-                  exclude_characters=("\t",)),
-    min_size=1)
-chromosome_column = column(name="chromosome",
-                           dtype="str",
-                           elements=tabless_printable_ascii_text)
-position_column = column(name="position",
-                         dtype="int")
-reference_column = column(name="reference",
-                          dtype="str",
-                          elements=st.text(
-                              st.characters(codec="ascii",
-                                            categories=(),
-                                            include_characters=("A", "G", "C", "T")),
-                              min_size=1))
-
-@st.composite
-def summaries(draw):
-    return Summary(draw(st.integers()),
-                   draw(data_frames(
-                       columns=([chromosome_column, position_column]
-                                + ([reference_column] if draw(st.booleans()) else [])
-                                + columns(["mean", "std"],
-                                          dtype="float64",
-                                          elements=st.floats(allow_nan=False))))))
+from helpers.strategies import *
 
 @given(summaries())
 def test_read_write_summary_are_inverses(summary):
@@ -95,22 +66,6 @@ def test_read_summary_headers_variable_whitespace(properties_and_whitespace):
         file.seek(0)
         assert properties == read_summary_headers(file)
 
-def genotype_reserved_column_name_p(name):
-    return name.lower() in {"chromosome", "position", "reference"}
-
-sample_names = st.lists(tabless_printable_ascii_text
-                        .filter(negate(genotype_reserved_column_name_p)),
-                        unique=True)
-
-@st.composite
-def genotype_frames(draw):
-    return draw(data_frames(
-        columns=([chromosome_column, position_column]
-                 + ([reference_column] if draw(st.booleans()) else [])
-                 + columns(draw(sample_names),
-                           dtype="float64",
-                           elements=st.floats(allow_nan=False)))))
-
 @given(genotype_frames())
 def test_read_write_genotype_are_inverses(genotype):
     with tempfile.TemporaryFile() as file:
@@ -118,23 +73,6 @@ def test_read_write_genotype_are_inverses(genotype):
         file.seek(0)
         pd.testing.assert_frame_equal(genotype, read_genotype(file))
 
-def phenotype_reserved_column_name_p(name):
-    return name.lower() == "sample-id"
-
-phenotype_names = st.lists(tabless_printable_ascii_text
-                           .filter(negate(phenotype_reserved_column_name_p)),
-                           unique=True)
-
-@st.composite
-def phenotype_frames(draw):
-    return draw(data_frames(
-        columns=([column(name="sample-id",
-                         dtype="str",
-                         elements=tabless_printable_ascii_text)]
-                 + columns(draw(phenotype_names),
-                           dtype="float64",
-                           elements=st.floats(allow_nan=False)))))
-
 @given(phenotype_frames())
 def test_read_write_phenotype_are_inverses(phenotype):
     with tempfile.TemporaryFile() as file:
author	Arun Isaac	2025-09-02 11:45:59 +0100
committer	Arun Isaac	2025-09-02 18:35:00 +0100
commit	0a7d4eb372239a0714e17dfa32947f244503c7c0 (patch)
tree	e119843de196cd055494da10706fc9c621974249
parent	2b3779a6cae2ed4eaa280c2bb72b91c388152e36 (diff)
download	pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.tar.gz pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.tar.lz pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.zip