about summary refs log tree commit diff
path: root/tests/helpers
diff options
context:
space:
mode:
author Arun Isaac 2025-09-02 11:45:59 +0100
committer Arun Isaac 2025-09-02 18:35:00 +0100
commit 0a7d4eb372239a0714e17dfa32947f244503c7c0 (patch)
tree e119843de196cd055494da10706fc9c621974249 /tests/helpers
parent 2b3779a6cae2ed4eaa280c2bb72b91c388152e36 (diff)
download pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.tar.gz
pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.tar.lz
pyhegp-0a7d4eb372239a0714e17dfa32947f244503c7c0.zip
Move hypothesis strategies to separate file.
These strategies may be used by other test modules as well.
Diffstat (limited to 'tests/helpers')
-rw-r--r-- tests/helpers/__init__.py 0
-rw-r--r-- tests/helpers/strategies.py 88
2 files changed, 88 insertions, 0 deletions
diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/helpers/__init__.py
diff --git a/tests/helpers/strategies.py b/tests/helpers/strategies.py
new file mode 100644
index 0000000..fc8ae1d
--- /dev/null
+++ b/tests/helpers/strategies.py
@@ -0,0 +1,88 @@
+### pyhegp --- Homomorphic encryption of genotypes and phenotypes
+### Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net>
+###
+### This file is part of pyhegp.
+###
+### pyhegp is free software: you can redistribute it and/or modify it
+### under the terms of the GNU General Public License as published by
+### the Free Software Foundation, either version 3 of the License, or
+### (at your option) any later version.
+###
+### pyhegp is distributed in the hope that it will be useful, but
+### WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+### General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with pyhegp. If not, see <https://www.gnu.org/licenses/>.
+
+from hypothesis import strategies as st
+from hypothesis.extra.pandas import column, columns, data_frames
+
+from pyhegp.serialization import Summary
+from pyhegp.utils import negate
+
+tabless_printable_ascii_text = st.text(  # non-empty (min_size=1) ASCII text strategy
+    # Exclude control characters and tab.
+    st.characters(codec="ascii",
+                  exclude_categories=("Cc",),
+                  exclude_characters=("\t",)),
+    min_size=1)
+
+chromosome_column = column(name="chromosome",  # string-typed "chromosome" column
+                           dtype="str",
+                           elements=tabless_printable_ascii_text)
+
+position_column = column(name="position",  # no elements given: hypothesis infers them from the dtype
+                         dtype="int")
+
+reference_column = column(name="reference",  # non-empty strings over the alphabet A, G, C, T
+                          dtype="str",
+                          elements=st.text(
+                              st.characters(codec="ascii",
+                                            categories=(),  # no whole categories; only include_characters are allowed
+                                            include_characters=("A", "G", "C", "T")),
+                              min_size=1))
+
+def genotype_reserved_column_name_p(name):  # predicate: is name one of the fixed genotype column names?
+    return name.lower() in {"chromosome", "position", "reference"}  # comparison ignores case
+
+sample_names = st.lists(tabless_printable_ascii_text  # lists of names; no min_size, so the list may be empty
+                        .filter(negate(genotype_reserved_column_name_p)),  # negate comes from pyhegp.utils; presumably inverts the predicate so reserved names are rejected (confirm)
+                        unique=True)  # no name appears twice in one list
+
+@st.composite  # makes summaries() a strategy; draw is supplied by hypothesis
+def summaries(draw):  # generates Summary objects from an arbitrary integer and a data frame
+    return Summary(draw(st.integers()),
+                   draw(data_frames(
+                       columns=([chromosome_column, position_column]  # always present
+                                + ([reference_column] if draw(st.booleans()) else [])  # reference column is included or omitted at random
+                                + columns(["mean", "std"],  # two float64 columns named "mean" and "std"
+                                          dtype="float64",
+                                          elements=st.floats(allow_nan=False))))))  # NaN is never generated
+
+@st.composite  # makes genotype_frames() a strategy; draw is supplied by hypothesis
+def genotype_frames(draw):  # generates data frames with fixed leading columns followed by per-sample columns
+    return draw(data_frames(
+        columns=([chromosome_column, position_column]  # always present
+                 + ([reference_column] if draw(st.booleans()) else [])  # reference column is included or omitted at random
+                 + columns(draw(sample_names),  # one float64 column per drawn sample name
+                           dtype="float64",
+                           elements=st.floats(allow_nan=False)))))  # NaN is never generated
+
+def phenotype_reserved_column_name_p(name):  # predicate: is name the fixed phenotype column name?
+    return name.lower() == "sample-id"  # comparison ignores case
+
+phenotype_names = st.lists(tabless_printable_ascii_text  # lists of names; no min_size, so the list may be empty
+                           .filter(negate(phenotype_reserved_column_name_p)),  # negate comes from pyhegp.utils; presumably inverts the predicate so "sample-id" is rejected (confirm)
+                           unique=True)  # no name appears twice in one list
+
+@st.composite  # makes phenotype_frames() a strategy; draw is supplied by hypothesis
+def phenotype_frames(draw):  # generates data frames with a "sample-id" column followed by per-phenotype columns
+    return draw(data_frames(
+        columns=([column(name="sample-id",  # string-typed column, always first
+                         dtype="str",
+                         elements=tabless_printable_ascii_text)]
+                 + columns(draw(phenotype_names),  # one float64 column per drawn phenotype name
+                           dtype="float64",
+                           elements=st.floats(allow_nan=False)))))  # NaN is never generated