about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Arun Isaac, 2025-09-02 17:55:12 +0100
committer: Arun Isaac, 2025-09-02 22:32:03 +0100
commit: bf80585e2b3cb7ba4abe474af06bb49e7259f94c (patch)
tree: 9c8c25c57bd60fd226e808fadcbed0f3620d561a
parent: 3f3dd13f75ab91862c9e0cbd5e65f1da1e26cf4b (diff)
download: pyhegp-bf80585e2b3cb7ba4abe474af06bb49e7259f94c.tar.gz
pyhegp-bf80585e2b3cb7ba4abe474af06bb49e7259f94c.tar.lz
pyhegp-bf80585e2b3cb7ba4abe474af06bb49e7259f94c.zip
Add is_genotype_metadata_column.
Promote genotype_reserved_column_name_p from helpers.strategies to
is_genotype_metadata_column in pyhegp.serialization, and use it
everywhere.
-rw-r--r-- pyhegp/serialization.py 5
-rw-r--r-- tests/helpers/strategies.py 9
2 files changed, 7 insertions, 7 deletions
diff --git a/pyhegp/serialization.py b/pyhegp/serialization.py
index c86d216..ba2cb0f 100644
--- a/pyhegp/serialization.py
+++ b/pyhegp/serialization.py
@@ -78,13 +78,16 @@ def read_tsv(file, dtype):
                        # data file.
                        skip_blank_lines=False)
 
+def is_genotype_metadata_column(name):
+    return name.lower() in {"chromosome", "position", "reference"}
+
 def read_genotype(file):
     df = read_tsv(file, {"chromosome": "str",
                          "position": "int",
                          "reference": "str"})
     sample_columns = [column
                       for column in df.columns
-                      if column not in ["chromosome", "position", "reference"]]
+                      if not is_genotype_metadata_column(column)]
     df.chromosome = df.chromosome.astype("str")
     df.position = df.position.astype("int")
     if "reference" in df:
diff --git a/tests/helpers/strategies.py b/tests/helpers/strategies.py
index 7edf667..00c4c11 100644
--- a/tests/helpers/strategies.py
+++ b/tests/helpers/strategies.py
@@ -19,7 +19,7 @@
 from hypothesis import strategies as st
 from hypothesis.extra.pandas import column, columns, data_frames
 
-from pyhegp.serialization import Summary
+from pyhegp.serialization import Summary, is_genotype_metadata_column
 from pyhegp.utils import negate
 
 tabless_printable_ascii_text = st.text(
@@ -44,11 +44,8 @@ reference_column = column(name="reference",
                                             include_characters=("A", "G", "C", "T")),
                               min_size=1))
 
-def genotype_reserved_column_name_p(name):
-    return name.lower() in {"chromosome", "position", "reference"}
-
 sample_names = st.lists(tabless_printable_ascii_text
-                        .filter(negate(genotype_reserved_column_name_p)),
+                        .filter(negate(is_genotype_metadata_column)),
                         unique=True)
 
 @st.composite
@@ -70,7 +67,7 @@ def genotype_frames(draw):
                            dtype="float64",
                            elements=st.floats(allow_nan=False)))))
     return genotype.drop_duplicates(subset=list(
-        filter(genotype_reserved_column_name_p,
+        filter(is_genotype_metadata_column,
                genotype.columns)),
                                     ignore_index=True)