about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Arun Isaac, 2026-01-28 01:04:40 +0000
committer: Arun Isaac, 2026-01-28 01:18:09 +0000
commit: 83fff7bec63b80e5c89bbffbac95504660013dce (patch)
tree: f908974323723bf96cfef7e57c9058b1d8b4f990
parent: d4afd6d2a02c12697c959c046cd345ab0a645f72 (diff)
download: pyhegp-main.tar.gz
pyhegp-main.tar.lz
pyhegp-main.zip
Require that (chromosome, position) in genotype frames is unique. HEAD main
Earlier, we required that (chromosome, position, reference) be
unique. We now tighten this restriction, requiring that (chromosome,
position) alone be unique. Therefore, there can be only one reference
allele at any given chromosome and position.
-rw-r--r-- tests/helpers/strategies.py 11
1 file changed, 6 insertions, 5 deletions
diff --git a/tests/helpers/strategies.py b/tests/helpers/strategies.py
index a30a34c..210fc7f 100644
--- a/tests/helpers/strategies.py
+++ b/tests/helpers/strategies.py
@@ -45,9 +45,7 @@ sample_names = (tabless_printable_ascii_text
                 .filter(negate(is_genotype_metadata_column)))
 
 def genotype_metadata(draw, number_of_snps, reference_present):
-    match list(zip(*draw(st.lists(st.tuples(chromosomes, positions, references)
-                                  if reference_present
-                                  else st.tuples(chromosomes, positions),
+    match list(zip(*draw(st.lists(st.tuples(chromosomes, positions),
                                   min_size=number_of_snps,
                                   max_size=number_of_snps,
                                   unique=True)))):
@@ -56,10 +54,13 @@ def genotype_metadata(draw, number_of_snps, reference_present):
                                  "position": pd.Series(dtype="int")}
                                 | ({"reference": pd.Series(dtype="str")}
                                    if reference_present else {}))
-        case chromosomes_lst, positions_lst, *references_lst:
+        case chromosomes_lst, positions_lst:
             return pd.DataFrame({"chromosome": pd.Series(chromosomes_lst, dtype="str"),
                                  "position": pd.Series(positions_lst, dtype="int")}
-                                | ({"reference": pd.Series(*references_lst, dtype="str")}
+                                | ({"reference": pd.Series(draw(st.lists(references,
+                                                                         min_size=number_of_snps,
+                                                                         max_size=number_of_snps)),
+                                                           dtype="str")}
                                    if reference_present else {}))
         case _ as unreachable:
             assert_never(unreachable)