about summary refs log tree commit diff
path: root/tests
diff options
context:
space:
mode:
author: Arun Isaac, 2026-01-28 01:04:40 +0000
committer: Arun Isaac, 2026-01-28 01:18:09 +0000
commit: 83fff7bec63b80e5c89bbffbac95504660013dce (patch)
tree: f908974323723bf96cfef7e57c9058b1d8b4f990 /tests
parent: d4afd6d2a02c12697c959c046cd345ab0a645f72 (diff)
download: pyhegp-83fff7bec63b80e5c89bbffbac95504660013dce.tar.gz
          pyhegp-83fff7bec63b80e5c89bbffbac95504660013dce.tar.lz
          pyhegp-83fff7bec63b80e5c89bbffbac95504660013dce.zip
Require that (chromosome, position) in genotype frames is unique. [HEAD, main]
Earlier, we required that (chromosome, position, reference) be
unique. We tighten this restriction, requiring that (chromosome,
position) be unique. Therefore, there can be only one reference
allele at any given chromosome and position.
Diffstat (limited to 'tests')
-rw-r--r--  tests/helpers/strategies.py | 11
1 file changed, 6 insertions, 5 deletions
diff --git a/tests/helpers/strategies.py b/tests/helpers/strategies.py
index a30a34c..210fc7f 100644
--- a/tests/helpers/strategies.py
+++ b/tests/helpers/strategies.py
@@ -45,9 +45,7 @@ sample_names = (tabless_printable_ascii_text
                 .filter(negate(is_genotype_metadata_column)))
 
 def genotype_metadata(draw, number_of_snps, reference_present):
-    match list(zip(*draw(st.lists(st.tuples(chromosomes, positions, references)
-                                  if reference_present
-                                  else st.tuples(chromosomes, positions),
+    match list(zip(*draw(st.lists(st.tuples(chromosomes, positions),
                                   min_size=number_of_snps,
                                   max_size=number_of_snps,
                                   unique=True)))):
@@ -56,10 +54,13 @@ def genotype_metadata(draw, number_of_snps, reference_present):
                                  "position": pd.Series(dtype="int")}
                                 | ({"reference": pd.Series(dtype="str")}
                                    if reference_present else {}))
-        case chromosomes_lst, positions_lst, *references_lst:
+        case chromosomes_lst, positions_lst:
             return pd.DataFrame({"chromosome": pd.Series(chromosomes_lst, dtype="str"),
                                  "position": pd.Series(positions_lst, dtype="int")}
-                                | ({"reference": pd.Series(*references_lst, dtype="str")}
+                                | ({"reference": pd.Series(draw(st.lists(references,
+                                                                         min_size=number_of_snps,
+                                                                         max_size=number_of_snps)),
+                                                           dtype="str")}
                                    if reference_present else {}))
         case _ as unreachable:
             assert_never(unreachable)