From 83fff7bec63b80e5c89bbffbac95504660013dce Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Wed, 28 Jan 2026 01:04:40 +0000 Subject: Require that (chromosome, position) in genotype frames is unique. Earlier, we required that (chromosome, position, reference) was unique. We now tighten this restriction by requiring that (chromosome, position) alone be unique. Therefore, there can be only one reference allele at any given chromosome and position. --- tests/helpers/strategies.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tests/helpers/strategies.py') diff --git a/tests/helpers/strategies.py b/tests/helpers/strategies.py index a30a34c..210fc7f 100644 --- a/tests/helpers/strategies.py +++ b/tests/helpers/strategies.py @@ -45,9 +45,7 @@ sample_names = (tabless_printable_ascii_text .filter(negate(is_genotype_metadata_column))) def genotype_metadata(draw, number_of_snps, reference_present): - match list(zip(*draw(st.lists(st.tuples(chromosomes, positions, references) - if reference_present - else st.tuples(chromosomes, positions), + match list(zip(*draw(st.lists(st.tuples(chromosomes, positions), min_size=number_of_snps, max_size=number_of_snps, unique=True)))): @@ -56,10 +54,13 @@ def genotype_metadata(draw, number_of_snps, reference_present): "position": pd.Series(dtype="int")} | ({"reference": pd.Series(dtype="str")} if reference_present else {})) - case chromosomes_lst, positions_lst, *references_lst: + case chromosomes_lst, positions_lst: return pd.DataFrame({"chromosome": pd.Series(chromosomes_lst, dtype="str"), "position": pd.Series(positions_lst, dtype="int")} - | ({"reference": pd.Series(*references_lst, dtype="str")} + | ({"reference": pd.Series(draw(st.lists(references, + min_size=number_of_snps, + max_size=number_of_snps)), + dtype="str")} if reference_present else {})) case _ as unreachable: assert_never(unreachable) -- cgit 1.4.1