diff options
-rw-r--r-- | pyhegp/serialization.py | 5 | ||||
-rw-r--r-- | tests/helpers/strategies.py | 9 |
2 files changed, 7 insertions, 7 deletions
diff --git a/pyhegp/serialization.py b/pyhegp/serialization.py index c86d216..ba2cb0f 100644 --- a/pyhegp/serialization.py +++ b/pyhegp/serialization.py @@ -78,13 +78,16 @@ def read_tsv(file, dtype): # data file. skip_blank_lines=False) +def is_genotype_metadata_column(name): + return name.lower() in {"chromosome", "position", "reference"} + def read_genotype(file): df = read_tsv(file, {"chromosome": "str", "position": "int", "reference": "str"}) sample_columns = [column for column in df.columns - if column not in ["chromosome", "position", "reference"]] + if not is_genotype_metadata_column(column)] df.chromosome = df.chromosome.astype("str") df.position = df.position.astype("int") if "reference" in df: diff --git a/tests/helpers/strategies.py b/tests/helpers/strategies.py index 7edf667..00c4c11 100644 --- a/tests/helpers/strategies.py +++ b/tests/helpers/strategies.py @@ -19,7 +19,7 @@ from hypothesis import strategies as st from hypothesis.extra.pandas import column, columns, data_frames -from pyhegp.serialization import Summary +from pyhegp.serialization import Summary, is_genotype_metadata_column from pyhegp.utils import negate tabless_printable_ascii_text = st.text( @@ -44,11 +44,8 @@ reference_column = column(name="reference", include_characters=("A", "G", "C", "T")), min_size=1)) -def genotype_reserved_column_name_p(name): - return name.lower() in {"chromosome", "position", "reference"} - sample_names = st.lists(tabless_printable_ascii_text - .filter(negate(genotype_reserved_column_name_p)), + .filter(negate(is_genotype_metadata_column)), unique=True) @st.composite @@ -70,7 +67,7 @@ def genotype_frames(draw): dtype="float64", elements=st.floats(allow_nan=False))))) return genotype.drop_duplicates(subset=list( - filter(genotype_reserved_column_name_p, + filter(is_genotype_metadata_column, genotype.columns)), ignore_index=True) |