diff options
-rw-r--r-- | pyhegp/serialization.py | 9 |
1 files changed, 6 insertions, 3 deletions
diff --git a/pyhegp/serialization.py b/pyhegp/serialization.py index 799109e..c86d216 100644 --- a/pyhegp/serialization.py +++ b/pyhegp/serialization.py @@ -66,8 +66,9 @@ def write_summary(file, summary): float_format="%.8g", index=False)) -def read_tsv(file): +def read_tsv(file, dtype): return pd.read_csv(file, + dtype=dtype, quoting=csv.QUOTE_NONE, sep="\t", na_filter=False, @@ -78,7 +79,9 @@ def read_tsv(file): skip_blank_lines=False) def read_genotype(file): - df = read_tsv(file) + df = read_tsv(file, {"chromosome": "str", + "position": "int", + "reference": "str"}) sample_columns = [column for column in df.columns if column not in ["chromosome", "position", "reference"]] @@ -90,7 +93,7 @@ def read_genotype(file): return df def read_phenotype(file): - df = read_tsv(file) + df = read_tsv(file, {"sample-id": "str"}) phenotype_columns = [column for column in df.columns if column != "sample-id"] |