diff options
author | Arun Isaac | 2025-09-03 10:56:21 +0100 |
---|---|---|
committer | Arun Isaac | 2025-09-04 00:30:00 +0100 |
commit | 7d9d55e571b562a8d8faaee100c2ae075034a91d (patch) | |
tree | 4a844bd9964ba170d56d32c2b63e722e9bc87593 | |
parent | 9d6b612f81ece8ba6cb7aa63efedc91cca491247 (diff) | |
download | pyhegp-7d9d55e571b562a8d8faaee100c2ae075034a91d.tar.gz pyhegp-7d9d55e571b562a8d8faaee100c2ae075034a91d.tar.lz pyhegp-7d9d55e571b562a8d8faaee100c2ae075034a91d.zip |
Support encrypting phenotypes.
-rw-r--r-- | pyhegp/pyhegp.py | 36 |
1 files changed, 29 insertions, 7 deletions
diff --git a/pyhegp/pyhegp.py b/pyhegp/pyhegp.py index fcd20ea..b4c1f2e 100644 --- a/pyhegp/pyhegp.py +++ b/pyhegp/pyhegp.py @@ -110,6 +110,15 @@ def encrypt_genotype(genotype, key, summary): columns=sample_names)), axis="columns") +def encrypt_phenotype(phenotype, key): + phenotype_matrix = phenotype.drop(columns=["sample-id"]) + sample_names = phenotype_matrix.columns + return pd.concat((phenotype["sample-id"], + pd.DataFrame(hegp_encrypt(phenotype_matrix.to_numpy(), + key), + columns=sample_names)), + axis="columns") + def cat_data_frames(frames, metadata_columns): def cat2(df1, df2): return pd.merge(df1, df2, how="inner", on=metadata_columns) @@ -171,11 +180,20 @@ def pool_command(pooled_summary_file, summary_files): @main.command("encrypt") @click.argument("genotype-file", type=click.File("r")) +@click.argument("phenotype-file", type=click.File("r"), required=False) @click.option("--summary", "-s", "summary_file", type=click.File("rb"), help="Summary statistics file") @click.option("--key", "-k", "key_file", type=click.File("w"), help="Output key") -def encrypt_command(genotype_file, summary_file, key_file): +def encrypt_command(genotype_file, phenotype_file, summary_file, key_file): + def write_ciphertext(plaintext_path, writer): + ciphertext_path = Path(plaintext_path + ".hegp") + if ciphertext_path.exists(): + print(f"Output file {ciphertext_path} exists, cannot overwrite.") + sys.exit(1) + with ciphertext_path.open("w") as ciphertext_file: + writer(ciphertext_file) + genotype = read_genotype(genotype_file) if summary_file: summary = read_summary(summary_file) @@ -187,16 +205,20 @@ def encrypt_command(genotype_file, summary_file, key_file): .columns)) if key_file: write_key(key_file, key) + encrypted_genotype = encrypt_genotype(genotype, key, summary) if len(encrypted_genotype) < len(genotype): dropped_snps = len(genotype) - len(encrypted_genotype) print(f"Dropped {dropped_snps} SNP(s)") - ciphertext_path = Path(genotype_file.name + ".hegp") - if ciphertext_path.exists(): - print(f"Output file {ciphertext_path} exists, cannot overwrite.") - sys.exit(1) - with ciphertext_path.open("w") as ciphertext_file: - write_genotype(ciphertext_file, encrypted_genotype) + write_ciphertext(genotype_file.name, + lambda file: write_genotype(file, encrypted_genotype)) + + if phenotype_file: + write_ciphertext(phenotype_file.name, + lambda file: + write_phenotype(file, encrypt_phenotype( + read_phenotype(phenotype_file), + key))) @main.command("cat-genotype") @click.option("--output", "-o", "output_file", |