diff options
| author | Arun Isaac | 2025-11-13 02:23:34 +0000 |
|---|---|---|
| committer | Arun Isaac | 2025-11-13 02:23:34 +0000 |
| commit | 2bd7b74814ca208a30382c6ad847e169addc542d (patch) | |
| tree | 89d51b7a3aeffe759474e52ae104ed9f1b74646e | |
| parent | f0076f0a413ad3706e3b012b07e4a14c61c79b6e (diff) | |
| download | pyhegp-2bd7b74814ca208a30382c6ad847e169addc542d.tar.gz pyhegp-2bd7b74814ca208a30382c6ad847e169addc542d.tar.lz pyhegp-2bd7b74814ca208a30382c6ad847e169addc542d.zip | |
Add JWAS tests to HSmice test.
| -rw-r--r-- | .guix/hsmice-test.scm | 94 | ||||
| -rw-r--r-- | e2e-tests/hsmice/jwas-gwas.jl | 46 | ||||
| -rw-r--r-- | e2e-tests/hsmice/jwas-manhattan.r | 13 |
3 files changed, 152 insertions, 1 deletions
diff --git a/.guix/hsmice-test.scm b/.guix/hsmice-test.scm index c5e4257..bb84a09 100644 --- a/.guix/hsmice-test.scm +++ b/.guix/hsmice-test.scm @@ -18,9 +18,12 @@ (define-module (hsmice-test) #:use-module ((gn packages bioinformatics) #:select (r-genio)) + #:use-module ((gn packages julia) #:select (julia-jwas julia-pipe)) #:use-module ((gnu packages base) #:select (tar)) #:use-module ((gnu packages compression) #:select (gzip)) #:use-module ((gnu packages cran) #:select (r-dplyr r-purrr r-qqman r-stringr)) + #:use-module ((gnu packages julia) #:select (julia)) + #:use-module ((gnu packages julia-xyz) #:select (julia-csv julia-dataframes)) #:use-module ((gnu packages python) #:select (python)) #:use-module ((gnu packages python-science) #:select (python-pandas)) #:use-module ((gnu packages python-xyz) #:select (python-click)) @@ -195,9 +198,98 @@ genome-wide association study} library for R.") (define hsmice-qtl-checked (computed-file "hsmice-qtl-checked" hsmice-qtl-checked-gexp)) +(define hsmice-jwas-gwas-gexp + (let ((gwas-script (local-file "../e2e-tests/hsmice/jwas-gwas.jl")) + (jwas-manhattan-script (local-file "../e2e-tests/hsmice/jwas-manhattan.r")) + (script-profile (profile + (content (packages->manifest + (list julia julia-csv julia-dataframes + julia-jwas julia-pipe + r r-dplyr r-qqman r-readr)))))) + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils)) + + (mkdir #$output) + (setenv "HOME" "/tmp") + (set-path-environment-variable + "PATH" '("bin") '(#$script-profile)) + (set-path-environment-variable + "JULIA_LOAD_PATH" '("share/julia/loadpath") '(#$script-profile)) + (set-path-environment-variable + "JULIA_DEPOT_PATH" '("share/julia") '(#$script-profile)) + (set-path-environment-variable + "R_LIBS_SITE" '("site-library") '(#$script-profile)) + + ;; GWAS on plaintext + (invoke "julia" #$gwas-script + #$(file-append hsmice-wrangled "/genotype.tsv") + #$(file-append hsmice-wrangled "/phenotype.tsv") + (string-append #$output "/plaintext-jwas-gwas.tsv")) + (invoke "Rscript" #$jwas-manhattan-script + (string-append #$output "/plaintext-jwas-gwas.tsv")) + (copy-file "Rplots.pdf" + (string-append #$output "/plaintext-jwas-manhattan.pdf")) + + ;; GWAS with simple ciphertext data sharing + (invoke "julia" #$gwas-script + #$(file-append hsmice-ciphertext "/genotype.tsv.hegp") + #$(file-append hsmice-ciphertext "/phenotype.tsv.hegp") + (string-append #$output "/ciphertext-jwas-gwas.tsv")) + (invoke "Rscript" #$jwas-manhattan-script + (string-append #$output "/ciphertext-jwas-gwas.tsv")) + (copy-file "Rplots.pdf" + (string-append #$output "/ciphertext-jwas-manhattan.pdf")) + + ;; Joint federated GWAS + (invoke "julia" #$gwas-script + #$(file-append hsmice-ciphertext "/complete-genotype.tsv.hegp") + #$(file-append hsmice-ciphertext "/complete-phenotype.tsv.hegp") + (string-append #$output "/federated-ciphertext-jwas-gwas.tsv")) + (invoke "Rscript" #$jwas-manhattan-script + (string-append #$output "/federated-ciphertext-jwas-gwas.tsv")) + (copy-file "Rplots.pdf" + (string-append #$output "/federated-ciphertext-jwas-manhattan.pdf")))))) + +(define hsmice-jwas-gwas + (computed-file "hsmice-jwas-gwas" hsmice-jwas-gwas-gexp)) + +(define hsmice-jwas-qtl-checked-gexp + (let ((script-profile (profile + (content (packages->manifest + (list python python-pandas)))))) + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils) + (srfi srfi-26)) + + (mkdir #$output) + (set-path-environment-variable + "PATH" '("bin") '(#$script-profile)) + (set-path-environment-variable + "GUIX_PYTHONPATH" + '(#$(string-append "lib/python" + (version-major+minor (package-version python)) + "/site-packages")) + '(#$script-profile)) + + ;; Check that the QTL is where it should be. + (for-each (cut invoke + "python3" + #$(local-file "../e2e-tests/hsmice/check-qtl.py") + <> + "modelfrequency > 0.4") + (find-files #$hsmice-jwas-gwas + "\\-gwas.tsv$")))))) + +(define hsmice-jwas-qtl-checked + (computed-file "hsmice-jwas-qtl-checked" hsmice-jwas-qtl-checked-gexp)) + (define-public hsmice-test (directory-union "hsmice-test" (list hsmice-r-mixed-model-gwas - hsmice-qtl-checked))) + hsmice-qtl-checked + hsmice-jwas-gwas + hsmice-jwas-qtl-checked))) hsmice-test diff --git a/e2e-tests/hsmice/jwas-gwas.jl b/e2e-tests/hsmice/jwas-gwas.jl new file mode 100644 index 0000000..193b8ed --- /dev/null +++ b/e2e-tests/hsmice/jwas-gwas.jl @@ -0,0 +1,46 @@ +using CSV, DataFrames, JWAS +using Pipe: @pipe + +if length(ARGS) != 3 + print(stderr, "Usage: julia jwas-gwas.jl GENOTYPE-FILE PHENOTYPE-FILE OUTPUT-GWAS-FILE\n") + exit(1) +end +genotype_file, phenotype_file, output_gwas_file = ARGS + +genotype = CSV.read(genotype_file, DataFrame; drop=[:reference]) +phenotype = @pipe(CSV.read(phenotype_file, DataFrame) + |> select(_, + "sample-id" => :sample_id, + "Biochem.ALP.resid" => :alp)) + +genotypes = get_genotypes(@pipe( + hcat(DataFrame(snp_id=string.(genotype.chromosome, ":", genotype.position)), + @pipe(genotype + |> select(_, Not([:chromosome, :position])))) + |> permutedims(_, :snp_id) + |> rename(_, :snp_id => :sample_id)), + 1.8e-5, + G_is_marker_variance=true, + quality_control=false, + center=false, + Pi=0.999, + estimateScale=true) + +model = build_model("alp = intercept + genotypes") +out = runMCMC(model, + phenotype, + Pi=0.99, + estimateScale=true, + chain_length=50000, + burnin=5000, + output_heritability=true, + output_samples_frequency=100, + # fix seed for reproducibility + seed=1) + +gwas = GWAS("results/MCMC_samples_marker_effects_genotypes_alp.txt") +CSV.write(output_gwas_file, + transform(gwas, + :marker_ID => ByRow(x -> split(x, ":")[1]) => :chromosome, + :marker_ID => ByRow(x -> parse(Int, split(x, ":")[2])) => :position); + delim="\t") diff --git a/e2e-tests/hsmice/jwas-manhattan.r b/e2e-tests/hsmice/jwas-manhattan.r new file mode 100644 index 0000000..a437a8c --- /dev/null +++ b/e2e-tests/hsmice/jwas-manhattan.r @@ -0,0 +1,13 @@ +library(dplyr) +library(qqman) +library(readr) + +args = commandArgs(trailingOnly=TRUE) +if (length(args) != 1) { + write("Usage: Rscript jwas-manhattan.r GWAS-FILE", stderr()) + quit(status=1) +} +gwas_file = args[1] + +gwas = mutate(read_tsv(gwas_file), antilog=10^(-modelfrequency)) +manhattan(gwas, chr="chromosome", bp="position", snp="marker_ID", p="antilog") |
