about summary refs log tree commit diff
diff options
context:
space:
mode:
authorArun Isaac2025-11-13 02:23:34 +0000
committerArun Isaac2025-11-13 02:23:34 +0000
commit2bd7b74814ca208a30382c6ad847e169addc542d (patch)
tree89d51b7a3aeffe759474e52ae104ed9f1b74646e
parentf0076f0a413ad3706e3b012b07e4a14c61c79b6e (diff)
downloadpyhegp-2bd7b74814ca208a30382c6ad847e169addc542d.tar.gz
pyhegp-2bd7b74814ca208a30382c6ad847e169addc542d.tar.lz
pyhegp-2bd7b74814ca208a30382c6ad847e169addc542d.zip
Add JWAS tests to HSmice test.
-rw-r--r--.guix/hsmice-test.scm94
-rw-r--r--e2e-tests/hsmice/jwas-gwas.jl46
-rw-r--r--e2e-tests/hsmice/jwas-manhattan.r13
3 files changed, 152 insertions, 1 deletions
diff --git a/.guix/hsmice-test.scm b/.guix/hsmice-test.scm
index c5e4257..bb84a09 100644
--- a/.guix/hsmice-test.scm
+++ b/.guix/hsmice-test.scm
@@ -18,9 +18,12 @@
 
 (define-module (hsmice-test)
   #:use-module ((gn packages bioinformatics) #:select (r-genio))
+  #:use-module ((gn packages julia) #:select (julia-jwas julia-pipe))
   #:use-module ((gnu packages base) #:select (tar))
   #:use-module ((gnu packages compression) #:select (gzip))
   #:use-module ((gnu packages cran) #:select (r-dplyr r-purrr r-qqman r-stringr))
+  #:use-module ((gnu packages julia) #:select (julia))
+  #:use-module ((gnu packages julia-xyz) #:select (julia-csv julia-dataframes))
   #:use-module ((gnu packages python) #:select (python))
   #:use-module ((gnu packages python-science) #:select (python-pandas))
   #:use-module ((gnu packages python-xyz) #:select (python-click))
@@ -195,9 +198,98 @@ genome-wide association study} library for R.")
 (define hsmice-qtl-checked
   (computed-file "hsmice-qtl-checked" hsmice-qtl-checked-gexp))
 
+(define hsmice-jwas-gwas-gexp
+  (let ((gwas-script (local-file "../e2e-tests/hsmice/jwas-gwas.jl"))
+        (jwas-manhattan-script (local-file "../e2e-tests/hsmice/jwas-manhattan.r"))
+        (script-profile (profile
+                          (content (packages->manifest
+				    (list julia julia-csv julia-dataframes
+					  julia-jwas julia-pipe
+                                          r r-dplyr r-qqman r-readr))))))
+    (with-imported-modules '((guix build utils))
+      #~(begin
+          (use-modules (guix build utils))
+
+          (mkdir #$output)
+	  (setenv "HOME" "/tmp")
+          (set-path-environment-variable
+           "PATH" '("bin") '(#$script-profile))
+          (set-path-environment-variable
+           "JULIA_LOAD_PATH" '("share/julia/loadpath") '(#$script-profile))
+	  (set-path-environment-variable
+           "JULIA_DEPOT_PATH" '("share/julia") '(#$script-profile))
+          (set-path-environment-variable
+           "R_LIBS_SITE" '("site-library") '(#$script-profile))
+
+          ;; GWAS on plaintext
+          (invoke "julia" #$gwas-script
+                  #$(file-append hsmice-wrangled "/genotype.tsv")
+                  #$(file-append hsmice-wrangled "/phenotype.tsv")
+                  (string-append #$output "/plaintext-jwas-gwas.tsv"))
+          (invoke "Rscript" #$jwas-manhattan-script
+                  (string-append #$output "/plaintext-jwas-gwas.tsv"))
+          (copy-file "Rplots.pdf"
+                     (string-append #$output "/plaintext-jwas-manhattan.pdf"))
+
+          ;; GWAS with simple ciphertext data sharing
+          (invoke "julia" #$gwas-script
+                  #$(file-append hsmice-ciphertext "/genotype.tsv.hegp")
+                  #$(file-append hsmice-ciphertext "/phenotype.tsv.hegp")
+                  (string-append #$output "/ciphertext-jwas-gwas.tsv"))
+          (invoke "Rscript" #$jwas-manhattan-script
+                  (string-append #$output "/ciphertext-jwas-gwas.tsv"))
+          (copy-file "Rplots.pdf"
+                     (string-append #$output "/ciphertext-jwas-manhattan.pdf"))
+
+          ;; Joint federated GWAS
+          (invoke "julia" #$gwas-script
+                  #$(file-append hsmice-ciphertext "/complete-genotype.tsv.hegp")
+                  #$(file-append hsmice-ciphertext "/complete-phenotype.tsv.hegp")
+                  (string-append #$output "/federated-ciphertext-jwas-gwas.tsv"))
+          (invoke "Rscript" #$jwas-manhattan-script
+                  (string-append #$output "/federated-ciphertext-jwas-gwas.tsv"))
+          (copy-file "Rplots.pdf"
+                     (string-append #$output "/federated-ciphertext-jwas-manhattan.pdf"))))))
+
+(define hsmice-jwas-gwas
+  (computed-file "hsmice-jwas-gwas" hsmice-jwas-gwas-gexp))
+
+(define hsmice-jwas-qtl-checked-gexp
+  (let ((script-profile (profile
+                          (content (packages->manifest
+                                    (list python python-pandas))))))
+    (with-imported-modules '((guix build utils))
+      #~(begin
+          (use-modules (guix build utils)
+                       (srfi srfi-26))
+
+          (mkdir #$output)
+          (set-path-environment-variable
+           "PATH" '("bin") '(#$script-profile))
+          (set-path-environment-variable
+           "GUIX_PYTHONPATH"
+           '(#$(string-append "lib/python"
+                              (version-major+minor (package-version python))
+                              "/site-packages"))
+           '(#$script-profile))
+
+          ;; Check that the QTL is where it should be.
+          (for-each (cut invoke
+                         "python3"
+                         #$(local-file "../e2e-tests/hsmice/check-qtl.py")
+                         <>
+			 "modelfrequency > 0.4")
+                    (find-files #$hsmice-jwas-gwas
+                                "\\-gwas.tsv$"))))))
+
+(define hsmice-jwas-qtl-checked
+  (computed-file "hsmice-jwas-qtl-checked" hsmice-jwas-qtl-checked-gexp))
+
 (define-public hsmice-test
   (directory-union "hsmice-test"
                    (list hsmice-r-mixed-model-gwas
-                         hsmice-qtl-checked)))
+                         hsmice-qtl-checked
+			 hsmice-jwas-gwas
+			 hsmice-jwas-qtl-checked)))
 
 hsmice-test
diff --git a/e2e-tests/hsmice/jwas-gwas.jl b/e2e-tests/hsmice/jwas-gwas.jl
new file mode 100644
index 0000000..193b8ed
--- /dev/null
+++ b/e2e-tests/hsmice/jwas-gwas.jl
@@ -0,0 +1,46 @@
+using CSV, DataFrames, JWAS
+using Pipe: @pipe
+
+if length(ARGS) != 3
+    print(stderr, "Usage: julia jwas-gwas.jl GENOTYPE-FILE PHENOTYPE-FILE OUTPUT-GWAS-FILE\n")
+    exit(1)
+end
+genotype_file, phenotype_file, output_gwas_file = ARGS
+
+genotype = CSV.read(genotype_file, DataFrame; drop=[:reference])
+phenotype = @pipe(CSV.read(phenotype_file, DataFrame)
+                  |> select(_,
+                            "sample-id" => :sample_id,
+                            "Biochem.ALP.resid" => :alp))
+
+genotypes = get_genotypes(@pipe(
+    hcat(DataFrame(snp_id=string.(genotype.chromosome, ":", genotype.position)),
+         @pipe(genotype
+               |> select(_, Not([:chromosome, :position]))))
+    |> permutedims(_, :snp_id)
+    |> rename(_, :snp_id => :sample_id)),
+                          1.8e-5,
+                          G_is_marker_variance=true,
+                          quality_control=false,
+                          center=false,
+                          Pi=0.999,
+                          estimateScale=true)
+
+model = build_model("alp = intercept + genotypes")
+out = runMCMC(model,
+              phenotype,
+              Pi=0.99,
+              estimateScale=true,
+              chain_length=50000,
+              burnin=5000,
+              output_heritability=true,
+              output_samples_frequency=100,
+              # fix seed for reproducibility
+              seed=1)
+
+gwas = GWAS("results/MCMC_samples_marker_effects_genotypes_alp.txt")
+CSV.write(output_gwas_file,
+          transform(gwas,
+                    :marker_ID => ByRow(x -> split(x, ":")[1]) => :chromosome,
+                    :marker_ID => ByRow(x -> parse(Int, split(x, ":")[2])) => :position);
+          delim="\t")
diff --git a/e2e-tests/hsmice/jwas-manhattan.r b/e2e-tests/hsmice/jwas-manhattan.r
new file mode 100644
index 0000000..a437a8c
--- /dev/null
+++ b/e2e-tests/hsmice/jwas-manhattan.r
@@ -0,0 +1,13 @@
+library(dplyr)
+library(qqman)
+library(readr)
+
+args = commandArgs(trailingOnly=TRUE)
+if (length(args) != 1) {
+    write("Usage: Rscript jwas-manhattan.r GWAS-FILE", stderr())
+    quit(status=1)
+}
+gwas_file = args[1]
+
+gwas = mutate(read_tsv(gwas_file), antilog=10^(-modelfrequency))
+manhattan(gwas, chr="chromosome", bp="position", snp="marker_ID", p="antilog")