about summary refs log tree commit diff
path: root/.guix/hsmice-test.scm
diff options
context:
space:
mode:
Diffstat (limited to '.guix/hsmice-test.scm')
-rw-r--r--.guix/hsmice-test.scm148
1 files changed, 148 insertions, 0 deletions
diff --git a/.guix/hsmice-test.scm b/.guix/hsmice-test.scm
new file mode 100644
index 0000000..ee8ecbd
--- /dev/null
+++ b/.guix/hsmice-test.scm
@@ -0,0 +1,148 @@
+;;; pyhegp --- Homomorphic encryption of genotypes and phenotypes
+;;; Copyright © 2025 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;; This file is part of pyhegp.
+;;;
+;;; pyhegp is free software: you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation, either version 3 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; pyhegp is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with pyhegp. If not, see <https://www.gnu.org/licenses/>.
+
+(define-module (hsmice-test)
+  #:use-module ((gn packages bioinformatics) #:select (r-genio))
+  #:use-module ((gnu packages base) #:select (tar))
+  #:use-module ((gnu packages compression) #:select (gzip))
+  #:use-module ((gnu packages cran) #:select (r-dplyr r-purrr r-qqman r-stringr))
+  #:use-module ((gnu packages python) #:select (python))
+  #:use-module ((gnu packages python-science) #:select (python-pandas))
+  #:use-module ((gnu packages python-xyz) #:select (python-click))
+  #:use-module ((gnu packages statistics) #:select (r r-readr r-tibble r-tidyr))
+  #:use-module (guix build-system r)
+  #:use-module (guix download)
+  #:use-module (guix gexp)
+  #:use-module (guix git-download)
+  #:use-module ((guix licenses) #:prefix license:)
+  #:use-module (guix packages)
+  #:use-module (guix profiles)
+  #:use-module (guix utils)
+  #:use-module ((pyhegp-package) #:select (pyhegp)))
+
+;; See details of dataset at
+;; https://rdr.ucl.ac.uk/articles/dataset/HSmice_tar_gz/24114471?file=42304248
+(define hsmice-data
+  (origin
+    (method url-fetch)
+    (uri "https://ndownloader.figshare.com/files/42304248")
+    (file-name "HSmice.tar.gz")
+    (sha256
+     (base32
+      "1s6a83r0mll8z2lfv1b94zr2sjdrky5nyq1mpgl8fjjb5s8v2vyx"))))
+
+(define-public r-mixed-model-gwas
+  (package
+   (name "r-mixed-model-gwas")
+   (version "1.3")
+   (source (origin
+            (method git-fetch)
+            (uri (git-reference
+                  (url "https://github.com/encryption4genetics/mixed-model-gwas")
+                  (commit (string-append "v" version))))
+            (file-name (git-file-name name version))
+            (sha256
+             (base32
+              "0yv86mw9m981vzl80j100lg05kc6jm5ijhq9b8zcd8f2lr3115db"))))
+   (build-system r-build-system)
+   (home-page "https://github.com/encryption4genetics/mixed-model-gwas")
+   (synopsis "R mixed model GWAS")
+   (description "@code{r-mixed-model-gwas} implements a mixed model @acronym{GWAS,
+genome-wide association study} library for R.")
+   (license license:gpl3+)))
+
+(define test-profile
+  (profile
+   (content (packages->manifest (list gzip tar pyhegp
+                                      python python-click python-pandas
+                                      r r-dplyr r-genio
+                                      r-mixed-model-gwas r-purrr
+                                      r-qqman r-readr r-stringr
+                                      r-tibble r-tidyr)))))
+
+(define wrangle-script
+  (local-file "../e2e-tests/hsmice/wrangle.r"))
+
+(define gwas-script
+  (local-file "../e2e-tests/hsmice/gwas.r"))
+
+(define check-qtl-script
+  (local-file "../e2e-tests/hsmice/check-qtl.py"))
+
+(define hsmice-test-gexp
+  (with-imported-modules '((guix build utils))
+    #~(begin
+        (use-modules (guix build utils))
+
+        (mkdir #$output)
+        (set-path-environment-variable
+         "PATH" '("/bin") '(#$test-profile))
+        (set-path-environment-variable
+         "GUIX_PYTHONPATH"
+         '(#$(string-append "/lib/python"
+                            (version-major+minor (package-version python))
+                            "/site-packages"))
+         '(#$test-profile))
+        (set-path-environment-variable
+         "R_LIBS_SITE" '("/site-library") '(#$test-profile))
+        (invoke "tar" "-xvf" #$hsmice-data
+                "./HSmice/1_QTL_data/")
+        (invoke "Rscript" #$wrangle-script "HSmice/1_QTL_data" ".")
+
+        ;; GWAS on plaintext
+        (invoke "Rscript" #$gwas-script
+                "genotype.tsv" "phenotype.tsv"
+                (string-append #$output "/plaintext-pvalues"))
+        (copy-file "Rplots.pdf" (string-append #$output "/plaintext-manhattan.pdf"))
+
+        ;; GWAS with simple ciphertext data sharing
+        (invoke "pyhegp" "encrypt" "genotype.tsv" "phenotype.tsv")
+        (invoke "Rscript" #$gwas-script
+                "genotype.tsv.hegp" "phenotype.tsv.hegp"
+                (string-append #$output "/ciphertext-pvalues"))
+        (copy-file "Rplots.pdf"
+                   (string-append #$output "/ciphertext-manhattan.pdf"))
+
+        ;; Joint federated GWAS
+        (invoke "pyhegp" "summary" "genotype1.tsv" "-o" "summary1")
+        (invoke "pyhegp" "summary" "genotype2.tsv" "-o" "summary2")
+        (invoke "pyhegp" "pool" "-o" "complete-summary" "summary1" "summary2")
+        (invoke "pyhegp" "encrypt" "-s" "complete-summary" "genotype1.tsv" "phenotype1.tsv")
+        (invoke "pyhegp" "encrypt" "-s" "complete-summary" "genotype2.tsv" "phenotype2.tsv")
+        (invoke "pyhegp" "cat-genotype" "-o" "complete-genotype.tsv.hegp"
+                "genotype1.tsv.hegp" "genotype2.tsv.hegp")
+        (invoke "pyhegp" "cat-phenotype" "-o" "complete-phenotype.tsv.hegp"
+                "phenotype1.tsv.hegp" "phenotype2.tsv.hegp")
+        (invoke "Rscript" #$gwas-script
+                "complete-genotype.tsv.hegp" "complete-phenotype.tsv.hegp"
+                (string-append #$output "/federated-ciphertext-pvalues"))
+        (copy-file "Rplots.pdf"
+                   (string-append #$output "/federated-ciphertext-manhattan.pdf"))
+
+        ;; Check that the QTL is where it should be.
+        (for-each (lambda (pvalues-file)
+                    (invoke "python3" #$check-qtl-script
+                            (string-append #$output "/" pvalues-file)))
+                  (list "plaintext-pvalues"
+                        "ciphertext-pvalues"
+                        "federated-ciphertext-pvalues")))))
+
+(define-public hsmice-test
+  (computed-file "hsmice-test" hsmice-test-gexp))
+
+hsmice-test