e2e-tests/hsmice/jwas-gwas.jl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

using CSV, DataFrames, JWAS
using Pipe: @pipe

if length(ARGS) != 3
    print(stderr, "Usage: julia jwas-gwas.jl GENOTYPE-FILE PHENOTYPE-FILE OUTPUT-GWAS-FILE\n")
    exit(1)
end
genotype_file, phenotype_file, output_gwas_file = ARGS

genotype = CSV.read(genotype_file, DataFrame; drop=[:reference])
phenotype = @pipe(CSV.read(phenotype_file, DataFrame)
                  |> select(_,
                            "sample-id" => :sample_id,
                            "Biochem.ALP.resid" => :alp))

genotypes = get_genotypes(@pipe(
    hcat(DataFrame(snp_id=string.(genotype.chromosome, ":", genotype.position)),
         @pipe(genotype
               |> select(_, Not([:chromosome, :position]))))
    |> permutedims(_, :snp_id)
    |> rename(_, :snp_id => :sample_id)),
                          1.8e-5,
                          G_is_marker_variance=true,
                          quality_control=false,
                          center=false,
                          Pi=0.999,
                          estimateScale=true)

model = build_model("alp = intercept + genotypes")
out = runMCMC(model,
              phenotype,
              Pi=0.99,
              estimateScale=true,
              chain_length=50000,
              burnin=5000,
              output_heritability=true,
              output_samples_frequency=100,
              # fix seed for reproducibility
              seed=1)

gwas = GWAS("results/MCMC_samples_marker_effects_genotypes_alp.txt")
CSV.write(output_gwas_file,
          transform(gwas,
                    :marker_ID => ByRow(x -> split(x, ":")[1]) => :chromosome,
                    :marker_ID => ByRow(x -> parse(Int, split(x, ":")[2])) => :position);
          delim="\t")