The goal of gwasforest is to extract and reformat data from GWAS results, and then produce a single integrated forest plot made up of multiple windows, each of which shows the results for an individual SNP (or other item of interest).
The official release version of gwasforest can be installed from CRAN with:
utils::install.packages("gwasforest")
The development version of gwasforest can be installed from GitHub with:
devtools::install_github("yilixu/gwasforest", ref = "main")
library(gwasforest)
set.seed(123)
# generate example data: 18 random values in [0.01, 2] (6 markers x 3 studies),
# each paired with a standard error equal to the value divided by 3, 4 or 5
sim_value <- runif(n = 18, min = 0.01, max = 2)
sim_std_err <- sim_value / rep(3:5, times = 6)
# one Value/StdErr column pair per study, named "<Study>__Value" / "<Study>__StdErr"
eg_customFilename <- data.frame(
  MarkerName = paste0("Marker", 1:6),
  Study1__Value = sim_value[1:6],
  Study1__StdErr = sim_std_err[1:6],
  Study2__Value = sim_value[7:12],
  Study2__StdErr = sim_std_err[7:12],
  Study3__Value = sim_value[13:18],
  Study3__StdErr = sim_std_err[13:18],
  stringsAsFactors = FALSE
)
# drop the temporary vectors so only the example data frame remains
rm(sim_value, sim_std_err)
# take a quick look at the example: main input data (with standardized column names)
print(eg_customFilename)
#> MarkerName Study1__Value Study1__StdErr Study2__Value Study2__StdErr
#> 1 Marker1 0.5822793 0.19409309 1.0609299 0.3536433
#> 2 Marker2 1.5787272 0.39468180 1.7859139 0.4464785
#> 3 Marker3 0.8238641 0.16477281 1.1073557 0.2214711
#> 4 Marker4 1.7672046 0.58906821 0.9186633 0.3062211
#> 5 Marker5 1.8815299 0.47038247 1.9140984 0.4785246
#> 6 Marker6 0.1006574 0.02013149 0.9121350 0.1824270
#> Study3__Value Study3__StdErr
#> 1 1.35836556 0.45278852
#> 2 1.14954047 0.28738512
#> 3 0.21482012 0.04296402
#> 4 1.80065169 0.60021723
#> 5 0.49971459 0.12492865
#> 6 0.09369847 0.01873969
# run gwasforest function on the example data with standardized column names
eg_returnList <- gwasforest(
  eg_customFilename,
  stdColnames = TRUE,
  valueFormat = "Effect",
  metaStudy = "Study1",
  colorMode = "duo"
)
#> [1] "Column names are in the same format as instruction example"
#> [1] "Loading user-provided values"
#> [1] "Start calculating Confidence Interval (non-exponential)"
#> [1] "Based on user's choice, GWAS results output file will not be generated"
#> [1] "All studies except meta study will be set in alphabetical order from top to bottom on the forest plot"
#> Registered S3 methods overwritten by 'ggplot2':
#> method from
#> [.quosures rlang
#> c.quosures rlang
#> print.quosures rlang
#> [1] "Based on user's choice, GWAS forest plot file will not be generated"
#> [1] "Run completed, thank you for using gwasforest"
# generate example data: same layout as before (6 markers x 3 studies), but
# drawn from the current random stream
sim_value <- runif(n = 18, min = 0.01, max = 2)
sim_std_err <- sim_value / rep(3:5, times = 6)
eg_customFilename2 <- data.frame(
  paste0("Marker", 1:6),
  sim_value[1:6],
  sim_std_err[1:6],
  sim_value[7:12],
  sim_std_err[7:12],
  sim_value[13:18],
  sim_std_err[13:18],
  stringsAsFactors = FALSE
)
# non-standardized column names: study prefix followed by a random capital letter
random_suffix <- sample(LETTERS, 6)
names(eg_customFilename2) <- c(
  "MarkerName",
  paste0("Study", rep(1:3, each = 2), random_suffix)
)
rm(sim_value, sim_std_err, random_suffix)
# companion table listing the study names, one per row
eg_customFilename_studyName <- data.frame(
  studyName = paste0("Study", 1:3),
  stringsAsFactors = FALSE
)
# take a quick look at the example: main input data (without standardized column names)
print(eg_customFilename2)
#> MarkerName Study1K Study1G Study2U Study2L Study3O
#> 1 Marker1 0.6625622 0.2208541 1.3148545 0.43828485 1.92641822
#> 2 Marker2 1.9094623 0.4773656 1.4199756 0.35499391 1.80557510
#> 3 Marker3 1.7801832 0.3560366 1.0926914 0.21853828 1.38450350
#> 4 Marker4 1.3886788 0.4628929 1.1923426 0.39744754 1.59298016
#> 5 Marker5 1.2846086 0.3211521 0.5854279 0.14635697 0.05898123
#> 6 Marker6 1.9885969 0.3977194 0.3027562 0.06055123 0.96081398
#> Study3J
#> 1 0.64213941
#> 2 0.45139377
#> 3 0.27690070
#> 4 0.53099339
#> 5 0.01474531
#> 6 0.19216280
# take a quick look at the example: custom study name table
# (a single "studyName" column with one row per study)
print(eg_customFilename_studyName)
#> studyName
#> 1 Study1
#> 2 Study2
#> 3 Study3
# run gwasforest function on the data without standardized column names;
# the study names and the per-study column order are supplied explicitly
eg_returnList2 <- gwasforest(
  eg_customFilename2,
  customFilename_studyName = eg_customFilename_studyName,
  stdColnames = FALSE,
  customColnames = c("Value", "StdErr"),
  valueFormat = "Effect",
  metaStudy = "Study1",
  colorMode = "duo"
)
#> [1] "Column names are grouped by study and in the order of |Value, StdErr|"
#> [1] "Start reforming"
#> [1] "Loading user-provided values"
#> [1] "Start calculating Confidence Interval (non-exponential)"
#> [1] "Based on user's choice, GWAS results output file will not be generated"
#> [1] "As per user's request, all studies except meta study will be set in the original order from top to bottom on the forest plot"
#> [1] "Based on user's choice, GWAS forest plot file will not be generated"
#> [1] "Run completed, thank you for using gwasforest"
# extract results table (first element of the list returned for the first example)
eg_customFilename_results <- eg_returnList[[1]]
# take a quick look at the example: results table
print(eg_customFilename_results)
#> MarkerName Value Upper Lower StudyName CI
#> 1 Marker1 0.58 0.9627017 0.20185681 Study1 0.58(0.2-0.96)
#> 2 Marker2 1.58 2.3523036 0.80515088 Study1 1.58(0.81-2.35)
#> 3 Marker3 0.82 1.1468188 0.50090936 Study1 0.82(0.5-1.15)
#> 4 Marker4 1.77 2.9217783 0.61263094 Study1 1.77(0.61-2.92)
#> 5 Marker5 1.88 2.8034795 0.95958025 Study1 1.88(0.96-2.8)
#> 6 Marker6 0.10 0.1401151 0.06119972 Study1 0.1(0.06-0.14)
#> 7 Marker1 1.06 1.7540708 0.36778904 Study2 1.06(0.37-1.75)
#> 8 Marker2 1.79 2.6610117 0.91081609 Study2 1.79(0.91-2.66)
#> 9 Marker3 1.11 1.5414391 0.67327225 Study2 1.11(0.67-1.54)
#> 10 Marker4 0.92 1.5188567 0.31846995 Study2 0.92(0.32-1.52)
#> 11 Marker5 1.91 2.8520066 0.97619016 Study2 1.91(0.98-2.85)
#> 12 Marker6 0.91 1.2696919 0.55457806 Study2 0.91(0.55-1.27)
#> 13 Marker1 1.36 2.2458311 0.47090006 Study3 1.36(0.47-2.25)
#> 14 Marker2 1.15 1.7128153 0.58626564 Study3 1.15(0.59-1.71)
#> 15 Marker3 0.21 0.2990296 0.13061063 Study3 0.21(0.13-0.3)
#> 16 Marker4 1.80 2.9770775 0.62422592 Study3 1.8(0.62-2.98)
#> 17 Marker5 0.50 0.7445747 0.25485444 Study3 0.5(0.25-0.74)
#> 18 Marker6 0.09 0.1304283 0.05696867 Study3 0.09(0.06-0.13)
# ggplot2 is attached before plotting — presumably because the plot object
# returned by gwasforest is a ggplot object (confirm against the package docs)
library(ggplot2)
# render plot, see additional NOTES below
# the plot is the second element of the list returned for the first example
plot(eg_returnList[[2]])
#> Warning: Removed 12 rows containing missing values (geom_text_repel).