The goal of this Rmd
document is to load registration results and cognitive tests into R
, tidy the data, perform exploratory analysis, and use our new R
package braincog
to find brain-cognition differential correlations in girls with Turner syndrome and age-matched healthy controls.
params
## $seed
## [1] "2"
##
## $res_file
## [1] "res_2.Rdata"
Package SimpleITK
needs to be installed prior to running this Rmd
file because it takes a couple of hours to compile. For details on the installation process and pre-compiled packages, please refer to our instructions on GitHub.
pkgs_needed = c("ChristofSeiler/braincog",
"tibble","magrittr","dplyr","devtools","readr","stringr",
"PMA","genefilter","BiocParallel",
"ggfortify","EBImage","mice","abind","factoextra",
"visdat","ggbeeswarm","GGally")
letsinstall = setdiff(pkgs_needed, installed.packages())
if (length(letsinstall) > 0) {
source("http://bioconductor.org/biocLite.R")
biocLite(letsinstall)
}
Once all package have been installed, we are ready to load them.
library("braincog")
library("SimpleITK")
library("tibble")
library("magrittr")
library("dplyr")
library("devtools")
library("readr")
library("stringr")
library("PMA")
library("genefilter")
library("BiocParallel")
library("ggfortify")
library("EBImage")
library("mice")
library("abind")
library("factoextra")
library("visdat")
library("ggbeeswarm")
library("GGally")
theme_set(theme_grey())
The patient and behavioral data was saved in two separate text files. However these files are linked with an subject ID, which is internal ID at the Center for Interdisciplinary Brain Sciences Research (CIBSR) at Stanford School of Medicine.
cognition = read_csv("TURNER_DATA_all_import_tg.csv")
## Warning: Duplicated column names deduplicated: 'Please choose one' =>
## 'Please choose one_1' [10]
## Parsed with column specification:
## cols(
## .default = col_integer(),
## `CIBSR ID (Subject ID Generated by FilemakerPro and Timepoint)` = col_double(),
## Diagnosis = col_character(),
## Sex = col_character(),
## `Age (at consent)` = col_double(),
## `Please choose one` = col_character(),
## `Please choose one_1` = col_character(),
## `DATA quality HARDI` = col_character(),
## `DATA quality structural` = col_character()
## )
## See spec(...) for full column specifications.
sample_info = read_csv("TURNER_DATA_8_3_final.csv")
## Parsed with column specification:
## cols(
## ID = col_double(),
## Age = col_double(),
## Group = col_character(),
## Pubic = col_integer(),
## Breast = col_integer(),
## POO = col_character()
## )
Combined both data frames into one, remove unnecessary variables, and defined column types. We have size domains:
cognition %<>% dplyr::rename(ID = 'CIBSR ID (Subject ID Generated by FilemakerPro and Timepoint)')
cognition = left_join(cognition,sample_info,by = "ID")
cognition$ID %<>% formatC(format = "f",digits = 1)
Load template and mask images.
template_arr = ReadImage("Mean.nii.gz") %>% as.array
Plotting function for one slice using EBImage
package.
plot_slice(template_arr,axis = 1)
plot_slice(template_arr,axis = 2)
plot_slice(template_arr,axis = 3)
Mask out all the non-gray matter tissue.
label_arr = ReadImage("Gray.Matter.nii.gz") %>% as.array
# for debugguing select one region
#regions = ReadImage("FSL.Atlas.Cort.To.Mean.nii.gz") %>% as.array
#label_arr[regions != 1] = 0
plot_slice(label_arr)
Load Jacobian determinant images.
filenames = list.files(pattern = ".1.Ants.LogJacDet.nii.gz")
jac_list = lapply(filenames,function(filename) {
print(filename)
brain = ReadImage(filename)
gray_matter = as.array(brain)[label_arr != 0]
tibble(gray_matter)
})
## [1] "13135.1.Ants.LogJacDet.nii.gz"
## [1] "13184.1.Ants.LogJacDet.nii.gz"
## [1] "13227.1.Ants.LogJacDet.nii.gz"
## [1] "13635.1.Ants.LogJacDet.nii.gz"
## [1] "13778.1.Ants.LogJacDet.nii.gz"
## [1] "13779.1.Ants.LogJacDet.nii.gz"
## [1] "13880.1.Ants.LogJacDet.nii.gz"
## [1] "13929.1.Ants.LogJacDet.nii.gz"
## [1] "14035.1.Ants.LogJacDet.nii.gz"
## [1] "14170.1.Ants.LogJacDet.nii.gz"
## [1] "14181.1.Ants.LogJacDet.nii.gz"
## [1] "14355.1.Ants.LogJacDet.nii.gz"
## [1] "14356.1.Ants.LogJacDet.nii.gz"
## [1] "14454.1.Ants.LogJacDet.nii.gz"
## [1] "14465.1.Ants.LogJacDet.nii.gz"
## [1] "14552.1.Ants.LogJacDet.nii.gz"
## [1] "14782.1.Ants.LogJacDet.nii.gz"
## [1] "14816.1.Ants.LogJacDet.nii.gz"
## [1] "14825.1.Ants.LogJacDet.nii.gz"
## [1] "15173.1.Ants.LogJacDet.nii.gz"
## [1] "15177.1.Ants.LogJacDet.nii.gz"
## [1] "15190.1.Ants.LogJacDet.nii.gz"
## [1] "15271.1.Ants.LogJacDet.nii.gz"
## [1] "15272.1.Ants.LogJacDet.nii.gz"
## [1] "15276.1.Ants.LogJacDet.nii.gz"
## [1] "15299.1.Ants.LogJacDet.nii.gz"
## [1] "15391.1.Ants.LogJacDet.nii.gz"
## [1] "15406.1.Ants.LogJacDet.nii.gz"
## [1] "15621.1.Ants.LogJacDet.nii.gz"
## [1] "15622.1.Ants.LogJacDet.nii.gz"
## [1] "15695.1.Ants.LogJacDet.nii.gz"
## [1] "15698.1.Ants.LogJacDet.nii.gz"
## [1] "15718.1.Ants.LogJacDet.nii.gz"
## [1] "15721.1.Ants.LogJacDet.nii.gz"
## [1] "15830.1.Ants.LogJacDet.nii.gz"
## [1] "15836.1.Ants.LogJacDet.nii.gz"
## [1] "15916.1.Ants.LogJacDet.nii.gz"
## [1] "15938.1.Ants.LogJacDet.nii.gz"
## [1] "16054.1.Ants.LogJacDet.nii.gz"
## [1] "16247.1.Ants.LogJacDet.nii.gz"
## [1] "16289.1.Ants.LogJacDet.nii.gz"
## [1] "16307.1.Ants.LogJacDet.nii.gz"
## [1] "16482.1.Ants.LogJacDet.nii.gz"
## [1] "16494.1.Ants.LogJacDet.nii.gz"
## [1] "17162.1.Ants.LogJacDet.nii.gz"
## [1] "17971.1.Ants.LogJacDet.nii.gz"
## [1] "18076.1.Ants.LogJacDet.nii.gz"
## [1] "18102.1.Ants.LogJacDet.nii.gz"
## [1] "18220.1.Ants.LogJacDet.nii.gz"
## [1] "18226.1.Ants.LogJacDet.nii.gz"
## [1] "18287.1.Ants.LogJacDet.nii.gz"
## [1] "18321.1.Ants.LogJacDet.nii.gz"
## [1] "18374.1.Ants.LogJacDet.nii.gz"
## [1] "18425.1.Ants.LogJacDet.nii.gz"
## [1] "18447.1.Ants.LogJacDet.nii.gz"
## [1] "18463.1.Ants.LogJacDet.nii.gz"
## [1] "18473.1.Ants.LogJacDet.nii.gz"
## [1] "18487.1.Ants.LogJacDet.nii.gz"
## [1] "18501.1.Ants.LogJacDet.nii.gz"
## [1] "18571.1.Ants.LogJacDet.nii.gz"
## [1] "18576.1.Ants.LogJacDet.nii.gz"
## [1] "18591.1.Ants.LogJacDet.nii.gz"
## [1] "18620.1.Ants.LogJacDet.nii.gz"
## [1] "18621.1.Ants.LogJacDet.nii.gz"
## [1] "18622.1.Ants.LogJacDet.nii.gz"
## [1] "18629.1.Ants.LogJacDet.nii.gz"
## [1] "18684.1.Ants.LogJacDet.nii.gz"
## [1] "18706.1.Ants.LogJacDet.nii.gz"
## [1] "18723.1.Ants.LogJacDet.nii.gz"
## [1] "18744.1.Ants.LogJacDet.nii.gz"
## [1] "18749.1.Ants.LogJacDet.nii.gz"
## [1] "18789.1.Ants.LogJacDet.nii.gz"
## [1] "18795.1.Ants.LogJacDet.nii.gz"
## [1] "18802.1.Ants.LogJacDet.nii.gz"
## [1] "18825.1.Ants.LogJacDet.nii.gz"
## [1] "18866.1.Ants.LogJacDet.nii.gz"
## [1] "18869.1.Ants.LogJacDet.nii.gz"
## [1] "18870.1.Ants.LogJacDet.nii.gz"
## [1] "18888.1.Ants.LogJacDet.nii.gz"
## [1] "18968.1.Ants.LogJacDet.nii.gz"
## [1] "19095.1.Ants.LogJacDet.nii.gz"
## [1] "19117.1.Ants.LogJacDet.nii.gz"
## [1] "19118.1.Ants.LogJacDet.nii.gz"
## [1] "19253.1.Ants.LogJacDet.nii.gz"
## [1] "19283.1.Ants.LogJacDet.nii.gz"
## [1] "19293.1.Ants.LogJacDet.nii.gz"
## [1] "19319.1.Ants.LogJacDet.nii.gz"
## [1] "19320.1.Ants.LogJacDet.nii.gz"
## [1] "19347.1.Ants.LogJacDet.nii.gz"
## [1] "19387.1.Ants.LogJacDet.nii.gz"
## [1] "19406.1.Ants.LogJacDet.nii.gz"
## [1] "19407.1.Ants.LogJacDet.nii.gz"
## [1] "19422.1.Ants.LogJacDet.nii.gz"
## [1] "19426.1.Ants.LogJacDet.nii.gz"
## [1] "19427.1.Ants.LogJacDet.nii.gz"
## [1] "19442.1.Ants.LogJacDet.nii.gz"
## [1] "19446.1.Ants.LogJacDet.nii.gz"
## [1] "19484.1.Ants.LogJacDet.nii.gz"
## [1] "19485.1.Ants.LogJacDet.nii.gz"
## [1] "19549.1.Ants.LogJacDet.nii.gz"
## [1] "19614.1.Ants.LogJacDet.nii.gz"
## [1] "19630.1.Ants.LogJacDet.nii.gz"
## [1] "19631.1.Ants.LogJacDet.nii.gz"
## [1] "19643.1.Ants.LogJacDet.nii.gz"
## [1] "19645.1.Ants.LogJacDet.nii.gz"
## [1] "20321.1.Ants.LogJacDet.nii.gz"
## [1] "20349.1.Ants.LogJacDet.nii.gz"
## [1] "20377.1.Ants.LogJacDet.nii.gz"
## [1] "20378.1.Ants.LogJacDet.nii.gz"
## [1] "20387.1.Ants.LogJacDet.nii.gz"
morphometry = jac_list %>% bind_cols %>% t
rownames(morphometry) = substr(filenames,1,7)
Remove some columns in cognition
that are not important in this study.
cognition %<>% dplyr::rename('Quality' = 'DATA quality structural')
cognition$Quality %<>% factor
cognition %<>% select(-`Redcap ID`)
cognition %<>% select(-`Please choose one`)
cognition %<>% select(-`Please choose one_1`)
cognition %<>% select(-`Age (at consent)`)
cognition %<>% select(-`DATA quality HARDI`)
cognition %<>% select(-`Group`)
cognition$Diagnosis %<>% factor
cognition$Sex %<>% factor
cognition$POO %<>% factor
names(cognition)
## [1] "ID"
## [2] "Affect Recognition Total Scaled Score"
## [3] "List Memory and List Memory Delayed Total Scaled Score"
## [4] "AA vs. RS Contrast Scaled Scaled Score"
## [5] "Diagnosis"
## [6] "Sex"
## [7] "Quality"
## [8] "VCI Composite Score"
## [9] "PRI Composite Score"
## [10] "WMI Composite Score"
## [11] "PSI Composite Score"
## [12] "FSIQ Composite Score"
## [13] "Auditory Attention Total Correct Scaled Score"
## [14] "Response Set Total Correct Scaled Score"
## [15] "Auditory Attention Combined Scaled Score"
## [16] "Response Set Combined Scaled Score"
## [17] "Inhibition-Naming Combined Scaled Score"
## [18] "Inhibition-Inhibition Combined Scaled Score"
## [19] "Inhibition-Switching Combined Scaled Score"
## [20] "IN-Naming vs Inhibition Contrast Scaled Score"
## [21] "IN-Inhibition vs Switching Contrast Scaled Score"
## [22] "Comprehension of Instructions Total Score Scaled Score"
## [23] "Speeded Naming Total Completion Time Scaled Score"
## [24] "Speeded Naming Combined Scaled Score"
## [25] "Word Generation-Semantic Total Score Scaled Score"
## [26] "Word Generation-Initial Letter Total Score Scaled Score"
## [27] "Memory for Faces Total Score Scaled Score"
## [28] "Memory for Faces Delayed Total Score Scaled Score"
## [29] "MF vs MFD Contrast Scaled Score"
## [30] "Narrative Memory Free and Cued Recall Total Score Scaled Score"
## [31] "Narrative Memory Free Recall Total Score Scaled Score"
## [32] "NM Free and Cued Recall vs Recognition Contrast Scaled Score"
## [33] "Fingertip Tapping-Dominant Hand Combined Scaled Score"
## [34] "Fingertip Tapping-Nondominant Hand Combined Scaled Score"
## [35] "Fingertip Tapping-Repetitions Combined Scaled Score"
## [36] "Fingertip Tapping-Sequences Combined Scaled Score"
## [37] "FT Dominant Hand vs. Nondominant Hand Contrast Scaled Score"
## [38] "FT Repetitions vs. Sequences Contrast Scaled Score"
## [39] "Imitating Hand Position Total Score Scaled Score"
## [40] "Visuomotor Precision Total Completion Time Scaled Score"
## [41] "Visuomotor Precision Combined Scaled Score"
## [42] "Arrows Total Score Scaled Score"
## [43] "Picture Puzzles Total Score Scaled Score"
## [44] "Age"
## [45] "Pubic"
## [46] "Breast"
## [47] "POO"
Now we need to handle the missing values in the NEPSY subtest scores.
vis_dat(cognition) +
theme(axis.text.x = element_text(angle = 90, hjust = 0))
vis_miss(cognition) +
theme(axis.text.x = element_text(angle = 90, hjust = 0))
Exclude subjects and subtests that have more than the preset threshold of missing values.
exclude_thres = 0.2
# check variables
nna = apply(cognition,2,function(column) mean(is.na(column)))
exclude_cols = nna > exclude_thres
sum(exclude_cols)
## [1] 9
which(exclude_cols)
## List Memory and List Memory Delayed Total Scaled Score
## 3
## AA vs. RS Contrast Scaled Scaled Score
## 4
## Quality
## 7
## Response Set Total Correct Scaled Score
## 14
## Inhibition-Switching Combined Scaled Score
## 19
## IN-Inhibition vs Switching Contrast Scaled Score
## 21
## NM Free and Cued Recall vs Recognition Contrast Scaled Score
## 32
## Imitating Hand Position Total Score Scaled Score
## 39
## POO
## 47
if(sum(exclude_cols) > 0)
cognition = cognition[,-which(exclude_cols)]
# check subjects
nna = apply(cognition,1,function(row) mean(is.na(row)))
exclude_rows = nna > exclude_thres
sum(exclude_rows)
## [1] 12
if(sum(exclude_rows))
cognition = cognition[-which(exclude_rows),]
sum(is.na(cognition))/prod(dim(cognition))
## [1] 0.0368937
vis_dat(cognition) +
theme(axis.text.x = element_text(angle = 90, hjust = 0))
vis_miss(cognition) +
theme(axis.text.x = element_text(angle = 90, hjust = 0))
Run data imputation method predictive mean matching pmm
.
cognition_subset = cognition %>% select(-ID) %>% select(-Diagnosis) %>% select(-Sex)
num_imp = 20
res_mice = mice(cognition_subset,method = "pmm",
m = num_imp,
maxit = 20,
seed = as.integer(params$seed),
printFlag = FALSE)
# take the mean over all imputed data sets
cognition_imp = lapply(seq_len(num_imp),function(i) complete(res_mice, i)) %>%
abind(along = 3) %>%
apply(MARGIN = c(1,2),FUN = mean) %>%
as.tibble
# fix the data frame
names(cognition_imp) = names(cognition_subset)
cognition_imp %<>% add_column(ID = cognition$ID,
Diagnosis = cognition$Diagnosis,
Sex = cognition$Sex)
Match patient IDs in cognition_imp
and morphometry
tables. Make sure that the rows are ordered in the same way.
inter_id = intersect(rownames(morphometry),cognition_imp$ID)
cognition_imp = cognition_imp[sapply(inter_id,function(ID) which(cognition_imp$ID == ID)),]
morphometry = morphometry[sapply(inter_id,function(ID) which(rownames(morphometry) == ID)),]
Save output for next data analysis step.
save(cognition_imp,file = "cognition_imp.Rdata")
save(morphometry,file = "morphometry.Rdata")
After data preparation, we end up with the following sample size.
summize_data = function(cognition) {
cognition %>% dplyr::group_by(Diagnosis) %>% dplyr::summarize(
n = length(ID),
FSIQ_mean = mean(`FSIQ Composite Score`,na.rm = TRUE),
FSIQ_sd = sd(`FSIQ Composite Score`,na.rm = TRUE),
FSIQ_min = min(`FSIQ Composite Score`,na.rm = TRUE),
FSIQ_max = max(`FSIQ Composite Score`,na.rm = TRUE),
arrows_mean = mean(`Arrows Total Score Scaled Score`,na.rm = TRUE),
arrows_sd = sd(`Arrows Total Score Scaled Score`,na.rm = TRUE),
age_mean = mean(Age,na.rm = TRUE),
age_sd = sd(Age,na.rm = TRUE),
age_min = min(Age,na.rm = TRUE),
age_max = max(Age,na.rm = TRUE)) %>%
mutate_if(is.numeric, funs(round(., 1))) %>%
print(width = Inf)
}
summize_data(cognition)
## # A tibble: 2 x 12
## Diagnosis n FSIQ_mean FSIQ_sd FSIQ_min FSIQ_max arrows_mean arrows_sd age_mean age_sd age_min age_max
## <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control 48 114.5 12.8 87 144 10.9 2.7 10.0 2.1 5.2 14.2
## 2 Monosomic Turner 54 94.3 13.7 54 121 6.4 3.5 10.2 2.5 5.5 15.9
summize_data(cognition_imp)
## # A tibble: 2 x 12
## Diagnosis n FSIQ_mean FSIQ_sd FSIQ_min FSIQ_max arrows_mean arrows_sd age_mean age_sd age_min age_max
## <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control 45 113.2 11.6 87 134 10.6 2.9 10.1 2.1 5.2 14.2
## 2 Monosomic Turner 53 94.3 13.8 54 121 6.5 3.5 10.2 2.5 5.5 15.9
Do a two-sample nonparametric test for age, a chi-squared test of homogeneity for Tanner stages to check if the two groups are age matched.
# test of shift in location
with(cognition_imp,
wilcox.test(Age[Diagnosis=="Monosomic Turner"],
Age[Diagnosis=="Control"]))
##
## Wilcoxon rank sum test with continuity correction
##
## data: Age[Diagnosis == "Monosomic Turner"] and Age[Diagnosis == "Control"]
## W = 1211.5, p-value = 0.8951
## alternative hypothesis: true location shift is not equal to 0
# test of scale
with(cognition_imp,
mood.test(Age[Diagnosis=="Monosomic Turner"],
Age[Diagnosis=="Control"]))
##
## Mood two-sample test of scale
##
## data: Age[Diagnosis == "Monosomic Turner"] and Age[Diagnosis == "Control"]
## Z = 0.94731, p-value = 0.3435
## alternative hypothesis: two.sided
with(cognition_imp,
ansari.test(Age[Diagnosis=="Monosomic Turner"],
Age[Diagnosis=="Control"]))
##
## Ansari-Bradley test
##
## data: Age[Diagnosis == "Monosomic Turner"] and Age[Diagnosis == "Control"]
## AB = 1264.5, p-value = 0.388
## alternative hypothesis: true ratio of scales is not equal to 1
# test of homogeneity
with(cognition,table(Diagnosis,Breast))
## Breast
## Diagnosis 1 2 3 4
## Control 26 9 3 5
## Monosomic Turner 37 9 4 0
with(cognition,table(Diagnosis,Pubic))
## Pubic
## Diagnosis 1 2 3 4 5
## Control 26 7 4 6 0
## Monosomic Turner 32 8 8 1 1
combined = tibble(Diagnosis = c(cognition$Diagnosis,cognition$Diagnosis),
Tanner = c(cognition$Breast,cognition$Pubic))
# not enough observations at Turner stage 3 to 5, so merge them
ct = table(combined)
ct
## Tanner
## Diagnosis 1 2 3 4 5
## 1 52 16 7 11 0
## 2 69 17 12 1 1
ct_merge = tibble(tanner_1 = ct[,1],tanner_2 = ct[,2],tanner_345 = rowSums(ct[,3:5]))
ct_merge
## # A tibble: 2 x 3
## tanner_1 tanner_2 tanner_345
## <int> <int> <dbl>
## 1 52 16 18
## 2 69 17 14
chisq.test(ct_merge)
##
## Pearson's Chi-squared test
##
## data: ct_merge
## X-squared = 1.8756, df = 2, p-value = 0.3915
Prepare cognition data. Mapping test to domain.
shorter_col_names = function(col_names) {
col_names %>% str_replace("Scaled","") %>%
str_replace("Score","") %>%
str_replace("Score","") %>%
str_replace("Combined","") %>%
str_replace("Total","") %>%
str_replace("Contrast","") %>%
trimws %>%
gsub(pattern = "\\.",replacement = "") %>%
str_replace("-"," ") %>%
lapply(function(col_name)
str_split(string = col_name,pattern = " ")[[1]] %>% paste(collapse = "_")) %>%
str_replace("__","_")
}
cognition_scores_only = cognition_imp %>% select(ends_with("Scaled Score"))
names(cognition_scores_only) %<>% shorter_col_names
domain_mapping = tibble(
test = names(cognition_scores_only),
domain = c(
rep("Attention and Executive Functioning",7),
rep("Language",5),
rep("Memory and Languageearning",5),
rep("Sensorimotor",8),
rep("Visuospatial Processing",2)
)
)
iq_scores_only = cognition_imp[,c("VCI Composite Score",
"PRI Composite Score",
"WMI Composite Score",
"PSI Composite Score")]
PCA plot for cognition.
data = cognition_imp %>% dplyr::rename(FSIQ = 'FSIQ Composite Score')
res_pca_cognition = prcomp(cognition_scores_only,center = TRUE, scale. = TRUE)
fviz_eig(res_pca_cognition,geom="bar")
explained_var = (100*res_pca_cognition$sdev^2/sum(res_pca_cognition$sdev^2)) %>% round(.,1)
autoplot(res_pca_cognition,
size = 3,
data = data,
colour = "FSIQ",
shape = "Diagnosis",
xlab = paste0("PC1 (",explained_var[1],"%)"),
ylab = paste0("PC2 (",explained_var[2],"%)")) +
coord_fixed(ratio = explained_var[2] / explained_var[1]) +
ggtitle("Cognitive Tests")
ggplot2::ggsave(filename = "cognition_pca.pdf",width = 8,height = 3)
Plot loadings of the first cognition principal component.
cog_load = domain_mapping %>% add_column(loadings = res_pca_cognition$rotation[,1])
cog_load$test = factor(cog_load$test, levels = domain_mapping$test)
ggplot(cog_load,aes(x = loadings,y = test,color = domain)) +
geom_point(size = 3) +
geom_vline(xintercept = 0) +
ggtitle("Cognition Loadings")
ggplot2::ggsave(filename = "cognition_loadings.pdf",width = 8,height = 5)
PCA plot for morphometry.
plot_morphometry_pca = function(res_pca_morphometry,sample_info) {
explained_var = (100*res_pca_morphometry$sdev^2/sum(res_pca_morphometry$sdev^2)) %>% round(.,1)
autoplot(res_pca_morphometry,
size = 3,
data = sample_info,
colour = "FSIQ",
shape = "Diagnosis",
xlab = paste0("PC1 (",explained_var[1],"%)"),
ylab = paste0("PC2 (",explained_var[2],"%)")) +
coord_fixed(ratio = explained_var[2] / explained_var[1]) +
ggtitle("Brain Morphometry")
}
sample_info = cognition_imp %>% dplyr::rename(FSIQ = 'FSIQ Composite Score')
res_pca_morphometry = prcomp(morphometry,center = TRUE, scale. = TRUE)
fviz_eig(res_pca_morphometry,geom="bar")
plot_morphometry_pca(res_pca_morphometry,sample_info)
# # remove potential outlier
# potential_outlier = which.max(abs(res_pca_morphometry$x[,2]))
# res_pca_morphometry = prcomp(morphometry[-potential_outlier,],scale. = FALSE)
# fviz_eig(res_pca_morphometry,geom="bar")
# plot_morphometry_pca(res_pca_morphometry,sample_info[-potential_outlier,])
ggplot2::ggsave(filename = "morphometry_pca.pdf",width = 6,height = 4)
Plot loadings of the first morphometry principal component.
loadings_to_arr = function(loadings,axis = 3,probs = 0.2) {
# color quantiles
loadings = morphometry_loadings
qs = quantile(loadings,probs = c(probs/2,1-probs/2))
loadings_sign = rep("white",length(loadings))
loadings_sign[loadings >= qs[1] & loadings <= qs[2]] = "darkgray"
loadings_sign[loadings < qs[1]] = "blue"
loadings_sign[loadings > qs[2]] = "red"
loadings_arr = array("white",dim = dim(label_arr))
loadings_arr[label_arr != 0] = loadings_sign
loadings_arr
}
morphometry_loadings = res_pca_morphometry$rotation[,1]
ggplot(tibble(morphometry_loadings),aes(x = morphometry_loadings)) +
geom_histogram(bins = 100)
morphometry_loadings_arr = loadings_to_arr(morphometry_loadings)
combine_slices(morphometry_loadings_arr,
center_color = "darkgray",
crop = 15,
title = "Morphometry Loadings")
ggplot2::ggsave(filename = "morphometry_loadings.pdf",width = 5,height = 5)
Test interaction.
tb_score = tibble(Diagnosis = cognition_imp$Diagnosis,
FSIQ = cognition_imp$`FSIQ Composite Score`,
Cognition = res_pca_cognition$x[,1],
Morphometry = res_pca_morphometry$x[,1])
ggplot(tb_score,aes(y = Cognition,x = Morphometry,color = FSIQ,shape = Diagnosis)) +
geom_point(size = 3) +
ggtitle("Morphometry-Cognition Correlation")
ggplot(tb_score,aes(y = Cognition,x = Morphometry,color = Diagnosis)) +
stat_smooth(method = "lm", se = FALSE, mapping = aes(color = Diagnosis)) +
geom_point(size = 3) +
ggtitle("Morphometry-Cognition Correlation")
model1 = lm(formula = Cognition ~ Morphometry,data = tb_score)
summary(model1)
##
## Call:
## lm(formula = Cognition ~ Morphometry, data = tb_score)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8808 -1.5915 -0.1849 1.5538 5.3724
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.852e-15 2.419e-01 0.000 1
## Morphometry -9.762e-03 1.528e-03 -6.387 6.04e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.394 on 96 degrees of freedom
## Multiple R-squared: 0.2982, Adjusted R-squared: 0.2909
## F-statistic: 40.79 on 1 and 96 DF, p-value: 6.044e-09
model2 = lm(formula = Cognition ~ Morphometry + Diagnosis,data = tb_score)
summary(model2)
##
## Call:
## lm(formula = Cognition ~ Morphometry + Diagnosis, data = tb_score)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.1307 -1.6350 -0.1582 1.8190 4.8679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.128248 0.652486 -3.262 0.001539 **
## Morphometry 0.001560 0.003558 0.438 0.662059
## DiagnosisMonosomic Turner 3.935251 1.129765 3.483 0.000751 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.267 on 95 degrees of freedom
## Multiple R-squared: 0.3777, Adjusted R-squared: 0.3646
## F-statistic: 28.83 on 2 and 95 DF, p-value: 1.645e-10
model3 = lm(formula = Cognition ~ Morphometry * Diagnosis,data = tb_score)
summary(model3)
##
## Call:
## lm(formula = Cognition ~ Morphometry * Diagnosis, data = tb_score)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.1259 -1.6827 -0.1169 1.8081 4.8863
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -1.934354 0.883218 -2.190
## Morphometry 0.000324 0.005197 0.062
## DiagnosisMonosomic Turner 3.889173 1.143792 3.400
## Morphometry:DiagnosisMonosomic Turner 0.002345 0.007160 0.328
## Pr(>|t|)
## (Intercept) 0.03099 *
## Morphometry 0.95041
## DiagnosisMonosomic Turner 0.00099 ***
## Morphometry:DiagnosisMonosomic Turner 0.74394
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.277 on 94 degrees of freedom
## Multiple R-squared: 0.3784, Adjusted R-squared: 0.3585
## F-statistic: 19.07 on 3 and 94 DF, p-value: 9.639e-10
anova(model1,model2)
## Analysis of Variance Table
##
## Model 1: Cognition ~ Morphometry
## Model 2: Cognition ~ Morphometry + Diagnosis
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 96 550.39
## 2 95 488.06 1 62.333 12.133 0.0007508 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model2,model3)
## Analysis of Variance Table
##
## Model 1: Cognition ~ Morphometry + Diagnosis
## Model 2: Cognition ~ Morphometry * Diagnosis
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 95 488.06
## 2 94 487.50 1 0.55659 0.1073 0.7439
Alternatively analysis by separating groups and testing correlation.
score_ts = tb_score %>%
filter(Diagnosis == "Monosomic Turner") %>%
.[,c("Cognition","Morphometry")] %>%
scale(center = TRUE,scale = TRUE)
ggplot(score_ts,aes(y = Cognition,x = Morphometry)) +
stat_smooth(method = "lm", se = FALSE) +
geom_point(size = 3) +
ggtitle("Morphometry-Cognition for Monosomic Turner") +
coord_fixed()
score_control = tb_score %>%
filter(Diagnosis == "Control") %>%
.[,c("Cognition","Morphometry")] %>%
scale(center = TRUE,scale = TRUE)
ggplot(score_control,aes(y = Cognition,x = Morphometry)) +
stat_smooth(method = "lm", se = FALSE) +
geom_point(size = 3) +
ggtitle("Morphometry-Cognition for Control") +
coord_fixed()
cor(score_ts)["Cognition","Morphometry"]
## [1] 0.07287551
cor(score_control)["Cognition","Morphometry"]
## [1] 0.009988887
Let’s look at the pairwise correlations. We see that Visuomotor Precision Completation is negatively correlated to most other tests.
ggcorr(cognition_scores_only)
Use testing to select voxels and cognitive tests that are different between TS and controls.
Cognition:
alpha = 0.05
res_ttests = colttests(x = as.matrix(cognition_scores_only),
fac = cognition_imp$Diagnosis)
cog_sel = res_ttests$p.value %>% p.adjust(method = "BH") < alpha
mean(cog_sel)
## [1] 0.7777778
rownames(res_ttests)[cog_sel]
## [1] "Affect_Recognition"
## [2] "Response_Set"
## [3] "Inhibition_Inhibition"
## [4] "IN_Naming_vs_Inhibition"
## [5] "Comprehension_of_Instructions"
## [6] "Speeded_Naming_Completion_Time"
## [7] "Speeded_Naming"
## [8] "Word_Generation_Semantic"
## [9] "Word_Generation_Initial_Letter"
## [10] "Memory_for_Faces"
## [11] "Memory_for_Faces_Delayed"
## [12] "MF_vs_MFD"
## [13] "Narrative_Memory_Free_and_Cued_Recall"
## [14] "Narrative_Memory_Free_Recall"
## [15] "Fingertip_Tapping_Dominant_Hand"
## [16] "Fingertip_Tapping_Nondominant_Hand"
## [17] "Fingertip_Tapping_Sequences"
## [18] "FT_Repetitions_vs_Sequences"
## [19] "Visuomotor_Precision"
## [20] "Arrows"
## [21] "Picture_Puzzles"
Morphometry:
alpha = 0.05
res_ttests = colttests(x = morphometry,
fac = cognition_imp$Diagnosis)
morph_sel = res_ttests$p.value %>% p.adjust(method = "BH") < alpha
mean(morph_sel)
## [1] 0.1267654
plot_filter = function(morph_sel,axis = 3) {
filter_arr = array("white",dim = dim(label_arr))
filter_arr[label_arr != 0] = morph_filtered_vec = ifelse(morph_sel,
no = "darkgray",
yes = "red")
array_dims = lapply(1:3,function(i) seq_len(dim(filter_arr)[i]))
array_dims[[axis]] = round(dim(filter_arr)[axis]/2)
slice = filter_arr[array_dims[[1]],array_dims[[2]],array_dims[[3]]] %>%
flip %>%
flop
display(EBImage::Image(slice),method = "raster",interpolate = FALSE)
}
plot_filter(morph_sel,axis = 1)
plot_filter(morph_sel,axis = 2)
plot_filter(morph_sel,axis = 3)
Load cortical atlas to define minimum detectable cluster size.
cortical_atlas_arr = ReadImage("FSL.Atlas.Cort.To.Mean.nii.gz") %>% as.array
plot_slice(cortical_atlas_arr)
min_clustersize = min(table(cortical_atlas_arr))/2
min_clustersize
## [1] 861
Finally, we are ready to do a brain cognition differential correlation analysis.
# fac: (n x 1) factor with two levels
# morphometry: (n x num_voxels) matrix
# cognition: (n x num_tests) matrix
# gray_matter: binary image
res = NULL
if(!file.exists(params$res_file)) {
res = braincog(fac = cognition_imp$Diagnosis,
morphometry = scale(morphometry,center = TRUE,scale = FALSE),
cognition = scale(cognition_scores_only,center = TRUE,scale = FALSE),
gray_matter = label_arr,
min_clustersize = min_clustersize,
num_perm = 1000,
slurm = TRUE,
seed = 0xdada)
save(res,file = paste0("res_",params$seed,".Rdata"))
} else {
load(params$res_file)
}
summary(res)
## # A tibble: 2 x 6
## id label size color pvalue pvalue_adj
## <int> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 1 2 1306 #E41A1C 0.001 0.00
## 2 2 3 1005 #377EB8 0.035 0.04
selected_clusters = summary(res) %>%
dplyr::filter(pvalue_adj <= 0.05)
for(cluster_id in selected_clusters$id) {
plot(res,cluster_id) %>% print
ggplot2::ggsave(filename = paste0("cluster_",cluster_id,".pdf"),width = 5,height = 5)
}
for(cluster_id in selected_clusters$id)
plot_statistic(res,cluster_id) %>% print
Plot results for cognition.
plot_cognition(res,domain_mapping,alpha = 0.01)
ggsave(filename = "CognitiveCoefficients.pdf",width = 10,height = 5)
Pairs plot to visualize correlations in original data on only selected brain clusters and cognitive tests.
# selection at some significance level alpha
selected_tests = summary_cognition(res) %>%
dplyr::filter(pvalue_adj <= 0.01)
selected_tests %<>% filter(test != "Diagnosis")
selected_clusters = summary(res) %>%
dplyr::filter(pvalue_adj <= 0.05)
# compute cluster size for each participant
seg = res$seg
gray_matter = res$gray_matter
seg_gray_matter = seg[gray_matter==1]
brain_selection = apply(selected_clusters,1,function(cluster) {
tb = morphometry[,seg_gray_matter==cluster["label"]] %>%
exp %>%
rowSums %>%
tibble
names(tb) = paste0("Cluster_",cluster["id"])
tb
}) %>% bind_cols
cog_selection = cognition_scores_only[,selected_tests$test]
Plot raw cluster size data points.
if(length(brain_selection) > 0) {
brain_selection_long = reshape2::melt(brain_selection %>%
add_column(diagnosis = cognition_imp$Diagnosis),
id.vars = "diagnosis")
p = ggplot(brain_selection_long,aes(x = variable,y = value, color = diagnosis)) +
geom_beeswarm(cex = 4) +
ylab("cluster size") +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
facet_wrap(~ str_replace(variable,"_"," "),scales = "free_x")
print(p)
ggplot2::ggsave(filename = "selected_brain_cluster_size.pdf",width = 6,height = 2.5)
}
Plot raw cognitive test data points.
break_line = function(long_test_names) {
long_test_names %>%
as.character %>%
str_split(pattern = "_") %>%
.[[1]] %>%
paste0(collapse = " ") %>%
str_wrap(width = 20)
}
if(length(cog_selection) > 0) {
cog_selection_long = reshape2::melt(cog_selection %>%
add_column(diagnosis = cognition_imp$Diagnosis),
id.vars = "diagnosis")
cog_selection_long %<>% mutate(test_with_linebreak = sapply(variable,break_line))
p = ggplot(cog_selection_long,aes(x = value, color = diagnosis)) +
stat_ecdf() +
facet_wrap(~test_with_linebreak,ncol = 7) +
xlab("test score") +
theme(axis.title.y = element_blank()) +
theme(legend.position="top")
print(p)
ggplot2::ggsave(filename = "selected_cognition_score.pdf",width = 10,height = 5)
}
Plot raw correlation data between clusters and cognitive tests.
if(length(brain_selection) > 0 & length(cog_selection) > 0) {
for(i in seq_len(ncol(brain_selection))) {
brain_column = brain_selection[,i]
column_name = names(brain_column)
clustercog_selection = bind_cols(brain_column,
cog_selection,
diagnosis = cognition_imp$Diagnosis)
clustercog_selection_long = reshape2::melt(clustercog_selection,
id.vars = c(column_name,"diagnosis"))
clustercog_selection_long %<>% mutate(test_with_linebreak = sapply(variable,break_line))
p = ggplot(clustercog_selection_long,
aes_string(x = column_name,y = "value",color = "diagnosis")) +
geom_point(alpha = 0.5) +
stat_smooth(method = "lm", se = FALSE, mapping = aes(color = diagnosis)) +
facet_wrap(~test_with_linebreak,ncol = 7) +
ggtitle(str_replace(column_name,"_"," ")) +
xlab("cluster size") +
ylab("test score") +
theme(legend.position="top")
print(p)
ggplot2::ggsave(plot = p, filename = paste0("pair_brain_cognition_",column_name,".pdf"),
width = 10,height = 6)
}
}
session_info()
## Session info -------------------------------------------------------------
## setting value
## version R version 3.4.1 (2017-06-30)
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz America/Los_Angeles
## date 2017-09-28
## Packages -----------------------------------------------------------------
## package * version date
## abind * 1.4-5 2016-07-21
## annotate 1.54.0 2017-04-25
## AnnotationDbi 1.38.2 2017-07-27
## assertthat 0.2.0 2017-04-11
## backports 1.1.1 2017-09-25
## base * 3.4.1 2017-07-07
## base64enc 0.1-3 2015-07-28
## BatchJobs 1.7 2017-09-28
## BBmisc 1.11 2017-03-10
## beeswarm 0.2.3 2016-04-25
## bindr 0.1 2016-11-13
## bindrcpp * 0.2 2017-06-17
## Biobase 2.36.2 2017-05-04
## BiocGenerics 0.22.0 2017-04-25
## BiocInstaller * 1.26.1 2017-09-01
## BiocParallel * 1.10.1 2017-05-03
## bit 1.1-12 2014-04-09
## bit64 0.9-7 2017-05-08
## bitops 1.0-6 2013-08-17
## blob 1.1.0 2017-06-17
## braincog * 0.1.0 2017-09-28
## brew 1.0-6 2011-04-13
## checkmate 1.8.4 2017-09-25
## colorspace 1.3-2 2016-12-14
## compiler 3.4.1 2017-07-07
## cowplot 0.8.0 2017-07-30
## curl 2.8.1 2017-07-21
## data.table 1.10.4 2017-02-01
## datasets * 3.4.1 2017-07-07
## DBI 0.7 2017-06-18
## devtools * 1.13.3 2017-08-02
## digest 0.6.12 2017-01-27
## dplyr * 0.7.2 2017-07-20
## EBImage * 4.18.2 2017-09-09
## evaluate 0.10.1 2017-06-24
## factoextra * 1.0.5 2017-08-22
## fftwtools 0.9-8 2017-03-25
## genefilter * 1.58.1 2017-05-06
## GGally * 1.3.2 2017-08-02
## ggbeeswarm * 0.6.0 2017-08-07
## ggfortify * 0.4.1 2017-02-16
## ggplot2 * 2.2.1 2016-12-30
## ggpubr 0.1.5 2017-08-22
## ggrepel 0.6.5 2016-11-24
## git2r 0.19.0 2017-07-19
## glue 1.1.1 2017-06-21
## graphics * 3.4.1 2017-07-07
## grDevices * 3.4.1 2017-07-07
## grid 3.4.1 2017-07-07
## gridExtra 2.2.1 2016-02-29
## gtable 0.2.0 2016-02-26
## hms 0.3 2016-11-22
## htmltools 0.3.6 2017-04-28
## httr 1.3.1 2017-08-20
## impute * 1.50.1 2017-05-06
## IRanges 2.10.3 2017-09-02
## jpeg 0.1-8 2014-01-23
## knitr 1.17 2017-08-10
## labeling 0.3 2014-08-23
## lattice 0.20-35 2017-03-25
## lazyeval 0.2.0 2016-06-12
## locfit 1.5-9.1 2013-04-20
## magrittr * 1.5 2014-11-22
## MASS 7.3-47 2017-02-26
## Matrix 1.2-11 2017-08-16
## memoise 1.1.0 2017-04-21
## methods * 3.4.1 2017-07-07
## mice * 2.30 2017-02-18
## munsell 0.4.3 2016-02-13
## nnet 7.3-12 2016-02-02
## parallel 3.4.1 2017-07-07
## pkgconfig 2.0.1 2017-03-21
## plyr * 1.8.4 2016-06-08
## PMA * 1.0.9 2013-03-25
## png 0.1-7 2013-12-03
## purrr 0.2.3 2017-08-02
## R6 2.2.2 2017-06-17
## RColorBrewer 1.1-2 2014-12-07
## Rcpp 0.12.12 2017-07-15
## RCurl 1.95-4.8 2016-03-01
## readr * 1.1.1 2017-05-16
## reshape 0.8.7 2017-08-06
## reshape2 1.4.2 2016-10-22
## rlang 0.1.2 2017-08-09
## rmarkdown 1.6 2017-06-15
## rpart 4.1-11 2017-03-13
## rprojroot 1.2 2017-01-16
## RSQLite 2.0 2017-06-19
## S4Vectors 0.14.3 2017-06-03
## scales 0.5.0 2017-08-24
## sendmailR 1.2-1 2014-09-21
## SimpleITK * 1.1.0-9303 2017-08-30
## splines 3.4.1 2017-07-07
## stats * 3.4.1 2017-07-07
## stats4 3.4.1 2017-07-07
## stringi 1.1.5 2017-04-07
## stringr * 1.2.0 2017-02-18
## survival 2.41-3 2017-04-04
## tibble * 1.3.4 2017-08-22
## tidyr 0.7.1 2017-09-01
## tidyselect 0.2.0 2017-08-30
## tiff 0.1-5 2013-09-04
## tools 3.4.1 2017-07-07
## utils * 3.4.1 2017-07-07
## vipor 0.4.5 2017-03-22
## visdat * 0.1.0 2017-07-11
## withr 2.0.0 2017-07-28
## XML 3.98-1.9 2017-06-19
## xtable 1.8-2 2016-02-05
## yaml 2.1.14 2016-11-12
## source
## CRAN (R 3.4.0)
## Bioconductor
## Bioconductor
## CRAN (R 3.4.0)
## cran (@1.1.1)
## local
## cran (@0.1-3)
## Github (ChristofSeiler/BatchJobs@e5a5ba7)
## cran (@1.11)
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## cran (@1.1-12)
## cran (@0.9-7)
## CRAN (R 3.4.0)
## cran (@1.1.0)
## Github (ChristofSeiler/braincog@c79ec30)
## cran (@1.0-6)
## cran (@1.8.4)
## CRAN (R 3.4.0)
## local
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## cran (@1.10.4)
## local
## cran (@0.7)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## cran (@4.18.2)
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## Bioconductor
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## local
## local
## local
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## Bioconductor
## Bioconductor
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## local
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## local
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## cran (@1.2)
## cran (@2.0)
## Bioconductor
## CRAN (R 3.4.1)
## cran (@1.2-1)
## local
## local
## local
## local
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## local
## local
## CRAN (R 3.4.0)
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## CRAN (R 3.4.1)
## CRAN (R 3.4.0)
## CRAN (R 3.4.0)