I am working on placing letters representing the results of a Tukey's HSD on a boxplot that I've made. I've found a description of how to do it here: r-graph gallery, but I am getting an error after I run the function.
Here is my data:
# Console output of dput(): a data frame with 20 rows and two columns, the
# five-level factor `ChillTime` (four rows per level) and the numeric `dmass`.
dput(WaterConDryMass)
structure(list(ChillTime = structure(c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L), .Label = c("Pre-chill",
"6", "13", "24", "Post-chill"), class = "factor"), dmass = c(0.22,
0.19, 0.34, 0.12, 0.23, 0.33, 0.38, 0.15, 0.31, 0.34, 0.45, 0.48,
0.59, 0.54, 0.73, 0.69, 0.53, 0.57, 0.39, 0.8)), .Names = c("ChillTime",
"dmass"), row.names = c(NA, -20L), class = "data.frame")
Necessary packages:
install.packages('ggplot2')
install.packages('multcompView')
With my code for the boxplot:
# Pin the display order of the chill-time groups.
WaterConDryMass$ChillTime <- factor(
  WaterConDryMass$ChillTime,
  levels = c("Pre-chill", "6", "13", "24", "Post-chill")
)

# Boxplot of dry mass per chill-time group with the middle three groups shaded.
ggplot(WaterConDryMass, aes(x = ChillTime, y = dmass)) +
  # Kept as the first layer; presumably so the discrete x scale exists before
  # the numeric rectangle below is added.
  geom_blank() +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold")
  ) +
  labs(x = "Time (weeks)", y = "Water Content (DM %)") +
  ggtitle(
    expression(atop(bold("Water Content"), atop(italic("(Dry Mass)"), "")))
  ) +
  # Grey band between x positions 1.5 and 4.5, drawn underneath the boxes.
  annotate(
    geom = "rect", xmin = 1.5, xmax = 4.5, ymin = -Inf, ymax = Inf,
    alpha = 0.6, fill = "grey90"
  ) +
  geom_boxplot(fill = "green2") +
  # Constant positions are given as a parameter rather than inside aes(), so
  # each dashed line is drawn once instead of once per data row.
  geom_vline(xintercept = c(1.5, 4.5), linetype = "dashed")
Produces this:
For the ANOVA:
# One-way ANOVA of dry mass across the chill-time groups.
Model4 <- aov(formula = dmass ~ ChillTime, data = WaterConDryMass)
Tukey's HSD:
# Pairwise Tukey comparisons on the fitted model, then their interval plot.
TUKEY <- TukeyHSD(x = Model4)
plot(TUKEY, col = "brown", las = 1)
Code up until the function (where there is an error)
# Build the letter labels that group treatments which are not different from
# each other.
#   TUKEY    - result of TukeyHSD()
#   variable - name of the element of TUKEY to read
# Returns a data frame with a `Letters` column and a `ChillTime` column holding
# the group names, with rows sorted alphabetically by group name.
generate_label_df <- function(TUKEY, variable) {
  # Fourth column of the comparison table (the adjusted p-values in TukeyHSD
  # output), named by comparison.
  adjusted_p <- TUKEY[[variable]][, 4]
  letter_groups <- multcompLetters(adjusted_p)
  label_df <- data.frame(letter_groups["Letters"])
  # Group names arrive as row names; copy them into a regular column.
  label_df$ChillTime <- rownames(label_df)
  # NOTE: alphabetical order, which is not necessarily the factor-level order
  # in which the boxplot draws the groups.
  label_df[order(label_df$ChillTime), ]
}
# Build the label table from the Tukey result for my dataset
LABELS <- generate_label_df(TUKEY, "WaterConDryMass$ChillT")
The error:
Error in strsplit(x, sep) : non-character argument
Code from the rest of the script (which I haven't gotten to yet):
# Palette: one colour per letter group
my_colors <- rgb(
  red = c(143, 242, 111, 254, 74, 236, 165),
  green = c(199, 104, 145, 188, 132, 33, 103),
  blue = c(74, 34, 202, 18, 54, 39, 40),
  maxColorValue = 255
)
# Basic boxplot; the returned statistics are kept to position the letters
a <- boxplot(
  data$value ~ data$treatment,
  ylim = c(min(data$value), 1.1 * max(data$value)),
  col = my_colors[as.numeric(LABELS[, 1])],
  ylab = "value",
  main = ""
)
# How far above the last row of the box statistics each letter is written
over <- 0.1 * max(a$stats[nrow(a$stats), ])
# Write the letter over each box
text(
  1:nlevels(WaterConDryMass$ChillTime),
  a$stats[nrow(a$stats), ] + over,
  LABELS[, 1],
  col = my_colors[as.numeric(LABELS[, 1])]
)
I'll eventually have to incorporate my specific box plot into the "# Draw the basic boxplot" step in the final bit of code, but I haven't gotten that far yet.
The goal is to get a box plot that has letters above the top whisker like this, but not necessarily color-coordinated like the example:
EDIT: Here is start to finish code copied from your question to get you your plot.
I had to change the labels of `ChillTime` in the structure of your data frame at the start so they use underscores rather than hyphens. Likewise for when you convert `ChillTime` to a factor: the levels can't contain hyphens for `multcompLetters` to work, because it splits the Tukey comparison names (e.g. `6-Pre_chill`) on the hyphen to recover the two group names. Finally, you just need to pass the variable name (`"ChillTime"`) to your function rather than `"WaterConDryMass$ChillT"`.
library(ggplot2)
library(multcompView)
# Example data: dry mass measured four times in each of five chill-time
# groups. The factor levels are listed in the order they should be plotted.
WaterConDryMass <- data.frame(
  ChillTime = factor(
    rep(c("Pre_chill", "6", "13", "24", "Post_chill"), each = 4),
    levels = c("Pre_chill", "6", "13", "24", "Post_chill")
  ),
  dmass = c(
    0.22, 0.19, 0.34, 0.12,
    0.23, 0.33, 0.38, 0.15,
    0.31, 0.34, 0.45, 0.48,
    0.59, 0.54, 0.73, 0.69,
    0.53, 0.57, 0.39, 0.8
  )
)
# Boxplot of dry mass per chill-time group with the middle three groups shaded.
ggplot(WaterConDryMass, aes(x = ChillTime, y = dmass)) +
  # Kept as the first layer; presumably so the discrete x scale exists before
  # the numeric rectangle below is added.
  geom_blank() +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold")
  ) +
  labs(x = "Time (weeks)", y = "Water Content (DM %)") +
  ggtitle(
    expression(atop(bold("Water Content"), atop(italic("(Dry Mass)"), "")))
  ) +
  # Grey band between x positions 1.5 and 4.5, drawn underneath the boxes.
  annotate(
    geom = "rect", xmin = 1.5, xmax = 4.5, ymin = -Inf, ymax = Inf,
    alpha = 0.6, fill = "grey90"
  ) +
  geom_boxplot(fill = "green2") +
  # Constant positions are given as a parameter rather than inside aes(), so
  # each dashed line is drawn once instead of once per data row.
  geom_vline(xintercept = c(1.5, 4.5), linetype = "dashed")
# One-way ANOVA of dry mass by chill time, then Tukey's HSD on the fit
Model4 <- aov(formula = dmass ~ ChillTime, data = WaterConDryMass)
TUKEY <- TukeyHSD(x = Model4)
# Interval plot of the pairwise differences
plot(TUKEY, col = "brown", las = 1)
# Build letter labels from a Tukey HSD result; groups that share a letter are
# not significantly different from each other.
#
# Arguments:
#   TUKEY       - object returned by TukeyHSD()
#   variable    - name of the model term in TUKEY to summarise, e.g.
#                 "ChillTime" (the bare term name, not "data$column")
#   level_order - optional character vector of group names giving the row
#                 order of the result, typically levels() of the plotted
#                 factor. With the default NULL the rows are sorted
#                 alphabetically by group name, which is not necessarily the
#                 order in which a boxplot draws the groups.
#
# Returns a data frame with one row per group and the columns `Letters` and
# `ChillTime` (the group name).
generate_label_df <- function(TUKEY, variable, level_order = NULL) {
  comparisons <- TUKEY[[variable]]
  # Fail with a readable message; passing a missing term on to
  # multcompLetters() only produces "Error in strsplit(x, sep)".
  if (is.null(comparisons)) {
    stop(
      "No term named '", variable, "' in `TUKEY`; available terms: ",
      paste(names(TUKEY), collapse = ", "),
      call. = FALSE
    )
  }

  # Adjusted p-values named by comparison ("groupB-groupA"). The names are
  # re-attached explicitly so they are still present when the table has a
  # single row (two groups only).
  p_adjusted <- stats::setNames(comparisons[, "p adj"], rownames(comparisons))

  Tukey.labels <- data.frame(multcompLetters(p_adjusted)["Letters"])
  # Group names arrive as row names; expose them as a regular column.
  Tukey.labels$ChillTime <- rownames(Tukey.labels)

  if (is.null(level_order)) {
    row_order <- order(Tukey.labels$ChillTime)
  } else {
    row_order <- match(level_order, Tukey.labels$ChillTime)
    if (anyNA(row_order)) {
      stop(
        "`level_order` contains groups missing from the Tukey result: ",
        paste(level_order[is.na(row_order)], collapse = ", "),
        call. = FALSE
      )
    }
  }
  Tukey.labels[row_order, ]
}
# Apply the function on my dataset
LABELS <- generate_label_df(TUKEY, "ChillTime")
# The labels come back sorted alphabetically by group name, but boxplot()
# draws the groups in factor-level order. Realign the rows so that row i
# belongs to box i before they are used positionally below.
LABELS <- LABELS[match(levels(WaterConDryMass$ChillTime), LABELS$ChillTime), ]

# One integer code per distinct letter string, used to pick a colour. The
# letters are text, so as.numeric() on them gives NA instead of an index.
letter_codes <- as.integer(factor(LABELS$Letters))

# A panel of colors to draw each group with the same color
my_colors <- rgb(
  red = c(143, 242, 111, 254, 74, 236, 165),
  green = c(199, 104, 145, 188, 132, 33, 103),
  blue = c(74, 34, 202, 18, 54, 39, 40),
  maxColorValue = 255
)

# Draw the basic boxplot; the y range is extended by 10% to make room for the
# letters, and the returned statistics are kept to position them.
a <- boxplot(
  WaterConDryMass$dmass ~ WaterConDryMass$ChillTime,
  ylim = c(min(WaterConDryMass$dmass), 1.1 * max(WaterConDryMass$dmass)),
  col = my_colors[letter_codes],
  ylab = "value",
  main = ""
)

# The last row of a$stats is the top whisker of each box; `over` is how far
# above it the letter is written.
top_whisker <- a$stats[nrow(a$stats), ]
over <- 0.1 * max(top_whisker)

# Add the labels
text(
  seq_len(nlevels(WaterConDryMass$ChillTime)),
  top_whisker + over,
  LABELS$Letters,
  col = my_colors[letter_codes]
)