R: Color overlaps in Venn diagram by size of overlap

Kristoffer Vitting-Seerup picture Kristoffer Vitting-Seerup · Jul 14, 2014 · Viewed 7.2k times · Source

I think Venn diagrams are an extremly usefull way of comparing data. The problem is that as soon as I start having multiple (3 or more) classes the size of the circles can no longer indicate the size of the overlap.

What I would like to do is to color each filed in the Venn diagram by the size of the overlap instead of the class label:

For example when I draw a normal Venn diagram:

require(VennDiagram)
# Make data
oneName <- function() paste(sample(LETTERS,5,replace=TRUE),collapse="")
geneNames <- replicate(1000, oneName())

GroupA <- sample(geneNames, 400, replace=FALSE)
GroupB <- sample(geneNames, 750, replace=FALSE)
GroupC <- sample(geneNames, 250, replace=FALSE)
GroupD <- sample(geneNames, 300, replace=FALSE)

v1 <- venn.diagram(list(A=GroupA, B=GroupB, C=GroupC, D=GroupD), filename=NULL, fill=rainbow(4))
grid.newpage()
grid.draw(v1)

It looks like this: Resulting Venn diagram

The resulting Venn diagram is divided into 15 separate fields each with its own color and number. The color of each separate field is determined by the color of the categories/groups indicated by the fill argument.

What I want to do is instead to color each separate field using a color gradient indicating the size of the field so it will be visually easy to spot the largest/smallest groups (Similar to how the coloring of heatmaps / levelplots works)

Is there a way of doing that in R?

Answer

winni2k picture winni2k · Nov 8, 2014

Instead of changing the color of each of the 15 fields, what may be a useful partial solution to your problem is to scale the size of each field label by the size of the field instead. I ran into this problem and rewrote draw.quad.venn() to accept a new variable cex.prop that lets you switch on scaling of field labels according to field size. cex.prop may be "lin" for linear scaling and "log10" for log10 scaling. Here is the code. Just run all of it and you should get this image:

enter image description here

The code I used to generate this plot is below. I have put within comments (###BEGIN WWK and ###END WWK) the parts of draw.quad.venn() that I added. I have also put code with changes to all four venn diagram functions on github.

draw.quad.venn <- function (area1, area2, area3, area4, n12, n13, n14, n23, n24, 
    n34, n123, n124, n134, n234, n1234, category = rep("", 4), 
    lwd = rep(2, 4), lty = rep("solid", 4), col = rep("black", 
        4), fill = NULL, alpha = rep(0.5, 4), label.col = rep("black", 
        15), cex = rep(1, 15), fontface = rep("plain", 15), fontfamily = rep("serif", 
        15), cat.pos = c(-15, 15, 0, 0), cat.dist = c(0.22, 0.22, 
        0.11, 0.11), cat.col = rep("black", 4), cat.cex = rep(1, 
        4), cat.fontface = rep("plain", 4), cat.fontfamily = rep("serif", 
        4), cat.just = rep(list(c(0.5, 0.5)), 4), rotation.degree = 0, 
    rotation.centre = c(0.5, 0.5), ind = TRUE,
### BEGIN WWK
                            cex.prop=NULL,
### END WWK
                            ...) 
{
    if (length(category) == 1) {
        cat <- rep(category, 4)
    }
    else if (length(category) != 4) {
        stop("Unexpected parameter length for 'category'")
    }
    if (length(lwd) == 1) {
        lwd <- rep(lwd, 4)
    }
    else if (length(lwd) != 4) {
        stop("Unexpected parameter length for 'lwd'")
    }
    if (length(lty) == 1) {
        lty <- rep(lty, 4)
    }
    else if (length(lty) != 4) {
        stop("Unexpected parameter length for 'lty'")
    }
    if (length(col) == 1) {
        col <- rep(col, 4)
    }
    else if (length(col) != 4) {
        stop("Unexpected parameter length for 'col'")
    }
    if (length(label.col) == 1) {
        label.col <- rep(label.col, 15)
    }
    else if (length(label.col) != 15) {
        stop("Unexpected parameter length for 'label.col'")
    }
    if (length(cex) == 1) {
        cex <- rep(cex, 15)
    }
    else if (length(cex) != 15) {
        stop("Unexpected parameter length for 'cex'")
    }
    if (length(fontface) == 1) {
        fontface <- rep(fontface, 15)
    }
    else if (length(fontface) != 15) {
        stop("Unexpected parameter length for 'fontface'")
    }
    if (length(fontfamily) == 1) {
        fontfamily <- rep(fontfamily, 15)
    }
    else if (length(fontfamily) != 15) {
        stop("Unexpected parameter length for 'fontfamily'")
    }
    if (length(fill) == 1) {
        fill <- rep(fill, 4)
    }
    else if (length(fill) != 4 & length(fill) != 0) {
        stop("Unexpected parameter length for 'fill'")
    }
    if (length(alpha) == 1) {
        alpha <- rep(alpha, 4)
    }
    else if (length(alpha) != 4 & length(alpha) != 0) {
        stop("Unexpected parameter length for 'alpha'")
    }
    if (length(cat.pos) == 1) {
        cat.pos <- rep(cat.pos, 4)
    }
    else if (length(cat.pos) != 4) {
        stop("Unexpected parameter length for 'cat.pos'")
    }
    if (length(cat.dist) == 1) {
        cat.dist <- rep(cat.dist, 4)
    }
    else if (length(cat.dist) != 4) {
        stop("Unexpected parameter length for 'cat.dist'")
    }
    if (length(cat.col) == 1) {
        cat.col <- rep(cat.col, 4)
    }
    else if (length(cat.col) != 4) {
        stop("Unexpected parameter length for 'cat.col'")
    }
    if (length(cat.cex) == 1) {
        cat.cex <- rep(cat.cex, 4)
    }
    else if (length(cat.cex) != 4) {
        stop("Unexpected parameter length for 'cat.cex'")
    }
    if (length(cat.fontface) == 1) {
        cat.fontface <- rep(cat.fontface, 4)
    }
    else if (length(cat.fontface) != 4) {
        stop("Unexpected parameter length for 'cat.fontface'")
    }
    if (length(cat.fontfamily) == 1) {
        cat.fontfamily <- rep(cat.fontfamily, 4)
    }
    else if (length(cat.fontfamily) != 4) {
        stop("Unexpected parameter length for 'cat.fontfamily'")
    }
    if (!(class(cat.just) == "list" & length(cat.just) == 4 & 
        length(cat.just[[1]]) == 2 & length(cat.just[[2]]) == 
        2 & length(cat.just[[3]]) == 2 & length(cat.just[[4]]) == 
        2)) {
        stop("Unexpected parameter format for 'cat.just'")
    }
    cat.pos <- cat.pos + rotation.degree
    a6 <- n1234
    a12 <- n123 - a6
    a11 <- n124 - a6
    a5 <- n134 - a6
    a7 <- n234 - a6
    a15 <- n12 - a6 - a11 - a12
    a4 <- n13 - a6 - a5 - a12
    a10 <- n14 - a6 - a5 - a11
    a13 <- n23 - a6 - a7 - a12
    a8 <- n24 - a6 - a7 - a11
    a2 <- n34 - a6 - a5 - a7
    a9 <- area1 - a4 - a5 - a6 - a10 - a11 - a12 - a15
    a14 <- area2 - a6 - a7 - a8 - a11 - a12 - a13 - a15
    a1 <- area3 - a2 - a4 - a5 - a6 - a7 - a12 - a13
    a3 <- area4 - a2 - a5 - a6 - a7 - a8 - a10 - a11
    areas <- c(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, 
        a12, a13, a14, a15)
    areas.error <- c("a1  <- area3 - a2 - a4 - a5 - a6 - a7 - a12 - a13", 
        "a2  <- n34 - a6 - a5 - a7", "a3  <- area4 - a2 - a5 - a6 - a7 - a8 - a10 - a11", 
        "a4  <- n13 - a6 - a5 - a12", "a5  <- n134 - a6", "a6  <- n1234", 
        "a7  <- n234 - a6", "a8  <- n24 - a6 - a7 - a11", "a9  <- area1 - a4 - a5 - a6 - a10 - a11 - a12 - a15", 
        "a10 <- n14 - a6 - a5 - a11", "a11 <- n124 - a6", "a12 <- n123 - a6", 
        "a15 <- n12 - a6 - a11 - a12", "a13 <- n23 - a6 - a7 - a12", 
        "a14 <- area2 - a6 - a7 - a8 - a11 - a12 - a13 - a15")
    for (i in 1:length(areas)) {
        if (areas[i] < 0) {
            stop(paste("Impossible:", areas.error[i], "produces negative area"))
        }
    }
    grob.list <- gList()
    ellipse.positions <- matrix(nrow = 4, ncol = 7)
    colnames(ellipse.positions) <- c("x", "y", "a", "b", "rotation", 
        "fill.mapping", "line.mapping")
    ellipse.positions[1, ] <- c(0.65, 0.47, 0.35, 0.2, 45, 2, 
        4)
    ellipse.positions[2, ] <- c(0.35, 0.47, 0.35, 0.2, 135, 1, 
        1)
    ellipse.positions[3, ] <- c(0.5, 0.57, 0.33, 0.15, 45, 4, 
        3)
    ellipse.positions[4, ] <- c(0.5, 0.57, 0.35, 0.15, 135, 3, 
        2)
    for (i in 1:4) {
        grob.list <- gList(grob.list, VennDiagram::ellipse(x = ellipse.positions[i, 
            "x"], y = ellipse.positions[i, "y"], a = ellipse.positions[i, 
            "a"], b = ellipse.positions[i, "b"], rotation = ellipse.positions[i, 
            "rotation"], gp = gpar(lty = 0, fill = fill[ellipse.positions[i, 
            "fill.mapping"]], alpha = alpha[ellipse.positions[i, 
            "fill.mapping"]])))
    }
    for (i in 1:4) {
        grob.list <- gList(grob.list, ellipse(x = ellipse.positions[i, 
            "x"], y = ellipse.positions[i, "y"], a = ellipse.positions[i, 
            "a"], b = ellipse.positions[i, "b"], rotation = ellipse.positions[i, 
            "rotation"], gp = gpar(lwd = lwd[ellipse.positions[i, 
            "line.mapping"]], lty = lty[ellipse.positions[i, 
            "line.mapping"]], col = col[ellipse.positions[i, 
            "line.mapping"]], fill = "transparent")))
    }
    label.matrix <- matrix(nrow = 15, ncol = 3)
    colnames(label.matrix) <- c("label", "x", "y")
    label.matrix[1, ] <- c(a1, 0.35, 0.77)
    label.matrix[2, ] <- c(a2, 0.5, 0.69)
    label.matrix[3, ] <- c(a3, 0.65, 0.77)
    label.matrix[4, ] <- c(a4, 0.31, 0.67)
    label.matrix[5, ] <- c(a5, 0.4, 0.58)
    label.matrix[6, ] <- c(a6, 0.5, 0.47)
    label.matrix[7, ] <- c(a7, 0.6, 0.58)
    label.matrix[8, ] <- c(a8, 0.69, 0.67)
    label.matrix[9, ] <- c(a9, 0.18, 0.58)
    label.matrix[10, ] <- c(a10, 0.32, 0.42)
    label.matrix[11, ] <- c(a11, 0.425, 0.38)
    label.matrix[12, ] <- c(a12, 0.575, 0.38)
    label.matrix[13, ] <- c(a13, 0.68, 0.42)
    label.matrix[14, ] <- c(a14, 0.82, 0.58)
    label.matrix[15, ] <- c(a15, 0.5, 0.28)

### BEGIN WWK
    if(length(cex.prop) == 1){
        maxArea = max(areas)
        if(cex.prop == "lin"){
            for(i in 1:length(areas)){
                cex[i] = cex[i] * areas[i] / maxArea
            }
        }
        else if(cex.prop == "log10"){
            for(i in 1:length(areas)){
                if(areas[i] != 0){
                    cex[i] = cex[i] * log10(areas[i]) / log10(maxArea)
                }
                else{
                    warn(paste("Error in log10 rescaling of areas: area ",i," is zero", sep=""))
                }
            }        
        }
        else {
            stop(paste("Unknown value passed to cex.prop:", cex.prop))
        }
    }
### END WWK

    for (i in 1:nrow(label.matrix)) {
        grob.list <- gList(grob.list, textGrob(label = label.matrix[i, 
            "label"], x = label.matrix[i, "x"], y = label.matrix[i, 
            "y"], gp = gpar(col = label.col[i], cex = cex[i], 
            fontface = fontface[i], fontfamily = fontfamily[i])))
    }
    cat.pos.x <- c(0.18, 0.82, 0.35, 0.65)
    cat.pos.y <- c(0.58, 0.58, 0.77, 0.77)
    for (i in 1:4) {
        this.cat.pos <- find.cat.pos(x = cat.pos.x[i], y = cat.pos.y[i], 
            pos = cat.pos[i], dist = cat.dist[i])
        grob.list <- gList(grob.list, textGrob(label = category[i], 
            x = this.cat.pos$x, y = this.cat.pos$y, just = cat.just[[i]], 
            gp = gpar(col = cat.col[i], cex = cat.cex[i], fontface = cat.fontface[i], 
                fontfamily = cat.fontfamily[i])))
    }
    grob.list <- VennDiagram::adjust.venn(VennDiagram::rotate.venn.degrees(grob.list, 
        rotation.degree, rotation.centre[1], rotation.centre[2]), 
        ...)
    if (ind) {
        grid.draw(grob.list)
    }
    return(grob.list)
}
assignInNamespace("draw.quad.venn",draw.quad.venn, ns="VennDiagram")

# Make data
oneName <- function() paste(sample(LETTERS,5,replace=TRUE),collapse="")
geneNames <- replicate(1000, oneName())

GroupA <- sample(geneNames, 400, replace=FALSE)
GroupB <- sample(geneNames, 750, replace=FALSE)
GroupC <- sample(geneNames, 250, replace=FALSE)
GroupD <- sample(geneNames, 300, replace=FALSE)

v1 <- venn.diagram(list(A=GroupA, B=GroupB, C=GroupC, D=GroupD), filename=NULL, fill=rainbow(4), cex.prop="log10", cex=2)
png("test.png", width=7, height=7, units='in', res=150)
grid.newpage()
grid.draw(v1)
dev.off()