Is it possible to plot the smooth components of a gam fit with ggplot2?

unique2 picture unique2 · Nov 1, 2013 · Viewed 14k times · Source

I am fitting a model using gam from the mgcv package and store the result in model and so far I have been looking at the smooth components using plot(model). I have recently started using ggplot2 and like its output. So I am wondering, is it possible to plot these graphs using ggplot2?

Here is an example:

x1 = rnorm(1000)
x2 = rnorm(1000)
n = rpois(1000, exp(x1) + x2^2)

model = gam(n ~ s(x1, k=10) + s(x2, k=20), family="poisson")
plot(model, rug=FALSE, select=1)
plot(model, rug=FALSE, select=2)

And I am interest in s(x1, k=10) and s(x2, k=20) not in the fit.

Partial answer:

I dug deeper into plot.gam and mgcv:::plot.mgcv.smooth and built my own function which extracts the predicted effects and standard errors from the smooth components. It doesn't handle all options and cases of plot.gam so I only consider it a partial solution, but it works well for me.

EvaluateSmooths = function(model, select=NULL, x=NULL, n=100) {
  if (is.null(select)) {
    select = 1:length(model$smooth)
  }
  do.call(rbind, lapply(select, function(i) {
    smooth = model$smooth[[i]]
    data = model$model

    if (is.null(x)) {
      min = min(data[smooth$term])
      max = max(data[smooth$term])
      x = seq(min, max, length=n)
    }
    if (smooth$by == "NA") {
      by.level = "NA"
    } else {
      by.level = smooth$by.level
    }
    range = data.frame(x=x, by=by.level)
    names(range) = c(smooth$term, smooth$by)

    mat = PredictMat(smooth, range)
    par = smooth$first.para:smooth$last.para

    y = mat %*% model$coefficients[par]

    se = sqrt(rowSums(
      (mat %*% model$Vp[par, par, drop = FALSE]) * mat
    ))

    return(data.frame(
      label=smooth$label
      , x.var=smooth$term
      , x.val=x
      , by.var=smooth$by
      , by.val=by.level
      , value = y
      , se = se
    ))
  }))
}

This returns a "molten" data frame with the smooth components, so it is now possible to use ggplot with the example above :

smooths = EvaluateSmooths(model)

ggplot(smooths, aes(x.val, value)) + 
  geom_line() + 
  geom_line(aes(y=value + 2*se), linetype="dashed") + 
  geom_line(aes(y=value - 2*se), linetype="dashed") + 
  facet_grid(. ~ x.var)

If anyone knows a package which allows this in the general case I would be very grateful.

Answer

Dag Hjermann picture Dag Hjermann · Jan 17, 2014

You can use the visreg package combined with the plyr package. visreg basically plots any model that you can use predict() on.

library(mgcv)
library(visreg)
library(plyr)
library(ggplot2)

# Estimating gam model:
x1 = rnorm(1000)
x2 = rnorm(1000)
n = rpois(1000, exp(x1) + x2^2)
model = gam(n ~ s(x1, k=10) + s(x2, k=20), family="poisson")

# use plot = FALSE to get plot data from visreg without plotting
plotdata <- visreg(model, type = "contrast", plot = FALSE)

# The output from visreg is a list of the same length as the number of 'x' variables,
#   so we use ldply to pick the objects we want from the each list part and make a dataframe: 
smooths <- ldply(plotdata, function(part)   
  data.frame(Variable = part$meta$x, 
             x=part$fit[[part$meta$x]], 
             smooth=part$fit$visregFit, 
             lower=part$fit$visregLwr, 
             upper=part$fit$visregUpr))

# The ggplot:
ggplot(smooths, aes(x, smooth)) + geom_line() +
  geom_line(aes(y=lower), linetype="dashed") + 
  geom_line(aes(y=upper), linetype="dashed") + 
  facet_grid(. ~ Variable, scales = "free_x")

We can put the whole thing into a function, and add an option to show the residuals from the model (res = TRUE):

ggplot.model <- function(model, type="conditional", res=FALSE, 
                       col.line="#7fc97f", col.point="#beaed4", size.line=1, size.point=1) {
  require(visreg)
  require(plyr)
  plotdata <- visreg(model, type = type, plot = FALSE)
  smooths <- ldply(plotdata, function(part)   
    data.frame(Variable = part$meta$x, 
             x=part$fit[[part$meta$x]], 
             smooth=part$fit$visregFit, 
             lower=part$fit$visregLwr, 
             upper=part$fit$visregUpr))
  residuals <- ldply(plotdata, function(part)
    data.frame(Variable = part$meta$x, 
               x=part$res[[part$meta$x]], 
               y=part$res$visregRes))
  if (res)
    ggplot(smooths, aes(x, smooth)) + geom_line(col=col.line, size=size.line) +
      geom_line(aes(y=lower), linetype="dashed", col=col.line, size=size.line) +
      geom_line(aes(y=upper), linetype="dashed", col=col.line, size=size.line) +
      geom_point(data = residuals, aes(x, y), col=col.point, size=size.point) +
      facet_grid(. ~ Variable, scales = "free_x")
  else
    ggplot(smooths, aes(x, smooth)) + geom_line(col=col.line, size=size.line) +
      geom_line(aes(y=lower), linetype="dashed", col=col.line, size=size.line) +
      geom_line(aes(y=upper), linetype="dashed", col=col.line, size=size.line) +
      facet_grid(. ~ Variable, scales = "free_x")
  }

ggplot.model(model)
ggplot.model(model, res=TRUE)

ggplot without residuals ggplot with residuals Colors are picked from http://colorbrewer2.org/.