Bayes

Marginal distribution plots

Posted on Updated on

Marginal distribution plots

Often, it is useful to visualize graphically how two variables are correlated with each other. For instance, I recently carried out a Bayesian analysis to get constraints on cluster scaling relations. Immediately, I wanted look at the marginal samples from the posterior distribution for the normalisation and slope of my relation.

Apparently, there is no obvious way to so in ggplot, but after much search I finally managed to plot what I intended to…

To start off, let's generate some data:

Data

set.seed(888)

mydata <- data.frame(
    "A"=c(rnorm(100,44,0.1),rnorm(100,43.8,0.12)),
    "B"=c(rnorm(100,1.33,0.15),rnorm(100,1.4,0.15)),
    "sample"=c(rep("s",100),rep("p",100))
    )

Plot the data

First Method

There are two different options to display the data. One is based on the code given here, which I show below…

library(ggplot2)
library(gridExtra)
empty <- ggplot()+geom_point(aes(1,1), colour="white") +
     theme(                              
       plot.background = element_blank(), 
       panel.grid.major = element_blank(), 
       panel.grid.minor = element_blank(), 
       panel.border = element_blank(), 
       panel.background = element_blank(),
       axis.title.x = element_blank(),
       axis.title.y = element_blank(),
       axis.text.x = element_blank(),
       axis.text.y = element_blank(),
       axis.ticks = element_blank()
     )

#scatterplot of x and y variables
scatter <- ggplot(mydata,aes(A, B)) + 
  geom_point(aes(color=factor(sample))) + 
  scale_color_manual(name="Data",labels=c("run1","run2"),values = c("pink", "blue")) + 
  theme(legend.position=c(1,1),legend.justification=c(1,1)) 

#marginal density of x - plot on top
plot_top <- ggplot(mydata, aes(A, fill=factor(sample))) + 
  geom_density(alpha=.5) + 
  scale_fill_manual(values = c("pink", "blue")) + 
  theme(legend.position = "none")

#marginal density of y - plot on the right
plot_right <- ggplot(mydata, aes(B, fill=factor(sample))) + 
  geom_density(alpha=.5) + 
  coord_flip() + 
  scale_fill_manual(values = c("pink", "blue")) + 
  theme(legend.position = "none") 

#arrange the plots together, with appropriate height and width for each row and column
grid.arrange(plot_top, empty, scatter, plot_right, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))

plot of chunk plot

Now that's one way to view the data. Another one involves plotting probability contours, say 1\sigma, 2\sigma or 3\sigma contours on top of the scatter plot.

Second Method

To start with, let's create a contour function to generate the probability contours.

library(MASS)
panel.conf.lines <- function(x, y, ...) {
    lims = c(min(x) - 0.5 * (max(x) - min(x)), max(x) + 0.5 * (max(x) - min(x)), 
        min(y) - 0.5 * (max(y) - min(y)), max(y) + 0.5 * (max(y) - min(y)))
    kd <- kde2d(x, y, n = 100, lims = lims)
    ## store density at each A,B pair
    pp <- array()
    for (i in 1:length(x)) {
        z.x <- max(which(kd$x < x[i]))
        z.y <- max(which(kd$y < y[i]))
        pp[i] <- kd$z[z.x, z.y]
        ## cat(i,' pp',pp[i],'\n')
    }
    ## find density level enclosing fraction of pairs
    c1 <- quantile(pp, 0.32, na.rm = TRUE)
    c2 <- quantile(pp, 0.05, na.rm = TRUE)
    c3 <- quantile(pp, 0.0025, na.rm = TRUE)
    cl1 <- contourLines(kd, levels = c1)
    cl2 <- contourLines(kd, levels = c2)
    cl3 <- contourLines(kd, levels = c3)
    cat("c1=", c1, " c2=", c2, " c3=", c3, "\n")
    cl <- mapply(function(x, piece) {
        rbind(data.frame(x, piece = piece), c(NA, NA, NA))
    }, c(cl3, cl2, cl1), seq_along(c(cl3, cl2, cl1)), SIMPLIFY = FALSE)
    cl_df <- do.call("rbind", cl)
    return(cl_df)
}

Now let's generate contours for the two subsets of our data, i.e. sample S and sample P.

conf <- panel.conf.lines(mydata$A[1:100],mydata$B[1:100])
conf2 <- panel.conf.lines(mydata$A[101:200],mydata$B[101:200])

And finally let's create the plot…

plotablm <- ggplot(mydata, aes(x = A, y = B)) + geom_polygon(aes(x = x, y = y, 
    group = piece, fill = as.factor(level)), data = conf, alpha = I(1/2)) + 
    geom_path(aes(x = x, y = y, group = piece, linetype = as.factor(level)), 
        size = 0.5, data = conf2) + geom_point(aes(colour = factor(sample)), 
    size = 1.5) + coord_cartesian(xlim = c(43.25, 45), ylim = c(0.75, 2)) + 
    xlab(expression(A)) + ylab(expression(B)) + theme_bw(18) + theme(legend.justification = c(1, 
    1), legend.position = c(1, 1)) + scale_colour_manual(name = "Data", label = c("B", 
    "P"), values = c("#7f7fe8", "#e87fe8")) + scale_fill_manual(name = "Sample B", 
    label = c(expression(paste(1, "", sigma)), expression(paste(2, "", sigma)), 
        expression(paste(3, "", sigma))), values = c("#7f7fe8", "#e87fe8", "#e8e87f")) + 
    scale_linetype_discrete(name = "Sample P", label = c(expression(paste(1, 
        "", sigma)), expression(paste(2, "", sigma)), expression(paste(3, "", 
        sigma))))
plotablm

plot of chunk data + contour function + plot