ansaurus

Question

How to create an "inkblot" chart with R?

Answer 1

+8 A:

You can use polygon in base graphics, for instance

x <- seq(as.POSIXct("1949-01-01", tz="GMT"), length=36, by="months")
y <- rnorm(length(x))
plot(x, y, type="n", ylim=c(-1,1)*max(abs(y)))
polygon(c(x, rev(x)), c(y, -rev(y)), col="cornflowerblue", border=NA)

plot.polygon output

Update: Using panel.polygon from lattice:

library("lattice")
library("RColorBrewer")

df <- data.frame(x=rep(x,3),
                 y=rnorm(3*length(x)),
                 variable=gl(3, length(x)))

p <- xyplot(y~x|variable, data=df,
            ylim=c(-1,1)*max(abs(y)),
            layout=c(1,3),
            fill=brewer.pal(3, "Pastel2"),
            panel=function(...) {
              args <- list(...)
              print(args)
              panel.polygon(c(args$x, rev(args$x)),
                            c(args$y, -rev(args$y)),
                            fill=args$fill[panel.number()],
                            border=NA)
            })
print(p)

lattice plot using panel.polygon

rcs 2010-01-29 10:43:34

This is really nice. Could you elaborate on howto stack several time series together?

Karsten W. 2010-01-29 13:22:28

See my updated answer ...

rcs 2010-01-29 14:29:45

Looks very nice!

Karsten W. 2010-01-29 14:48:31

Answer 2

+9 A:

With a little polishing, this ggplot solution will look like what you want:

alt text

Here's how to make it from your data:

require(ggplot2)

First, let's take your input data and import and restructure it into a form ggplot likes:

rdata = read.csv("data.csv", 
# options: load '-' as na, ignore first comment line #Solar,
# strip whitespace that ends line, accept numbers as col headings
  na.strings="-", skip=1, strip.white=T, check.names=F)
# Convert to long format and check years are numeric
data = melt(rdata)
data = transform(data,year=as.numeric(as.character(variable)))
# geom_ribbon hates NAs.
data = data[!is.na(data$value),]

> summary(data)
           Country       variable       value             year     
 Austria       : 12   1996   : 25   Min.   :  0.00   Min.   :1996  
 Belgium       : 12   1997   : 25   1st Qu.:  0.00   1st Qu.:1999  
 Croatia       : 12   1998   : 25   Median :  7.00   Median :2002  
 Cyprus        : 12   1999   : 25   Mean   : 36.73   Mean   :2002  
 Czech Republic: 12   2000   : 25   3rd Qu.: 30.00   3rd Qu.:2004  
 Denmark       : 12   2001   : 25   Max.   :580.00   Max.   :2007  
 (Other)       :228   (Other):150

Now let's plot it:

ggplot(data=data, aes(fill=Country)) +
  facet_grid(Country~.,space="free", scales="free_y") +
  opts(legend.position="none") +
  geom_ribbon(aes(x=year,ymin=-value,ymax=+value))

Alex Brown 2010-01-29 17:37:43

Great solution! This shows how powerful ggplot2 is, when it is used by skillfull hands.

Karsten W. 2010-01-31 15:39:24

Answer 3

+4 A:

Using rcs' first approach, here a solution for the sample data with base graphics:

rawData <- read.csv("solar.csv", na.strings="-")
data <- ts(t(as.matrix(rawData[,2:13])), names=rawData[,1], start=1996)

inkblot <- function(series, col=NULL, min.height=40, col.value=24, col.category=17, ...) {  
  # assumes non-negative values  
  # assumes that series is multivariate series  
  # assumes that series names are set, i.e. colnames(series) != NULL  

  x <- as.vector(time(series))  
  if(length(col)==0){  
    col <- rainbow(dim(series)[2])  
  }  

  ytotal <- 0  
  for(category in colnames(series)) {  
    y <- series[, category]
    y <- y[!is.na(y)]
    ytotal <- ytotal + max(y, min.height)  
  }  

  oldpar = par(no.readonly = TRUE)   
  par(mar=c(2,3,0,10)+0.1, cex=0.7)  

  plot(x, 1:length(x), type="n", ylim=c(0,1)*ytotal, yaxt="n", xaxt="n", bty="n", ylab="", xlab="", ...) 
  axis(side=1, at=x)  

  catNumber <- 1  
  offset <- 0  
  for(category in rev(colnames(series))) {  
    print(paste("working on: ", category))
    y <- 0.5 * as.vector(series[,category])  
    offset <- offset + max(max(abs(y[!is.na(y)])), 0.5*min.height)  
    print(paste("offset= ", str(offset)))
    polygon(c(x, rev(x)), c(offset+y, offset-rev(y)), col=col[catNumber], border=NA)  
    mtext(text=y[1], side=2, at=offset, las=2, cex=0.7, col=col.value)  
    mtext(text=y[length(y)], side=4, line=-1, at=offset, las=2, cex=0.7, col=col.value)  
    mtext(text=category, side=4, line=2, at=offset, las=2, cex=0.7, col=col.category)  
    offset <- offset + max(max(abs(y[!is.na(y)])), 0.5*min.height)  
    catNumber <- catNumber + 1   
  }  
}  


inkblot(data)

I still need to figure out the vertical grid lines and the transparent coloring. plot

Karsten W. 2010-01-30 22:54:12

In your function plot need `y` value which doesn't exists. And `max` in `ytotal` computation fail on `NA` values.

Marek 2010-02-05 15:42:32

Thank you for pointing this out. I corrected the code accordingly.

Karsten W. 2010-02-07 16:59:55

Answer 4

+4 A:

Late to this game, but I created a stacked "blot" chart using ggplot2 and another set of data. This uses geom_polygon after the data has been smoothed out.

# data: Masaaki Ishida ([email protected])
# http://luna.pos.to/whale/sta.html

head(blue, 2)
##      Season Norway U.K. Japan Panama Denmark Germany U.S.A. Netherlands
## ## [1,]   1931      0 6050     0      0       0       0      0           0
## ## [2,]   1932  10128 8496     0      0       0       0      0           0
## ##      U.S.S.R. South.Africa TOTAL
## ## [1,]        0            0  6050
## ## [2,]        0            0 18624

hourglass.plot <- function(df) {
  stack.df <- df[,-1]
  stack.df <- stack.df[,sort(colnames(stack.df))]
  stack.df <- apply(stack.df, 1, cumsum)
  stack.df <- apply(stack.df, 1, function(x) sapply(x, cumsum))
  stack.df <- t(apply(stack.df, 1, function(x) x - mean(x)))
  # use this for actual data
  ##  coords.df <- data.frame(x = rep(c(df[,1], rev(df[,1])), times = dim(stack.df)[2]), y = c(apply(stack.df, 1, min), as.numeric(apply(stack.df, 2, function(x) c(rev(x),x)))[1:(length(df[,1])*length(colnames(stack.df))*2-length(df[,1]))]), id = rep(colnames(stack.df), each = 2*length(df[,1])))

  ##  qplot(x = x, y = y, data = coords.df, geom = "polygon", color = I("white"), fill = id)

  # use this for smoothed data
  density.df <- apply(stack.df, 2, function(x) spline(x = df[,1], y = x))
  id.df <- sort(rep(colnames(stack.df), each = as.numeric(lapply(density.df, function(x) length(x$x)))))
  density.df <- do.call("rbind", lapply(density.df, as.data.frame))
  density.df <- data.frame(density.df, id = id.df)
  smooth.df <- data.frame(x = unlist(tapply(density.df$x, density.df$id, function(x) c(x, rev(x)))), y = c(apply(unstack(density.df[,2:3]), 1, min), unlist(tapply(density.df$y, density.df$id, function(x) c(rev(x), x)))[1:(table(density.df$id)[1]+2*max(cumsum(table(density.df$id))[-dim(stack.df)[2]]))]), id = rep(names(table(density.df$id)), each = 2*table(density.df$id)))

  qplot(x = x, y = y, data = smooth.df, geom = "polygon", color = I("white"), fill = id)
}

hourglass.plot(blue[,-12]) + opts(title = c("Blue Whale Catch"))

alt text

apeescape 2010-06-30 21:22:38

ansaurus

tags:

views:

answers:

How to create an "inkblot" chart with R?

related questions