tags:

views:

474

answers:

4

How can I create a chart like

http://junkcharts.typepad.com/junk_charts/2010/01/leaving-ink-traces.html

where several time series (one per country) are displayed horizontally as symmetric areas?

I think if I could display one time series in this way, it is easy to generalize to several using mfrow.

Sample data:

#Solar energy production in Europe, by country (EC),(1 000 toe)  
Country,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007  
Belgium,1,1,1,1,1,1,2,2,3,3,3,5  
Bulgaria,-,-,-,-,-,-,-,-,-,-,-,-  
Czech Republic,0,0,0,0,0,0,0,0,2,2,3,4  
Denmark,6,7,7,8,8,8,9,9,9,10,10,11  
Germany (including ex-GDR from 1991),57,70,83,78,96,150,184,216,262,353,472,580  
Estonia,-,-,-,-,-,-,-,-,-,-,-,-  
Ireland,0,0,0,0,0,0,0,0,0,0,1,1  
Greece,86,89,93,97,99,100,99,99,101,102,109,160  
Spain,26,23,26,29,33,38,43,48,58,65,83,137  
France,15,16,17,18,26,19,19,18,19,22,29,37  
Italy,8,9,11,11,12,14,16,18,21,30,38,56  
Cyprus,32,33,34,35,35,34,35,36,40,41,43,54  
Latvia,-,-,-,-,-,-,-,-,-,-,-,-  
Lithuania,-,-,-,-,-,-,-,-,-,-,-,-  
Luxembourg (Grand-Duché),0,0,0,0,0,0,0,0,1,2,2,2  
Hungary,0,0,0,0,0,1,2,2,2,2,2,3  
Netherlands,6,7,8,10,12,14,16,19,20,22,22,23  
Austria,42,48,55,58,64,69,74,80,86,92,101,108  
Poland,0,0,0,0,0,0,0,0,0,0,0,0  
Portugal,16,16,17,18,18,19,20,21,21,23,24,28  
Romania,0,0,0,0,0,0,0,0,0,0,0,0  
Slovenia,-,-,-,-,-,-,-,-,-,-,-,-  
Slovakia,0,0,0,0,0,0,0,0,0,0,0,0  
Finland,0,0,0,0,1,1,1,1,1,1,1,1  
Sweden,4,4,5,5,5,6,4,5,5,6,6,9  
United Kingdom,6,6,7,7,11,13,16,20,25,30,37,46  
Croatia,0,0,0,0,0,0,0,0,0,0,0,1  
Turkey,159,179,210,236,262,287,318,350,375,385,402,420  
Iceland,-,-,-,-,-,-,-,-,-,-,-,-  
Norway,0,0,0,0,0,0,0,0,0,0,0,0  
Switzerland,18,19,21,23,24,26,23,24,25,26,28,30  
#-='Not applicable' or 'Real zero' or 'Zero by default' :=Not available "
#Source of Data:,Eurostat, http://spreadsheets.google.com/ccc?key=0Agol553XfuDZdFpCQU1CUVdPZ3M0djJBSE1za1NGV0E&hl=en_GB  
#Last Update:,30.04.2009  
#Date of extraction:,17 Aug 2009 07:41:12 GMT, http://epp.eurostat.ec.europa.eu/tgm/table.do?tab=table&init=1&plugin=1&language=en&pcode=ten00082
+8  A: 

You can use polygon in base graphics, for instance

x <- seq(as.POSIXct("1949-01-01", tz="GMT"), length=36, by="months")
y <- rnorm(length(x))
plot(x, y, type="n", ylim=c(-1,1)*max(abs(y)))
polygon(c(x, rev(x)), c(y, -rev(y)), col="cornflowerblue", border=NA)

plot.polygon output

Update: Using panel.polygon from lattice:

library("lattice")
library("RColorBrewer")

df <- data.frame(x=rep(x,3),
                 y=rnorm(3*length(x)),
                 variable=gl(3, length(x)))

p <- xyplot(y~x|variable, data=df,
            ylim=c(-1,1)*max(abs(y)),
            layout=c(1,3),
            fill=brewer.pal(3, "Pastel2"),
            panel=function(...) {
              args <- list(...)
              print(args)
              panel.polygon(c(args$x, rev(args$x)),
                            c(args$y, -rev(args$y)),
                            fill=args$fill[panel.number()],
                            border=NA)
            })
print(p)

lattice plot using panel.polygon

rcs
This is really nice. Could you elaborate on howto stack several time series together?
Karsten W.
See my updated answer ...
rcs
Looks very nice!
Karsten W.
+9  A: 

With a little polishing, this ggplot solution will look like what you want:

alt text

Here's how to make it from your data:

require(ggplot2)

First, let's take your input data and import and restructure it into a form ggplot likes:

rdata = read.csv("data.csv", 
# options: load '-' as na, ignore first comment line #Solar,
# strip whitespace that ends line, accept numbers as col headings
  na.strings="-", skip=1, strip.white=T, check.names=F)
# Convert to long format and check years are numeric
data = melt(rdata)
data = transform(data,year=as.numeric(as.character(variable)))
# geom_ribbon hates NAs.
data = data[!is.na(data$value),]

> summary(data)
           Country       variable       value             year     
 Austria       : 12   1996   : 25   Min.   :  0.00   Min.   :1996  
 Belgium       : 12   1997   : 25   1st Qu.:  0.00   1st Qu.:1999  
 Croatia       : 12   1998   : 25   Median :  7.00   Median :2002  
 Cyprus        : 12   1999   : 25   Mean   : 36.73   Mean   :2002  
 Czech Republic: 12   2000   : 25   3rd Qu.: 30.00   3rd Qu.:2004  
 Denmark       : 12   2001   : 25   Max.   :580.00   Max.   :2007  
 (Other)       :228   (Other):150

Now let's plot it:

ggplot(data=data, aes(fill=Country)) +
  facet_grid(Country~.,space="free", scales="free_y") +
  opts(legend.position="none") +
  geom_ribbon(aes(x=year,ymin=-value,ymax=+value))
Alex Brown
Great solution! This shows how powerful ggplot2 is, when it is used by skillfull hands.
Karsten W.
+4  A: 

Using rcs' first approach, here a solution for the sample data with base graphics:

rawData <- read.csv("solar.csv", na.strings="-")
data <- ts(t(as.matrix(rawData[,2:13])), names=rawData[,1], start=1996)

inkblot <- function(series, col=NULL, min.height=40, col.value=24, col.category=17, ...) {  
  # assumes non-negative values  
  # assumes that series is multivariate series  
  # assumes that series names are set, i.e. colnames(series) != NULL  

  x <- as.vector(time(series))  
  if(length(col)==0){  
    col <- rainbow(dim(series)[2])  
  }  

  ytotal <- 0  
  for(category in colnames(series)) {  
    y <- series[, category]
    y <- y[!is.na(y)]
    ytotal <- ytotal + max(y, min.height)  
  }  

  oldpar = par(no.readonly = TRUE)   
  par(mar=c(2,3,0,10)+0.1, cex=0.7)  

  plot(x, 1:length(x), type="n", ylim=c(0,1)*ytotal, yaxt="n", xaxt="n", bty="n", ylab="", xlab="", ...) 
  axis(side=1, at=x)  

  catNumber <- 1  
  offset <- 0  
  for(category in rev(colnames(series))) {  
    print(paste("working on: ", category))
    y <- 0.5 * as.vector(series[,category])  
    offset <- offset + max(max(abs(y[!is.na(y)])), 0.5*min.height)  
    print(paste("offset= ", str(offset)))
    polygon(c(x, rev(x)), c(offset+y, offset-rev(y)), col=col[catNumber], border=NA)  
    mtext(text=y[1], side=2, at=offset, las=2, cex=0.7, col=col.value)  
    mtext(text=y[length(y)], side=4, line=-1, at=offset, las=2, cex=0.7, col=col.value)  
    mtext(text=category, side=4, line=2, at=offset, las=2, cex=0.7, col=col.category)  
    offset <- offset + max(max(abs(y[!is.na(y)])), 0.5*min.height)  
    catNumber <- catNumber + 1   
  }  
}  


inkblot(data)  

I still need to figure out the vertical grid lines and the transparent coloring. plot

Karsten W.
In your function plot need `y` value which doesn't exists. And `max` in `ytotal` computation fail on `NA` values.
Marek
Thank you for pointing this out. I corrected the code accordingly.
Karsten W.
+4  A: 

Late to this game, but I created a stacked "blot" chart using ggplot2 and another set of data. This uses geom_polygon after the data has been smoothed out.

# data: Masaaki Ishida ([email protected])
# http://luna.pos.to/whale/sta.html

head(blue, 2)
##      Season Norway U.K. Japan Panama Denmark Germany U.S.A. Netherlands
## ## [1,]   1931      0 6050     0      0       0       0      0           0
## ## [2,]   1932  10128 8496     0      0       0       0      0           0
## ##      U.S.S.R. South.Africa TOTAL
## ## [1,]        0            0  6050
## ## [2,]        0            0 18624

hourglass.plot <- function(df) {
  stack.df <- df[,-1]
  stack.df <- stack.df[,sort(colnames(stack.df))]
  stack.df <- apply(stack.df, 1, cumsum)
  stack.df <- apply(stack.df, 1, function(x) sapply(x, cumsum))
  stack.df <- t(apply(stack.df, 1, function(x) x - mean(x)))
  # use this for actual data
  ##  coords.df <- data.frame(x = rep(c(df[,1], rev(df[,1])), times = dim(stack.df)[2]), y = c(apply(stack.df, 1, min), as.numeric(apply(stack.df, 2, function(x) c(rev(x),x)))[1:(length(df[,1])*length(colnames(stack.df))*2-length(df[,1]))]), id = rep(colnames(stack.df), each = 2*length(df[,1])))

  ##  qplot(x = x, y = y, data = coords.df, geom = "polygon", color = I("white"), fill = id)

  # use this for smoothed data
  density.df <- apply(stack.df, 2, function(x) spline(x = df[,1], y = x))
  id.df <- sort(rep(colnames(stack.df), each = as.numeric(lapply(density.df, function(x) length(x$x)))))
  density.df <- do.call("rbind", lapply(density.df, as.data.frame))
  density.df <- data.frame(density.df, id = id.df)
  smooth.df <- data.frame(x = unlist(tapply(density.df$x, density.df$id, function(x) c(x, rev(x)))), y = c(apply(unstack(density.df[,2:3]), 1, min), unlist(tapply(density.df$y, density.df$id, function(x) c(rev(x), x)))[1:(table(density.df$id)[1]+2*max(cumsum(table(density.df$id))[-dim(stack.df)[2]]))]), id = rep(names(table(density.df$id)), each = 2*table(density.df$id)))

  qplot(x = x, y = y, data = smooth.df, geom = "polygon", color = I("white"), fill = id)
}

hourglass.plot(blue[,-12]) + opts(title = c("Blue Whale Catch"))

alt text

apeescape