Chapter 5 Comparison–bar charts, boxplots, sina plots

Comparison as assessing overlapping distributions

5.1 Graph considerations for communication: aggregation, abstraction, complexity

5.1.1 Simple bar chart

library(tidyverse)
library(ggforce)
library(ggthemes)
library(ggstance)

# Copy of mtcars with the cylinder count stored as a factor, so that ggplot2
# treats it as a discrete variable.
mtcars.df = mutate(mtcars, cyl = as.factor(cyl))

# One row per cylinder group: mean horsepower and its standard error
# (standard deviation divided by the square root of the group size).
s.mtcars.df = mtcars.df %>%
  group_by(cyl) %>%
  summarise(
    m.hp = mean(hp),
    se.hp = sd(hp) / n()^0.5
  )

# Bar heights come from the precomputed means, so geom_bar() needs
# stat = "identity" ...
ggplot(s.mtcars.df, mapping = aes(x = cyl, y = m.hp)) +
  geom_bar(stat = "identity")

# ... while geom_col() takes the y values as bar heights directly.
ggplot(s.mtcars.df, mapping = aes(x = cyl, y = m.hp)) +
  geom_col()

## Change order of bars
# The limits of the discrete x scale set the left-to-right order of the bars.
cyl.order <- c("8", "6", "4")
ggplot(s.mtcars.df, mapping = aes(x = cyl, y = m.hp)) +
  geom_col() +
  scale_x_discrete(limits = cyl.order)

5.1.2 Bar chart with error bars

# Mean horsepower per cylinder group, with a vertical line spanning the mean
# plus/minus two standard errors.
ggplot(s.mtcars.df, mapping = aes(x = cyl, y = m.hp)) +
  geom_bar(stat = "identity") +
  geom_linerange(
    mapping = aes(ymin = m.hp - 2 * se.hp, ymax = m.hp + 2 * se.hp)
  )

# Same chart with every car drawn on top. Points are jittered horizontally
# only (height = 0), so the plotted horsepower values are not altered.
ggplot(s.mtcars.df, mapping = aes(x = cyl, y = m.hp)) +
  geom_bar(stat = "identity") +
  geom_linerange(
    mapping = aes(ymin = m.hp - 2 * se.hp, ymax = m.hp + 2 * se.hp)
  ) +
  geom_point(
    data = mtcars.df,
    mapping = aes(x = cyl, y = hp),
    position = position_jitter(width = 0.2, height = 0)
  )

5.1.3 dotplot and offset range plot

## Set seed and create data
# Two simulated conditions with 12 uniform draws each; the fixed seed makes the
# example reproducible. tibble() replaces the deprecated data_frame().
set.seed(999)
df = tibble(A = runif(12, 1, 17), B = runif(12, 2, 8))

# Long format: one row per observation, with the condition stored as a factor.
l.df = gather(df, condition, value)
l.df$condition = as.factor(l.df$condition)

# Per-condition summary: mean, number of non-missing values, standard
# deviation, standard error (sd / sqrt(n)), and an interval half-width of two
# standard errors.
m.l.df = l.df %>%
  group_by(condition) %>%
  summarise(
    m.value = mean(value, na.rm = TRUE),
    n = sum(!is.na(value)),
    sd = sd(value, na.rm = TRUE),
    sde = sd(value, na.rm = TRUE) / n^.5,
    ci = 2 * sde
  )
# Numeric x position for the summary marks: the factor's integer code shifted
# slightly left, so the mean and interval sit beside the stacked dots.
m.l.df$n.condition = as.numeric(m.l.df$condition) - .05


## Plot with offset for mean and error bar
# NOTE(review): ylim() removes observations outside [2, 15] from the drawn
# layers, while the mean and interval above are computed from all values.
ggplot() +
  geom_dotplot(
    data = filter(l.df, condition %in% c("A", "B")),
    aes(condition, value), binaxis = "y", stackdir = "up"
  ) +
  geom_linerange(
    data = filter(m.l.df, condition %in% c("A", "B")),
    aes(n.condition, ymin = m.value - ci, ymax = m.value + ci),
    color = "grey50"
  ) +
  geom_point(
    data = filter(m.l.df, condition %in% c("A", "B")),
    aes(n.condition, y = m.value),
    shape = 21, size = 4, fill = "grey", alpha = .7
  ) +
  labs(x = "", y = "") +
  ylim(2, 15)
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

5.1.4 Statistical significance in context

# Horsepower by cylinder count shown at several levels of aggregation at once:
# box plots, the group mean (open circle), a summary interval around the mean,
# the individual cars (translucent red dots), and a horizontal line at the
# overall mean horsepower.
ggplot(data = mtcars.df, aes(x = as.factor(cyl), y = hp)) +
  geom_boxplot(colour = "darkgrey") +
  # `fun` is the current name of the summary-function argument; the older
  # `fun.y` spelling is deprecated.
  geom_point(stat = "summary", fun = "mean", size = 6, shape = 1) +
  # NOTE(review): mean_cl_boot relies on the Hmisc package being installed.
  geom_pointrange(stat = "summary", fun.data = "mean_cl_boot") +
  geom_dotplot(
    binaxis = "y", stackdir = "center", binwidth = 1,
    dotsize = 6, alpha = .3, color = "black", fill = "red"
  ) +
  geom_hline(aes(yintercept = mean(hp)), size = 1.2)

5.2 Comparing distributions: box, violin, and sina plots

library(tidyr)

# Bars for the group means, with a line spanning the mean plus/minus two
# standard errors.
ggplot(s.mtcars.df, mapping = aes(x = cyl, y = m.hp)) +
  geom_col() +
  geom_linerange(
    mapping = aes(ymin = m.hp - 2 * se.hp, ymax = m.hp + 2 * se.hp)
  )

# The same bars with the individual cars overlaid (horizontal jitter only).
ggplot(s.mtcars.df, mapping = aes(x = cyl, y = m.hp)) +
  geom_col() +
  geom_linerange(
    mapping = aes(ymin = m.hp - 2 * se.hp, ymax = m.hp + 2 * se.hp)
  ) +
  geom_point(
    data = mtcars.df,
    mapping = aes(x = cyl, y = hp),
    position = position_jitter(width = 0.2, height = 0)
  )

# Bars replaced by a point at each group mean; interval and raw data as before.
ggplot(s.mtcars.df, mapping = aes(x = cyl, y = m.hp)) +
  geom_point(stat = "identity", size = 3) +
  geom_linerange(
    mapping = aes(ymin = m.hp - 2 * se.hp, ymax = m.hp + 2 * se.hp)
  ) +
  geom_point(
    data = mtcars.df,
    mapping = aes(x = cyl, y = hp),
    position = position_jitter(width = 0.2, height = 0)
  )

## Sina plot
# Highway mileage from the mpg dataset by cylinder count: sina points coloured
# by cylinder group, with a Tufte-style boxplot layered on top.
ggplot(mpg, mapping = aes(x = as.factor(cyl), y = hwy)) +
  geom_sina(mapping = aes(color = as.factor(cyl)), size = 1, alpha = 0.5) +
  geom_tufteboxplot() +
  labs(title = "ggforce: sina plot with Tufte boxplot")

# ggplot(s.mtcars.df, aes(m.hp, m.mpg, colour =as.factor(cyl)))+
# geom_pointrangeh(aes(xmin= m.hp-se.hp, xmax = m.hp+se.hp))+
#   geom_pointrange(aes(ymin= m.mpg-se.mpg, ymax = m.mpg+se.mpg))+
#   labs(title = "ggstance: horizontal point range")

# Box plot of horsepower for each cylinder count.
ggplot(mtcars.df, mapping = aes(x = as.factor(cyl), y = hp)) +
  geom_boxplot()

# Violin plot of the same data, with lines drawn at the quartiles.
ggplot(mtcars.df, mapping = aes(x = as.factor(cyl), y = hp)) +
  geom_violin(draw_quantiles = c(0.25, 0.5, 0.75))

# Violins built from the raw data, combined with the group mean, a line
# spanning the mean plus/minus two standard errors, and the individual cars
# (horizontal jitter only).
ggplot(s.mtcars.df, mapping = aes(x = cyl, y = m.hp)) +
  geom_violin(data = mtcars.df, mapping = aes(x = cyl, y = hp)) +
  geom_point(stat = "identity", size = 3) +
  geom_linerange(
    mapping = aes(ymin = m.hp - 2 * se.hp, ymax = m.hp + 2 * se.hp)
  ) +
  geom_point(
    data = mtcars.df,
    mapping = aes(x = cyl, y = hp),
    position = position_jitter(width = 0.2, height = 0),
    alpha = 0.6
  )

5.2.1 Compare empirical and theoretical distribution

# Mean and standard deviation of horsepower for each cylinder group.
sum.mtcars.df = mtcars.df %>%
  group_by(cyl) %>%
  summarise(m.hp = mean(hp), sd.hp = sd(hp))

# Empirical box plots compared with a normal distribution: the translucent band
# runs between the 25th and 75th percentiles of a normal distribution with the
# group's mean and standard deviation, and the large point marks the mean.
ggplot(mtcars.df) +
  geom_boxplot(mapping = aes(x = as.factor(cyl), y = hp)) +
  geom_linerange(
    data = sum.mtcars.df,
    mapping = aes(
      x = as.factor(cyl),
      ymin = m.hp + qnorm(0.25) * sd.hp,
      ymax = m.hp + qnorm(0.75) * sd.hp
    ),
    size = 5,
    alpha = 0.25
  ) +
  geom_point(
    data = sum.mtcars.df,
    mapping = aes(x = as.factor(cyl), y = m.hp),
    size = 6,
    alpha = 0.33
  )

5.2.2 Tufte-inspired minimal bar chart

http://motioninsocial.com/tufte/

# TODO: replace with a better dataset with more columns
library(ggthemes)
# Minimal bar chart of the number of cars per cylinder group: narrow grey bars,
# no axis ticks, and white horizontal lines at the y-axis breaks drawn across
# the bars.
ggplot(mtcars.df, aes(x = as.factor(cyl))) +
  geom_bar(width = 0.25, fill = "gray") +
  scale_y_continuous(breaks = seq(2, 12, 2)) +
  geom_hline(yintercept = seq(2, 12, 2), colour = "white", lwd = .5) +
  # FALSE is spelled out because `F` is an ordinary variable that can be
  # reassigned.
  theme_tufte(base_size = 12, ticks = FALSE, base_family = "Arial")
## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead
## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead
## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead
## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead
## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead
## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead
## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call(C_textBounds,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead
## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

## Warning in grid.Call.graphics(C_text,
## as.graphicsAnnot(x$label), x$x, x$y, : font family
## 'Arial' not found, will use 'sans' instead

5.3 Comparing across many variables

5.3.1 Dot plots and reordering

Comparing many mean values

# TODO: change to a mean/sd caterpillar plot
# Horsepower of every car, ordered from lowest to highest and coloured by
# cylinder count; the grey reference line marks the overall mean horsepower.
mtcars.df = mtcars
mtcars.df$name = rownames(mtcars.df)
# NOTE(review): x is mapped to the car name, so the "Cylinders" axis label
# looks mislabelled -- confirm the intended text.
ggplot(data = mtcars.df, aes(reorder(name, hp), y = hp, colour = as.factor(cyl))) +
  geom_point(size = 2) +
  geom_hline(aes(yintercept = mean(hp)), colour = "darkgrey") +
  geom_linerange(aes(ymin = -Inf, ymax = hp), alpha = .5) +
  coord_flip() +
  labs(x = "Cylinders", y = "Power (hp)") +
  scale_colour_brewer(name = "Number of \nCylinders", palette = "Dark2") + # http://colorbrewer2.org
  # The complete theme goes first: adding theme_minimal() after theme() would
  # replace the legend position set there.
  theme_minimal() +
  theme(legend.position = c(.75, .25))

5.3.2 Point range on x and y

## Point range on x and y
#library(ggstance)
# Mean and standard error (sd / sqrt(n)) of horsepower and of fuel economy for
# each cylinder count, computed from the original mtcars data.
s.mtcars.df = summarise(
  group_by(mtcars, cyl),
  m.hp = mean(hp),
  se.hp = sd(hp) / n()^0.5,
  m.mpg = mean(mpg),
  se.mpg = sd(mpg) / n()^0.5
)

5.3.3 Tufte boxplot for many variables

library(ggthemes)

## Tufte boxplot of mpg for each cylinder count, with the median and the
## whiskers both drawn as lines, followed by a geom_rangeframe() layer
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_tufteboxplot(
    median.type = "line", 
    whisker.type = "line", 
    hoffset = 0, 
    width = 4
  ) +
  geom_rangeframe()
## Warning: position_dodge requires non-overlapping x
## intervals

## Tufte boxplot of mpg by cylinder count using the geom's default settings
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl), y = mpg)) +
  geom_tufteboxplot() +
  labs(title = "ggthemes: Tufte boxplot")

## Scatterplot of mpg against displacement, coloured by cylinder count,
## with a thick grey range frame layer
ggplot(
  data = mtcars, 
  mapping = aes(x = disp, y = mpg, color = as.factor(cyl))
) +
  geom_point() +
  geom_rangeframe(size = 2, colour = "grey35") +
  labs(title = "ggthemes: Tufte range frame")

5.3.4 Tufte-inspired slope graphs

library(tidyverse)
library(ggrepel)

# Load the cancer survival data from the local data/ directory.
# The parse message echoed below shows a character `Type` column and four
# numeric columns, `Year 5` through `Year 20`.
# https://github.com/leeper/slopegraph
cancer.df = read_csv("data/tufte-cancer-survival-data.csv")
## Parsed with column specification:
## cols(
##   Type = col_character(),
##   `Year 5` = col_double(),
##   `Year 10` = col_double(),
##   `Year 15` = col_double(),
##   `Year 20` = col_double()
## )
## Long format: columns 2 to 5 are stacked into year/rate pairs
l.cancer.df = gather(cancer.df, key = year, value = rate, 2:5)

## Fix the level order of year so it is not sorted alphabetically
l.cancer.df = l.cancer.df %>% 
  mutate(
    year = factor(year, levels = c("Year 5", "Year 10", "Year 15", "Year 20"))
  )

## Slope graph: one grey line per cancer type across the four years, type
## names repelled outward at the first and last year, and the rate printed
## as a label at every point
ggplot(data = l.cancer.df, mapping = aes(x = year, y = rate, group = Type)) +
  geom_line(colour = "grey70") +
  ## Type names to the left of the first year
  geom_text_repel(
    data = filter(l.cancer.df, year == "Year 5"),
    mapping = aes(label = Type), 
    nudge_x = -0.35, 
    direction = "y",
    point.padding = 0.02
  ) +
  ## Type names to the right of the last year
  geom_text_repel(
    data = filter(l.cancer.df, year == "Year 20"),
    mapping = aes(label = Type), 
    nudge_x = 0.35, 
    direction = "y",
    point.padding = 0.02
  ) +
  geom_label(mapping = aes(label = rate), colour = "grey55", label.size = 0.02) +
  ## Start from an empty theme, then restore only the x-axis text
  theme_void() +
  theme(
    axis.text = element_text(size = rel(0.85)),
    axis.text.y = element_blank()
  )

5.3.5 Parallel coordinate plot with similar items highlighted

Scatterplots can show how items relate when there are only two dimensions, but many situations involve comparison between items based on 4-10 dimensions. Parallel coordinate plots can show how items relate on many dimensions by arraying the dimensions on the horizontal axis and the value for each dimension on the vertical axis. Each line represents an item, and sets of lines that rise and fall in parallel indicate similar items. For comparisons that involve more dimensions, dimensionality reduction techniques, such as PCA or t-SNE, can provide a meaningful two-dimensional representation that can be easily visualized with a scatter plot.

Creating a parallel coordinate plot involves five steps: 1. Transform the variables to make comparisons uniform, for example so that greater values have a similar meaning across variables 2. Select variables or dimensions of interest and convert to long format–one column for the variable names and one for the values 3. Scale the items for each variable–subtract the mean value and divide by the standard deviation 4. Highlight one or more items 5. Order the variables in a meaningful fashion, such as by the standard deviation

## Highlight closest pair of items in multidimensional space ##
library(tidyverse)
## Copy the built-in data and keep the car names as a regular column
mtcars.df = mtcars
mtcars.df$name = rownames(mtcars.df)

## Add the reciprocal variables (gpm from mpg, speed from qsec), then keep
## the variables of interest together with the car name
mtcars.df = mtcars.df %>% 
  mutate(gpm = 1 / mpg, speed = 1 / qsec) %>% 
  dplyr::select(cyl:carb, gpm, speed, name)

## Long format: one row per car and variable
l.mtcars.df = gather(mtcars.df, key = var, value = value, -name)

##  Scale values within each variable (subtract the mean, divide by the sd).
##  The scaling must happen inside mutate() after group_by(var): an expression
##  placed inside group_by() is evaluated on the ungrouped data.
l.scaled.mtcars.df = l.mtcars.df %>% 
  group_by(var) %>% 
  mutate(s.value = as.vector(scale(value))) %>% 
  ungroup()

## Identify similar items
target = "Datsun 710" # specifies the name of the item of interest

## Attach the target's scaled value for the same variable to every row, so
## each item is compared with the target variable by variable
l.scaled.mtcars.df = l.scaled.mtcars.df %>% 
  group_by(var) %>% 
  mutate(target_s.value = s.value[name == target]) %>% 
  ungroup()

## Euclidean distance between each item and the target across all variables
l.scaled.mtcars.df = l.scaled.mtcars.df %>% 
  group_by(name) %>% 
  mutate(distance = (sum((s.value - target_s.value)^2))^0.5) %>% 
  ungroup()


##  Scale items
scaled_mtcars.df = mtcars.df %>% mutate_at(vars(cyl:speed), scale)

## Highlight similar items
## Pairwise Euclidean distances between vehicles. The character `name` column
## is dropped first so that dist() only receives the scaled numeric variables.
dist.df = scaled_mtcars.df %>% 
  dplyr::select(-name) %>% 
  dist(upper = TRUE, diag = FALSE) %>% 
  as.matrix() %>% 
  as.data.frame()

## Find the closest vehicle to target vehicle
target = 18 # specifies the row number of interest
target.dist = dist.df[, target]
target.dist[target] = Inf # a vehicle must not be matched with itself
closest = which.min(target.dist) # single index, no floating-point equality
mtcars.df$name[target]
## [1] "Fiat 128"
mtcars.df$name[closest]
## [1] "Toyota Corolla"
## Reshape the scaled data to long format: one row per vehicle and variable
l.scaled_mtcars.df = gather(scaled_mtcars.df, key = var, value = value, -name)


## Rows for the target vehicle and its closest match, used for highlighting
pair.df = l.scaled_mtcars.df %>% 
  filter(name %in% mtcars.df$name[c(target, closest)])

## Order variables by the standard deviation

library(ggrepel)
## Parallel coordinate plot: one thin line per vehicle, the highlighted pair
## drawn in red and labelled at the "speed" variable.
## NOTE(review): the values appear to be standardized before this point, so
## the per-variable sd used by reorder() is presumably ~1 for every variable
## and the resulting order may be near-arbitrary -- confirm the intended order.
ggplot(
  data = l.scaled_mtcars.df, 
  mapping = aes(x = reorder(var, value, sd), y = value, group = name)
) +
  geom_line(alpha = 0.3, size = 0.2) +
  geom_line(data = pair.df, colour = "red", size = 0.6, alpha = 0.6) +
  geom_label_repel(
    data = filter(pair.df, var == "speed"), 
    mapping = aes(x = "speed", y = value, label = name), 
    nudge_y = -0.75
  ) +
  theme_minimal()

5.4 Glyphs: Chernoff faces and radar plots

Show patterns and outliers not precise comparisons