7 Generating visual outputs

7.1 Graphics with base R

# Distribution of life expectancy across all rows of gapminder.
hist(
  gapminder[["lifeExp"]],
  main = "Histogram of Life Expectancy",
  xlab = "Life expectancy"
)

Arrange figures into multiple panels with par

# Rows for a single country, drawn as three scatter plots side by side.
df <- gapminder[gapminder$country == "Switzerland", ]
# par() returns the settings it replaces; keep them so the 1 x 3 layout can be
# undone once the panels are drawn.
old_par <- par(mfrow = c(1, 3))
plot(x = df$year, y = df$lifeExp, xlab = "Years", ylab = "Life expectancy")
plot(x = df$year, y = df$pop, xlab = "Years", ylab = "Population size")
plot(x = df$year, y = df$gdpPercap, xlab = "Years", ylab = "GDP per capita")
# Restore the previous layout so later base plots are not squeezed into thirds.
par(old_par)

# Same three panels as above, this time for Zimbabwe.
df <- gapminder[gapminder$country == "Zimbabwe", ]
# par() returns the settings it replaces; keep them so the 1 x 3 layout can be
# undone once the panels are drawn.
old_par <- par(mfrow = c(1, 3))
plot(x = df$year, y = df$lifeExp, xlab = "Years", ylab = "Life expectancy")
plot(x = df$year, y = df$pop, xlab = "Years", ylab = "Population size")
plot(x = df$year, y = df$gdpPercap, xlab = "Years", ylab = "GDP per capita")
# Restore the previous layout so later base plots are not squeezed into thirds.
par(old_par)

7.2 Graphics with ggplot2

library(ggplot2)

We can look at multiple countries at the same time in a prettier way

# Keep the rows of four example countries and convert country to character.
df <- gapminder %>%
  dplyr::filter(
    country %in% c("Switzerland", "Australia", "Zimbabwe", "India")
  ) %>%
  dplyr::mutate(country = as.character(country))
      
# Life expectancy over time, one coloured line (with points) per country.
ggplot(data = df, mapping = aes(x = year, y = lifeExp, colour = country)) +
  geom_point() +
  geom_line()

# GDP per capita over time, one coloured line (with points) per country.
ggplot(data = df, mapping = aes(x = year, y = gdpPercap, colour = country)) +
  geom_point() +
  geom_line()

Now, let’s plot the mean GDP per capita over time for each continent:

# Summarise every continent/year combination: mean GDP per capita plus the
# mean, minimum, maximum and standard error of life expectancy.
gdp_c <- gapminder %>%
  dplyr::group_by(continent, year) %>%
  dplyr::summarise(
    mean_gdpPercap = mean(gdpPercap),
    mean_le = mean(lifeExp),
    min_le = min(lifeExp),
    max_le = max(lifeExp),
    # standard error of the mean: sd divided by sqrt of the group size
    se_le = sd(lifeExp) / sqrt(dplyr::n())
  )
## `summarise()` has grouped output by 'continent'. You can override using the `.groups` argument.
# Preview the first six rows of the summary table.
head(gdp_c, n = 6L)
## # A tibble: 6 × 7
## # Groups:   continent [1]
##   continent  year mean_gdpPercap mean_le min_le max_le se_le
##   <fct>     <int>          <dbl>   <dbl>  <dbl>  <dbl> <dbl>
## 1 Africa     1952          1253.    39.1   30     52.7 0.714
## 2 Africa     1957          1385.    41.3   31.6   58.1 0.779
## 3 Africa     1962          1598.    43.3   32.8   60.2 0.815
## 4 Africa     1967          2050.    45.3   34.1   61.6 0.844
## 5 Africa     1972          2340.    47.5   35.4   64.3 0.890
## 6 Africa     1977          2586.    49.6   36.8   67.1 0.944
# Mean GDP per capita over time, one coloured line (with points) per continent.
ggplot(
  data = gdp_c,
  mapping = aes(x = year, y = mean_gdpPercap, colour = continent)
) +
  geom_point() +
  geom_line()

We can pipe objects directly into the ggplot() function:

# The pipe passes gdp_c as the first (data) argument of ggplot().
gdp_c %>%
  ggplot(mapping = aes(x = year, y = mean_gdpPercap, colour = continent)) +
  geom_point() +
  geom_line()

And even do this:

# Summarise and plot in one chain, without storing the intermediate table.
gapminder %>%
  dplyr::group_by(continent, year) %>%
  dplyr::summarise(mean_gdpPercap = mean(gdpPercap)) %>%
  ggplot(mapping = aes(x = year, y = mean_gdpPercap, colour = continent)) +
  geom_point() +
  geom_line()

7.2.0.1 Exercise

Plot the life expectancy over time for all countries, using only the years in which the population size is larger than 2e+06

# Exercise solution: life expectancy over time, one coloured line per country.
gapminder %>%
  # keep only the country-years with a population of at least 2e+06
  dplyr::filter(pop >= 2e+06) %>%
  # the exercise asks for life expectancy, so lifeExp goes on the y axis
  ggplot(aes(x = year, y = lifeExp, color = country)) +
  geom_point() +
  geom_line() +
  # one panel per continent
  facet_wrap(~continent) +
  # hide the legend
  theme(legend.position = "none")

7.2.1 Some ggplot tricks

Make sure your data is in the right format (wide vs long). Usually, ggplot requires the data in long format. The functions tidyr::pivot_wider() and tidyr::pivot_longer() are very useful to transform one into the other.

# Open the help pages of the two reshaping functions.
help("pivot_wider", package = "tidyr")
help("pivot_longer", package = "tidyr")

To change the order of colors, modify the factor levels

# The order of the factor levels sets the order of the colours in the legend.
gapminder %>%
  # Relevel before grouping: continent is a grouping variable below, and
  # changing it inside a grouped mutate() would force dplyr to regroup.
  dplyr::mutate(
    continent = factor(
      continent,
      levels = c("Oceania", "Europe", "Africa", "Americas", "Asia")
    )
  ) %>%
  dplyr::group_by(continent, year) %>%
  dplyr::summarize(mean_gdpPercap = mean(gdpPercap)) %>%
  ggplot(aes(x = year, y = mean_gdpPercap, color = continent)) +
  geom_point() +
  geom_line()

You can store a plot in an object and keep adding layers to it

# Build the continent plot once and keep it in p so layers can be added later.
p <- gapminder %>%
  # Relevel before grouping: continent is a grouping variable below, and
  # changing it inside a grouped mutate() would force dplyr to regroup.
  dplyr::mutate(
    continent = factor(
      continent,
      levels = c("Oceania", "Europe", "Africa", "Americas", "Asia")
    )
  ) %>%
  dplyr::group_by(continent, year) %>%
  dplyr::summarize(mean_gdpPercap = mean(gdpPercap)) %>%
  ggplot(aes(x = year, y = mean_gdpPercap, color = continent)) +
  geom_point() +
  geom_line()

# Swap in the discrete viridis palette, using only part of its range
p + scale_colour_viridis_d(begin = 0.1, end = 0.8)