# 1. Load data ------------------------------------------------------------

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.4
## -- Attaching packages ------------- tidyverse 1.2.1 --
## v ggplot2 3.1.1       v purrr   0.3.2  
## v tibble  2.1.1       v dplyr   0.8.0.1
## v tidyr   0.8.3       v stringr 1.4.0  
## v readr   1.1.1       v forcats 0.2.0
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'tibble' was built under R version 3.4.4
## Warning: package 'tidyr' was built under R version 3.4.4
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'stringr' was built under R version 3.4.4
## -- Conflicts ---------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the country-level demographics file from the working directory.
# stringsAsFactors is given explicitly because its default changed from TRUE
# to FALSE in R 4.0.0; the inspection output recorded below shows `country`
# and `continent` as factors, so TRUE keeps the script reproducible on any
# R version.
country_demo <- read.csv("country_demo.csv", header = TRUE,
                         stringsAsFactors = TRUE)
# Quick look at the loaded data: first ten rows, column names, column types
# and per-column summary statistics.
head(country_demo, n = 10)
##     X     country continent year lifeExp      pop gdpPercap
## 1   1 Afghanistan      Asia 1952  28.801  8425333  779.4453
## 2   2 Afghanistan      Asia 1957  30.332  9240934  820.8530
## 3   3 Afghanistan      Asia 1962  31.997 10267083  853.1007
## 4   4 Afghanistan      Asia 1967  34.020 11537966  836.1971
## 5   5 Afghanistan      Asia 1972  36.088 13079460  739.9811
## 6   6 Afghanistan      Asia 1977  38.438 14880372  786.1134
## 7   7 Afghanistan      Asia 1982  39.854 12881816  978.0114
## 8   8 Afghanistan      Asia 1987  40.822 13867957  852.3959
## 9   9 Afghanistan      Asia 1992  41.674 16317921  649.3414
## 10 10 Afghanistan      Asia 1997  41.763 22227415  635.3414

# Column names.
colnames(country_demo)
## [1] "X"         "country"   "continent" "year"      "lifeExp"   "pop"      
## [7] "gdpPercap"

# Column types and a preview of the values.
str(country_demo)
## 'data.frame':    1704 obs. of  7 variables:
##  $ X        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ country  : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
##  $ pop      : int  8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap: num  779 821 853 836 740 ...

# Per-column summary statistics / level counts.
summary(country_demo)
##        X                 country        continent        year     
##  Min.   :   1.0   Afghanistan:  12   Africa  :624   Min.   :1952  
##  1st Qu.: 426.8   Albania    :  12   Americas:300   1st Qu.:1966  
##  Median : 852.5   Algeria    :  12   Asia    :396   Median :1980  
##  Mean   : 852.5   Angola     :  12   Europe  :360   Mean   :1980  
##  3rd Qu.:1278.2   Argentina  :  12   Oceania : 24   3rd Qu.:1993  
##  Max.   :1704.0   Australia  :  12                  Max.   :2007  
##                   (Other)    :1632                                
##     lifeExp           pop              gdpPercap       
##  Min.   :23.60   Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:48.20   1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :60.71   Median :7.024e+06   Median :  3531.8  
##  Mean   :59.47   Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:70.85   3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :82.60   Max.   :1.319e+09   Max.   :113523.1  
## 
# Rename the column that was read in as "X" to "ID".
# The column is selected by name rather than by position 1, so the rename
# still hits the right column if the column order of the input ever changes
# and does nothing when no column is called "X".
names(country_demo)[names(country_demo) == "X"] <- "ID"
names(country_demo)
## [1] "ID"        "country"   "continent" "year"      "lifeExp"   "pop"      
## [7] "gdpPercap"
# 2. Interactive scatter plot --------------------------------------------------------


# Keep only the observations recorded for 2007.
country_demo_2007 <- country_demo %>%
  filter(year == 2007)


# Interactive plots

library(plotly)
## Warning: package 'plotly' was built under R version 3.4.4
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Scatter plot of life expectancy against GDP per capita for the 2007 subset,
# coloured by continent, with GDP per capita on a log10 x-axis.
scatter_v1 <- ggplot(
  country_demo_2007,
  aes(x = gdpPercap, y = lifeExp, colour = continent)
) +
  geom_point() +
  scale_x_log10() +
  labs(
    title = "Life expectancy and GDP per Capita, 2007",
    x = "GDP per capita (log scale)",
    y = "Life expectancy",
    colour = "Continent"
  )

# Convert the static ggplot object into an interactive plotly widget.
ggplotly(scatter_v1)
# 3. Interactive line chart ----------------------------------------------------------

# Total population per continent for each year.
# `pop` is read as integer; it is converted to double before summing,
# presumably to avoid integer overflow in sum() (the totals shown below are
# already above 1.5e9 by 1957).
# ungroup() removes the `year` grouping that summarise() leaves on its result,
# so later verbs applied to this table do not silently operate per year.
country_demo_by_year_v3 <- country_demo %>%
  group_by(year, continent) %>%
  summarise(tot_pop = sum(as.numeric(pop))) %>%
  ungroup()
country_demo_by_year_v3
## # A tibble: 60 x 3
##     year continent    tot_pop
##    <int> <fct>          <dbl>
##  1  1952 Africa     237640501
##  2  1952 Americas   345152446
##  3  1952 Asia      1395357351
##  4  1952 Europe     418120846
##  5  1952 Oceania     10686006
##  6  1957 Africa     264837738
##  7  1957 Americas   386953916
##  8  1957 Asia      1562780599
##  9  1957 Europe     437890351
## 10  1957 Oceania     11941976
## # ... with 50 more rows
names(country_demo_by_year_v3)
## [1] "year"      "continent" "tot_pop"
# Line chart of total population over time, one line per continent.
# Continent drives both the line colour and the line type; both legends are
# given the same title.
line_v1 <- ggplot(
  country_demo_by_year_v3,
  aes(x = year, y = tot_pop, colour = continent)
) +
  geom_line(aes(linetype = continent), size = 1.5) +
  labs(
    title = "Total population by year",
    x = "Year",
    y = "Total population",
    colour = "Continent",
    linetype = "Continent"
  )




# Render the line chart as an interactive plotly widget with the legend hidden.
# plotly's `legend` layout attribute expects a list of legend settings, so the
# string "none" (a ggplot2 `legend.position` value) is not a valid way to
# switch the legend off; `showlegend = FALSE` is the plotly attribute for that.
ggplotly(line_v1) %>%
  layout(showlegend = FALSE)
# 4. Interactive histogram -----------------------------------------------------------


# Histogram of 2007 GDP per capita on a log10 x-axis.
# bins = 30 is the value stat_bin() falls back to when none is given; stating
# it explicitly keeps the plot the same while avoiding the
# "Pick better value with `binwidth`" message on every render.
histogram_v1 <- ggplot(country_demo_2007, aes(x = gdpPercap)) +
  geom_histogram(bins = 30, fill = "Orange", colour = "blue") +
  scale_x_log10() +
  labs(
    title = "Histogram of GDP per capita, 2007",
    x = "GDP per capita (log scale)",
    y = "Count"
  )

# Convert the static ggplot object into an interactive plotly widget.
ggplotly(histogram_v1)
# 5. Interactive boxplot -------------------------------------------------------------



# Box plot of GDP per capita by continent for the 2007 subset
# (linear y-axis, blue outlines, yellow fill).
box_v1 <- ggplot(
  country_demo_2007,
  aes(x = continent, y = gdpPercap)
) +
  geom_boxplot(colour = "blue", fill = "yellow") +
  labs(x = "Continent", y = "GDP per capita")

# Convert the static ggplot object into an interactive plotly widget.
ggplotly(box_v1)