Use the force with Alita
# 1. Load data
## X country continent year lifeExp pop gdpPercap
## 1 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 6 Afghanistan Asia 1977 38.438 14880372 786.1134
## 7 7 Afghanistan Asia 1982 39.854 12881816 978.0114
## 8 8 Afghanistan Asia 1987 40.822 13867957 852.3959
## 9 9 Afghanistan Asia 1992 41.674 16317921 649.3414
## 10 10 Afghanistan Asia 1997 41.763 22227415 635.3414
## [1] "X" "country" "continent" "year" "lifeExp" "pop"
## [7] "gdpPercap"
## 'data.frame': 1706 obs. of 7 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ continent: chr "Asia" "Asia" "Asia" "Asia" ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ pop : int 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num 779 821 853 836 740 ...
## X country continent year
## Min. : 1.0 Length:1706 Length:1706 Min. :1952
## 1st Qu.: 427.2 Class :character Class :character 1st Qu.:1967
## Median : 853.5 Mode :character Mode :character Median :1982
## Mean : 853.5 Mean :1980
## 3rd Qu.:1279.8 3rd Qu.:1997
## Max. :1706.0 Max. :2007
## lifeExp pop gdpPercap
## Min. :23.60 Min. :6.660e+02 Min. : 241.2
## 1st Qu.:48.22 1st Qu.:2.792e+06 1st Qu.: 1203.2
## Median :60.71 Median :7.021e+06 Median : 3534.8
## Mean :59.48 Mean :2.957e+07 Mean : 7234.0
## 3rd Qu.:70.85 3rd Qu.:1.957e+07 3rd Qu.: 9349.0
## Max. :82.60 Max. :1.319e+09 Max. :113523.1
## [1] "ID" "country" "continent" "year" "lifeExp" "pop"
## [7] "gdpPercap"
Filter for year = 2007 only.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Using pipe to filter for year = 1977 only.
## ID country continent year lifeExp pop gdpPercap
## 1 6 Afghanistan Asia 1977 38.438 14880372 786.1134
## 2 18 Albania Europe 1977 68.930 2509048 3533.0039
## 3 30 Algeria Africa 1977 58.014 17152804 4910.4168
## 4 42 Angola Africa 1977 39.483 6162675 3008.6474
## 5 54 Argentina Americas 1977 68.481 26983828 10079.0267
## 6 66 Australia Oceania 1977 73.490 14074100 18334.1975
## 7 78 Austria Europe 1977 72.170 7568430 19749.4223
## 8 90 Bahrain Asia 1977 65.593 297410 19340.1020
## 9 102 Bangladesh Asia 1977 46.923 80428306 659.8772
## 10 114 Belgium Europe 1977 72.800 9821800 19117.9745
Scatter plot for year = 2007 only.
# Life expectancy (y) against GDP per capita (x).
ggplot(country_demo_2007, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  labs(title = "Life expectancy and GDP per capita")

# The same two variables with the axes swapped.
ggplot(country_demo_2007, aes(x = lifeExp, y = gdpPercap)) +
  geom_point() +
  labs(title = "Life expectancy and GDP per capita, 2007")

# GDP per capita (y) against population (x).
ggplot(country_demo_2007, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  labs(title = "GDP per capita and population")
Label scatter plot. Note: think carefully (and use the force) before labelling.
# Write the country name next to every point; hjust = 0 and vjust = 0 anchor
# each label by its bottom-left corner.
ggplot(country_demo_2007, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  geom_text(aes(label = country), hjust = 0, vjust = 0) +
  labs(title = "GDP per capita and population")
library(ggrepel)
# Same scatter, with a boxed ggrepel label attached to every point.
ggplot(country_demo_2007, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  geom_label_repel(
    aes(label = country),
    box.padding = 0.35,
    point.padding = 0.5,
    segment.color = "blue"
  ) +
  labs(title = "GDP per capita and population")
# Label only the rows whose country is "Iron City" or "Zalem"; the label layer
# gets its own filtered data while the points still use the full data frame.
ggplot(country_demo_2007, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  geom_label_repel(
    data = subset(country_demo_2007, country %in% c("Iron City", "Zalem")),
    aes(label = country),
    box.padding = 0.35,
    point.padding = 0.5,
    segment.color = "blue",
    force = 200
  ) +
  labs(title = "GDP per capita and population")
# country_demo_2007 with readable axis labels and a title.
ggplot(country_demo_2007, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  labs(
    title = "GDP per capita and population, 2007",
    x = "Population",
    y = "GDP per Capita"
  )

# Same data on log10 axes (default labels).
ggplot(country_demo_2007, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

# Every row of country_demo on linear axes.
ggplot(country_demo, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  labs(
    title = "GDP per capita and population",
    x = "Population",
    y = "GDP per Capita"
  )
# Every row of country_demo on log10 axes, with axis labels that state the
# scale. Both labels use the same "(log scale)" wording.
ggplot(country_demo, aes(x = pop, y = gdpPercap)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "GDP per capita and population",
    x = "Population (log scale)",
    y = "GDP per Capita (log scale)"
  )
library(ggpubr)
# Log-log scatter of every row in country_demo with a fitted linear trend,
# the regression equation and Pearson correlation annotations, and labels for
# "Iron City" and "Zalem" taken from country_demo_2007.
ggplot(country_demo, aes(x = pop, y = gdpPercap)) +
  geom_point(shape = 1, colour = "green") +
  geom_smooth(method = lm) +
  stat_regline_equation(label.x = 8, label.y = 4) +
  stat_cor(method = "pearson", label.x = 8, label.y = 3.8) +
  geom_label_repel(
    data = subset(country_demo_2007, country %in% c("Iron City", "Zalem")),
    aes(label = country),
    box.padding = 0.35,
    point.padding = 0.5,
    segment.color = "blue",
    force = 200
  ) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "GDP per capita and population",
    x = "Population (log scale)",
    y = "GDP per Capita (log scale)"
  )
## `geom_smooth()` using formula 'y ~ x'
# Colour points by continent; GDP per capita on a log10 x axis.
ggplot(country_demo_2007, aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point() +
  scale_x_log10() +
  labs(colour = "Continent")

# Linear x axis, with the colour legend retitled.
ggplot(country_demo_2007, aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point() +
  labs(colour = "Planet")

# Log10 x axis with title, axis labels and legend title all set.
ggplot(country_demo_2007, aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point() +
  scale_x_log10() +
  labs(
    title = "Life expectancy and GDP per Capita, 2007",
    x = "GDP per capita (log scale)",
    y = "Life expectancy",
    colour = "Continent"
  )
# Bubble chart: colour by continent, point size by population.
ggplot(
  country_demo_2007,
  aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point() +
  scale_x_log10()

# Same chart with title, axis labels and legend titles.
ggplot(
  country_demo_2007,
  aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point() +
  scale_x_log10() +
  labs(
    title = "Life expectancy and GDP per Capita, 2007",
    x = "GDP per capita (log scale)",
    y = "Life expectancy",
    colour = "Continent",
    size = "Population"
  )

# As above, plus labels for "Iron City" and "Zalem". The label layer sets a
# constant size = 5, so label text does not scale with the size aesthetic.
ggplot(
  country_demo_2007,
  aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point() +
  geom_label_repel(
    data = subset(country_demo_2007, country %in% c("Iron City", "Zalem")),
    aes(label = country),
    box.padding = 0.35,
    point.padding = 0.5,
    segment.color = "blue",
    force = 200,
    size = 5
  ) +
  scale_x_log10() +
  labs(
    title = "Life expectancy and GDP per Capita, 2007",
    x = "GDP per capita (log scale)",
    y = "Life expectancy",
    colour = "Continent",
    size = "Population"
  )
# Life expectancy against population (log10 x axis), one panel per continent.
# Point size is mapped to gdpPercap, so the size legend is titled to match
# that variable rather than the x-axis variable.
ggplot(
  country_demo_2007,
  aes(x = pop, y = lifeExp, colour = continent, size = gdpPercap)
) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ continent) +
  labs(
    title = "Life expectancy and population, 2007",
    x = "Population (log scale)",
    y = "Life expectancy",
    colour = "Continent",
    size = "GDP per capita"
  )
# Life expectancy against population (log10 x axis), one panel per continent,
# with labels for "Iron City" and "Zalem". Point size is mapped to gdpPercap,
# so the size legend is titled to match that variable. The label layer sets a
# constant size = 3 so label text does not scale with the size aesthetic.
ggplot(
  country_demo_2007,
  aes(x = pop, y = lifeExp, colour = continent, size = gdpPercap)
) +
  geom_point() +
  geom_label_repel(
    data = subset(country_demo_2007, country %in% c("Iron City", "Zalem")),
    aes(label = country),
    box.padding = 0.35,
    point.padding = 0.5,
    segment.color = "blue",
    force = 200,
    size = 3
  ) +
  scale_x_log10() +
  facet_wrap(~ continent) +
  labs(
    title = "Life expectancy and population, 2007",
    x = "Population (log scale)",
    y = "Life expectancy",
    colour = "Continent",
    size = "GDP per capita"
  )
# One panel per continent, default axis labels.
ggplot(country_demo_2007, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ continent)

# Same panels with explicit axis labels.
ggplot(country_demo_2007, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ continent) +
  labs(x = "GDP per capita (log scale)", y = "Life expectancy")

# Every row of country_demo: one panel per year, colour by continent and
# point size by population.
ggplot(
  country_demo,
  aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ year) +
  labs(
    title = "Life expectancy and GDP per capita",
    x = "GDP per capita (log scale)",
    y = "Life expectancy",
    colour = "Continent",
    size = "Population"
  )
## # A tibble: 1,706 x 7
## # Groups: year [12]
## ID country continent year lifeExp pop gdpPercap
## <int> <chr> <chr> <int> <dbl> <int> <dbl>
## 1 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 10 Afghanistan Asia 1997 41.8 22227415 635.
## # ... with 1,696 more rows
# Per-year mean life expectancy and total population. pop is stored as integer
# and the yearly totals exceed the integer range, so it is converted to double
# before summing.
country_demo %>%
  group_by(year) %>%
  summarise(
    mean_life_exp = mean(lifeExp),
    tot_pop = sum(as.double(pop))
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 12 x 3
## year mean_life_exp tot_pop
## <int> <dbl> <dbl>
## 1 1952 49.1 2406957150
## 2 1957 51.5 2664404580
## 3 1962 53.6 2899782974
## 4 1967 55.7 3217478384
## 5 1972 57.6 3576977158
## 6 1977 59.6 3930045807
## 7 1982 61.5 4289436840
## 8 1987 63.2 4691477418
## 9 1992 64.2 5110710260
## 10 1997 65.0 5515204472
## 11 2002 65.7 5886977579
## 12 2007 67.0 6257680511
# Per-year median GDP per capita and the largest single population value.
country_demo %>%
  group_by(year) %>%
  summarise(median_GDP_percap = median(gdpPercap), max_pop = max(pop))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 12 x 3
## year median_GDP_percap max_pop
## <int> <dbl> <int>
## 1 1952 1969. 556263527
## 2 1957 2173. 637408000
## 3 1962 2335. 665770000
## 4 1967 2678. 754550000
## 5 1972 3339. 862030000
## 6 1977 3799. 943455000
## 7 1982 4216. 1000281000
## 8 1987 4280. 1084035000
## 9 1992 4386. 1164970000
## 10 1997 4782. 1230075000
## 11 2002 5320. 1280400000
## 12 2007 6548. 1318683096
# Per-year highest GDP per capita and the smallest single population value.
country_demo %>%
  group_by(year) %>%
  summarise(max_GDP_percap = max(gdpPercap), min_pop = min(pop))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 12 x 3
## year max_GDP_percap min_pop
## <int> <dbl> <int>
## 1 1952 108382. 60011
## 2 1957 113523. 61325
## 3 1962 95458. 65345
## 4 1967 80895. 70787
## 5 1972 109348. 76595
## 6 1977 59265. 86796
## 7 1982 33693. 98593
## 8 1987 31541. 110812
## 9 1992 34933. 125911
## 10 1997 41283. 145608
## 11 2002 44684. 170372
## 12 2007 49357. 666
# Mean life expectancy and total population for each year/continent pair.
# The result stays grouped by year (only the last grouping level is dropped).
country_demo %>%
  group_by(year, continent) %>%
  summarise(mean_life_exp = mean(lifeExp), tot_pop = sum(as.double(pop)))
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
## # A tibble: 60 x 4
## # Groups: year [12]
## year continent mean_life_exp tot_pop
## <int> <chr> <dbl> <dbl>
## 1 1952 Africa 39.1 237640501
## 2 1952 Americas 53.3 345152446
## 3 1952 Asia 46.3 1395357351
## 4 1952 Europe 64.4 418120846
## 5 1952 Oceania 69.3 10686006
## 6 1957 Africa 41.3 264837738
## 7 1957 Americas 56.0 386953916
## 8 1957 Asia 49.3 1562780599
## 9 1957 Europe 66.7 437890351
## 10 1957 Oceania 70.3 11941976
## # ... with 50 more rows
# Highest life expectancy and smallest population for each continent/year
# pair. Grouping by continent first leaves the result grouped by continent.
country_demo %>%
  group_by(continent, year) %>%
  summarise(max_life_exp = max(lifeExp), min_pop = min(pop))
## `summarise()` regrouping output by 'continent' (override with `.groups` argument)
## # A tibble: 60 x 4
## # Groups: continent [5]
## continent year max_life_exp min_pop
## <chr> <int> <dbl> <int>
## 1 Africa 1952 52.7 60011
## 2 Africa 1957 58.1 61325
## 3 Africa 1962 60.2 65345
## 4 Africa 1967 61.6 70787
## 5 Africa 1972 64.3 76595
## 6 Africa 1977 67.1 86796
## 7 Africa 1982 69.9 98593
## 8 Africa 1987 71.9 110812
## 9 Africa 1992 73.6 125911
## 10 Africa 1997 74.8 145608
## # ... with 50 more rows
# Store the per-year totals for later plotting: total population (summed as
# double because pop is integer) and mean life expectancy.
country_demo_by_year_v2 <- country_demo %>%
  group_by(year) %>%
  summarise(tot_pop = sum(as.double(pop)), mean_life_exp = mean(lifeExp))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 12 x 3
## year tot_pop mean_life_exp
## <int> <dbl> <dbl>
## 1 1952 2406957150 49.1
## 2 1957 2664404580 51.5
## 3 1962 2899782974 53.6
## 4 1967 3217478384 55.7
## 5 1972 3576977158 57.6
## 6 1977 3930045807 59.6
## 7 1982 4289436840 61.5
## 8 1987 4691477418 63.2
## 9 1992 5110710260 64.2
## 10 1997 5515204472 65.0
## 11 2002 5886977579 65.7
## 12 2007 6257680511 67.0
2 steps in 1
# Summarise and plot in a single pipeline: total population per year goes
# straight into ggplot() without an intermediate object.
country_demo %>%
  group_by(year) %>%
  summarise(tot_pop = sum(as.double(pop))) %>%
  ggplot() +
  aes(x = year, y = tot_pop) +
  geom_point()
## `summarise()` ungrouping output (override with `.groups` argument)
Change axis range
# Total population for each year/continent pair (one row per pair). The
# result remains grouped by year.
country_demo_by_year_v3 <- country_demo %>%
  group_by(year, continent) %>%
  summarise(tot_pop = sum(as.double(pop)))
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
## # A tibble: 60 x 3
## # Groups: year [12]
## year continent tot_pop
## <int> <chr> <dbl>
## 1 1952 Africa 237640501
## 2 1952 Americas 345152446
## 3 1952 Asia 1395357351
## 4 1952 Europe 418120846
## 5 1952 Oceania 10686006
## 6 1957 Africa 264837738
## 7 1957 Americas 386953916
## 8 1957 Asia 1562780599
## 9 1957 Europe 437890351
## 10 1957 Oceania 11941976
## # ... with 50 more rows
## [1] "year" "tot_pop" "mean_life_exp"
# Line of total population per year using linetype 2; expand_limits(y = 0)
# forces the y axis to include zero.
ggplot(country_demo_by_year_v2, aes(x = year, y = tot_pop)) +
  geom_line(linetype = 2) +
  expand_limits(y = 0)

# Default (solid) line with a title and axis labels.
ggplot(country_demo_by_year_v2, aes(x = year, y = tot_pop)) +
  geom_line() +
  expand_limits(y = 0) +
  labs(
    title = "Total population by year",
    x = "Year",
    y = "Total population"
  )
## [1] "year" "continent" "tot_pop"
# Restrict to 2007, then compute total population (summed as double) and
# mean life expectancy for each continent.
country_demo_by_continent_2007 <- country_demo %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(tot_pop = sum(as.double(pop)), mean_life_exp = mean(lifeExp))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 3
## continent tot_pop mean_life_exp
## <chr> <dbl> <dbl>
## 1 Africa 929539692 54.8
## 2 Americas 905538516 73.0
## 3 Asia 3811953827 70.7
## 4 Europe 586098529 77.6
## 5 Oceania 24549947 80.7
# Mean life expectancy per continent. coord_cartesian() zooms the y axis to
# 35-85 without dropping any data.
ggplot(country_demo_by_continent_2007, aes(x = continent, y = mean_life_exp)) +
  geom_col() +
  coord_cartesian(ylim = c(35, 85)) +
  labs(x = "Continent", y = "Mean life expectancy")

# Same chart with a green fill and red outline on the bars.
ggplot(country_demo_by_continent_2007, aes(x = continent, y = mean_life_exp)) +
  geom_col(fill = "green", colour = "red") +
  coord_cartesian(ylim = c(35, 85)) +
  labs(x = "Continent", y = "Mean life expectancy")
# Total population per continent.
# NOTE(review): country_demo_by_year_v3 holds one row per year/continent pair
# and geom_col() stacks rows sharing an x value, so each bar is the sum over
# all years -- confirm that is the intended quantity.
ggplot(country_demo_by_year_v3, aes(x = continent, y = tot_pop)) +
  geom_col() +
  labs(
    title = "Total population by continent",
    x = "Continent",
    y = "Total population"
  )

# Same bars, filled by continent with the default palette.
ggplot(country_demo_by_year_v3, aes(x = continent, y = tot_pop, fill = continent)) +
  geom_col() +
  labs(
    title = "Total population by continent",
    x = "Continent",
    y = "Total population"
  )

# Same bars, filled by continent with a grey palette.
ggplot(country_demo_by_year_v3, aes(x = continent, y = tot_pop, fill = continent)) +
  geom_col() +
  scale_fill_grey() +
  labs(
    title = "Total population by continent",
    x = "Continent",
    y = "Total population"
  )
# Total population per continent, filled by continent with a ColorBrewer
# qualitative palette. "Dark2" is a valid palette name; an unrecognised name
# only raises an "Unknown palette" warning and the requested colours are not
# applied.
ggplot(country_demo_by_year_v3, aes(x = continent, y = tot_pop, fill = continent)) +
  geom_col() +
  scale_fill_brewer(palette = "Dark2") +
  labs(
    title = "Total population by continent",
    x = "Continent",
    y = "Total population",
    fill = "Planet"
  )
## Warning in pal_name(palette, type): Unknown palette Dark6
# Horizontal bars: coord_flip() swaps the axes after the chart is built, so
# the x/y labels still refer to the original aesthetics.
ggplot(country_demo_by_year_v3, aes(x = continent, y = tot_pop)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Total population by continent",
    x = "Continent",
    y = "Total population"
  )

# Horizontal bars filled by continent, with a titled fill legend.
ggplot(country_demo_by_year_v3, aes(x = continent, y = tot_pop, fill = continent)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Total population by continent",
    x = "Continent",
    y = "Total population",
    fill = "Continent"
  )
Plot for Oceania subset
# Keep only the Oceania rows for 2007, then print the result.
country_demo_oceania_2007 <- country_demo %>%
  filter(continent == "Oceania" & year == 2007)
country_demo_oceania_2007
## ID country continent year lifeExp pop gdpPercap
## 1 72 Australia Oceania 2007 81.235 20434176 34435.37
## 2 1104 New Zealand Oceania 2007 80.204 4115771 25185.01
# Population per country in the Oceania 2007 subset.
ggplot(country_demo_oceania_2007, aes(x = country, y = pop)) +
  geom_col() +
  labs(
    title = "Population, Oceania, 2007",
    x = "Country",
    y = "Population"
  )

# Same chart with a yellow fill and dark blue outline on the bars.
ggplot(country_demo_oceania_2007, aes(x = country, y = pop)) +
  geom_col(fill = "yellow", colour = "dark blue") +
  labs(
    title = "Population, Oceania, 2007",
    x = "Country",
    y = "Population"
  )
## ID country continent year lifeExp pop gdpPercap
## 1 12 Afghanistan Asia 2007 43.828 31889923 974.5803
## 2 24 Albania Europe 2007 76.423 3600523 5937.0295
## 3 36 Algeria Africa 2007 72.301 33333216 6223.3675
## 4 48 Angola Africa 2007 42.731 12420476 4797.2313
## 5 60 Argentina Americas 2007 75.320 40301927 12779.3796
## 6 72 Australia Oceania 2007 81.235 20434176 34435.3674
## 7 84 Austria Europe 2007 79.829 8199783 36126.4927
## 8 96 Bahrain Asia 2007 75.635 708573 29796.0483
## 9 108 Bangladesh Asia 2007 64.062 150448339 1391.2538
## 10 120 Belgium Europe 2007 79.441 10392226 33692.6051
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of life expectancy using the default number of bins (ggplot2
# reports that it is using bins = 30).
ggplot(country_demo_2007, aes(x = lifeExp)) +
  geom_histogram(fill = "blue") +
  labs(
    title = "Histogram of life expectancy, 2007",
    x = "Life expectancy",
    y = "Count"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Life expectancy histogram with an explicit bin width of 5.
ggplot(country_demo_2007, aes(x = lifeExp)) +
  geom_histogram(fill = "blue", binwidth = 5) +
  labs(
    title = "Histogram of life expectancy, 2007",
    x = "Life expectancy",
    y = "Count"
  )

# GDP per capita histogram on a linear x axis (default bins).
ggplot(country_demo_2007, aes(x = gdpPercap)) +
  geom_histogram(fill = "Orange", colour = "blue") +
  labs(
    title = "Histogram of GDP per capita, 2007",
    x = "GDP per capita",
    y = "Count"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# GDP per capita histogram on a log10 x axis (default bins).
ggplot(country_demo_2007, aes(x = gdpPercap)) +
  geom_histogram(fill = "Orange", colour = "blue") +
  scale_x_log10() +
  labs(
    title = "Histogram of GDP per capita, 2007",
    x = "GDP per capita (log scale)",
    y = "Count"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# GDP per capita by continent, boxes filled by continent.
ggplot(country_demo_2007, aes(x = continent, y = gdpPercap, fill = continent)) +
  geom_boxplot() +
  labs(fill = "Alita")

# Life expectancy by continent, one panel per year, with smaller axis text,
# bold axis titles and italic panel strip labels.
ggplot(country_demo, aes(x = continent, y = lifeExp)) +
  geom_boxplot(fill = "yellow") +
  facet_wrap(~ year) +
  labs(
    title = "Life expectancy by continent and year",
    x = "Continent",
    y = "Life expectancy"
  ) +
  theme(
    axis.text = element_text(size = 9),
    axis.title = element_text(size = 10, face = "bold"),
    strip.text.x = element_text(size = 9, face = "italic")
  )
## [1] "ID" "country" "continent" "year" "lifeExp" "pop"
## [7] "gdpPercap"
Remove scientific notation
Basic violin plot
Scale maximum width proportional to size
Scale maximum width to 1
Disable trim to the range of the data
# Violins of GDP per capita per continent, all scaled to the same maximum
# width, with tails extended beyond the data range (trim = FALSE).
ggplot(country_demo_2007, aes(x = factor(continent), y = gdpPercap)) +
  geom_violin(scale = "width", trim = FALSE)
Adjusting the fit
# adjust = 0.5 halves the density bandwidth, giving a less smoothed outline.
ggplot(country_demo_2007, aes(x = factor(continent), y = gdpPercap)) +
  geom_violin(adjust = 0.5, trim = FALSE)
Add colour
# Violins filled by continent, with title, axis labels and legend title.
ggplot(
  country_demo_2007,
  aes(x = factor(continent), y = gdpPercap, fill = continent)
) +
  geom_violin(scale = "width", trim = FALSE) +
  labs(
    title = "Violin plot of GDP per capita by continent",
    x = "Continent",
    y = "GDP per capita",
    fill = "Continent"
  )
Some modifications
# Violins with a constant grey fill and a "#336699" outline.
ggplot(country_demo_2007, aes(x = factor(continent), y = gdpPercap)) +
  geom_violin(
    scale = "width",
    trim = FALSE,
    fill = "grey",
    colour = "#336699"
  )
Draw quantiles
library(ggpubr)
# Log-log scatter of every row in country_demo with a fitted linear trend,
# the regression equation and Pearson correlation annotations, and labels for
# "Iron City" and "Zalem" taken from country_demo_2007.
ggplot(country_demo, aes(x = pop, y = gdpPercap)) +
  geom_point(shape = 1, colour = "green") +
  geom_smooth(method = lm) +
  stat_regline_equation(label.x = 8, label.y = 4) +
  stat_cor(method = "pearson", label.x = 8, label.y = 3.8) +
  geom_label_repel(
    data = subset(country_demo_2007, country %in% c("Iron City", "Zalem")),
    aes(label = country),
    box.padding = 0.35,
    point.padding = 0.5,
    segment.color = "blue",
    force = 200
  ) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "GDP per capita and population",
    x = "Population (log scale)",
    y = "GDP per Capita (log scale)"
  )
## `geom_smooth()` using formula 'y ~ x'
## [1] "ID" "country" "continent" "year" "lifeExp" "pop"
## [7] "gdpPercap"
## lifeExp pop gdpPercap
## lifeExp 1.00 0.06 0.58
## pop 0.06 1.00 -0.03
## gdpPercap 0.58 -0.03 1.00
# Apply the same three display labels to both the rows and the columns of the
# correlation matrix, then print it.
dimnames(corr_matrix) <- rep(
  list(c("Life Expentency", "Population", "GDP per Capita")),
  times = 2
)
corr_matrix
## Life Expentency Population GDP per Capita
## Life Expentency 1.00 0.06 0.58
## Population 0.06 1.00 -0.03
## GDP per Capita 0.58 -0.03 1.00
## Var1 Var2 value
## 1 Life Expentency Life Expentency 1.00
## 2 Population Life Expentency 0.06
## 3 GDP per Capita Life Expentency 0.58
## 4 Life Expentency Population 0.06
## 5 Population Population 1.00
## 6 GDP per Capita Population -0.03
## 7 Life Expentency GDP per Capita 0.58
## 8 Population GDP per Capita -0.03
## 9 GDP per Capita GDP per Capita 1.00
# Correlation heat map with the default fill gradient. Empty strings blank
# out the axis titles.
ggplot(corr_matrix_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  labs(title = "Correlation Heat Map", x = "", y = "", fill = "Correlation")

# White-to-steelblue gradient.
ggplot(corr_matrix_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "steelblue") +
  labs(title = "Correlation Heat Map", x = "", y = "", fill = "Correlation")

# White-to-black gradient.
ggplot(corr_matrix_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  labs(title = "Correlation Heat Map", x = "", y = "", fill = "Correlation")

# Blue-to-purple gradient.
ggplot(corr_matrix_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient(low = "blue", high = "purple") +
  labs(title = "Correlation Heat Map", x = "", y = "", fill = "Correlation")
# Correlation heat map with each coefficient printed on its tile. The text
# layer inherits x = Var1 and y = Var2 from the plot, so every label is drawn
# on the same cell as the tile it describes, whatever the matrix looks like.
ggplot(corr_matrix_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = value), color = "black", size = 5) +
  scale_fill_gradient(low = "white", high = "darkgreen") +
  labs(title = "Correlation Heat Map", x = "", y = "", fill = "Correlation")
# Blank out everything above the main diagonal of a matrix.
#
# corr_matrix: a matrix (here, a correlation matrix).
# Returns the same matrix with every entry strictly above the diagonal set to
# NA; the diagonal, lower triangle, dimensions and dimnames are untouched.
get_lower <- function(corr_matrix) {
  replace(corr_matrix, upper.tri(corr_matrix), NA)
}
# Keep only the diagonal and lower triangle of the correlation matrix, then
# print the result.
lower_corr_matrix <- corr_matrix %>% get_lower()
print(lower_corr_matrix)
## Life Expentency Population GDP per Capita
## Life Expentency 1.00 NA NA
## Population 0.06 1.00 NA
## GDP per Capita 0.58 -0.03 1
## Var1 Var2 value
## 1 Life Expentency Life Expentency 1.00
## 2 Population Life Expentency 0.06
## 3 GDP per Capita Life Expentency 0.58
## 5 Population Population 1.00
## 6 GDP per Capita Population -0.03
## 9 GDP per Capita GDP per Capita 1.00
# Lower-triangle correlation heat map with each coefficient printed on its
# tile. The text layer inherits x = Var1 and y = Var2 from the plot; swapping
# them would place the off-diagonal labels in the empty upper-triangle cells
# instead of on their tiles.
ggplot(lower_corr_matrix_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = value), color = "black", size = 5) +
  scale_fill_gradient(low = "white", high = "darkgreen") +
  labs(title = "Correlation Heat Map", x = "", y = "", fill = "Correlation")
# Lower-triangle heat map with theme_minimal() and a horizontal legend
# positioned inside the plot area at relative coordinates (0.6, 0.8).
ggplot(lower_corr_matrix_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = value), color = "black", size = 5) +
  scale_fill_gradient(low = "white", high = "darkgreen") +
  labs(title = "Correlation Heat Map", x = "", y = "", fill = "Correlation") +
  theme_minimal() +
  theme(
    legend.justification = c(1, 0),
    legend.position = c(0.6, 0.8),
    legend.direction = "horizontal"
  )

# Same plot with theme_classic() as the base theme.
ggplot(lower_corr_matrix_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = value), color = "black", size = 5) +
  scale_fill_gradient(low = "white", high = "darkgreen") +
  labs(title = "Correlation Heat Map", x = "", y = "", fill = "Correlation") +
  theme_classic() +
  theme(
    legend.justification = c(1, 0),
    legend.position = c(0.6, 0.8),
    legend.direction = "horizontal"
  )
## [1] "ID" "country" "continent" "year" "lifeExp" "pop"
## [7] "gdpPercap"
# Give columns 5 to 7 of country_demo_2 display names in one vectorised
# assignment, rather than one assignment per column.
colnames(country_demo_2)[5:7] <- c(
  "Life Expectency",
  "Population",
  "GDP per Capita"
)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(dplyr)
# Count the rows of country_demo that fall in each continent.
country_continent <- country_demo %>%
  group_by(continent) %>%
  summarise(number = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 2
## continent number
## <chr> <int>
## 1 Africa 624
## 2 Americas 302
## 3 Asia 396
## 4 Europe 360
## 5 Oceania 24