Directions

The world of quirks and the stats…

Data for demo

Back to the spellbook

1. Load data

# Read the character stats. The CSV begins with a UTF-8 byte-order mark;
# when the encoding is left unspecified the BOM bytes get glued onto the
# first header, which then shows up as `ï..Name`. Declaring "UTF-8-BOM"
# strips the mark (and is harmless for files without one), so the first
# column is simply `Name`.
hero <- read.csv("mha_v2.csv", header = TRUE, fileEncoding = "UTF-8-BOM")
head(hero)
##              Name Occupation        Quirk Power Speed Technique Intelligence
## 1     All For One    Villain  All for One   6.5   6.5       6.5          6.5
## 2       All Might       Hero  One for All   6.5   6.5       6.0          6.0
## 3    Best Jeanist       Hero Fiber Master   2.0   3.0       5.0          2.0
## 4     Bubble Girl       Hero       Bubble   1.0   2.0       4.5          3.0
## 5 Camie Utsushimi    Student      Glamour   1.5   2.5       5.0          2.5
## 6       Cementoss       Hero       Cement   4.5   5.0       5.0          5.0
##   Score Rank
## 1  26.0    1
## 2  25.0    2
## 3  12.0   94
## 4  10.5  111
## 5  11.5  105
## 6  19.5   10

1.1 Rename

# Make sure the first column is called `Name`, whatever header was read in.
colnames(hero)[1] <- "Name"

1.2 Preview

# Show the first ten rows to check the renamed column.
head(hero, n = 10)
##               Name Occupation        Quirk Power Speed Technique Intelligence
## 1      All For One    Villain  All for One   6.5   6.5       6.5          6.5
## 2        All Might       Hero  One for All   6.5   6.5       6.0          6.0
## 3     Best Jeanist       Hero Fiber Master   2.0   3.0       5.0          2.0
## 4      Bubble Girl       Hero       Bubble   1.0   2.0       4.5          3.0
## 5  Camie Utsushimi    Student      Glamour   1.5   2.5       5.0          2.5
## 6        Cementoss       Hero       Cement   4.5   5.0       5.0          5.0
## 7       Centipeder       Hero    Centipede   2.0   4.0       4.0          4.0
## 8     Chronostasis    Villain Chronostasis   2.0   2.0       5.0          5.0
## 9          Curious    Villain     Landmine   2.0   3.0       3.0          4.0
## 10            Dabi    Villain    Cremation   4.0   3.0       1.5          3.0
##    Score Rank
## 1   26.0    1
## 2   25.0    2
## 3   12.0   94
## 4   10.5  111
## 5   11.5  105
## 6   19.5   10
## 7   14.0   63
## 8   14.0   63
## 9   12.0   94
## 10  11.5  105

1.3 Data table

library(DT)
## Warning: package 'DT' was built under R version 4.0.5
# Interactive, filterable view of the whole data set.
# Loading the "Scroller" extension is not enough by itself: DataTables only
# replaces pagination with virtual scrolling when `scroller = TRUE` is set.
# `deferRender = TRUE` lets it build rows lazily as they scroll into view.
datatable(hero,
          extensions = "Scroller",
          filter = "top",
          options = list(deferRender = TRUE,
                         scroller = TRUE,
                         scrollX = TRUE,
                         scrollY = 200))
## This version of bslib is designed to work with rmarkdown version 2.7 or higher.

2. Aggregate

How many students, heroes, and villains are there?

# Tally the characters in each occupation category.
table(hero[["Occupation"]])
## 
##    Hero    Jedi Student Villain 
##      31       1      53      35

3. Top 10

Who are the top 10?

# Keep every character ranked 10th or better. Tied ranks all qualify, so the
# result can contain more than ten rows. `which()` drops NA ranks.
hero_top10 <- hero[which(hero$Rank <= 10), ]
hero_top10
##                 Name Occupation        Quirk Power Speed Technique Intelligence
## 1        All For One    Villain  All for One   6.5   6.5       6.5          6.5
## 2          All Might       Hero  One for All   6.5   6.5       6.0          6.0
## 6          Cementoss       Hero       Cement   4.5   5.0       5.0          5.0
## 17          Endeavor       Hero    Hellflame   6.5   5.5       5.0          4.0
## 18       Eraser Head       Hero      Erasure   3.0   4.5       6.5          5.5
## 28             Hawks       Hero Fierce Wings   3.5   6.0       6.5          5.0
## 39    Izuku Midoriya    Student  One for All   5.5   5.0       5.0          5.0
## 43    Katsuki Bakugo    Student    Explosion   5.0   5.0       5.5          5.0
## 84         Re-Destro    Villain       Stress   5.5   5.0       5.5          5.0
## 101   Tamaki Amajiki    Student     Manifest   5.0   4.0       6.0          5.0
## 108 Tomura Shigaraki    Villain        Decay   6.5   3.5       5.0          5.0
## 120            Yeoda       Jedi        Force  10.0  10.0      10.0         10.0
##     Score Rank
## 1    26.0    1
## 2    25.0    2
## 6    19.5   10
## 17   21.0    3
## 18   19.5   10
## 28   21.0    3
## 39   20.5    6
## 43   20.5    6
## 84   21.0    3
## 101  20.0    8
## 108  20.0    8
## 120  40.0    1

4. Sort

Sort the results by rank.

# Reorder the top-10 rows by ascending rank; tied ranks keep their
# original relative order because order() is stable.
hero_top10_sort <- hero_top10[with(hero_top10, order(Rank)), ]
hero_top10_sort
##                 Name Occupation        Quirk Power Speed Technique Intelligence
## 1        All For One    Villain  All for One   6.5   6.5       6.5          6.5
## 120            Yeoda       Jedi        Force  10.0  10.0      10.0         10.0
## 2          All Might       Hero  One for All   6.5   6.5       6.0          6.0
## 17          Endeavor       Hero    Hellflame   6.5   5.5       5.0          4.0
## 28             Hawks       Hero Fierce Wings   3.5   6.0       6.5          5.0
## 84         Re-Destro    Villain       Stress   5.5   5.0       5.5          5.0
## 39    Izuku Midoriya    Student  One for All   5.5   5.0       5.0          5.0
## 43    Katsuki Bakugo    Student    Explosion   5.0   5.0       5.5          5.0
## 101   Tamaki Amajiki    Student     Manifest   5.0   4.0       6.0          5.0
## 108 Tomura Shigaraki    Villain        Decay   6.5   3.5       5.0          5.0
## 6          Cementoss       Hero       Cement   4.5   5.0       5.0          5.0
## 18       Eraser Head       Hero      Erasure   3.0   4.5       6.5          5.5
##     Score Rank
## 1    26.0    1
## 120  40.0    1
## 2    25.0    2
## 17   21.0    3
## 28   21.0    3
## 84   21.0    3
## 39   20.5    6
## 43   20.5    6
## 101  20.0    8
## 108  20.0    8
## 6    19.5   10
## 18   19.5   10

5. Scatter plot

Looks like someone is off the charts :-)

# Widen the selection to rank 20 or better for the scatter plot, then peek
# at the first rows.
hero_top20 <- hero[which(hero$Rank <= 20), ]
head(hero_top20, n = 6)
##           Name Occupation       Quirk Power Speed Technique Intelligence Score
## 1  All For One    Villain All for One   6.5   6.5       6.5          6.5  26.0
## 2    All Might       Hero One for All   6.5   6.5       6.0          6.0  25.0
## 6    Cementoss       Hero      Cement   4.5   5.0       5.0          5.0  19.5
## 15    Edgeshot       Hero   Foldabody   3.5   6.0       5.0          4.0  18.5
## 17    Endeavor       Hero   Hellflame   6.5   5.5       5.0          4.0  21.0
## 18 Eraser Head       Hero     Erasure   3.0   4.5       6.5          5.5  19.5
##    Rank
## 1     1
## 2     2
## 6    10
## 15   13
## 17    3
## 18   10
library(ggplot2)
library(ggrepel)
library(ggpubr)


# Power vs. speed for the top-20 characters: every point is labelled,
# "Yeoda" gets an oversized red marker, and a loess curve shows the trend.
ggplot(hero_top20, aes(x = Power, y = Speed)) +
  geom_point() +
  # Highlight layer; it inherits the x/y mapping from the plot.
  geom_point(data = subset(hero_top20, Name == "Yeoda"),
             colour = "red",
             size = 5) +
  geom_smooth(method = loess) +
  geom_label_repel(aes(label = Name),
                   box.padding = 0.35,
                   point.padding = 0.8,
                   segment.color = "blue") +
  # Zoom the view (without dropping data) to a fixed 1-12 window on both axes.
  coord_cartesian(xlim = c(1, 12), ylim = c(1, 12)) +
  labs(title = "No tradeoff between power and speed?",
       x = "Power",
       y = "Speed") +
  # Setting the parent elements sizes the x and y variants alike.
  theme(axis.title = element_text(size = 16),
        axis.text = element_text(size = 14))

6. Waffle

A breakdown of occupations.

library(dplyr)

# Count the characters per occupation; the result has one row per
# occupation with the tally in `count`.
hero_occupation <- summarise(group_by(hero, Occupation), count = n())

hero_occupation
## # A tibble: 4 x 2
##   Occupation count
##   <chr>      <int>
## 1 Hero          31
## 2 Jedi           1
## 3 Student       53
## 4 Villain       35
# Order occupations from most to least common. Sorting on `count` rather
# than hard-coding row positions keeps the ordering right if the set of
# occupations or their tallies ever change.
hero_occupation <- hero_occupation[order(hero_occupation$count,
                                         decreasing = TRUE), ]

hero_occupation
## # A tibble: 4 x 2
##   Occupation count
##   <chr>      <int>
## 1 Student       53
## 2 Villain       35
## 3 Hero          31
## 4 Jedi           1
# Flatten the summary into a named vector: values are the counts, names
# are the occupations.
hero_occupation_vec <- pull(hero_occupation, var = count, name = Occupation)

hero_occupation_vec
## Student Villain    Hero    Jedi 
##      53      35      31       1
library(waffle)
# Waffle chart of the occupation counts, five squares tall, legend below.
waffle(
  parts = hero_occupation_vec,
  rows = 5,
  legend_pos = "bottom"
)

I am a hero too!