# Load the character data set.
# Without an explicit encoding the first header is read as "ï..Name", which is
# the usual symptom of a UTF-8 byte-order mark at the start of the file.
# fileEncoding = "UTF-8-BOM" strips the mark so the column arrives as "Name".
hero <- read.csv("mha_v2.csv", header = TRUE, fileEncoding = "UTF-8-BOM")
head(hero)
## Name Occupation Quirk Power Speed Technique Intelligence
## 1 All For One Villain All for One 6.5 6.5 6.5 6.5
## 2 All Might Hero One for All 6.5 6.5 6.0 6.0
## 3 Best Jeanist Hero Fiber Master 2.0 3.0 5.0 2.0
## 4 Bubble Girl Hero Bubble 1.0 2.0 4.5 3.0
## 5 Camie Utsushimi Student Glamour 1.5 2.5 5.0 2.5
## 6 Cementoss Hero Cement 4.5 5.0 5.0 5.0
## Score Rank
## 1 26.0 1
## 2 25.0 2
## 3 12.0 94
## 4 10.5 111
## 5 11.5 105
## 6 19.5 10
# Kept as a fallback: forces the first column to be called "Name" even if the
# header is still mangled on some platform.
names(hero)[1] <- "Name"
# Preview the first ten rows.
head(hero, n = 10L)
## Name Occupation Quirk Power Speed Technique Intelligence
## 1 All For One Villain All for One 6.5 6.5 6.5 6.5
## 2 All Might Hero One for All 6.5 6.5 6.0 6.0
## 3 Best Jeanist Hero Fiber Master 2.0 3.0 5.0 2.0
## 4 Bubble Girl Hero Bubble 1.0 2.0 4.5 3.0
## 5 Camie Utsushimi Student Glamour 1.5 2.5 5.0 2.5
## 6 Cementoss Hero Cement 4.5 5.0 5.0 5.0
## 7 Centipeder Hero Centipede 2.0 4.0 4.0 4.0
## 8 Chronostasis Villain Chronostasis 2.0 2.0 5.0 5.0
## 9 Curious Villain Landmine 2.0 3.0 3.0 4.0
## 10 Dabi Villain Cremation 4.0 3.0 1.5 3.0
## Score Rank
## 1 26.0 1
## 2 25.0 2
## 3 12.0 94
## 4 10.5 111
## 5 11.5 105
## 6 19.5 10
## 7 14.0 63
## 8 14.0 63
## 9 12.0 94
## 10 11.5 105
# Number of rows in the data set.
NROW(hero)
## [1] 120
A nicer table to look at.
library(DT)
# Interactive table: per-column filter boxes on top, scrolling in both
# directions, using the Scroller extension.
datatable(
  data = hero,
  options = list(scrollY = 200, scrollX = TRUE),
  filter = "top",
  extensions = "Scroller"
)
## This version of bslib is designed to work with rmarkdown version 2.7 or higher.
How many students, heroes, and villains are there?
# Frequency of each Occupation value.
table(hero[["Occupation"]])
##
## Hero Jedi Student Villain
## 31 1 53 35
The top 5.
# Characters ranked fifth or better. The name now matches the Rank <= 5 filter
# (it previously said "top10"). Tied characters share a rank, so more than
# five rows can come back.
hero_top5 <- subset(hero, Rank <= 5)
hero_top5
## Name Occupation Quirk Power Speed Technique Intelligence
## 1 All For One Villain All for One 6.5 6.5 6.5 6.5
## 2 All Might Hero One for All 6.5 6.5 6.0 6.0
## 17 Endeavor Hero Hellflame 6.5 5.5 5.0 4.0
## 28 Hawks Hero Fierce Wings 3.5 6.0 6.5 5.0
## 84 Re-Destro Villain Stress 5.5 5.0 5.5 5.0
## 120 Yeoda Jedi Force 10.0 10.0 10.0 10.0
## Score Rank
## 1 26 1
## 2 25 2
## 17 21 3
## 28 21 3
## 84 21 3
## 120 40 1
# Central tendency of Power (missing values ignored).
mean(hero[["Power"]], na.rm = TRUE)
## [1] 3.425
median(hero[["Power"]], na.rm = TRUE)
## [1] 3
# Most frequent Intelligence value; the "freq" attribute is how often it occurs.
library(DescTools)
Mode(hero[["Intelligence"]])
## [1] 3
## attr(,"freq")
## [1] 34
# Minimum, quartiles and maximum of Technique.
quantile(hero[["Technique"]], probs = seq(0, 1, by = 0.25))
## 0% 25% 50% 75% 100%
## 1.000 3.875 4.250 5.000 10.000
# Spread of Intelligence.
sd(hero[["Intelligence"]])
## [1] 1.371351
var(hero[["Intelligence"]])
## [1] 1.880602
# Pearson correlations between the four attribute scores. The columns are
# selected by name rather than by position (4:7), so the call keeps working if
# the column order of `hero` ever changes.
cor(
  hero[, c("Power", "Speed", "Technique", "Intelligence")],
  method = "pearson"
)
## Power Speed Technique Intelligence
## Power 1.00000000 0.6052469 -0.03934614 0.0368783
## Speed 0.60524692 1.0000000 0.32649036 0.1344191
## Technique -0.03934614 0.3264904 1.00000000 0.4307295
## Intelligence 0.03687830 0.1344191 0.43072945 1.0000000
# Largest and smallest total Score.
max(hero[["Score"]])
## [1] 40
min(hero[["Score"]])
## [1] 8
# Range width: range() returns c(min, max), diff() subtracts them.
diff(range(hero[["Score"]]))
## [1] 32
Manual calculation.
# z-score by hand: centre Speed on its mean, then divide by its standard
# deviation.
hero$Speed_z <- with(hero, (Speed - mean(Speed)) / sd(Speed))
head(hero$Speed_z)
## [1] 2.0439343 2.0439343 -0.3506331 -1.0347952 -0.6927141 1.0176911
A simpler way.
# scale() returns a one-column matrix carrying centring/scaling attributes.
# as.vector() flattens it so the new column is a plain numeric vector, the
# same shape as Speed_z, instead of a matrix embedded in the data frame.
hero$Speed_z2 <- as.vector(scale(hero$Speed))
head(hero$Speed_z2)
## [1] 2.0439343 2.0439343 -0.3506331 -1.0347952 -0.6927141 1.0176911
library(moments)
# Shape of the Intelligence distribution. The calls are namespace-qualified so
# it is explicit that the moments implementations are used.
moments::skewness(x = hero[["Intelligence"]])
## [1] 0.5438367
moments::kurtosis(x = hero[["Intelligence"]])
## [1] 5.414441
Statistical test for normality using skewness and kurtosis.
H0: The data have skewness and kurtosis that match a normal distribution.
H1: The data have skewness and kurtosis that do not match a normal distribution.
# Jarque-Bera normality test from the moments package. The argument is kept as
# hero$Intelligence so the "data:" line of the printed result is unchanged.
moments::jarque.test(hero$Intelligence)
##
## Jarque-Bera Normality Test
##
## data: hero$Intelligence
## JB = 35.063, p-value = 2.433e-08
## alternative hypothesis: greater
Number of characters by occupation.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# One row per Occupation with the number of characters in that group.
occupation_df <- hero %>%
  group_by(Occupation) %>%
  summarise(Number = n())
## `summarise()` ungrouping output (override with `.groups` argument)
occupation_df
## # A tibble: 4 x 2
## Occupation Number
## <chr> <int>
## 1 Hero 31
## 2 Jedi 1
## 3 Student 53
## 4 Villain 35
Mean Power by occupation
# Mean Power within each Occupation.
power_df <- hero %>%
  group_by(Occupation) %>%
  summarise(Average_Power = mean(Power))
## `summarise()` ungrouping output (override with `.groups` argument)
power_df
## # A tibble: 4 x 2
## Occupation Average_Power
## <chr> <dbl>
## 1 Hero 3.60
## 2 Jedi 10
## 3 Student 3.40
## 4 Villain 3.13
Mean Intelligence by occupation
# Mean Intelligence within each Occupation.
intelligence_df <- hero %>%
  group_by(Occupation) %>%
  summarise(Average_Intelligence = mean(Intelligence))
## `summarise()` ungrouping output (override with `.groups` argument)
intelligence_df
## # A tibble: 4 x 2
## Occupation Average_Intelligence
## <chr> <dbl>
## 1 Hero 3.90
## 2 Jedi 10
## 3 Student 3.75
## 4 Villain 3.57
Maximum Speed by occupation
# Highest Speed within each Occupation.
speed_df <- hero %>%
  group_by(Occupation) %>%
  summarise(Max_Speed = max(Speed))
## `summarise()` ungrouping output (override with `.groups` argument)
speed_df
## # A tibble: 4 x 2
## Occupation Max_Speed
## <chr> <dbl>
## 1 Hero 6.5
## 2 Jedi 10
## 3 Student 6
## 4 Villain 6.5
Minimum technique by occupation
# Lowest Technique within each Occupation. The summary column is named after
# the variable it summarises (it was previously mislabelled Min_Speed).
technique_df <- hero %>%
  group_by(Occupation) %>%
  summarize(Min_Technique = min(Technique))
## `summarise()` ungrouping output (override with `.groups` argument)
technique_df
## # A tibble: 4 x 2
## Occupation Min_Technique
## <chr> <dbl>
## 1 Hero 1
## 2 Jedi 10
## 3 Student 2
## 4 Villain 1
Create a frequency table of ranks.
Descriptive statistics of ranks may not make much sense, so this is just for illustration.
library(dplyr)
# Frequency table: one row per distinct Rank with the number of characters
# holding it.
rank_df <- hero %>%
  group_by(Rank) %>%
  summarise(frequency = n())
## `summarise()` ungrouping output (override with `.groups` argument)
rank_df
## # A tibble: 28 x 2
## Rank frequency
## <int> <int>
## 1 1 2
## 2 2 1
## 3 3 3
## 4 6 2
## 5 8 2
## 6 10 2
## 7 12 1
## 8 13 3
## 9 16 4
## 10 20 3
## # ... with 18 more rows
Expand the frequency table back into a vector with one entry per character.
# Repeat each Rank as many times as its frequency.
rank_df_data <- with(rank_df, rep(Rank, times = frequency))
rank_df_data
## [1] 1 1 2 3 3 3 6 6 8 8 10 10 12 13 13 13 16 16
## [19] 16 16 20 20 20 23 23 23 23 23 23 29 29 31 31 31 31 31
## [37] 31 31 31 39 39 41 41 41 41 41 41 41 41 41 41 41 41 41
## [55] 41 41 41 41 41 59 59 59 59 63 63 63 63 63 63 63 63 63
## [73] 63 63 63 63 63 63 78 78 80 80 80 80 80 80 80 80 80 80
## [91] 80 91 91 91 94 94 94 94 94 94 94 94 94 94 94 105 105 105
## [109] 105 105 110 111 112 112 112 112 112 117 118 119
Mean.
# Mean of the expanded ranks.
mean(x = rank_df_data)
## [1] 55.66667
Check.
# Frequency-weighted mean computed straight from the frequency table.
with(rank_df, sum(Rank * frequency) / sum(frequency))
## [1] 55.66667
# Mean of the original Rank column.
mean(hero[["Rank"]], na.rm = TRUE)
## [1] 55.66667
Median.
# Median of the expanded ranks.
median(x = rank_df_data)
## [1] 59
Mode.
# Most frequent rank; the "freq" attribute is how often it occurs.
DescTools::Mode(x = rank_df_data)
## [1] 41
## attr(,"freq")
## [1] 18
Standard Deviation.
# Sample standard deviation of the expanded ranks.
stats::sd(rank_df_data)
## [1] 34.22671
Variance.
# Sample variance of the expanded ranks.
stats::var(rank_df_data)
## [1] 1171.468
Plus Ultra!!