# Load the fighter attribute table; read.csv() treats the first row as the
# header by default.
smash <- read.csv(file = "smash_3.csv")
# Preview the first six rows.
head(smash, n = 6L)
## character game regular_fall fast_fall weight walk_speed run_speed
## 1 mario Super Mario 1.50 2.400 98 1.155 1.760
## 2 donkey kong Donkey Kong 1.63 2.608 127 1.365 1.873
## 3 link Zelda 1.60 3.040 104 1.247 1.534
## 4 samus Metroid 1.33 2.168 108 1.115 1.654
## 5 dark samus Metroid 1.33 2.168 108 1.115 1.654
## 6 yoshi Super Mario 1.29 2.064 104 1.208 2.046
## full_hop_height short_hop_height double_hop_height before after expertise
## 1 36.33 17.54 36.33 3 8 Amateur
## 2 34.00 17.30 35.50 8 10 Expert
## 3 27.80 13.38 29.00 10 7 Expert
## 4 37.00 18.00 37.00 6 9 Expert
## 5 37.00 18.00 37.00 2 5 Amateur
## 6 36.09 14.43 51.56 5 9 Amateur
# Count how many fighters belong to each game franchise.
table(smash[["game"]])
##
## Animal Crossing Banjo Kazooie Bayonetta Castlevania Donkey Kong
## 2 1 1 2 3
## Dragon Quest Duck Hunt Earthbound F-Zero Final Fantasy
## 1 1 1 2 1
## Fire Emblem Icarus Kirby Mega Man Metal Gear
## 6 2 4 1 1
## Metroid Mother Nintendo Pikmin Pokemon
## 4 1 1 1 7
## Punch Out Sonic Splatoon Stack Up Star Fox
## 1 1 1 1 2
## Street Fighter Super Mario Wii Xenoblade Zelda
## 2 10 4 1 6
H0: mean run_speed of Metroid = mean run_speed of Zelda
H1: mean run_speed of Metroid <> mean run_speed of Zelda
Subset required data. Compare Metroid vs. Zelda.
# Keep only the Metroid and Zelda fighters for the two-group comparison.
subset_1 <- smash[smash$game %in% c("Metroid", "Zelda"), ]
head(subset_1, n = 6L)
## character game regular_fall fast_fall weight walk_speed run_speed
## 3 link Zelda 1.60 3.040 104 1.247 1.534
## 4 samus Metroid 1.33 2.168 108 1.115 1.654
## 5 dark samus Metroid 1.33 2.168 108 1.115 1.654
## 17 sheik Zelda 1.75 2.800 78 1.470 2.420
## 18 zelda Zelda 1.35 2.160 85 0.914 1.430
## 23 young link Zelda 1.80 2.880 88 1.260 1.749
## full_hop_height short_hop_height double_hop_height before after expertise
## 3 27.80 13.38 29.00 10 7 Expert
## 4 37.00 18.00 37.00 6 9 Expert
## 5 37.00 18.00 37.00 2 5 Amateur
## 17 39.00 18.75 40.00 6 5 Expert
## 18 31.55 15.24 31.55 4 7 Amateur
## 23 33.66 16.26 33.66 7 10 Expert
Assuming unequal variance by default.
# Welch two-sample t-test of run_speed by game (two-sided, unequal variances);
# the defaults are spelled out so the test settings are visible in the call.
t.test(subset_1$run_speed ~ subset_1$game,
       alternative = "two.sided",
       var.equal = FALSE,
       conf.level = 0.95)
##
## Welch Two Sample t-test
##
## data: subset_1$run_speed by subset_1$game
## t = 0.94211, df = 7.1841, p-value = 0.3767
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3363160 0.7856494
## sample estimates:
## mean in group Metroid mean in group Zelda
## 1.954500 1.729833
Assuming equal variance.
# Same comparison with a pooled variance estimate (classic Student t-test).
t.test(subset_1$run_speed ~ subset_1$game,
       alternative = "two.sided",
       var.equal = TRUE,
       conf.level = 0.95)
##
## Two Sample t-test
##
## data: subset_1$run_speed by subset_1$game
## t = 0.91593, df = 8, p-value = 0.3865
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3409716 0.7903049
## sample estimates:
## mean in group Metroid mean in group Zelda
## 1.954500 1.729833
Test for normality.
# The two-sample t-test assumes run_speed is normal *within each game*, so
# check each group separately; a pooled test mixes the two groups and can be
# distorted by a difference in their means. drop = TRUE skips empty groups.
# (Output for this call is not recorded below; re-knit to see it.)
lapply(split(subset_1$run_speed, subset_1$game, drop = TRUE), shapiro.test)
# Pooled check from the original analysis; its output follows.
shapiro.test(subset_1$run_speed)
##
## Shapiro-Wilk normality test
##
## data: subset_1$run_speed
## W = 0.92509, p-value = 0.4013
Compare Kirby vs. Super Mario.
# Keep only the Kirby and Super Mario fighters.
subset_2 <- smash[smash$game %in% c("Kirby", "Super Mario"), ]
head(subset_2, n = 6L)
## character game regular_fall fast_fall weight walk_speed run_speed
## 1 mario Super Mario 1.50 2.400 98 1.155 1.760
## 6 yoshi Super Mario 1.29 2.064 104 1.208 2.046
## 7 kirby Kirby 1.23 1.968 79 0.977 1.727
## 10 luigi Super Mario 1.32 2.112 97 1.134 1.650
## 14 peach Super Mario 1.19 1.904 89 0.924 1.595
## 15 daisy Super Mario 1.19 1.904 89 0.924 1.595
## full_hop_height short_hop_height double_hop_height before after expertise
## 1 36.33 17.54 36.33 3 8 Amateur
## 6 36.09 14.43 51.56 5 9 Amateur
## 7 25.37 12.24 22.00 5 9 Amateur
## 10 44.00 19.98 41.31 7 10 Expert
## 14 30.03 14.50 30.03 2 5 Amateur
## 15 30.03 14.50 30.03 10 8 Expert
H0: mean regular_fall of Kirby >= mean regular_fall of Super Mario
H1: mean regular_fall of Kirby < mean regular_fall of Super Mario
Assuming unequal variance by default.
# Lower-tailed Welch t-test: is mean regular_fall of the first group (Kirby)
# less than that of the second (Super Mario)?
t.test(subset_2$regular_fall ~ subset_2$game,
       alternative = "less",
       var.equal = FALSE,
       conf.level = 0.95)
##
## Welch Two Sample t-test
##
## data: subset_2$regular_fall by subset_2$game
## t = 0.48396, df = 4.7882, p-value = 0.6751
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.4319869
## sample estimates:
## mean in group Kirby mean in group Super Mario
## 1.580 1.497
Assuming equal variance.
# Lower-tailed test again, this time with a pooled variance estimate.
t.test(subset_2$regular_fall ~ subset_2$game,
       alternative = "less",
       var.equal = TRUE,
       conf.level = 0.95)
##
## Two Sample t-test
##
## data: subset_2$regular_fall by subset_2$game
## t = 0.52578, df = 12, p-value = 0.6957
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.3643548
## sample estimates:
## mean in group Kirby mean in group Super Mario
## 1.580 1.497
H0: mean regular_fall of Kirby <= mean regular_fall of Super Mario
H1: mean regular_fall of Kirby > mean regular_fall of Super Mario
Assuming unequal variance by default.
# Upper-tailed Welch t-test: is mean regular_fall of the first group (Kirby)
# greater than that of the second (Super Mario)?
t.test(subset_2$regular_fall ~ subset_2$game,
       alternative = "greater",
       var.equal = FALSE,
       conf.level = 0.95)
##
## Welch Two Sample t-test
##
## data: subset_2$regular_fall by subset_2$game
## t = 0.48396, df = 4.7882, p-value = 0.3249
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -0.2659869 Inf
## sample estimates:
## mean in group Kirby mean in group Super Mario
## 1.580 1.497
Assuming equal variance.
# Upper-tailed test again, this time with a pooled variance estimate.
t.test(subset_2$regular_fall ~ subset_2$game,
       alternative = "greater",
       var.equal = TRUE,
       conf.level = 0.95)
##
## Two Sample t-test
##
## data: subset_2$regular_fall by subset_2$game
## t = 0.52578, df = 12, p-value = 0.3043
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -0.1983548 Inf
## sample estimates:
## mean in group Kirby mean in group Super Mario
## 1.580 1.497
Test for normality.
# The two-sample t-test assumes regular_fall is normal *within each game*, so
# check each group separately; a pooled test mixes the two groups and can be
# distorted by a difference in their means. drop = TRUE skips empty groups.
# (Output for this call is not recorded below; re-knit to see it.)
lapply(split(subset_2$regular_fall, subset_2$game, drop = TRUE), shapiro.test)
# Pooled check from the original analysis; its output follows.
shapiro.test(subset_2$regular_fall)
##
## Shapiro-Wilk normality test
##
## data: subset_2$regular_fall
## W = 0.92898, p-value = 0.2953
Player skill level by character before and after training.
H0: mean of before = mean of after (mean paired difference = 0)
H1: mean of before <> mean of after (mean paired difference <> 0)
# Paired t-test on each character's before/after skill ratings (two-sided).
t.test(x = smash$before, y = smash$after,
       alternative = "two.sided",
       paired = TRUE)
##
## Paired t-test
##
## data: smash$before and smash$after
## t = -4.4097, df = 71, p-value = 3.606e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.0370808 -0.7684747
## sample estimates:
## mean of the differences
## -1.402778
# Lower-tailed paired test: is the mean of (before - after) below zero,
# i.e. did ratings go up after training?
t.test(x = smash$before, y = smash$after,
       alternative = "less",
       paired = TRUE)
##
## Paired t-test
##
## data: smash$before and smash$after
## t = -4.4097, df = 71, p-value = 1.803e-05
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.8726076
## sample estimates:
## mean of the differences
## -1.402778
# Upper-tailed paired test: is the mean of (before - after) above zero?
t.test(x = smash$before, y = smash$after,
       alternative = "greater",
       paired = TRUE)
##
## Paired t-test
##
## data: smash$before and smash$after
## t = -4.4097, df = 71, p-value = 1
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -1.932948 Inf
## sample estimates:
## mean of the differences
## -1.402778
Test for normality.
# A paired t-test assumes the *paired differences* are normally distributed,
# not each measurement on its own, so test the differences directly.
# (Output for this call is not recorded below; re-knit to see it.)
shapiro.test(smash$after - smash$before)
# Marginal check of the "before" ratings from the original analysis; its
# output follows.
shapiro.test(smash$before)
##
## Shapiro-Wilk normality test
##
## data: smash$before
## W = 0.92748, p-value = 0.0004692
# Shapiro-Wilk normality check of the "after" ratings on their own.
shapiro.test(x = smash$after)
##
## Shapiro-Wilk normality test
##
## data: smash$after
## W = 0.90091, p-value = 3.329e-05
2-sided test.
H0: Variance of walk_speed = Variance of run_speed
H1: Variance of walk_speed <> Variance of run_speed
# F test for equality of the walk_speed and run_speed variances (two-sided).
var.test(x = smash$walk_speed,
         y = smash$run_speed,
         ratio = 1,
         alternative = "two.sided")
##
## F test to compare two variances
##
## data: smash$walk_speed and smash$run_speed
## F = 0.2979, num df = 71, denom df = 71, p-value = 7.777e-07
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1863816 0.4761475
## sample estimates:
## ratio of variances
## 0.2979012
1-sided upper
H0: Variance of walk_speed <= Variance of run_speed
H1: Variance of walk_speed > Variance of run_speed
# Upper-tailed F test: is var(walk_speed) / var(run_speed) greater than 1?
var.test(x = smash$walk_speed,
         y = smash$run_speed,
         ratio = 1,
         alternative = "greater")
##
## F test to compare two variances
##
## data: smash$walk_speed and smash$run_speed
## F = 0.2979, num df = 71, denom df = 71, p-value = 1
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
## 0.2010832 Inf
## sample estimates:
## ratio of variances
## 0.2979012
1-sided lower
H0: Variance of walk_speed >= Variance of run_speed
H1: Variance of walk_speed < Variance of run_speed
# Lower-tailed F test: is var(walk_speed) / var(run_speed) less than 1?
var.test(x = smash$walk_speed,
         y = smash$run_speed,
         ratio = 1,
         alternative = "less")
##
## F test to compare two variances
##
## data: smash$walk_speed and smash$run_speed
## F = 0.2979, num df = 71, denom df = 71, p-value = 3.888e-07
## alternative hypothesis: true ratio of variances is less than 1
## 95 percent confidence interval:
## 0.0000000 0.4413354
## sample estimates:
## ratio of variances
## 0.2979012
Subset data.
# Keep the character name and the two speed columns (columns 1, 6 and 7),
# selected by name so the code does not depend on column order.
speed_cols <- c("character", "walk_speed", "run_speed")
subset_3 <- smash[, speed_cols]
head(subset_3, n = 6L)
## character walk_speed run_speed
## 1 mario 1.155 1.760
## 2 donkey kong 1.365 1.873
## 3 link 1.247 1.534
## 4 samus 1.115 1.654
## 5 dark samus 1.115 1.654
## 6 yoshi 1.208 2.046
Convert to long.
# Reshape to long format: one row per character and movement type, with the
# measured columns listed explicitly.
library(reshape2)
subset_3_long <- melt(
  data = subset_3,
  id.vars = "character",
  measure.vars = c("walk_speed", "run_speed"),
  variable.name = "movement",
  value.name = "speed"
)
head(subset_3_long, n = 6L)
## character movement speed
## 1 mario walk_speed 1.155
## 2 donkey kong walk_speed 1.365
## 3 link walk_speed 1.247
## 4 samus walk_speed 1.115
## 5 dark samus walk_speed 1.115
## 6 yoshi walk_speed 1.208
# Show the last six rows to confirm the run_speed values were stacked below
# the walk_speed values.
tail(subset_3_long, n = 6L)
## character movement speed
## 139 king k rool run_speed 1.485
## 140 isabelle run_speed 1.480
## 141 incineroar run_speed 1.180
## 142 piranha plant run_speed 1.720
## 143 hero run_speed 1.840
## 144 banjo and kazooie run_speed 2.180
2-sided test.
H0: Variance of walk_speed = Variance of run_speed
H1: Variance of walk_speed <> Variance of run_speed
# Same two-sided F test as on the wide data, now using the long-format
# formula interface (speed grouped by movement).
var.test(subset_3_long$speed ~ subset_3_long$movement,
         ratio = 1,
         alternative = "two.sided")
##
## F test to compare two variances
##
## data: subset_3_long$speed by subset_3_long$movement
## F = 0.2979, num df = 71, denom df = 71, p-value = 7.777e-07
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1863816 0.4761475
## sample estimates:
## ratio of variances
## 0.2979012
1-sided upper
H0: Variance of walk_speed <= Variance of run_speed
H1: Variance of walk_speed > Variance of run_speed
# Upper-tailed F test via the formula interface.
var.test(subset_3_long$speed ~ subset_3_long$movement,
         ratio = 1,
         alternative = "greater")
##
## F test to compare two variances
##
## data: subset_3_long$speed by subset_3_long$movement
## F = 0.2979, num df = 71, denom df = 71, p-value = 1
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
## 0.2010832 Inf
## sample estimates:
## ratio of variances
## 0.2979012
1-sided lower
H0: Variance of walk_speed >= Variance of run_speed
H1: Variance of walk_speed < Variance of run_speed
# Lower-tailed F test via the formula interface.
var.test(subset_3_long$speed ~ subset_3_long$movement,
         ratio = 1,
         alternative = "less")
##
## F test to compare two variances
##
## data: subset_3_long$speed by subset_3_long$movement
## F = 0.2979, num df = 71, denom df = 71, p-value = 3.888e-07
## alternative hypothesis: true ratio of variances is less than 1
## 95 percent confidence interval:
## 0.0000000 0.4413354
## sample estimates:
## ratio of variances
## 0.2979012
Compare 3 groups for simplicity.
# Keep the three franchises used in the ANOVA examples.
anova_games <- c("Pokemon", "Super Mario", "Fire Emblem")
subset_4 <- smash[smash$game %in% anova_games, ]
head(subset_4, n = 6L)
## character game regular_fall fast_fall weight walk_speed run_speed
## 1 mario Super Mario 1.50 2.400 98 1.155 1.760
## 6 yoshi Super Mario 1.29 2.064 104 1.208 2.046
## 9 pikachu Pokemon 1.55 2.480 79 1.302 2.039
## 10 luigi Super Mario 1.32 2.112 97 1.134 1.650
## 13 jigglypuff Pokemon 0.98 1.568 68 0.735 1.271
## 14 peach Super Mario 1.19 1.904 89 0.924 1.595
## full_hop_height short_hop_height double_hop_height before after expertise
## 1 36.33 17.54 36.33 3 8 Amateur
## 6 36.09 14.43 51.56 5 9 Amateur
## 9 35.50 17.12 35.50 8 9 Expert
## 10 44.00 19.98 41.31 7 10 Expert
## 13 19.79 11.26 19.79 3 7 Amateur
## 14 30.03 14.50 30.03 2 5 Amateur
H0: The mean fast_fall is the same for every game
H1: The mean fast_fall of at least one game differs from the others
# One-way ANOVA of fast_fall across the three games.
# Note: the object name `anova` is the same as the stats::anova() function.
anova <- aov(formula = fast_fall ~ game, data = subset_4)
# Print the ANOVA table (Df, sums of squares, F value and p-value).
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## game 2 0.353 0.1765 1.44 0.26
## Residuals 20 2.452 0.1226
Use post hoc test to determine the mean differences.
# Tukey honest significant differences for every pair of games, at the
# default 95% family-wise confidence level.
TukeyHSD(anova, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = fast_fall ~ game, data = subset_4)
##
## $game
## diff lwr upr p adj
## Pokemon-Fire Emblem -0.1624762 -0.6552818 0.3303294 0.6867043
## Super Mario-Fire Emblem -0.3051333 -0.7625510 0.1522844 0.2343060
## Super Mario-Pokemon -0.1426571 -0.5791770 0.2938628 0.6911360
Test for normality.
# ANOVA assumes the model residuals are normal, not the pooled response, so
# test the residuals of the one-way fit (`anova`, fitted above).
# (Output for this call is not recorded below; re-knit to see it.)
shapiro.test(residuals(anova))
# Pooled check of the raw response from the original analysis; its output
# follows.
shapiro.test(subset_4$fast_fall)
##
## Shapiro-Wilk normality test
##
## data: subset_4$fast_fall
## W = 0.91682, p-value = 0.05698
When there is more than 1 variable to categorise the data, we can use a two-way ANOVA.
Use game and expertise.
# Two-way additive ANOVA: regular_fall explained by game and expertise
# (main effects only, no interaction).
anova_2 <- aov(formula = regular_fall ~ game + expertise, data = subset_4)
summary(anova_2)
## Df Sum Sq Mean Sq F value Pr(>F)
## game 2 0.1162 0.05812 1.005 0.385
## expertise 1 0.1242 0.12418 2.147 0.159
## Residuals 19 1.0990 0.05784
In this case, adding expertise did not significantly improve the model: its effect is not significant (p = 0.159), so it explains little of the variation in regular_fall beyond what game already explains.
Use a post hoc test to determine the mean differences.
# Pairwise Tukey comparisons for each factor of the additive model.
TukeyHSD(anova_2, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = regular_fall ~ game + expertise, data = subset_4)
##
## $game
## diff lwr upr p adj
## Pokemon-Fire Emblem -0.05333333 -0.3932506 0.2865840 0.9165281
## Super Mario-Fire Emblem -0.16633333 -0.4818415 0.1491748 0.3916202
## Super Mario-Pokemon -0.11300000 -0.4140937 0.1880937 0.6141903
##
## $expertise
## diff lwr upr p adj
## Expert-Amateur 0.140846 -0.06927372 0.3509656 0.176753
# Two-way ANOVA with interaction: `game * expertise` fits both main effects
# plus the game:expertise interaction term.
anova_3 <- aov(formula = regular_fall ~ game * expertise, data = subset_4)
summary(anova_3)
## Df Sum Sq Mean Sq F value Pr(>F)
## game 2 0.1162 0.05812 1.002 0.388
## expertise 1 0.1242 0.12418 2.141 0.162
## game:expertise 2 0.1130 0.05650 0.974 0.398
## Residuals 17 0.9860 0.05800
# Pairwise Tukey comparisons for both factors and for every
# game:expertise combination.
TukeyHSD(anova_3, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = regular_fall ~ game * expertise, data = subset_4)
##
## $game
## diff lwr upr p adj
## Pokemon-Fire Emblem -0.05333333 -0.3970504 0.2903838 0.9167871
## Super Mario-Fire Emblem -0.16633333 -0.4853684 0.1527018 0.3946154
## Super Mario-Pokemon -0.11300000 -0.4174595 0.1914595 0.6158672
##
## $expertise
## diff lwr upr p adj
## Expert-Amateur 0.140846 -0.07124772 0.3529396 0.1791826
##
## $`game:expertise`
## diff lwr upr p adj
## Pokemon:Amateur-Fire Emblem:Amateur -0.3025 -0.9696311 0.3646311 0.6978071
## Super Mario:Amateur-Fire Emblem:Amateur -0.1605 -0.6772575 0.3562575 0.9137858
## Fire Emblem:Expert-Fire Emblem:Amateur 0.0925 -0.5746311 0.7596311 0.9974338
## Pokemon:Expert-Fire Emblem:Amateur 0.0895 -0.4272575 0.6062575 0.9927677
## Super Mario:Expert-Fire Emblem:Amateur -0.1105 -0.6272575 0.4062575 0.9813904
## Super Mario:Amateur-Pokemon:Amateur 0.1420 -0.5025098 0.7865098 0.9788009
## Fire Emblem:Expert-Pokemon:Amateur 0.3950 -0.3753366 1.1653366 0.5853137
## Pokemon:Expert-Pokemon:Amateur 0.3920 -0.2525098 1.0365098 0.4102376
## Super Mario:Expert-Pokemon:Amateur 0.1920 -0.4525098 0.8365098 0.9264976
## Fire Emblem:Expert-Super Mario:Amateur 0.2530 -0.3915098 0.8975098 0.8038794
## Pokemon:Expert-Super Mario:Amateur 0.2500 -0.2372036 0.7372036 0.5846068
## Super Mario:Expert-Super Mario:Amateur 0.0500 -0.4372036 0.5372036 0.9993932
## Pokemon:Expert-Fire Emblem:Expert -0.0030 -0.6475098 0.6415098 1.0000000
## Super Mario:Expert-Fire Emblem:Expert -0.2030 -0.8475098 0.4415098 0.9091189
## Super Mario:Expert-Pokemon:Expert -0.2000 -0.6872036 0.2872036 0.7742353
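For small samples, or when normality is doubtful, a Kruskal-Wallis test can be used instead of ANOVA. It is rank-based, so it compares the groups' distributions (mean ranks) rather than their means.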
# Rank-based Kruskal-Wallis test of fast_fall across the three games.
kw <- kruskal.test(fast_fall ~ game, data = subset_4)
print(kw)
##
## Kruskal-Wallis rank sum test
##
## data: fast_fall by game
## Kruskal-Wallis chi-squared = 3.0472, df = 2, p-value = 0.2179
Use a post hoc test (Dunn's test) to determine which pairs of groups differ. With kw = TRUE it also reports the Kruskal-Wallis statistic, so both can be obtained in 1 step.
# Dunn's pairwise rank comparisons after Kruskal-Wallis. kw = TRUE also
# prints the Kruskal-Wallis result; no p-value adjustment method is set here.
library(dunn.test)
dunn.test(
  x = subset_4$fast_fall,
  g = subset_4$game,
  kw = TRUE,
  table = TRUE,
  alpha = 0.05
)
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 3.0472, df = 2, p-value = 0.22
##
##
## Comparison of x by group
## (No adjustment)
## Col Mean-|
## Row Mean | Fire Emb Pokemon
## ---------+----------------------
## Pokemon | 0.658023
## | 0.2553
## |
## Super Ma | 1.698708 1.037161
## | 0.0447 0.1498
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
Next game!!