Directions

Let’s go!!!

Data for demo

Back to the spellbook

1. Independent samples t-test

1.1 Load data

# Read the character stats; read.csv() treats the first row as the header.
smash <- read.csv(file = "smash_3.csv")
head(smash, n = 6L)
##     character        game regular_fall fast_fall weight walk_speed run_speed
## 1       mario Super Mario         1.50     2.400     98      1.155     1.760
## 2 donkey kong Donkey Kong         1.63     2.608    127      1.365     1.873
## 3        link       Zelda         1.60     3.040    104      1.247     1.534
## 4       samus     Metroid         1.33     2.168    108      1.115     1.654
## 5  dark samus     Metroid         1.33     2.168    108      1.115     1.654
## 6       yoshi Super Mario         1.29     2.064    104      1.208     2.046
##   full_hop_height short_hop_height double_hop_height before after expertise
## 1           36.33            17.54             36.33      3     8   Amateur
## 2           34.00            17.30             35.50      8    10    Expert
## 3           27.80            13.38             29.00     10     7    Expert
## 4           37.00            18.00             37.00      6     9    Expert
## 5           37.00            18.00             37.00      2     5   Amateur
## 6           36.09            14.43             51.56      5     9   Amateur
# Count how many characters belong to each game franchise.
table(smash[["game"]])
## 
## Animal Crossing   Banjo Kazooie       Bayonetta     Castlevania     Donkey Kong 
##               2               1               1               2               3 
##    Dragon Quest       Duck Hunt      Earthbound          F-Zero   Final Fantasy 
##               1               1               1               2               1 
##     Fire Emblem          Icarus           Kirby        Mega Man      Metal Gear 
##               6               2               4               1               1 
##         Metroid          Mother        Nintendo          Pikmin         Pokemon 
##               4               1               1               1               7 
##       Punch Out           Sonic        Splatoon        Stack Up        Star Fox 
##               1               1               1               1               2 
##  Street Fighter     Super Mario             Wii       Xenoblade           Zelda 
##               2              10               4               1               6

1.2 Independent samples t-test

H0: mean run_speed of Metroid = mean run_speed of Zelda

H1: mean run_speed of Metroid ≠ mean run_speed of Zelda

Subset required data.

Compare Metroid vs. Zelda.

# Keep only the Metroid and Zelda rows for the two-group comparison.
subset_1 <- subset(smash, game %in% c("Metroid", "Zelda"))
head(subset_1, n = 6L)
##     character    game regular_fall fast_fall weight walk_speed run_speed
## 3        link   Zelda         1.60     3.040    104      1.247     1.534
## 4       samus Metroid         1.33     2.168    108      1.115     1.654
## 5  dark samus Metroid         1.33     2.168    108      1.115     1.654
## 17      sheik   Zelda         1.75     2.800     78      1.470     2.420
## 18      zelda   Zelda         1.35     2.160     85      0.914     1.430
## 23 young link   Zelda         1.80     2.880     88      1.260     1.749
##    full_hop_height short_hop_height double_hop_height before after expertise
## 3            27.80            13.38             29.00     10     7    Expert
## 4            37.00            18.00             37.00      6     9    Expert
## 5            37.00            18.00             37.00      2     5   Amateur
## 17           39.00            18.75             40.00      6     5    Expert
## 18           31.55            15.24             31.55      4     7   Amateur
## 23           33.66            16.26             33.66      7    10    Expert

Assuming unequal variances (Welch's t-test), which is the default for t.test().

# Two-sided Welch t-test of run_speed between the two games; the defaults
# (two-sided alternative, unequal variances) are spelled out for clarity.
t.test(subset_1$run_speed ~ subset_1$game,
       alternative = "two.sided",
       var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  subset_1$run_speed by subset_1$game
## t = 0.94211, df = 7.1841, p-value = 0.3767
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3363160  0.7856494
## sample estimates:
## mean in group Metroid   mean in group Zelda 
##              1.954500              1.729833

Assuming equal variance.

# Two-sided Student t-test of run_speed between the two games, pooling the
# group variances (var.equal = TRUE).
t.test(subset_1$run_speed ~ subset_1$game,
       alternative = "two.sided",
       var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  subset_1$run_speed by subset_1$game
## t = 0.91593, df = 8, p-value = 0.3865
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3409716  0.7903049
## sample estimates:
## mean in group Metroid   mean in group Zelda 
##              1.954500              1.729833

2. One-sided (directional) t-test

2.1 Subset data

Compare Kirby vs. Super Mario.

# Keep only the Kirby and Super Mario rows for the one-sided comparisons.
subset_2 <- subset(smash, game %in% c("Kirby", "Super Mario"))
head(subset_2, n = 6L)
##    character        game regular_fall fast_fall weight walk_speed run_speed
## 1      mario Super Mario         1.50     2.400     98      1.155     1.760
## 6      yoshi Super Mario         1.29     2.064    104      1.208     2.046
## 7      kirby       Kirby         1.23     1.968     79      0.977     1.727
## 10     luigi Super Mario         1.32     2.112     97      1.134     1.650
## 14     peach Super Mario         1.19     1.904     89      0.924     1.595
## 15     daisy Super Mario         1.19     1.904     89      0.924     1.595
##    full_hop_height short_hop_height double_hop_height before after expertise
## 1            36.33            17.54             36.33      3     8   Amateur
## 6            36.09            14.43             51.56      5     9   Amateur
## 7            25.37            12.24             22.00      5     9   Amateur
## 10           44.00            19.98             41.31      7    10    Expert
## 14           30.03            14.50             30.03      2     5   Amateur
## 15           30.03            14.50             30.03     10     8    Expert

2.2 Execute t-test

2.2.1 Less than alternate hypothesis

H0: mean regular_fall of Kirby >= mean regular_fall of Super Mario

H1: mean regular_fall of Kirby < mean regular_fall of Super Mario

Assuming unequal variance by default.

# One-sided Welch t-test: H1 is that the first group's mean regular_fall
# (Kirby) is below the second group's (Super Mario).
t.test(subset_2$regular_fall ~ subset_2$game,
       var.equal = FALSE,
       alternative = "less")
## 
##  Welch Two Sample t-test
## 
## data:  subset_2$regular_fall by subset_2$game
## t = 0.48396, df = 4.7882, p-value = 0.6751
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf 0.4319869
## sample estimates:
##       mean in group Kirby mean in group Super Mario 
##                     1.580                     1.497

Assuming equal variance.

# Same lower-tailed test as above, but with pooled (equal) variances.
t.test(subset_2$regular_fall ~ subset_2$game,
       var.equal = TRUE,
       alternative = "less")
## 
##  Two Sample t-test
## 
## data:  subset_2$regular_fall by subset_2$game
## t = 0.52578, df = 12, p-value = 0.6957
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf 0.3643548
## sample estimates:
##       mean in group Kirby mean in group Super Mario 
##                     1.580                     1.497

2.2.2 Greater than alternate hypothesis

H0: mean regular_fall of Kirby <= mean regular_fall of Super Mario

H1: mean regular_fall of Kirby > mean regular_fall of Super Mario

Assuming unequal variance by default.

# One-sided Welch t-test: H1 is that the first group's mean regular_fall
# (Kirby) is above the second group's (Super Mario).
t.test(subset_2$regular_fall ~ subset_2$game,
       var.equal = FALSE,
       alternative = "greater")
## 
##  Welch Two Sample t-test
## 
## data:  subset_2$regular_fall by subset_2$game
## t = 0.48396, df = 4.7882, p-value = 0.3249
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.2659869        Inf
## sample estimates:
##       mean in group Kirby mean in group Super Mario 
##                     1.580                     1.497

Assuming equal variance.

# Same upper-tailed test as above, but with pooled (equal) variances.
t.test(subset_2$regular_fall ~ subset_2$game,
       var.equal = TRUE,
       alternative = "greater")
## 
##  Two Sample t-test
## 
## data:  subset_2$regular_fall by subset_2$game
## t = 0.52578, df = 12, p-value = 0.3043
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.1983548        Inf
## sample estimates:
##       mean in group Kirby mean in group Super Mario 
##                     1.580                     1.497

3. Paired-samples t-test

Player skill level by character before and after training.

3.1 Paired samples t-test

H0: mean of before = mean of after (mean paired difference = 0)

H1: mean of before ≠ mean of after (mean paired difference ≠ 0)

# Paired t-test on the per-row differences (before - after), two-sided.
t.test(x = smash$before,
       y = smash$after,
       alternative = "two.sided",
       paired = TRUE)
## 
##  Paired t-test
## 
## data:  smash$before and smash$after
## t = -4.4097, df = 71, p-value = 3.606e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.0370808 -0.7684747
## sample estimates:
## mean of the differences 
##               -1.402778

3.1.1 Less than alternate hypothesis

# Paired t-test with H1: mean of (before - after) is below 0,
# i.e. scores are higher after than before.
t.test(x = smash$before,
       y = smash$after,
       alternative = "less",
       paired = TRUE)
## 
##  Paired t-test
## 
## data:  smash$before and smash$after
## t = -4.4097, df = 71, p-value = 1.803e-05
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf -0.8726076
## sample estimates:
## mean of the differences 
##               -1.402778

3.1.2 Greater than alternate hypothesis

# Paired t-test with H1: mean of (before - after) is above 0,
# i.e. scores are lower after than before.
t.test(x = smash$before,
       y = smash$after,
       alternative = "greater",
       paired = TRUE)
## 
##  Paired t-test
## 
## data:  smash$before and smash$after
## t = -4.4097, df = 71, p-value = 1
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -1.932948       Inf
## sample estimates:
## mean of the differences 
##               -1.402778

4. F-Test

2-sided test.

H0: Variance of walk_speed = Variance of run_speed

H1: Variance of walk_speed <> Variance of run_speed

# F test of var(walk_speed) / var(run_speed) against a ratio of 1, two-sided.
var.test(x = smash$walk_speed,
         y = smash$run_speed,
         ratio = 1,
         alternative = "two.sided")
## 
##  F test to compare two variances
## 
## data:  smash$walk_speed and smash$run_speed
## F = 0.2979, num df = 71, denom df = 71, p-value = 7.777e-07
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1863816 0.4761475
## sample estimates:
## ratio of variances 
##          0.2979012

1-sided upper

H0: Variance of walk_speed <= Variance of run_speed

H1: Variance of walk_speed > Variance of run_speed

# Upper-tailed F test: H1 is var(walk_speed) / var(run_speed) > 1.
var.test(x = smash$walk_speed,
         y = smash$run_speed,
         ratio = 1,
         alternative = "greater")
## 
##  F test to compare two variances
## 
## data:  smash$walk_speed and smash$run_speed
## F = 0.2979, num df = 71, denom df = 71, p-value = 1
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
##  0.2010832       Inf
## sample estimates:
## ratio of variances 
##          0.2979012

1-sided lower

H0: Variance of walk_speed >= Variance of run_speed

H1: Variance of walk_speed < Variance of run_speed

# Lower-tailed F test: H1 is var(walk_speed) / var(run_speed) < 1.
var.test(x = smash$walk_speed,
         y = smash$run_speed,
         ratio = 1,
         alternative = "less")
## 
##  F test to compare two variances
## 
## data:  smash$walk_speed and smash$run_speed
## F = 0.2979, num df = 71, denom df = 71, p-value = 3.888e-07
## alternative hypothesis: true ratio of variances is less than 1
## 95 percent confidence interval:
##  0.0000000 0.4413354
## sample estimates:
## ratio of variances 
##          0.2979012

5. ANOVA

Compare 3 groups for simplicity.

# Keep three franchises so the ANOVA compares exactly three groups.
anova_games <- c("Pokemon", "Super Mario", "Fire Emblem")
subset_4 <- subset(smash, game %in% anova_games)
head(subset_4, n = 6L)
##     character        game regular_fall fast_fall weight walk_speed run_speed
## 1       mario Super Mario         1.50     2.400     98      1.155     1.760
## 6       yoshi Super Mario         1.29     2.064    104      1.208     2.046
## 9     pikachu     Pokemon         1.55     2.480     79      1.302     2.039
## 10      luigi Super Mario         1.32     2.112     97      1.134     1.650
## 13 jigglypuff     Pokemon         0.98     1.568     68      0.735     1.271
## 14      peach Super Mario         1.19     1.904     89      0.924     1.595
##    full_hop_height short_hop_height double_hop_height before after expertise
## 1            36.33            17.54             36.33      3     8   Amateur
## 6            36.09            14.43             51.56      5     9   Amateur
## 9            35.50            17.12             35.50      8     9    Expert
## 10           44.00            19.98             41.31      7    10    Expert
## 13           19.79            11.26             19.79      3     7   Amateur
## 14           30.03            14.50             30.03      2     5   Amateur

H0: The mean fast_fall is the same for every game

H1: At least one game's mean fast_fall differs from the others

# One-way ANOVA of fast_fall across the games in subset_4.
# NOTE(review): the object name `anova` is the same as stats::anova();
# it is kept because the calls below refer to it.
anova <- aov(formula = fast_fall ~ game, data = subset_4)

# Print the ANOVA table (Df, sums of squares, F value, p-value).
summary(object = anova)
##             Df Sum Sq Mean Sq F value Pr(>F)
## game         2  0.353  0.1765    1.44   0.26
## Residuals   20  2.452  0.1226

Use a post hoc test (Tukey's HSD) to see which pairs of group means differ.

# Tukey HSD pairwise comparisons on the fitted aov model, using the
# default 95% family-wise confidence level.
TukeyHSD(x = anova, conf.level = 0.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = fast_fall ~ game, data = subset_4)
## 
## $game
##                               diff        lwr       upr     p adj
## Pokemon-Fire Emblem     -0.1624762 -0.6552818 0.3303294 0.6867043
## Super Mario-Fire Emblem -0.3051333 -0.7625510 0.1522844 0.2343060
## Super Mario-Pokemon     -0.1426571 -0.5791770 0.2938628 0.6911360

Test for normality.

# Shapiro-Wilk test on the fast_fall values pooled across all three games.
# NOTE(review): ANOVA's normality assumption is usually checked on the model
# residuals, e.g. shapiro.test(residuals(anova)) -- confirm which is intended.
shapiro.test(x = subset_4$fast_fall)
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_4$fast_fall
## W = 0.91682, p-value = 0.05698

Next game!!