# 1. Load data ------------------------------------------------------------
# Read the hero roster and preview the first rows. Text columns are read as
# factors explicitly: R >= 4.0.0 defaults read.csv() to
# stringsAsFactors = FALSE, whereas the str() output below and the later
# confusionMatrix() calls in this script expect factor columns such as House.
hogwarts <- read.csv(
  "super_heroes_hogwarts_v3a.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
head(hogwarts)
## ID Name Gender Race Height Publisher
## 1 A001 A-Bomb Male Human 203 Marvel Comics
## 2 A002 Abe Sapien Male Icthyo Sapien 191 Dark Horse Comics
## 3 A004 Abomination Male Human / Radiation 203 Marvel Comics
## 4 A009 Agent 13 Female <NA> 173 Marvel Comics
## 5 A015 Alex Mercer Male Human NA Wildstorm
## 6 A016 Alex Woolsly Male <NA> NA NBC - Heroes
## Alignment Weight Manipulative Resourceful Dismissive Intelligent
## 1 good 441 10 10 7 6
## 2 good 65 7 7 6 8
## 3 bad 441 6 8 1 6
## 4 good 61 7 7 1 9
## 5 bad NA 10 6 8 3
## 6 good NA 8 10 5 5
## Trusting Loyal Stubborn Brave HouseID House STR DEX CON INT WIS CHA
## 1 7 7 7 9 1 Slytherin 18 11 17 12 13 11
## 2 6 7 6 9 1 Slytherin 16 17 10 13 15 11
## 3 3 3 5 2 1 Slytherin 13 14 13 10 18 15
## 4 7 4 6 6 1 Slytherin 15 18 16 16 17 10
## 5 4 4 1 8 1 Slytherin 14 17 13 12 10 11
## 6 6 7 7 6 1 Slytherin 14 14 11 13 12 12
## Level HP
## 1 1 7
## 2 8 72
## 3 15 135
## 4 14 140
## 5 9 72
## 6 1 8
# Show the type and the leading values of every column.
str(hogwarts)
## 'data.frame': 734 obs. of 26 variables:
## $ ID : Factor w/ 734 levels "A001","A002",..: 1 2 4 9 15 16 24 25 28 32 ...
## $ Name : Factor w/ 715 levels "A-Bomb","Abe Sapien",..: 1 2 4 9 15 16 23 24 27 31 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 2 2 2 1 2 2 2 1 2 2 ...
## $ Race : Factor w/ 61 levels "Alien","Alpha",..: 24 33 28 NA 24 NA 57 43 24 21 ...
## $ Height : num 203 191 203 173 NA NA NA 165 183 61 ...
## $ Publisher : Factor w/ 25 levels "","ABC Studios",..: 13 3 13 13 25 15 3 13 4 4 ...
## $ Alignment : Factor w/ 3 levels "bad","good","neutral": 2 2 1 2 1 2 2 2 2 1 ...
## $ Weight : int 441 65 441 61 NA NA NA 57 83 NA ...
## $ Manipulative: int 10 7 6 7 10 8 8 9 7 7 ...
## $ Resourceful : int 10 7 8 7 6 10 6 8 6 7 ...
## $ Dismissive : int 7 6 1 1 8 5 8 9 6 7 ...
## $ Intelligent : int 6 8 6 9 3 5 7 4 5 1 ...
## $ Trusting : int 7 6 3 7 4 6 4 1 8 9 ...
## $ Loyal : int 7 7 3 4 4 7 1 6 3 1 ...
## $ Stubborn : int 7 6 5 6 1 7 5 5 3 6 ...
## $ Brave : int 9 9 2 6 8 6 2 4 2 5 ...
## $ HouseID : int 1 1 1 1 1 1 1 1 1 1 ...
## $ House : Factor w/ 4 levels "Gryffindor","Hufflepuff",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ STR : int 18 16 13 15 14 14 15 8 10 8 ...
## $ DEX : int 11 17 14 18 17 14 17 17 17 10 ...
## $ CON : int 17 10 13 16 13 11 15 12 15 11 ...
## $ INT : int 12 13 10 16 12 13 18 15 18 16 ...
## $ WIS : int 13 15 18 17 10 12 13 17 13 12 ...
## $ CHA : int 11 11 15 10 11 12 18 18 14 11 ...
## $ Level : int 1 8 15 14 9 1 11 1 8 7 ...
## $ HP : int 7 72 135 140 72 8 88 8 56 63 ...
# Per-column summaries: quantiles and mean for numeric columns, level counts
# for factor columns, and the number of NA values where any are present.
summary(hogwarts)
## ID Name Gender Race
## A001 : 1 Goliath : 3 Female:200 Human :208
## A002 : 1 Spider-Man: 3 Male :505 Mutant : 63
## A003 : 1 Angel : 2 NA's : 29 God / Eternal : 14
## A004 : 1 Atlas : 2 Cyborg : 11
## A005 : 1 Atom : 2 Human / Radiation: 11
## A006 : 1 Batgirl : 2 (Other) :123
## (Other):728 (Other) :720 NA's :304
## Height Publisher Alignment Weight
## Min. : 15.2 Marvel Comics :388 bad :207 Min. : 2.0
## 1st Qu.:173.0 DC Comics :215 good :496 1st Qu.: 61.0
## Median :183.0 NBC - Heroes : 19 neutral: 24 Median : 81.0
## Mean :186.7 Dark Horse Comics: 18 NA's : 7 Mean :112.3
## 3rd Qu.:191.0 : 15 3rd Qu.:108.0
## Max. :975.0 George Lucas : 14 Max. :900.0
## NA's :217 (Other) : 65 NA's :239
## Manipulative Resourceful Dismissive Intelligent
## Min. : 1.000 Min. : 1.000 Min. : 1.00 Min. : 1.000
## 1st Qu.: 4.000 1st Qu.: 4.000 1st Qu.: 4.00 1st Qu.: 4.000
## Median : 6.000 Median : 6.000 Median : 6.00 Median : 6.000
## Mean : 5.827 Mean : 5.993 Mean : 5.74 Mean : 5.749
## 3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.: 8.00 3rd Qu.: 7.000
## Max. :10.000 Max. :10.000 Max. :10.00 Max. :10.000
##
## Trusting Loyal Stubborn Brave
## Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 4.000 1st Qu.: 4.000 1st Qu.: 4.000 1st Qu.: 4.000
## Median : 6.000 Median : 6.000 Median : 6.000 Median : 6.000
## Mean : 5.872 Mean : 5.778 Mean : 5.828 Mean : 5.835
## 3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.: 8.000
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
##
## HouseID House STR DEX
## Min. :1.000 Gryffindor:188 Min. : 8.00 Min. :10.00
## 1st Qu.:1.000 Hufflepuff:189 1st Qu.:10.00 1st Qu.:12.00
## Median :3.000 Ravenclaw :156 Median :13.00 Median :14.00
## Mean :2.495 Slytherin :201 Mean :13.09 Mean :13.97
## 3rd Qu.:4.000 3rd Qu.:16.00 3rd Qu.:16.00
## Max. :4.000 Max. :18.00 Max. :18.00
##
## CON INT WIS CHA
## Min. :10.00 Min. :10.00 Min. :10.00 Min. :10.00
## 1st Qu.:12.00 1st Qu.:12.00 1st Qu.:12.00 1st Qu.:12.00
## Median :14.00 Median :14.00 Median :14.00 Median :14.00
## Mean :14.13 Mean :14.01 Mean :13.99 Mean :14.02
## 3rd Qu.:16.00 3rd Qu.:16.00 3rd Qu.:16.00 3rd Qu.:16.00
## Max. :18.00 Max. :18.00 Max. :18.00 Max. :18.00
##
## Level HP
## Min. : 1.000 Min. : 6.00
## 1st Qu.: 5.000 1st Qu.: 36.00
## Median : 8.000 Median : 63.00
## Mean : 8.316 Mean : 66.89
## 3rd Qu.:12.000 3rd Qu.: 91.00
## Max. :15.000 Max. :150.00
##
# Row count and column names of the full data set, for reference when
# selecting variables further down.
nrow(hogwarts)
## [1] 734
names(hogwarts)
## [1] "ID" "Name" "Gender" "Race"
## [5] "Height" "Publisher" "Alignment" "Weight"
## [9] "Manipulative" "Resourceful" "Dismissive" "Intelligent"
## [13] "Trusting" "Loyal" "Stubborn" "Brave"
## [17] "HouseID" "House" "STR" "DEX"
## [21] "CON" "INT" "WIS" "CHA"
## [25] "Level" "HP"
# 2. Training validation split ----------------------------------------------------------------------
# Fix the RNG seed so the random split is reproducible.
set.seed(666)
# Draw 60% of the row numbers (rounded down) for training; every remaining
# row goes to validation. seq_len() replaces 1:nrow() so that an empty data
# frame yields an empty index instead of c(1, 0), and floor() makes the
# truncation of the fractional sample size explicit.
train_index <- sample(seq_len(nrow(hogwarts)), floor(0.6 * nrow(hogwarts)))
valid_index <- setdiff(seq_len(nrow(hogwarts)), train_index)
train_df <- hogwarts[train_index, ]
valid_df <- hogwarts[valid_index, ]
# Sanity check: the two partitions together should account for all rows.
nrow(train_df)
## [1] 440
nrow(valid_df)
## [1] 294
# 2. Discriminant analysis method 1 for classification ------------------------------------------------
library(DiscriMiner)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Registered S3 method overwritten by 'caret':
## method from
## print.plsda DiscriMiner
# Fit a linear discriminant model (DiscriMiner::linDA) that predicts House
# from the six ability scores. Columns are selected by name rather than by
# position (19:24 and 18) so the fit cannot silently pick up the wrong
# variables if the column order of the CSV changes.
hogwarts_da1 <- linDA(
  variables = train_df[, c("STR", "DEX", "CON", "INT", "WIS", "CHA")],
  group = train_df$House,
  prior = NULL
)
# Classification-function coefficients for each house: one constant plus one
# weight per predictor.
hogwarts_da1$functions
## Gryffindor Hufflepuff Ravenclaw Slytherin
## constant -85.284470 -87.332216 -88.794843 -87.306166
## STR 1.346405 1.345711 1.278927 1.328601
## DEX 1.942957 1.980996 1.998845 2.036727
## CON 2.208822 2.294455 2.274260 2.220187
## INT 2.217254 2.218879 2.239196 2.262369
## WIS 2.125235 2.216598 2.252038 2.148734
## CHA 2.338362 2.266359 2.368045 2.330359
# Confusion table stored on the fitted model: rows are the original houses,
# columns the predicted houses.
hogwarts_da1$confusion
## predicted
## original Gryffindor Hufflepuff Ravenclaw Slytherin
## Gryffindor 40 32 11 30
## Hufflepuff 30 37 14 31
## Ravenclaw 20 24 21 30
## Slytherin 30 29 19 42
# Per-house discriminant scores for the first few training observations.
head(hogwarts_da1$scores)
## Gryffindor Hufflepuff Ravenclaw Slytherin
## 574 75.90927 75.48810 75.22420 75.81402
## 638 103.74422 104.32058 104.72113 104.11872
## 608 69.20524 69.16321 68.36535 68.91883
## 123 61.33969 60.31274 60.24482 60.74449
## 540 89.28855 89.59703 90.02551 89.09504
## 654 76.18705 76.48728 76.08213 76.09919
# Training-set performance via caret: compare the houses assigned by the
# fitted model with the true houses of the training rows.
confusionMatrix(
  data = hogwarts_da1$classification,
  reference = train_df$House
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Gryffindor Hufflepuff Ravenclaw Slytherin
## Gryffindor 40 30 20 30
## Hufflepuff 32 37 24 29
## Ravenclaw 11 14 21 19
## Slytherin 30 31 30 42
##
## Overall Statistics
##
## Accuracy : 0.3182
## 95% CI : (0.2749, 0.364)
## No Information Rate : 0.2727
## P-Value [Acc > NIR] : 0.01958
##
## Kappa : 0.0849
##
## Mcnemar's Test P-Value : 0.24968
##
## Statistics by Class:
##
## Class: Gryffindor Class: Hufflepuff Class: Ravenclaw
## Sensitivity 0.35398 0.33036 0.22105
## Specificity 0.75535 0.74085 0.87246
## Pos Pred Value 0.33333 0.30328 0.32308
## Neg Pred Value 0.77187 0.76415 0.80267
## Prevalence 0.25682 0.25455 0.21591
## Detection Rate 0.09091 0.08409 0.04773
## Detection Prevalence 0.27273 0.27727 0.14773
## Balanced Accuracy 0.55467 0.53561 0.54676
## Class: Slytherin
## Sensitivity 0.35000
## Specificity 0.71562
## Pos Pred Value 0.31579
## Neg Pred Value 0.74593
## Prevalence 0.27273
## Detection Rate 0.09545
## Detection Prevalence 0.30227
## Balanced Accuracy 0.53281
# Score the validation rows with the fitted model. The predictors are
# selected by name (instead of positions 19:24) so they are guaranteed to be
# the six ability scores regardless of column order.
hogwarts_da1_pred <- classify(
  hogwarts_da1,
  newdata = valid_df[, c("STR", "DEX", "CON", "INT", "WIS", "CHA")]
)
# Per-house discriminant scores for the first few validation rows.
head(hogwarts_da1_pred$scores)
## Gryffindor Hufflepuff Ravenclaw Slytherin
## 1 77.80129 77.84498 77.32957 78.05345
## 2 83.63707 83.80682 83.82974 83.59856
## 3 68.89152 68.73506 68.09510 69.05077
## 4 74.84416 74.67949 74.24544 75.00111
## 5 89.12322 88.85752 89.07167 89.32767
## 6 72.51990 72.26318 71.53793 72.41071
# Predicted house for the first few validation rows.
head(hogwarts_da1_pred$pred_class)
## [1] Slytherin Ravenclaw Slytherin Slytherin Slytherin Gryffindor
## Levels: Gryffindor Hufflepuff Ravenclaw Slytherin
# Validation-set performance via caret: predicted houses against the true
# houses of the held-out rows.
confusionMatrix(
  data = hogwarts_da1_pred$pred_class,
  reference = valid_df$House
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Gryffindor Hufflepuff Ravenclaw Slytherin
## Gryffindor 25 22 14 23
## Hufflepuff 19 19 15 22
## Ravenclaw 14 10 11 7
## Slytherin 17 26 21 29
##
## Overall Statistics
##
## Accuracy : 0.2857
## 95% CI : (0.2348, 0.341)
## No Information Rate : 0.2755
## P-Value [Acc > NIR] : 0.3688
##
## Kappa : 0.0393
##
## Mcnemar's Test P-Value : 0.1497
##
## Statistics by Class:
##
## Class: Gryffindor Class: Hufflepuff Class: Ravenclaw
## Sensitivity 0.33333 0.24675 0.18033
## Specificity 0.73059 0.74194 0.86695
## Pos Pred Value 0.29762 0.25333 0.26190
## Neg Pred Value 0.76190 0.73516 0.80159
## Prevalence 0.25510 0.26190 0.20748
## Detection Rate 0.08503 0.06463 0.03741
## Detection Prevalence 0.28571 0.25510 0.14286
## Balanced Accuracy 0.53196 0.49434 0.52364
## Class: Slytherin
## Sensitivity 0.35802
## Specificity 0.69953
## Pos Pred Value 0.31183
## Neg Pred Value 0.74129
## Prevalence 0.27551
## Detection Rate 0.09864
## Detection Prevalence 0.31633
## Balanced Accuracy 0.52878
# 2.1 Merge predicted class with validation data set ----------------------
# Wrap the predicted classes in a one-column data frame so they can be bound
# to the validation rows. The column ends up named after the expression
# passed to as.data.frame(), as the printed header below shows.
pred_class <- as.data.frame(hogwarts_da1_pred$pred_class)
head(pred_class)
## hogwarts_da1_pred$pred_class
## 1 Slytherin
## 2 Ravenclaw
## 3 Slytherin
## 4 Slytherin
## 5 Slytherin
## 6 Gryffindor
# Append the predicted class as an extra column of the validation data.
# cbind() pairs rows by position, so this relies on the predictions being in
# the same order as the rows of valid_df.
valid_df_with_pred_class <- cbind(valid_df, pred_class)
head(valid_df_with_pred_class)
## ID Name Gender Race Height Publisher
## 2 A002 Abe Sapien Male Icthyo Sapien 191 Dark Horse Comics
## 3 A004 Abomination Male Human / Radiation 203 Marvel Comics
## 5 A015 Alex Mercer Male Human NA Wildstorm
## 12 A036 Aquababy Male <NA> NA DC Comics
## 13 A039 Arachne Female Human 175 Marvel Comics
## 14 A040 Archangel Male Mutant 183 Marvel Comics
## Alignment Weight Manipulative Resourceful Dismissive Intelligent
## 2 good 65 7 7 6 8
## 3 bad 441 6 8 1 6
## 5 bad NA 10 6 8 3
## 12 good NA 6 6 6 2
## 13 good 63 9 7 9 8
## 14 good 68 7 6 3 9
## Trusting Loyal Stubborn Brave HouseID House STR DEX CON INT WIS CHA
## 2 6 7 6 9 1 Slytherin 16 17 10 13 15 11
## 3 3 3 5 2 1 Slytherin 13 14 13 10 18 15
## 5 4 4 1 8 1 Slytherin 14 17 13 12 10 11
## 12 3 6 7 5 1 Slytherin 13 14 12 17 12 11
## 13 6 3 2 5 1 Slytherin 10 14 15 17 12 16
## 14 4 7 2 8 1 Slytherin 16 14 14 13 10 12
## Level HP hogwarts_da1_pred$pred_class
## 2 8 72 Slytherin
## 3 15 135 Ravenclaw
## 5 9 72 Slytherin
## 12 3 30 Slytherin
## 13 12 84 Slytherin
## 14 4 36 Gryffindor
# List the column names to locate the newly appended prediction column.
names(valid_df_with_pred_class)
## [1] "ID" "Name"
## [3] "Gender" "Race"
## [5] "Height" "Publisher"
## [7] "Alignment" "Weight"
## [9] "Manipulative" "Resourceful"
## [11] "Dismissive" "Intelligent"
## [13] "Trusting" "Loyal"
## [15] "Stubborn" "Brave"
## [17] "HouseID" "House"
## [19] "STR" "DEX"
## [21] "CON" "INT"
## [23] "WIS" "CHA"
## [25] "Level" "HP"
## [27] "hogwarts_da1_pred$pred_class"
# Give the appended prediction column a readable name. After the cbind() it
# is always the last column, so it is addressed with ncol() rather than the
# hard-coded position 27, which would break if columns were added or removed.
colnames(valid_df_with_pred_class)[ncol(valid_df_with_pred_class)] <-
  "Predicted_House"
head(valid_df_with_pred_class)
## ID Name Gender Race Height Publisher
## 2 A002 Abe Sapien Male Icthyo Sapien 191 Dark Horse Comics
## 3 A004 Abomination Male Human / Radiation 203 Marvel Comics
## 5 A015 Alex Mercer Male Human NA Wildstorm
## 12 A036 Aquababy Male <NA> NA DC Comics
## 13 A039 Arachne Female Human 175 Marvel Comics
## 14 A040 Archangel Male Mutant 183 Marvel Comics
## Alignment Weight Manipulative Resourceful Dismissive Intelligent
## 2 good 65 7 7 6 8
## 3 bad 441 6 8 1 6
## 5 bad NA 10 6 8 3
## 12 good NA 6 6 6 2
## 13 good 63 9 7 9 8
## 14 good 68 7 6 3 9
## Trusting Loyal Stubborn Brave HouseID House STR DEX CON INT WIS CHA
## 2 6 7 6 9 1 Slytherin 16 17 10 13 15 11
## 3 3 3 5 2 1 Slytherin 13 14 13 10 18 15
## 5 4 4 1 8 1 Slytherin 14 17 13 12 10 11
## 12 3 6 7 5 1 Slytherin 13 14 12 17 12 11
## 13 6 3 2 5 1 Slytherin 10 14 15 17 12 16
## 14 4 7 2 8 1 Slytherin 16 14 14 13 10 12
## Level HP Predicted_House
## 2 8 72 Slytherin
## 3 15 135 Ravenclaw
## 5 9 72 Slytherin
## 12 3 30 Slytherin
## 13 12 84 Slytherin
## 14 4 36 Gryffindor
# 3. Discriminant analysis method 2 for probabilities ---------------------------------------
library(MASS)
# Column names of the training data, for reference when writing the model
# formula below.
names(train_df)
## [1] "ID" "Name" "Gender" "Race"
## [5] "Height" "Publisher" "Alignment" "Weight"
## [9] "Manipulative" "Resourceful" "Dismissive" "Intelligent"
## [13] "Trusting" "Loyal" "Stubborn" "Brave"
## [17] "HouseID" "House" "STR" "DEX"
## [21] "CON" "INT" "WIS" "CHA"
## [25] "Level" "HP"
# Fit a second linear discriminant model with MASS::lda(), again predicting
# House from the six ability scores. This fit is used further down to obtain
# posterior class probabilities for the validation rows.
hogwarts_da2 <- lda(House ~ STR + DEX + CON + INT + WIS + CHA,
data = train_df)
# Print the fit: prior probabilities, group means, linear discriminant
# coefficients and the proportion of trace per discriminant.
hogwarts_da2
## Call:
## lda(House ~ STR + DEX + CON + INT + WIS + CHA, data = train_df)
##
## Prior probabilities of groups:
## Gryffindor Hufflepuff Ravenclaw Slytherin
## 0.2568182 0.2545455 0.2159091 0.2727273
##
## Group means:
## STR DEX CON INT WIS CHA
## Gryffindor 13.20354 13.61947 14.01770 13.98230 13.59292 14.00885
## Hufflepuff 13.28571 13.94643 14.56250 14.01786 14.14286 13.48214
## Ravenclaw 12.51579 14.02105 14.37895 14.09474 14.41053 14.26316
## Slytherin 13.05833 14.29167 14.08333 14.28333 13.75833 13.90833
##
## Coefficients of linear discriminants:
## LD1 LD2 LD3
## STR 0.120711050 0.13519453 0.02741064
## DEX -0.070614292 -0.09370852 0.31194029
## CON -0.191957990 0.14698602 -0.01682226
## INT -0.010355169 -0.08978647 0.15079073
## WIS -0.309057846 0.04546084 -0.05615099
## CHA -0.005296665 -0.26948657 -0.10476872
##
## Proportion of trace:
## LD1 LD2 LD3
## 0.4909 0.3252 0.1839
# The fit has three linear discriminant (LD) functions to separate the four houses.
# Number of LD functions = min(number of groups - 1, number of predictors).
# Draw the default plot for the fitted lda object.
plot(hogwarts_da2)
# 3.1 Probabilities --------------------------------------------------------------
# Apply the lda fit to the validation rows; the result carries a matrix of
# posterior probabilities with one column per house.
prob <- predict(
  object = hogwarts_da2,
  newdata = valid_df
)
# Posterior probabilities for the first few validation rows.
head(prob$posterior)
## Gryffindor Hufflepuff Ravenclaw Slytherin
## 2 0.2528202 0.2641105 0.1577407 0.3253287
## 3 0.2293724 0.2718088 0.2781108 0.2207079
## 5 0.2874606 0.2458260 0.1296283 0.3370851
## 12 0.2802979 0.2377423 0.1540282 0.3279316
## 13 0.2535968 0.1944250 0.2408543 0.3111239
## 14 0.3284375 0.2540759 0.1230235 0.2944632
# 3.2 Merge probabilities with validation data set ------------------------
# Convert the posterior probabilities to a data frame (one column per house)
# so they can be bound to the validation data.
prob_house <- as.data.frame(prob$posterior)
head(prob_house)
## Gryffindor Hufflepuff Ravenclaw Slytherin
## 2 0.2528202 0.2641105 0.1577407 0.3253287
## 3 0.2293724 0.2718088 0.2781108 0.2207079
## 5 0.2874606 0.2458260 0.1296283 0.3370851
## 12 0.2802979 0.2377423 0.1540282 0.3279316
## 13 0.2535968 0.1944250 0.2408543 0.3111239
## 14 0.3284375 0.2540759 0.1230235 0.2944632
# Append the four per-house probability columns to the validation data.
# cbind() pairs rows by position; the row names printed below agree between
# the two inputs.
valid_df_with_prob_class <- cbind(valid_df, prob_house)
head(valid_df_with_prob_class)
## ID Name Gender Race Height Publisher
## 2 A002 Abe Sapien Male Icthyo Sapien 191 Dark Horse Comics
## 3 A004 Abomination Male Human / Radiation 203 Marvel Comics
## 5 A015 Alex Mercer Male Human NA Wildstorm
## 12 A036 Aquababy Male <NA> NA DC Comics
## 13 A039 Arachne Female Human 175 Marvel Comics
## 14 A040 Archangel Male Mutant 183 Marvel Comics
## Alignment Weight Manipulative Resourceful Dismissive Intelligent
## 2 good 65 7 7 6 8
## 3 bad 441 6 8 1 6
## 5 bad NA 10 6 8 3
## 12 good NA 6 6 6 2
## 13 good 63 9 7 9 8
## 14 good 68 7 6 3 9
## Trusting Loyal Stubborn Brave HouseID House STR DEX CON INT WIS CHA
## 2 6 7 6 9 1 Slytherin 16 17 10 13 15 11
## 3 3 3 5 2 1 Slytherin 13 14 13 10 18 15
## 5 4 4 1 8 1 Slytherin 14 17 13 12 10 11
## 12 3 6 7 5 1 Slytherin 13 14 12 17 12 11
## 13 6 3 2 5 1 Slytherin 10 14 15 17 12 16
## 14 4 7 2 8 1 Slytherin 16 14 14 13 10 12
## Level HP Gryffindor Hufflepuff Ravenclaw Slytherin
## 2 8 72 0.2528202 0.2641105 0.1577407 0.3253287
## 3 15 135 0.2293724 0.2718088 0.2781108 0.2207079
## 5 9 72 0.2874606 0.2458260 0.1296283 0.3370851
## 12 3 30 0.2802979 0.2377423 0.1540282 0.3279316
## 13 12 84 0.2535968 0.1944250 0.2408543 0.3111239
## 14 4 36 0.3284375 0.2540759 0.1230235 0.2944632
# Identify the predicted house as the one with the highest posterior
# probability. The house names are listed once and reused both to select the
# probability columns and to translate the winning column index back into a
# name, so the two lists can never drift apart.
house_names <- c("Gryffindor", "Hufflepuff", "Ravenclaw", "Slytherin")
# ties.method = "first" resolves exact ties in favour of the earliest column.
max_prob <- max.col(
  valid_df_with_prob_class[, house_names],
  ties.method = "first"
)
valid_df_with_prob_class$Pred_House <- house_names[max_prob]
head(valid_df_with_prob_class)
## ID Name Gender Race Height Publisher
## 2 A002 Abe Sapien Male Icthyo Sapien 191 Dark Horse Comics
## 3 A004 Abomination Male Human / Radiation 203 Marvel Comics
## 5 A015 Alex Mercer Male Human NA Wildstorm
## 12 A036 Aquababy Male <NA> NA DC Comics
## 13 A039 Arachne Female Human 175 Marvel Comics
## 14 A040 Archangel Male Mutant 183 Marvel Comics
## Alignment Weight Manipulative Resourceful Dismissive Intelligent
## 2 good 65 7 7 6 8
## 3 bad 441 6 8 1 6
## 5 bad NA 10 6 8 3
## 12 good NA 6 6 6 2
## 13 good 63 9 7 9 8
## 14 good 68 7 6 3 9
## Trusting Loyal Stubborn Brave HouseID House STR DEX CON INT WIS CHA
## 2 6 7 6 9 1 Slytherin 16 17 10 13 15 11
## 3 3 3 5 2 1 Slytherin 13 14 13 10 18 15
## 5 4 4 1 8 1 Slytherin 14 17 13 12 10 11
## 12 3 6 7 5 1 Slytherin 13 14 12 17 12 11
## 13 6 3 2 5 1 Slytherin 10 14 15 17 12 16
## 14 4 7 2 8 1 Slytherin 16 14 14 13 10 12
## Level HP Gryffindor Hufflepuff Ravenclaw Slytherin Pred_House
## 2 8 72 0.2528202 0.2641105 0.1577407 0.3253287 Slytherin
## 3 15 135 0.2293724 0.2718088 0.2781108 0.2207079 Ravenclaw
## 5 9 72 0.2874606 0.2458260 0.1296283 0.3370851 Slytherin
## 12 3 30 0.2802979 0.2377423 0.1540282 0.3279316 Slytherin
## 13 12 84 0.2535968 0.1944250 0.2408543 0.3111239 Slytherin
## 14 4 36 0.3284375 0.2540759 0.1230235 0.2944632 Gryffindor