# 1. Load data ------------------------------------------------------------

# Read the character roster from CSV.
# stringsAsFactors = TRUE keeps text columns (House, Gender, ...) as factors
# on R >= 4.0.0, where the default became FALSE. This matches the str()
# output shown further down and gives caret::confusionMatrix() the factor
# reference labels it requires.
hogwarts <- read.csv(
  "super_heroes_hogwarts_v3a.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
head(hogwarts)
##     ID         Name Gender              Race Height         Publisher
## 1 A001       A-Bomb   Male             Human    203     Marvel Comics
## 2 A002   Abe Sapien   Male     Icthyo Sapien    191 Dark Horse Comics
## 3 A004  Abomination   Male Human / Radiation    203     Marvel Comics
## 4 A009     Agent 13 Female              <NA>    173     Marvel Comics
## 5 A015  Alex Mercer   Male             Human     NA         Wildstorm
## 6 A016 Alex Woolsly   Male              <NA>     NA      NBC - Heroes
##   Alignment Weight Manipulative Resourceful Dismissive Intelligent
## 1      good    441           10          10          7           6
## 2      good     65            7           7          6           8
## 3       bad    441            6           8          1           6
## 4      good     61            7           7          1           9
## 5       bad     NA           10           6          8           3
## 6      good     NA            8          10          5           5
##   Trusting Loyal Stubborn Brave HouseID     House STR DEX CON INT WIS CHA
## 1        7     7        7     9       1 Slytherin  18  11  17  12  13  11
## 2        6     7        6     9       1 Slytherin  16  17  10  13  15  11
## 3        3     3        5     2       1 Slytherin  13  14  13  10  18  15
## 4        7     4        6     6       1 Slytherin  15  18  16  16  17  10
## 5        4     4        1     8       1 Slytherin  14  17  13  12  10  11
## 6        6     7        7     6       1 Slytherin  14  14  11  13  12  12
##   Level  HP
## 1     1   7
## 2     8  72
## 3    15 135
## 4    14 140
## 5     9  72
## 6     1   8
# Compact overview: type and leading values of every column.
str(object = hogwarts)
## 'data.frame':    734 obs. of  26 variables:
##  $ ID          : Factor w/ 734 levels "A001","A002",..: 1 2 4 9 15 16 24 25 28 32 ...
##  $ Name        : Factor w/ 715 levels "A-Bomb","Abe Sapien",..: 1 2 4 9 15 16 23 24 27 31 ...
##  $ Gender      : Factor w/ 2 levels "Female","Male": 2 2 2 1 2 2 2 1 2 2 ...
##  $ Race        : Factor w/ 61 levels "Alien","Alpha",..: 24 33 28 NA 24 NA 57 43 24 21 ...
##  $ Height      : num  203 191 203 173 NA NA NA 165 183 61 ...
##  $ Publisher   : Factor w/ 25 levels "","ABC Studios",..: 13 3 13 13 25 15 3 13 4 4 ...
##  $ Alignment   : Factor w/ 3 levels "bad","good","neutral": 2 2 1 2 1 2 2 2 2 1 ...
##  $ Weight      : int  441 65 441 61 NA NA NA 57 83 NA ...
##  $ Manipulative: int  10 7 6 7 10 8 8 9 7 7 ...
##  $ Resourceful : int  10 7 8 7 6 10 6 8 6 7 ...
##  $ Dismissive  : int  7 6 1 1 8 5 8 9 6 7 ...
##  $ Intelligent : int  6 8 6 9 3 5 7 4 5 1 ...
##  $ Trusting    : int  7 6 3 7 4 6 4 1 8 9 ...
##  $ Loyal       : int  7 7 3 4 4 7 1 6 3 1 ...
##  $ Stubborn    : int  7 6 5 6 1 7 5 5 3 6 ...
##  $ Brave       : int  9 9 2 6 8 6 2 4 2 5 ...
##  $ HouseID     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ House       : Factor w/ 4 levels "Gryffindor","Hufflepuff",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ STR         : int  18 16 13 15 14 14 15 8 10 8 ...
##  $ DEX         : int  11 17 14 18 17 14 17 17 17 10 ...
##  $ CON         : int  17 10 13 16 13 11 15 12 15 11 ...
##  $ INT         : int  12 13 10 16 12 13 18 15 18 16 ...
##  $ WIS         : int  13 15 18 17 10 12 13 17 13 12 ...
##  $ CHA         : int  11 11 15 10 11 12 18 18 14 11 ...
##  $ Level       : int  1 8 15 14 9 1 11 1 8 7 ...
##  $ HP          : int  7 72 135 140 72 8 88 8 56 63 ...
# Per-column summary: quantiles for numeric columns, level counts for factors.
summary(object = hogwarts)
##        ID              Name        Gender                   Race    
##  A001   :  1   Goliath   :  3   Female:200   Human            :208  
##  A002   :  1   Spider-Man:  3   Male  :505   Mutant           : 63  
##  A003   :  1   Angel     :  2   NA's  : 29   God / Eternal    : 14  
##  A004   :  1   Atlas     :  2                Cyborg           : 11  
##  A005   :  1   Atom      :  2                Human / Radiation: 11  
##  A006   :  1   Batgirl   :  2                (Other)          :123  
##  (Other):728   (Other)   :720                NA's             :304  
##      Height                  Publisher     Alignment       Weight     
##  Min.   : 15.2   Marvel Comics    :388   bad    :207   Min.   :  2.0  
##  1st Qu.:173.0   DC Comics        :215   good   :496   1st Qu.: 61.0  
##  Median :183.0   NBC - Heroes     : 19   neutral: 24   Median : 81.0  
##  Mean   :186.7   Dark Horse Comics: 18   NA's   :  7   Mean   :112.3  
##  3rd Qu.:191.0                    : 15                 3rd Qu.:108.0  
##  Max.   :975.0   George Lucas     : 14                 Max.   :900.0  
##  NA's   :217     (Other)          : 65                 NA's   :239    
##   Manipulative     Resourceful       Dismissive     Intelligent    
##  Min.   : 1.000   Min.   : 1.000   Min.   : 1.00   Min.   : 1.000  
##  1st Qu.: 4.000   1st Qu.: 4.000   1st Qu.: 4.00   1st Qu.: 4.000  
##  Median : 6.000   Median : 6.000   Median : 6.00   Median : 6.000  
##  Mean   : 5.827   Mean   : 5.993   Mean   : 5.74   Mean   : 5.749  
##  3rd Qu.: 8.000   3rd Qu.: 8.000   3rd Qu.: 8.00   3rd Qu.: 7.000  
##  Max.   :10.000   Max.   :10.000   Max.   :10.00   Max.   :10.000  
##                                                                    
##     Trusting          Loyal           Stubborn          Brave       
##  Min.   : 1.000   Min.   : 1.000   Min.   : 1.000   Min.   : 1.000  
##  1st Qu.: 4.000   1st Qu.: 4.000   1st Qu.: 4.000   1st Qu.: 4.000  
##  Median : 6.000   Median : 6.000   Median : 6.000   Median : 6.000  
##  Mean   : 5.872   Mean   : 5.778   Mean   : 5.828   Mean   : 5.835  
##  3rd Qu.: 8.000   3rd Qu.: 8.000   3rd Qu.: 8.000   3rd Qu.: 8.000  
##  Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :10.000  
##                                                                     
##     HouseID             House          STR             DEX       
##  Min.   :1.000   Gryffindor:188   Min.   : 8.00   Min.   :10.00  
##  1st Qu.:1.000   Hufflepuff:189   1st Qu.:10.00   1st Qu.:12.00  
##  Median :3.000   Ravenclaw :156   Median :13.00   Median :14.00  
##  Mean   :2.495   Slytherin :201   Mean   :13.09   Mean   :13.97  
##  3rd Qu.:4.000                    3rd Qu.:16.00   3rd Qu.:16.00  
##  Max.   :4.000                    Max.   :18.00   Max.   :18.00  
##                                                                  
##       CON             INT             WIS             CHA       
##  Min.   :10.00   Min.   :10.00   Min.   :10.00   Min.   :10.00  
##  1st Qu.:12.00   1st Qu.:12.00   1st Qu.:12.00   1st Qu.:12.00  
##  Median :14.00   Median :14.00   Median :14.00   Median :14.00  
##  Mean   :14.13   Mean   :14.01   Mean   :13.99   Mean   :14.02  
##  3rd Qu.:16.00   3rd Qu.:16.00   3rd Qu.:16.00   3rd Qu.:16.00  
##  Max.   :18.00   Max.   :18.00   Max.   :18.00   Max.   :18.00  
##                                                                 
##      Level              HP        
##  Min.   : 1.000   Min.   :  6.00  
##  1st Qu.: 5.000   1st Qu.: 36.00  
##  Median : 8.000   Median : 63.00  
##  Mean   : 8.316   Mean   : 66.89  
##  3rd Qu.:12.000   3rd Qu.: 91.00  
##  Max.   :15.000   Max.   :150.00  
## 
# Number of observations in the full data set.
dim(hogwarts)[1L]
## [1] 734
# Column names of the data set.
colnames(hogwarts)
##  [1] "ID"           "Name"         "Gender"       "Race"        
##  [5] "Height"       "Publisher"    "Alignment"    "Weight"      
##  [9] "Manipulative" "Resourceful"  "Dismissive"   "Intelligent" 
## [13] "Trusting"     "Loyal"        "Stubborn"     "Brave"       
## [17] "HouseID"      "House"        "STR"          "DEX"         
## [21] "CON"          "INT"          "WIS"          "CHA"         
## [25] "Level"        "HP"
# 2. Training validation split ----------------------------------------------------------------------

# Fix the RNG seed so the random 60/40 partition is reproducible.
set.seed(666)

# seq_len() is safe for zero-row data (1:0 would yield c(1, 0)), and floor()
# makes the truncation of the fractional training size explicit
# (0.6 * 734 = 440.4 -> 440 rows). The sampled indices are unchanged.
n_obs <- nrow(hogwarts)
train_index <- sample(seq_len(n_obs), size = floor(0.6 * n_obs))
# Every row that was not drawn for training goes to validation.
valid_index <- setdiff(seq_len(n_obs), train_index)

train_df <- hogwarts[train_index, ]
valid_df <- hogwarts[valid_index, ]
nrow(train_df)
## [1] 440
nrow(valid_df)
## [1] 294
# 2. Discriminant analysis method 1 for classification ------------------------------------------------

library(DiscriMiner)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## Registered S3 method overwritten by 'caret':
##   method      from       
##   print.plsda DiscriMiner
# Fit a linear discriminant analysis (DiscriMiner::linDA) that predicts House
# from the six ability scores on the training partition.
# Columns are selected by name rather than by position (19:24 and 18) so the
# fit does not silently change if the column order of the data changes, and
# so it agrees with the name-based lda() formula used later in this script.
# prior = NULL is passed through unchanged (presumably linDA's default of
# class proportions -- confirm against the DiscriMiner manual).
ability_cols <- c("STR", "DEX", "CON", "INT", "WIS", "CHA")
hogwarts_da1 <- linDA(train_df[, ability_cols], train_df[, "House"],
                      prior = NULL)
# Per-house classification functions (constant plus one weight per predictor).
hogwarts_da1$functions
##          Gryffindor Hufflepuff  Ravenclaw  Slytherin
## constant -85.284470 -87.332216 -88.794843 -87.306166
## STR        1.346405   1.345711   1.278927   1.328601
## DEX        1.942957   1.980996   1.998845   2.036727
## CON        2.208822   2.294455   2.274260   2.220187
## INT        2.217254   2.218879   2.239196   2.262369
## WIS        2.125235   2.216598   2.252038   2.148734
## CHA        2.338362   2.266359   2.368045   2.330359
# Training-set confusion table stored on the linDA fit
# (rows = original class, columns = predicted class).
hogwarts_da1[["confusion"]]
##             predicted
## original     Gryffindor Hufflepuff Ravenclaw Slytherin
##   Gryffindor         40         32        11        30
##   Hufflepuff         30         37        14        31
##   Ravenclaw          20         24        21        30
##   Slytherin          30         29        19        42
# First six rows of the per-house discriminant scores for the training data.
head(hogwarts_da1[["scores"]], n = 6L)
##     Gryffindor Hufflepuff Ravenclaw Slytherin
## 574   75.90927   75.48810  75.22420  75.81402
## 638  103.74422  104.32058 104.72113 104.11872
## 608   69.20524   69.16321  68.36535  68.91883
## 123   61.33969   60.31274  60.24482  60.74449
## 540   89.28855   89.59703  90.02551  89.09504
## 654   76.18705   76.48728  76.08213  76.09919
# caret accuracy statistics on the training data: the linDA classifications
# are the predictions, the recorded houses are the reference.
confusionMatrix(
  data = hogwarts_da1[["classification"]],
  reference = train_df[["House"]]
)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   Gryffindor Hufflepuff Ravenclaw Slytherin
##   Gryffindor         40         30        20        30
##   Hufflepuff         32         37        24        29
##   Ravenclaw          11         14        21        19
##   Slytherin          30         31        30        42
## 
## Overall Statistics
##                                          
##                Accuracy : 0.3182         
##                  95% CI : (0.2749, 0.364)
##     No Information Rate : 0.2727         
##     P-Value [Acc > NIR] : 0.01958        
##                                          
##                   Kappa : 0.0849         
##                                          
##  Mcnemar's Test P-Value : 0.24968        
## 
## Statistics by Class:
## 
##                      Class: Gryffindor Class: Hufflepuff Class: Ravenclaw
## Sensitivity                    0.35398           0.33036          0.22105
## Specificity                    0.75535           0.74085          0.87246
## Pos Pred Value                 0.33333           0.30328          0.32308
## Neg Pred Value                 0.77187           0.76415          0.80267
## Prevalence                     0.25682           0.25455          0.21591
## Detection Rate                 0.09091           0.08409          0.04773
## Detection Prevalence           0.27273           0.27727          0.14773
## Balanced Accuracy              0.55467           0.53561          0.54676
##                      Class: Slytherin
## Sensitivity                   0.35000
## Specificity                   0.71562
## Pos Pred Value                0.31579
## Neg Pred Value                0.74593
## Prevalence                    0.27273
## Detection Rate                0.09545
## Detection Prevalence          0.30227
## Balanced Accuracy             0.53281
# Classify the validation rows with the fitted linDA model.
# Predictors are selected by name rather than by position (19:24) so the
# right columns are used even if the column order of the data changes.
hogwarts_da1_pred <- classify(
  hogwarts_da1,
  newdata = valid_df[, c("STR", "DEX", "CON", "INT", "WIS", "CHA")]
)
# First rows of the per-house discriminant scores for the validation data.
head(hogwarts_da1_pred$scores)
##   Gryffindor Hufflepuff Ravenclaw Slytherin
## 1   77.80129   77.84498  77.32957  78.05345
## 2   83.63707   83.80682  83.82974  83.59856
## 3   68.89152   68.73506  68.09510  69.05077
## 4   74.84416   74.67949  74.24544  75.00111
## 5   89.12322   88.85752  89.07167  89.32767
## 6   72.51990   72.26318  71.53793  72.41071
# First six predicted houses for the validation rows.
head(hogwarts_da1_pred[["pred_class"]], n = 6L)
## [1] Slytherin  Ravenclaw  Slytherin  Slytherin  Slytherin  Gryffindor
## Levels: Gryffindor Hufflepuff Ravenclaw Slytherin
# caret accuracy statistics on the validation data: predicted houses against
# the recorded houses.
confusionMatrix(
  data = hogwarts_da1_pred[["pred_class"]],
  reference = valid_df[["House"]]
)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   Gryffindor Hufflepuff Ravenclaw Slytherin
##   Gryffindor         25         22        14        23
##   Hufflepuff         19         19        15        22
##   Ravenclaw          14         10        11         7
##   Slytherin          17         26        21        29
## 
## Overall Statistics
##                                          
##                Accuracy : 0.2857         
##                  95% CI : (0.2348, 0.341)
##     No Information Rate : 0.2755         
##     P-Value [Acc > NIR] : 0.3688         
##                                          
##                   Kappa : 0.0393         
##                                          
##  Mcnemar's Test P-Value : 0.1497         
## 
## Statistics by Class:
## 
##                      Class: Gryffindor Class: Hufflepuff Class: Ravenclaw
## Sensitivity                    0.33333           0.24675          0.18033
## Specificity                    0.73059           0.74194          0.86695
## Pos Pred Value                 0.29762           0.25333          0.26190
## Neg Pred Value                 0.76190           0.73516          0.80159
## Prevalence                     0.25510           0.26190          0.20748
## Detection Rate                 0.08503           0.06463          0.03741
## Detection Prevalence           0.28571           0.25510          0.14286
## Balanced Accuracy              0.53196           0.49434          0.52364
##                      Class: Slytherin
## Sensitivity                   0.35802
## Specificity                   0.69953
## Pos Pred Value                0.31183
## Neg Pred Value                0.74129
## Prevalence                    0.27551
## Detection Rate                0.09864
## Detection Prevalence          0.31633
## Balanced Accuracy             0.52878
# 2.1 Merge predicted class with validation data set ----------------------

# Wrap the predicted classes in a one-column data frame. The column name is
# taken from the text of the expression passed to as.data.frame() (see the
# header printed below), so the expression is left exactly as written.
pred_class <- as.data.frame(hogwarts_da1_pred$pred_class)
head(pred_class)
##   hogwarts_da1_pred$pred_class
## 1                    Slytherin
## 2                    Ravenclaw
## 3                    Slytherin
## 4                    Slytherin
## 5                    Slytherin
## 6                   Gryffindor
# Append the predicted class as an extra column of the validation data.
# This is the call cbind() makes for data frames; check.names = FALSE keeps
# the non-syntactic prediction column name untouched.
valid_df_with_pred_class <- data.frame(
  valid_df,
  pred_class,
  check.names = FALSE
)
head(valid_df_with_pred_class, n = 6L)
##      ID        Name Gender              Race Height         Publisher
## 2  A002  Abe Sapien   Male     Icthyo Sapien    191 Dark Horse Comics
## 3  A004 Abomination   Male Human / Radiation    203     Marvel Comics
## 5  A015 Alex Mercer   Male             Human     NA         Wildstorm
## 12 A036    Aquababy   Male              <NA>     NA         DC Comics
## 13 A039     Arachne Female             Human    175     Marvel Comics
## 14 A040   Archangel   Male            Mutant    183     Marvel Comics
##    Alignment Weight Manipulative Resourceful Dismissive Intelligent
## 2       good     65            7           7          6           8
## 3        bad    441            6           8          1           6
## 5        bad     NA           10           6          8           3
## 12      good     NA            6           6          6           2
## 13      good     63            9           7          9           8
## 14      good     68            7           6          3           9
##    Trusting Loyal Stubborn Brave HouseID     House STR DEX CON INT WIS CHA
## 2         6     7        6     9       1 Slytherin  16  17  10  13  15  11
## 3         3     3        5     2       1 Slytherin  13  14  13  10  18  15
## 5         4     4        1     8       1 Slytherin  14  17  13  12  10  11
## 12        3     6        7     5       1 Slytherin  13  14  12  17  12  11
## 13        6     3        2     5       1 Slytherin  10  14  15  17  12  16
## 14        4     7        2     8       1 Slytherin  16  14  14  13  10  12
##    Level  HP hogwarts_da1_pred$pred_class
## 2      8  72                    Slytherin
## 3     15 135                    Ravenclaw
## 5      9  72                    Slytherin
## 12     3  30                    Slytherin
## 13    12  84                    Slytherin
## 14     4  36                   Gryffindor
# List the column names to locate the appended prediction column.
colnames(valid_df_with_pred_class)
##  [1] "ID"                           "Name"                        
##  [3] "Gender"                       "Race"                        
##  [5] "Height"                       "Publisher"                   
##  [7] "Alignment"                    "Weight"                      
##  [9] "Manipulative"                 "Resourceful"                 
## [11] "Dismissive"                   "Intelligent"                 
## [13] "Trusting"                     "Loyal"                       
## [15] "Stubborn"                     "Brave"                       
## [17] "HouseID"                      "House"                       
## [19] "STR"                          "DEX"                         
## [21] "CON"                          "INT"                         
## [23] "WIS"                          "CHA"                         
## [25] "Level"                        "HP"                          
## [27] "hogwarts_da1_pred$pred_class"
# The prediction was appended by cbind(), so it is the last column. Rename it
# via ncol() instead of the hard-coded position 27, which would hit the wrong
# column (or fail) if the number of input columns ever changed.
colnames(valid_df_with_pred_class)[ncol(valid_df_with_pred_class)] <-
  "Predicted_House"

head(valid_df_with_pred_class)
##      ID        Name Gender              Race Height         Publisher
## 2  A002  Abe Sapien   Male     Icthyo Sapien    191 Dark Horse Comics
## 3  A004 Abomination   Male Human / Radiation    203     Marvel Comics
## 5  A015 Alex Mercer   Male             Human     NA         Wildstorm
## 12 A036    Aquababy   Male              <NA>     NA         DC Comics
## 13 A039     Arachne Female             Human    175     Marvel Comics
## 14 A040   Archangel   Male            Mutant    183     Marvel Comics
##    Alignment Weight Manipulative Resourceful Dismissive Intelligent
## 2       good     65            7           7          6           8
## 3        bad    441            6           8          1           6
## 5        bad     NA           10           6          8           3
## 12      good     NA            6           6          6           2
## 13      good     63            9           7          9           8
## 14      good     68            7           6          3           9
##    Trusting Loyal Stubborn Brave HouseID     House STR DEX CON INT WIS CHA
## 2         6     7        6     9       1 Slytherin  16  17  10  13  15  11
## 3         3     3        5     2       1 Slytherin  13  14  13  10  18  15
## 5         4     4        1     8       1 Slytherin  14  17  13  12  10  11
## 12        3     6        7     5       1 Slytherin  13  14  12  17  12  11
## 13        6     3        2     5       1 Slytherin  10  14  15  17  12  16
## 14        4     7        2     8       1 Slytherin  16  14  14  13  10  12
##    Level  HP Predicted_House
## 2      8  72       Slytherin
## 3     15 135       Ravenclaw
## 5      9  72       Slytherin
## 12     3  30       Slytherin
## 13    12  84       Slytherin
## 14     4  36      Gryffindor
# 3. Discriminant analysis method 2 for probabilities ---------------------------------------

library(MASS)
# Column names available for the model formula.
colnames(train_df)
##  [1] "ID"           "Name"         "Gender"       "Race"        
##  [5] "Height"       "Publisher"    "Alignment"    "Weight"      
##  [9] "Manipulative" "Resourceful"  "Dismissive"   "Intelligent" 
## [13] "Trusting"     "Loyal"        "Stubborn"     "Brave"       
## [17] "HouseID"      "House"        "STR"          "DEX"         
## [21] "CON"          "INT"          "WIS"          "CHA"         
## [25] "Level"        "HP"
# Fit MASS::lda with House as the class label and the six ability scores as
# predictors, using the training partition only.
hogwarts_da2 <- lda(House ~ STR + DEX + CON + INT + WIS + CHA,
                    data = train_df)
# Print the fit: the call, prior probabilities, group means, linear
# discriminant coefficients and proportion of trace.
hogwarts_da2
## Call:
## lda(House ~ STR + DEX + CON + INT + WIS + CHA, data = train_df)
## 
## Prior probabilities of groups:
## Gryffindor Hufflepuff  Ravenclaw  Slytherin 
##  0.2568182  0.2545455  0.2159091  0.2727273 
## 
## Group means:
##                 STR      DEX      CON      INT      WIS      CHA
## Gryffindor 13.20354 13.61947 14.01770 13.98230 13.59292 14.00885
## Hufflepuff 13.28571 13.94643 14.56250 14.01786 14.14286 13.48214
## Ravenclaw  12.51579 14.02105 14.37895 14.09474 14.41053 14.26316
## Slytherin  13.05833 14.29167 14.08333 14.28333 13.75833 13.90833
## 
## Coefficients of linear discriminants:
##              LD1         LD2         LD3
## STR  0.120711050  0.13519453  0.02741064
## DEX -0.070614292 -0.09370852  0.31194029
## CON -0.191957990  0.14698602 -0.01682226
## INT -0.010355169 -0.08978647  0.15079073
## WIS -0.309057846  0.04546084 -0.05615099
## CHA -0.005296665 -0.26948657 -0.10476872
## 
## Proportion of trace:
##    LD1    LD2    LD3 
## 0.4909 0.3252 0.1839
# LDA yields min(number of groups - 1, number of predictors) discriminant
# functions: with 4 houses and 6 predictors that is 3 (LD1, LD2, LD3).

# Plot the fitted lda object (dispatches to MASS's plot method for lda fits).
plot(x = hogwarts_da2)

# 3.1 Probabilities --------------------------------------------------------------



# Apply the lda fit to the validation rows and show the first six rows of
# posterior probabilities, one column per house.
prob <- predict(object = hogwarts_da2, newdata = valid_df)
head(prob[["posterior"]], n = 6L)
##    Gryffindor Hufflepuff Ravenclaw Slytherin
## 2   0.2528202  0.2641105 0.1577407 0.3253287
## 3   0.2293724  0.2718088 0.2781108 0.2207079
## 5   0.2874606  0.2458260 0.1296283 0.3370851
## 12  0.2802979  0.2377423 0.1540282 0.3279316
## 13  0.2535968  0.1944250 0.2408543 0.3111239
## 14  0.3284375  0.2540759 0.1230235 0.2944632
# 3.2 Merge probabilities with validation data set ------------------------

# Convert the posterior matrix to a data frame (one column per house) so it
# can be bound to the validation data.
prob_house <- as.data.frame(prob[["posterior"]])
head(prob_house, n = 6L)
##    Gryffindor Hufflepuff Ravenclaw Slytherin
## 2   0.2528202  0.2641105 0.1577407 0.3253287
## 3   0.2293724  0.2718088 0.2781108 0.2207079
## 5   0.2874606  0.2458260 0.1296283 0.3370851
## 12  0.2802979  0.2377423 0.1540282 0.3279316
## 13  0.2535968  0.1944250 0.2408543 0.3111239
## 14  0.3284375  0.2540759 0.1230235 0.2944632
# Append the four posterior-probability columns to the validation data.
# This is the call cbind() makes for data frames.
valid_df_with_prob_class <- data.frame(
  valid_df,
  prob_house,
  check.names = FALSE
)
head(valid_df_with_prob_class, n = 6L)
##      ID        Name Gender              Race Height         Publisher
## 2  A002  Abe Sapien   Male     Icthyo Sapien    191 Dark Horse Comics
## 3  A004 Abomination   Male Human / Radiation    203     Marvel Comics
## 5  A015 Alex Mercer   Male             Human     NA         Wildstorm
## 12 A036    Aquababy   Male              <NA>     NA         DC Comics
## 13 A039     Arachne Female             Human    175     Marvel Comics
## 14 A040   Archangel   Male            Mutant    183     Marvel Comics
##    Alignment Weight Manipulative Resourceful Dismissive Intelligent
## 2       good     65            7           7          6           8
## 3        bad    441            6           8          1           6
## 5        bad     NA           10           6          8           3
## 12      good     NA            6           6          6           2
## 13      good     63            9           7          9           8
## 14      good     68            7           6          3           9
##    Trusting Loyal Stubborn Brave HouseID     House STR DEX CON INT WIS CHA
## 2         6     7        6     9       1 Slytherin  16  17  10  13  15  11
## 3         3     3        5     2       1 Slytherin  13  14  13  10  18  15
## 5         4     4        1     8       1 Slytherin  14  17  13  12  10  11
## 12        3     6        7     5       1 Slytherin  13  14  12  17  12  11
## 13        6     3        2     5       1 Slytherin  10  14  15  17  12  16
## 14        4     7        2     8       1 Slytherin  16  14  14  13  10  12
##    Level  HP Gryffindor Hufflepuff Ravenclaw Slytherin
## 2      8  72  0.2528202  0.2641105 0.1577407 0.3253287
## 3     15 135  0.2293724  0.2718088 0.2781108 0.2207079
## 5      9  72  0.2874606  0.2458260 0.1296283 0.3370851
## 12     3  30  0.2802979  0.2377423 0.1540282 0.3279316
## 13    12  84  0.2535968  0.1944250 0.2408543 0.3111239
## 14     4  36  0.3284375  0.2540759 0.1230235 0.2944632
# Identify the predicted house as the one with the highest posterior
# probability in each validation row.
# A single vector supplies both the columns that are compared and the labels
# that are assigned, so the two can never fall out of order with each other.
house_cols <- c("Gryffindor", "Hufflepuff", "Ravenclaw", "Slytherin")

# max.col() returns, per row, the index of the largest value; ties resolve to
# the first (left-most) house.
max_prob <- max.col(valid_df_with_prob_class[, house_cols],
                    ties.method = "first")
valid_df_with_prob_class$Pred_House <- house_cols[max_prob]

head(valid_df_with_prob_class)
##      ID        Name Gender              Race Height         Publisher
## 2  A002  Abe Sapien   Male     Icthyo Sapien    191 Dark Horse Comics
## 3  A004 Abomination   Male Human / Radiation    203     Marvel Comics
## 5  A015 Alex Mercer   Male             Human     NA         Wildstorm
## 12 A036    Aquababy   Male              <NA>     NA         DC Comics
## 13 A039     Arachne Female             Human    175     Marvel Comics
## 14 A040   Archangel   Male            Mutant    183     Marvel Comics
##    Alignment Weight Manipulative Resourceful Dismissive Intelligent
## 2       good     65            7           7          6           8
## 3        bad    441            6           8          1           6
## 5        bad     NA           10           6          8           3
## 12      good     NA            6           6          6           2
## 13      good     63            9           7          9           8
## 14      good     68            7           6          3           9
##    Trusting Loyal Stubborn Brave HouseID     House STR DEX CON INT WIS CHA
## 2         6     7        6     9       1 Slytherin  16  17  10  13  15  11
## 3         3     3        5     2       1 Slytherin  13  14  13  10  18  15
## 5         4     4        1     8       1 Slytherin  14  17  13  12  10  11
## 12        3     6        7     5       1 Slytherin  13  14  12  17  12  11
## 13        6     3        2     5       1 Slytherin  10  14  15  17  12  16
## 14        4     7        2     8       1 Slytherin  16  14  14  13  10  12
##    Level  HP Gryffindor Hufflepuff Ravenclaw Slytherin Pred_House
## 2      8  72  0.2528202  0.2641105 0.1577407 0.3253287  Slytherin
## 3     15 135  0.2293724  0.2718088 0.2781108 0.2207079  Ravenclaw
## 5      9  72  0.2874606  0.2458260 0.1296283 0.3370851  Slytherin
## 12     3  30  0.2802979  0.2377423 0.1540282 0.3279316  Slytherin
## 13    12  84  0.2535968  0.1944250 0.2408543 0.3111239  Slytherin
## 14     4  36  0.3284375  0.2540759 0.1230235 0.2944632 Gryffindor