Neural networks to predict whether to call for help from a Pillar.
1 = call for help; 0 = don’t call for help
Rank is given in 12 levels. Upper Moon 1 is Rank 1, Lower Moon 6 is Rank 12. This represents the presence of the Demon Moon in the battle.
Battle difficulty is given on a scale of 1 to 100.
Effectiveness of the attack on a scale of 1 to 10.
demon <- read.csv("demon_slayer_9.csv", header = TRUE)
head(demon, 10)
## ï..ID Character Breathing Form Battle_difficulty Effectiveness_rating
## 1 1 Tanjiro Water 4 28 4
## 2 2 Inosuke Beast 7 20 5
## 3 3 Zenitsu Thunder 1 24 6
## 4 4 Inosuke Beast 4 27 7
## 5 5 Inosuke Beast 1 43 1
## 6 6 Zenitsu Thunder 7 53 3
## 7 7 Zenitsu Thunder 1 45 5
## 8 8 Zenitsu Thunder 7 75 3
## 9 9 Inosuke Beast 4 59 10
## 10 10 Tanjiro Sun 13 35 6
## Hours_to_daybreak Rank Call_pillars
## 1 4 1 1
## 2 3 7 0
## 3 4 4 0
## 4 5 11 0
## 5 8 3 0
## 6 8 7 0
## 7 5 12 1
## 8 5 6 0
## 9 4 1 0
## 10 6 9 0
nrow(demon)
## [1] 6666
names(demon)
## [1] "ï..ID" "Character" "Breathing"
## [4] "Form" "Battle_difficulty" "Effectiveness_rating"
## [7] "Hours_to_daybreak" "Rank" "Call_pillars"
str(demon)
## 'data.frame': 6666 obs. of 9 variables:
## $ ï..ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Character : chr "Tanjiro" "Inosuke" "Zenitsu" "Inosuke" ...
## $ Breathing : chr "Water" "Beast" "Thunder" "Beast" ...
## $ Form : int 4 7 1 4 1 7 1 7 4 13 ...
## $ Battle_difficulty : int 28 20 24 27 43 53 45 75 59 35 ...
## $ Effectiveness_rating: int 4 5 6 7 1 3 5 3 10 6 ...
## $ Hours_to_daybreak : int 4 3 4 5 8 8 5 5 4 6 ...
## $ Rank : int 1 7 4 11 3 7 12 6 1 9 ...
## $ Call_pillars : int 1 0 0 0 0 0 1 0 0 0 ...
Rename.
names(demon)[1] <- "ID"
names(demon)
## [1] "ID" "Character" "Breathing"
## [4] "Form" "Battle_difficulty" "Effectiveness_rating"
## [7] "Hours_to_daybreak" "Rank" "Call_pillars"
demon_2 <- demon[,-c(1)]
head(demon_2)
## Character Breathing Form Battle_difficulty Effectiveness_rating
## 1 Tanjiro Water 4 28 4
## 2 Inosuke Beast 7 20 5
## 3 Zenitsu Thunder 1 24 6
## 4 Inosuke Beast 4 27 7
## 5 Inosuke Beast 1 43 1
## 6 Zenitsu Thunder 7 53 3
## Hours_to_daybreak Rank Call_pillars
## 1 4 1 1
## 2 3 7 0
## 3 4 4 0
## 4 5 11 0
## 5 8 3 0
## 6 8 7 0
nrow(demon_2)
## [1] 6666
names(demon_2)
## [1] "Character" "Breathing" "Form"
## [4] "Battle_difficulty" "Effectiveness_rating" "Hours_to_daybreak"
## [7] "Rank" "Call_pillars"
str(demon_2)
## 'data.frame': 6666 obs. of 8 variables:
## $ Character : chr "Tanjiro" "Inosuke" "Zenitsu" "Inosuke" ...
## $ Breathing : chr "Water" "Beast" "Thunder" "Beast" ...
## $ Form : int 4 7 1 4 1 7 1 7 4 13 ...
## $ Battle_difficulty : int 28 20 24 27 43 53 45 75 59 35 ...
## $ Effectiveness_rating: int 4 5 6 7 1 3 5 3 10 6 ...
## $ Hours_to_daybreak : int 4 3 4 5 8 8 5 5 4 6 ...
## $ Rank : int 1 7 4 11 3 7 12 6 1 9 ...
## $ Call_pillars : int 1 0 0 0 0 0 1 0 0 0 ...
The presence of characters in the group battle.
Recode, then remove the Character variable.
table(demon_2$Character)
##
## Inosuke Nezuko Tanjiro Zenitsu
## 1652 1692 1649 1673
demon_2$Tanjiro <- ifelse(demon_2$Character == "Tanjiro", 1 , 0)
demon_2$Nezuko <- ifelse(demon_2$Character == "Nezuko", 1 , 0)
demon_2$Zenitsu <- ifelse(demon_2$Character == "Zenitsu", 1 , 0)
demon_2$Inosuke <- ifelse(demon_2$Character == "Inosuke", 1 , 0)
names(demon_2)
## [1] "Character" "Breathing" "Form"
## [4] "Battle_difficulty" "Effectiveness_rating" "Hours_to_daybreak"
## [7] "Rank" "Call_pillars" "Tanjiro"
## [10] "Nezuko" "Zenitsu" "Inosuke"
demon_3 <- demon_2[, -c(1)]
names(demon_3)
## [1] "Breathing" "Form" "Battle_difficulty"
## [4] "Effectiveness_rating" "Hours_to_daybreak" "Rank"
## [7] "Call_pillars" "Tanjiro" "Nezuko"
## [10] "Zenitsu" "Inosuke"
The breathing forms used by the characters.
Recode, then remove the breathing variable.
table(demon_3$Breathing)
##
## Beast Blood Demon Art Sun Thunder Water
## 1652 1692 851 1673 798
demon_3$Water <- ifelse(demon_3$Breathing == "Water", 1 , 0)
demon_3$Blood <- ifelse(demon_3$Breathing == "Blood Demon Art", 1 , 0)
demon_3$Thunder <- ifelse(demon_3$Breathing == "Thunder", 1 , 0)
demon_3$Beast <- ifelse(demon_3$Breathing == "Beast", 1 , 0)
demon_3$Sun <- ifelse(demon_3$Breathing == "Sun", 1 , 0)
names(demon_3)
## [1] "Breathing" "Form" "Battle_difficulty"
## [4] "Effectiveness_rating" "Hours_to_daybreak" "Rank"
## [7] "Call_pillars" "Tanjiro" "Nezuko"
## [10] "Zenitsu" "Inosuke" "Water"
## [13] "Blood" "Thunder" "Beast"
## [16] "Sun"
demon_4 <- demon_3[, -c(1)]
names(demon_4)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Call_pillars"
## [7] "Tanjiro" "Nezuko" "Zenitsu"
## [10] "Inosuke" "Water" "Blood"
## [13] "Thunder" "Beast" "Sun"
names(demon_4)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Call_pillars"
## [7] "Tanjiro" "Nezuko" "Zenitsu"
## [10] "Inosuke" "Water" "Blood"
## [13] "Thunder" "Beast" "Sun"
For simplicity, assume Form is numerical, with higher numbers associated with complexity and power.
Reorder variables.
names(demon_4)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Call_pillars"
## [7] "Tanjiro" "Nezuko" "Zenitsu"
## [10] "Inosuke" "Water" "Blood"
## [13] "Thunder" "Beast" "Sun"
demon_5 <- demon_4[, c(1:5,7:15, 6)]
names(demon_5)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Tanjiro"
## [7] "Nezuko" "Zenitsu" "Inosuke"
## [10] "Water" "Blood" "Thunder"
## [13] "Beast" "Sun" "Call_pillars"
Using our favourite seed :-)
set.seed(666)
train_index <- sample(1:nrow(demon_5), 0.6 * nrow(demon_5))
valid_index <- setdiff(1:nrow(demon_5), train_index)
train_df <- demon_5[train_index, ]
valid_df <- demon_5[valid_index, ]
nrow(train_df)
## [1] 3999
nrow(valid_df)
## [1] 2667
names(train_df)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Tanjiro"
## [7] "Nezuko" "Zenitsu" "Inosuke"
## [10] "Water" "Blood" "Thunder"
## [13] "Beast" "Sun" "Call_pillars"
names(valid_df)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Tanjiro"
## [7] "Nezuko" "Zenitsu" "Inosuke"
## [10] "Water" "Blood" "Thunder"
## [13] "Beast" "Sun" "Call_pillars"
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
compare_df_cols(train_df, valid_df)
## column_name train_df valid_df
## 1 Battle_difficulty integer integer
## 2 Beast numeric numeric
## 3 Blood numeric numeric
## 4 Call_pillars integer integer
## 5 Effectiveness_rating integer integer
## 6 Form integer integer
## 7 Hours_to_daybreak integer integer
## 8 Inosuke numeric numeric
## 9 Nezuko numeric numeric
## 10 Rank integer integer
## 11 Sun numeric numeric
## 12 Tanjiro numeric numeric
## 13 Thunder numeric numeric
## 14 Water numeric numeric
## 15 Zenitsu numeric numeric
compare_df_cols_same(train_df, valid_df)
## [1] TRUE
names(train_df)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Tanjiro"
## [7] "Nezuko" "Zenitsu" "Inosuke"
## [10] "Water" "Blood" "Thunder"
## [13] "Beast" "Sun" "Call_pillars"
Normalise the numerical data using training set (i.e. excluding the dummy variables)
Create normalised values from training set.
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.4
train_df_norm <- preProcess(train_df[1:5], method = c("range"))
Transform training set using normalised values.
train_df_transform <- predict(train_df_norm, train_df[1:5])
summary(train_df_transform)
## Form Battle_difficulty Effectiveness_rating Hours_to_daybreak
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.2424 1st Qu.:0.2222 1st Qu.:0.2857
## Median :0.2308 Median :0.5051 Median :0.5556 Median :0.5714
## Mean :0.2732 Mean :0.4958 Mean :0.5012 Mean :0.4985
## 3rd Qu.:0.5385 3rd Qu.:0.7374 3rd Qu.:0.7778 3rd Qu.:0.7143
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Rank
## Min. :0.0000
## 1st Qu.:0.1818
## Median :0.4545
## Mean :0.4943
## 3rd Qu.:0.7273
## Max. :1.0000
names(train_df_transform)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank"
Transform validation set using normalised values.
valid_df_transform <- predict(train_df_norm, valid_df[1:5])
summary(valid_df_transform)
## Form Battle_difficulty Effectiveness_rating Hours_to_daybreak
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.07692 1st Qu.:0.2424 1st Qu.:0.2222 1st Qu.:0.2857
## Median :0.23077 Median :0.4949 Median :0.5556 Median :0.4286
## Mean :0.27034 Mean :0.4939 Mean :0.4969 Mean :0.4988
## 3rd Qu.:0.53846 3rd Qu.:0.7475 3rd Qu.:0.7778 3rd Qu.:0.8571
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Rank
## Min. :0.0000
## 1st Qu.:0.1818
## Median :0.4545
## Mean :0.4986
## 3rd Qu.:0.7273
## Max. :1.0000
names(valid_df_transform)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank"
Create full training and validation sets with dummy variables.
train_df_2 <- cbind(train_df_transform, train_df[6:15])
names(train_df_2)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Tanjiro"
## [7] "Nezuko" "Zenitsu" "Inosuke"
## [10] "Water" "Blood" "Thunder"
## [13] "Beast" "Sun" "Call_pillars"
str(train_df_2)
## 'data.frame': 3999 obs. of 15 variables:
## $ Form : num 0 0.0769 0 0.6923 0.0769 ...
## $ Battle_difficulty : num 0.515 0 0.717 0.737 0.253 ...
## $ Effectiveness_rating: num 0.889 0.778 1 0.778 0.556 ...
## $ Hours_to_daybreak : num 0.571 0.286 1 1 0.143 ...
## $ Rank : num 0.273 0 0 0 0.273 ...
## $ Tanjiro : num 0 0 0 1 0 0 0 0 0 0 ...
## $ Nezuko : num 1 0 1 0 0 0 1 1 0 1 ...
## $ Zenitsu : num 0 1 0 0 1 1 0 0 1 0 ...
## $ Inosuke : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Water : num 0 0 0 1 0 0 0 0 0 0 ...
## $ Blood : num 1 0 1 0 0 0 1 1 0 1 ...
## $ Thunder : num 0 1 0 0 1 1 0 0 1 0 ...
## $ Beast : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sun : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Call_pillars : int 1 1 0 0 1 0 1 1 0 0 ...
valid_df_2 <- cbind(valid_df_transform, valid_df[6:15])
names(valid_df_2)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Tanjiro"
## [7] "Nezuko" "Zenitsu" "Inosuke"
## [10] "Water" "Blood" "Thunder"
## [13] "Beast" "Sun" "Call_pillars"
str(valid_df_2)
## 'data.frame': 2667 obs. of 15 variables:
## $ Form : num 0.3077 0.0769 0.0769 0.5385 0.0769 ...
## $ Battle_difficulty : num 0.273 0.232 0.424 0.525 0.444 ...
## $ Effectiveness_rating: num 0.333 0.556 0 0.222 0.444 ...
## $ Hours_to_daybreak : num 0.429 0.429 1 1 0.571 ...
## $ Rank : num 0 0.273 0.182 0.545 1 ...
## $ Tanjiro : num 1 0 0 0 0 0 1 0 0 1 ...
## $ Nezuko : num 0 0 0 0 0 0 0 0 1 0 ...
## $ Zenitsu : num 0 1 0 1 1 0 0 1 0 0 ...
## $ Inosuke : num 0 0 1 0 0 1 0 0 0 0 ...
## $ Water : num 1 0 0 0 0 0 0 0 0 0 ...
## $ Blood : num 0 0 0 0 0 0 0 0 1 0 ...
## $ Thunder : num 0 1 0 1 1 0 0 1 0 0 ...
## $ Beast : num 0 0 1 0 0 1 0 0 0 0 ...
## $ Sun : num 0 0 0 0 0 0 1 0 0 1 ...
## $ Call_pillars : int 1 0 0 0 1 0 0 0 1 0 ...
Check.
compare_df_cols(train_df_2, valid_df_2)
## column_name train_df_2 valid_df_2
## 1 Battle_difficulty numeric numeric
## 2 Beast numeric numeric
## 3 Blood numeric numeric
## 4 Call_pillars integer integer
## 5 Effectiveness_rating numeric numeric
## 6 Form numeric numeric
## 7 Hours_to_daybreak numeric numeric
## 8 Inosuke numeric numeric
## 9 Nezuko numeric numeric
## 10 Rank numeric numeric
## 11 Sun numeric numeric
## 12 Tanjiro numeric numeric
## 13 Thunder numeric numeric
## 14 Water numeric numeric
## 15 Zenitsu numeric numeric
compare_df_cols_same(train_df_2, valid_df_2)
## [1] TRUE
1 layer, 2 nodes.
Set the stepmax if needed. Per the documentation, it is the maximum steps for the training of the neural network. Reaching this maximum leads to a stop of the neural network’s training process.
Increase the rep as needed. Per the documentation, it is the number of repetitions for the neural network’s training.
library(neuralnet)
## Warning: package 'neuralnet' was built under R version 4.0.5
names(train_df_2)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Tanjiro"
## [7] "Nezuko" "Zenitsu" "Inosuke"
## [10] "Water" "Blood" "Thunder"
## [13] "Beast" "Sun" "Call_pillars"
train_df_2_nn1 <- neuralnet(Call_pillars ~ ., data = train_df_2, act.fct = "logistic", linear.output = FALSE, hidden = 2, stepmax = 1000000, rep = 2)
train_df_2_nn1$weights
## [[1]]
## [[1]][[1]]
## [,1] [,2]
## [1,] -50.426971 36.7345337
## [2,] 10.805680 -5.9952333
## [3,] 193.974976 44.0028946
## [4,] 8.953766 0.9585741
## [5,] 277.654314 -268.5245281
## [6,] 88.997577 -0.1057084
## [7,] -16.348574 2.6493832
## [8,] 7.762319 132.5877505
## [9,] -34.696665 1.1788552
## [10,] -6.683033 2.3656213
## [11,] -8.220026 1.3873790
## [12,] 8.285458 133.8915428
## [13,] -32.404576 -0.1234626
## [14,] -6.480132 1.0894272
## [15,] -73.802960 0.1219277
##
## [[1]][[2]]
## [,1]
## [1,] 0.6860964
## [2,] -1.7139075
## [3,] 2.7039296
##
##
## [[2]]
## [[2]][[1]]
## [,1] [,2]
## [1,] 79.5260970 53.868725
## [2,] -0.8955655 6.417298
## [3,] -97.5051023 4.008769
## [4,] -0.3629376 -1.406463
## [5,] -3.1313360 -423.261997
## [6,] -2.6269049 55.813151
## [7,] 5.6882079 2.447568
## [8,] 6.3672601 184.872776
## [9,] 6.4704520 3.812018
## [10,] 5.9544758 5.100791
## [11,] 5.8845943 4.361178
## [12,] 6.9796005 186.002533
## [13,] 5.6032181 6.309262
## [14,] 5.3071607 4.798609
## [15,] 7.1448084 6.860455
##
## [[2]][[2]]
## [,1]
## [1,] 2.327856
## [2,] -3.762068
## [3,] 3.098764
plot(train_df_2_nn1, rep = "best")
train_pred <- compute(train_df_2_nn1,
train_df_2[, -c(15)])
Check predictions.
head(train_pred$net.result, 10)
## [,1]
## 1598 0.8423899
## 4734 0.6650980
## 5003 0.8423899
## 873 0.2635087
## 652 0.8882939
## 1697 0.2635087
## 1074 0.8423899
## 6275 0.8423899
## 1125 0.2635087
## 5645 0.8423899
train_pred_val <- train_pred$net.result
train_pred_class <- ifelse(train_pred_val >= 0.5, 1, 0)
head(train_pred_class, 10)
## [,1]
## 1598 1
## 4734 1
## 5003 1
## 873 0
## 652 1
## 1697 0
## 1074 1
## 6275 1
## 1125 0
## 5645 1
Convert predictions to a data frame.
train_pred_class_df <- as.data.frame(train_pred_class)
head(train_pred_class_df)
## V1
## 1598 1
## 4734 1
## 5003 1
## 873 0
## 652 1
## 1697 0
names(train_pred_class_df)[1] <- "Call_pillars"
head(train_pred_class_df)
## Call_pillars
## 1598 1
## 4734 1
## 5003 1
## 873 0
## 652 1
## 1697 0
table(train_pred_class_df$Call_pillars)
##
## 0 1
## 2169 1830
confusionMatrix(as.factor(train_df_2$Call_pillars),
as.factor(train_pred_class_df$Call_pillars), positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 1596 280
## 1 573 1550
##
## Accuracy : 0.7867
## 95% CI : (0.7737, 0.7993)
## No Information Rate : 0.5424
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5756
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.8470
## Specificity : 0.7358
## Pos Pred Value : 0.7301
## Neg Pred Value : 0.8507
## Prevalence : 0.4576
## Detection Rate : 0.3876
## Detection Prevalence : 0.5309
## Balanced Accuracy : 0.7914
##
## 'Positive' Class : 1
##
valid_pred <- compute(train_df_2_nn1,
valid_df_2[, -c(15)])
Check predicted values on validation set.
head(valid_pred$net.result, 10)
## [,1]
## 1 0.2635087
## 3 0.2635087
## 5 0.2635087
## 6 0.2635087
## 7 0.2635087
## 9 0.2635087
## 10 0.2635087
## 12 0.2635087
## 13 0.8423899
## 14 0.2635087
valid_prob <- valid_pred$net.result
valid_pred_class <- ifelse(valid_prob >= 0.5, 1, 0)
head(valid_pred_class, 10)
## [,1]
## 1 0
## 3 0
## 5 0
## 6 0
## 7 0
## 9 0
## 10 0
## 12 0
## 13 1
## 14 0
Convert predictions to a data frame.
valid_pred_class_df <- as.data.frame(valid_pred_class)
head(valid_pred_class_df)
## V1
## 1 0
## 3 0
## 5 0
## 6 0
## 7 0
## 9 0
names(valid_pred_class_df)[1] <- "Call_pillars"
head(valid_pred_class_df)
## Call_pillars
## 1 0
## 3 0
## 5 0
## 6 0
## 7 0
## 9 0
confusionMatrix(as.factor(valid_df_2$Call_pillars),
as.factor(valid_pred_class_df$Call_pillars),
positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 1042 203
## 1 393 1029
##
## Accuracy : 0.7765
## 95% CI : (0.7602, 0.7922)
## No Information Rate : 0.5381
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5553
##
## Mcnemar's Test P-Value : 9.806e-15
##
## Sensitivity : 0.8352
## Specificity : 0.7261
## Pos Pred Value : 0.7236
## Neg Pred Value : 0.8369
## Prevalence : 0.4619
## Detection Rate : 0.3858
## Detection Prevalence : 0.5332
## Balanced Accuracy : 0.7807
##
## 'Positive' Class : 1
##
Using ROSE.
library(ROSE)
## Loaded ROSE 0.0-3
# ROC/AUC should be computed from the predicted probabilities, not the
# thresholded 0/1 classes: class labels give a single operating point and
# understate the AUC. valid_prob holds the raw net.result probabilities.
# Re-run to refresh the printed AUC below.
ROSE::roc.curve(valid_df_2$Call_pillars,
as.numeric(valid_prob))
## Area under the curve (AUC): 0.780
Using ROCR. Requires numerical data.
library(ROCR)
## Warning: package 'ROCR' was built under R version 4.0.5
##
## Attaching package: 'ROCR'
## The following object is masked from 'package:neuralnet':
##
## prediction
prediction_1 <- ROCR::prediction(as.numeric(valid_pred_class_df$Call_pillars), as.numeric(valid_df_2$Call_pillars))
ROC with TPR and FPR.
perform_1_v1 <- performance(prediction_1, "tpr", "fpr")
plot(perform_1_v1)
AUC.
perform_1_auc <- performance(prediction_1, "auc")
perform_1_auc@y.name
## [1] "Area under the ROC curve"
perform_1_auc@y.values
## [[1]]
## [1] 0.7802882
Precision and recall.
perform_1_v2 <- performance(prediction_1, "prec", "rec")
plot(perform_1_v2)
Sensitivity and Specificity.
perform_1_v3 <- performance(prediction_1, "sens", "spec")
plot(perform_1_v3)
1 layer, 5 nodes.
library(neuralnet)
train_df_2_nn2 <- neuralnet(Call_pillars ~ ., data =
train_df_2,
linear.output =
FALSE, act.fct = "logistic", hidden = 5,
stepmax = 1000000, rep = 2)
train_df_2_nn2$weights
## [[1]]
## [[1]][[1]]
## [,1] [,2] [,3] [,4] [,5]
## [1,] 26.444672 -130.4094710 0.87281189 13.931272 49.79712807
## [2,] -8.230804 124.6198315 0.03392294 -2.140055 2.78232700
## [3,] -2.382893 -134.3861189 -0.50357391 -2.481367 -69.12361586
## [4,] -5.618355 -51.7496290 -0.14352321 1.334470 1.37435439
## [5,] -7.287508 422.2891990 0.41033850 -73.704138 0.67306657
## [6,] -159.114905 47.5291471 0.01917555 -5.779833 -0.07346134
## [7,] 123.577415 -3.6040495 -3.52163233 4.302308 6.38751205
## [8,] -11.667077 -161.4284512 0.40307491 50.412978 -381.12897690
## [9,] 67.385617 -16.0845317 -0.09856417 1.663343 -381.70692185
## [10,] -10.152370 -2.5027749 -1.81069762 1.821248 5.91330714
## [11,] 12.219099 -98.7714093 -707.30944276 4.527302 6.08477907
## [12,] -13.173774 -161.0224658 2.15851811 52.611782 -380.09740281
## [13,] 69.752215 -17.6863415 0.48458704 1.055419 -381.11171324
## [14,] -10.213903 -0.9412664 -0.59664294 2.058414 6.54394004
## [15,] 15.882576 -2.7002854 -1.24004475 4.855282 5.44316869
##
## [[1]][[2]]
## [,1]
## [1,] 16.572195
## [2,] -3.526297
## [3,] 1.050166
## [4,] -19.526381
## [5,] 4.023391
## [6,] -15.562737
##
##
## [[2]]
## [[2]][[1]]
## [,1] [,2] [,3] [,4] [,5]
## [1,] -7.4721271 -6.79570163 310.9172416 -133.087819 -156.9062803
## [2,] -1.1379686 -0.43486049 -321.2468159 39.751256 -0.7939633
## [3,] -0.1052957 0.06694744 -398.3525678 9.172921 177.1226270
## [4,] -0.3121846 -0.12500447 32.8305522 1.120895 -0.9304677
## [5,] 0.9874076 0.46611758 303.2441211 512.308268 2.6041636
## [6,] 8.7850389 8.19786395 -323.3676259 55.803833 2.1245668
## [7,] 0.7921269 -2.33711768 0.1376323 -11.408409 -3.3169986
## [8,] 22.2566926 -355.68967892 -302.0238070 -221.726219 97.3259934
## [9,] -0.4704111 -0.77368488 73.0849741 -10.004848 -4.5979525
## [10,] 0.1524284 -0.68282252 77.4757943 -5.944947 -2.3737957
## [11,] -2.2478697 -1.02237181 15.7444425 -20.326013 -2.0888809
## [12,] 22.2252293 -355.71981480 -300.8382614 -222.710579 95.9411828
## [13,] -0.8978284 -2.59491266 73.5006646 -6.880786 -4.5525984
## [14,] -1.5016103 -2.70645022 77.0141767 -6.051479 -3.2672758
## [15,] -2.2204578 -1.06084302 13.4004029 -24.020942 -4.0677390
##
## [[2]][[2]]
## [,1]
## [1,] 0.8475312
## [2,] -28.5240062
## [3,] 130.5633418
## [4,] 1.2737533
## [5,] -3.6565960
## [6,] 29.4379142
plot(train_df_2_nn2, rep = "best")
Predictions on training set, neural network 2.
train_pred_2 <- compute(train_df_2_nn2,
train_df_2[, -c(15)])
Check predicted values neural network 2.
head(train_pred_2$net.result, 10)
## [,1]
## 1598 0.8547635
## 4734 0.1115173
## 5003 0.8503597
## 873 0.1874495
## 652 0.9095376
## 1697 0.1264152
## 1074 0.8407185
## 6275 0.8443391
## 1125 0.2900139
## 5645 0.8453829
Convert to predictions.
train_pred_val_2 <- train_pred_2$net.result
train_pred_class_2 <- ifelse(train_pred_val_2 >= 0.5, 1, 0)
head(train_pred_class_2, 10)
## [,1]
## 1598 1
## 4734 0
## 5003 1
## 873 0
## 652 1
## 1697 0
## 1074 1
## 6275 1
## 1125 0
## 5645 1
train_pred_class_2_df <- as.data.frame(train_pred_class_2)
head(train_pred_class_2_df)
## V1
## 1598 1
## 4734 0
## 5003 1
## 873 0
## 652 1
## 1697 0
names(train_pred_class_2_df)[1] <- "Call_pillars"
head(train_pred_class_2_df)
## Call_pillars
## 1598 1
## 4734 0
## 5003 1
## 873 0
## 652 1
## 1697 0
Accuracy for training set, neural network 2.
confusionMatrix(as.factor(train_df_2$Call_pillars),
as.factor(train_pred_class_2_df$Call_pillars), positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 1565 311
## 1 363 1760
##
## Accuracy : 0.8315
## 95% CI : (0.8195, 0.8429)
## No Information Rate : 0.5179
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.6622
##
## Mcnemar's Test P-Value : 0.04948
##
## Sensitivity : 0.8498
## Specificity : 0.8117
## Pos Pred Value : 0.8290
## Neg Pred Value : 0.8342
## Prevalence : 0.5179
## Detection Rate : 0.4401
## Detection Prevalence : 0.5309
## Balanced Accuracy : 0.8308
##
## 'Positive' Class : 1
##
Predictions on validation set.
# Predictions on the validation set for neural network 2.
# Bug fix: this previously called compute() on train_df_2_nn1, so every
# "model 2" validation result (confusion matrix, ROC, AUC) silently
# duplicated model 1's. Use train_df_2_nn2 and re-run to refresh the
# captured output below.
valid_pred_2 <- compute(train_df_2_nn2,
valid_df_2[, -c(15)])
Check predicted values on validation set.
head(valid_pred_2$net.result, 10)
## [,1]
## 1 0.2635087
## 3 0.2635087
## 5 0.2635087
## 6 0.2635087
## 7 0.2635087
## 9 0.2635087
## 10 0.2635087
## 12 0.2635087
## 13 0.8423899
## 14 0.2635087
valid_pred_val_2 <- valid_pred_2$net.result
valid_pred_class_2 <- ifelse(valid_pred_val_2 >= 0.5, 1, 0)
head(valid_pred_class_2, 10)
## [,1]
## 1 0
## 3 0
## 5 0
## 6 0
## 7 0
## 9 0
## 10 0
## 12 0
## 13 1
## 14 0
Convert predictions to a data frame.
valid_pred_class_df_2 <- as.data.frame(valid_pred_class_2)
head(valid_pred_class_df_2)
## V1
## 1 0
## 3 0
## 5 0
## 6 0
## 7 0
## 9 0
names(valid_pred_class_df_2)[1] <- "Call_pillars"
head(valid_pred_class_df_2)
## Call_pillars
## 1 0
## 3 0
## 5 0
## 6 0
## 7 0
## 9 0
Accuracy for validation set.
confusionMatrix(as.factor(valid_df_2$Call_pillars),
as.factor(valid_pred_class_df_2$Call_pillars),
positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 1042 203
## 1 393 1029
##
## Accuracy : 0.7765
## 95% CI : (0.7602, 0.7922)
## No Information Rate : 0.5381
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5553
##
## Mcnemar's Test P-Value : 9.806e-15
##
## Sensitivity : 0.8352
## Specificity : 0.7261
## Pos Pred Value : 0.7236
## Neg Pred Value : 0.8369
## Prevalence : 0.4619
## Detection Rate : 0.3858
## Detection Prevalence : 0.5332
## Balanced Accuracy : 0.7807
##
## 'Positive' Class : 1
##
ROC.
Using ROSE.
library(ROSE)
# Use the model-2 predicted probabilities (valid_pred_val_2) rather than the
# thresholded classes: thresholding first reduces the ROC to one operating
# point and understates the AUC. Re-run to refresh the printed AUC.
ROSE::roc.curve(valid_df_2$Call_pillars,
as.numeric(valid_pred_val_2))
## Area under the curve (AUC): 0.780
Using ROCR. Requires numerical data.
library(ROCR)
prediction_2 <- ROCR::prediction(as.numeric(valid_pred_class_df_2$Call_pillars), as.numeric(valid_df_2$Call_pillars))
ROC with TPR and FPR.
perform_2_v1 <- performance(prediction_2, "tpr", "fpr")
plot(perform_2_v1)
AUC.
perform_2_auc <- performance(prediction_2, "auc")
perform_2_auc@y.name
## [1] "Area under the ROC curve"
perform_2_auc@y.values
## [[1]]
## [1] 0.7802882
Precision and recall.
perform_2_v2 <- performance(prediction_2, "prec", "rec")
plot(perform_2_v2)
Sensitivity and Specificity.
perform_2_v3 <- performance(prediction_2, "sens", "spec")
plot(perform_2_v3)
2 layers, 3 nodes each (hidden = c(3, 3)).
train_df_2_nn3 <- neuralnet(Call_pillars ~ ., data = train_df_2, linear.output = FALSE, act.fct = "logistic", hidden = c(3, 3), stepmax = 10000000000, rep = 2)
train_df_2_nn3$weights
## [[1]]
## [[1]][[1]]
## [,1] [,2] [,3]
## [1,] 11.5992714 0.57492293 5.3892202
## [2,] -0.1292135 -0.08147665 -3.2399845
## [3,] -32.7475571 -0.06095625 -2.2726688
## [4,] 0.1899531 0.09680151 0.8223979
## [5,] 0.4141658 0.14923032 -55.5110124
## [6,] 8.0698416 5.41144659 2.9446754
## [7,] 5.1564213 -1.34581268 3.8683265
## [8,] 6.6287530 18.13363768 -5.5667190
## [9,] 5.1170928 -1.50290815 3.2142141
## [10,] 5.4491127 -2.57229303 3.3818625
## [11,] 6.7029087 -3.54457334 1.4064230
## [12,] 5.3410325 18.02212150 -4.5511114
## [13,] 6.5194199 -3.57056747 3.5703294
## [14,] 6.3395399 -2.37501860 1.4809179
## [15,] 6.4947908 -3.68727184 2.6886134
##
## [[1]][[2]]
## [,1] [,2] [,3]
## [1,] -85.45083 39.75158 -1.141327
## [2,] -1608.02943 9826.21739 -2.857431
## [3,] 2336.71022 -6141.99982 2.738347
## [4,] 2648.90863 -16016.32311 7.194326
##
## [[1]][[3]]
## [,1]
## [1,] 3.128861
## [2,] 4.546573
## [3,] -4.548737
## [4,] -6.215599
##
##
## [[2]]
## [[2]][[1]]
## [,1] [,2] [,3]
## [1,] 3.92236061 -0.91149655 0.28923863
## [2,] -0.02036533 0.05632766 -0.02046306
## [3,] -3.07370052 -2.99881815 -3.00696583
## [4,] 0.02172538 -0.05517783 0.03208024
## [5,] -1.09420669 3.55102906 -0.96200772
## [6,] -0.60842954 1.63472956 1.56041847
## [7,] -0.81282409 0.16310671 0.51200584
## [8,] 18.07397663 4.35119949 20.35283758
## [9,] -0.72756005 0.94019386 0.45977579
## [10,] -1.55783576 0.68832983 0.33640625
## [11,] -0.04609823 0.32606603 0.02819863
## [12,] 18.08116973 3.10303373 18.64445244
## [13,] -0.25927120 -0.15079459 -0.05708675
## [14,] 0.65434236 -0.18775552 0.13899901
## [15,] -0.13301352 0.59017279 -0.06529873
##
## [[2]][[2]]
## [,1] [,2] [,3]
## [1,] -4.727008 -427.2540 1461.3253
## [2,] -79.315564 778.9679 -2984.8013
## [3,] -6.448387 -6090.5422 -911.2694
## [4,] 82.922810 5677.9716 -186.5053
##
## [[2]][[3]]
## [,1]
## [1,] -1.728495
## [2,] 6654.090103
## [3,] 3.404423
## [4,] 228782.026002
plot(train_df_2_nn3, rep = "best")
Predictions on training set, neural network 3.
train_pred_3 <- compute(train_df_2_nn3,
train_df_2[, -c(15)])
Check predicted values neural network 3.
head(train_pred_3$net.result, 10)
## [,1]
## 1598 0.8523778
## 4734 0.1724105
## 5003 0.6196951
## 873 0.1247076
## 652 0.8440818
## 1697 0.1511519
## 1074 0.8521739
## 6275 0.8524088
## 1125 0.1767908
## 5645 0.8524083
Predictions on training set, neural network 3.
train_pred_3 <- compute(train_df_2_nn3,
train_df_2[, -c(15)])
Check predicted values neural network 3.
head(train_pred_3$net.result, 10)
## [,1]
## 1598 0.8523778
## 4734 0.1724105
## 5003 0.6196951
## 873 0.1247076
## 652 0.8440818
## 1697 0.1511519
## 1074 0.8521739
## 6275 0.8524088
## 1125 0.1767908
## 5645 0.8524083
Convert to predictions.
train_pred_val_3 <- train_pred_3$net.result
train_pred_class_3 <- ifelse(train_pred_val_3 >= 0.5, 1, 0)
head(train_pred_class_3, 10)
## [,1]
## 1598 1
## 4734 0
## 5003 1
## 873 0
## 652 1
## 1697 0
## 1074 1
## 6275 1
## 1125 0
## 5645 1
train_pred_class_3_df <- as.data.frame(train_pred_class_3)
head(train_pred_class_3_df)
## V1
## 1598 1
## 4734 0
## 5003 1
## 873 0
## 652 1
## 1697 0
names(train_pred_class_3_df)[1] <- "Call_pillars"
head(train_pred_class_3_df)
## Call_pillars
## 1598 1
## 4734 0
## 5003 1
## 873 0
## 652 1
## 1697 0
Accuracy for training set, neural network 3.
confusionMatrix(as.factor(train_df_2$Call_pillars),
as.factor(train_pred_class_3_df$Call_pillars),
positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 1559 317
## 1 282 1841
##
## Accuracy : 0.8502
## 95% CI : (0.8388, 0.8611)
## No Information Rate : 0.5396
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.699
##
## Mcnemar's Test P-Value : 0.1648
##
## Sensitivity : 0.8531
## Specificity : 0.8468
## Pos Pred Value : 0.8672
## Neg Pred Value : 0.8310
## Prevalence : 0.5396
## Detection Rate : 0.4604
## Detection Prevalence : 0.5309
## Balanced Accuracy : 0.8500
##
## 'Positive' Class : 1
##
Predictions on validation set.
valid_pred_3 <- compute(train_df_2_nn3,
valid_df_2[, -c(15)])
Check predicted values on validation set.
head(valid_pred_3$net.result, 10)
## [,1]
## 1 0.1771068
## 3 0.1753435
## 5 0.1760466
## 6 0.1671761
## 7 0.9153649
## 9 0.1766009
## 10 0.1504133
## 12 0.1761230
## 13 0.8033776
## 14 0.1156796
# Bug fix: this previously read valid_pred_2$net.result, so model 3's class
# predictions (and everything downstream: confusion matrix, ROC, AUC) were
# really an earlier model's. Note the head() shown below matches model 1's
# probabilities, not the nn3 probabilities printed just above. Use
# valid_pred_3 and re-run to refresh the captured output.
valid_pred_val_3 <- valid_pred_3$net.result
valid_pred_class_3 <- ifelse(valid_pred_val_3 >= 0.5, 1, 0)
head(valid_pred_class_3, 10)
## [,1]
## 1 0
## 3 0
## 5 0
## 6 0
## 7 0
## 9 0
## 10 0
## 12 0
## 13 1
## 14 0
Convert predictions to a data frame.
valid_pred_class_df_3 <- as.data.frame(valid_pred_class_3)
head(valid_pred_class_df_3)
## V1
## 1 0
## 3 0
## 5 0
## 6 0
## 7 0
## 9 0
names(valid_pred_class_df_3)[1] <- "Call_pillars"
head(valid_pred_class_df_3)
## Call_pillars
## 1 0
## 3 0
## 5 0
## 6 0
## 7 0
## 9 0
Accuracy for validation set.
confusionMatrix(as.factor(valid_df_2$Call_pillars),
as.factor(valid_pred_class_df_3$Call_pillars),
positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 1042 203
## 1 393 1029
##
## Accuracy : 0.7765
## 95% CI : (0.7602, 0.7922)
## No Information Rate : 0.5381
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5553
##
## Mcnemar's Test P-Value : 9.806e-15
##
## Sensitivity : 0.8352
## Specificity : 0.7261
## Pos Pred Value : 0.7236
## Neg Pred Value : 0.8369
## Prevalence : 0.4619
## Detection Rate : 0.3858
## Detection Prevalence : 0.5332
## Balanced Accuracy : 0.7807
##
## 'Positive' Class : 1
##
ROC.
Using ROSE.
library(ROSE)
# Use model 3's own predicted probabilities, taken directly from
# valid_pred_3, rather than thresholded 0/1 classes: class labels yield a
# single operating point and understate the AUC. Re-run to refresh the
# printed AUC.
ROSE::roc.curve(valid_df_2$Call_pillars,
as.numeric(valid_pred_3$net.result))
## Area under the curve (AUC): 0.780
Using ROCR. Requires numerical data.
library(ROCR)
prediction_3 <- ROCR::prediction(as.numeric(valid_pred_class_df_3$Call_pillars), as.numeric(valid_df_2$Call_pillars))
ROC with TPR and FPR.
perform_3_v1 <- performance(prediction_3, "tpr", "fpr")
plot(perform_3_v1)
AUC.
perform_3_auc <- performance(prediction_3, "auc")
perform_3_auc@y.name
## [1] "Area under the ROC curve"
perform_3_auc@y.values
## [[1]]
## [1] 0.7802882
Precision and recall.
perform_3_v2 <- performance(prediction_3, "prec", "rec")
plot(perform_3_v2)
Sensitivity and Specificity.
perform_3_v3 <- performance(prediction_3, "sens", "spec")
plot(perform_3_v3)
Strange numbers :-)
new_battle_df <- data.frame(Form = 6, Battle_difficulty = 66,
Effectiveness_rating = 6,
Hours_to_daybreak = 6,
Rank = 6, Tanjiro = 0, Nezuko = 0,
Zenitsu = 0, Inosuke = 1,
Water = 0, Blood = 0, Thunder = 0,
Beast = 1, Sun = 0)
new_battle_df
## Form Battle_difficulty Effectiveness_rating Hours_to_daybreak Rank Tanjiro
## 1 6 66 6 6 6 0
## Nezuko Zenitsu Inosuke Water Blood Thunder Beast Sun
## 1 0 0 1 0 0 0 1 0
Normalise using training set.
new_battle_transform <- predict(train_df_norm, new_battle_df[1:5])
new_battle_transform
## Form Battle_difficulty Effectiveness_rating Hours_to_daybreak Rank
## 1 0.4615385 0.6565657 0.5555556 0.7142857 0.4545455
Combine with new battle.
new_battle_df_2 <- cbind(new_battle_transform,
new_battle_df[6:14])
names(new_battle_df_2)
## [1] "Form" "Battle_difficulty" "Effectiveness_rating"
## [4] "Hours_to_daybreak" "Rank" "Tanjiro"
## [7] "Nezuko" "Zenitsu" "Inosuke"
## [10] "Water" "Blood" "Thunder"
## [13] "Beast" "Sun"
str(new_battle_df_2)
## 'data.frame': 1 obs. of 14 variables:
## $ Form : num 0.462
## $ Battle_difficulty : num 0.657
## $ Effectiveness_rating: num 0.556
## $ Hours_to_daybreak : num 0.714
## $ Rank : num 0.455
## $ Tanjiro : num 0
## $ Nezuko : num 0
## $ Zenitsu : num 0
## $ Inosuke : num 1
## $ Water : num 0
## $ Blood : num 0
## $ Thunder : num 0
## $ Beast : num 1
## $ Sun : num 0
Use the first model for simplicity.
A more complex model may not necessarily be better.
new_battle_pred <- compute(train_df_2_nn1,
new_battle_df_2)
new_battle_pred$net.result
## [,1]
## [1,] 0.2635087
ifelse(new_battle_pred$net.result>=0.5, "Call Pillars", "Don't Call Pillars")
## [,1]
## [1,] "Don't Call Pillars"
The force is strong.