import pandas as pd
# Load the Demon Slayer dataset; the relative path is resolved against the
# current working directory.
demon = pd.read_csv("demon_slayer_10.csv")
# Preview the first rows to confirm the columns were parsed as expected.
demon.head()
ID | Character | Breathing | Moon | Form | Battle_difficulty | Effectiveness_rating | Hours_to_daybreak | Call_pillars | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | Tanjiro | Water | Upper | 4 | 28 | 4 | 4 | 1 |
1 | 2 | Inosuke | Beast | Lower | 7 | 20 | 5 | 3 | 0 |
2 | 3 | Zenitsu | Thunder | Upper | 1 | 24 | 6 | 4 | 0 |
3 | 4 | Inosuke | Beast | Lower | 4 | 27 | 7 | 5 | 0 |
4 | 5 | Inosuke | Beast | Upper | 1 | 43 | 1 | 8 | 0 |
Select the required variables: the three categorical predictors and the outcome.
# Keep only the three categorical predictors and the binary outcome.
# The columns are selected by name rather than by position, so the selection
# cannot silently pick up the wrong fields if the column order of the CSV
# ever changes; a missing column raises KeyError instead.
demon2 = demon[["Character", "Breathing", "Moon", "Call_pillars"]]
demon2.head()
Character | Breathing | Moon | Call_pillars | |
---|---|---|---|---|
0 | Tanjiro | Water | Upper | 1 |
1 | Inosuke | Beast | Lower | 0 |
2 | Zenitsu | Thunder | Upper | 0 |
3 | Inosuke | Beast | Lower | 0 |
4 | Inosuke | Beast | Upper | 0 |
# Predictor columns (all categorical) and the name of the binary outcome column.
predictors = ["Character", "Breathing", "Moon"]
# The column is spelled "Call_pillars" (lower-case "p"). The previous value,
# "Call_Pillars", matched no column and would raise KeyError as soon as it was used.
outcome = "Call_pillars"

# One-hot encode the predictors: one indicator column per category level.
X = pd.get_dummies(demon2[predictors])
# Index with `outcome` instead of repeating the literal so the two cannot drift apart.
y = demon2[outcome].astype("category")
X.head()
Character_Inosuke | Character_Nezuko | Character_Tanjiro | Character_Zenitsu | Breathing_Beast | Breathing_Blood Demon Art | Breathing_Sun | Breathing_Thunder | Breathing_Water | Moon_Lower | Moon_Upper | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
3 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
4 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
y.head()
0 1 1 0 2 0 3 0 4 0 Name: Call_pillars, dtype: category Categories (2, int64): [0, 1]
Check the classes.
list(y.cat.categories)
[0, 1]
Split the data.
import sklearn
from sklearn.model_selection import train_test_split
# Hold out 40% of the rows for validation; the fixed seed keeps the split reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=666,
)
Check the data.
X_train.head()
Character_Inosuke | Character_Nezuko | Character_Tanjiro | Character_Zenitsu | Breathing_Beast | Breathing_Blood Demon Art | Breathing_Sun | Breathing_Thunder | Breathing_Water | Moon_Lower | Moon_Upper | |
---|---|---|---|---|---|---|---|---|---|---|---|
3049 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
5182 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 |
4913 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
4683 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
232 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
X_valid.head()
Character_Inosuke | Character_Nezuko | Character_Tanjiro | Character_Zenitsu | Breathing_Beast | Breathing_Blood Demon Art | Breathing_Sun | Breathing_Thunder | Breathing_Water | Moon_Lower | Moon_Upper | |
---|---|---|---|---|---|---|---|---|---|---|---|
3958 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 |
2967 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 |
746 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 |
1856 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 |
3120 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 |
y_valid.head()
3958 1 2967 1 746 0 1856 0 3120 1 Name: Call_pillars, dtype: category Categories (2, int64): [0, 1]
# NOTE(review): this repeats the preceding y_valid.head() call verbatim;
# y_train.head() may have been intended so that both halves of the split
# are inspected — confirm.
y_valid.head()
3958 1 2967 1 746 0 1856 0 3120 1 Name: Call_pillars, dtype: category Categories (2, int64): [0, 1]
import sklearn
from sklearn.naive_bayes import MultinomialNB
# Multinomial naive Bayes classifier. alpha is the additive smoothing term;
# a small value such as 0.01 mainly keeps predictor/class combinations that
# never occur in the training data from getting a zero probability.
demon_nb_model = MultinomialNB(alpha = 0.01)
demon_nb_model
MultinomialNB(alpha=0.01)
# Fit on the training split only, so the validation rows stay unseen by the model.
demon_nb_model.fit(X_train, y_train)
MultinomialNB(alpha=0.01)
Predict probabilities, training set.
# One row per training record and one column per class, in demon_nb_model.classes_
# order. The outcome categories are [0, 1], so column 1 is the estimated
# probability that Call_pillars = 1.
predProb_train = demon_nb_model.predict_proba(X_train)
predProb_train
array([[0.66619876, 0.33380124], [0.68959507, 0.31040493], [0.67786638, 0.32213362], ..., [0.02583048, 0.97416952], [0.71892676, 0.28107324], [0.02719663, 0.97280337]])
Predict probabilities, validation set.
# Class probabilities for the held-out rows; column 1 (probability of class 1)
# is what the ROC curve and AUC computations further down consume.
predProb_valid = demon_nb_model.predict_proba(X_valid)
predProb_valid
array([[0.02583048, 0.97416952], [0.02583048, 0.97416952], [0.68959507, 0.31040493], ..., [0.02583048, 0.97416952], [0.66619876, 0.33380124], [0.67786638, 0.32213362]])
Predict class membership in training set.
# predict() returns the most probable class for each row; with two classes this
# is equivalent to cutting the class-1 probability at 0.5.
y_train_pred = demon_nb_model.predict(X_train)
y_train_pred
array([0, 0, 0, ..., 1, 0, 1])
Predict class membership in validation set.
y_valid_pred = demon_nb_model.predict(X_valid)
y_valid_pred
array([1, 1, 0, ..., 1, 0, 0])
# New record(s) to score. The file is expected to be already one-hot encoded
# with the same indicator columns as X, because it is handed to the fitted
# model below without any further encoding.
nb_new = pd.read_csv("nb_new.csv")
nb_new.head()
Character_Inosuke | Character_Nezuko | Character_Tanjiro | Character_Zenitsu | Breathing_Beast | Breathing_Blood Demon Art | Breathing_Sun | Breathing_Thunder | Breathing_Water | Moon_Lower | Moon_Upper | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
predProb_nb_new = demon_nb_model.predict_proba(nb_new)
predProb_nb_new
array([[0.02719663, 0.97280337]])
nb_new_pred = demon_nb_model.predict(nb_new)
nb_new_pred
array([1])
Confusion matrix, training set.
from sklearn.metrics import confusion_matrix, accuracy_score
# Rows are the actual classes and columns the predicted classes, both in label
# order (0, 1); the diagonal therefore holds the correctly classified counts.
confusion_matrix_train = confusion_matrix(y_train, y_train_pred)
confusion_matrix_train
array([[1744, 136], [1244, 875]])
# Share of training rows whose predicted class equals the actual class.
accuracy_train = accuracy_score(y_train, y_train_pred)
accuracy_train
0.654913728432108
from sklearn.metrics import classification_report
print(classification_report(y_train, y_train_pred))
precision recall f1-score support 0 0.58 0.93 0.72 1880 1 0.87 0.41 0.56 2119 accuracy 0.65 3999 macro avg 0.72 0.67 0.64 3999 weighted avg 0.73 0.65 0.63 3999
Confusion matrix, validation set.
# Same layout as the training matrix (rows = actual, columns = predicted), but
# computed on the held-out rows to gauge out-of-sample performance.
confusion_matrix_valid = confusion_matrix(y_valid, y_valid_pred)
confusion_matrix_valid
array([[1136, 105], [ 850, 576]])
accuracy_valid = accuracy_score(y_valid, y_valid_pred)
accuracy_valid
0.6419197600299963
print(classification_report(y_valid, y_valid_pred))
precision recall f1-score support 0 0.57 0.92 0.70 1241 1 0.85 0.40 0.55 1426 accuracy 0.64 2667 macro avg 0.71 0.66 0.63 2667 weighted avg 0.72 0.64 0.62 2667
ROC curve, validation set.
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve

# ROC curve for the validation set. Column 1 of predProb_valid is the predicted
# probability of class 1, which is treated as the positive class.
fpr1, tpr1, thresh1 = roc_curve(y_valid, predProb_valid[:, 1], pos_label=1)

# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and later
# releases dropped the old names, so use whichever name this installation provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(seaborn_style)
plt.plot(fpr1, tpr1, linestyle="-", color="blue", label="Demon Slayer")

# Reference diagonal (tpr = fpr): giving every row the same constant score makes
# roc_curve return a straight line from (0, 0) to (1, 1).
random_probs = [0] * len(y_valid)
p_fpr, p_tpr, _ = roc_curve(y_valid, random_probs, pos_label=1)
plt.plot(p_fpr, p_tpr, linestyle="--", color="black")
[<matplotlib.lines.Line2D at 0x7fd990b21190>]
AUC, validation set.
from sklearn.metrics import roc_auc_score
# Area under the validation ROC curve, computed from the predicted probability
# of class 1; 0.5 corresponds to chance-level ranking and 1.0 to perfect ranking.
auc = roc_auc_score(y_valid, predProb_valid[:,1])
auc
0.6774515643064849