Logistic regression to predict decision on a football player.
# Load the player dataset (the first row of the CSV holds the column names)
# and preview the first ten records.
football <- read.csv(file = "football_3.csv", header = TRUE)
head(x = football, n = 10)
## ID Name Age Photo
## 1 207439 L. Paredes 24 https://cdn.sofifa.org/players/4/19/207439.png
## 2 156713 A. Granqvist 33 https://cdn.sofifa.org/players/4/19/156713.png
## 3 229909 A. Lunev 26 https://cdn.sofifa.org/players/4/19/229909.png
## 4 187347 I. Smolnikov 29 https://cdn.sofifa.org/players/4/19/187347.png
## 5 153260 Hilton 40 https://cdn.sofifa.org/players/4/19/153260.png
## 6 187607 A. Dzyuba 29 https://cdn.sofifa.org/players/4/19/187607.png
## 7 204341 Luí_s Neto 30 https://cdn.sofifa.org/players/4/19/204341.png
## 8 223058 D. Kuzyaev 25 https://cdn.sofifa.org/players/4/19/223058.png
## 9 183389 G. Sio 29 https://cdn.sofifa.org/players/4/19/183389.png
## 10 156092 J. Villar 41 https://cdn.sofifa.org/players/4/19/156092.png
## Nationality Flag Overall Potential
## 1 Argentina https://cdn.sofifa.org/flags/52.png 80 85
## 2 Sweden https://cdn.sofifa.org/flags/46.png 80 80
## 3 Russia https://cdn.sofifa.org/flags/40.png 79 81
## 4 Russia https://cdn.sofifa.org/flags/40.png 79 79
## 5 Brazil https://cdn.sofifa.org/flags/54.png 78 78
## 6 Russia https://cdn.sofifa.org/flags/40.png 78 78
## 7 Portugal https://cdn.sofifa.org/flags/38.png 77 77
## 8 Russia https://cdn.sofifa.org/flags/40.png 77 80
## 9 Ivory Coast https://cdn.sofifa.org/flags/108.png 77 77
## 10 Paraguay https://cdn.sofifa.org/flags/58.png 77 77
## Club Club.Logo Value Wage
## 1 https://cdn.sofifa.org/flags/52.png 5684 1602
## 2 https://cdn.sofifa.org/flags/46.png 6370 3591
## 3 https://cdn.sofifa.org/flags/40.png 5675 3672
## 4 https://cdn.sofifa.org/flags/40.png 6030 1448
## 5 Montpellier HSC https://cdn.sofifa.org/teams/2/light/70.png 6405 19799
## 6 https://cdn.sofifa.org/flags/40.png 5764 1105
## 7 https://cdn.sofifa.org/flags/38.png 6075 2836
## 8 https://cdn.sofifa.org/flags/40.png 5565 2653
## 9 https://cdn.sofifa.org/flags/108.png 5275 2138
## 10 https://cdn.sofifa.org/flags/58.png 5698 2581
## Special Preferred.Foot International.Reputation Weak.Foot Skill.Moves
## 1 2122 Right 2 4 4
## 2 1797 Right 2 4 2
## 3 1217 Right 1 3 1
## 4 2038 Right 2 3 3
## 5 1807 Right 2 3 3
## 6 1810 Right 2 3 3
## 7 1749 Right 1 3 2
## 8 2041 Right 1 3 3
## 9 1933 Left 2 3 3
## 10 1168 Right 2 3 1
## Work.Rate Body.Type Real.Face Position Jersey.Number Joined
## 1 Medium/ Medium Normal No CM 5
## 2 High/ Medium Normal No LCB 4
## 3 Medium/ Medium Normal No GK 12
## 4 High/ High Lean No RB 2
## 5 Medium/ Medium Normal Yes CB 4 1-Aug-11
## 6 High/ Medium Stocky No ST 22
## 7 Medium/ Medium Lean No CB 4
## 8 Medium/ High Lean No RM 7
## 9 High/ Low Normal No ST 21
## 10 Medium/ Medium Normal No GK 1
## Loaned.From Contract.Valid.Until Height Weight LS ST RS LW LF CF
## 1 5'11 165lbs 71+2 71+2 71+2 75+2 75+2 75+2
## 2 6'4 185lbs 62+2 62+2 62+2 56+2 58+2 58+2
## 3 6'2 176lbs
## 4 5'10 154lbs 70+2 70+2 70+2 73+2 72+2 72+2
## 5 2019 5'11 172lbs 58+2 58+2 58+2 58+2 59+2 59+2
## 6 6'5 201lbs 77+2 77+2 77+2 71+2 74+2 74+2
## 7 6'2 157lbs 52+2 52+2 52+2 51+2 51+2 51+2
## 8 6'0 163lbs 70+2 70+2 70+2 74+2 74+2 74+2
## 9 5'11 176lbs 75+2 75+2 75+2 75+2 75+2 75+2
## 10 5'11 187lbs
## RF RW LAM CAM RAM LM LCM CM RCM RM LWB LDM CDM RDM RWB
## 1 75+2 75+2 77+2 77+2 77+2 76+2 79+2 79+2 79+2 76+2 75+2 77+2 77+2 77+2 75+2
## 2 58+2 56+2 58+2 58+2 58+2 57+2 64+2 64+2 64+2 57+2 68+2 74+2 74+2 74+2 68+2
## 3
## 4 72+2 73+2 73+2 73+2 73+2 75+2 74+2 74+2 74+2 75+2 78+2 75+2 75+2 75+2 78+2
## 5 59+2 58+2 62+2 62+2 62+2 60+2 67+2 67+2 67+2 60+2 67+2 73+2 73+2 73+2 67+2
## 6 74+2 71+2 71+2 71+2 71+2 71+2 66+2 66+2 66+2 71+2 52+2 52+2 52+2 52+2 52+2
## 7 51+2 51+2 54+2 54+2 54+2 54+2 61+2 61+2 61+2 54+2 67+2 72+2 72+2 72+2 67+2
## 8 74+2 74+2 75+2 75+2 75+2 75+2 75+2 75+2 75+2 75+2 75+2 75+2 75+2 75+2 75+2
## 9 75+2 75+2 74+2 74+2 74+2 74+2 67+2 67+2 67+2 74+2 53+2 52+2 52+2 52+2 53+2
## 10
## LB LCB CB RCB RB Crossing Finishing HeadingAccuracy ShortPassing
## 1 74+2 72+2 72+2 72+2 74+2 76 55 60 84
## 2 70+2 79+2 79+2 79+2 70+2 49 51 81 73
## 3 16 14 17 25
## 4 78+2 73+2 73+2 73+2 78+2 73 61 69 79
## 5 68+2 76+2 76+2 76+2 68+2 60 45 79 73
## 6 48+2 48+2 48+2 48+2 48+2 61 79 86 71
## 7 69+2 75+2 75+2 75+2 69+2 42 33 80 72
## 8 74+2 70+2 70+2 70+2 74+2 67 64 51 82
## 9 50+2 46+2 46+2 46+2 50+2 68 77 71 73
## 10 14 12 12 30
## Volleys Dribbling Curve FKAccuracy LongPassing BallControl Acceleration
## 1 73 78 79 78 82 82 75
## 2 37 49 36 40 67 63 46
## 3 13 15 18 17 32 17 58
## 4 57 72 49 46 75 72 84
## 5 51 63 42 48 72 73 33
## 6 74 71 64 60 55 77 66
## 7 40 49 52 43 77 48 57
## 8 57 78 60 61 75 79 78
## 9 73 76 73 69 67 76 78
## 10 8 21 15 22 22 24 31
## SprintSpeed Agility Reactions Balance ShotPower Jumping Stamina Strength
## 1 69 77 74 77 82 61 79 69
## 2 49 55 76 36 74 64 67 83
## 3 54 36 76 50 24 60 27 70
## 4 90 80 75 76 67 85 93 68
## 5 38 51 70 60 55 79 54 76
## 6 65 50 75 32 78 63 77 93
## 7 59 69 78 61 42 79 72 72
## 8 81 80 73 76 76 60 79 59
## 9 85 79 71 73 77 70 78 74
## 10 32 50 73 68 29 56 22 62
## LongShots Aggression Interceptions Positioning Vision Penalties Composure
## 1 80 79 72 74 82 57 74
## 2 59 81 82 54 49 79 78
## 3 13 26 20 11 63 15 69
## 4 57 65 71 77 72 41 73
## 5 58 76 79 50 67 64 70
## 6 68 75 30 78 73 77 70
## 7 37 76 78 44 46 47 72
## 8 74 70 74 71 70 63 64
## 9 74 77 18 76 73 72 72
## 10 16 22 22 14 51 21 55
## Marking StandingTackle SlidingTackle GKDiving GKHandling GKKicking
## 1 73 75 72 9 14 6
## 2 82 83 79 7 9 12
## 3 18 20 12 80 73 65
## 4 76 76 80 7 12 10
## 5 83 77 76 12 7 11
## 6 21 15 19 15 12 11
## 7 80 77 78 10 15 13
## 8 71 77 76 15 16 13
## 9 40 18 12 15 9 10
## 10 13 13 14 75 75 74
## GKPositioning GKReflexes Decision Release.Clause
## 1 9 10 0
## 2 10 15 1
## 3 77 85 1
## 4 8 15 1
## 5 12 13 1
## 6 11 8 0
## 7 15 8 1
## 8 7 8 0
## 9 15 16 1
## 10 78 77 1
names(football)
## [1] "ID" "Name"
## [3] "Age" "Photo"
## [5] "Nationality" "Flag"
## [7] "Overall" "Potential"
## [9] "Club" "Club.Logo"
## [11] "Value" "Wage"
## [13] "Special" "Preferred.Foot"
## [15] "International.Reputation" "Weak.Foot"
## [17] "Skill.Moves" "Work.Rate"
## [19] "Body.Type" "Real.Face"
## [21] "Position" "Jersey.Number"
## [23] "Joined" "Loaned.From"
## [25] "Contract.Valid.Until" "Height"
## [27] "Weight" "LS"
## [29] "ST" "RS"
## [31] "LW" "LF"
## [33] "CF" "RF"
## [35] "RW" "LAM"
## [37] "CAM" "RAM"
## [39] "LM" "LCM"
## [41] "CM" "RCM"
## [43] "RM" "LWB"
## [45] "LDM" "CDM"
## [47] "RDM" "RWB"
## [49] "LB" "LCB"
## [51] "CB" "RCB"
## [53] "RB" "Crossing"
## [55] "Finishing" "HeadingAccuracy"
## [57] "ShortPassing" "Volleys"
## [59] "Dribbling" "Curve"
## [61] "FKAccuracy" "LongPassing"
## [63] "BallControl" "Acceleration"
## [65] "SprintSpeed" "Agility"
## [67] "Reactions" "Balance"
## [69] "ShotPower" "Jumping"
## [71] "Stamina" "Strength"
## [73] "LongShots" "Aggression"
## [75] "Interceptions" "Positioning"
## [77] "Vision" "Penalties"
## [79] "Composure" "Marking"
## [81] "StandingTackle" "SlidingTackle"
## [83] "GKDiving" "GKHandling"
## [85] "GKKicking" "GKPositioning"
## [87] "GKReflexes" "Decision"
## [89] "Release.Clause"
str(football)
## 'data.frame': 18159 obs. of 89 variables:
## $ ID : int 207439 156713 229909 187347 153260 187607 204341 223058 183389 156092 ...
## $ Name : Factor w/ 17148 levels "A. \201_ivkoviÛ\210",..: 9735 576 839 6706 6450 450 10142 3598 5890 8193 ...
## $ Age : int 24 33 26 29 40 29 30 25 29 41 ...
## $ Photo : Factor w/ 18159 levels "https://cdn.sofifa.org/players/4/19/100803.png",..: 5871 525 11255 2516 450 2539 5302 9314 2003 495 ...
## $ Nationality : Factor w/ 164 levels "Afghanistan",..: 7 146 129 129 21 129 124 129 80 120 ...
## $ Flag : Factor w/ 164 levels "https://cdn.sofifa.org/flags/1.png",..: 123 116 111 111 125 111 108 111 11 129 ...
## $ Overall : int 80 80 79 79 78 78 77 77 77 77 ...
## $ Potential : int 85 80 81 79 78 78 77 80 77 77 ...
## $ Club : Factor w/ 652 levels ""," SSV Jahn Regensburg",..: 1 1 1 1 396 1 1 1 1 1 ...
## $ Club.Logo : Factor w/ 679 levels "https://cdn.sofifa.org/flags/103.png",..: 20 18 16 16 628 16 14 16 2 23 ...
## $ Value : int 5684 6370 5675 6030 6405 5764 6075 5565 5275 5698 ...
## $ Wage : int 1602 3591 3672 1448 19799 1105 2836 2653 2138 2581 ...
## $ Special : int 2122 1797 1217 2038 1807 1810 1749 2041 1933 1168 ...
## $ Preferred.Foot : Factor w/ 2 levels "Left","Right": 2 2 2 2 2 2 2 2 1 2 ...
## $ International.Reputation: int 2 2 1 2 2 2 1 1 2 2 ...
## $ Weak.Foot : int 4 4 3 3 3 3 3 3 3 3 ...
## $ Skill.Moves : int 4 2 1 3 3 3 2 3 3 1 ...
## $ Work.Rate : Factor w/ 9 levels "High/ High","High/ Low",..: 9 3 9 1 9 3 9 7 2 9 ...
## $ Body.Type : Factor w/ 10 levels "Akinfenwa","C. Ronaldo",..: 7 7 7 4 7 10 4 4 7 7 ...
## $ Real.Face : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
## $ Position : Factor w/ 28 levels "","CAM","CB",..: 6 10 7 19 3 28 3 24 28 7 ...
## $ Jersey.Number : int 5 4 12 2 4 22 4 7 21 1 ...
## $ Joined : Factor w/ 1737 levels "","1-Apr-08",..: 1 1 1 1 17 1 1 1 1 1 ...
## $ Loaned.From : Factor w/ 342 levels "","1. FC Kí_ln",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Contract.Valid.Until : Factor w/ 37 levels "","1-Dec-19",..: 1 1 1 1 13 1 1 1 1 1 ...
## $ Height : Factor w/ 21 levels "5'1","5'10","5'11",..: 3 16 14 2 3 17 14 12 3 3 ...
## $ Weight : Factor w/ 57 levels "110lbs","115lbs",..: 25 34 30 20 28 41 21 24 30 35 ...
## $ LS : Factor w/ 94 levels "","31+2","32+2",..: 62 44 1 60 36 74 26 60 70 1 ...
## $ ST : Factor w/ 94 levels "","31+2","32+2",..: 62 44 1 60 36 74 26 60 70 1 ...
## $ RS : Factor w/ 94 levels "","31+2","32+2",..: 62 44 1 60 36 74 26 60 70 1 ...
## $ LW : Factor w/ 106 levels "","25+2","27+2",..: 80 42 1 76 46 72 32 78 80 1 ...
## $ LF : Factor w/ 103 levels "","27+2","29+2",..: 75 41 1 69 43 73 28 73 75 1 ...
## $ CF : Factor w/ 103 levels "","27+2","29+2",..: 75 41 1 69 43 73 28 73 75 1 ...
## $ RF : Factor w/ 103 levels "","27+2","29+2",..: 75 41 1 69 43 73 28 73 75 1 ...
## $ RW : Factor w/ 106 levels "","25+2","27+2",..: 80 42 1 76 46 72 32 78 80 1 ...
## $ LAM : Factor w/ 102 levels "","27+2","28+2",..: 80 42 1 72 50 68 34 76 74 1 ...
## $ CAM : Factor w/ 102 levels "","27+2","28+2",..: 80 42 1 72 50 68 34 76 74 1 ...
## $ RAM : Factor w/ 102 levels "","27+2","28+2",..: 80 42 1 72 50 68 34 76 74 1 ...
## $ LM : Factor w/ 101 levels "","27+2","28+2",..: 77 39 1 75 45 67 33 75 73 1 ...
## $ LCM : Factor w/ 93 levels "","30+2","31+2",..: 77 46 1 67 52 50 40 69 52 1 ...
## $ CM : Factor w/ 93 levels "","30+2","31+2",..: 77 46 1 67 52 50 40 69 52 1 ...
## $ RCM : Factor w/ 93 levels "","30+2","31+2",..: 77 46 1 67 52 50 40 69 52 1 ...
## $ RM : Factor w/ 101 levels "","27+2","28+2",..: 77 39 1 75 45 67 33 75 73 1 ...
## $ LWB : Factor w/ 96 levels "","30+2","31+2",..: 78 63 1 84 61 30 61 78 32 1 ...
## $ LDM : Factor w/ 100 levels "","28+2","29+2",..: 83 77 1 79 75 31 73 79 31 1 ...
## $ CDM : Factor w/ 100 levels "","28+2","29+2",..: 83 77 1 79 75 31 73 79 31 1 ...
## $ RDM : Factor w/ 100 levels "","28+2","29+2",..: 83 77 1 79 75 31 73 79 31 1 ...
## $ RWB : Factor w/ 96 levels "","30+2","31+2",..: 78 63 1 84 61 30 61 78 32 1 ...
## $ LB : Factor w/ 99 levels "","29+2","30+2",..: 80 72 1 88 68 26 70 80 30 1 ...
## $ LCB : Factor w/ 109 levels "","25+2","27+2",..: 82 96 1 84 90 32 88 78 28 1 ...
## $ CB : Factor w/ 109 levels "","25+2","27+2",..: 82 96 1 84 90 32 88 78 28 1 ...
## $ RCB : Factor w/ 109 levels "","25+2","27+2",..: 82 96 1 84 90 32 88 78 28 1 ...
## $ RB : Factor w/ 99 levels "","29+2","30+2",..: 80 72 1 88 68 26 70 80 30 1 ...
## $ Crossing : int 76 49 16 73 60 61 42 67 68 14 ...
## $ Finishing : int 55 51 14 61 45 79 33 64 77 12 ...
## $ HeadingAccuracy : int 60 81 17 69 79 86 80 51 71 12 ...
## $ ShortPassing : int 84 73 25 79 73 71 72 82 73 30 ...
## $ Volleys : int 73 37 13 57 51 74 40 57 73 8 ...
## $ Dribbling : int 78 49 15 72 63 71 49 78 76 21 ...
## $ Curve : int 79 36 18 49 42 64 52 60 73 15 ...
## $ FKAccuracy : int 78 40 17 46 48 60 43 61 69 22 ...
## $ LongPassing : int 82 67 32 75 72 55 77 75 67 22 ...
## $ BallControl : int 82 63 17 72 73 77 48 79 76 24 ...
## $ Acceleration : int 75 46 58 84 33 66 57 78 78 31 ...
## $ SprintSpeed : int 69 49 54 90 38 65 59 81 85 32 ...
## $ Agility : int 77 55 36 80 51 50 69 80 79 50 ...
## $ Reactions : int 74 76 76 75 70 75 78 73 71 73 ...
## $ Balance : int 77 36 50 76 60 32 61 76 73 68 ...
## $ ShotPower : int 82 74 24 67 55 78 42 76 77 29 ...
## $ Jumping : int 61 64 60 85 79 63 79 60 70 56 ...
## $ Stamina : int 79 67 27 93 54 77 72 79 78 22 ...
## $ Strength : int 69 83 70 68 76 93 72 59 74 62 ...
## $ LongShots : int 80 59 13 57 58 68 37 74 74 16 ...
## $ Aggression : int 79 81 26 65 76 75 76 70 77 22 ...
## $ Interceptions : int 72 82 20 71 79 30 78 74 18 22 ...
## $ Positioning : int 74 54 11 77 50 78 44 71 76 14 ...
## $ Vision : int 82 49 63 72 67 73 46 70 73 51 ...
## $ Penalties : int 57 79 15 41 64 77 47 63 72 21 ...
## $ Composure : int 74 78 69 73 70 70 72 64 72 55 ...
## $ Marking : int 73 82 18 76 83 21 80 71 40 13 ...
## $ StandingTackle : int 75 83 20 76 77 15 77 77 18 13 ...
## $ SlidingTackle : int 72 79 12 80 76 19 78 76 12 14 ...
## $ GKDiving : int 9 7 80 7 12 15 10 15 15 75 ...
## $ GKHandling : int 14 9 73 12 7 12 15 16 9 75 ...
## $ GKKicking : int 6 12 65 10 11 11 13 13 10 74 ...
## $ GKPositioning : int 9 10 77 8 12 11 15 7 15 78 ...
## $ GKReflexes : int 10 15 85 15 13 8 8 8 16 77 ...
## $ Decision : int 0 1 1 1 1 0 1 0 1 1 ...
## $ Release.Clause : Factor w/ 1245 levels "","ä‰å1.1M","ä‰å1.2M",..: 1 1 1 1 1 1 1 1 1 1 ...
str(football$Wage)
## int [1:18159] 1602 3591 3672 1448 19799 1105 2836 2653 2138 2581 ...
str(football$Value)
## int [1:18159] 5684 6370 5675 6030 6405 5764 6075 5565 5275 5698 ...
table(football$Position)
##
## CAM CB CDM CF CM GK LAM LB LCB LCM LDM LF LM LS LW
## 12 958 1778 948 74 1394 2025 21 1322 648 395 243 15 1095 207 381
## LWB RAM RB RCB RCM RDM RF RM RS RW RWB ST
## 78 21 1291 662 391 248 16 1124 203 370 87 2152
nrow(football)
## [1] 18159
Strikers are defined in the dataset as Position = “ST”.
# Keep only the rows whose Position is "ST". which() discards any NA
# comparison results, which is the same row selection subset() performs.
football_st <- football[which(football$Position == "ST"), ]
head(football_st)
## ID Name Age Photo
## 6 187607 A. Dzyuba 29 https://cdn.sofifa.org/players/4/19/187607.png
## 9 183389 G. Sio 29 https://cdn.sofifa.org/players/4/19/183389.png
## 19 245683 K. Fofana 26 https://cdn.sofifa.org/players/4/19/245683.png
## 46 190461 B. SiguríÁarson 27 https://cdn.sofifa.org/players/4/19/190461.png
## 66 225900 J. Sambenito 26 https://cdn.sofifa.org/players/4/19/225900.png
## 68 246405 B. Angulo 22 https://cdn.sofifa.org/players/4/19/246405.png
## Nationality Flag Overall Potential Club
## 6 Russia https://cdn.sofifa.org/flags/40.png 78 78
## 9 Ivory Coast https://cdn.sofifa.org/flags/108.png 77 77
## 19 Ivory Coast https://cdn.sofifa.org/flags/108.png 75 75
## 46 Iceland https://cdn.sofifa.org/flags/24.png 73 74
## 66 Paraguay https://cdn.sofifa.org/flags/58.png 71 74
## 68 Ecuador https://cdn.sofifa.org/flags/57.png 71 77
## Club.Logo Value Wage Special Preferred.Foot
## 6 https://cdn.sofifa.org/flags/40.png 5764 1105 1810 Right
## 9 https://cdn.sofifa.org/flags/108.png 5275 2138 1933 Left
## 19 https://cdn.sofifa.org/flags/108.png 5589 3875 1877 Right
## 46 https://cdn.sofifa.org/flags/24.png 5629 3661 1893 Right
## 66 https://cdn.sofifa.org/flags/58.png 6113 2445 1651 Right
## 68 https://cdn.sofifa.org/flags/57.png 5057 2216 1628 Right
## International.Reputation Weak.Foot Skill.Moves Work.Rate Body.Type
## 6 2 3 3 High/ Medium Stocky
## 9 2 3 3 High/ Low Normal
## 19 1 3 3 Medium/ Medium Normal
## 46 1 4 3 High/ High Normal
## 66 1 3 2 High/ Medium Lean
## 68 1 4 3 High/ Low Normal
## Real.Face Position Jersey.Number Joined Loaned.From Contract.Valid.Until
## 6 No ST 22
## 9 No ST 21
## 19 No ST 22
## 46 No ST 9
## 66 No ST 9
## 68 No ST 19
## Height Weight LS ST RS LW LF CF RF RW LAM CAM RAM LM
## 6 6'5 201lbs 77+2 77+2 77+2 71+2 74+2 74+2 74+2 71+2 71+2 71+2 71+2 71+2
## 9 5'11 176lbs 75+2 75+2 75+2 75+2 75+2 75+2 75+2 75+2 74+2 74+2 74+2 74+2
## 19 6'2 179lbs 73+2 73+2 73+2 71+2 72+2 72+2 72+2 71+2 71+2 71+2 71+2 71+2
## 46 6'1 190lbs 72+2 72+2 72+2 71+2 71+2 71+2 71+2 71+2 70+2 70+2 70+2 71+2
## 66 6'0 190lbs 70+2 70+2 70+2 64+2 67+2 67+2 67+2 64+2 63+2 63+2 63+2 62+2
## 68 6'0 154lbs 70+2 70+2 70+2 67+2 68+2 68+2 68+2 67+2 63+2 63+2 63+2 65+2
## LCM CM RCM RM LWB LDM CDM RDM RWB LB LCB CB RCB RB
## 6 66+2 66+2 66+2 71+2 52+2 52+2 52+2 52+2 52+2 48+2 48+2 48+2 48+2 48+2
## 9 67+2 67+2 67+2 74+2 53+2 52+2 52+2 52+2 53+2 50+2 46+2 46+2 46+2 50+2
## 19 67+2 67+2 67+2 71+2 59+2 57+2 57+2 57+2 59+2 57+2 52+2 52+2 52+2 57+2
## 46 64+2 64+2 64+2 71+2 59+2 55+2 55+2 55+2 59+2 56+2 53+2 53+2 53+2 56+2
## 66 55+2 55+2 55+2 62+2 43+2 41+2 41+2 41+2 43+2 41+2 38+2 38+2 38+2 41+2
## 68 54+2 54+2 54+2 65+2 47+2 39+2 39+2 39+2 47+2 44+2 36+2 36+2 36+2 44+2
## Crossing Finishing HeadingAccuracy ShortPassing Volleys Dribbling Curve
## 6 61 79 86 71 74 71 64
## 9 68 77 71 73 73 76 73
## 19 66 75 72 74 74 72 63
## 46 66 71 68 68 65 73 63
## 66 40 74 72 57 72 60 64
## 68 50 78 69 56 46 76 58
## FKAccuracy LongPassing BallControl Acceleration SprintSpeed Agility
## 6 60 55 77 66 65 50
## 9 69 67 76 78 85 79
## 19 59 58 75 59 77 63
## 46 48 44 73 78 79 83
## 66 42 42 63 79 72 61
## 68 58 33 71 82 79 78
## Reactions Balance ShotPower Jumping Stamina Strength LongShots Aggression
## 6 75 32 78 63 77 93 68 75
## 9 71 73 77 70 78 74 74 77
## 19 72 60 78 69 83 77 73 67
## 46 74 76 68 78 90 85 66 73
## 66 69 64 73 69 67 72 67 49
## 68 73 64 72 69 77 69 54 28
## Interceptions Positioning Vision Penalties Composure Marking StandingTackle
## 6 30 78 73 77 70 21 15
## 9 18 76 73 72 72 40 18
## 19 40 72 69 74 83 23 37
## 46 42 73 64 69 76 31 39
## 66 14 75 60 67 74 15 16
## 68 16 62 45 82 51 11 18
## SlidingTackle GKDiving GKHandling GKKicking GKPositioning GKReflexes
## 6 19 15 12 11 11 8
## 9 12 15 9 10 15 16
## 19 46 7 11 7 11 14
## 46 24 9 12 10 15 16
## 66 16 15 16 15 7 7
## 68 12 11 8 10 7 6
## Decision Release.Clause
## 6 0
## 9 1
## 19 1
## 46 0
## 66 0
## 68 1
nrow(football_st)
## [1] 2152
Split the data into training and validation sets.
Set the seed using our favourite number :-)
set.seed(666)
Create the indices for the split. This samples the row indices used to divide the data into training and validation sets.
# Draw 60% of the striker rows (rounded down) for training; every remaining
# row goes to validation. seq_len() replaces 1:nrow() so that an empty data
# frame yields an empty index vector instead of c(1, 0), and the sample size
# is floored explicitly rather than left as a fractional number.
train_index <- sample(seq_len(nrow(football_st)),
                      size = floor(0.6 * nrow(football_st)))
valid_index <- setdiff(seq_len(nrow(football_st)), train_index)
Using the indices, create the training and validation sets. This is similar in principle to splitting a data frame by row.
# Slice the striker data by row position into the two partitions, keeping
# every column.
train_df <- football_st[train_index, , drop = FALSE]
valid_df <- football_st[valid_index, , drop = FALSE]
It is a good habit to check after splitting.
nrow(train_df)
## [1] 1291
nrow(valid_df)
## [1] 861
names(train_df)
## [1] "ID" "Name"
## [3] "Age" "Photo"
## [5] "Nationality" "Flag"
## [7] "Overall" "Potential"
## [9] "Club" "Club.Logo"
## [11] "Value" "Wage"
## [13] "Special" "Preferred.Foot"
## [15] "International.Reputation" "Weak.Foot"
## [17] "Skill.Moves" "Work.Rate"
## [19] "Body.Type" "Real.Face"
## [21] "Position" "Jersey.Number"
## [23] "Joined" "Loaned.From"
## [25] "Contract.Valid.Until" "Height"
## [27] "Weight" "LS"
## [29] "ST" "RS"
## [31] "LW" "LF"
## [33] "CF" "RF"
## [35] "RW" "LAM"
## [37] "CAM" "RAM"
## [39] "LM" "LCM"
## [41] "CM" "RCM"
## [43] "RM" "LWB"
## [45] "LDM" "CDM"
## [47] "RDM" "RWB"
## [49] "LB" "LCB"
## [51] "CB" "RCB"
## [53] "RB" "Crossing"
## [55] "Finishing" "HeadingAccuracy"
## [57] "ShortPassing" "Volleys"
## [59] "Dribbling" "Curve"
## [61] "FKAccuracy" "LongPassing"
## [63] "BallControl" "Acceleration"
## [65] "SprintSpeed" "Agility"
## [67] "Reactions" "Balance"
## [69] "ShotPower" "Jumping"
## [71] "Stamina" "Strength"
## [73] "LongShots" "Aggression"
## [75] "Interceptions" "Positioning"
## [77] "Vision" "Penalties"
## [79] "Composure" "Marking"
## [81] "StandingTackle" "SlidingTackle"
## [83] "GKDiving" "GKHandling"
## [85] "GKKicking" "GKPositioning"
## [87] "GKReflexes" "Decision"
## [89] "Release.Clause"
# Recode the integer 0/1 target as a factor in both partitions so the models
# below treat it as a class label.
train_df[["Decision"]] <- as.factor(train_df[["Decision"]])
valid_df[["Decision"]] <- as.factor(valid_df[["Decision"]])
# Method 1: fit a logistic regression with base R's glm(). Decision is
# modelled on Age plus 19 skill ratings, using the training partition only.
logistic_reg_1 <- glm(Decision ~ Age + Crossing + Finishing + HeadingAccuracy +
ShortPassing + Volleys + Dribbling + Curve +
BallControl + Acceleration +
SprintSpeed + Agility + Reactions + Balance +
ShotPower + Jumping + Strength +
Aggression + Positioning + Composure,
data = train_df, family = "binomial")
# Print the coefficient table, deviances and AIC of the fitted model.
summary(logistic_reg_1)
##
## Call:
## glm(formula = Decision ~ Age + Crossing + Finishing + HeadingAccuracy +
## ShortPassing + Volleys + Dribbling + Curve + BallControl +
## Acceleration + SprintSpeed + Agility + Reactions + Balance +
## ShotPower + Jumping + Strength + Aggression + Positioning +
## Composure, family = "binomial", data = train_df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5494 -1.1679 0.8109 1.1559 1.6141
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0700043 0.8775220 0.080 0.9364
## Age -0.0365861 0.0172659 -2.119 0.0341 *
## Crossing 0.0010418 0.0071265 0.146 0.8838
## Finishing -0.0153921 0.0174052 -0.884 0.3765
## HeadingAccuracy -0.0083713 0.0109290 -0.766 0.4437
## ShortPassing 0.0217390 0.0119988 1.812 0.0700 .
## Volleys -0.0086527 0.0100163 -0.864 0.3877
## Dribbling 0.0065157 0.0154621 0.421 0.6735
## Curve -0.0002802 0.0070235 -0.040 0.9682
## BallControl 0.0032000 0.0178404 0.179 0.8576
## Acceleration -0.0125772 0.0122329 -1.028 0.3039
## SprintSpeed -0.0006122 0.0116696 -0.052 0.9582
## Agility 0.0052299 0.0091886 0.569 0.5692
## Reactions 0.0021523 0.0128892 0.167 0.8674
## Balance 0.0018983 0.0079659 0.238 0.8116
## ShotPower 0.0143233 0.0127724 1.121 0.2621
## Jumping 0.0029168 0.0066383 0.439 0.6604
## Strength -0.0067525 0.0076664 -0.881 0.3784
## Aggression 0.0015171 0.0048487 0.313 0.7544
## Positioning 0.0111225 0.0144942 0.767 0.4429
## Composure -0.0031572 0.0108253 -0.292 0.7706
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1789.7 on 1290 degrees of freedom
## Residual deviance: 1767.8 on 1270 degrees of freedom
## AIC: 1809.8
##
## Number of Fisher Scoring iterations: 4
# Score the validation partition with the glm() fit; type = "response" asks
# for predicted probabilities rather than values on the link scale.
logistic_reg_1_pred <- predict(
  object = logistic_reg_1,
  newdata = valid_df,
  type = "response"
)
head(logistic_reg_1_pred)
## 9 19 68 101 102 141
## 0.5466647 0.5837867 0.5022499 0.4798854 0.5442764 0.4988865
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Classify at the 0.5 threshold and compare with the observed Decision,
# reporting statistics with "1" as the positive class. The factor levels are
# fixed to 0 and 1 so that both classes are always present in the predictions,
# even if every probability happens to fall on one side of the threshold.
confusionMatrix(factor(ifelse(logistic_reg_1_pred > 0.5, 1, 0),
                       levels = c(0, 1)),
                valid_df$Decision, positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 220 210
## 1 230 201
##
## Accuracy : 0.489
## 95% CI : (0.4551, 0.5229)
## No Information Rate : 0.5226
## P-Value [Acc > NIR] : 0.9779
##
## Kappa : -0.022
##
## Mcnemar's Test P-Value : 0.3650
##
## Sensitivity : 0.4891
## Specificity : 0.4889
## Pos Pred Value : 0.4664
## Neg Pred Value : 0.5116
## Prevalence : 0.4774
## Detection Rate : 0.2334
## Detection Prevalence : 0.5006
## Balanced Accuracy : 0.4890
##
## 'Positive' Class : 1
##
library(caret)
# Method 2: fit the same Decision model through caret's train() wrapper,
# using method = "glm" with a binomial family on the training partition.
# NOTE(review): no trControl is passed, so caret's default resampling settings
# apply here -- confirm that is intended.
logistic_reg_2 <- train(Decision ~ Age + Crossing + Finishing + HeadingAccuracy +
ShortPassing + Volleys + Dribbling + Curve +
BallControl + Acceleration +
SprintSpeed + Agility + Reactions + Balance +
ShotPower + Jumping + Strength +
Aggression + Positioning + Composure,
data = train_df, method="glm", family="binomial")
# Print the coefficient table, deviances and AIC of the fitted model.
summary(logistic_reg_2)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5494 -1.1679 0.8109 1.1559 1.6141
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0700043 0.8775220 0.080 0.9364
## Age -0.0365861 0.0172659 -2.119 0.0341 *
## Crossing 0.0010418 0.0071265 0.146 0.8838
## Finishing -0.0153921 0.0174052 -0.884 0.3765
## HeadingAccuracy -0.0083713 0.0109290 -0.766 0.4437
## ShortPassing 0.0217390 0.0119988 1.812 0.0700 .
## Volleys -0.0086527 0.0100163 -0.864 0.3877
## Dribbling 0.0065157 0.0154621 0.421 0.6735
## Curve -0.0002802 0.0070235 -0.040 0.9682
## BallControl 0.0032000 0.0178404 0.179 0.8576
## Acceleration -0.0125772 0.0122329 -1.028 0.3039
## SprintSpeed -0.0006122 0.0116696 -0.052 0.9582
## Agility 0.0052299 0.0091886 0.569 0.5692
## Reactions 0.0021523 0.0128892 0.167 0.8674
## Balance 0.0018983 0.0079659 0.238 0.8116
## ShotPower 0.0143233 0.0127724 1.121 0.2621
## Jumping 0.0029168 0.0066383 0.439 0.6604
## Strength -0.0067525 0.0076664 -0.881 0.3784
## Aggression 0.0015171 0.0048487 0.313 0.7544
## Positioning 0.0111225 0.0144942 0.767 0.4429
## Composure -0.0031572 0.0108253 -0.292 0.7706
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1789.7 on 1290 degrees of freedom
## Residual deviance: 1767.8 on 1270 degrees of freedom
## AIC: 1809.8
##
## Number of Fisher Scoring iterations: 4
Predictions.
# type = "raw" makes the caret model return predicted class labels for the
# validation partition.
logistic_reg_2_pred <- predict(
  object = logistic_reg_2,
  newdata = valid_df,
  type = "raw"
)
head(logistic_reg_2_pred)
## [1] 1 1 1 0 1 0
## Levels: 0 1
Probabilities.
# type = "prob" makes the caret model return one probability column per class
# for the validation partition.
logistic_reg_2_prob <- predict(
  object = logistic_reg_2,
  newdata = valid_df,
  type = "prob"
)
head(logistic_reg_2_prob)
## 0 1
## 9 0.4533353 0.5466647
## 19 0.4162133 0.5837867
## 68 0.4977501 0.5022499
## 101 0.5201146 0.4798854
## 102 0.4557236 0.5442764
## 141 0.5011135 0.4988865
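Confusion matrix. Note that `positive` is not set in this call, so class "0" is reported as the positive class; sensitivity, specificity and the predictive values are therefore swapped relative to the first confusion matrix above.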
# The raw caret predictions are already a factor of class labels, so they are
# passed to confusionMatrix() directly alongside the observed Decision.
confusionMatrix(data = logistic_reg_2_pred,
                reference = valid_df$Decision)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 220 210
## 1 230 201
##
## Accuracy : 0.489
## 95% CI : (0.4551, 0.5229)
## No Information Rate : 0.5226
## P-Value [Acc > NIR] : 0.9779
##
## Kappa : -0.022
##
## Mcnemar's Test P-Value : 0.3650
##
## Sensitivity : 0.4889
## Specificity : 0.4891
## Pos Pred Value : 0.5116
## Neg Pred Value : 0.4664
## Prevalence : 0.5226
## Detection Rate : 0.2555
## Detection Prevalence : 0.4994
## Balanced Accuracy : 0.4890
##
## 'Positive' Class : 0
##
The modelplotr approach below only works with method 2, i.e. the caret `train()` model `logistic_reg_2`.
library(modelplotr)
## Package modelplotr loaded! Happy model plotting!
# Build the per-observation score table used by the modelplotr plots: class
# probabilities and their ntile rank for every validation record. The dataset
# and model are passed by name, as strings.
scores_and_ntiles <- prepare_scores_and_ntiles(
  datasets = list("valid_df"),
  dataset_labels = list("Validation Data"),
  models = list("logistic_reg_2"),
  model_labels = list("Logistic regression"),
  target_column = "Decision",
  ntiles = 100
)
## ... scoring caret model "logistic_reg_2" on dataset "valid_df".
## Data preparation step 1 succeeded! Dataframe created.
head(scores_and_ntiles)
## model_label dataset_label y_true prob_0 prob_1 ntl_0 ntl_1
## 9 Logistic regression Validation Data 1 0.4533353 0.5466647 78 23
## 19 Logistic regression Validation Data 1 0.4162133 0.5837867 91 10
## 68 Logistic regression Validation Data 1 0.4977501 0.5022499 51 50
## 101 Logistic regression Validation Data 0 0.5201146 0.4798854 38 63
## 102 Logistic regression Validation Data 0 0.4557236 0.5442764 77 24
## 141 Logistic regression Validation Data 1 0.5011135 0.4988865 50 51
plot_input <- plotting_scope(prepared_input = scores_and_ntiles)
## Data preparation step 2 succeeded! Dataframe created.
## "prepared_input" aggregated...
## Data preparation step 3 succeeded! Dataframe created.
##
## No comparison specified, default values are used.
##
## Single evaluation line will be plotted: Target value "1" plotted for dataset "Validation Data" and model "Logistic regression.
## "
## -> To compare models, specify: scope = "compare_models"
## -> To compare datasets, specify: scope = "compare_datasets"
## -> To compare target classes, specify: scope = "compare_targetclasses"
## -> To plot one line, do not specify scope or specify scope = "no_comparison".
head(plot_input)
## scope model_label dataset_label target_class ntile neg pos
## 1 no_comparison Logistic regression Validation Data 1 0 0 0
## 2 no_comparison Logistic regression Validation Data 1 1 2 7
## 3 no_comparison Logistic regression Validation Data 1 2 2 7
## 4 no_comparison Logistic regression Validation Data 1 3 6 2
## 5 no_comparison Logistic regression Validation Data 1 4 5 4
## 6 no_comparison Logistic regression Validation Data 1 5 5 3
## tot pct negtot postot tottot pcttot cumneg cumpos cumtot cumpct
## 1 0 NA NA NA NA NA 0 0 0 NA
## 2 9 0.7777778 450 411 861 0.4773519 2 7 9 0.7777778
## 3 9 0.7777778 450 411 861 0.4773519 4 14 18 0.7777778
## 4 8 0.2500000 450 411 861 0.4773519 10 16 26 0.6153846
## 5 9 0.4444444 450 411 861 0.4773519 15 20 35 0.5714286
## 6 8 0.3750000 450 411 861 0.4773519 20 23 43 0.5348837
## gain cumgain gain_ref gain_opt lift cumlift cumlift_ref
## 1 0.00000000 0.00000000 0.00 0.00000000 NA NA 1
## 2 0.01703163 0.01703163 0.01 0.02189781 1.6293593 1.629359 1
## 3 0.01703163 0.03406326 0.02 0.04379562 1.6293593 1.629359 1
## 4 0.00486618 0.03892944 0.03 0.06326034 0.5237226 1.289163 1
## 5 0.00973236 0.04866180 0.04 0.08515815 0.9310624 1.197080 1
## 6 0.00729927 0.05596107 0.05 0.10462287 0.7855839 1.120523 1
## legend
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
Cumulative gains for logistic regression.
plot_cumgains(data = plot_input)
Cumulative lift for logistic regression.
plot_cumlift(data = plot_input)
Response plot for logistic regression.
plot_response(data = plot_input)
Cumulative response plot for logistic regression.
plot_cumresponse(data = plot_input)
Multiplot for logistic regression.
plot_multiplot(data = plot_input)
The pseudo-R-squared check below only works with method 1, i.e. the `glm()` model `logistic_reg_1`.
Check the McFadden statistic.
library(pscl)
## Warning: package 'pscl' was built under R version 3.6.3
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
summary(logistic_reg_1)
##
## Call:
## glm(formula = Decision ~ Age + Crossing + Finishing + HeadingAccuracy +
## ShortPassing + Volleys + Dribbling + Curve + BallControl +
## Acceleration + SprintSpeed + Agility + Reactions + Balance +
## ShotPower + Jumping + Strength + Aggression + Positioning +
## Composure, family = "binomial", data = train_df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5494 -1.1679 0.8109 1.1559 1.6141
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0700043 0.8775220 0.080 0.9364
## Age -0.0365861 0.0172659 -2.119 0.0341 *
## Crossing 0.0010418 0.0071265 0.146 0.8838
## Finishing -0.0153921 0.0174052 -0.884 0.3765
## HeadingAccuracy -0.0083713 0.0109290 -0.766 0.4437
## ShortPassing 0.0217390 0.0119988 1.812 0.0700 .
## Volleys -0.0086527 0.0100163 -0.864 0.3877
## Dribbling 0.0065157 0.0154621 0.421 0.6735
## Curve -0.0002802 0.0070235 -0.040 0.9682
## BallControl 0.0032000 0.0178404 0.179 0.8576
## Acceleration -0.0125772 0.0122329 -1.028 0.3039
## SprintSpeed -0.0006122 0.0116696 -0.052 0.9582
## Agility 0.0052299 0.0091886 0.569 0.5692
## Reactions 0.0021523 0.0128892 0.167 0.8674
## Balance 0.0018983 0.0079659 0.238 0.8116
## ShotPower 0.0143233 0.0127724 1.121 0.2621
## Jumping 0.0029168 0.0066383 0.439 0.6604
## Strength -0.0067525 0.0076664 -0.881 0.3784
## Aggression 0.0015171 0.0048487 0.313 0.7544
## Positioning 0.0111225 0.0144942 0.767 0.4429
## Composure -0.0031572 0.0108253 -0.292 0.7706
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1789.7 on 1290 degrees of freedom
## Residual deviance: 1767.8 on 1270 degrees of freedom
## AIC: 1809.8
##
## Number of Fisher Scoring iterations: 4
pR2(logistic_reg_1)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -883.90975662 -894.85262281 21.88573238 0.01222868 0.01680966
## r2CU
## 0.02241288
varImp(logistic_reg_1)
## Overall
## Age 2.11898044
## Crossing 0.14619034
## Finishing 0.88433641
## HeadingAccuracy 0.76597453
## ShortPassing 1.81175625
## Volleys 0.86385724
## Dribbling 0.42139682
## Curve 0.03989895
## BallControl 0.17937045
## Acceleration 1.02814440
## SprintSpeed 0.05246461
## Agility 0.56916872
## Reactions 0.16698124
## Balance 0.23829986
## ShotPower 1.12142255
## Jumping 0.43938627
## Strength 0.88078835
## Aggression 0.31289032
## Positioning 0.76737174
## Composure 0.29164995
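The ROC curve shows the trade-off between the true positive rate and the false positive rate as the classification cutoff varies.
The AUC metric ranges from 0 to 1.0; a value of 0.5 corresponds to random guessing.
Values >= 0.8 are generally considered good.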
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Predicted probabilities (response scale) for the validation set.
prob <- predict(
  logistic_reg_1,
  newdata = valid_df,
  type = "response"
)

# Pair the scores with the observed labels for ROCR.
pred <- prediction(prob, valid_df$Decision)

# ROC curve: true positive rate (y) against false positive rate (x).
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)

# Area under the ROC curve, pulled out of the performance object as a number.
auc <- slot(performance(pred, measure = "auc"), "y.values")[[1]]
auc
## [1] 0.5073425
Generally, the point on the ROC curve that is closest to TPR = 1 and FPR = 0 gives the optimal cut off. This point applies equal weight to both sensitivity and specificity.
# Find the ROC operating point closest to the ideal corner (FPR = 0, TPR = 1).
#
# Args:
#   perf: object with list slots `x.values` and `y.values`; each x element is
#         treated as false positive rates and each y element as true positive
#         rates (e.g. a ROCR performance object built with "tpr" vs "fpr").
#   pred: object with a list slot `cutoffs` holding the thresholds that
#         correspond, position by position, to the points in `perf`.
#
# Returns:
#   A numeric matrix with rows "sensitivity", "specificity" and "cutoff" and
#   one column per element of the slots. The value is returned visibly.
opt.cut <- function(perf, pred) {
  mapply(
    FUN = function(x, y, p) {
      # Squared Euclidean distance from every ROC point to (0, 1).
      d <- (x - 0)^2 + (y - 1)^2
      # which.min() always yields a single index (the first minimum), so
      # equally distant points cannot break the `[[` extraction below.
      ind <- which.min(d)
      c(sensitivity = y[[ind]], specificity = 1 - x[[ind]],
        cutoff = p[[ind]])
    },
    perf@x.values, perf@y.values, pred@cutoffs
  )
}
# Show the sensitivity, specificity and cutoff of the ROC point that lies
# closest to (FPR = 0, TPR = 1).
print(opt.cut(perf, pred))
## [,1]
## sensitivity 0.4671533
## specificity 0.5244444
## cutoff 0.5054612
Use the optimal cut off to compute the confusion matrix.
# Label an observation "1" when the second column of logistic_reg_2_prob
# exceeds the cutoff printed by opt.cut, otherwise "0", and compare the
# labels with the observed validation decisions.
confusionMatrix(
  data = as.factor(
    ifelse(logistic_reg_2_prob[, 2] > 0.5054612, "1", "0")
  ),
  reference = valid_df$Decision
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 236 219
## 1 214 192
##
## Accuracy : 0.4971
## 95% CI : (0.4632, 0.531)
## No Information Rate : 0.5226
## P-Value [Acc > NIR] : 0.9376
##
## Kappa : -0.0084
##
## Mcnemar's Test P-Value : 0.8476
##
## Sensitivity : 0.5244
## Specificity : 0.4672
## Pos Pred Value : 0.5187
## Neg Pred Value : 0.4729
## Prevalence : 0.5226
## Detection Rate : 0.2741
## Detection Prevalence : 0.5285
## Balanced Accuracy : 0.4958
##
## 'Positive' Class : 0
##
The optimal cutoff can also be computed based on maximum accuracy. An accuracy performance object (measure = "acc") has x.values corresponding to the cutoffs and y.values corresponding to the accuracies. The code below takes the index of the maximum accuracy and the corresponding cutoff.
Note: This is just a hypothetical example.
ind = which.max(slot(perf, "y.values")[[1]] )
acc = slot(perf, "y.values")[[1]][ind]
cutoff = slot(perf, "x.values")[[1]][ind]
print(c(accuracy = acc, cutoff = cutoff))
## accuracy cutoff
## 1.0000000 0.9977778
Logistic regression using k-fold cross validation.
# Resampling setup: method "repeatedcv" with number = 10; the held-out
# predictions are stored in the fitted object.
ctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  savePredictions = TRUE
)

# Fit the binomial GLM on the training data under that resampling scheme.
logistic_reg_3 <- train(
  Decision ~ Age + Crossing + Finishing + HeadingAccuracy + ShortPassing +
    Volleys + Dribbling + Curve + BallControl + Acceleration + SprintSpeed +
    Agility + Reactions + Balance + ShotPower + Jumping + Strength +
    Aggression + Positioning + Composure,
  data = train_df,
  method = "glm",
  family = "binomial",
  trControl = ctrl,
  tuneLength = 5
)

# Predict the validation set and tabulate predictions against the truth.
logistic_reg_3_pred <- predict(logistic_reg_3, newdata = valid_df)
confusionMatrix(
  data = logistic_reg_3_pred,
  reference = valid_df$Decision
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 220 210
## 1 230 201
##
## Accuracy : 0.489
## 95% CI : (0.4551, 0.5229)
## No Information Rate : 0.5226
## P-Value [Acc > NIR] : 0.9779
##
## Kappa : -0.022
##
## Mcnemar's Test P-Value : 0.3650
##
## Sensitivity : 0.4889
## Specificity : 0.4891
## Pos Pred Value : 0.5116
## Neg Pred Value : 0.4664
## Prevalence : 0.5226
## Detection Rate : 0.2555
## Detection Prevalence : 0.4994
## Balanced Accuracy : 0.4890
##
## 'Positive' Class : 0
##
Logistic regression using bootstrapping.
# Resampling setup: method "boot632" with number = 1000; the held-out
# predictions are stored in the fitted object. This overwrites the earlier
# `ctrl`.
ctrl <- trainControl(
  method = "boot632",
  number = 1000,
  savePredictions = TRUE
)

# Fit the binomial GLM on the training data under that resampling scheme.
logistic_reg_4 <- train(
  Decision ~ Age + Crossing + Finishing + HeadingAccuracy + ShortPassing +
    Volleys + Dribbling + Curve + BallControl + Acceleration + SprintSpeed +
    Agility + Reactions + Balance + ShotPower + Jumping + Strength +
    Aggression + Positioning + Composure,
  data = train_df,
  method = "glm",
  family = "binomial",
  trControl = ctrl,
  tuneLength = 5
)

# Predict the validation set and tabulate predictions against the truth,
# reporting the statistics with "1" as the positive class.
logistic_reg_4_pred <- predict(logistic_reg_4, newdata = valid_df)
confusionMatrix(
  data = logistic_reg_4_pred,
  reference = valid_df$Decision,
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 220 210
## 1 230 201
##
## Accuracy : 0.489
## 95% CI : (0.4551, 0.5229)
## No Information Rate : 0.5226
## P-Value [Acc > NIR] : 0.9779
##
## Kappa : -0.022
##
## Mcnemar's Test P-Value : 0.3650
##
## Sensitivity : 0.4891
## Specificity : 0.4889
## Pos Pred Value : 0.4664
## Neg Pred Value : 0.5116
## Prevalence : 0.4774
## Detection Rate : 0.2334
## Detection Prevalence : 0.5006
## Balanced Accuracy : 0.4890
##
## 'Positive' Class : 1
##