import pandas as pd
from pandas import DataFrame
# Read the fighter statistics from disk and preview the first five rows.
smash_df = pd.read_csv(filepath_or_buffer="smash_3.csv")
smash_df.head(n=5)
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | full_hop_height | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Expert |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Amateur |
# Dimensions of the loaded table as (rows, columns).
smash_df.shape
(72, 13)
# Number of entries in the "game" column.
smash_df["game"].size
72
Unique values.
# Store "game" as a categorical column, then print its summary.
smash_df["game"] = smash_df["game"].astype("category")
# info is a method and has to be called; the bare attribute only echoes the
# bound-method repr instead of producing the summary.
smash_df["game"].info()
<bound method Series.info of 0 Super Mario 1 Donkey Kong 2 Zelda 3 Metroid 4 Metroid ... 67 Animal Crossing 68 Pokemon 69 Super Mario 70 Dragon Quest 71 Banjo Kazooie Name: game, Length: 72, dtype: category Categories (30, object): ['Animal Crossing', 'Banjo Kazooie', 'Bayonetta', 'Castlevania', ..., 'Super Mario', 'Wii', 'Xenoblade', 'Zelda']>
# Number of fighters per game, most frequent game first.
smash_df["game"].value_counts()
Super Mario 10 Pokemon 7 Zelda 6 Fire Emblem 6 Wii 4 Kirby 4 Metroid 4 Donkey Kong 3 Animal Crossing 2 Icarus 2 Castlevania 2 Street Fighter 2 Star Fox 2 F-Zero 2 Punch Out 1 Xenoblade 1 Bayonetta 1 Stack Up 1 Splatoon 1 Sonic 1 Earthbound 1 Nintendo 1 Mother 1 Banjo Kazooie 1 Metal Gear 1 Mega Man 1 Dragon Quest 1 Duck Hunt 1 Final Fantasy 1 Pikmin 1 Name: game, dtype: int64
# Same counts with the sort-by-frequency flag spelled out explicitly.
smash_df["game"].value_counts(sort=True)
Super Mario 10 Pokemon 7 Zelda 6 Fire Emblem 6 Wii 4 Kirby 4 Metroid 4 Donkey Kong 3 Animal Crossing 2 Icarus 2 Castlevania 2 Street Fighter 2 Star Fox 2 F-Zero 2 Punch Out 1 Xenoblade 1 Bayonetta 1 Stack Up 1 Splatoon 1 Sonic 1 Earthbound 1 Nintendo 1 Mother 1 Banjo Kazooie 1 Metal Gear 1 Mega Man 1 Dragon Quest 1 Duck Hunt 1 Final Fantasy 1 Pikmin 1 Name: game, dtype: int64
# Relative frequencies (fraction of all rows) instead of raw counts.
smash_df["game"].value_counts(sort=True, normalize=True)
Super Mario 0.138889 Pokemon 0.097222 Zelda 0.083333 Fire Emblem 0.083333 Wii 0.055556 Kirby 0.055556 Metroid 0.055556 Donkey Kong 0.041667 Animal Crossing 0.027778 Icarus 0.027778 Castlevania 0.027778 Street Fighter 0.027778 Star Fox 0.027778 F-Zero 0.027778 Punch Out 0.013889 Xenoblade 0.013889 Bayonetta 0.013889 Stack Up 0.013889 Splatoon 0.013889 Sonic 0.013889 Earthbound 0.013889 Nintendo 0.013889 Mother 0.013889 Banjo Kazooie 0.013889 Metal Gear 0.013889 Mega Man 0.013889 Dragon Quest 0.013889 Duck Hunt 0.013889 Final Fantasy 0.013889 Pikmin 0.013889 Name: game, dtype: float64
# Per-column dtype and non-null count, plus overall memory usage.
smash_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 72 entries, 0 to 71 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 character 72 non-null object 1 game 72 non-null category 2 regular_fall 72 non-null float64 3 fast_fall 72 non-null float64 4 weight 72 non-null int64 5 walk_speed 72 non-null float64 6 run_speed 72 non-null float64 7 full_hop_height 70 non-null float64 8 short_hop_height 70 non-null float64 9 double_hop_height 70 non-null float64 10 before 72 non-null int64 11 after 72 non-null int64 12 expertise 72 non-null object dtypes: category(1), float64(7), int64(3), object(2) memory usage: 8.2+ KB
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
smash_df.describe()
regular_fall | fast_fall | weight | walk_speed | run_speed | full_hop_height | short_hop_height | double_hop_height | before | after | |
---|---|---|---|---|---|---|---|---|---|---|
count | 72.000000 | 72.000000 | 72.00000 | 72.000000 | 72.000000 | 70.000000 | 70.000000 | 70.000000 | 72.000000 | 72.000000 |
mean | 1.600069 | 2.546306 | 96.25000 | 1.104171 | 1.806428 | 33.211864 | 16.027429 | 34.440000 | 6.319444 | 7.722222 |
std | 0.222869 | 0.353823 | 13.93218 | 0.213718 | 0.391566 | 5.112580 | 2.302452 | 6.379117 | 2.582860 | 1.576430 |
min | 0.980000 | 1.568000 | 62.00000 | 0.620000 | 1.180000 | 19.790000 | 11.260000 | 19.790000 | 2.000000 | 5.000000 |
25% | 1.472500 | 2.240000 | 88.00000 | 0.924000 | 1.591250 | 30.885375 | 14.500000 | 30.782500 | 4.000000 | 7.000000 |
50% | 1.620000 | 2.592000 | 95.50000 | 1.133000 | 1.722500 | 33.000000 | 16.225000 | 33.500000 | 6.000000 | 8.000000 |
75% | 1.770000 | 2.820000 | 104.50000 | 1.259000 | 1.979750 | 35.575000 | 17.482500 | 36.832500 | 8.250000 | 9.000000 |
max | 2.100000 | 3.360000 | 135.00000 | 1.575000 | 3.850000 | 50.510000 | 22.110000 | 57.350000 | 10.000000 | 10.000000 |
# Make "expertise" categorical as well and re-check the column dtypes.
smash_df["expertise"] = smash_df["expertise"].astype(pd.CategoricalDtype())
smash_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 72 entries, 0 to 71 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 character 72 non-null object 1 game 72 non-null category 2 regular_fall 72 non-null float64 3 fast_fall 72 non-null float64 4 weight 72 non-null int64 5 walk_speed 72 non-null float64 6 run_speed 72 non-null float64 7 full_hop_height 70 non-null float64 8 short_hop_height 70 non-null float64 9 double_hop_height 70 non-null float64 10 before 72 non-null int64 11 after 72 non-null int64 12 expertise 72 non-null category dtypes: category(2), float64(7), int64(3), object(1) memory usage: 7.9+ KB
# Summarise only the categorical columns: count, unique, top and freq.
smash_df.describe(include="category")
game | expertise | |
---|---|---|
count | 72 | 72 |
unique | 30 | 2 |
top | Super Mario | Expert |
freq | 10 | 45 |
Rename columns.
# Rename a single column via an old-name -> new-name mapping and preview the result.
smash_df = smash_df.rename(
    {"full_hop_height": "FULL_HOP_HEIGHT_RENAME"},
    axis="columns",
)
smash_df.head(n=5)
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Expert |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Amateur |
# Keep only the rows whose game is "Zelda", using a boolean mask.
zelda_df = smash_df.loc[smash_df["game"].eq("Zelda")]
zelda_df
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
16 | sheik | Zelda | 1.75 | 2.800 | 78 | 1.470 | 2.420 | 39.00 | 18.75 | 40.00 | 6 | 5 | Expert |
17 | zelda | Zelda | 1.35 | 2.160 | 85 | 0.914 | 1.430 | 31.55 | 15.24 | 31.55 | 4 | 7 | Amateur |
22 | young link | Zelda | 1.80 | 2.880 | 88 | 1.260 | 1.749 | 33.66 | 16.26 | 33.66 | 7 | 10 | Expert |
23 | ganondorf | Zelda | 1.65 | 2.640 | 118 | 0.767 | 1.340 | 25.49 | 12.24 | 26.00 | 10 | 8 | Expert |
42 | toon link | Zelda | 1.38 | 2.208 | 91 | 1.288 | 1.906 | 33.80 | 16.32 | 33.80 | 3 | 7 | Amateur |
# (rows, columns) of the Zelda subset.
zelda_df.shape
(6, 13)
# Summary statistics of the numeric columns for the Zelda subset only.
zelda_df.describe()
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | |
---|---|---|---|---|---|---|---|---|---|---|
count | 6.000000 | 6.000000 | 6.000000 | 6.000000 | 6.000000 | 6.000000 | 6.00000 | 6.000000 | 6.000000 | 6.000000 |
mean | 1.588333 | 2.621333 | 94.000000 | 1.157667 | 1.729833 | 31.883333 | 15.36500 | 32.335000 | 6.666667 | 7.333333 |
std | 0.187127 | 0.362804 | 14.546477 | 0.262673 | 0.396977 | 4.798678 | 2.31917 | 4.785912 | 2.943920 | 1.632993 |
min | 1.350000 | 2.160000 | 78.000000 | 0.767000 | 1.340000 | 25.490000 | 12.24000 | 26.000000 | 3.000000 | 5.000000 |
25% | 1.435000 | 2.316000 | 85.750000 | 0.997250 | 1.456000 | 28.737500 | 13.84500 | 29.637500 | 4.500000 | 7.000000 |
50% | 1.625000 | 2.720000 | 89.500000 | 1.253500 | 1.641500 | 32.605000 | 15.75000 | 32.605000 | 6.500000 | 7.000000 |
75% | 1.725000 | 2.860000 | 100.750000 | 1.281000 | 1.866750 | 33.765000 | 16.30500 | 33.765000 | 9.250000 | 7.750000 |
max | 1.800000 | 3.040000 | 118.000000 | 1.470000 | 2.420000 | 39.000000 | 18.75000 | 40.000000 | 10.000000 | 10.000000 |
Another way
# Same Zelda filter, written as a query string evaluated against the column names.
zelda_df_2 = smash_df.query("game == 'Zelda'")
zelda_df_2
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
16 | sheik | Zelda | 1.75 | 2.800 | 78 | 1.470 | 2.420 | 39.00 | 18.75 | 40.00 | 6 | 5 | Expert |
17 | zelda | Zelda | 1.35 | 2.160 | 85 | 0.914 | 1.430 | 31.55 | 15.24 | 31.55 | 4 | 7 | Amateur |
22 | young link | Zelda | 1.80 | 2.880 | 88 | 1.260 | 1.749 | 33.66 | 16.26 | 33.66 | 7 | 10 | Expert |
23 | ganondorf | Zelda | 1.65 | 2.640 | 118 | 0.767 | 1.340 | 25.49 | 12.24 | 26.00 | 10 | 8 | Expert |
42 | toon link | Zelda | 1.38 | 2.208 | 91 | 1.288 | 1.906 | 33.80 | 16.32 | 33.80 | 3 | 7 | Amateur |
# (rows, columns) of the query-based Zelda subset.
zelda_df_2.shape
(6, 13)
# Summary statistics of the numeric columns for the query-based Zelda subset.
zelda_df_2.describe()
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | |
---|---|---|---|---|---|---|---|---|---|---|
count | 6.000000 | 6.000000 | 6.000000 | 6.000000 | 6.000000 | 6.000000 | 6.00000 | 6.000000 | 6.000000 | 6.000000 |
mean | 1.588333 | 2.621333 | 94.000000 | 1.157667 | 1.729833 | 31.883333 | 15.36500 | 32.335000 | 6.666667 | 7.333333 |
std | 0.187127 | 0.362804 | 14.546477 | 0.262673 | 0.396977 | 4.798678 | 2.31917 | 4.785912 | 2.943920 | 1.632993 |
min | 1.350000 | 2.160000 | 78.000000 | 0.767000 | 1.340000 | 25.490000 | 12.24000 | 26.000000 | 3.000000 | 5.000000 |
25% | 1.435000 | 2.316000 | 85.750000 | 0.997250 | 1.456000 | 28.737500 | 13.84500 | 29.637500 | 4.500000 | 7.000000 |
50% | 1.625000 | 2.720000 | 89.500000 | 1.253500 | 1.641500 | 32.605000 | 15.75000 | 32.605000 | 6.500000 | 7.000000 |
75% | 1.725000 | 2.860000 | 100.750000 | 1.281000 | 1.866750 | 33.765000 | 16.30500 | 33.765000 | 9.250000 | 7.750000 |
max | 1.800000 | 3.040000 | 118.000000 | 1.470000 | 2.420000 | 39.000000 | 18.75000 | 40.000000 | 10.000000 | 10.000000 |
# Mean run speed of the Zelda fighters, rounded to five decimal places.
round(zelda_df["run_speed"].mean(), 5)
1.72983
Using a list.
# Keep rows whose game is any of the listed values.
values = ["Donkey Kong", "Animal Crossing"]
dk_ac_df = smash_df.loc[smash_df["game"].isin(values)]
dk_ac_df
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
35 | diddy kong | Donkey Kong | 1.75 | 2.800 | 90 | 1.313 | 2.006 | 41.21 | 19.90 | 41.21 | 10 | 9 | Expert |
44 | villager | Animal Crossing | 1.32 | 2.112 | 92 | 1.092 | 1.397 | 32.50 | 15.69 | 32.50 | 8 | 9 | Expert |
66 | king k rool | Donkey Kong | 1.70 | 2.720 | 133 | 0.903 | 1.485 | 33.00 | 13.50 | 32.61 | 3 | 8 | Amateur |
67 | isabelle | Animal Crossing | 1.30 | 2.080 | 88 | 1.140 | 1.480 | 32.50 | 15.69 | 32.50 | 9 | 7 | Expert |
Another method
# Same two-game filter as a query string: a row is kept when either comparison holds.
dk_ac_df_2 = smash_df.query("game == 'Donkey Kong' | game == 'Animal Crossing'")
dk_ac_df_2
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
35 | diddy kong | Donkey Kong | 1.75 | 2.800 | 90 | 1.313 | 2.006 | 41.21 | 19.90 | 41.21 | 10 | 9 | Expert |
44 | villager | Animal Crossing | 1.32 | 2.112 | 92 | 1.092 | 1.397 | 32.50 | 15.69 | 32.50 | 8 | 9 | Expert |
66 | king k rool | Donkey Kong | 1.70 | 2.720 | 133 | 0.903 | 1.485 | 33.00 | 13.50 | 32.61 | 3 | 8 | Amateur |
67 | isabelle | Animal Crossing | 1.30 | 2.080 | 88 | 1.140 | 1.480 | 32.50 | 15.69 | 32.50 | 9 | 7 | Expert |
# Rows that are both from Metroid and rated "Expert".
expert_metroid_df = smash_df.loc[
    smash_df["game"].eq("Metroid") & smash_df["expertise"].eq("Expert")
]
expert_metroid_df
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.0 | 18.00 | 37.0 | 6 | 9 | Expert |
31 | zero suit samus | Metroid | 1.70 | 2.720 | 80 | 1.470 | 2.310 | 44.5 | 21.35 | 44.2 | 10 | 10 | Expert |
# Same Metroid/Expert filter as a query string: both comparisons must hold.
expert_metroid_df_2 = smash_df.query("game == 'Metroid' & expertise == 'Expert'")
expert_metroid_df_2
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.0 | 18.00 | 37.0 | 6 | 9 | Expert |
31 | zero suit samus | Metroid | 1.70 | 2.720 | 80 | 1.470 | 2.310 | 44.5 | 21.35 | 44.2 | 10 | 10 | Expert |
# Super Mario fighters whose run speed is strictly greater than 2.
fast_supermario_df = smash_df.loc[
    smash_df["game"].eq("Super Mario") & smash_df["run_speed"].gt(2)
]
fast_supermario_df
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5 | yoshi | Super Mario | 1.29 | 2.064 | 104 | 1.208 | 2.046 | 36.09 | 14.43 | 51.56 | 5 | 9 | Amateur |
# Same filter as a query string, mixing a text comparison with a numeric one.
fast_supermario_df_2 = smash_df.query("game == 'Super Mario' & run_speed > 2")
fast_supermario_df_2
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5 | yoshi | Super Mario | 1.29 | 2.064 | 104 | 1.208 | 2.046 | 36.09 | 14.43 | 51.56 | 5 | 9 | Amateur |
Another example.
# Fighters with fast_fall above 3 and run_speed above 2 (both strict).
fast_df = smash_df.loc[
    smash_df["fast_fall"].gt(3) & smash_df["run_speed"].gt(2)
]
fast_df
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
7 | fox | Star Fox | 2.10 | 3.36 | 77 | 1.523 | 2.402 | 35.0 | 16.40 | 37.0 | 6 | 8 | Expert |
47 | little mac | Punch Out | 1.95 | 3.12 | 87 | 1.386 | 2.464 | 26.0 | 12.53 | 26.0 | 6 | 8 | Expert |
# Same two numeric conditions as a query string: both must hold.
fast_df_2 = smash_df.query("fast_fall > 3 & run_speed > 2")
fast_df_2
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
7 | fox | Star Fox | 2.10 | 3.36 | 77 | 1.523 | 2.402 | 35.0 | 16.40 | 37.0 | 6 | 8 | Expert |
47 | little mac | Punch Out | 1.95 | 3.12 | 87 | 1.386 | 2.464 | 26.0 | 12.53 | 26.0 | 6 | 8 | Expert |
Get the column names. If we already know the positions of the columns we want, we can select them directly with `iloc`.
# Column names as a plain Python list.
smash_df.columns.tolist()
['character', 'game', 'regular_fall', 'fast_fall', 'weight', 'walk_speed', 'run_speed', 'FULL_HOP_HEIGHT_RENAME', 'short_hop_height', 'double_hop_height', 'before', 'after', 'expertise']
Get indices.
# Map every column name to its positional index, for use with iloc.
# The original loop body had lost its indentation, which is a syntax error;
# a dict comprehension builds the same mapping in a single expression.
index_dictionary = {
    col: smash_df.columns.get_loc(col) for col in smash_df.columns
}
print(index_dictionary)
{'character': 0, 'game': 1, 'regular_fall': 2, 'fast_fall': 3, 'weight': 4, 'walk_speed': 5, 'run_speed': 6, 'FULL_HOP_HEIGHT_RENAME': 7, 'short_hop_height': 8, 'double_hop_height': 9, 'before': 10, 'after': 11, 'expertise': 12}
Convert to df
# One-row frame (row label 0): one column per name, holding that column's position.
index_dictionary_df = pd.DataFrame([index_dictionary])
index_dictionary_df
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
Transpose
# Flip rows and columns so each column name becomes a row label.
index_dictionary_df_transposed = index_dictionary_df.T
index_dictionary_df_transposed
0 | |
---|---|
character | 0 |
game | 1 |
regular_fall | 2 |
fast_fall | 3 |
weight | 4 |
walk_speed | 5 |
run_speed | 6 |
FULL_HOP_HEIGHT_RENAME | 7 |
short_hop_height | 8 |
double_hop_height | 9 |
before | 10 |
after | 11 |
expertise | 12 |
If it is too long to display, try:
print(index_dictionary_df_transposed.to_string())
# Render the whole frame as plain text and print it.
print(index_dictionary_df_transposed.to_string())
0 character 0 game 1 regular_fall 2 fast_fall 3 weight 4 walk_speed 5 run_speed 6 FULL_HOP_HEIGHT_RENAME 7 short_hop_height 8 double_hop_height 9 before 10 after 11 expertise 12
# All rows, columns at positions 3 through 7 (the stop position 8 is excluded).
smash_df.iloc[:, 3:8]
fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | |
---|---|---|---|---|---|
0 | 2.400 | 98 | 1.155 | 1.760 | 36.33 |
1 | 2.608 | 127 | 1.365 | 1.873 | 34.00 |
2 | 3.040 | 104 | 1.247 | 1.534 | 27.80 |
3 | 2.168 | 108 | 1.115 | 1.654 | 37.00 |
4 | 2.168 | 108 | 1.115 | 1.654 | 37.00 |
... | ... | ... | ... | ... | ... |
67 | 2.080 | 88 | 1.140 | 1.480 | 32.50 |
68 | 2.816 | 116 | 0.620 | 1.180 | 31.60 |
69 | 2.730 | 112 | 0.760 | 1.720 | 37.40 |
70 | 2.512 | 101 | 0.980 | 1.840 | NaN |
71 | 2.816 | 106 | 1.060 | 2.180 | NaN |
72 rows × 5 columns
# All rows, columns picked by an explicit list of positions.
smash_df.iloc[:, [0, 3, 5, 7, 9]]
character | fast_fall | walk_speed | FULL_HOP_HEIGHT_RENAME | double_hop_height | |
---|---|---|---|---|---|
0 | mario | 2.400 | 1.155 | 36.33 | 36.33 |
1 | donkey kong | 2.608 | 1.365 | 34.00 | 35.50 |
2 | link | 3.040 | 1.247 | 27.80 | 29.00 |
3 | samus | 2.168 | 1.115 | 37.00 | 37.00 |
4 | dark samus | 2.168 | 1.115 | 37.00 | 37.00 |
... | ... | ... | ... | ... | ... |
67 | isabelle | 2.080 | 1.140 | 32.50 | 32.50 |
68 | incineroar | 2.816 | 0.620 | 31.60 | 32.80 |
69 | piranha plant | 2.730 | 0.760 | 37.40 | 38.52 |
70 | hero | 2.512 | 0.980 | NaN | NaN |
71 | banjo and kazooie | 2.816 | 1.060 | NaN | NaN |
72 rows × 5 columns
# Positions 0, 5, 6 and 12 are character, walk_speed, run_speed and expertise.
smash_df_1 = smash_df.iloc[:, [0, 5, 6, 12]]
smash_df_1
character | walk_speed | run_speed | expertise | |
---|---|---|---|---|
0 | mario | 1.155 | 1.760 | Amateur |
1 | donkey kong | 1.365 | 1.873 | Expert |
2 | link | 1.247 | 1.534 | Expert |
3 | samus | 1.115 | 1.654 | Expert |
4 | dark samus | 1.115 | 1.654 | Amateur |
... | ... | ... | ... | ... |
67 | isabelle | 1.140 | 1.480 | Expert |
68 | incineroar | 0.620 | 1.180 | Expert |
69 | piranha plant | 0.760 | 1.720 | Expert |
70 | hero | 0.980 | 1.840 | Expert |
71 | banjo and kazooie | 1.060 | 2.180 | Expert |
72 rows × 4 columns
# Label-based column slice: unlike positional slices, the end label ("weight") is included.
smash_df_2 = smash_df.loc[:, "character":"weight"]
smash_df_2
character | game | regular_fall | fast_fall | weight | |
---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 |
2 | link | Zelda | 1.60 | 3.040 | 104 |
3 | samus | Metroid | 1.33 | 2.168 | 108 |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 |
... | ... | ... | ... | ... | ... |
67 | isabelle | Animal Crossing | 1.30 | 2.080 | 88 |
68 | incineroar | Pokemon | 1.76 | 2.816 | 116 |
69 | piranha plant | Super Mario | 1.95 | 2.730 | 112 |
70 | hero | Dragon Quest | 1.57 | 2.512 | 101 |
71 | banjo and kazooie | Banjo Kazooie | 1.76 | 2.816 | 106 |
72 rows × 5 columns
# Select columns by name; the result follows the order given in the list.
smash_df_3 = smash_df[
    [
        "character",
        "game",
        "regular_fall",
        "fast_fall",
        "weight",
        "expertise",
        "run_speed",
        "FULL_HOP_HEIGHT_RENAME",
    ]
]
smash_df_3
character | game | regular_fall | fast_fall | weight | expertise | run_speed | FULL_HOP_HEIGHT_RENAME | |
---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | Amateur | 1.760 | 36.33 |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | Expert | 1.873 | 34.00 |
2 | link | Zelda | 1.60 | 3.040 | 104 | Expert | 1.534 | 27.80 |
3 | samus | Metroid | 1.33 | 2.168 | 108 | Expert | 1.654 | 37.00 |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | Amateur | 1.654 | 37.00 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
67 | isabelle | Animal Crossing | 1.30 | 2.080 | 88 | Expert | 1.480 | 32.50 |
68 | incineroar | Pokemon | 1.76 | 2.816 | 116 | Expert | 1.180 | 31.60 |
69 | piranha plant | Super Mario | 1.95 | 2.730 | 112 | Expert | 1.720 | 37.40 |
70 | hero | Dragon Quest | 1.57 | 2.512 | 101 | Expert | 1.840 | NaN |
71 | banjo and kazooie | Banjo Kazooie | 1.76 | 2.816 | 106 | Expert | 2.180 | NaN |
72 rows × 8 columns
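For more customised ranges, use NumPy's `np.r_`, which combines single positions and slices into one array of positions.
Note that a slice excludes its end position: `6:13` selects positions 6 through 12.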
import numpy as np
# np.r_ joins scalars and slices into one position array: here 0, 5 and 6 through 12.
smash_df_4 = smash_df.iloc[:, np.r_[0, 5, 6:13]]
smash_df_4.head()
character | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|
0 | mario | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
2 | link | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
3 | samus | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Expert |
4 | dark samus | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Amateur |
Note that a slice excludes its end position: `0:5` selects positions 0 through 4, and `6:8` selects positions 6 and 7.
# Positions 0 through 4, then 12, then 6 and 7 - columns come out in that order.
smash_df_5 = smash_df.iloc[:, np.r_[0:5, 12, 6:8]]
smash_df_5.head()
character | game | regular_fall | fast_fall | weight | expertise | run_speed | FULL_HOP_HEIGHT_RENAME | |
---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | Amateur | 1.760 | 36.33 |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | Expert | 1.873 | 34.00 |
2 | link | Zelda | 1.60 | 3.040 | 104 | Expert | 1.534 | 27.80 |
3 | samus | Metroid | 1.33 | 2.168 | 108 | Expert | 1.654 | 37.00 |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | Amateur | 1.654 | 37.00 |
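First 5 rows. Notice the difference between `loc` and `iloc`: `loc` slices by label and includes the end label, while `iloc` slices by position and excludes the end position.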
# Label-based slice: rows labelled 0 through 4, end label included (five rows).
smash_df.loc[0:4]
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Expert |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Amateur |
# Position-based slice: positions 0 through 4; the stop position 5 is excluded.
smash_df.iloc[0:5]
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Expert |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Amateur |
# With iloc the same 0:4 bounds give only four rows (positions 0 through 3).
smash_df.iloc[0:4]
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Expert |
# Rows at positions 0 through 3, columns at positions 5 through 7.
smash_df.iloc[0:4, 5:8]
walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | |
---|---|---|---|
0 | 1.155 | 1.760 | 36.33 |
1 | 1.365 | 1.873 | 34.00 |
2 | 1.247 | 1.534 | 27.80 |
3 | 1.115 | 1.654 | 37.00 |
# First four values of the double_hop_height column, selected in one step per axis.
smash_df["double_hop_height"].iloc[0:4]
0 36.33 1 35.50 2 29.00 3 37.00 Name: double_hop_height, dtype: float64
# The row at position 3, restricted to the first four columns (returned as a Series).
smash_df.iloc[3, 0:4]
character samus game Metroid regular_fall 1.33 fast_fall 2.168 Name: 3, dtype: object
# Place two column slices side by side (columns 0-1 next to columns 7-8).
pd.concat(
    [smash_df.iloc[:, 0:2], smash_df.iloc[:, 7:9]],
    axis="columns",
)
character | game | FULL_HOP_HEIGHT_RENAME | short_hop_height | |
---|---|---|---|---|
0 | mario | Super Mario | 36.33 | 17.54 |
1 | donkey kong | Donkey Kong | 34.00 | 17.30 |
2 | link | Zelda | 27.80 | 13.38 |
3 | samus | Metroid | 37.00 | 18.00 |
4 | dark samus | Metroid | 37.00 | 18.00 |
... | ... | ... | ... | ... |
67 | isabelle | Animal Crossing | 32.50 | 15.69 |
68 | incineroar | Pokemon | 31.60 | 14.40 |
69 | piranha plant | Super Mario | 37.40 | 17.50 |
70 | hero | Dragon Quest | NaN | NaN |
71 | banjo and kazooie | Banjo Kazooie | NaN | NaN |
72 rows × 4 columns
# Stack two row slices (positions 0-1 and 7-8) to form the left table for the joins.
smash_left = pd.concat(
    [smash_df.iloc[0:2], smash_df.iloc[7:9]],
    axis="index",
)
smash_left
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
7 | fox | Star Fox | 2.10 | 3.360 | 77 | 1.523 | 2.402 | 35.00 | 16.40 | 37.00 | 6 | 8 | Expert |
8 | pikachu | Pokemon | 1.55 | 2.480 | 79 | 1.302 | 2.039 | 35.50 | 17.12 | 35.50 | 8 | 9 | Expert |
# Stack two row slices (positions 7-9 and 50-51) to form the right table for the joins.
smash_right = pd.concat(
    [smash_df.iloc[7:10], smash_df.iloc[50:52]],
    axis="index",
)
smash_right
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
7 | fox | Star Fox | 2.10 | 3.360 | 77 | 1.523 | 2.402 | 35.0 | 16.40 | 37.00 | 6 | 8 | Expert |
8 | pikachu | Pokemon | 1.55 | 2.480 | 79 | 1.302 | 2.039 | 35.5 | 17.12 | 35.50 | 8 | 9 | Expert |
9 | luigi | Super Mario | 1.32 | 2.112 | 97 | 1.134 | 1.650 | 44.0 | 19.98 | 41.31 | 7 | 10 | Expert |
50 | mii swordfighter | Wii | 1.55 | 2.480 | 100 | 1.070 | 1.580 | 28.3 | 12.30 | 28.30 | 8 | 7 | Expert |
51 | mii gunner | Wii | 1.45 | 2.320 | 104 | 1.000 | 1.370 | 30.9 | 14.60 | 32.30 | 7 | 8 | Expert |
Left join
# Left join on "game": every left row is kept; unmatched right columns become NaN.
smash_merge_1 = smash_left.iloc[:, 0:3].merge(
    smash_right.iloc[:, 0:3],
    how="left",
    on="game",
)
smash_merge_1
character_x | game | regular_fall_x | character_y | regular_fall_y | |
---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | luigi | 1.32 |
1 | donkey kong | Donkey Kong | 1.63 | NaN | NaN |
2 | fox | Star Fox | 2.10 | fox | 2.10 |
3 | pikachu | Pokemon | 1.55 | pikachu | 1.55 |
Right join
# Right join on "game": every right row is kept; unmatched left columns become NaN.
smash_merge_2 = smash_left.iloc[:, 0:3].merge(
    smash_right.iloc[:, 0:3],
    how="right",
    on="game",
)
smash_merge_2
character_x | game | regular_fall_x | character_y | regular_fall_y | |
---|---|---|---|---|---|
0 | fox | Star Fox | 2.10 | fox | 2.10 |
1 | pikachu | Pokemon | 1.55 | pikachu | 1.55 |
2 | mario | Super Mario | 1.50 | luigi | 1.32 |
3 | NaN | Wii | NaN | mii swordfighter | 1.55 |
4 | NaN | Wii | NaN | mii gunner | 1.45 |
Inner join
# Inner join on "game": only games present in both tables survive.
smash_merge_3 = smash_left.iloc[:, 0:3].merge(
    smash_right.iloc[:, 0:3],
    how="inner",
    on="game",
)
smash_merge_3
character_x | game | regular_fall_x | character_y | regular_fall_y | |
---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | luigi | 1.32 |
1 | fox | Star Fox | 2.10 | fox | 2.10 |
2 | pikachu | Pokemon | 1.55 | pikachu | 1.55 |
Outer join
# Outer join on "game": rows from both tables are kept, with NaN where no match exists.
smash_merge_4 = smash_left.iloc[:, 0:3].merge(
    smash_right.iloc[:, 0:3],
    how="outer",
    on="game",
)
smash_merge_4
character_x | game | regular_fall_x | character_y | regular_fall_y | |
---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | luigi | 1.32 |
1 | donkey kong | Donkey Kong | 1.63 | NaN | NaN |
2 | fox | Star Fox | 2.10 | fox | 2.10 |
3 | pikachu | Pokemon | 1.55 | pikachu | 1.55 |
4 | NaN | Wii | NaN | mii swordfighter | 1.55 |
5 | NaN | Wii | NaN | mii gunner | 1.45 |
import numpy as np
smash_df.sample(5)
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
50 | mii swordfighter | Wii | 1.55 | 2.480 | 100 | 1.070 | 1.580 | 28.30 | 12.30 | 28.30 | 8 | 7 | Expert |
33 | snake | Metal Gear | 1.73 | 2.768 | 106 | 0.882 | 1.595 | 21.62 | 13.69 | 34.07 | 4 | 5 | Amateur |
7 | fox | Star Fox | 2.10 | 3.360 | 77 | 1.523 | 2.402 | 35.00 | 16.40 | 37.00 | 6 | 8 | Expert |
35 | diddy kong | Donkey Kong | 1.75 | 2.800 | 90 | 1.313 | 2.006 | 41.21 | 19.90 | 41.21 | 10 | 9 | Expert |
smash_df.sample(frac = 0.1)
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
55 | bowser jr | Super Mario | 1.65 | 2.640 | 108 | 0.924 | 1.566 | 34.40 | 16.59 | 34.40 | 7 | 9 | Expert |
22 | young link | Zelda | 1.80 | 2.880 | 88 | 1.260 | 1.749 | 33.66 | 16.26 | 33.66 | 7 | 10 | Expert |
27 | mr game and watch | Nintendo | 1.24 | 1.984 | 75 | 1.180 | 1.679 | 27.51 | 13.26 | 27.51 | 3 | 5 | Amateur |
50 | mii swordfighter | Wii | 1.55 | 2.480 | 100 | 1.070 | 1.580 | 28.30 | 12.30 | 28.30 | 8 | 7 | Expert |
35 | diddy kong | Donkey Kong | 1.75 | 2.800 | 90 | 1.313 | 2.006 | 41.21 | 19.90 | 41.21 | 10 | 9 | Expert |
12 | jigglypuff | Pokemon | 0.98 | 1.568 | 68 | 0.735 | 1.271 | 19.79 | 11.26 | 19.79 | 3 | 7 | Amateur |
9 | luigi | Super Mario | 1.32 | 2.112 | 97 | 1.134 | 1.650 | 44.00 | 19.98 | 41.31 | 7 | 10 | Expert |
Oversampling: the draw is still random, but rows with walk_speed >= 1 are 10x more likely to be selected.
# Relative sampling weights: 10 for rows with walk_speed >= 1, 1 for every other
# row (a missing walk_speed compares False and therefore also gets 1).
# Built with a single vectorised comparison instead of a Python-level loop.
sample_weights = np.where(smash_df["walk_speed"] >= 1, 10, 1)
# sample() normalises the weights itself, so they do not have to sum to 1.
smash_df.sample(10, weights = sample_weights)
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
37 | sonic | Sonic | 1.65 | 2.640 | 86 | 1.444 | 3.850 | 35.00 | 16.89 | 35.00 | 10 | 5 | Expert |
63 | ridley | Metroid | 1.78 | 2.848 | 107 | 1.100 | 2.200 | 34.00 | 14.20 | 32.00 | 2 | 6 | Amateur |
38 | king dedede | Kirby | 1.95 | 3.120 | 127 | 1.029 | 1.496 | 32.85 | 16.02 | 32.85 | 6 | 5 | Expert |
40 | lucario | Pokemon | 1.68 | 2.688 | 92 | 1.103 | 1.705 | 37.62 | 18.19 | 37.62 | 5 | 8 | Amateur |
29 | pit | Icarus | 1.48 | 2.368 | 96 | 1.259 | 1.828 | 31.00 | 14.96 | 31.00 | 8 | 9 | Expert |
19 | pichu | Pokemon | 1.90 | 2.500 | 62 | 1.302 | 1.892 | 36.75 | 17.43 | 36.02 | 6 | 9 | Expert |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
21 | marth | Fire Emblem | 1.58 | 2.528 | 90 | 1.575 | 1.964 | 33.66 | 16.26 | 33.66 | 3 | 7 | Amateur |
67 | isabelle | Animal Crossing | 1.30 | 2.080 | 88 | 1.140 | 1.480 | 32.50 | 15.69 | 32.50 | 9 | 7 | Expert |
43 | wolf | Star Fox | 1.80 | 2.880 | 92 | 1.208 | 1.540 | 32.02 | 15.38 | 30.71 | 10 | 10 | Expert |
smash_df["fast_fall"].isnull().values.any()
False
smash_df["fast_fall"].isnull()
0 False 1 False 2 False 3 False 4 False ... 67 False 68 False 69 False 70 False 71 False Name: fast_fall, Length: 72, dtype: bool
smash_df["short_hop_height"].isnull().values.any()
True
smash_df["short_hop_height"].isnull()
0 False 1 False 2 False 3 False 4 False ... 67 False 68 False 69 False 70 True 71 True Name: short_hop_height, Length: 72, dtype: bool
smash_df.isnull().values.any()
True
smash_df.isnull()
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | False | False | False | False | False | False | False | False | False | False | False | False | False |
1 | False | False | False | False | False | False | False | False | False | False | False | False | False |
2 | False | False | False | False | False | False | False | False | False | False | False | False | False |
3 | False | False | False | False | False | False | False | False | False | False | False | False | False |
4 | False | False | False | False | False | False | False | False | False | False | False | False | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
67 | False | False | False | False | False | False | False | False | False | False | False | False | False |
68 | False | False | False | False | False | False | False | False | False | False | False | False | False |
69 | False | False | False | False | False | False | False | False | False | False | False | False | False |
70 | False | False | False | False | False | False | False | True | True | True | False | False | False |
71 | False | False | False | False | False | False | False | True | True | True | False | False | False |
72 rows × 13 columns
Retrieve the rows with NAs.
# Boolean mask that is True for every row holding at least one missing value.
smash_df[smash_df.isna().any(axis = "columns")]
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
70 | hero | Dragon Quest | 1.57 | 2.512 | 101 | 0.98 | 1.84 | NaN | NaN | NaN | 6 | 7 | Expert |
71 | banjo and kazooie | Banjo Kazooie | 1.76 | 2.816 | 106 | 1.06 | 2.18 | NaN | NaN | NaN | 7 | 8 | Expert |
Remove rows with missing values.
smash_df_complete = smash_df.dropna()
smash_df_complete.shape
(70, 13)
Use copy() to make a copy of the df
# Recode on a copy so smash_df keeps its original labels.
smash_recoding_df = smash_df.copy()
old_values = ["Amateur", "Expert"]
new_values = ["Blah", "Yay"]
# Pair each old label with its replacement and apply the mapping in one call.
expertise_recoding = dict(zip(old_values, new_values))
smash_recoding_df["expertise"] = smash_recoding_df["expertise"].replace(expertise_recoding)
smash_recoding_df.head()
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Blah |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Yay |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Yay |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Yay |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Blah |
def weight_groups_fn(weight, light_max = 88, heavy_min = 104.5):
    """Classify a weight as "light weight", "medium weight" or "heavy weight".

    Parameters
    ----------
    weight : number
        The weight to classify.
    light_max : number, default 88
        Weights strictly below this value are "light weight".
    heavy_min : number, default 104.5
        Weights at or above this value are "heavy weight".

    Returns
    -------
    str or None
        The group label, or None when no comparison holds (a NaN weight).
    """
    if weight < light_max:
        return "light weight"
    if weight < heavy_min:
        return "medium weight"
    if weight >= heavy_min:
        return "heavy weight"
    # Every comparison against NaN is False, so a missing weight ends up here.
    return None
smash_recoding_df["weight_group"] = smash_recoding_df["weight"].apply(weight_groups_fn)
smash_recoding_df["weight_group"].value_counts(sort = True)
medium weight 37 heavy weight 18 light weight 17 Name: weight_group, dtype: int64
smash_recoding_df.head()
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | weight_group | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Blah | medium weight |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Yay | heavy weight |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Yay | medium weight |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Yay | heavy weight |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Blah | heavy weight |
smash_w_dummies_df = pd.get_dummies(smash_recoding_df, prefix_sep = '_dummy_', drop_first = True)
smash_w_dummies_df.head(5)
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | ... | game_dummy_Stack Up | game_dummy_Star Fox | game_dummy_Street Fighter | game_dummy_Super Mario | game_dummy_Wii | game_dummy_Xenoblade | game_dummy_Zelda | expertise_dummy_Yay | weight_group_dummy_light weight | weight_group_dummy_medium weight | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
1 | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
2 | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 |
3 | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
4 | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 113 columns
Create missing values
import numpy as np
# Work on a copy so smash_recoding_df itself is left untouched.
smash_w_missing_df = smash_recoding_df.copy()
missing_rows = smash_w_missing_df.sample(5).index # index labels of 5 randomly drawn rows that will receive missing values for illustration
smash_w_missing_df.loc[missing_rows, "regular_fall"] = np.nan # set "regular_fall" to NaN in those 5 sampled rows
print("Number of rows with valid regular_fall values after setting to NAN: ",
smash_w_missing_df["regular_fall"].count()) # count() skips NaN, so this is the number of non-missing values
Number of rows with valid regular_fall values after setting to NAN: 67
Replace the missing values using the median of the remaining values
median_regular_fall = smash_w_missing_df["regular_fall"].median() # median of the non-missing values (median() skips NaN by default)
smash_w_missing_df["regular_fall"] = smash_w_missing_df["regular_fall"].fillna(value = median_regular_fall) # replace every NaN with that median
print('Number of rows with valid regular_fall values after filling NA values: ',
smash_w_missing_df["regular_fall"].count()) # count() skips NaN, so this is the number of non-missing values
Number of rows with valid regular_fall values after filling NA values: 72
from sklearn import preprocessing
smash_df.shape
(72, 13)
Get numerical columns for normalisation
smash_norm_df = smash_df.select_dtypes(include = np.number)
smash_norm_df
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 |
1 | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 |
2 | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 |
3 | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 |
4 | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
67 | 1.30 | 2.080 | 88 | 1.140 | 1.480 | 32.50 | 15.69 | 32.50 | 9 | 7 |
68 | 1.76 | 2.816 | 116 | 0.620 | 1.180 | 31.60 | 14.40 | 32.80 | 7 | 6 |
69 | 1.95 | 2.730 | 112 | 0.760 | 1.720 | 37.40 | 17.50 | 38.52 | 9 | 7 |
70 | 1.57 | 2.512 | 101 | 0.980 | 1.840 | NaN | NaN | NaN | 6 | 7 |
71 | 1.76 | 2.816 | 106 | 1.060 | 2.180 | NaN | NaN | NaN | 7 | 8 |
72 rows × 10 columns
If numerical columns were not selected:
from sklearn.preprocessing import StandardScaler

# Create the scaler before anything uses it (the one-line variant that used to
# come first referenced `scaler` ahead of its definition).
scaler = StandardScaler()
# Standardise only the numeric columns and label the result with *those*
# columns. This covers both situations: smash_norm_df already reduced to its
# numeric columns, and smash_norm_df still holding non-numeric ones (where
# reusing smash_norm_df.columns would not match the shape of the scaled array).
numeric_part = smash_norm_df.select_dtypes(include = np.number)
smash_norm_df = pd.DataFrame(scaler.fit_transform(numeric_part),
                             index = numeric_part.index,
                             columns = numeric_part.columns)
smash_norm_df
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | |
---|---|---|---|---|---|---|---|---|---|---|
0 | -0.452156 | -0.416400 | 0.126490 | 0.239502 | -0.119401 | 0.614298 | 0.661683 | 0.298418 | -1.294201 | 0.177443 |
1 | 0.135239 | 0.175589 | 2.222609 | 1.228999 | 0.171209 | 0.155269 | 0.556693 | 0.167367 | 0.655223 | 1.455036 |
2 | -0.000314 | 1.405105 | 0.560170 | 0.672996 | -0.700620 | -1.066182 | -1.158132 | -0.858940 | 1.434992 | -0.461353 |
3 | -1.220287 | -1.076696 | 0.849290 | 0.051026 | -0.392008 | 0.746294 | 0.862912 | 0.404207 | -0.124547 | 0.816239 |
4 | -1.220287 | -1.076696 | 0.849290 | 0.051026 | -0.392008 | 0.746294 | 0.862912 | 0.404207 | -1.684085 | -1.738945 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
67 | -1.355840 | -1.327153 | -0.596310 | 0.168823 | -0.839496 | -0.140243 | -0.147610 | -0.306313 | 1.045108 | -0.461353 |
68 | 0.722633 | 0.767578 | 1.427530 | -2.281362 | -1.611026 | -0.317551 | -0.711928 | -0.258945 | 0.265338 | -1.100149 |
69 | 1.581133 | 0.522813 | 1.138410 | -1.621696 | -0.222272 | 0.825097 | 0.644185 | 0.644205 | 1.045108 | -0.461353 |
70 | -0.135866 | -0.097637 | 0.343330 | -0.585080 | 0.086340 | NaN | NaN | NaN | -0.124547 | -0.461353 |
71 | 0.722633 | 0.767578 | 0.704730 | -0.208128 | 0.960741 | NaN | NaN | NaN | 0.265338 | 0.177443 |
72 rows × 10 columns
smash_cat_df = smash_df.select_dtypes(exclude = np.number)
smash_cat_df
character | game | expertise | |
---|---|---|---|
0 | mario | Super Mario | Amateur |
1 | donkey kong | Donkey Kong | Expert |
2 | link | Zelda | Expert |
3 | samus | Metroid | Expert |
4 | dark samus | Metroid | Amateur |
... | ... | ... | ... |
67 | isabelle | Animal Crossing | Expert |
68 | incineroar | Pokemon | Expert |
69 | piranha plant | Super Mario | Expert |
70 | hero | Dragon Quest | Expert |
71 | banjo and kazooie | Banjo Kazooie | Expert |
72 rows × 3 columns
# Put the standardised numeric columns and the non-numeric columns side by side;
# both frames share smash_df's row index, so rows line up one-to-one.
smash_norm_combined_df = pd.concat([smash_norm_df, smash_cat_df], axis = "columns")
smash_norm_combined_df
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | character | game | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.452156 | -0.416400 | 0.126490 | 0.239502 | -0.119401 | 0.614298 | 0.661683 | 0.298418 | -1.294201 | 0.177443 | mario | Super Mario | Amateur |
1 | 0.135239 | 0.175589 | 2.222609 | 1.228999 | 0.171209 | 0.155269 | 0.556693 | 0.167367 | 0.655223 | 1.455036 | donkey kong | Donkey Kong | Expert |
2 | -0.000314 | 1.405105 | 0.560170 | 0.672996 | -0.700620 | -1.066182 | -1.158132 | -0.858940 | 1.434992 | -0.461353 | link | Zelda | Expert |
3 | -1.220287 | -1.076696 | 0.849290 | 0.051026 | -0.392008 | 0.746294 | 0.862912 | 0.404207 | -0.124547 | 0.816239 | samus | Metroid | Expert |
4 | -1.220287 | -1.076696 | 0.849290 | 0.051026 | -0.392008 | 0.746294 | 0.862912 | 0.404207 | -1.684085 | -1.738945 | dark samus | Metroid | Amateur |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
67 | -1.355840 | -1.327153 | -0.596310 | 0.168823 | -0.839496 | -0.140243 | -0.147610 | -0.306313 | 1.045108 | -0.461353 | isabelle | Animal Crossing | Expert |
68 | 0.722633 | 0.767578 | 1.427530 | -2.281362 | -1.611026 | -0.317551 | -0.711928 | -0.258945 | 0.265338 | -1.100149 | incineroar | Pokemon | Expert |
69 | 1.581133 | 0.522813 | 1.138410 | -1.621696 | -0.222272 | 0.825097 | 0.644185 | 0.644205 | 1.045108 | -0.461353 | piranha plant | Super Mario | Expert |
70 | -0.135866 | -0.097637 | 0.343330 | -0.585080 | 0.086340 | NaN | NaN | NaN | -0.124547 | -0.461353 | hero | Dragon Quest | Expert |
71 | 0.722633 | 0.767578 | 0.704730 | -0.208128 | 0.960741 | NaN | NaN | NaN | 0.265338 | 0.177443 | banjo and kazooie | Banjo Kazooie | Expert |
72 rows × 13 columns
Natural log transformation
smash_df_numeric = smash_df.iloc[:, 2:11]
smash_df_numeric.head()
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | |
---|---|---|---|---|---|---|---|---|---|
0 | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 |
1 | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 |
2 | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 |
3 | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 |
4 | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 |
Natural log
import numpy as np
smash_df_numeric_log = np.log(smash_df_numeric)
smash_df_numeric_log.head()
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | |
---|---|---|---|---|---|---|---|---|---|
0 | 0.405465 | 0.875469 | 4.584967 | 0.144100 | 0.565314 | 3.592644 | 2.864484 | 3.592644 | 1.098612 |
1 | 0.488580 | 0.958584 | 4.844187 | 0.311154 | 0.627541 | 3.526361 | 2.850707 | 3.569533 | 2.079442 |
2 | 0.470004 | 1.111858 | 4.644391 | 0.220741 | 0.427879 | 3.325036 | 2.593761 | 3.367296 | 2.302585 |
3 | 0.285179 | 0.773805 | 4.682131 | 0.108854 | 0.503197 | 3.610918 | 2.890372 | 3.610918 | 1.791759 |
4 | 0.285179 | 0.773805 | 4.682131 | 0.108854 | 0.503197 | 3.610918 | 2.890372 | 3.610918 | 0.693147 |
Transform back, i.e. exp()
smash_df_numeric_exp = np.exp(smash_df_numeric_log)
smash_df_numeric_exp.head()
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | |
---|---|---|---|---|---|---|---|---|---|
0 | 1.50 | 2.400 | 98.0 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3.0 |
1 | 1.63 | 2.608 | 127.0 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8.0 |
2 | 1.60 | 3.040 | 104.0 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10.0 |
3 | 1.33 | 2.168 | 108.0 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6.0 |
4 | 1.33 | 2.168 | 108.0 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2.0 |
For base 10, use log10()
smash_df_numeric_log10 = np.log10(smash_df_numeric)
smash_df_numeric_log10.head()
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | |
---|---|---|---|---|---|---|---|---|---|
0 | 0.176091 | 0.380211 | 1.991226 | 0.062582 | 0.245513 | 1.560265 | 1.244030 | 1.560265 | 0.477121 |
1 | 0.212188 | 0.416308 | 2.103804 | 0.135133 | 0.272538 | 1.531479 | 1.238046 | 1.550228 | 0.903090 |
2 | 0.204120 | 0.482874 | 2.017033 | 0.095866 | 0.185825 | 1.444045 | 1.126456 | 1.462398 | 1.000000 |
3 | 0.123852 | 0.336059 | 2.033424 | 0.047275 | 0.218536 | 1.568202 | 1.255273 | 1.568202 | 0.778151 |
4 | 0.123852 | 0.336059 | 2.033424 | 0.047275 | 0.218536 | 1.568202 | 1.255273 | 1.568202 | 0.301030 |
Transform back using 10^x
def exp10(x):
    """Return 10 raised to the power ``x`` (the inverse of a base-10 log).

    ``x`` may be a scalar or anything that supports ``10 ** x`` element-wise,
    such as the columns passed in by ``DataFrame.apply``.
    """
    return 10 ** x
# execute the function
smash_df_numeric_log10_exp10 = smash_df_numeric_log10.apply(exp10)
smash_df_numeric_log10_exp10.head()
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | |
---|---|---|---|---|---|---|---|---|---|
0 | 1.50 | 2.400 | 98.0 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3.0 |
1 | 1.63 | 2.608 | 127.0 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8.0 |
2 | 1.60 | 3.040 | 104.0 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10.0 |
3 | 1.33 | 2.168 | 108.0 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6.0 |
4 | 1.33 | 2.168 | 108.0 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2.0 |
Multiplication and division
smash_df_numeric_2 = smash_df.iloc[:, 2:11]
smash_df_numeric_2.head()
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | |
---|---|---|---|---|---|---|---|---|---|
0 | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 |
1 | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 |
2 | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 |
3 | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 |
4 | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 |
Multiplication
def multiply_by_5(x):
    """Return ``x`` multiplied by 5.

    ``x`` may be a scalar or anything that supports ``5 * x`` element-wise,
    such as the columns passed in by ``DataFrame.apply``.
    """
    return 5 * x
# execute the function
smash_df_numeric_2_x5 = smash_df_numeric_2.apply(multiply_by_5)
# displaying the DataFrame
smash_df_numeric_2_x5.head()
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | |
---|---|---|---|---|---|---|---|---|---|
0 | 7.50 | 12.00 | 490 | 5.775 | 8.800 | 181.65 | 87.7 | 181.65 | 15 |
1 | 8.15 | 13.04 | 635 | 6.825 | 9.365 | 170.00 | 86.5 | 177.50 | 40 |
2 | 8.00 | 15.20 | 520 | 6.235 | 7.670 | 139.00 | 66.9 | 145.00 | 50 |
3 | 6.65 | 10.84 | 540 | 5.575 | 8.270 | 185.00 | 90.0 | 185.00 | 30 |
4 | 6.65 | 10.84 | 540 | 5.575 | 8.270 | 185.00 | 90.0 | 185.00 | 10 |
Division; round the result where applicable (if preferred)
def divide_by_5(x):
    """Return ``x`` divided by 5 (true division, so ints become floats).

    ``x`` may be a scalar or anything that supports ``x / 5`` element-wise,
    such as the columns passed in by ``DataFrame.apply``.
    """
    return x / 5
# apply the function to every column, then round the result to 2 decimal places
smash_df_numeric_2_divide_5 = smash_df_numeric_2_x5.apply(divide_by_5).round(2)
# show the first rows of the DataFrame
smash_df_numeric_2_divide_5.head()
regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | |
---|---|---|---|---|---|---|---|---|---|
0 | 1.50 | 2.40 | 98.0 | 1.16 | 1.76 | 36.33 | 17.54 | 36.33 | 3.0 |
1 | 1.63 | 2.61 | 127.0 | 1.36 | 1.87 | 34.00 | 17.30 | 35.50 | 8.0 |
2 | 1.60 | 3.04 | 104.0 | 1.25 | 1.53 | 27.80 | 13.38 | 29.00 | 10.0 |
3 | 1.33 | 2.17 | 108.0 | 1.12 | 1.65 | 37.00 | 18.00 | 37.00 | 6.0 |
4 | 1.33 | 2.17 | 108.0 | 1.12 | 1.65 | 37.00 | 18.00 | 37.00 | 2.0 |
pivot_1 = smash_df.value_counts("expertise")
pivot_1
expertise Expert 45 Amateur 27 dtype: int64
pivot_2 = pd.pivot_table(smash_df, values = "regular_fall", index = "game", aggfunc = "mean")
pivot_2
regular_fall | |
---|---|
game | |
Animal Crossing | 1.310000 |
Banjo Kazooie | 1.760000 |
Bayonetta | 1.770000 |
Castlevania | 1.850000 |
Donkey Kong | 1.693333 |
Dragon Quest | 1.570000 |
Duck Hunt | 1.650000 |
Earthbound | 1.310000 |
F-Zero | 1.832500 |
Final Fantasy | 1.680000 |
Fire Emblem | 1.663333 |
Icarus | 1.515000 |
Kirby | 1.580000 |
Mega Man | 1.800000 |
Metal Gear | 1.730000 |
Metroid | 1.535000 |
Mother | 1.370000 |
Nintendo | 1.240000 |
Pikmin | 1.350000 |
Pokemon | 1.610000 |
Punch Out | 1.950000 |
Sonic | 1.650000 |
Splatoon | 1.580000 |
Stack Up | 1.600000 |
Star Fox | 1.950000 |
Street Fighter | 1.600000 |
Super Mario | 1.497000 |
Wii | 1.552500 |
Xenoblade | 1.580000 |
Zelda | 1.588333 |
pivot_3 = pd.pivot_table(smash_df, values = "regular_fall", columns = "expertise", index = "game", aggfunc = "mean")
pivot_3
expertise | Amateur | Expert |
---|---|---|
game | ||
Animal Crossing | NaN | 1.310000 |
Banjo Kazooie | NaN | 1.760000 |
Bayonetta | NaN | 1.770000 |
Castlevania | 1.8500 | 1.850000 |
Donkey Kong | 1.7000 | 1.690000 |
Dragon Quest | NaN | 1.570000 |
Duck Hunt | NaN | 1.650000 |
Earthbound | 1.3100 | NaN |
F-Zero | 1.8000 | 1.865000 |
Final Fantasy | NaN | 1.680000 |
Fire Emblem | 1.6325 | 1.725000 |
Icarus | NaN | 1.515000 |
Kirby | 1.3550 | 1.805000 |
Mega Man | NaN | 1.800000 |
Metal Gear | 1.7300 | NaN |
Metroid | 1.5550 | 1.515000 |
Mother | NaN | 1.370000 |
Nintendo | 1.2400 | NaN |
Pikmin | 1.3500 | NaN |
Pokemon | 1.3300 | 1.722000 |
Punch Out | NaN | 1.950000 |
Sonic | NaN | 1.650000 |
Splatoon | 1.5800 | NaN |
Stack Up | NaN | 1.600000 |
Star Fox | NaN | 1.950000 |
Street Fighter | 1.6000 | 1.600000 |
Super Mario | 1.4720 | 1.522000 |
Wii | 1.9100 | 1.433333 |
Xenoblade | NaN | 1.580000 |
Zelda | 1.3650 | 1.700000 |
pivot_4 = pd.pivot_table(smash_df, values = "regular_fall", index = "expertise", aggfunc = "median")
pivot_4
regular_fall | |
---|---|
expertise | |
Amateur | 1.58 |
Expert | 1.65 |
pivot_5 = pd.pivot_table(smash_df, values = ["regular_fall", "fast_fall"], index = "expertise", aggfunc = "median")
pivot_5
fast_fall | regular_fall | |
---|---|---|
expertise | ||
Amateur | 2.40 | 1.58 |
Expert | 2.64 | 1.65 |
pivot_6 = pd.pivot_table(smash_df, values = "regular_fall", index = ["expertise", "game"], aggfunc = "median")
pivot_6
regular_fall | ||
---|---|---|
expertise | game | |
Amateur | Castlevania | 1.850 |
Donkey Kong | 1.700 | |
Earthbound | 1.310 | |
F-Zero | 1.800 | |
Fire Emblem | 1.615 | |
Kirby | 1.355 | |
Metal Gear | 1.730 | |
Metroid | 1.555 | |
Nintendo | 1.240 | |
Pikmin | 1.350 | |
Pokemon | 1.330 | |
Splatoon | 1.580 | |
Street Fighter | 1.600 | |
Super Mario | 1.500 | |
Wii | 1.910 | |
Zelda | 1.365 | |
Expert | Animal Crossing | 1.310 |
Banjo Kazooie | 1.760 | |
Bayonetta | 1.770 | |
Castlevania | 1.850 | |
Donkey Kong | 1.690 | |
Dragon Quest | 1.570 | |
Duck Hunt | 1.650 | |
F-Zero | 1.865 | |
Final Fantasy | 1.680 | |
Fire Emblem | 1.725 | |
Icarus | 1.515 | |
Kirby | 1.805 | |
Mega Man | 1.800 | |
Metroid | 1.515 | |
Mother | 1.370 | |
Pokemon | 1.760 | |
Punch Out | 1.950 | |
Sonic | 1.650 | |
Stack Up | 1.600 | |
Star Fox | 1.950 | |
Street Fighter | 1.600 | |
Super Mario | 1.500 | |
Wii | 1.450 | |
Xenoblade | 1.580 | |
Zelda | 1.700 |
pivot_7 = pd.pivot_table(smash_df, values = "regular_fall", columns = ["expertise", "game"], aggfunc = "median")
pivot_7
expertise | Amateur | ... | Expert | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
game | Castlevania | Donkey Kong | Earthbound | F-Zero | Fire Emblem | Kirby | Metal Gear | Metroid | Nintendo | Pikmin | ... | Pokemon | Punch Out | Sonic | Stack Up | Star Fox | Street Fighter | Super Mario | Wii | Xenoblade | Zelda |
regular_fall | 1.85 | 1.7 | 1.31 | 1.8 | 1.615 | 1.355 | 1.73 | 1.555 | 1.24 | 1.35 | ... | 1.76 | 1.95 | 1.65 | 1.6 | 1.95 | 1.6 | 1.5 | 1.45 | 1.58 | 1.7 |
1 rows × 41 columns
pivot_8 = pd.pivot_table(smash_df, values = ["regular_fall", "fast_fall"], index = ["expertise", "game"], aggfunc = "median")
pivot_8
# to export
# pivot_8.to_csv("pivot_8.csv")
fast_fall | regular_fall | ||
---|---|---|---|
expertise | game | ||
Amateur | Castlevania | 2.960 | 1.850 |
Donkey Kong | 2.720 | 1.700 | |
Earthbound | 2.096 | 1.310 | |
F-Zero | 2.880 | 1.800 | |
Fire Emblem | 2.584 | 1.615 | |
Kirby | 2.168 | 1.355 | |
Metal Gear | 2.768 | 1.730 | |
Metroid | 2.508 | 1.555 | |
Nintendo | 1.984 | 1.240 | |
Pikmin | 2.160 | 1.350 | |
Pokemon | 2.128 | 1.330 | |
Splatoon | 2.528 | 1.580 | |
Street Fighter | 2.240 | 1.600 | |
Super Mario | 2.400 | 1.500 | |
Wii | 3.072 | 1.910 | |
Zelda | 2.184 | 1.365 | |
Expert | Animal Crossing | 2.096 | 1.310 |
Banjo Kazooie | 2.816 | 1.760 | |
Bayonetta | 2.832 | 1.770 | |
Castlevania | 2.960 | 1.850 | |
Donkey Kong | 2.704 | 1.690 | |
Dragon Quest | 2.512 | 1.570 | |
Duck Hunt | 2.640 | 1.650 | |
F-Zero | 2.984 | 1.865 | |
Final Fantasy | 2.688 | 1.680 | |
Fire Emblem | 2.760 | 1.725 | |
Icarus | 2.424 | 1.515 | |
Kirby | 2.888 | 1.805 | |
Mega Man | 2.880 | 1.800 | |
Metroid | 2.444 | 1.515 | |
Mother | 2.192 | 1.370 | |
Pokemon | 2.500 | 1.760 | |
Punch Out | 3.120 | 1.950 | |
Sonic | 2.640 | 1.650 | |
Stack Up | 2.560 | 1.600 | |
Star Fox | 3.120 | 1.950 | |
Street Fighter | 2.240 | 1.600 | |
Super Mario | 2.400 | 1.500 | |
Wii | 2.320 | 1.450 | |
Xenoblade | 2.528 | 1.580 | |
Zelda | 2.840 | 1.700 |
pivot_9 = pd.pivot_table(smash_df, values = ["regular_fall", "fast_fall"],
index = ["expertise", "game"], aggfunc = ["median", "mean", "max"])
pivot_9
median | mean | max | |||||
---|---|---|---|---|---|---|---|
fast_fall | regular_fall | fast_fall | regular_fall | fast_fall | regular_fall | ||
expertise | game | ||||||
Amateur | Castlevania | 2.960 | 1.850 | 2.960000 | 1.850000 | 2.960 | 1.850 |
Donkey Kong | 2.720 | 1.700 | 2.720000 | 1.700000 | 2.720 | 1.700 | |
Earthbound | 2.096 | 1.310 | 2.096000 | 1.310000 | 2.096 | 1.310 | |
F-Zero | 2.880 | 1.800 | 2.880000 | 1.800000 | 2.880 | 1.800 | |
Fire Emblem | 2.584 | 1.615 | 2.612000 | 1.632500 | 2.880 | 1.800 | |
Kirby | 2.168 | 1.355 | 2.168000 | 1.355000 | 2.368 | 1.480 | |
Metal Gear | 2.768 | 1.730 | 2.768000 | 1.730000 | 2.768 | 1.730 | |
Metroid | 2.508 | 1.555 | 2.508000 | 1.555000 | 2.848 | 1.780 | |
Nintendo | 1.984 | 1.240 | 1.984000 | 1.240000 | 1.984 | 1.240 | |
Pikmin | 2.160 | 1.350 | 2.160000 | 1.350000 | 2.160 | 1.350 | |
Pokemon | 2.128 | 1.330 | 2.128000 | 1.330000 | 2.688 | 1.680 | |
Splatoon | 2.528 | 1.580 | 2.528000 | 1.580000 | 2.528 | 1.580 | |
Street Fighter | 2.240 | 1.600 | 2.240000 | 1.600000 | 2.240 | 1.600 | |
Super Mario | 2.400 | 1.500 | 2.355200 | 1.472000 | 2.832 | 1.770 | |
Wii | 3.072 | 1.910 | 3.072000 | 1.910000 | 3.072 | 1.910 | |
Zelda | 2.184 | 1.365 | 2.184000 | 1.365000 | 2.208 | 1.380 | |
Expert | Animal Crossing | 2.096 | 1.310 | 2.096000 | 1.310000 | 2.112 | 1.320 |
Banjo Kazooie | 2.816 | 1.760 | 2.816000 | 1.760000 | 2.816 | 1.760 | |
Bayonetta | 2.832 | 1.770 | 2.832000 | 1.770000 | 2.832 | 1.770 | |
Castlevania | 2.960 | 1.850 | 2.960000 | 1.850000 | 2.960 | 1.850 | |
Donkey Kong | 2.704 | 1.690 | 2.704000 | 1.690000 | 2.800 | 1.750 | |
Dragon Quest | 2.512 | 1.570 | 2.512000 | 1.570000 | 2.512 | 1.570 | |
Duck Hunt | 2.640 | 1.650 | 2.640000 | 1.650000 | 2.640 | 1.650 | |
F-Zero | 2.984 | 1.865 | 2.984000 | 1.865000 | 2.984 | 1.865 | |
Final Fantasy | 2.688 | 1.680 | 2.688000 | 1.680000 | 2.688 | 1.680 | |
Fire Emblem | 2.760 | 1.725 | 2.760000 | 1.725000 | 2.880 | 1.800 | |
Icarus | 2.424 | 1.515 | 2.424000 | 1.515000 | 2.480 | 1.550 | |
Kirby | 2.888 | 1.805 | 2.888000 | 1.805000 | 3.120 | 1.950 | |
Mega Man | 2.880 | 1.800 | 2.880000 | 1.800000 | 2.880 | 1.800 | |
Metroid | 2.444 | 1.515 | 2.444000 | 1.515000 | 2.720 | 1.700 | |
Mother | 2.192 | 1.370 | 2.192000 | 1.370000 | 2.192 | 1.370 | |
Pokemon | 2.500 | 1.760 | 2.647200 | 1.722000 | 2.960 | 1.900 | |
Punch Out | 3.120 | 1.950 | 3.120000 | 1.950000 | 3.120 | 1.950 | |
Sonic | 2.640 | 1.650 | 2.640000 | 1.650000 | 2.640 | 1.650 | |
Stack Up | 2.560 | 1.600 | 2.560000 | 1.600000 | 2.560 | 1.600 | |
Star Fox | 3.120 | 1.950 | 3.120000 | 1.950000 | 3.360 | 2.100 | |
Street Fighter | 2.240 | 1.600 | 2.240000 | 1.600000 | 2.240 | 1.600 | |
Super Mario | 2.400 | 1.500 | 2.357200 | 1.522000 | 2.730 | 1.950 | |
Wii | 2.320 | 1.450 | 2.293333 | 1.433333 | 2.480 | 1.550 | |
Xenoblade | 2.528 | 1.580 | 2.528000 | 1.580000 | 2.528 | 1.580 | |
Zelda | 2.840 | 1.700 | 2.840000 | 1.700000 | 3.040 | 1.800 |
pivot_4.plot.bar(figsize = (10,5), title = "Median Regular Fall by Expertise")
<AxesSubplot:title={'center':'Median Regular Fall by Expertise'}, xlabel='expertise'>
# pivot_5 holds the medians of both fast_fall and regular_fall, so the title names both.
pivot_5.plot.barh(figsize = (10,5), title = "Median Fast Fall and Regular Fall by Expertise")
<AxesSubplot:title={'center':'Median Regular Fall by Expertise'}, ylabel='expertise'>
from sklearn.model_selection import train_test_split
Make a copy using copy()
# Split on a copy so that smash_df itself is never modified.
smash_split_df = smash_df.copy()

# Outcome: expertise stored as a categorical; predictors: every remaining column.
y = smash_split_df["expertise"].astype("category")
X = smash_split_df.drop("expertise", axis = "columns")

# Hold out 30% of the rows for validation; the fixed random_state makes the split repeatable.
(train_X, valid_X,
 train_y, valid_y) = train_test_split(X, y, test_size = 0.3, random_state = 666)
train_X.head()
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
64 | simon | Castlevania | 1.85 | 2.960 | 107 | 0.760 | 1.520 | 30.00 | 18.00 | 29.00 | 3 | 9 |
22 | young link | Zelda | 1.80 | 2.880 | 88 | 1.260 | 1.749 | 33.66 | 16.26 | 33.66 | 7 | 10 |
38 | king dedede | Kirby | 1.95 | 3.120 | 127 | 1.029 | 1.496 | 32.85 | 16.02 | 32.85 | 6 | 5 |
25 | roy | Fire Emblem | 1.80 | 2.880 | 95 | 1.208 | 2.145 | 30.97 | 13.00 | 28.00 | 3 | 6 |
71 | banjo and kazooie | Banjo Kazooie | 1.76 | 2.816 | 106 | 1.060 | 2.180 | NaN | NaN | NaN | 7 | 8 |
valid_X.head()
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
37 | sonic | Sonic | 1.65 | 2.640 | 86 | 1.444 | 3.850 | 35.00 | 16.89 | 35.00 | 10 | 5 |
68 | incineroar | Pokemon | 1.76 | 2.816 | 116 | 0.620 | 1.180 | 31.60 | 14.40 | 32.80 | 7 | 6 |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 |
52 | palutena | Icarus | 1.55 | 2.480 | 91 | 1.271 | 2.077 | 35.90 | 17.30 | 35.90 | 10 | 5 |
17 | zelda | Zelda | 1.35 | 2.160 | 85 | 0.914 | 1.430 | 31.55 | 15.24 | 31.55 | 4 | 7 |
train_y.head()
64 Amateur 22 Expert 38 Expert 25 Amateur 71 Expert Name: expertise, dtype: category Categories (2, object): ['Amateur', 'Expert']
valid_y.head()
37 Expert 68 Expert 1 Expert 52 Expert 17 Amateur Name: expertise, dtype: category Categories (2, object): ['Amateur', 'Expert']
len(train_X)
50
len(train_y)
50
len(valid_X)
22
len(valid_y)
22
smash_df.head()
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Expert |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Amateur |
pd.DataFrame(smash_df.columns.values, columns = ["Variables"])
Variables | |
---|---|
0 | character |
1 | game |
2 | regular_fall |
3 | fast_fall |
4 | weight |
5 | walk_speed |
6 | run_speed |
7 | FULL_HOP_HEIGHT_RENAME |
8 | short_hop_height |
9 | double_hop_height |
10 | before |
11 | after |
12 | expertise |
smash_df.dtypes
character object game category regular_fall float64 fast_fall float64 weight int64 walk_speed float64 run_speed float64 FULL_HOP_HEIGHT_RENAME float64 short_hop_height float64 double_hop_height float64 before int64 after int64 expertise category dtype: object
# Keep only the columns used for modelling. They are selected by name rather
# than by position (previously 1, 2, 3, 4, 12) so the selection stays correct
# even if the column order of smash_df changes.
smash_df_7 = smash_df[["game", "regular_fall", "fast_fall", "weight", "expertise"]]
smash_df_7.head()
game | regular_fall | fast_fall | weight | expertise | |
---|---|---|---|---|---|
0 | Super Mario | 1.50 | 2.400 | 98 | Amateur |
1 | Donkey Kong | 1.63 | 2.608 | 127 | Expert |
2 | Zelda | 1.60 | 3.040 | 104 | Expert |
3 | Metroid | 1.33 | 2.168 | 108 | Expert |
4 | Metroid | 1.33 | 2.168 | 108 | Amateur |
Get dummies for the predictors. Exclude the outcome variable first: it is categorical, so `get_dummies` would otherwise split it into dummy columns too. Since it has only 2 classes, it could alternatively be converted to a single dummy and used as the outcome.
# Dummy-code the predictors only: the outcome column is set aside before encoding,
# and every level of "game" keeps its own column (drop_first = False).
smash_df_7_predictors = pd.get_dummies(smash_df_7.drop("expertise", axis = "columns"),
                                       drop_first = False)
smash_df_7_predictors.head()
regular_fall | fast_fall | weight | game_Animal Crossing | game_Banjo Kazooie | game_Bayonetta | game_Castlevania | game_Donkey Kong | game_Dragon Quest | game_Duck Hunt | ... | game_Punch Out | game_Sonic | game_Splatoon | game_Stack Up | game_Star Fox | game_Street Fighter | game_Super Mario | game_Wii | game_Xenoblade | game_Zelda | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.50 | 2.400 | 98 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
1 | 1.63 | 2.608 | 127 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 1.60 | 3.040 | 104 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
3 | 1.33 | 2.168 | 108 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 1.33 | 2.168 | 108 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 33 columns
Combine with outcome variable.
# Put the outcome back as the last column, next to the dummy-coded predictors.
smash_df_7_full = pd.concat(
    [smash_df_7_predictors, smash_df_7["expertise"]],
    axis = "columns",
)
smash_df_7_full.head()
regular_fall | fast_fall | weight | game_Animal Crossing | game_Banjo Kazooie | game_Bayonetta | game_Castlevania | game_Donkey Kong | game_Dragon Quest | game_Duck Hunt | ... | game_Sonic | game_Splatoon | game_Stack Up | game_Star Fox | game_Street Fighter | game_Super Mario | game_Wii | game_Xenoblade | game_Zelda | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.50 | 2.400 | 98 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | Amateur |
1 | 1.63 | 2.608 | 127 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Expert |
2 | 1.60 | 3.040 | 104 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | Expert |
3 | 1.33 | 2.168 | 108 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Expert |
4 | 1.33 | 2.168 | 108 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Amateur |
5 rows × 34 columns
smash_df_7_full.columns.values
array(['regular_fall', 'fast_fall', 'weight', 'game_Animal Crossing', 'game_Banjo Kazooie', 'game_Bayonetta', 'game_Castlevania', 'game_Donkey Kong', 'game_Dragon Quest', 'game_Duck Hunt', 'game_Earthbound', 'game_F-Zero', 'game_Final Fantasy', 'game_Fire Emblem', 'game_Icarus', 'game_Kirby', 'game_Mega Man', 'game_Metal Gear', 'game_Metroid', 'game_Mother', 'game_Nintendo', 'game_Pikmin', 'game_Pokemon', 'game_Punch Out', 'game_Sonic', 'game_Splatoon', 'game_Stack Up', 'game_Star Fox', 'game_Street Fighter', 'game_Super Mario', 'game_Wii', 'game_Xenoblade', 'game_Zelda', 'expertise'], dtype=object)
# Numerical predictors: the columns that are standardised further down.
predictors_numerical = ['regular_fall', 'fast_fall', 'weight']

# Categorical predictors: one dummy column per level of "game".
predictors_categorical = [
    'game_Animal Crossing', 'game_Banjo Kazooie', 'game_Bayonetta',
    'game_Castlevania', 'game_Donkey Kong', 'game_Dragon Quest',
    'game_Duck Hunt', 'game_Earthbound', 'game_F-Zero',
    'game_Final Fantasy', 'game_Fire Emblem', 'game_Icarus',
    'game_Kirby', 'game_Mega Man', 'game_Metal Gear',
    'game_Metroid', 'game_Mother', 'game_Nintendo',
    'game_Pikmin', 'game_Pokemon', 'game_Punch Out',
    'game_Sonic', 'game_Splatoon', 'game_Stack Up',
    'game_Star Fox', 'game_Street Fighter', 'game_Super Mario',
    'game_Wii', 'game_Xenoblade', 'game_Zelda',
]

# All predictors, numerical first and then the dummies. Built from the two
# lists above instead of repeating all 33 names, so the lists cannot drift apart.
predictors = predictors_numerical + predictors_categorical

# Name of the outcome (target) column.
outcome = "expertise"
# Split the dummy-coded frame (predictors + outcome together) using the same
# test_size and random_state as the earlier split of the raw data.
train_data, valid_data = train_test_split(smash_df_7_full, test_size = 0.3, random_state = 666)
train_data.head()
regular_fall | fast_fall | weight | game_Animal Crossing | game_Banjo Kazooie | game_Bayonetta | game_Castlevania | game_Donkey Kong | game_Dragon Quest | game_Duck Hunt | ... | game_Sonic | game_Splatoon | game_Stack Up | game_Star Fox | game_Street Fighter | game_Super Mario | game_Wii | game_Xenoblade | game_Zelda | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
64 | 1.85 | 2.960 | 107 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Amateur |
22 | 1.80 | 2.880 | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | Expert |
38 | 1.95 | 3.120 | 127 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Expert |
25 | 1.80 | 2.880 | 95 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Amateur |
71 | 1.76 | 2.816 | 106 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Expert |
5 rows × 34 columns
valid_data.head()
regular_fall | fast_fall | weight | game_Animal Crossing | game_Banjo Kazooie | game_Bayonetta | game_Castlevania | game_Donkey Kong | game_Dragon Quest | game_Duck Hunt | ... | game_Sonic | game_Splatoon | game_Stack Up | game_Star Fox | game_Street Fighter | game_Super Mario | game_Wii | game_Xenoblade | game_Zelda | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
37 | 1.65 | 2.640 | 86 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Expert |
68 | 1.76 | 2.816 | 116 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Expert |
1 | 1.63 | 2.608 | 127 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Expert |
52 | 1.55 | 2.480 | 91 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Expert |
17 | 1.35 | 2.160 | 85 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | Amateur |
5 rows × 34 columns
from sklearn import preprocessing
# Standardiser for the numerical predictors.
scaler = preprocessing.StandardScaler()
# Fit on the training rows only; this same fitted scaler is then used to
# transform the training, validation and new data.
scaler.fit(train_data[predictors_numerical])
StandardScaler()
# Standardise the training numericals. The resulting frame has a default
# 0..n-1 index (see the output below), not the row labels of train_data.
train_df_norm = pd.DataFrame(scaler.transform(train_data[predictors_numerical]), columns = predictors_numerical)
train_df_norm.head()
regular_fall | fast_fall | weight | |
---|---|---|---|
0 | 1.060291 | 1.114411 | 0.822010 |
1 | 0.843285 | 0.894801 | -0.515164 |
2 | 1.494303 | 1.553631 | 2.229562 |
3 | 0.843285 | 0.894801 | -0.022521 |
4 | 0.669681 | 0.719113 | 0.751633 |
# Restore the training row labels on the normalised frame so that the
# column-wise concat pairs every row with its own dummy columns.
train_df_norm.index = train_data.index
train_dummy_cols = train_data[predictors_categorical]
train_X = pd.concat([train_df_norm, train_dummy_cols], axis = "columns")
train_X.head()
regular_fall | fast_fall | weight | game_Animal Crossing | game_Banjo Kazooie | game_Bayonetta | game_Castlevania | game_Donkey Kong | game_Dragon Quest | game_Duck Hunt | ... | game_Punch Out | game_Sonic | game_Splatoon | game_Stack Up | game_Star Fox | game_Street Fighter | game_Super Mario | game_Wii | game_Xenoblade | game_Zelda | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
64 | 1.060291 | 1.114411 | 0.822010 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
22 | 0.843285 | 0.894801 | -0.515164 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
38 | 1.494303 | 1.553631 | 2.229562 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
25 | 0.843285 | 0.894801 | -0.022521 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
71 | 0.669681 | 0.719113 | 0.751633 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 33 columns
# Standardise the validation numericals with the already-fitted scaler (transform
# only, no refit). The resulting frame has a default 0..n-1 index (see the output
# below), not the row labels of valid_data.
valid_df_norm = pd.DataFrame(scaler.transform(valid_data[predictors_numerical]), columns = predictors_numerical)
valid_df_norm.head()
regular_fall | fast_fall | weight | |
---|---|---|---|
0 | 0.192267 | 0.235971 | -0.655919 |
1 | 0.669681 | 0.719113 | 1.455409 |
2 | 0.105465 | 0.148127 | 2.229562 |
3 | -0.241745 | -0.203249 | -0.304031 |
4 | -1.109769 | -1.081689 | -0.726297 |
# Restore the validation row labels on the normalised frame so that the
# column-wise concat pairs every row with its own dummy columns.
valid_df_norm.index = valid_data.index
valid_dummy_cols = valid_data[predictors_categorical]
valid_X = pd.concat([valid_df_norm, valid_dummy_cols], axis = "columns")
valid_X.head()
regular_fall | fast_fall | weight | game_Animal Crossing | game_Banjo Kazooie | game_Bayonetta | game_Castlevania | game_Donkey Kong | game_Dragon Quest | game_Duck Hunt | ... | game_Punch Out | game_Sonic | game_Splatoon | game_Stack Up | game_Star Fox | game_Street Fighter | game_Super Mario | game_Wii | game_Xenoblade | game_Zelda | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
37 | 0.192267 | 0.235971 | -0.655919 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
68 | 0.669681 | 0.719113 | 1.455409 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 0.105465 | 0.148127 | 2.229562 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
52 | -0.241745 | -0.203249 | -0.304031 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
17 | -1.109769 | -1.081689 | -0.726297 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
5 rows × 33 columns
# Outcome labels for the training rows, stored as a categorical.
train_y = train_data[outcome].astype("category")
train_y.head()
64 Amateur 22 Expert 38 Expert 25 Amateur 71 Expert Name: expertise, dtype: category Categories (2, object): ['Amateur', 'Expert']
# Outcome labels for the validation rows, stored as a categorical.
valid_y = valid_data[outcome].astype("category")
valid_y.head()
37 Expert 68 Expert 1 Expert 52 Expert 17 Amateur Name: expertise, dtype: category Categories (2, object): ['Amateur', 'Expert']
Creating dummies in new data with fewer levels.
# Two new records whose "game" column contains only 2 of the 30 game levels
# present in the full data set.
data = dict(
    regular_fall = [1.8, 2.3],
    fast_fall = [1.7, 2.0],
    weight = [88, 75],
    game = ["Super Mario", "Zelda"],
)
# Build the frame; the dict keys become the column names, in insertion order.
new_df = pd.DataFrame(data)
new_df
regular_fall | fast_fall | weight | game | |
---|---|---|---|---|
0 | 1.8 | 1.7 | 88 | Super Mario |
1 | 2.3 | 2.0 | 75 | Zelda |
Get dummies
# Dummy-code the new data. Only the game levels that actually occur in new_df
# get a column, so this frame has fewer dummy columns than the training data.
new_df_dummies = pd.get_dummies(new_df)
new_df_dummies
regular_fall | fast_fall | weight | game_Super Mario | game_Zelda | |
---|---|---|---|---|---|
0 | 1.8 | 1.7 | 88 | 1 | 0 |
1 | 2.3 | 2.0 | 75 | 0 | 1 |
Get additional dummy columns
# Conform the new data to the training layout: same columns in the same order,
# with any dummy column missing from the new data created and filled with 0.
new_df_dummies_full = new_df_dummies.reindex(
    train_X.columns,
    axis = "columns",
    fill_value = 0,
)
new_df_dummies_full
regular_fall | fast_fall | weight | game_Animal Crossing | game_Banjo Kazooie | game_Bayonetta | game_Castlevania | game_Donkey Kong | game_Dragon Quest | game_Duck Hunt | ... | game_Punch Out | game_Sonic | game_Splatoon | game_Stack Up | game_Star Fox | game_Street Fighter | game_Super Mario | game_Wii | game_Xenoblade | game_Zelda | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.8 | 1.7 | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
1 | 2.3 | 2.0 | 75 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
2 rows × 33 columns
pd.DataFrame(new_df_dummies_full.columns.values, columns = ["Variables"])
Variables | |
---|---|
0 | regular_fall |
1 | fast_fall |
2 | weight |
3 | game_Animal Crossing |
4 | game_Banjo Kazooie |
5 | game_Bayonetta |
6 | game_Castlevania |
7 | game_Donkey Kong |
8 | game_Dragon Quest |
9 | game_Duck Hunt |
10 | game_Earthbound |
11 | game_F-Zero |
12 | game_Final Fantasy |
13 | game_Fire Emblem |
14 | game_Icarus |
15 | game_Kirby |
16 | game_Mega Man |
17 | game_Metal Gear |
18 | game_Metroid |
19 | game_Mother |
20 | game_Nintendo |
21 | game_Pikmin |
22 | game_Pokemon |
23 | game_Punch Out |
24 | game_Sonic |
25 | game_Splatoon |
26 | game_Stack Up |
27 | game_Star Fox |
28 | game_Street Fighter |
29 | game_Super Mario |
30 | game_Wii |
31 | game_Xenoblade |
32 | game_Zelda |
Normalise
# Standardise the numerical columns of the new data with the already-fitted scaler.
# The original row labels are passed back in explicitly: without them the frame
# would get a fresh 0..n-1 index, and the column-wise concat with the dummy
# columns would misalign rows whenever the new data is not indexed 0..n-1.
new_df_dummies_full_norm = pd.DataFrame(scaler.transform(new_df_dummies_full[predictors_numerical]),
                                        columns = predictors_numerical,
                                        index = new_df_dummies_full.index)
new_df_dummies_full_norm.head()
regular_fall | fast_fall | weight | |
---|---|---|---|
0 | 0.843285 | -2.344446 | -0.515164 |
1 | 3.013346 | -1.520909 | -1.430073 |
Assembling the final normalised new data with the full range of dummy columns
# Normalised numericals on the left, the full set of dummy columns on the right;
# the column-wise concat matches rows by index label.
new_data_parts = [new_df_dummies_full_norm, new_df_dummies_full[predictors_categorical]]
new_df_dummies_full_norm_2 = pd.concat(new_data_parts, axis = "columns")
new_df_dummies_full_norm_2.head()
regular_fall | fast_fall | weight | game_Animal Crossing | game_Banjo Kazooie | game_Bayonetta | game_Castlevania | game_Donkey Kong | game_Dragon Quest | game_Duck Hunt | ... | game_Punch Out | game_Sonic | game_Splatoon | game_Stack Up | game_Star Fox | game_Street Fighter | game_Super Mario | game_Wii | game_Xenoblade | game_Zelda | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.843285 | -2.344446 | -0.515164 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
1 | 3.013346 | -1.520909 | -1.430073 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
2 rows × 33 columns
pd.DataFrame(new_df_dummies_full_norm_2.columns.values, columns = ["Variables"])
Variables | |
---|---|
0 | regular_fall |
1 | fast_fall |
2 | weight |
3 | game_Animal Crossing |
4 | game_Banjo Kazooie |
5 | game_Bayonetta |
6 | game_Castlevania |
7 | game_Donkey Kong |
8 | game_Dragon Quest |
9 | game_Duck Hunt |
10 | game_Earthbound |
11 | game_F-Zero |
12 | game_Final Fantasy |
13 | game_Fire Emblem |
14 | game_Icarus |
15 | game_Kirby |
16 | game_Mega Man |
17 | game_Metal Gear |
18 | game_Metroid |
19 | game_Mother |
20 | game_Nintendo |
21 | game_Pikmin |
22 | game_Pokemon |
23 | game_Punch Out |
24 | game_Sonic |
25 | game_Splatoon |
26 | game_Stack Up |
27 | game_Star Fox |
28 | game_Street Fighter |
29 | game_Super Mario |
30 | game_Wii |
31 | game_Xenoblade |
32 | game_Zelda |