import pandas as pd
from pandas import DataFrame
import numpy as np
import matplotlib.pyplot as plt
# Load the Smash character statistics from the CSV file into a DataFrame.
smash = pd.read_csv("smash_3.csv")
# Preview the first rows of the loaded table.
smash.head()
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | full_hop_height | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Expert |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Amateur |
# Dimensions of the table as (rows, columns).
smash.shape
(72, 13)
# Print column names, non-null counts, and dtypes.
smash.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 72 entries, 0 to 71 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 character 72 non-null object 1 game 72 non-null object 2 regular_fall 72 non-null float64 3 fast_fall 72 non-null float64 4 weight 72 non-null int64 5 walk_speed 72 non-null float64 6 run_speed 72 non-null float64 7 full_hop_height 70 non-null float64 8 short_hop_height 70 non-null float64 9 double_hop_height 70 non-null float64 10 before 72 non-null int64 11 after 72 non-null int64 12 expertise 72 non-null object dtypes: float64(7), int64(3), object(3) memory usage: 7.4+ KB
# Summary statistics (count, mean, std, quartiles, ...) for the numeric columns.
smash.describe()
regular_fall | fast_fall | weight | walk_speed | run_speed | full_hop_height | short_hop_height | double_hop_height | before | after | |
---|---|---|---|---|---|---|---|---|---|---|
count | 72.000000 | 72.000000 | 72.00000 | 72.000000 | 72.000000 | 70.000000 | 70.000000 | 70.000000 | 72.000000 | 72.000000 |
mean | 1.600069 | 2.546306 | 96.25000 | 1.104171 | 1.806428 | 33.211864 | 16.027429 | 34.440000 | 6.319444 | 7.722222 |
std | 0.222869 | 0.353823 | 13.93218 | 0.213718 | 0.391566 | 5.112580 | 2.302452 | 6.379117 | 2.582860 | 1.576430 |
min | 0.980000 | 1.568000 | 62.00000 | 0.620000 | 1.180000 | 19.790000 | 11.260000 | 19.790000 | 2.000000 | 5.000000 |
25% | 1.472500 | 2.240000 | 88.00000 | 0.924000 | 1.591250 | 30.885375 | 14.500000 | 30.782500 | 4.000000 | 7.000000 |
50% | 1.620000 | 2.592000 | 95.50000 | 1.133000 | 1.722500 | 33.000000 | 16.225000 | 33.500000 | 6.000000 | 8.000000 |
75% | 1.770000 | 2.820000 | 104.50000 | 1.259000 | 1.979750 | 35.575000 | 17.482500 | 36.832500 | 8.250000 | 9.000000 |
max | 2.100000 | 3.360000 | 135.00000 | 1.575000 | 3.850000 | 50.510000 | 22.110000 | 57.350000 | 10.000000 | 10.000000 |
# Store the text label columns as pandas categoricals, then re-check dtypes.
for column_name in ("game", "expertise"):
    smash[column_name] = smash[column_name].astype("category")
smash.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 72 entries, 0 to 71 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 character 72 non-null object 1 game 72 non-null category 2 regular_fall 72 non-null float64 3 fast_fall 72 non-null float64 4 weight 72 non-null int64 5 walk_speed 72 non-null float64 6 run_speed 72 non-null float64 7 full_hop_height 70 non-null float64 8 short_hop_height 70 non-null float64 9 double_hop_height 70 non-null float64 10 before 72 non-null int64 11 after 72 non-null int64 12 expertise 72 non-null category dtypes: category(2), float64(7), int64(3), object(1) memory usage: 7.9+ KB
# Summary (count, unique, top, freq) restricted to the categorical columns.
smash.describe(include = "category")
game | expertise | |
---|---|---|
count | 72 | 72 |
unique | 30 | 2 |
top | Super Mario | Expert |
freq | 10 | 45 |
Rename columns.
# Map each old column name to its new name; columns not listed are unchanged.
column_renames = {"full_hop_height": "FULL_HOP_HEIGHT_RENAME"}
smash = smash.rename(columns=column_renames)
# Preview the table to confirm the new column name.
smash.head()
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | FULL_HOP_HEIGHT_RENAME | short_hop_height | double_hop_height | before | after | expertise | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Expert |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Amateur |
import seaborn as sns
# Histogram of run speed using seaborn's default binning.
sns.histplot(x = smash["run_speed"])
<AxesSubplot:xlabel='run_speed', ylabel='Count'>
End the line with ";" to suppress the printed AxesSubplot output. Change the number of bins.
# Histogram of run speed with 20 bins. The trailing ";" on expression
# statements keeps the notebook from echoing the returned object.
run_speed_values = smash["run_speed"]
sns.histplot(x=run_speed_values, bins=20);

# Same histogram, with bar heights shown as probabilities and labelled axes.
histogram = sns.histplot(
    x=run_speed_values,
    bins=20,
    stat="probability",
    color="purple",
)
histogram.set_xlabel("Run Speed", fontsize=15);
histogram.set_ylabel("Probability", fontsize=15);
# Bar plot of short hop height per expertise level, using seaborn's default
# estimator.
sns.barplot(
    x="expertise",
    y="short_hop_height",
    data=smash,
    color="green",
);

# Number of rows in each expertise level.
count = sns.countplot(x="expertise", data=smash, color="red")
count.set_xlabel("Expertise", fontsize=15);
count.set_ylabel("Number", fontsize=15);

# Total short hop height per expertise level (estimator is the built-in sum).
bar = sns.barplot(
    x="expertise",
    y="short_hop_height",
    data=smash,
    color="blue",
    estimator=sum,
)
bar.set_xlabel("Expertise", fontsize=15);
bar.set_ylabel("Total Short Hop Height", fontsize=15);

# Maximum short hop height per expertise level (estimator is the built-in max).
bar = sns.barplot(
    x="expertise",
    y="short_hop_height",
    data=smash,
    color="lightblue",
    estimator=max,
)
bar.set_xlabel("Expertise", fontsize=15);
bar.set_ylabel("Maximum Short Hop Height", fontsize=15);
Alternatively, use an estimator imported from numpy, such as the median.
import numpy as np
# Median short hop height per expertise level, using NumPy's median as the
# estimator.
bar = sns.barplot(
    x="expertise",
    y="short_hop_height",
    data=smash,
    color="lightblue",
    estimator=np.median,
)
bar.set_xlabel("Expertise", fontsize=15);
bar.set_ylabel("Median Short Hop Height", fontsize=15);
Convert to a horizontal orientation.
# Horizontal version: the numeric column goes on x and the category on y.
bar = sns.barplot(
    x="short_hop_height",
    y="expertise",
    data=smash,
    color="purple",
    estimator=np.median,
)
bar.set_ylabel("Expertise", fontsize=15);
bar.set_xlabel("Median Short Hop Height", fontsize=15);
# Box plot of run speed with default styling.
run_speed_column = smash["run_speed"]
sns.boxplot(x=run_speed_column);

# Same box plot in yellow with the mean marked, plus a readable axis label.
box = sns.boxplot(
    x=run_speed_column,
    showmeans=True,
    color="yellow",
)
box.set_xlabel("Run Speed", fontsize=15);
Just for illustration: a line plot does not make sense for this data.
# Line plot of double hop height against expertise, one hue per walk speed.
line = sns.lineplot(
    x="expertise",
    y="double_hop_height",
    hue="walk_speed",
    data=smash,
)
line.set_xlabel("Expertise", fontsize=15);
line.set_ylabel("Double Hop Height", fontsize=15);
# Rebuild the legend so it carries a readable title.
line.legend(title="Walk Speed", title_fontsize="10");
# Scatter plot of walk speed against run speed, with points coloured by
# double hop height.
scatter = sns.scatterplot(
    x="run_speed",
    y="walk_speed",
    hue="double_hop_height",
    data=smash,
)
scatter.set_xlabel("Run Speed", fontsize=20);
scatter.set_ylabel("Walk Speed", fontsize=20);
# The legend describes the hue variable (double_hop_height); the title was
# previously misspelled as "Houdle Hop height".
scatter.legend(title="Double Hop Height", title_fontsize="10");
Show distributions with the scatter plot.
# All joint plots share the same variables; only the plot kind (and colour)
# changes between calls.
joint_axes = dict(x="run_speed", y="walk_speed", data=smash)

# Default joint plot: scatter in the centre, marginal distributions on the sides.
sns.jointplot(**joint_axes);
# Scatter with a fitted regression line.
sns.jointplot(kind="reg", **joint_axes);
# Hexagonal binning of the joint distribution.
sns.jointplot(kind="hex", **joint_axes);
# Kernel density estimate of the joint distribution.
sns.jointplot(kind="kde", color="purple", **joint_axes);

# Violin plot of run speed for each expertise level.
sns.catplot(
    x="expertise",
    y="run_speed",
    kind="violin",
    data=smash,
);