import pandas as pd
from pandas import DataFrame
import numpy as np
import matplotlib.pyplot as plt
# Load smash_3.csv (resolved relative to the working directory) into the `smash` DataFrame.
smash = pd.read_csv(filepath_or_buffer="smash_3.csv")
character | game | regular_fall | fast_fall | weight | walk_speed | run_speed | full_hop_height | short_hop_height | double_hop_height | before | after | expertise | |
0 | mario | Super Mario | 1.50 | 2.400 | 98 | 1.155 | 1.760 | 36.33 | 17.54 | 36.33 | 3 | 8 | Amateur |
1 | donkey kong | Donkey Kong | 1.63 | 2.608 | 127 | 1.365 | 1.873 | 34.00 | 17.30 | 35.50 | 8 | 10 | Expert |
2 | link | Zelda | 1.60 | 3.040 | 104 | 1.247 | 1.534 | 27.80 | 13.38 | 29.00 | 10 | 7 | Expert |
3 | samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 6 | 9 | Expert |
4 | dark samus | Metroid | 1.33 | 2.168 | 108 | 1.115 | 1.654 | 37.00 | 18.00 | 37.00 | 2 | 5 | Amateur |
<class 'pandas.core.frame.DataFrame'> RangeIndex: 72 entries, 0 to 71 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 character 72 non-null object 1 game 72 non-null object 2 regular_fall 72 non-null float64 3 fast_fall 72 non-null float64 4 weight 72 non-null int64 5 walk_speed 72 non-null float64 6 run_speed 72 non-null float64 7 full_hop_height 70 non-null float64 8 short_hop_height 70 non-null float64 9 double_hop_height 70 non-null float64 10 before 72 non-null int64 11 after 72 non-null int64 12 expertise 72 non-null object dtypes: float64(7), int64(3), object(3) memory usage: 7.4+ KB
# Convert the "game" and "expertise" columns to the pandas category dtype in one call.
smash = smash.astype({"game": "category", "expertise": "category"})
<class 'pandas.core.frame.DataFrame'> RangeIndex: 72 entries, 0 to 71 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 character 72 non-null object 1 game 72 non-null category 2 regular_fall 72 non-null float64 3 fast_fall 72 non-null float64 4 weight 72 non-null int64 5 walk_speed 72 non-null float64 6 run_speed 72 non-null float64 7 full_hop_height 70 non-null float64 8 short_hop_height 70 non-null float64 9 double_hop_height 70 non-null float64 10 before 72 non-null int64 11 after 72 non-null int64 12 expertise 72 non-null category dtypes: category(2), float64(7), int64(3), object(1) memory usage: 7.9+ KB
# Summary statistics restricted to the category-dtype columns.
smash.describe(include=["category"])
Rename columns.
# Rename one column by passing a mapping that is applied along the column axis.
smash = smash.rename({"full_hop_height": "FULL_HOP_HEIGHT_RENAME"}, axis="columns")
import seaborn as sns

# Histogram of the run_speed column using seaborn's default bins.
sns.histplot(data=smash, x="run_speed")
End the statement with ";" to suppress the printed AxesSubplot return value (the plot itself is still drawn). Change the number of bins.
# Same histogram, now with 20 bins.
sns.histplot(data=smash, x="run_speed", bins=20);

# 20-bin histogram with stat="probability", drawn in purple with custom axis labels.
histogram = sns.histplot(
    data=smash,
    x="run_speed",
    bins=20,
    stat="probability",
    color="purple",
);
histogram.set_ylabel("Probability", fontsize=15);
histogram.set_xlabel("Run Speed", fontsize=15);
# Bar plot of short_hop_height per expertise level, using barplot's default estimator.
sns.barplot(data=smash, x="expertise", y="short_hop_height", color="green");

# Count plot of rows per expertise level, with custom axis labels.
count = sns.countplot(data=smash, x="expertise", color="red");
count.set_ylabel("Number", fontsize=15);
count.set_xlabel("Expertise", fontsize=15);
# Bar plot aggregated with the built-in sum: total short_hop_height per expertise level.
bar = sns.barplot(
    data=smash,
    x="expertise",
    y="short_hop_height",
    estimator=sum,
    color="blue",
);
bar.set_ylabel("Total Short Hop Height", fontsize=15);
bar.set_xlabel("Expertise", fontsize=15);

# Same plot aggregated with the built-in max.
bar = sns.barplot(
    data=smash,
    x="expertise",
    y="short_hop_height",
    estimator=max,
    color="lightblue",
);
bar.set_ylabel("Maximum Short Hop Height", fontsize=15);
bar.set_xlabel("Expertise", fontsize=15);

import numpy as np

# Same plot aggregated with NumPy's median.
bar = sns.barplot(
    data=smash,
    x="expertise",
    y="short_hop_height",
    estimator=np.median,
    color="lightblue",
);
bar.set_ylabel("Median Short Hop Height", fontsize=15);
bar.set_xlabel("Expertise", fontsize=15);
Convert to a horizontal orientation.
# Swap the x and y columns so the median bars are drawn horizontally.
bar = sns.barplot(
    data=smash,
    x="short_hop_height",
    y="expertise",
    estimator=np.median,
    color="purple",
);
bar.set_xlabel("Median Short Hop Height", fontsize=15);
bar.set_ylabel("Expertise", fontsize=15);
# Box plot of run_speed with default styling.
sns.boxplot(data=smash, x="run_speed");

# Same box plot in yellow with the mean marker enabled and a custom x-axis label.
box = sns.boxplot(data=smash, x="run_speed", color="yellow", showmeans=True);
box.set_xlabel("Run Speed", fontsize=15);
# Line plot of double_hop_height against expertise, with walk_speed mapped to hue.
line = sns.lineplot(
    data=smash,
    x="expertise",
    y="double_hop_height",
    hue="walk_speed",
);
line.set_xlabel("Expertise", fontsize=15);
line.set_ylabel("Double Hop Height", fontsize=15);
# title_fontsize is documented as an int or a named size ("small", "large", ...),
# so pass the number 10 instead of the string "10".
line.legend(title="Walk Speed", title_fontsize=10);
# Scatter plot of walk_speed against run_speed, with double_hop_height mapped to hue.
scatter = sns.scatterplot(
    data=smash,
    x="run_speed",
    y="walk_speed",
    hue="double_hop_height",
);
scatter.set_xlabel("Run Speed", fontsize=20);
scatter.set_ylabel("Walk Speed", fontsize=20);
# The legend title names the hue column; the original read "Houdle Hop height" (typo).
# title_fontsize is given as a number, matching the documented int / named-size types.
scatter.legend(title="Double Hop Height", title_fontsize=10);
Show distributions with the scatter plot.
# Joint plot of run_speed vs. walk_speed using jointplot's default kind.
sns.jointplot(data=smash, x="run_speed", y="walk_speed");

# Same pair of columns drawn with kind="reg".
sns.jointplot(data=smash, x="run_speed", y="walk_speed", kind="reg");

# Same pair of columns drawn with kind="hex".
sns.jointplot(data=smash, x="run_speed", y="walk_speed", kind="hex");

# Same pair of columns drawn with kind="kde", in purple.
sns.jointplot(
    data=smash,
    x="run_speed",
    y="walk_speed",
    kind="kde",
    color="purple",
);

# Violin plot of run_speed for each expertise level via catplot.
sns.catplot(data=smash, x="expertise", y="run_speed", kind="violin");