import pandas as pd
from pandas import DataFrame
import numpy as np
import matplotlib.pyplot as plt
# Load the hero dataset; the relative path means the CSV must sit in the
# current working directory.
hero = pd.read_csv("mha_v2.csv")
# Preview the first rows of the freshly loaded frame.
hero.head()
Name | Occupation | Quirk | Power | Speed | Technique | Intelligence | Score | Rank | |
---|---|---|---|---|---|---|---|---|---|
0 | All For One | Villain | All for One | 6.5 | 6.5 | 6.5 | 6.5 | 26.0 | 1 |
1 | All Might | Hero | One for All | 6.5 | 6.5 | 6.0 | 6.0 | 25.0 | 2 |
2 | Best Jeanist | Hero | Fiber Master | 2.0 | 3.0 | 5.0 | 2.0 | 12.0 | 94 |
3 | Bubble Girl | Hero | Bubble | 1.0 | 2.0 | 4.5 | 3.0 | 10.5 | 111 |
4 | Camie Utsushimi | Student | Glamour | 1.5 | 2.5 | 5.0 | 2.5 | 11.5 | 105 |
# Dimensions of the frame as a (rows, columns) tuple.
hero.shape
(120, 9)
# Per-column dtype and non-null count, plus overall memory usage.
hero.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 120 entries, 0 to 119 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Name 120 non-null object 1 Occupation 120 non-null object 2 Quirk 120 non-null object 3 Power 120 non-null float64 4 Speed 120 non-null float64 5 Technique 120 non-null float64 6 Intelligence 120 non-null float64 7 Score 120 non-null float64 8 Rank 120 non-null int64 dtypes: float64(5), int64(1), object(3) memory usage: 8.6+ KB
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
hero.describe()
Power | Speed | Technique | Intelligence | Score | Rank | |
---|---|---|---|---|---|---|
count | 120.000000 | 120.000000 | 120.000000 | 120.000000 | 120.000000 | 120.000000 |
mean | 3.425000 | 3.512500 | 4.270833 | 3.791667 | 15.000000 | 55.666667 |
std | 1.682398 | 1.461642 | 1.309140 | 1.371351 | 3.856131 | 34.226712 |
min | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 8.000000 | 1.000000 |
25% | 2.000000 | 3.000000 | 3.875000 | 3.000000 | 13.000000 | 29.000000 |
50% | 3.000000 | 3.000000 | 4.250000 | 4.000000 | 14.500000 | 59.000000 |
75% | 5.000000 | 4.500000 | 5.000000 | 5.000000 | 16.500000 | 80.000000 |
max | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 40.000000 | 119.000000 |
# Convert each text column to the pandas "category" dtype, assigning the
# converted column back onto the same frame one at a time.
for column in ("Name", "Occupation", "Quirk"):
    hero[column] = hero[column].astype("category")
# Re-inspect the frame to confirm the dtype change.
hero.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 120 entries, 0 to 119 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Name 120 non-null category 1 Occupation 120 non-null category 2 Quirk 120 non-null category 3 Power 120 non-null float64 4 Speed 120 non-null float64 5 Technique 120 non-null float64 6 Intelligence 120 non-null float64 7 Score 120 non-null float64 8 Rank 120 non-null int64 dtypes: category(3), float64(5), int64(1) memory usage: 16.3 KB
# Restrict the summary to the categorical columns (count / unique / top / freq).
hero.describe(include = "category")
Name | Occupation | Quirk | |
---|---|---|---|
count | 120 | 120 | 120 |
unique | 120 | 4 | 119 |
top | All For One | Student | One for All |
freq | 1 | 53 | 2 |
Rename columns.
# rename() returns a new frame, so rebind `hero` to keep the change.
hero = hero.rename({"Name": "NAME"}, axis="columns")
# Check the new header.
hero.head()
NAME | Occupation | Quirk | Power | Speed | Technique | Intelligence | Score | Rank | |
---|---|---|---|---|---|---|---|---|---|
0 | All For One | Villain | All for One | 6.5 | 6.5 | 6.5 | 6.5 | 26.0 | 1 |
1 | All Might | Hero | One for All | 6.5 | 6.5 | 6.0 | 6.0 | 25.0 | 2 |
2 | Best Jeanist | Hero | Fiber Master | 2.0 | 3.0 | 5.0 | 2.0 | 12.0 | 94 |
3 | Bubble Girl | Hero | Bubble | 1.0 | 2.0 | 4.5 | 3.0 | 10.5 | 111 |
4 | Camie Utsushimi | Student | Glamour | 1.5 | 2.5 | 5.0 | 2.5 | 11.5 | 105 |
import seaborn as sns
# Histogram of the Power column with seaborn's default binning.
# No trailing ";" here, so a notebook echoes the returned Axes object.
sns.histplot(x = hero["Power"])
<AxesSubplot:xlabel='Power', ylabel='Count'>
End the statement with ";" to suppress the AxesSubplot text output. The next plot also changes the number of bins.
# Histogram of Speed with an explicit bin count.
sns.histplot(x=hero["Speed"], bins=20);

# Histogram of Technique with stat="probability", labelled accordingly.
histogram = sns.histplot(
    x=hero["Technique"],
    bins=20,
    stat="probability",
    color="purple",
);
histogram.set_xlabel("Technique", fontsize=15);
histogram.set_ylabel("Probability", fontsize=15);

# Intelligence aggregated per Occupation with barplot's default estimator.
sns.barplot(x="Occupation", y="Intelligence", data=hero, color="green");

# Number of records in each Occupation.
count = sns.countplot(x="Occupation", data=hero, color="red");
count.set_xlabel("Occupation", fontsize=15);
count.set_ylabel("Number", fontsize=15);

# Score summed within each Occupation.
bar = sns.barplot(
    x="Occupation",
    y="Score",
    data=hero,
    color="blue",
    estimator=sum,
);
bar.set_xlabel("Occupation", fontsize=15);
bar.set_ylabel("Total Score", fontsize=15);

# Largest Speed value within each Occupation.
bar = sns.barplot(
    x="Occupation",
    y="Speed",
    data=hero,
    color="lightblue",
    estimator=max,
);
bar.set_xlabel("Occupation", fontsize=15);
bar.set_ylabel("Maximum Speed", fontsize=15);
Alternatively, use "from numpy import median" and pass estimator = median.
import numpy as np

# Median Speed per Occupation, drawn with ci=None.
# NOTE(review): newer seaborn releases appear to deprecate `ci` in favour of
# `errorbar=None` — confirm the installed version before switching.
bar = sns.barplot(
    x="Occupation",
    y="Speed",
    data=hero,
    color="lightblue",
    estimator=np.median,
    ci=None,
);
bar.set_xlabel("Occupation", fontsize=15);
bar.set_ylabel("Median Speed", fontsize=15);
Convert to a horizontal orientation.
# Same median-Speed chart with the axes swapped: Occupation on y, Speed on x.
bar = sns.barplot(
    x="Speed",
    y="Occupation",
    data=hero,
    color="purple",
    estimator=np.median,
);
bar.set_ylabel("Occupation", fontsize=15);
bar.set_xlabel("Median Speed", fontsize=15);

# Box plot of Power.
sns.boxplot(x=hero["Power"]);

# Box plot of Intelligence with showmeans=True.
box = sns.boxplot(x=hero["Intelligence"], showmeans=True, color="yellow");
box.set_xlabel("Intelligence", fontsize=15);
Just for illustration. Does not make sense here.
# Line plot of Score against Technique, rendered explicitly with plt.show().
line_1 = sns.lineplot(x="Technique", y="Score", data=hero)
plt.show()
Just for illustration. Does not make sense here.
# Keep only the two columns to draw, then pass the whole frame as `data`
# without naming x or y.
hero_line_df = hero[["Technique", "Score"]]
line_2 = sns.lineplot(data=hero_line_df)
line_2.set_xlabel("Records", fontsize=15);
line_2.set_ylabel("Value", fontsize=15);
plt.show()
Just for illustration. Does not make sense here.
# Technique against Score, with Occupation mapped to hue.
line_3 = sns.lineplot(
    x="Score",
    y="Technique",
    hue="Occupation",
    data=hero,
);
line_3.set_xlabel("Score", fontsize=15);
line_3.set_ylabel("Technique", fontsize=15);
line_3.legend(title="Occupation", title_fontsize="10");
Insert a new column at position 0 (the first column). Just for illustration. Does not make much sense here.
# Work on a copy so the original frame is left untouched.
hero_copy = hero.copy()
# One consecutive year per row, starting at 1980, inserted as the first column.
years = range(1980, 1980 + len(hero_copy))
hero_copy.insert(loc=0, column="Year", value=years)
hero_copy.head()
Year | NAME | Occupation | Quirk | Power | Speed | Technique | Intelligence | Score | Rank | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1980 | All For One | Villain | All for One | 6.5 | 6.5 | 6.5 | 6.5 | 26.0 | 1 |
1 | 1981 | All Might | Hero | One for All | 6.5 | 6.5 | 6.0 | 6.0 | 25.0 | 2 |
2 | 1982 | Best Jeanist | Hero | Fiber Master | 2.0 | 3.0 | 5.0 | 2.0 | 12.0 | 94 |
3 | 1983 | Bubble Girl | Hero | Bubble | 1.0 | 2.0 | 4.5 | 3.0 | 10.5 | 111 |
4 | 1984 | Camie Utsushimi | Student | Glamour | 1.5 | 2.5 | 5.0 | 2.5 | 11.5 | 105 |
# Score plotted against the Year column.
line_4 = sns.lineplot(x="Year", y="Score", data=hero_copy);
line_4.set_xlabel("Year", fontsize=15);
line_4.set_ylabel("Score", fontsize=15);
A multi-line plot requires the data in long format. Again, it does not make much sense here.
# Keep Year plus the four attribute columns.
attribute_columns = ["Year", "Power", "Speed", "Technique", "Intelligence"]
hero_copy_2 = hero_copy[attribute_columns]
hero_copy_2.head()
Year | Power | Speed | Technique | Intelligence | |
---|---|---|---|---|---|
0 | 1980 | 6.5 | 6.5 | 6.5 | 6.5 |
1 | 1981 | 6.5 | 6.5 | 6.0 | 6.0 |
2 | 1982 | 2.0 | 3.0 | 5.0 | 2.0 |
3 | 1983 | 1.0 | 2.0 | 4.5 | 3.0 |
4 | 1984 | 1.5 | 2.5 | 5.0 | 2.5 |
# Reshape wide -> long: Year stays as the identifier, every other column is
# stacked into "variable" / "value" pairs.
hero_copy_long = hero_copy_2.melt(id_vars=["Year"])
hero_copy_long.head()
Year | variable | value | |
---|---|---|---|
0 | 1980 | Power | 6.5 |
1 | 1981 | Power | 6.5 |
2 | 1982 | Power | 2.0 |
3 | 1983 | Power | 1.0 |
4 | 1984 | Power | 1.5 |
Rename columns if desired
# Replace the default melt column names in place.
hero_copy_long.rename(
    columns={"variable": "Attribute", "value": "Value"},
    inplace=True,
)
hero_copy_long.head()
Year | Attribute | Value | |
---|---|---|---|
0 | 1980 | Power | 6.5 |
1 | 1981 | Power | 6.5 |
2 | 1982 | Power | 2.0 |
3 | 1983 | Power | 1.0 |
4 | 1984 | Power | 1.5 |
Terrible plot. For illustration only.
# Value against Year from the long-format frame, with Attribute mapped to hue.
line_5 = sns.lineplot(
    x="Year",
    y="Value",
    hue="Attribute",
    data=hero_copy_long,
);
line_5.set_xlabel("Year", fontsize=15);
line_5.set_ylabel("Score", fontsize=15);
line_5.legend(title="The 4 Attributes", title_fontsize="10");
Someone's over-powered :-)
# Speed against Power, with Occupation mapped to hue.
scatter = sns.scatterplot(
    x="Power",
    y="Speed",
    hue="Occupation",
    data=hero,
);
scatter.set_xlabel("Power", fontsize=20);
scatter.set_ylabel("Speed", fontsize=20);
scatter.legend(title="Occupation", title_fontsize="10");

# Intelligence against Technique; Occupation is mapped to hue and Score to size.
scatter_2 = sns.scatterplot(
    x="Technique",
    y="Intelligence",
    hue="Occupation",
    size="Score",
    data=hero,
);
scatter_2.set_xlabel("Technique", fontsize=20);
scatter_2.set_ylabel("Intelligence", fontsize=20);
scatter_2.legend(title="The Legends", title_fontsize="10");

# Text annotation placed at (6.6, 9.88) on the current axes.
# No trailing ";" on this call, so a notebook echoes the returned Text object.
plt.text(
    6.6,
    9.88,
    "Master Yeoda :-) -->",
    horizontalalignment="left",
    size="medium",
    color="red",
    weight="bold",
)
Text(6.6, 9.88, 'Master Yeoda :-) -->')
Show distributions with the scatter plot.
# Joint plot of Power and Speed with the default kind.
sns.jointplot(x="Power", y="Speed", data=hero);

# Joint plot of Technique and Intelligence with kind="reg".
sns.jointplot(x="Technique", y="Intelligence", data=hero, kind="reg");

# Joint plot of Power and Technique with kind="hex".
sns.jointplot(x="Power", y="Technique", data=hero, kind="hex");

# Joint plot of Speed and Intelligence with kind="kde".
sns.jointplot(
    x="Speed",
    y="Intelligence",
    data=hero,
    kind="kde",
    color="purple",
);

# Violin plot of Score for each Occupation.
sns.catplot(x="Occupation", y="Score", kind="violin", data=hero);