# Load the universities and colleges data set (the first row of the file
# supplies the column names) and preview its first six rows.
univ <- read.csv(file = "universities_and_colleges.csv", header = TRUE)
head(univ, n = 6L)
## School Type Median_SAT Acceptance_Rate Expenditures_Student
## 1 Amherst Lib Arts 1315 0.22 26636
## 2 Barnard Lib Arts 1220 0.53 17653
## 3 Bates Lib Arts 1240 0.36 17554
## 4 Berkeley University 1176 0.37 23665
## 5 Bowdoin Lib Arts 1300 0.24 25703
## 6 Brown University 1281 0.24 24201
## Top_10pct_HS Graduation_pct
## 1 85 93
## 2 69 80
## 3 58 88
## 4 95 68
## 5 78 90
## 6 80 90
# Compact overview of the data frame: dimensions, column names and types.
str(object = univ)
## 'data.frame': 49 obs. of 7 variables:
## $ School : chr "Amherst" "Barnard" "Bates" "Berkeley" ...
## $ Type : chr "Lib Arts" "Lib Arts" "Lib Arts" "University" ...
## $ Median_SAT : int 1315 1220 1240 1176 1300 1281 1255 1400 1300 1225 ...
## $ Acceptance_Rate : num 0.22 0.53 0.36 0.37 0.24 0.24 0.56 0.31 0.4 0.64 ...
## $ Expenditures_Student: num 26636 17653 17554 23665 25703 ...
## $ Top_10pct_HS : int 85 69 58 95 78 80 70 98 75 52 ...
## $ Graduation_pct : int 93 80 88 68 90 90 84 75 80 77 ...
# Arithmetic mean of the acceptance rate, skipping missing values.
mean(univ[["Acceptance_Rate"]], na.rm = TRUE)
## [1] 0.3810204
# Median of the acceptance rate, skipping missing values.
median(univ[["Acceptance_Rate"]], na.rm = TRUE)
## [1] 0.36
library(DescTools)
# Most frequent acceptance rate; the "freq" attribute of the result is the
# number of times it occurs. na.rm = TRUE mirrors the mean()/median() calls on
# this column, and the DescTools:: prefix makes the origin of Mode() explicit.
DescTools::Mode(univ$Acceptance_Rate, na.rm = TRUE)
## [1] 0.24
## attr(,"freq")
## [1] 5
# Quantiles (minimum, quartiles, maximum) of the acceptance rate.
# na.rm = TRUE matches the mean()/median() calls on this column: without it,
# quantile() stops with an error and sd()/var() return NA as soon as the
# column contains a missing value.
quantile(univ$Acceptance_Rate, na.rm = TRUE)
## 0% 25% 50% 75% 100%
## 0.17 0.28 0.36 0.48 0.67
# Sample standard deviation of the acceptance rate.
sd(univ$Acceptance_Rate, na.rm = TRUE)
## [1] 0.1337168
# Sample variance of the acceptance rate.
var(univ$Acceptance_Rate, na.rm = TRUE)
## [1] 0.01788019
# Pearson correlation matrix of the four numeric columns that sit in
# positions 3 to 6 of the data frame, selected here by name.
cor(
  univ[, c("Median_SAT", "Acceptance_Rate", "Expenditures_Student",
           "Top_10pct_HS")],
  method = "pearson"
)
## Median_SAT Acceptance_Rate Expenditures_Student
## Median_SAT 1.0000000 -0.6019020 0.5727417
## Acceptance_Rate -0.6019020 1.0000000 -0.2842544
## Expenditures_Student 0.5727417 -0.2842544 1.0000000
## Top_10pct_HS 0.5034680 -0.6097210 0.5057820
## Top_10pct_HS
## Median_SAT 0.503468
## Acceptance_Rate -0.609721
## Expenditures_Student 0.505782
## Top_10pct_HS 1.000000
# Largest acceptance rate.
max(univ[["Acceptance_Rate"]])
## [1] 0.67
# Smallest acceptance rate.
min(univ[["Acceptance_Rate"]])
## [1] 0.17
# Range: range() returns c(min, max), so diff() yields max - min.
diff(range(univ[["Acceptance_Rate"]]))
## [1] 0.5
Manual calculation of z-scores: subtract the mean and divide by the standard deviation.
# Manual z-score: centre on the mean and divide by the sample standard
# deviation. na.rm = TRUE keeps a single missing acceptance rate from turning
# every z-score into NA, and matches how scale() treats missing values.
univ$Acceptance_Rate_zscore <-
  (univ$Acceptance_Rate - mean(univ$Acceptance_Rate, na.rm = TRUE)) /
  sd(univ$Acceptance_Rate, na.rm = TRUE)
head(univ)
## School Type Median_SAT Acceptance_Rate Expenditures_Student
## 1 Amherst Lib Arts 1315 0.22 26636
## 2 Barnard Lib Arts 1220 0.53 17653
## 3 Bates Lib Arts 1240 0.36 17554
## 4 Berkeley University 1176 0.37 23665
## 5 Bowdoin Lib Arts 1300 0.24 25703
## 6 Brown University 1281 0.24 24201
## Top_10pct_HS Graduation_pct Acceptance_Rate_zscore
## 1 85 93 -1.20418966
## 2 69 80 1.11414253
## 3 58 88 -0.15720093
## 4 95 68 -0.08241602
## 5 78 90 -1.05461984
## 6 80 90 -1.05461984
A simpler way to get the same z-scores is the built-in scale() function.
# scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes; as.numeric() drops both so the new column is a
# plain numeric vector like Acceptance_Rate_zscore rather than a matrix column.
univ$Acceptance_Rate_zscore_2 <- as.numeric(scale(univ$Acceptance_Rate))
head(univ)
## School Type Median_SAT Acceptance_Rate Expenditures_Student
## 1 Amherst Lib Arts 1315 0.22 26636
## 2 Barnard Lib Arts 1220 0.53 17653
## 3 Bates Lib Arts 1240 0.36 17554
## 4 Berkeley University 1176 0.37 23665
## 5 Bowdoin Lib Arts 1300 0.24 25703
## 6 Brown University 1281 0.24 24201
## Top_10pct_HS Graduation_pct Acceptance_Rate_zscore Acceptance_Rate_zscore_2
## 1 85 93 -1.20418966 -1.20418966
## 2 69 80 1.11414253 1.11414253
## 3 58 88 -0.15720093 -0.15720093
## 4 95 68 -0.08241602 -0.08241602
## 5 78 90 -1.05461984 -1.05461984
## 6 80 90 -1.05461984 -1.05461984
library(moments)
# Sample skewness of the acceptance rate as computed by the moments package.
moments::skewness(univ[["Acceptance_Rate"]])
## [1] 0.3450946
# Pearson's second (median) skewness coefficient: 3 * (mean - median) / sd.
with(
  univ,
  3 * (mean(Acceptance_Rate) - median(Acceptance_Rate)) / sd(Acceptance_Rate)
)
## [1] 0.4716028
# Pearson's measure of kurtosis (a normal distribution gives 3).
moments::kurtosis(univ[["Acceptance_Rate"]])
## [1] 2.150586
Statistical test for normality using skewness and kurtosis.
H0: The data have skewness and kurtosis that match a normal distribution
H1: The data have skewness and kurtosis that do not match a normal distribution
# Jarque-Bera normality test from the moments package. The argument is kept
# as univ$Acceptance_Rate because the printed "data:" label echoes it verbatim.
moments::jarque.test(univ$Acceptance_Rate)
##
## Jarque-Bera Normality Test
##
## data: univ$Acceptance_Rate
## JB = 2.4456, p-value = 0.2944
## alternative hypothesis: greater
Measures of central tendency for data given as a frequency table.
# Read the repair-time frequency table. With the default encoding the first
# column name came out mangled as "ï..Days", which is what a UTF-8 byte-order
# mark at the start of the file produces once read.csv() sanitises the header.
# fileEncoding = "UTF-8-BOM" strips the mark so the header is read as "Days".
comp <- read.csv(
  "computer_repair_times.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)
head(comp)
## Days Frequency
## 1 0 0
## 2 1 0
## 3 2 0
## 4 3 0
## 5 4 0
## 6 5 1
Rename the columns.
# Relabel the two columns with lower-case names, then preview the result.
comp <- setNames(comp, c("days", "frequency"))
head(comp, n = 6L)
## days frequency
## 1 0 0
## 2 1 0
## 3 2 0
## 4 3 0
## 5 4 0
## 6 5 1
Convert the frequency table into a vector of individual observations.
# Expand the frequency table into raw observations: each value of `days` is
# repeated `frequency` times, then the resulting vector is printed.
comp_data <- with(comp, rep(days, times = frequency))
print(comp_data)
## [1] 5 6 6 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9
## [26] 9 9 9 9 9 9 9 9 9 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
## [51] 10 10 10 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 12 12 12
## [76] 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13
## [101] 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 14 14 14 14 14 14 14 14
## [126] 14 14 14 14 14 14 14 14 14 14 14 14 15 15 15 15 15 15 15 15 15 15 15 15 15
## [151] 15 15 15 15 15 15 15 15 15 15 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16
## [176] 16 16 16 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 18 18 18 18 18 18
## [201] 18 18 18 19 19 19 19 19 19 19 19 19 20 20 20 20 20 20 20 20 21 21 21 21 21
## [226] 22 22 22 22 23 23 23 23 25 25 25 26 26 27 27 29 29 31 34 36 36 37 38 39 40
Mean.
# Arithmetic mean of the expanded repair-time observations.
mean(x = comp_data)
## [1] 14.912
Median.
# Median of the expanded repair-time observations.
median(x = comp_data)
## [1] 14
Mode.
# Most frequent repair time(s); tied values are all returned, and the "freq"
# attribute gives their shared count.
DescTools::Mode(x = comp_data)
## [1] 12 15
## attr(,"freq")
## [1] 23
Variance.
# Sample variance of the expanded repair-time observations.
var(x = comp_data)
## [1] 35.50227
Standard deviation.
# Sample standard deviation of the expanded repair-time observations.
sd(x = comp_data)
## [1] 5.958378