It’s a boba tea-quila sunrise! Comparing means using t-test.
Use Cadsoft data.
# Read the Cadsoft CSV, label the first column "Customer", and preview it.
cadsoft <- read.csv(file = "Eg_7-2.csv", header = TRUE)
colnames(cadsoft)[1] <- "Customer"
head(cadsoft, n = 6L)
## Customer Time
## 1 1 20
## 2 2 12
## 3 3 15
## 4 4 11
## 5 5 22
## 6 6 6
H0: mean response time >= 25
H1: mean response time < 25
# Lower-tailed one-sample t-test of the mean of Time against 25.
t.test(x = cadsoft$Time, alternative = "less", mu = 25, conf.level = 0.95)
##
## One Sample t-test
##
## data: cadsoft$Time
## t = -1.0522, df = 43, p-value = 0.1493
## alternative hypothesis: true mean is less than 25
## 95 percent confidence interval:
## -Inf 26.8475
## sample estimates:
## mean of x
## 21.90909
H0: mean response time <= 25
H1: mean response time > 25
# Upper-tailed one-sample t-test of the mean of Time against 25.
t.test(x = cadsoft$Time, alternative = "greater", mu = 25, conf.level = 0.95)
##
## One Sample t-test
##
## data: cadsoft$Time
## t = -1.0522, df = 43, p-value = 0.8507
## alternative hypothesis: true mean is greater than 25
## 95 percent confidence interval:
## 16.97068 Inf
## sample estimates:
## mean of x
## 21.90909
H0: mean response time = 25
H1: mean response time <> 25
# Two-sided one-sample t-test of the mean of Time against 25.
t.test(x = cadsoft$Time, alternative = "two.sided", mu = 25, conf.level = 0.95)
##
## One Sample t-test
##
## data: cadsoft$Time
## t = -1.0522, df = 43, p-value = 0.2986
## alternative hypothesis: true mean is not equal to 25
## 95 percent confidence interval:
## 15.98474 27.83344
## sample estimates:
## mean of x
## 21.90909
Another example.
# Read the vacation survey CSV, label the first column "Age", and preview it.
vacation <- read.csv(file = "Eg_7-6.csv", header = TRUE)
colnames(vacation)[1] <- "Age"
head(vacation, n = 6L)
## Age Gender Relationship.Status Vacations.per.Year Number.of.Children
## 1 24 Male Married 2 0
## 2 26 Female Married 4 0
## 3 28 Male Married 2 2
## 4 33 Male Married 4 0
## 5 45 Male Married 2 0
## 6 49 Male Married 1 2
H0: Age = 35
H1: Age <> 35
# Two-sided one-sample t-test of the mean of Age against 35.
t.test(x = vacation$Age, alternative = "two.sided", mu = 35, conf.level = 0.95)
##
## One Sample t-test
##
## data: vacation$Age
## t = 2.7283, df = 33, p-value = 0.01012
## alternative hypothesis: true mean is not equal to 35
## 95 percent confidence interval:
## 35.93485 41.41809
## sample estimates:
## mean of x
## 38.67647
Test for normality.
# Shapiro-Wilk normality test on the Age column.
shapiro.test(x = vacation$Age)
##
## Shapiro-Wilk normality test
##
## data: vacation$Age
## W = 0.93422, p-value = 0.04158
Use purchase orders data.
# Read the purchase-orders CSV, label the first column "Supplier", and preview it.
supplier <- read.csv(file = "Eg_7-9.csv", header = TRUE)
colnames(supplier)[1] <- "Supplier"
head(supplier, n = 6L)
## Supplier Order.No. Item.No. Item.Description Item.Cost Quantity
## 1 Hulkey Fasteners Aug11001 1122 Airframe fasteners $4.25 19,500
## 2 Alum Sheeting Aug11002 1243 Airframe fasteners $4.25 10,000
## 3 Fast-Tie Aerospace Aug11003 5462 Shielded Cable/ft. $1.05 23,000
## 4 Fast-Tie Aerospace Aug11004 5462 Shielded Cable/ft. $1.05 21,500
## 5 Steelpin Inc. Aug11005 5319 Shielded Cable/ft. $1.10 17,500
## 6 Fast-Tie Aerospace Aug11006 5462 Shielded Cable/ft. $1.05 22,500
## Cost.per.order A.P.Terms..Months. Order.Date Arrival.Date
## 1 $82,875.00 30 08/05/11 08/13/11
## 2 $42,500.00 30 08/08/11 08/14/11
## 3 $24,150.00 30 08/10/11 08/15/11
## 4 $22,575.00 30 08/15/11 08/22/11
## 5 $19,250.00 30 08/20/11 08/31/11
## 6 $23,625.00 30 08/20/11 08/26/11
Convert date fields.
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.0.5
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Parse the "mm/dd/yy" text columns straight into Date objects.
# lubridate::mdy() already returns class Date, so no second as.Date() pass is
# needed (a `format` argument is ignored for already-parsed date-times, and
# "%Y" would not match the two-digit years in the raw data anyway).
supplier$Order.Date <- lubridate::mdy(supplier$Order.Date)
supplier$Arrival.Date <- lubridate::mdy(supplier$Arrival.Date)
# Inspect the structure to confirm both columns are now of class Date.
str(supplier)
## 'data.frame': 94 obs. of 10 variables:
## $ Supplier : chr "Hulkey Fasteners" "Alum Sheeting" "Fast-Tie Aerospace" "Fast-Tie Aerospace" ...
## $ Order.No. : chr "Aug11001" "Aug11002" "Aug11003" "Aug11004" ...
## $ Item.No. : int 1122 1243 5462 5462 5319 5462 4312 7258 6321 5462 ...
## $ Item.Description : chr "Airframe fasteners" "Airframe fasteners" "Shielded Cable/ft." "Shielded Cable/ft." ...
## $ Item.Cost : chr " $4.25 " " $4.25 " " $1.05 " " $1.05 " ...
## $ Quantity : chr " 19,500 " " 10,000 " " 23,000 " " 21,500 " ...
## $ Cost.per.order : chr " $82,875.00 " " $42,500.00 " " $24,150.00 " " $22,575.00 " ...
## $ A.P.Terms..Months.: int 30 30 30 30 30 30 30 45 30 30 ...
## $ Order.Date : Date, format: "2011-08-05" "2011-08-08" ...
## $ Arrival.Date : Date, format: "2011-08-13" "2011-08-14" ...
Compute lead time.
# Lead time = whole days elapsed between order and arrival, stored as a number.
supplier$lead <- as.numeric(
  difftime(supplier$Arrival.Date, supplier$Order.Date, units = "days")
)
head(supplier, n = 6L)
## Supplier Order.No. Item.No. Item.Description Item.Cost Quantity
## 1 Hulkey Fasteners Aug11001 1122 Airframe fasteners $4.25 19,500
## 2 Alum Sheeting Aug11002 1243 Airframe fasteners $4.25 10,000
## 3 Fast-Tie Aerospace Aug11003 5462 Shielded Cable/ft. $1.05 23,000
## 4 Fast-Tie Aerospace Aug11004 5462 Shielded Cable/ft. $1.05 21,500
## 5 Steelpin Inc. Aug11005 5319 Shielded Cable/ft. $1.10 17,500
## 6 Fast-Tie Aerospace Aug11006 5462 Shielded Cable/ft. $1.05 22,500
## Cost.per.order A.P.Terms..Months. Order.Date Arrival.Date lead
## 1 $82,875.00 30 2011-08-05 2011-08-13 8
## 2 $42,500.00 30 2011-08-08 2011-08-14 6
## 3 $24,150.00 30 2011-08-10 2011-08-15 5
## 4 $22,575.00 30 2011-08-15 2011-08-22 7
## 5 $19,250.00 30 2011-08-20 2011-08-31 11
## 6 $23,625.00 30 2011-08-20 2011-08-26 6
Filter data for Suppliers Alum Sheeting and Durrable Products.
# Keep only the orders placed with the two suppliers being compared,
# then count the orders per supplier.
supplier_alum_durrable <- subset(
  supplier,
  Supplier %in% c("Alum Sheeting", "Durrable Products")
)
table(supplier_alum_durrable$Supplier)
##
## Alum Sheeting Durrable Products
## 8 13
H0: alum - durrable = 0
H1: alum - durrable <> 0
Assume unequal variances
# Two-sided Welch test (variances not pooled) of lead time by supplier.
t.test(
  supplier_alum_durrable$lead ~ supplier_alum_durrable$Supplier,
  alternative = "two.sided",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: supplier_alum_durrable$lead by supplier_alum_durrable$Supplier
## t = 3.828, df = 9.5306, p-value = 0.003636
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.8598894 3.2939567
## sample estimates:
## mean in group Alum Sheeting mean in group Durrable Products
## 7.000000 4.923077
Assume equal variances
# Two-sided pooled-variance test of lead time by supplier.
t.test(
  supplier_alum_durrable$lead ~ supplier_alum_durrable$Supplier,
  alternative = "two.sided",
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: supplier_alum_durrable$lead by supplier_alum_durrable$Supplier
## t = 4.4044, df = 19, p-value = 0.0003046
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.089955 3.063891
## sample estimates:
## mean in group Alum Sheeting mean in group Durrable Products
## 7.000000 4.923077
H0: Alum Sheeting >= Durrable Products.
H1: Alum Sheeting < Durrable Products.
Assuming unequal variance by default.
# Lower-tailed Welch test: is the first group's mean lead time below the second's?
t.test(
  supplier_alum_durrable$lead ~ supplier_alum_durrable$Supplier,
  alternative = "less",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: supplier_alum_durrable$lead by supplier_alum_durrable$Supplier
## t = 3.828, df = 9.5306, p-value = 0.9982
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 3.065242
## sample estimates:
## mean in group Alum Sheeting mean in group Durrable Products
## 7.000000 4.923077
Testing for a specific mean difference, say 2.
H0: Alum Sheeting >= Durrable Products + 2
H1: Alum Sheeting < Durrable Products + 2
Assuming unequal variance by default.
# Lower-tailed Welch test against a hypothesised mean difference of 2.
t.test(
  supplier_alum_durrable$lead ~ supplier_alum_durrable$Supplier,
  mu = 2,
  alternative = "less",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: supplier_alum_durrable$lead by supplier_alum_durrable$Supplier
## t = 0.14178, df = 9.5306, p-value = 0.5549
## alternative hypothesis: true difference in means is less than 2
## 95 percent confidence interval:
## -Inf 3.065242
## sample estimates:
## mean in group Alum Sheeting mean in group Durrable Products
## 7.000000 4.923077
H0: Alum Sheeting <= Durrable Products.
H1: Alum Sheeting > Durrable Products.
Assuming unequal variance by default.
# Upper-tailed Welch test: is the first group's mean lead time above the second's?
t.test(
  supplier_alum_durrable$lead ~ supplier_alum_durrable$Supplier,
  alternative = "greater",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: supplier_alum_durrable$lead by supplier_alum_durrable$Supplier
## t = 3.828, df = 9.5306, p-value = 0.001818
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 1.088604 Inf
## sample estimates:
## mean in group Alum Sheeting mean in group Durrable Products
## 7.000000 4.923077
Testing for a specific mean difference, say 3.
H0: Alum Sheeting <= Durrable Products + 3
H1: Alum Sheeting > Durrable Products + 3
Assuming unequal variance by default.
# Upper-tailed Welch test against a hypothesised mean difference of 3.
t.test(
  supplier_alum_durrable$lead ~ supplier_alum_durrable$Supplier,
  mu = 3,
  alternative = "greater",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: supplier_alum_durrable$lead by supplier_alum_durrable$Supplier
## t = -1.7013, df = 9.5306, p-value = 0.9394
## alternative hypothesis: true difference in means is greater than 3
## 95 percent confidence interval:
## 1.088604 Inf
## sample estimates:
## mean in group Alum Sheeting mean in group Durrable Products
## 7.000000 4.923077
Use pile foundation data
# Read the pile foundation CSV and preview the first rows.
# NOTE(review): the first header prints as "ï..Pile_Number", which looks like a
# UTF-8 byte-order mark leaking into the name; reading with
# fileEncoding = "UTF-8-BOM" would likely avoid it -- confirm against the file.
pf <- read.csv(file = "pile_foundation.csv", header = TRUE)
head(pf, n = 6L)
## ï..Pile_Number Pile_Length_Estimated Pile_Length_Actual
## 1 1 10.58 18.58
## 2 2 10.58 18.58
## 3 3 10.58 18.58
## 4 4 10.58 18.58
## 5 5 10.58 28.58
## 6 6 10.58 26.58
# Give the first three columns clean names, then preview the result.
colnames(pf)[seq_len(3)] <- c(
  "Pile_Number",
  "Pile_Length_Estimated",
  "Pile_Length_Actual"
)
head(pf, n = 6L)
## Pile_Number Pile_Length_Estimated Pile_Length_Actual
## 1 1 10.58 18.58
## 2 2 10.58 18.58
## 3 3 10.58 18.58
## 4 4 10.58 18.58
## 5 5 10.58 28.58
## 6 6 10.58 26.58
H0: Pile_Length_Estimated = Pile_Length_Actual
H1: Pile_Length_Estimated <> Pile_Length_Actual
# Two-sided paired t-test of estimated versus actual pile length.
t.test(
  x = pf$Pile_Length_Estimated,
  y = pf$Pile_Length_Actual,
  alternative = "two.sided",
  paired = TRUE
)
##
## Paired t-test
##
## data: pf$Pile_Length_Estimated and pf$Pile_Length_Actual
## t = -10.912, df = 310, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.528856 -5.228508
## sample estimates:
## mean of the differences
## -6.378682
Assumptions
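Test for normality. Note that the paired t-test assumes the paired differences (estimated minus actual) are normally distributed; the tests below check each variable separately.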
# Shapiro-Wilk normality test on the estimated pile lengths.
shapiro.test(x = pf$Pile_Length_Estimated)
##
## Shapiro-Wilk normality test
##
## data: pf$Pile_Length_Estimated
## W = 0.92995, p-value = 6.34e-11
# Shapiro-Wilk normality test on the actual pile lengths.
shapiro.test(x = pf$Pile_Length_Actual)
##
## Shapiro-Wilk normality test
##
## data: pf$Pile_Length_Actual
## W = 0.96387, p-value = 5.459e-07