Directions

Prepare for trouble!

And make it double!

Data for demo

Back to the spellbook

1. Naming variables

# Assign 10 to `a` with `<-`, then copy that value into `b`.
a <- 10
b <- a
# An explicit print() and a bare name at top level display the same output.
print(b)
## [1] 10
b
## [1] 10

2. Basic data types

2.1 Logical

# TRUE and FALSE are the two logical constants; class() reports "logical"
# for both of them.
TRUE
## [1] TRUE
class(TRUE)
## [1] "logical"
FALSE
## [1] FALSE
class(FALSE)
## [1] "logical"

2.2 Character

# Character data is written between double quotes; printing shows the quotes.
y <- "Greetings Jedi Master"
y
## [1] "Greetings Jedi Master"

2.3 Numeric and Integer

# 2 and 2L print identically, but class() tells them apart: the plain
# literal is "numeric", while the L suffix makes it an "integer".
2
## [1] 2
2.5
## [1] 2.5
2L
## [1] 2
class(2)
## [1] "numeric"
class(2L)
## [1] "integer"

3. Data structures

3.1 Vector

# c() combines values into a vector; class() of the vector reflects the
# type of its elements ("numeric" for numbers, "character" for strings).
vector1 <- c(1,2,3,4,5)
vector1
## [1] 1 2 3 4 5
class(vector1)
## [1] "numeric"
vector2 <- c("S", "U")
vector2
## [1] "S" "U"
class(vector2)
## [1] "character"

3.2 List

# A list can hold elements of different types (here a character, a numeric
# and a logical value). The variable is called `list1` rather than `list` so
# that it does not mask the base list() constructor; the numbered name follows
# the vector1 / vector2 / matrix2 convention used in the rest of this file.
list1 <- list("Hello", 123, TRUE)
list1
## [[1]]
## [1] "Hello"
## 
## [[2]]
## [1] 123
## 
## [[3]]
## [1] TRUE

3.3 Matrix

# matrix() fills column by column by default, so 1:9 runs down each column
# in turn. The variable is called `matrix1` rather than `matrix` so that it
# does not mask the base matrix() function, which this file calls again later;
# the numbered name matches `matrix2` used further down.
matrix1 <- matrix(1:9, nrow = 3)
matrix1
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9

3.4 Data frame

# Read the demo data from a CSV file given by a relative path;
# header = TRUE takes the column names from the first line of the file.
# NOTE(review): the state-name column arrives as `X`, which suggests the
# first header cell in the file is empty (row names written out) -- confirm.
df <- read.csv("statex77.csv", header = TRUE)
# head() previews the first six rows.
head(df)
##            X Population Income Illiteracy Life.Exp Murder HS.Grad Frost   Area
## 1    Alabama       3615   3624        2.1    69.05   15.1    41.3    20  50708
## 2     Alaska        365   6315        1.5    69.31   11.3    66.7   152 566432
## 3    Arizona       2212   4530        1.8    70.55    7.8    58.1    15 113417
## 4   Arkansas       2110   3378        1.9    70.66   10.1    39.9    65  51945
## 5 California      21198   5114        1.1    71.71   10.3    62.6    20 156361
## 6   Colorado       2541   4884        0.7    72.06    6.8    63.9   166 103766

4. Accessing data

4.1 Vectors

# Two parallel vectors: one temperature per city, in the same order.
temps <- c(22, 20, 23, 32, -10)
temps
## [1]  22  20  23  32 -10
cities <- c("Seattle", "Tokyo", "Shanghai", "Hawaii", "Oslo")
cities
## [1] "Seattle"  "Tokyo"    "Shanghai" "Hawaii"   "Oslo"
# Attach the city names to the temperatures so that elements can be
# looked up by name instead of by position.
names(temps) <- cities
temps
##  Seattle    Tokyo Shanghai   Hawaii     Oslo 
##       22       20       23       32      -10
# Index by name.
temps["Oslo"]
## Oslo 
##  -10
# Index by a vector of positions (first and third element).
v3 <- c("A", "B", "C")
v3[c(1,3)]
## [1] "A" "C"
# Index by a range of positions (third through seventh element).
v4 <- c(1:9)
v4[3:7]
## [1] 3 4 5 6 7

4.2 Matrices

# Build a 4-row matrix from the values 1..12.
v5 <- c(1:12)
v5
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12
# byrow = TRUE fills the matrix one row at a time (1 2 3 / 4 5 6 / ...).
matrix2 <- matrix(v5, nrow = 4, byrow = TRUE)
matrix2
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9
## [4,]   10   11   12
# Label the three columns and four rows; the labels replace the
# [,j] / [i,] position markers when the matrix is printed.
columns <- c("Tue", "Wed", "Thu")
rows <- c("Sleep", "Eat", "Play", "Laze around")
colnames(matrix2) <- columns
rownames(matrix2) <- rows

matrix2
##             Tue Wed Thu
## Sleep         1   2   3
## Eat           4   5   6
## Play          7   8   9
## Laze around  10  11  12

4.3 Data frames

4.3.1 Example 1

# Three vectors of equal length (5), one per future column.
days <- c("Mon", "Tue", "Wed", "Thu", "Fri")
temp <- c(10, 21, 23, 25.3, 28)
study <- c(TRUE, FALSE, FALSE, FALSE, FALSE)

# data.frame() combines them as columns; the column names are taken
# from the variable names.
df2 <- data.frame(days, temp, study)
df2
##   days temp study
## 1  Mon 10.0  TRUE
## 2  Tue 21.0 FALSE
## 3  Wed 23.0 FALSE
## 4  Thu 25.3 FALSE
## 5  Fri 28.0 FALSE
# [row, ] with the column index left empty returns the whole first row.
df2[1,]
##   days temp study
## 1  Mon   10  TRUE
# [row, column] returns a single cell: row 3 (Wed), column 2 (temp).
df2[3,2]
## [1] 23

4.3.2 Example 2

# Before: the first column (the state names) is called `X`.
head(df)  
##            X Population Income Illiteracy Life.Exp Murder HS.Grad Frost   Area
## 1    Alabama       3615   3624        2.1    69.05   15.1    41.3    20  50708
## 2     Alaska        365   6315        1.5    69.31   11.3    66.7   152 566432
## 3    Arizona       2212   4530        1.8    70.55    7.8    58.1    15 113417
## 4   Arkansas       2110   3378        1.9    70.66   10.1    39.9    65  51945
## 5 California      21198   5114        1.1    71.71   10.3    62.6    20 156361
## 6   Colorado       2541   4884        0.7    72.06    6.8    63.9   166 103766
# Rename only the first column; the other column names are left untouched.
names(df)[1] <- "State"

# After: the same rows, with the first column now called `State`.
head(df)  
##        State Population Income Illiteracy Life.Exp Murder HS.Grad Frost   Area
## 1    Alabama       3615   3624        2.1    69.05   15.1    41.3    20  50708
## 2     Alaska        365   6315        1.5    69.31   11.3    66.7   152 566432
## 3    Arizona       2212   4530        1.8    70.55    7.8    58.1    15 113417
## 4   Arkansas       2110   3378        1.9    70.66   10.1    39.9    65  51945
## 5 California      21198   5114        1.1    71.71   10.3    62.6    20 156361
## 6   Colorado       2541   4884        0.7    72.06    6.8    63.9   166 103766
# subset(data, row condition, select = columns): keep the rows where the
# condition holds and only the listed columns.
subset(df, State == "Alaska", select = c(Population, Area))
##   Population   Area
## 2        365 566432
# `|` keeps rows matching either condition; State:Income selects the run of
# adjacent columns from State through Income.
subset(df, State == "Alaska" | State == "Washington",
       select = c(State:Income))
##         State Population Income
## 2      Alaska        365   6315
## 47 Washington       3559   4864
# `&` keeps rows matching both conditions; without `select`, every column is
# returned (the printout wraps, so Area appears in a second chunk).
subset(df, Life.Exp >= 70 & Frost >= 170)
##            State Population Income Illiteracy Life.Exp Murder HS.Grad Frost
## 29 New Hampshire        812   4281        0.7    71.23    3.3    57.6   174
## 34  North Dakota        637   5087        0.8    72.78    1.4    50.3   186
## 41  South Dakota        681   4167        0.5    72.08    1.7    53.3   172
## 50       Wyoming        376   4566        0.6    70.29    6.9    62.9   173
##     Area
## 29  9027
## 34 69273
## 41 75955
## 50 97203
# Same row filter, restricted to three columns.
subset(df, Life.Exp >= 70 & Frost >= 170,
       select = c(State, Population, Area))
##            State Population  Area
## 29 New Hampshire        812  9027
## 34  North Dakota        637 69273
## 41  South Dakota        681 75955
## 50       Wyoming        376 97203

4.3.3 Some manipulations

So if our class is on Mondays, what should we do in the week?

# Work on a copy so that df2 itself is left unchanged.
df3 <- df2
df3
##   days temp study
## 1  Mon 10.0  TRUE
## 2  Tue 21.0 FALSE
## 3  Wed 23.0 FALSE
## 4  Thu 25.3 FALSE
## 5  Fri 28.0 FALSE
# ifelse() tests every row at once: "Study hard" where days is "Mon",
# "Be lazy" everywhere else. The result becomes a new `activity` column.
df3$activity <- ifelse(df3$days == "Mon", "Study hard", "Be lazy")
df3
##   days temp study   activity
## 1  Mon 10.0  TRUE Study hard
## 2  Tue 21.0 FALSE    Be lazy
## 3  Wed 23.0 FALSE    Be lazy
## 4  Thu 25.3 FALSE    Be lazy
## 5  Fri 28.0 FALSE    Be lazy

How does temperature affect our daily activities?

# Work on a copy so that df2 itself is left unchanged.
df4 <- df2
df4
##   days temp study
## 1  Mon 10.0  TRUE
## 2  Tue 21.0 FALSE
## 3  Wed 23.0 FALSE
## 4  Thu 25.3 FALSE
## 5  Fri 28.0 FALSE
# Same pattern with a numeric condition. The comparison is inclusive, so the
# row with temp exactly 10 gets "Stay at home"; all warmer rows get
# "Sleep at home".
df4$activity <- ifelse(df4$temp <=10, "Stay at home", "Sleep at home")
df4
##   days temp study      activity
## 1  Mon 10.0  TRUE  Stay at home
## 2  Tue 21.0 FALSE Sleep at home
## 3  Wed 23.0 FALSE Sleep at home
## 4  Thu 25.3 FALSE Sleep at home
## 5  Fri 28.0 FALSE Sleep at home

“Going outside is highly overrated” – Anorak’s Almanac, Ch 17, Verse 32.