Introduction to R

R for Public Health

Ashwini Kalantri

Department of Community Medicine, MGIMS

23 Sep 2024

Get started

"Hello World!"

[1] "Hello World!"

Assignment

<-

text1 <- "Hello World!"
print(text1)

[1] "Hello World!"

->

"Hello World!" -> text2
print(text2)

[1] "Hello World!"

=

text3 = "Hello World!"
print(text3)

[1] "Hello World!"

Reserved Words

if
else
while
repeat
for
function
in

next
break
TRUE
FALSE
NULL
Inf
NaN

NA
NA_integer_
NA_real_
NA_complex_
NA_character_
…

Operators

Arithmetic

Addition

2 + 5

[1] 7

Subtraction

73 - 32

[1] 41

Multiplication

47 * 7

[1] 329

Division

86 / 3

[1] 28.66667

Arithmetic

Exponentiation

8^2

[1] 64

Modulus

77%%3

[1] 2

Relational

Greater than

5 > 6

[1] FALSE

Less than

5 < 6

[1] TRUE

Equal

6 == 6

[1] TRUE

Relational

Greater than or equal to

8 >= 5

[1] TRUE

Less than or equal to

7 <= 10

[1] TRUE

Not Equal

9 != 10

[1] TRUE

Joining Logical

AND

TRUE & TRUE

[1] TRUE

TRUE & FALSE

[1] FALSE

FALSE & FALSE

[1] FALSE

OR

TRUE | TRUE

[1] TRUE

TRUE | FALSE

[1] TRUE

FALSE | FALSE

[1] FALSE

Classes

Integer

int <- 3L
print(int)

[1] 3

class(int)

[1] "integer"

Numeric

num <- 4.3
print(num)

[1] 4.3

class(num)

[1] "numeric"

Character

name <- "Your Name"
print(name)

[1] "Your Name"

class(name)

[1] "character"

Classes

Logical

logT <- TRUE
logF <- F
print(logF)

[1] FALSE

class(logF)

[1] "logical"

Date

# A quoted date is only text: its class is "character", not "Date".
date1 <- "2023-12-18"
# Without quotes this is arithmetic (2023 minus 12 minus 18), i.e. 1993.
date2 <- 2023-12-18
# as.Date() turns text written as YYYY-MM-DD into a Date object.
date3 <- as.Date("2023-12-18")
# For other layouts, supply a format string:
# %d = day, %b = abbreviated month name, %Y = four-digit year.
date4 <- as.Date("18 Dec 2023","%d %b %Y")
# A number is interpreted as a count of days after `origin`.
# NOTE(review): if 45076 is an Excel serial date, the origin should be
# "1899-12-30" rather than "1900-01-01" -- confirm the intended source.
date5 <- as.Date(45076, origin = "1900-01-01")
print(date1)

[1] "2023-12-18"

class(date1)

[1] "character"

Class Conversion

# Text that looks like a number converts cleanly: "1" becomes 1.
num <- "1"
num <- as.numeric(num)

# Letters are not numbers, so every element becomes NA (with a warning).
numLet <- as.numeric(LETTERS)
# Numbers can be turned into text: "1", "2", ..., "100".
charNum <- as.character(1:100)

# The strings "TRUE" and "FALSE" convert to the logical values TRUE and FALSE.
tf <- c("TRUE","FALSE","FALSE")
tf <- as.logical(tf)

# Convert num back to text: 1 becomes "1".
num <- as.character(num)

Objects

Vectors

vec1 <- c(2,4,6,8,3,5.5)
vec2 <- 4

#combining vectors
newVec <- c(vec1,vec2)

newVec

[1] 2.0 4.0 6.0 8.0 3.0 5.5 4.0

dateVec <- c(as.Date("2023-11-28"),
             as.Date("2023-12-22"),
             Sys.Date())
dateVec

[1] "2023-11-28" "2023-12-22" "2024-09-23"

newVec[5]

[1] 3

Matrix

# Build a 6 x 6 character matrix, filled column by column (byrow = FALSE).
# LETTERS has only 26 elements for 36 cells, so R recycles the vector and
# starts again at "A" after "Z"; it also warns about the length mismatch.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned, whereas FALSE is a reserved word.
let <- matrix(LETTERS,
              nrow = 6,
              ncol = 6,
              byrow = FALSE)

let

     [,1] [,2] [,3] [,4] [,5] [,6]
[1,] "A"  "G"  "M"  "S"  "Y"  "E" 
[2,] "B"  "H"  "N"  "T"  "Z"  "F" 
[3,] "C"  "I"  "O"  "U"  "A"  "G" 
[4,] "D"  "J"  "P"  "V"  "B"  "H" 
[5,] "E"  "K"  "Q"  "W"  "C"  "I" 
[6,] "F"  "L"  "R"  "X"  "D"  "J"

let[3,5]

[1] "A"

let[,5]

[1] "Y" "Z" "A" "B" "C" "D"

let[5,]

[1] "E" "K" "Q" "W" "C" "I"

Factor

gender <- c(1,2,2,1,1,1,2,2,1,2,1)

genFac <- factor(gender,
                 levels = c(1,2),
                 labels = c("Male","Female"))

genFac

 [1] Male   Female Female Male   Male   Male   Female Female Male   Female
[11] Male  
Levels: Male Female

Data Frames

age <- c(12,24,NA,23,65,33) # create age vector

gender <- c("M","F","F","M","M","F") #create gender vector

occu <- factor(c(1,4,3,2,4,5), #occupation 
               levels = c(1:5),
               labels = c("Unemp","Service","Student","Business","Prof"))

#date of birth
dob <- c(as.Date("1993-01-16"),as.Date("1963-12-24"),as.Date("1971-01-05"),
         as.Date("1982-11-11"),as.Date("1984-05-15"),as.Date("1999-03-07"))

#create data frame
df <- data.frame(age,gender,occu,dob)

Data Frames

df

  age gender     occu        dob
1  12      M    Unemp 1993-01-16
2  24      F Business 1963-12-24
3  NA      F  Student 1971-01-05
4  23      M  Service 1982-11-11
5  65      M Business 1984-05-15
6  33      F     Prof 1999-03-07

df[2,]

  age gender     occu        dob
2  24      F Business 1963-12-24

df[,2]

[1] "M" "F" "F" "M" "M" "F"

df[2]

df[2,4]

[1] "1963-12-24"

List

list <- list(df,dob,let,newVec)

list

[[1]]
  age gender     occu        dob
1  12      M    Unemp 1993-01-16
2  24      F Business 1963-12-24
3  NA      F  Student 1971-01-05
4  23      M  Service 1982-11-11
5  65      M Business 1984-05-15
6  33      F     Prof 1999-03-07

[[2]]
[1] "1993-01-16" "1963-12-24" "1971-01-05" "1982-11-11" "1984-05-15"
[6] "1999-03-07"

[[3]]
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,] "A"  "G"  "M"  "S"  "Y"  "E" 
[2,] "B"  "H"  "N"  "T"  "Z"  "F" 
[3,] "C"  "I"  "O"  "U"  "A"  "G" 
[4,] "D"  "J"  "P"  "V"  "B"  "H" 
[5,] "E"  "K"  "Q"  "W"  "C"  "I" 
[6,] "F"  "L"  "R"  "X"  "D"  "J" 

[[4]]
[1] 2.0 4.0 6.0 8.0 3.0 5.5 4.0

List

List with nth object(s)

list[2]

[[1]]
[1] "1993-01-16" "1963-12-24" "1971-01-05" "1982-11-11" "1984-05-15"
[6] "1999-03-07"

nth object

list[[2]]

[1] "1993-01-16" "1963-12-24" "1971-01-05" "1982-11-11" "1984-05-15"
[6] "1999-03-07"

selecting within object

list[[2]][4]

[1] "1982-11-11"

list[[1]][2,3]

[1] Business
Levels: Unemp Service Student Business Prof

Functions

function_name(argument1 = value1, argument2 = value2, ...)

Functions

# Add two values.
#   n1, n2: the values to add (vectors are added element by element).
# Returns the sum n1 + n2.
addition <- function(n1, n2) {
  total <- n1 + n2
  total
}

# Divide one value by another.
#   n1: the numerator.
#   n2: the denominator.
# Returns the quotient n1 / n2.
div <- function(n1, n2) {
  quotient <- n1 / n2
  quotient
}

Functions

div(n1 = 55,n2 = 3)

[1] 18.33333

Packages

library(dplyr)

dplyr::glimpse(df)

Rows: 6
Columns: 4
$ age    <dbl> 12, 24, NA, 23, 65, 33
$ gender <chr> "M", "F", "F", "M", "M", "F"
$ occu   <fct> Unemp, Business, Student, Service, Business, Prof
$ dob    <date> 1993-01-16, 1963-12-24, 1971-01-05, 1982-11-11, 1984-05-15, 199…

Working Directory

setwd("~/r4ph24") #Mac, Linux, Unix

setwd("C:/user/ashwini/documents/r4ph24") #Windows

getwd()

Projects

Scripts

Names
Spaces
Pipes
Comments

Names

# Strive for:
young_age <- df %>%  filter(age < 20)

# Avoid:
YOUNGAGE <- df %>%  filter(age < 20)

Spaces

# Strive for
z <- (a + b)^2 / d

# Avoid
z<-( a + b ) ^ 2/d

# Strive for
mean_age <- mean(df$age, na.rm = TRUE)

# Avoid
mean_age<-mean (df$age ,na.rm=TRUE)

Pipes

# Avoid
pipe <- df %>% select(age,dob,occu) %>% mutate(age_cat = if_else(age < 20,"Young","Old"))

# Strive for
pipe <- df %>%
  select(age, dob, occu) %>%
  mutate(age_cat = if_else(age < 20, "Young", "Old"))

# Avoid
pipe <- df %>%
  select(age, dob, occu) %>%
  summarise(age_cat = mean(
                            age,
                            na.rm = TRUE)
                          )

# Strive for
pipe <- df %>%
  select(age, dob, occu) %>%
  summarise(age_cat = mean(
    age,
    na.rm = TRUE)
    )

Commenting

# Print the text "Hello World"
print("Hello World!")

print("Hello World!") # Print the text "Hello World"

# Multi-line comment
# about printing the text "Hello World"
print("Hello World!")

Sections

# Section 1 ####
print("Section 1")

## Sub Section ####
print("Sub section")

# Section 2 ####
print("Section 2")

Importing Data

CSV

data <- read.csv("data.csv")

Excel

library(readxl)
data <- read_excel("data.xlsx")

Stata, SPSS

# haven provides readers for files saved by other statistics packages.
library(haven)
# SPSS data file (.sav)
data <- read_sav("data.sav")
# Stata data file (.dta); note that this overwrites `data` from the line above.
data <- read_dta("data.dta")

A Swiss-Army Knife for Data I/O

library(rio)
data <- rio::import("data.xlsx")
data <- rio::import("data.csv")
data <- rio::import("data.sav")
data <- rio::import("data.dta")

Tidy Data

Each variable is a column; each column is a variable.
Each observation is a row; each row is an observation.
Each value is a cell; each cell is a single value.

Getting Help

Help yourself

Read the manual
Check your code
Read the error message
Web search
Read the forums
Rubber duck debugging

Get Help

Ask a friend or colleague
Post on the forums
- Describe your goal
- Be explicit about your question
- Provide specific information
- Be courteous
- Provide the solution if you found it elsewhere.