Introduction to R

R for Public Health

Ashwini Kalantri

Department of Community Medicine, MGIMS

23 Sep 2024

Get started

"Hello World!"
[1] "Hello World!"

Assignment

<-

text1 <- "Hello World!"
print(text1)
[1] "Hello World!"

->

"Hello World!" -> text2
print(text2)
[1] "Hello World!"

=

text3 = "Hello World!"
print(text3)
[1] "Hello World!"

Reserved Words

  • if
  • else
  • while
  • repeat
  • for
  • function
  • in
  • next
  • break
  • TRUE
  • FALSE
  • NULL
  • Inf
  • NaN
  • NA
  • NA_integer_
  • NA_real_
  • NA_complex_
  • NA_character_

Operators

Arithmetic

Addition

2 + 5
[1] 7

Subtraction

73 - 32
[1] 41

Multiplication

47 * 7
[1] 329

Division

86 / 3
[1] 28.66667

Arithmetic

Exponentiation

8^2
[1] 64

Modulus

77%%3
[1] 2

Relational

Greater

5 > 6
[1] FALSE

Less than

5 < 6
[1] TRUE

Equal

6 == 6
[1] TRUE

Relational

Greater or equal

8 >= 5
[1] TRUE

Less than or equal

7 <= 10
[1] TRUE

Not Equal

9 != 10
[1] TRUE

Joining Logical

AND

TRUE & TRUE
[1] TRUE
TRUE & FALSE
[1] FALSE
FALSE & FALSE
[1] FALSE

OR

TRUE | TRUE
[1] TRUE
TRUE | FALSE
[1] TRUE
FALSE | FALSE
[1] FALSE

Classes

Integer

int <- 3L
print(int)
[1] 3
class(int)
[1] "integer"

Numeric

num <- 4.3
print(num)
[1] 4.3
class(num)
[1] "numeric"

Character

name <- "Your Name"
print(name)
[1] "Your Name"
class(name)
[1] "character"

Classes

Logical

logT <- TRUE
logF <- F
print(logF)
[1] FALSE
class(logF)
[1] "logical"

Date

date1 <- "2023-12-18"
date2 <- 2023-12-18
date3 <- as.Date("2023-12-18")
date4 <- as.Date("18 Dec 2023","%d %b %Y")
date5 <- as.Date(45076, origin = "1900-01-01")
print(date1)
[1] "2023-12-18"
class(date1)
[1] "character"

Class Conversion

num <- "1"
num <- as.numeric(num)

numLet <- as.numeric(LETTERS)
charNum <- as.character(1:100)

tf <- c("TRUE","FALSE","FALSE")
tf <- as.logical(tf)

num <- as.character(num)

Objects

Vectors

vec1 <- c(2,4,6,8,3,5.5)
vec2 <- 4

#combining vectors
newVec <- c(vec1,vec2)

newVec
[1] 2.0 4.0 6.0 8.0 3.0 5.5 4.0
dateVec <- c(as.Date("2023-11-28"),
             as.Date("2023-12-22"),
             Sys.Date())
dateVec
[1] "2023-11-28" "2023-12-22" "2024-09-23"
newVec[5]
[1] 3

Matrix

let <- matrix(LETTERS,
              nrow = 6,
              ncol = 6,
              byrow = F)

let
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,] "A"  "G"  "M"  "S"  "Y"  "E" 
[2,] "B"  "H"  "N"  "T"  "Z"  "F" 
[3,] "C"  "I"  "O"  "U"  "A"  "G" 
[4,] "D"  "J"  "P"  "V"  "B"  "H" 
[5,] "E"  "K"  "Q"  "W"  "C"  "I" 
[6,] "F"  "L"  "R"  "X"  "D"  "J" 
let[3,5]
[1] "A"
let[,5]
[1] "Y" "Z" "A" "B" "C" "D"
let[5,]
[1] "E" "K" "Q" "W" "C" "I"

Factor

gender <- c(1,2,2,1,1,1,2,2,1,2,1)

genFac <- factor(gender,
                 levels = c(1,2),
                 labels = c("Male","Female"))

genFac
 [1] Male   Female Female Male   Male   Male   Female Female Male   Female
[11] Male  
Levels: Male Female

Data Frames

age <- c(12,24,NA,23,65,33) # create age vector

gender <- c("M","F","F","M","M","F") #create gender vector

occu <- factor(c(1,4,3,2,4,5), #occupation 
               levels = c(1:5),
               labels = c("Unemp","Service","Student","Business","Prof"))

#date of birth
dob <- c(as.Date("1993-01-16"),as.Date("1963-12-24"),as.Date("1971-01-05"),
         as.Date("1982-11-11"),as.Date("1984-05-15"),as.Date("1999-03-07"))

#create data frame
df <- data.frame(age,gender,occu,dob)

Data Frames

df
  age gender     occu        dob
1  12      M    Unemp 1993-01-16
2  24      F Business 1963-12-24
3  NA      F  Student 1971-01-05
4  23      M  Service 1982-11-11
5  65      M Business 1984-05-15
6  33      F     Prof 1999-03-07
df[2,]
  age gender     occu        dob
2  24      F Business 1963-12-24
df[,2]
[1] "M" "F" "F" "M" "M" "F"
df[2]
  gender
1      M
2      F
3      F
4      M
5      M
6      F
df[2,4]
[1] "1963-12-24"

List

list <- list(df,dob,let,newVec)

list
[[1]]
  age gender     occu        dob
1  12      M    Unemp 1993-01-16
2  24      F Business 1963-12-24
3  NA      F  Student 1971-01-05
4  23      M  Service 1982-11-11
5  65      M Business 1984-05-15
6  33      F     Prof 1999-03-07

[[2]]
[1] "1993-01-16" "1963-12-24" "1971-01-05" "1982-11-11" "1984-05-15"
[6] "1999-03-07"

[[3]]
     [,1] [,2] [,3] [,4] [,5] [,6]
[1,] "A"  "G"  "M"  "S"  "Y"  "E" 
[2,] "B"  "H"  "N"  "T"  "Z"  "F" 
[3,] "C"  "I"  "O"  "U"  "A"  "G" 
[4,] "D"  "J"  "P"  "V"  "B"  "H" 
[5,] "E"  "K"  "Q"  "W"  "C"  "I" 
[6,] "F"  "L"  "R"  "X"  "D"  "J" 

[[4]]
[1] 2.0 4.0 6.0 8.0 3.0 5.5 4.0

List

List with nth object(s)

list[2]
[[1]]
[1] "1993-01-16" "1963-12-24" "1971-01-05" "1982-11-11" "1984-05-15"
[6] "1999-03-07"

nth object

list[[2]]
[1] "1993-01-16" "1963-12-24" "1971-01-05" "1982-11-11" "1984-05-15"
[6] "1999-03-07"

Selecting within object

list[[2]][4]
[1] "1982-11-11"
list[[1]][2,3]
[1] Business
Levels: Unemp Service Student Business Prof

Functions

function_name(argument1 = value1, argument2 = value2, ...)

Functions

# Add `n1` and `n2` and return the result.
addition <- function(n1, n2) {
  total <- n1 + n2
  total
}

# Divide `n1` by `n2` and return the quotient.
div <- function(n1, n2) {
  quotient <- n1 / n2
  quotient
}

Functions

div(n1 = 55,n2 = 3)
[1] 18.33333

Packages

library(dplyr)

dplyr::glimpse(df)
Rows: 6
Columns: 4
$ age    <dbl> 12, 24, NA, 23, 65, 33
$ gender <chr> "M", "F", "F", "M", "M", "F"
$ occu   <fct> Unemp, Business, Student, Service, Business, Prof
$ dob    <date> 1993-01-16, 1963-12-24, 1971-01-05, 1982-11-11, 1984-05-15, 199…

Working Directory

setwd("~/r4ph24") #Mac, Linux, Unix

setwd("C:/user/ashwini/documents/r4ph24") #Windows

getwd()

Projects

Scripts

  • Names
  • Spaces
  • Pipes
  • Comments

Names

# Strive for:
young_age <- df %>%  filter(age < 20)

# Avoid:
YOUNGAGE <- df %>%  filter(age < 20)

Spaces

# Strive for
z <- (a + b)^2 / d

# Avoid
z<-( a + b ) ^ 2/d

# Strive for
mean_age <- mean(df$age, na.rm = TRUE)

# Avoid
mean_age<-mean (df$age ,na.rm=TRUE)

Pipes

# Avoid
pipe <- df %>% select(age,dob,occu) %>% mutate(age_cat = if_else(age < 20,"Young","Old"))

# Strive for
pipe <- df %>%
  select(age, dob, occu) %>%
  mutate(age_cat = if_else(age < 20, "Young", "Old"))

# Avoid
pipe <- df %>%
  select(age, dob, occu) %>%
  summarise(age_cat = mean(
                            age,
                            na.rm = TRUE)
                          )

# Strive for
pipe <- df %>%
  select(age, dob, occu) %>%
  summarise(age_cat = mean(
    age,
    na.rm = TRUE)
    )

Commenting

# Print the text "Hello World"
print("Hello World!")

print("Hello World!") # Print the text "Hello World"

# Multi-line comment
# about printing the text "Hello World"
print("Hello World!")
Sections
# Section 1 ####
print("Section 1")

## Sub Section ####
print("Sub section")

# Section 2 ####
print("Section 2")

Importing Data

CSV

data <- read.csv("data.csv")

Excel

library(readxl)
data <- read_excel("data.xlsx")

Stata, SPSS

library(haven)
data <- read_sav("data.sav")
data <- read_dta("data.dta")

A Swiss-Army Knife for Data I/O

library(rio)
data <- rio::import("data.xlsx")
data <- rio::import("data.csv")
data <- rio::import("data.sav")
data <- rio::import("data.dta")

Tidy Data

Tidy Data

  1. Each variable is a column; each column is a variable.

  2. Each observation is a row; each row is an observation.

  3. Each value is a cell; each cell is a single value.

Getting Help

Help yourself

  • Read the manual
  • Check your code
  • Read the error message
  • Web search
  • Read the forums
  • Rubber duck debugging

Get Help

  • Ask a friend, colleague
  • Post on the forums
    • Describe your goal
    • Be explicit about your question
    • Provide specific information
    • Be courteous
    • Provide the solution if you found it elsewhere.