Data Visualisation

R for Public Health

Ashwini Kalantri

Department of Community Medicine, MGIMS

25 Sep 2024

Grammar of Graphics

Grammar of Graphics

  • Data
    The variables to be plotted
  • Aesthetics
    Scales, colours, fills, sizes, labels, etc.
  • Geometries
    The shapes that represent your data
  • Facets
    Sub plots
  • Statistics
    Statistics and summaries
  • Coordinates
    The plotting space
  • Themes
    Design elements

ggplot2

# Load the plotting (ggplot2) and data-manipulation (dplyr) packages.
library(ggplot2)
library(dplyr)
# Read the example dataset; the path is relative to the working directory.
data <- read.csv("data.csv")

Canvas

# Start with an empty plot: no data, aesthetics or layers yet.
ggplot()

Add data

# Attach the data frame; no aesthetics or geometry layers are defined yet.
ggplot(data)

Aesthetics

# Map the wt column to the x axis.
ggplot(data, aes(x = wt))

Aesthetics

# Map wt to x and ht to y; there is still no geometry layer.
ggplot(data, aes(x = wt,
                 y = ht)) +

Geometry

# Add a point layer: a scatter plot of ht against wt.
ggplot(data, aes(x = wt,
                 y = ht)) +
  geom_point()

Labels

# New step: replace the default axis titles with readable labels via labs().
ggplot(data, aes(x = wt,
                 y = ht)) +
  labs(x = "Weight (kg)",
       y = "Height (m)") +
  geom_point()

Colour

# New step: colour each point by the age column.
ggplot(data, aes(x = wt,
                 y = ht,
                 colour = age)) +
  labs(x = "Weight (kg)",
       y = "Height (m)") +
  geom_point()

Size

# New step: map point size to edu. The data frame is now piped into ggplot()
# instead of being passed as the first argument.
data %>%
  ggplot(aes(x = wt,
             y = ht,
             colour = age,
             size = edu)) +
  labs(x = "Weight (kg)",
       y = "Height (m)") +
  geom_point()

Size

Facets

# Convert the grouping columns to factors so they are treated as categories.
plotData <- data %>%
  mutate(phc = factor(phc),
         sex = factor(sex),
         edu = factor(edu))

# New step: split the scatter plot into one panel per phc level.
plotData %>% 
  ggplot(aes(x = wt,
             y = ht,
             colour = age,
             size = edu)) +
  labs(x = "Weight (kg)",
       y = "Height (m)") +
  geom_point() +
  facet_wrap(vars(phc))

Facets

Scale

# New step: put both axes on a log10 scale.
plotData %>% 
  ggplot(aes(x = wt,
             y = ht,
             colour = age,
             size = edu)) +
  labs(x = "Weight (kg)",
       y = "Height (m)") +
  geom_point() +
  facet_wrap(vars(phc)) +
  scale_x_log10() +
  scale_y_log10()

Scale

Title

# New step: add a title, subtitle and caption through labs().
plotData %>% 
  ggplot(aes(x = wt,
             y = ht,
             colour = age,
             size = edu)) +
  labs(x = "Weight (kg)",
       y = "Height (m)",
       title = "Scatter plot of Height against Weight",
       subtitle = "10-30 year individuals from Wardha",
       caption = "Data Source: VCAN Project, DCM, MGIMS") +
  geom_point() +
  facet_wrap(vars(phc))  +
  scale_x_log10() +
  scale_y_log10()

Title

Theme

# New step: switch to the built-in minimal theme.
plotData %>% 
  ggplot(aes(x = wt,
             y = ht,
             colour = age,
             size = edu)) +
  labs(x = "Weight (kg)",
       y = "Height (m)",
       title = "Scatter plot of Height against Weight",
       subtitle = "10-30 year individuals from Wardha",
       caption = "Data Source: VCAN Project, DCM, MGIMS") +
  geom_point() +
  facet_wrap(vars(phc))  +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal()

Theme

Legend

# New step: move the legend below the plot. theme() is added after
# theme_minimal() so the complete theme does not overwrite this setting.
plotData %>% 
  ggplot(aes(x = wt,
             y = ht,
             colour = age,
             size = edu)) +
  labs(x = "Weight (kg)",
       y = "Height (m)",
       title = "Scatter plot of Height against Weight",
       subtitle = "10-30 year individuals from Wardha",
       caption = "Data Source: VCAN Project, DCM, MGIMS") +
  geom_point() +
  facet_wrap(vars(phc))  +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom")

Legend

Scale Colour

# New step: use the continuous viridis palette for the age colour scale.
plotData %>% 
  ggplot(aes(x = wt,
             y = ht,
             colour = age,
             size = edu)) +
  labs(x = "Weight (kg)",
       y = "Height (m)",
       title = "Scatter plot of Height against Weight",
       subtitle = "10-30 year individuals from Wardha",
       caption = "Data Source: VCAN Project, DCM, MGIMS") +
  geom_point() +
  facet_wrap(vars(phc))  +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "bottom") +
  scale_colour_viridis_c()

Scale Colour

New Theme

# New step: swap theme_minimal() for theme_stata() from the ggthemes package
# and place the facet strip labels below each panel.
library(ggthemes)
plotData %>% 
  ggplot(aes(x = wt,
             y = ht,
             colour = age,
             size = edu)) +
  labs(x = "Weight (kg)",
       y = "Height (m)",
       title = "Scatter plot of Height against Weight",
       subtitle = "10-30 year individuals from Wardha",
       caption = "Data Source: VCAN Project, DCM, MGIMS") +
  geom_point() +
  facet_wrap(vars(phc), strip.position = "bottom")  +
  scale_x_log10() +
  scale_y_log10() +
  theme_stata() +
  theme(legend.position = "bottom") +
  scale_colour_viridis_c()

New Theme

Histogram

# Histogram of sbp; the bin count is left at the geom_histogram() default.
data %>% 
  ggplot(aes(x = sbp)) +
  geom_histogram()

Density

# Kernel density estimate of sbp.
data %>% 
  ggplot(aes(x = sbp)) +
  geom_density()

Histogram and Density

# Overlay the density curve and the histogram on one plot.
# NOTE: the histogram's y axis is on the count scale by default while the
# density curve is on the density scale, so the two layers do not line up here.
data %>% 
  ggplot(aes(x = sbp)) +
  geom_density() +
  geom_histogram()

Histogram and Density

# Map the histogram's y to after_stat(density) so the bars share the same
# y scale as the density curve.
data %>% 
  ggplot(aes(x = sbp)) +
  geom_density() +
  geom_histogram(aes(x = sbp,
                     y = after_stat(density)))

Barchart

# Bar chart of the number of rows in each phc level.
plotData %>% 
  ggplot(aes(x = phc)) +
  geom_bar()

Barchart labels

# New step: print each bar's count as a text label. stat = "count" makes
# geom_text() compute the same per-bar counts as geom_bar(); vjust = 2 shifts
# the white label downwards so that it sits inside the bar.
plotData %>% 
  ggplot(aes(x = phc)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)),
            stat = "count",vjust = 2,colour = "white")

Smooth

# Smoothed trend of ht against wt; the smoothing method is left at the
# geom_smooth() default.
plotData %>% 
  ggplot(aes(x = wt,
             y = ht)) +
  geom_smooth() 

Box-plot

# Box plot of wt for each level of sex.
plotData %>% 
  ggplot(aes(x = sex,
             y = wt)) +
  geom_boxplot()

Count

# Count plot: one point per sex/phc combination, sized by the number of rows
# that fall in that combination.
plotData %>% 
  ggplot(aes(x = sex,
             y = phc)) +
  geom_count()

Maps

library(sf)

# Read the village boundary shapefile and pass it through fortify().
shp <- st_read(dsn = "shape_files/war_vil.shp")
wardha <- fortify(shp)

# Read the village population table, keep only rows that have a census id,
# and bin the population (ind) into labelled categories.
# case_when() uses the first matching branch, so each branch only needs its
# lower bound; rows with a missing ind match no branch and stay NA.
map_data <- read.csv("map_data.csv") %>%
  filter(!is.na(census_id)) %>%
  mutate(
    popcat = case_when(
      ind >= 5000 ~ ">5000",
      ind >= 2000 ~ "2000-5000",
      ind >= 1000 ~ "1000-2000",
      ind >= 500 ~ "500-1000",
      ind < 500 ~ "<500"
    )
  )

# Attach the population attributes to the village polygons, matching the
# shapefile's CEN_2011 column to census_id.
map <- left_join(
  wardha,
  map_data,
  by = c("CEN_2011" = "census_id")
)

Maps

# Draw the village polygons, filled by population category.
ggplot(data = map) +
  geom_sf(aes(fill = popcat)) +
  theme_minimal()

Maps

# New steps: restrict the visible extent with coord_sf() (limits are
# presumably longitude/latitude in degrees; confirm against the shapefile
# CRS) and use the discrete viridis palette, drawing polygons with no
# population category in grey.
ggplot(data = map) +
  geom_sf(aes(fill = popcat)) +
  theme_minimal() +
  coord_sf(xlim = c(78.2, 78.8), ylim = c(20.5, 20.9)) +
  scale_fill_viridis_d(na.value = "grey")

Grammar of Tables

Grammar of Tables

GT Table

GT Table

library(gt)

# Render the first 10 rows of map_data as a gt table.
map_data %>%
  head(n = 10) %>% 
  gt()

GT Table

X village hh ind census_id description_english popcat
1 1 1599 5792 534052 Anji >5000
2 2 259 920 534046 Kamthi Khanapur 500-1000
3 3 3 6 534049 Sewa <500
4 4 229 744 534045 Chaka Majara 500-1000
5 5 284 1071 534051 Pavnur 1000-2000
6 6 570 1990 534054 Mandawa 1000-2000
7 7 237 779 534055 Pulai 500-1000
8 8 150 524 534056 Peth 500-1000
9 9 193 700 534057 Borgaon (Sawali) 500-1000
10 10 133 462 534060 Borgaon (Nandora) <500

Header

# New step: add a table header; md() lets the title use Markdown (bold here).
map_data %>%
  head(n = 10) %>% 
  gt() %>%
  tab_header(title = md("**Sevagram HDSS**"),
             subtitle = "Population Details")

Header

Sevagram HDSS

Population Details
X village hh ind census_id description_english popcat
1 1 1599 5792 534052 Anji >5000
2 2 259 920 534046 Kamthi Khanapur 500-1000
3 3 3 6 534049 Sewa <500
4 4 229 744 534045 Chaka Majara 500-1000
5 5 284 1071 534051 Pavnur 1000-2000
6 6 570 1990 534054 Mandawa 1000-2000
7 7 237 779 534055 Pulai 500-1000
8 8 150 524 534056 Peth 500-1000
9 9 193 700 534057 Borgaon (Sawali) 500-1000
10 10 133 462 534060 Borgaon (Nandora) <500

Source Note

# New step: add a source note below the table.
map_data %>%
  head(n = 10) %>% 
  gt() %>%
  tab_header(title = md("**Sevagram HDSS**"),
             subtitle = "Population Details") %>%
  tab_source_note(source_note = md("**Source:** Wave 1, Sevagram HDSS"))

Source Note

Sevagram HDSS

Population Details
X village hh ind census_id description_english popcat
1 1 1599 5792 534052 Anji >5000
2 2 259 920 534046 Kamthi Khanapur 500-1000
3 3 3 6 534049 Sewa <500
4 4 229 744 534045 Chaka Majara 500-1000
5 5 284 1071 534051 Pavnur 1000-2000
6 6 570 1990 534054 Mandawa 1000-2000
7 7 237 779 534055 Pulai 500-1000
8 8 150 524 534056 Peth 500-1000
9 9 193 700 534057 Borgaon (Sawali) 500-1000
10 10 133 462 534060 Borgaon (Nandora) <500

Source: Wave 1, Sevagram HDSS

Footnote

# New step: attach a footnote to the village-name cell of every row whose
# population (ind) exceeds 5000.
map_data %>%
  head(n = 10) %>% 
  gt() %>%
  tab_header(title = md("**Sevagram HDSS**"),
             subtitle = "Population Details") %>%
  tab_source_note(source_note = md("**Source:** Wave 1, Sevagram HDSS")) %>%
  tab_footnote(footnote = "Large Village",
               locations = cells_body(columns = description_english,
                                      rows = ind > 5000))

Footnote

Sevagram HDSS

Population Details
X village hh ind census_id description_english popcat
1 1 1599 5792 534052 Anji1 >5000
2 2 259 920 534046 Kamthi Khanapur 500-1000
3 3 3 6 534049 Sewa <500
4 4 229 744 534045 Chaka Majara 500-1000
5 5 284 1071 534051 Pavnur 1000-2000
6 6 570 1990 534054 Mandawa 1000-2000
7 7 237 779 534055 Pulai 500-1000
8 8 150 524 534056 Peth 500-1000
9 9 193 700 534057 Borgaon (Sawali) 500-1000
10 10 133 462 534060 Borgaon (Nandora) <500

Source: Wave 1, Sevagram HDSS

1 Large Village

Spanners

# New step: group the hh and ind columns under a shared spanner label.
map_data %>%
  head(n = 10) %>% 
  gt() %>%
  tab_header(title = md("**Sevagram HDSS**"),
             subtitle = "Population Details") %>%
  tab_source_note(source_note = md("**Source:** Wave 1, Sevagram HDSS")) %>%
  tab_footnote(footnote = "Large Village",
               locations = cells_body(columns = description_english,
                                      rows = ind > 5000)) %>%
  tab_spanner(label = "Population details",
              columns = c(hh, ind))

Spanners

Sevagram HDSS

Population Details
X village Population details census_id description_english popcat
hh ind
1 1 1599 5792 534052 Anji1 >5000
2 2 259 920 534046 Kamthi Khanapur 500-1000
3 3 3 6 534049 Sewa <500
4 4 229 744 534045 Chaka Majara 500-1000
5 5 284 1071 534051 Pavnur 1000-2000
6 6 570 1990 534054 Mandawa 1000-2000
7 7 237 779 534055 Pulai 500-1000
8 8 150 524 534056 Peth 500-1000
9 9 193 700 534057 Borgaon (Sawali) 500-1000
10 10 133 462 534060 Borgaon (Nandora) <500

Source: Wave 1, Sevagram HDSS

1 Large Village

Hide Columns

# New step: hide columns that should not be displayed. Hidden columns stay in
# the data, so the footnote condition on ind keeps working.
map_data %>%
  head(n = 10) %>% 
  gt() %>%
  tab_header(title = md("**Sevagram HDSS**"),
             subtitle = "Population Details") %>%
  tab_source_note(source_note = md("**Source:** Wave 1, Sevagram HDSS")) %>%
  tab_footnote(footnote = "Large Village",
               locations = cells_body(columns = description_english,
                                      rows = ind > 5000)) %>%
  tab_spanner(label = "Population details",
              columns = c(hh, ind)) %>%
  cols_hide(c(X, village, census_id, popcat))

Hide Columns

Sevagram HDSS

Population Details
Population details description_english
hh ind
1599 5792 Anji1
259 920 Kamthi Khanapur
3 6 Sewa
229 744 Chaka Majara
284 1071 Pavnur
570 1990 Mandawa
237 779 Pulai
150 524 Peth
193 700 Borgaon (Sawali)
133 462 Borgaon (Nandora)

Source: Wave 1, Sevagram HDSS

1 Large Village

Move Columns

# New step: move the village-name column to the first position.
map_data %>%
  head(n = 10) %>% 
  gt() %>%
  tab_header(title = md("**Sevagram HDSS**"),
             subtitle = "Population Details") %>%
  tab_source_note(source_note = md("**Source:** Wave 1, Sevagram HDSS")) %>%
  tab_footnote(footnote = "Large Village",
               locations = cells_body(columns = description_english,
                                      rows = ind > 5000)) %>%
  tab_spanner(label = "Population details",
              columns = c(hh, ind)) %>%
  cols_hide(c(X, village, census_id, popcat)) %>%
  cols_move_to_start(description_english) 

Move Columns

Sevagram HDSS

Population Details
description_english Population details
hh ind
Anji1 1599 5792
Kamthi Khanapur 259 920
Sewa 3 6
Chaka Majara 229 744
Pavnur 284 1071
Mandawa 570 1990
Pulai 237 779
Peth 150 524
Borgaon (Sawali) 193 700
Borgaon (Nandora) 133 462

Source: Wave 1, Sevagram HDSS

1 Large Village

Column Names

# New step: replace the raw column names with display labels. Only the
# displayed labels change; later steps still refer to the original names.
map_data %>%
  head(n = 10) %>% 
  gt() %>%
  tab_header(title = md("**Sevagram HDSS**"),
             subtitle = "Population Details") %>%
  tab_source_note(source_note = md("**Source:** Wave 1, Sevagram HDSS")) %>%
  tab_footnote(footnote = "Large Village",
               locations = cells_body(columns = description_english,
                                      rows = ind > 5000)) %>%
  tab_spanner(label = "Population details",
              columns = c(hh, ind)) %>%
  cols_hide(c(X, village, census_id, popcat)) %>%
  cols_move_to_start(description_english) %>%
  cols_label(description_english = "Village",
             hh = "Households",
             ind = "Population")

Column Names

Sevagram HDSS

Population Details
Village Population details
Households Population
Anji1 1599 5792
Kamthi Khanapur 259 920
Sewa 3 6
Chaka Majara 229 744
Pavnur 284 1071
Mandawa 570 1990
Pulai 237 779
Peth 150 524
Borgaon (Sawali) 193 700
Borgaon (Nandora) 133 462

Source: Wave 1, Sevagram HDSS

1 Large Village

Conditional Formatting

# New step: conditional formatting with tab_style().
#   - Households cells are filled green where hh > 1000.
#   - Population cells are filled red where ind < 500.
# cells_body() takes `rows` (plural); the argument is spelled out in full
# rather than relying on R's partial argument matching of `row`.
map_data %>%
  head(n = 10) %>% 
  gt() %>%
  tab_header(title = md("**Sevagram HDSS**"),
             subtitle = "Population Details") %>%
  tab_source_note(source_note = md("**Source:** Wave 1, Sevagram HDSS")) %>%
  tab_footnote(footnote = "Large Village",
               locations = cells_body(columns = description_english,
                                      rows = ind > 5000)) %>%
  tab_spanner(label = "Population details",
              columns = c(hh, ind)) %>%
  cols_hide(c(X, village, census_id, popcat)) %>%
  cols_move_to_start(description_english) %>%
  cols_label(description_english = "Village",
             hh = "Households",
             ind = "Population") %>%
  tab_style(style = list(cell_fill(color = "green")),
            locations = cells_body(columns = hh,
                                   rows = hh > 1000)) %>% 
  tab_style(style = list(cell_fill(color = "red")),
            locations = cells_body(columns = ind,
                                   rows = ind < 500))

Conditional Formatting

Sevagram HDSS

Population Details
Village Population details
Households Population
Anji1 1599 5792
Kamthi Khanapur 259 920
Sewa 3 6
Chaka Majara 229 744
Pavnur 284 1071
Mandawa 570 1990
Pulai 237 779
Peth 150 524
Borgaon (Sawali) 193 700
Borgaon (Nandora) 133 462

Source: Wave 1, Sevagram HDSS

1 Large Village