Biostatistics

R for Public Health

25 Sep 2024

Biostatistics

library(ggplot2)
library(readr)
library(dplyr)

df <- read_csv("data.csv")

Tests for normality

Histogram

df %>%
  ggplot(aes(x = sbp)) +
  geom_histogram(colour = "red",
                 fill = "blue")

Density plot

df %>% 
  ggplot(aes(x=sbp))+
  geom_density()

Histogram with Density plot

df %>%
  ggplot(aes(x = sbp)) +
  geom_histogram(aes(x=sbp,
                     after_stat(density)),
                 colour="red",
                 fill="blue")+geom_density(lwd=2)

Box plot

df %>% 
  ggplot(aes(x=sbp))+
  geom_boxplot()

Q-Q Plot

df %>% 
  ggplot(aes(sample = sbp)) +
  geom_qq()

Kolmogorov-Smirnov Test

ks.test(df$sbp, pnorm)

    Asymptotic one-sample Kolmogorov-Smirnov test

data:  df$sbp
D = 1, p-value < 2.2e-16
alternative hypothesis: two-sided

Shapiro test

shapiro.test(df$sbp)

    Shapiro-Wilk normality test

data:  df$sbp
W = 0.99304, p-value = 0.6837

Lilliefors test

library(nortest)
lillie.test(df$sbp)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  df$sbp
D = 0.057746, p-value = 0.2554

Tests of Significance

t-test

ttest_result <- t.test(sbp ~ sex,
                       data = df)

print(ttest_result)

    Welch Two Sample t-test

data:  sbp by sex
t = 0.93106, df = 126.45, p-value = 0.3536
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -2.018888  5.606657
sample estimates:
mean in group Female   mean in group Male 
            110.7377             108.9438 

Paired t-test

pairedttest <- t.test(df$sbp1,
                      df$sbp2,
                      paired=TRUE)

pairedttest

    Paired t-test

data:  df$sbp1 and df$sbp2
t = -0.10942, df = 149, p-value = 0.913
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -1.524712  1.364712
sample estimates:
mean difference 
          -0.08 

Chi-square test

cst <- chisq.test(df$sex,
                  df$curSmokeless)
cst

    Pearson's Chi-squared test with Yates' continuity correction

data:  df$sex and df$curSmokeless
X-squared = 13.126, df = 1, p-value = 0.0002912

Wilcoxon signed rank test

WT <- wilcox.test(df$dbp1,
                  df$dbp2,
                  paired = TRUE)
WT

    Wilcoxon signed rank test with continuity correction

data:  df$dbp1 and df$dbp2
V = 4607.5, p-value = 0.6899
alternative hypothesis: true location shift is not equal to 0

Mann Whitney Test

MW <- wilcox.test(dbp ~ sex,data = df)

MW

    Wilcoxon rank sum test with continuity correction

data:  dbp by sex
W = 2532.5, p-value = 0.487
alternative hypothesis: true location shift is not equal to 0

Kruskal-Wallis test

KWT <- kruskal.test(df$dbp1,
                    df$dbp2,
                    df$dbp3)
KWT

    Kruskal-Wallis rank sum test

data:  df$dbp1 and df$dbp2
Kruskal-Wallis chi-squared = 90.005, df = 39, p-value = 6.569e-06

Correlation

Pearson correlation coefficient

correlation_coefficient <- cor(df$sbp1, df$sbp2)
print(correlation_coefficient)
[1] 0.6970923

Test the significance of the correlation coefficient

cor_test_result <- cor.test(df$sbp1, df$sbp2)
print(cor_test_result)

    Pearson's product-moment correlation

data:  df$sbp1 and df$sbp2
t = 11.828, df = 148, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.6043466 0.7711979
sample estimates:
      cor 
0.6970923 

Spearman correlation

spearman_coefficient <- cor(df$dbp1,df$dbp2, method = "spearman")
print(spearman_coefficient)
[1] 0.6018171

Test the significance of the Spearman correlation coefficient

spearman_test_result <- cor.test(df$dbp1,df$dbp2, method = "spearman")
print(spearman_test_result) 

    Spearman's rank correlation rho

data:  df$dbp1 and df$dbp2
S = 223968, p-value = 3.794e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.6018171 

Anova

library(tidyr)
dfLong <- df %>% 
  select(sbp1,sbp2,sbp3) %>% 
  pivot_longer(cols = everything(),
               names_to = "type",
               values_to = "bp")

AR <- aov(bp~type, data = dfLong)
summary(AR)
             Df Sum Sq Mean Sq F value Pr(>F)
type          2     78   38.96   0.314  0.731
Residuals   447  55516  124.20               

MANOVA

manova_model <- manova(cbind(sbp1,sbp2,dbp1,dbp2) ~ edu,
                       data = df)
summary(manova_model)
           Df Pillai approx F num Df den Df   Pr(>F)   
edu         3 0.1965   2.5408     12    435 0.002988 **
Residuals 146                                          
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1