Appendix D — Sapopemba Survey Analysis
D.1 Overview
This report aims to analyze the potential number of pregnant women in the Sapopemba birth center who fit the research sample for actigraphy use during pregnancy.
This is done by considering that all the other eligibility criteria are met (e.g., non-smoker, no gestational issues, etc.). Here we are considering only:
- The age of the pregnant women.
- The minimum time required for actigraphy use.
- If the delivery was at the birth center.
Most of the other criteria are already met by the birth centers’ selection criteria.
D.2 Set the Environment
D.2.1 Load Packages
D.2.2 Set Data Directories
Code
data_dir <- here("data")
processed_data_dir <- here("data", "processed")
actigraphy_processed_data_dir <- here("data", "processed", "actigraphy")
sri_data_dir <- here("data", "sri")
sleep_prop_dir <- here("data", "sleep-prop")
lux_data_dir <- here("data", "lux")D.2.3 Set Keys
Code
gs4_auth(
cache = here(".secrets"),
email = TRUE,
use_oob = FALSE
)Code
salt <- Sys.getenv("PREGNANCY_SALT") # askpass()D.3 Import Data
Code
raw_data <-
"1w5mLtYYoh6MPsP8lPqj1tyKNzX9XJuPMmCMsNFM-bcs" |>
googlesheets4::read_sheet(sheet = "Dataset")D.4 Tidy Data
Code
raw_data <-
data |>
dplyr::mutate(
date_on_entry = lubridate::ymd(date_on_entry),
baby_birth_date = lubridate::ymd(baby_birth_date),
age = as.numeric(age),
ultrassom_date = lubridate::ymd(ultrassom_date),
ga_on_ultrassom =
(
ga_on_ultrassom |>
stringr::str_extract("^[0-9]*(?=/)") |>
as.numeric() |>
lubridate::weeks()
) +
(
ga_on_ultrassom |>
stringr::str_extract("(?<=/)[0-9]$") |>
as.numeric() |>
lubridate::days()
),
n_gestation = as.numeric(n_gestation),
n_delivery = as.numeric(n_delivery),
n_abortion = as.numeric(n_abortion),
birth_center_delivery = as.logical(birth_center_delivery)
)Code
data |> dplyr::glimpse()D.5 Transform Data
Code
ga_start <- function(ultrasound, ga) {
checkmate::assert_date(ultrasound)
rutils:::assert_period(ga, lower = lubridate::period(0))
rutils:::assert_identical(ultrasound, ga, type = "length")
lubridate::as_date(ultrasound - ga)
}Code
ga_point <- function(ga_start, point, print = TRUE) {
checkmate::assert_date(ga_start)
checkmate::assert_multi_class(point, c("Date", "POSIXt"))
checkmate::assert_flag(print)
for (i in seq_along(ga_start)) {
for (j in seq_along(point)) {
if (point[i] < ga_start[i]) {
cli::cli_abort(paste(
"{.strong {cli::col_red('point')}} must be equal or greater than",
"{.strong {cli::col_blue('ga_start')}}."
))
}
}
}
ga_start <- ga_start |> lubridate::as_date()
point <- point |> lubridate::as_date()
out <-
lubridate::interval(ga_start, point, tzone ="UTC") |>
as.numeric() %>%
`/`(as.numeric(lubridate::ddays())) %>%
lubridate::days()
if (isTRUE(print)) {
out_duration <- out |> lubridate::as.duration()
weeks <- floor(out_duration / lubridate::dweeks())
days <- out_duration %% lubridate::dweeks() / lubridate::ddays()
cli::cli_alert_info(paste(
"{.strong",
"{weeks} {.strong {cli::col_red('week(s)')}}",
"{days} {.strong {cli::col_red('day(s)')}}",
"}"
))
}
invisible(out)
}Code
ga_week_int <- function(ga_start, week) {
checkmate::assert_date(ga_start, len = 1)
checkmate::assert_number(week, lower = 0)
week_start <-
(ga_start + lubridate::dweeks(week)) |>
lubridate::as_date()
week_end <-
(week_start + lubridate::dweeks(1) - lubridate::dseconds(1)) |>
lubridate::as_date()
lubridate::interval(week_start, week_end)
}Code
ga_weeks <- function(ga) {
rutils:::assert_period(ga)
ga <- ga |> lubridate::as.duration()
floor(ga / lubridate::dweeks())
}Code
extract_weeks_from_interval <- function(start, end) {
checkmate::assert_date(start)
checkmate::assert_date(end)
lubridate::interval(start, end, tzone ="UTC") |>
as.numeric() %>%
`/`(as.numeric(lubridate::dweeks())) |>
floor()
}Code
data <-
data |>
dplyr::mutate(
ga_start = ga_start(
ultrasound = ultrassom_date,
ga = ga_on_ultrassom
),
ga_on_entry = ga_point(
ga_start = ga_start,
point = date_on_entry,
print = FALSE
),
ga_on_entry_weeks = ga_weeks(ga_on_entry),
ga_on_entry_days = ga_days(ga_on_entry),
ga_on_baby_birth_date = ga_point(
ga_start = ga_start,
point = baby_birth_date,
print = FALSE
),
ga_on_baby_birth_date_weeks = ga_weeks(ga_on_baby_birth_date),
ga_on_baby_birth_date_days = ga_days(ga_on_baby_birth_date),
duration_from_entry_to_birth =
lubridate::as.period(baby_birth_date - date_on_entry)
)Code
data <-
data |>
dplyr::select(
ga_start,
date_on_entry,
ga_on_entry,
ga_on_entry_weeks,
ga_on_entry_days,
baby_birth_date,
ga_on_baby_birth_date,
ga_on_baby_birth_date_weeks,
ga_on_baby_birth_date_days,
ultrassom_date,
ga_on_ultrassom,
duration_from_entry_to_birth,
age,
n_gestation,
n_delivery,
n_abortion,
birth_center_delivery
)D.6 Explore Data
D.6.1 Parameters
Code
min_actigraphy_days <- 7D.6.2 How much time does the survey cover?
Code
period_from_interval <- function(start, end) {
checkmate::assert_date(start)
checkmate::assert_date(end)
lubridate::interval(start, end, tzone ="UTC") |>
lubridate::as.period()
}D.6.3 What is the potential number of pregnant women who fit the research sample in a given period of time?
Code
n_fit <- function(data, start, end, min_actigraphy_days = 7, print = TRUE) {
checkmate::assert_tibble(data)
checkmate::assert_date(start)
checkmate::assert_date(end)
checkmate::assert_number(min_actigraphy_days)
checkmate::assert_flag(print)
data <-
data |>
dplyr::filter(
date_on_entry >= start,
date_on_entry <= end
)
total <- data |> nrow()
n_fit <-
data |>
dplyr::filter(
age >= 18,
duration_from_entry_to_birth >= lubridate::days(min_actigraphy_days),
birth_center_delivery == TRUE
) |>
nrow()
n_fit_per <- n_fit / total * 100
if (isTRUE(print)) {
cli::cli_bullets(c(
paste0(
"{.strong {cli::col_red('Number of fit pregnant women')}}: ",
"{n_fit}/{total}"
),
paste0(
"{.strong Percentage of fit pregnant women}: ",
"{round(n_fit_per, 3)} %"
)
))
}
invisible(
list(
n_fit = n_fit,
n_fit_per = n_fit_per,
total = total,
start = start,
end = end,
min_actigraphy_days = min_actigraphy_days
)
)
}Code
data |>
n_fit(
start = start,
end = end,
min_actigraphy_days = min_actigraphy_days,
print = TRUE
)Code
data |>
dplyr::filter(
date_on_entry >= start,
date_on_entry <= end
) |>
ggplot2::ggplot(
ggplot2::aes(
x =
duration_from_entry_to_birth |>
lubridate::as.duration() %>%
`/`(lubridate::ddays())
)
) +
ggplot2::geom_histogram(bins = 30) +
ggplot2::geom_vline(
xintercept = 7,
color = "red",
linetype = "dashed"
) +
ggplot2::geom_vline(
xintercept = 15,
color = "blue",
linetype = "dashed"
) +
ggplot2::labs(
x = "Days from entry to birth",
y = "Frequency",
title = "Distribution of days from entry to birth"
)D.6.4 What is the maximum gestational age at the time of recruitment that provides the best chance of achieving the optimal gain-loss balance in the number of pregnant women who meet the minimum time required for actigraph use?
Code
gain_minus_loss <- function(
data,
ga_cut,
min_actigraphy_days = 7,
print = TRUE
) {
checkmate::assert_tibble(data)
rutils:::assert_period(ga_cut)
checkmate::assert_number(min_actigraphy_days)
checkmate::assert_flag(print)
gain <-
data |>
dplyr::filter(
ga_on_entry <= ga_cut,
age >= 18,
duration_from_entry_to_birth >= lubridate::days(min_actigraphy_days),
birth_center_delivery == TRUE
) |>
nrow()
loss <-
data |>
dplyr::filter(
ga_on_entry <= ga_cut,
age >= 18,
duration_from_entry_to_birth < lubridate::days(min_actigraphy_days),
birth_center_delivery == TRUE
) |>
nrow()
gain_minus_loss <- gain - loss
total <- gain + loss
if (isTRUE(print)) {
cli::cli_bullets(c(
paste0(
"{.strong {cli::col_blue('Gestational age cut')}}: ",
" {ga_weeks_days(ga_cut)}"
),
paste0("{.strong Gain}: {gain}"),
paste0("{.strong Loss}: {loss}"),
paste0("{.strong Total}: {total}"),
paste0("{.strong {cli::col_red('Gain - Loss')}}: {gain_minus_loss}")
))
}
invisible(
list(
ga_cut = ga_cut,
gain = gain,
loss = loss,
total = total,
gain_minus_loss = gain_minus_loss,
min_actigraphy_days = min_actigraphy_days
)
)
}Code
min_ga <- lubridate::weeks(36)
max_ga <- lubridate::weeks(41) + lubridate::days(6)
diff <- max_ga - min_ga
days <- attributes(diff)$day
ga_options <- purrr::map_vec(
.x = lubridate::days(seq(days)),
.f = ~ .x + min
)
stats <- purrr::map_df(
.x = ga_options,
.f = function(x) {
list <-
data |>
gain_minus_loss(
ga_cut = x,
min_actigraphy_days = min_actigraphy_days,
print = FALSE
) |>
dplyr::as_tibble()
}
)Code
Code
stats |>
ggplot2::ggplot(
ggplot2::aes(
x = ga_cut |>
lubridate::as.duration() %>%
`/`(lubridate::dweeks()),
y = gain_minus_loss
)
) +
ggplot2::geom_line(color = "blue") +
ggplot2::geom_point() +
ggplot2::labs(
x = "Gestational age cut",
y = "Gain - Loss",
title = "Gain - Loss by gestational age cut"
)Code
data |>
dplyr::filter(
age >= 18,
birth_center_delivery == TRUE,
duration_from_entry_to_birth >= lubridate::days(min_actigraphy_days)
) |>
ggplot2::ggplot(
ggplot2::aes(
x = ga_on_entry |>
lubridate::as.duration() %>%
`/`(lubridate::dweeks())
)
) +
ggplot2::geom_histogram(bins = 30) +
ggplot2::geom_vline(
xintercept =
lubridate::dweeks(37) %>%
`+`(lubridate::ddays(6)) %>%
`/`(lubridate::dweeks()),
color = "red",
linetype = "dashed"
) +
ggplot2::labs(
x = "Gestation age at entry",
y = "Frequency",
title = paste0(
"Distribution of the gestational age at the time of entry ",
"of pregnant women",
"\n",
"that meet the minimum time required for actigraph use"
)
)Code
data |>
dplyr::filter(
age >= 18,
birth_center_delivery == TRUE,
duration_from_entry_to_birth < lubridate::days(min_actigraphy_days)
) |>
ggplot2::ggplot(
ggplot2::aes(
x = ga_on_entry |>
lubridate::as.duration() %>%
`/`(lubridate::dweeks())
)
) +
ggplot2::geom_histogram(bins = 30) +
ggplot2::geom_vline(
xintercept =
lubridate::dweeks(37) %>%
`+`(lubridate::ddays(6)) %>%
`/`(lubridate::dweeks()),
color = "red",
linetype = "dashed"
) +
ggplot2::labs(
x = "Gestation age at entry",
y = "Frequency",
title = paste0(
"Distribution of the gestational age at the time of entry ",
"of pregnant women",
"\n",
"that DO NOT meet the minimum time required for actigraph use"
)
)