# Clean the raw field-form export: pick the columns of interest by position,
# give them meaningful names, parse/convert their types, derive `id`, `age`
# and BMI, and drop the raw `cpf` column.
field_form_data <-
  field_form_data |>
  # Replace the original headers with positional names (X1, X2, ...), so the
  # columns below are picked by position rather than by header text.
  setNames(paste0("X", seq_along(field_form_data))) |>
  # Select and rename in a single step (`new_name = old_name`). The order of
  # the arguments defines the column order of the result.
  dplyr::select(
    timestamp = X1,
    birth_date = X4,
    cpf = X5,
    gestations = X11,
    study = X91,
    work = X92,
    weight_current = X95,
    weight_before = X96,
    height = X97,
    family_income = X99,
    health_plan = X101,
    solo = X102,
    ethnicity = X106,
    exercise = X119,
    education = X118,
    deliveries = X125,
    abortions = X126
  ) |>
  mutate(
    # `anonymize_id()` and `salt` are defined outside this block; presumably
    # this yields a salted pseudonymous identifier -- TODO confirm.
    id = anonymize_id(cpf, salt),
    # Timestamps are month-day-year, birth dates are day-month-year.
    timestamp = mdy_hms(timestamp, tz = "America/Sao_Paulo"),
    birth_date = dmy(birth_date),
    # Age is computed from the parsed birth date and the form timestamp.
    # NOTE(review): `:::` reaches into the internal namespace of `scaler`.
    age = scaler:::age(birth_date, timestamp),
    # NOTE(review): ethnicity is stored as an *ordered* factor; values not
    # listed in the levels become NA.
    ethnicity = ordered(
      ethnicity,
      levels = c("Indígena", "Preta", "Parda", "Amarela", "Branca")
    ),
    # Education levels, listed from the lowest to the highest level.
    education = ordered(
      education,
      levels = c(
        "Não frequentou a escola",
        "Fundamental incompleto",
        "Fundamental completo",
        "Ensino médio incompleto",
        "Ensino médio completo",
        "Ensino superior incompleto",
        "Ensino superior completo",
        "Mestrado incompleto",
        "Mestrado completo",
        "Doutorado incompleto",
        "Doutorado completo",
        "Pós-doutorado incompleto",
        "Pós-doutorado completo"
      )
    ),
    # Recode the textual answers; any other value is kept as is.
    # NOTE(review): "0" is recoded to "1" here -- confirm this is intended.
    gestations = case_match(
      gestations,
      "Sim" ~ "1",
      "0" ~ "1",
      "Não" ~ "0",
      .default = gestations
    ),
    # Missing answers are recorded as "0".
    deliveries = case_when(
      is.na(deliveries) ~ "0",
      .default = deliveries
    ),
    abortions = case_when(
      is.na(abortions) ~ "0",
      .default = abortions
    )
  ) |>
  mutate(
    # Columns holding numbers stored as text.
    across(
      .cols = c(
        weight_before, weight_current, height, family_income,
        gestations, deliveries, abortions
      ),
      .fns = as.numeric
    ),
    # "Sim"/"Não" answers become TRUE/FALSE; anything else becomes NA.
    across(
      .cols = c(health_plan, solo, exercise, work, study),
      .fns = function(answer) {
        case_match(
          answer,
          "Sim" ~ TRUE,
          "Não" ~ FALSE
        )
      }
    )
  ) |>
  mutate(
    # Manual correction for a single record, identified by its anonymized id.
    gestations = case_when(
      id == "ae5fc5cd" ~ 3,
      .default = gestations
    ),
    # BMI = weight / height^2, with `height` divided by 100 first.
    # Assumes weight in kg and height in cm -- TODO confirm.
    bmi_before = weight_before / (height / 100)^2,
    bmi_current = weight_current / (height / 100)^2
  ) |>
  # The raw CPF is dropped once `id` has been derived from it.
  dplyr::select(!cpf) |>
  relocate(
    id, timestamp, birth_date, age,
    weight_before, weight_current, height,
    bmi_before, bmi_current,
    family_income
  )