Welcome to ADAE
ADAE is the backbone of safety reporting. Every safety table flows from this dataset.
Today’s focus: Build ADAE with treatment-emergent flags and occurrence indicators.
Setup
library(admiral)
library(pharmaversesdtm)
library(pharmaverseadam)
library(dplyr)
library(lubridate)
library(stringr)
library(xportr)
# Reference ADSL shipped with pharmaverseadam (pre-built, not re-derived here)
adsl <- pharmaverseadam::adsl

# SDTM source domains: adverse events and exposure
ae <- pharmaversesdtm::ae
ex <- pharmaversesdtm::ex

# Report the size of the two inputs used below
cat("Loaded AE:", nrow(ae), "records\n")
cat("Loaded ADSL:", nrow(adsl), "subjects\n")
Loaded ADSL: 306 subjects
Why pharmaverseadam::adsl?
- Pre-built reference ADSL
- Has all required variables
- Saves rebuild time
Step 1: Merge ADSL to AE
# Start ADAE from the AE domain and attach the ADSL treatment variables
# (TRTSDT, TRTEDT, TRT01A), matching records on STUDYID and USUBJID.
adae <- admiral::derive_vars_merged(
  ae,
  dataset_add = adsl,
  by_vars = exprs(STUDYID, USUBJID),
  new_vars = exprs(TRTSDT, TRTEDT, TRT01A)
)

cat("\nADAE initialized:", nrow(adae), "records\n")
ADAE initialized: 1191 records
# Show the first five records to confirm the treatment dates were attached
head(select(adae, USUBJID, AETERM, TRTSDT, TRTEDT), n = 5)
# A tibble: 5 × 4
USUBJID AETERM TRTSDT TRTEDT
<chr> <chr> <date> <date>
1 01-701-1015 APPLICATION SITE ERYTHEMA 2014-01-02 2014-07-02
2 01-701-1015 APPLICATION SITE PRURITUS 2014-01-02 2014-07-02
3 01-701-1015 DIARRHOEA 2014-01-02 2014-07-02
4 01-701-1023 ATRIOVENTRICULAR BLOCK SECOND DEGREE 2012-08-05 2012-09-01
5 01-701-1023 ERYTHEMA 2012-08-05 2012-09-01
Step 2: Derive AE Start Date
# Convert the character AESTDTC into the date ASTDT, together with its
# imputation flag ASTDTF. No date_imputation is passed, so the function's
# default imputation rule applies to partial dates.
adae <- admiral::derive_vars_dt(
  adae,
  dtc = AESTDTC,
  new_vars_prefix = "AST",
  highest_imputation = "M",
  flag_imputation = "auto"
)

cat("\nAE start date derived\n")

# Compare the raw and derived start dates on the first five records
head(select(adae, USUBJID, AESEQ, AESTDTC, ASTDT, ASTDTF), n = 5)
# A tibble: 5 × 5
USUBJID AESEQ AESTDTC ASTDT ASTDTF
<chr> <dbl> <chr> <date> <chr>
1 01-701-1015 1 2014-01-03 2014-01-03 <NA>
2 01-701-1015 2 2014-01-03 2014-01-03 <NA>
3 01-701-1015 3 2014-01-09 2014-01-09 <NA>
4 01-701-1023 3 2012-08-26 2012-08-26 <NA>
5 01-701-1023 1 2012-08-07 2012-08-07 <NA>
Imputation:
- Partial dates like “2024-03” → “2024-03-01”
- ASTDTF flag shows if imputed
Step 3: Derive AE End Date
# Convert the character AEENDTC into the date AENDT, together with its
# imputation flag AENDTF. Unlike the start date, partial end dates are
# imputed with date_imputation = "last".
adae <- admiral::derive_vars_dt(
  adae,
  dtc = AEENDTC,
  new_vars_prefix = "AEN",
  highest_imputation = "M",
  date_imputation = "last",
  flag_imputation = "auto"
)

cat("\nAE end date derived\n")

# Compare the raw and derived end dates on the first five records
head(select(adae, USUBJID, AESEQ, AEENDTC, AENDT, AENDTF), n = 5)
# A tibble: 5 × 5
USUBJID AESEQ AEENDTC AENDT AENDTF
<chr> <dbl> <chr> <date> <chr>
1 01-701-1015 1 <NA> NA <NA>
2 01-701-1015 2 <NA> NA <NA>
3 01-701-1015 3 2014-01-11 2014-01-11 <NA>
4 01-701-1023 3 <NA> NA <NA>
5 01-701-1023 1 2012-08-30 2012-08-30 <NA>
Difference from start:
date_imputation = "last" → “2024-03” becomes “2024-03-31”
Step 4: Derive Study Days
# Derive the study days ASTDY and AENDY from ASTDT and AENDT, measured
# relative to the treatment start date TRTSDT.
adae <- admiral::derive_vars_dy(
  adae,
  reference_date = TRTSDT,
  source_vars = exprs(ASTDT, AENDT)
)

cat("\nStudy days derived\n")

# Show five records that have a start day, next to the dates behind them
adae %>%
  filter(!is.na(ASTDY)) %>%
  select(USUBJID, TRTSDT, ASTDT, ASTDY, AENDT, AENDY) %>%
  head(n = 5)
# A tibble: 5 × 6
USUBJID TRTSDT ASTDT ASTDY AENDT AENDY
<chr> <date> <date> <dbl> <date> <dbl>
1 01-701-1015 2014-01-02 2014-01-03 2 NA NA
2 01-701-1015 2014-01-02 2014-01-03 2 NA NA
3 01-701-1015 2014-01-02 2014-01-09 8 2014-01-11 10
4 01-701-1023 2012-08-05 2012-08-26 22 NA NA
5 01-701-1023 2012-08-05 2012-08-07 3 2012-08-30 26
Study day convention:
- Day 1 = first treatment day
- No Day 0
Step 5: Derive Duration
# Duration in days, counting both the start and the end day.
# A missing ASTDT or AENDT propagates through the subtraction, so ADURN is
# NA whenever either date is missing.
adae <- adae %>%
  mutate(ADURN = as.numeric(AENDT - ASTDT) + 1)

cat("\nDuration calculated\n")

# Descriptive statistics over the records that have a duration
summarise(
  filter(adae, !is.na(ADURN)),
  N = n(),
  Mean = round(mean(ADURN), digits = 1),
  Median = median(ADURN),
  Min = min(ADURN),
  Max = max(ADURN)
)
# A tibble: 1 × 5
N Mean Median Min Max
<int> <dbl> <dbl> <dbl> <dbl>
1 718 24.2 11 1 444
Formula: AENDT - ASTDT + 1
Step 6: Derive Treatment Emergent Flag
# Flag AEs that started on or after the treatment start date.
# Only the start is compared: there is no upper bound tied to TRTEDT.
# Records with a missing ASTDT or TRTSDT are left unflagged (NA).
adae <- adae %>%
  mutate(
    TRTEMFL = if_else(
      ASTDT >= TRTSDT,
      "Y",
      NA_character_,
      missing = NA_character_
    )
  )

cat("\nTreatment emergent flag derived\n")
Treatment emergent flag derived
# Number of records per TRTEMFL value
count(adae, TRTEMFL)
# A tibble: 2 × 2
TRTEMFL n
<chr> <int>
1 Y 1126
2 <NA> 65
Logic:
- AE started on or after treatment start
- Used in all safety tables
Step 7: Derive Severity Numeric
# Copy the severity to ASEV and code it numerically in ASEVN:
# MILD = 1, MODERATE = 2, SEVERE = 3; any other or missing value gives NA.
# match() returns the position in the ordered level list, which is the code.
adae <- adae %>%
  mutate(
    ASEV = AESEV,
    ASEVN = as.numeric(match(AESEV, c("MILD", "MODERATE", "SEVERE")))
  )

cat("\nSeverity numeric created\n")

# Cross-check the character and numeric severity
count(adae, ASEV, ASEVN)
# A tibble: 3 × 3
ASEV ASEVN n
<chr> <dbl> <int>
1 MILD 1 770
2 MODERATE 2 378
3 SEVERE 3 43
Step 8: Derive First Occurrence Flag
# Flag the first AE record of each subject.
# Records are sorted by subject, start date and sequence number, so the
# first row seen for a USUBJID is that subject's earliest AE; every later
# row for the same subject is a duplicate and stays unflagged (NA).
adae <- adae %>%
  arrange(USUBJID, ASTDT, AESEQ) %>%
  mutate(AOCCFL = if_else(!duplicated(USUBJID), "Y", NA_character_))

cat("\nFirst occurrence flag derived\n")
First occurrence flag derived
# Show the flagged first-occurrence records for the first five subjects
head(
  select(filter(adae, AOCCFL == "Y"), USUBJID, AETERM, ASTDT, AOCCFL),
  n = 5
)
# A tibble: 5 × 4
USUBJID AETERM ASTDT AOCCFL
<chr> <chr> <date> <chr>
1 01-701-1015 APPLICATION SITE ERYTHEMA 2014-01-03 Y
2 01-701-1023 ERYTHEMA 2012-08-07 Y
3 01-701-1028 APPLICATION SITE ERYTHEMA 2013-07-21 Y
4 01-701-1034 APPLICATION SITE PRURITUS 2014-08-27 Y
5 01-701-1047 HIATUS HERNIA 2013-02-12 Y
Logic:
- First AE record per subject
- Used for “subjects with at least one AE”
Step 9: Derive Serious AE Flag
# Carry the serious-event flag into ASER and code it numerically in ASERN:
# 1 when AESER is "Y", 0 for any other value, NA when AESER is missing.
adae <- adae %>%
  mutate(
    ASER = AESER,
    ASERN = as.numeric(AESER == "Y")
  )

cat("\nSerious flag created\n")

# Cross-check the character and numeric serious flags
count(adae, ASER, ASERN)
# A tibble: 2 × 3
ASER ASERN n
<chr> <dbl> <int>
1 N 0 1188
2 Y 1 3
Step 10: Derive Analysis Flag
# Analysis record flag: "Y" for treatment-emergent records, NA otherwise
# (case_when leaves unmatched and missing rows as NA).
adae <- adae %>%
  mutate(ANL01FL = case_when(TRTEMFL == "Y" ~ "Y"))

cat("\nAnalysis flag created\n")

# Confirm ANL01FL mirrors TRTEMFL
count(adae, ANL01FL, TRTEMFL)
# A tibble: 2 × 3
ANL01FL TRTEMFL n
<chr> <chr> <int>
1 Y Y 1126
2 <NA> <NA> 65
ANL01FL:
- Primary analysis records
- Typically = TRTEMFL
Validation
cat("\n=== ADAE Validation ===\n\n")

# Check 1: no record may start after it ends.
# Only records with both dates present are considered.
check1 <- filter(adae, !is.na(ASTDT), !is.na(AENDT), ASTDT > AENDT)
cat("Check 1 - ASTDT > AENDT:", nrow(check1), "\n")
Check 1 - ASTDT > AENDT: 0
# Check 2: recompute the duration from the dates and keep any record
# whose stored ADURN disagrees with it.
check2 <- adae %>%
  filter(!is.na(ADURN)) %>%
  mutate(Expected = as.numeric(AENDT - ASTDT) + 1) %>%
  mutate(Match = ADURN == Expected) %>%
  filter(!Match)
cat("Check 2 - ADURN mismatch:", nrow(check2), "\n")
Check 2 - ADURN mismatch: 0
# Check 3: a subject may carry at most one AOCCFL = "Y" record.
# Count flagged records per subject and keep subjects with more than one.
check3 <- adae %>%
  filter(AOCCFL == "Y") %>%
  group_by(USUBJID) %>%
  summarise(n = n(), .groups = "drop") %>%
  filter(n > 1)
cat("Check 3 - Multiple AOCCFL:", nrow(check3), "\n")
Check 3 - Multiple AOCCFL: 0
# Signal that all validation checks have been run
cat("\n✓ Validation complete\n", sep = "")
Summary
cat("\n=== ADAE Summary ===\n\n")

# Overall dimensions of the finished dataset
cat("Total records:", nrow(adae), "\n")
cat("Variables:", ncol(adae), "\n\n")

# Treatment-emergent AEs per actual treatment arm: number of events,
# number of distinct subjects, and number of serious events.
teae <- filter(adae, TRTEMFL == "Y")
teae %>%
  group_by(TRT01A) %>%
  summarise(
    N_AEs = n(),
    N_Subjects = n_distinct(USUBJID),
    N_Serious = sum(ASER == "Y", na.rm = TRUE),
    .groups = "drop"
  )
# A tibble: 3 × 4
TRT01A N_AEs N_Subjects N_Serious
<chr> <int> <int> <int>
1 Placebo 281 65 0
2 Xanomeline High Dose 418 69 1
3 Xanomeline Low Dose 427 84 2
# Treatment-emergent AEs broken down by severity
count(filter(adae, TRTEMFL == "Y"), ASEV)
# A tibble: 3 × 2
ASEV n
<chr> <int>
1 MILD 731
2 MODERATE 354
3 SEVERE 41
Export
# Attach variable labels to every variable derived in this script.
# Labels are applied last, immediately before the export, so that no later
# data-frame operation can drop the attributes.
# The imputation flags (ASTDTF, AENDTF) and the serious-event variables
# (ASER, ASERN) are derived above as well, so they are labelled here too.
var_labels <- c(
  ASTDT = "Analysis Start Date",
  ASTDTF = "Analysis Start Date Imputation Flag",
  ASTDY = "Analysis Start Relative Day",
  AENDT = "Analysis End Date",
  AENDTF = "Analysis End Date Imputation Flag",
  AENDY = "Analysis End Relative Day",
  ADURN = "Analysis Duration (Days)",
  TRTEMFL = "Treatment Emergent Flag",
  ASEV = "Analysis Severity",
  ASEVN = "Analysis Severity (N)",
  ASER = "Analysis Serious Event",
  ASERN = "Analysis Serious Event (N)",
  AOCCFL = "First Occurrence Flag",
  ANL01FL = "Analysis Record Flag 01"
)

for (var_name in names(var_labels)) {
  attr(adae[[var_name]], "label") <- var_labels[[var_name]]
}

# Export - xportr is already loaded via library()
xportr_write(adae, path = "adae.xpt", domain = "ADAE")
cat("\n✓ ADAE exported to: adae.xpt\n")
✓ ADAE exported to: adae.xpt
Key Takeaways
- Use pharmaverseadam::adsl for reference ADSL
- Impute partial start dates to the earliest possible date (“first”) and partial end dates to the latest possible date (“last”)
- TRTEMFL = AE on or after treatment start
- AOCCFL = first AE per subject (arrange + group_by + row_number)
- Always validate dates and flags
Next Steps
Day 19: ADLB - Lab Analysis Dataset
Day 20: ADVS - Vitals Analysis Dataset
Day 21: ADTTE - Time-to-Event