Welcome to ADSL Part 2
Today we complete ADSL by adding population flags, demographic groupings, and baseline measurements.
What we’ll add:
Population flags (SAFFL, ITTFL)
Demographics groupings (AGEGR1, SEXN, RACEN)
Baseline measurements (HEIGHT, WEIGHT, BMI)
Setup and Load Day 16 ADSL
library (admiral)
library (pharmaversesdtm)
library (dplyr)
library (lubridate)
library (stringr)
library (xportr)
# Load SDTM domains
# Character variables in these datasets may hold empty strings ("") where a
# value is missing. Convert them to NA up front so that later is.na() checks
# and filter conditions treat them as missing.
dm <- convert_blanks_to_na(pharmaversesdtm::dm)
ex <- convert_blanks_to_na(pharmaversesdtm::ex)
ds <- convert_blanks_to_na(pharmaversesdtm::ds)
vs <- convert_blanks_to_na(pharmaversesdtm::vs)
cat("Loaded SDTM domains \n ")
Rebuild ADSL Part 1 from Day 16
# Seed ADSL from DM (without DOMAIN) and copy ARM / ACTARM into the
# planned and actual treatment variables.
adsl <- mutate(
  select(dm, -DOMAIN),
  TRT01P = ARM,
  TRT01A = ACTARM
)
# Prepare EX with dates
# Adds EXST* datetime variables from EXSTDTC, then EXEN* variables from
# EXENDTC (end times use time_imputation = "last").
ex_ext <- derive_vars_dtm(ex, dtc = EXSTDTC, new_vars_prefix = "EXST")
ex_ext <- derive_vars_dtm(
  ex_ext,
  dtc = EXENDTC,
  new_vars_prefix = "EXEN",
  time_imputation = "last"
)
# First dose date
# A record qualifies when EXDOSE > 0, or when it is a zero-dose record whose
# EXTRT contains "PLACEBO"; records without EXSTDTM are excluded. The first
# qualifying record per subject (by EXSTDTM, then EXSEQ) supplies TRTSDTM.
adsl <- derive_vars_merged(
  adsl,
  dataset_add = ex_ext,
  by_vars = exprs(STUDYID, USUBJID),
  order = exprs(EXSTDTM, EXSEQ),
  mode = "first",
  new_vars = exprs(TRTSDTM = EXSTDTM),
  filter_add = (EXDOSE > 0 | (EXDOSE == 0 & str_detect(EXTRT, "PLACEBO"))) &
    !is.na(EXSTDTM)
)
# Last dose date
# Same dose criteria as for the first dose, but keyed on EXENDTM: the last
# qualifying record per subject (by EXENDTM, then EXSEQ) supplies TRTEDTM.
adsl <- derive_vars_merged(
  adsl,
  dataset_add = ex_ext,
  by_vars = exprs(STUDYID, USUBJID),
  order = exprs(EXENDTM, EXSEQ),
  mode = "last",
  new_vars = exprs(TRTEDTM = EXENDTM),
  filter_add = (EXDOSE > 0 | (EXDOSE == 0 & str_detect(EXTRT, "PLACEBO"))) &
    !is.na(EXENDTM)
)
# Convert the treatment datetimes to dates, then derive treatment duration
adsl <- adsl %>%
  derive_vars_dtm_to_dt(source_vars = exprs(TRTSDTM, TRTEDTM)) %>%
  derive_var_trtdurd()
cat(" \n ADSL Part 1 rebuilt:", nrow(adsl), "subjects \n ")
ADSL Part 1 rebuilt: 306 subjects
Step 1: Derive Safety Population Flag (SAFFL)
Safety Population: Subjects who received at least one dose of study treatment.
# Derive SAFFL
# "Y" when the subject has at least one EX record with EXDOSE > 0, or a
# zero-dose record whose EXTRT contains "PLACEBO".
# Population flags should be "Y"/"N" rather than "Y"/missing, so subjects
# whose EX records never meet the condition (false_value) and subjects with
# no EX records at all (missing_value) are both set to "N".
adsl <- adsl %>%
  derive_var_merged_exist_flag(
    dataset_add = ex,
    by_vars = exprs(STUDYID, USUBJID),
    new_var = SAFFL,
    condition = (EXDOSE > 0 | (EXDOSE == 0 & str_detect(EXTRT, "PLACEBO"))),
    false_value = "N",
    missing_value = "N"
  )
cat(" \n Safety population flag derived \n ")
Safety population flag derived
# Distribution of the safety flag by actual treatment
count(adsl, SAFFL, TRT01A)
# A tibble: 4 × 3
SAFFL TRT01A n
<chr> <chr> <int>
1 Y Placebo 86
2 Y Xanomeline High Dose 72
3 Y Xanomeline Low Dose 96
4 <NA> Screen Failure 52
Logic:
SAFFL = “Y” if subject has any EX record with EXDOSE > 0, or a zero-dose placebo record (EXDOSE == 0 and EXTRT contains "PLACEBO")
Used for safety analyses (AEs, labs, vitals)
Step 2: Derive ITT Population Flag (ITTFL)
Intent-to-Treat Population: Randomized subjects.
# Derive ITTFL
# "Y" when DS contains a record with DSDECOD == "RANDOMIZED".
# Population flags should be "Y"/"N" rather than "Y"/missing, so subjects
# with DS records but no randomization event (false_value) and subjects with
# no DS records at all (missing_value) are both set to "N".
adsl <- adsl %>%
  derive_var_merged_exist_flag(
    dataset_add = ds,
    by_vars = exprs(STUDYID, USUBJID),
    new_var = ITTFL,
    condition = DSDECOD == "RANDOMIZED",
    false_value = "N",
    missing_value = "N"
  )
cat(" \n ITT population flag derived \n ")
ITT population flag derived
# Cross-tabulate the ITT flag against the safety flag
count(adsl, ITTFL, SAFFL)
# A tibble: 2 × 3
ITTFL SAFFL n
<chr> <chr> <int>
1 Y Y 254
2 <NA> <NA> 52
Logic:
ITTFL = “Y” if randomization event exists in DS
Used for efficacy analyses
Step 3: Create Age Groupings
# Derive age group categories
# case_when() stops at the first matching condition, so each upper bound
# only needs to be tested once; a missing AGE matches nothing and falls
# through to the NA default.
adsl <- adsl %>%
  mutate(
    AGEGR1 = case_when(
      AGE < 65 ~ "<65",
      AGE < 75 ~ "65-74",
      AGE >= 75 ~ ">=75",
      TRUE ~ NA_character_
    ),
    AGEGR1N = case_when(
      AGE < 65 ~ 1,
      AGE < 75 ~ 2,
      AGE >= 75 ~ 3,
      TRUE ~ NA_real_
    )
  )
cat(" \n Age groupings created \n ")
# Distribution of the character/numeric pair
count(adsl, AGEGR1, AGEGR1N)
# A tibble: 3 × 3
AGEGR1 AGEGR1N n
<chr> <dbl> <int>
1 65-74 2 85
2 <65 1 42
3 >=75 3 179
Age Groups: <65 (AGEGR1N = 1), 65-74 (AGEGR1N = 2), >=75 (AGEGR1N = 3)
Step 4: Derive Numeric Demographics
# Numeric versions of categorical variables
# Each code is the position of the value in its lookup vector; values not
# listed (including NA) have no position and therefore stay NA.
adsl <- adsl %>%
  mutate(
    SEXN = as.numeric(match(SEX, c("F", "M"))),
    RACEN = as.numeric(match(
      RACE,
      c(
        "AMERICAN INDIAN OR ALASKA NATIVE",
        "ASIAN",
        "BLACK OR AFRICAN AMERICAN",
        "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER",
        "WHITE",
        "MULTIPLE"
      )
    ))
  )
cat(" \n Numeric demographics derived \n ")
Numeric demographics derived
# Check the SEX -> SEXN mapping
count(adsl, SEX, SEXN)
# A tibble: 2 × 3
SEX SEXN n
<chr> <dbl> <int>
1 F 1 179
2 M 2 127
# Check the RACE -> RACEN mapping (first 10 combinations)
head(count(adsl, RACE, RACEN), 10)
# A tibble: 4 × 3
RACE RACEN n
<chr> <dbl> <int>
1 AMERICAN INDIAN OR ALASKA NATIVE 1 2
2 ASIAN 2 2
3 BLACK OR AFRICAN AMERICAN 3 29
4 WHITE 5 273
Why numeric versions?
Enable table sorting
Required for some stats procedures
Standard across studies
Step 5: Prepare VS for Baseline Merging
# Filter VS for baseline
# Keep the baseline visit plus every screening visit. Screening visits are
# matched by prefix, because an exact match on "SCREENING" drops numbered
# visits such as "SCREENING 1". With the exact match no HEIGHT record reached
# vs_bl, which left HEIGHTBL and BMIBL missing for every subject.
# NOTE(review): confirm the screening visit names used in pharmaversesdtm::vs.
vs_bl <- vs %>%
  filter(
    str_detect(VISIT, "^SCREENING") | VISIT == "BASELINE" | VISITNUM == 0
  ) %>%
  derive_vars_dt(dtc = VSDTC, new_vars_prefix = "VS")
# Available baseline measurements
vs_bl %>%
  count(VSTESTCD)
# A tibble: 5 × 2
VSTESTCD n
<chr> <int>
1 DIABP 759
2 PULSE 759
3 SYSBP 759
4 TEMP 253
5 WEIGHT 253
Step 6: Derive Baseline Height
# Merge baseline height
# Takes the last non-missing HEIGHT result per subject from vs_bl.
# VSDT alone is not guaranteed to be unique within a subject (two records
# can share a date), so VISITNUM and VSSEQ are added as tie-breakers to make
# the "last" record deterministic.
adsl <- adsl %>%
  derive_vars_merged(
    dataset_add = vs_bl,
    by_vars = exprs(STUDYID, USUBJID),
    filter_add = VSTESTCD == "HEIGHT" & !is.na(VSSTRESN),
    new_vars = exprs(HEIGHTBL = VSSTRESN),
    order = exprs(VSDT, VISITNUM, VSSEQ),
    mode = "last"
  )
cat(" \n Baseline height merged \n ")
# Summary over subjects with a baseline height
adsl %>%
  filter(!is.na(HEIGHTBL)) %>%
  summarise(
    N = n(),
    Mean = round(mean(HEIGHTBL), 1),
    SD = round(sd(HEIGHTBL), 1),
    Median = median(HEIGHTBL),
    Min = min(HEIGHTBL),
    Max = max(HEIGHTBL)
  )
# A tibble: 1 × 6
N Mean SD Median Min Max
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 NaN NA NA Inf -Inf
Logic: Take last height at or before baseline
Step 7: Derive Baseline Weight
# Merge baseline weight
# Takes the last non-missing WEIGHT result per subject from vs_bl.
# VSDT alone is not guaranteed to be unique within a subject (two records
# can share a date), so VISITNUM and VSSEQ are added as tie-breakers to make
# the "last" record deterministic.
adsl <- adsl %>%
  derive_vars_merged(
    dataset_add = vs_bl,
    by_vars = exprs(STUDYID, USUBJID),
    filter_add = VSTESTCD == "WEIGHT" & !is.na(VSSTRESN),
    new_vars = exprs(WEIGHTBL = VSSTRESN),
    order = exprs(VSDT, VISITNUM, VSSEQ),
    mode = "last"
  )
cat(" \n Baseline weight merged \n ")
# Summary over subjects with a baseline weight
adsl %>%
  filter(!is.na(WEIGHTBL)) %>%
  summarise(
    N = n(),
    Mean = round(mean(WEIGHTBL), 1),
    SD = round(sd(WEIGHTBL), 1),
    Median = median(WEIGHTBL),
    Min = min(WEIGHTBL),
    Max = max(WEIGHTBL)
  )
# A tibble: 1 × 6
N Mean SD Median Min Max
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 253 66.6 14.1 66.7 34.0 108.
Step 8: Calculate Baseline BMI
# Calculate BMI
# BMIBL = WEIGHTBL / (HEIGHTBL / 100)^2, rounded to one decimal; it stays
# missing when either input is missing.
adsl <- adsl %>%
  mutate(
    BMIBL = case_when(
      is.na(HEIGHTBL) | is.na(WEIGHTBL) ~ NA_real_,
      TRUE ~ round(WEIGHTBL / (HEIGHTBL / 100)^2, 1)
    )
  )
cat(" \n Baseline BMI calculated \n ")
# Summary over subjects with a computed BMI
adsl %>%
  filter(!is.na(BMIBL)) %>%
  summarise(
    N = n(),
    Mean = round(mean(BMIBL), 1),
    SD = round(sd(BMIBL), 1),
    Median = median(BMIBL),
    Min = min(BMIBL),
    Max = max(BMIBL)
  )
# A tibble: 1 × 6
N Mean SD Median Min Max
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 NaN NA NA Inf -Inf
Formula: BMI = Weight (kg) / (Height (m))²
Step 9: Create BMI Groups
# BMI groupings
# case_when() stops at the first matching condition, so each cut-off is
# tested once in ascending order; a missing BMIBL falls through to NA.
adsl <- adsl %>%
  mutate(
    BMIBLGR1 = case_when(
      BMIBL < 18.5 ~ "<18.5",
      BMIBL < 25 ~ "18.5-<25",
      BMIBL < 30 ~ "25-<30",
      BMIBL >= 30 ~ ">=30",
      TRUE ~ NA_character_
    )
  )
# Distribution
count(adsl, BMIBLGR1)
# A tibble: 1 × 2
BMIBLGR1 n
<chr> <int>
1 <NA> 306
BMI Categories:
<18.5: Underweight
18.5-<25: Normal
25-<30: Overweight
>=30: Obese
Validation Checks
cat(" \n === ADSL Validation === \n\n ")
# Check 1: SAFFL consistency
# Subjects flagged "Y" that nevertheless have no treatment start date
check1 <- filter(adsl, SAFFL == "Y", is.na(TRTSDT))
cat("Check 1 - SAFFL='Y' but no TRTSDT:", nrow(check1), " \n ")
Check 1 - SAFFL='Y' but no TRTSDT: 0
# Check 2: Age groups
# Rows where the character group is populated but its numeric code is not
check2 <- filter(adsl, !is.na(AGEGR1), is.na(AGEGR1N))
cat("Check 2 - AGEGR1 without AGEGR1N:", nrow(check2), " \n ")
Check 2 - AGEGR1 without AGEGR1N: 0
# Check 3: BMI calculation
# Recompute BMI from WEIGHTBL and HEIGHTBL and keep the rows where the stored
# value differs from the recomputed one by 0.1 or more.
check3 <- adsl %>%
  filter(!is.na(BMIBL)) %>%
  mutate(
    BMI_check = round(WEIGHTBL / (HEIGHTBL / 100)^2, 1),
    Match = abs(BMIBL - BMI_check) < 0.1
  ) %>%
  filter(!Match)
cat("Check 3 - BMI mismatch:", nrow(check3), " \n ")
Check 3 - BMI mismatch: 0
# Check 4: Numeric demographics
# Rows where SEX is populated but no numeric code was assigned
check4 <- filter(adsl, !is.na(SEX), is.na(SEXN))
cat("Check 4 - SEX without SEXN:", nrow(check4), " \n ")
Check 4 - SEX without SEXN: 0
# Signal the end of the validation section
cat(" \n ✓ Validation complete \n ")
Complete ADSL Summary
# Overall dimensions of the finished dataset
cat(" \n === Complete ADSL === \n\n ")
cat("Total subjects:", nrow(adsl), " \n ")
cat("Variables:", ncol(adsl), " \n\n ")
# Population flags: number of subjects flagged "Y" (missing flags ignored)
cat("Population Flags: \n ")
summarise(
  adsl,
  SAFFL_Y = sum(SAFFL == "Y", na.rm = TRUE),
  ITTFL_Y = sum(ITTFL == "Y", na.rm = TRUE)
)
# A tibble: 1 × 2
SAFFL_Y ITTFL_Y
<int> <int>
1 254 254
# Header for the demographics table (safety population, by treatment)
cat(" \n Demographics by Treatment (Safety Population): \n ")
Demographics by Treatment (Safety Population):
# Per-arm summary restricted to SAFFL == "Y": subject count, age mean/SD,
# number and percentage of females, and mean baseline BMI (missing ignored).
adsl %>%
  filter(SAFFL == "Y") %>%
  group_by(TRT01A) %>%
  summarise(
    N = n(),
    Age_Mean = round(mean(AGE), 1),
    Age_SD = round(sd(AGE), 1),
    Female_N = sum(SEX == "F"),
    Female_Pct = round(100 * sum(SEX == "F") / n(), 1),
    BMI_Mean = round(mean(BMIBL, na.rm = TRUE), 1),
    .groups = "drop"
  )
# A tibble: 3 × 7
TRT01A N Age_Mean Age_SD Female_N Female_Pct BMI_Mean
<chr> <int> <dbl> <dbl> <int> <dbl> <dbl>
1 Placebo 86 75.2 8.6 53 61.6 NaN
2 Xanomeline High Dose 72 73.8 7.9 35 48.6 NaN
3 Xanomeline Low Dose 96 76 8.1 55 57.3 NaN
Export Complete ADSL
# Apply variable labels
# Each derived column is re-created with a "label" attribute attached;
# values and column order are unchanged.
adsl <- adsl %>%
  mutate(
    SAFFL = structure(SAFFL, label = "Safety Population Flag"),
    ITTFL = structure(ITTFL, label = "Intent-to-Treat Population Flag"),
    AGEGR1 = structure(AGEGR1, label = "Age Group 1"),
    AGEGR1N = structure(AGEGR1N, label = "Age Group 1 (N)"),
    SEXN = structure(SEXN, label = "Sex (N)"),
    RACEN = structure(RACEN, label = "Race (N)"),
    HEIGHTBL = structure(HEIGHTBL, label = "Baseline Height (cm)"),
    WEIGHTBL = structure(WEIGHTBL, label = "Baseline Weight (kg)"),
    BMIBL = structure(BMIBL, label = "Baseline Body Mass Index"),
    BMIBLGR1 = structure(BMIBLGR1, label = "Baseline BMI Group 1")
  )
# Export as XPT
xportr_write(adsl, path = "adsl.xpt", domain = "ADSL")
cat(" \n ✓ ADSL exported to: adsl.xpt \n ")
✓ ADSL exported to: adsl.xpt
Key Takeaways
Population Flags:
SAFFL = received treatment (from EX)
ITTFL = randomized (from DS)
Both derived with derive_var_merged_exist_flag()
Demographics:
Always create numeric versions (AGEGR1N, SEXN, RACEN)
Numeric enables sorting and statistics
Character for display in tables
Baseline Measurements:
Merge from VS using derive_vars_merged()
Take last baseline measurement
Calculate BMI from HEIGHT and WEIGHT
Validation:
Check flag consistency (SAFFL=“Y” must have TRTSDT)
Verify calculations (BMI formula)
Ensure char/numeric pairs match
Next Steps
Day 18: ADAE (Adverse Events) - OCCDS structure
Day 19: ADLB (Labs) - BDS with baseline
Day 20: ADVS (Vitals) - BDS with visits
Day 21: ADTTE (Time-to-Event)
ADSL is complete - ready for all other ADaMs!
Resources
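Admiral: https://pharmaverse.github.io/admiral/
CDISC: https://www.cdisc.org/standards/foundational/adam
Pharmaverse: https://pharmaverse.org/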
End of Day 17
Tomorrow: ADAE (Adverse Events)