Welcome to ADVS
ADVS follows the same BDS structure as ADLB, but with additional complexity: visit windows and multiple readings per timepoint.
Today’s focus: Build ADVS with visit mapping, baseline flags, and change from baseline.
Setup
library (admiral)
library (pharmaversesdtm)
library (pharmaverseadam)
library (dplyr)
library (lubridate)
library (stringr)
library (xportr)
# Load SDTM and ADaM
vs <- pharmaversesdtm:: vs
adsl <- pharmaverseadam:: adsl
cat ("Loaded VS:" , nrow (vs), "records \n " )
cat ("Loaded ADSL:" , nrow (adsl), "subjects \n " )
Loaded ADSL: 306 subjects
Step 1: Merge ADSL Variables
# Start with VS and merge treatment info
advs <- vs %>%
admiral:: derive_vars_merged (
dataset_add = adsl,
new_vars = exprs (TRTSDT, TRT01A),
by_vars = exprs (STUDYID, USUBJID)
)
cat (" \n ADVS initialized:" , nrow (advs), "records \n " )
ADVS initialized: 29643 records
# Check
advs %>%
select (USUBJID, VSTESTCD, VSDTC, TRTSDT) %>%
head (5 )
# A tibble: 5 × 4
USUBJID VSTESTCD VSDTC TRTSDT
<chr> <chr> <chr> <date>
1 01-701-1015 DIABP 2013-12-26 2014-01-02
2 01-701-1015 DIABP 2013-12-26 2014-01-02
3 01-701-1015 DIABP 2013-12-26 2014-01-02
4 01-701-1015 DIABP 2013-12-31 2014-01-02
5 01-701-1015 DIABP 2013-12-31 2014-01-02
Step 2: Derive Analysis Date
# Derive analysis date from VSDTC
advs <- advs %>%
admiral:: derive_vars_dt (
new_vars_prefix = "A" ,
dtc = VSDTC
)
cat (" \n Analysis date derived \n " )
# Check
advs %>%
select (USUBJID, VSTESTCD, VSDTC, ADT) %>%
head (5 )
# A tibble: 5 × 4
USUBJID VSTESTCD VSDTC ADT
<chr> <chr> <chr> <date>
1 01-701-1015 DIABP 2013-12-26 2013-12-26
2 01-701-1015 DIABP 2013-12-26 2013-12-26
3 01-701-1015 DIABP 2013-12-26 2013-12-26
4 01-701-1015 DIABP 2013-12-31 2013-12-31
5 01-701-1015 DIABP 2013-12-31 2013-12-31
Step 3: Derive Study Day
# Calculate relative day
advs <- advs %>%
admiral:: derive_vars_dy (
reference_date = TRTSDT,
source_vars = exprs (ADT)
)
cat (" \n Study day derived \n " )
# Check
advs %>%
select (USUBJID, VSTESTCD, TRTSDT, ADT, ADY) %>%
filter (! is.na (ADY)) %>%
head (5 )
# A tibble: 5 × 5
USUBJID VSTESTCD TRTSDT ADT ADY
<chr> <chr> <date> <date> <dbl>
1 01-701-1015 DIABP 2014-01-02 2013-12-26 -7
2 01-701-1015 DIABP 2014-01-02 2013-12-26 -7
3 01-701-1015 DIABP 2014-01-02 2013-12-26 -7
4 01-701-1015 DIABP 2014-01-02 2013-12-31 -2
5 01-701-1015 DIABP 2014-01-02 2013-12-31 -2
Step 4: Create PARAM and AVAL
# Create BDS variables
advs <- advs %>%
mutate (
PARAMCD = VSTESTCD,
PARAM = VSTEST,
AVAL = VSSTRESN,
AVALU = VSSTRESU
)
cat (" \n PARAM and AVAL created \n " )
# Check parameters
advs %>%
count (PARAMCD, PARAM) %>%
head (10 )
# A tibble: 6 × 3
PARAMCD PARAM n
<chr> <chr> <int>
1 DIABP Diastolic Blood Pressure 8207
2 HEIGHT Height 254
3 PULSE Pulse Rate 8204
4 SYSBP Systolic Blood Pressure 8208
5 TEMP Temperature 2720
6 WEIGHT Weight 2050
Step 5: Map Analysis Visits
# Map VISIT to analysis visit
advs <- advs %>%
mutate (
AVISIT = case_when (
VISIT == "SCREENING" ~ "Baseline" ,
VISIT == "BASELINE" ~ "Baseline" ,
VISIT == "WEEK 2" ~ "Week 2" ,
VISIT == "WEEK 4" ~ "Week 4" ,
VISIT == "WEEK 8" ~ "Week 8" ,
TRUE ~ VISIT
),
AVISITN = case_when (
AVISIT == "Baseline" ~ 0 ,
AVISIT == "Week 2" ~ 2 ,
AVISIT == "Week 4" ~ 4 ,
AVISIT == "Week 8" ~ 8 ,
TRUE ~ as.numeric (VISITNUM)
)
)
cat (" \n Analysis visit mapped \n " )
# Distribution
advs %>%
count (AVISIT, AVISITN) %>%
arrange (AVISITN)
# A tibble: 16 × 3
AVISIT AVISITN n
<chr> <dbl> <int>
1 Baseline 0 2783
2 SCREENING 1 1 3044
3 SCREENING 2 2 2496
4 Week 2 2 2736
5 UNSCHEDULED 3.1 3.1 10
6 AMBUL ECG PLACEMENT 3.5 2060
7 Week 4 4 2495
8 AMBUL ECG REMOVAL 6 1890
9 WEEK 6 7 2296
10 Week 8 8 2077
11 WEEK 12 9 1881
12 WEEK 16 10 1616
13 WEEK 20 11 1407
14 WEEK 24 12 1272
15 WEEK 26 13 1220
16 RETRIEVAL 201 360
Visit Mapping:
AVISIT: Analysis visit label
AVISITN: Analysis visit number for sorting
Step 6: Handle Position and Timepoint
# Map position and timepoint
advs <- advs %>%
mutate (
ATPT = VSTPT,
ATPTN = VSTPTNUM
)
cat (" \n Position and timepoint mapped \n " )
Position and timepoint mapped
# Check
advs %>%
count (VSPOS, VSTPT, ATPTN) %>%
head (5 )
# A tibble: 4 × 4
VSPOS VSTPT ATPTN n
<chr> <chr> <dbl> <int>
1 STANDING AFTER STANDING FOR 1 MINUTE 816 8204
2 STANDING AFTER STANDING FOR 3 MINUTES 817 8207
3 SUPINE AFTER LYING DOWN FOR 5 MINUTES 815 8208
4 <NA> <NA> NA 5024
Multiple Readings:
ATPT: Analysis timepoint (e.g., “PRE-DOSE”, “1H POST-DOSE”)
ATPTN: Timepoint number for ordering
Step 7: Derive Baseline Flag
# Flag last value before treatment start
advs <- advs %>%
arrange (USUBJID, PARAMCD, ADT, ATPTN) %>%
group_by (USUBJID, PARAMCD) %>%
mutate (
ABLFL = case_when (
! is.na (AVAL) & ! is.na (TRTSDT) & ADT <= TRTSDT &
row_number () == max (which (ADT <= TRTSDT)) ~ "Y" ,
TRUE ~ NA_character_
)
) %>%
ungroup ()
cat (" \n Baseline flag derived \n " )
# Check
advs %>%
filter (ABLFL == "Y" ) %>%
select (USUBJID, PARAMCD, ADT, TRTSDT, AVAL, ABLFL) %>%
head (5 )
# A tibble: 5 × 6
USUBJID PARAMCD ADT TRTSDT AVAL ABLFL
<chr> <chr> <date> <date> <dbl> <chr>
1 01-701-1015 DIABP 2014-01-02 2014-01-02 61 Y
2 01-701-1015 HEIGHT 2013-12-26 2014-01-02 147. Y
3 01-701-1015 PULSE 2014-01-02 2014-01-02 59 Y
4 01-701-1015 SYSBP 2014-01-02 2014-01-02 131 Y
5 01-701-1015 TEMP 2014-01-02 2014-01-02 36.2 Y
Step 8: Derive Baseline Value
# Merge baseline value to all records
advs <- advs %>%
group_by (USUBJID, PARAMCD) %>%
mutate (
BASE = AVAL[ABLFL == "Y" ][1 ]
) %>%
ungroup ()
cat (" \n Baseline value derived \n " )
# Check
advs %>%
select (USUBJID, PARAMCD, AVISIT, AVAL, ABLFL, BASE) %>%
head (10 )
# A tibble: 10 × 6
USUBJID PARAMCD AVISIT AVAL ABLFL BASE
<chr> <chr> <chr> <dbl> <chr> <dbl>
1 01-701-1015 DIABP SCREENING 1 64 <NA> NA
2 01-701-1015 DIABP SCREENING 1 83 <NA> NA
3 01-701-1015 DIABP SCREENING 1 57 <NA> NA
4 01-701-1015 DIABP SCREENING 2 68 <NA> NA
5 01-701-1015 DIABP SCREENING 2 59 <NA> NA
6 01-701-1015 DIABP SCREENING 2 71 <NA> NA
7 01-701-1015 DIABP Baseline 56 <NA> NA
8 01-701-1015 DIABP Baseline 51 <NA> NA
9 01-701-1015 DIABP Baseline 61 Y NA
10 01-701-1015 DIABP AMBUL ECG PLACEMENT 67 <NA> NA
Step 9: Derive Change from Baseline
# Calculate change and percent change
advs <- advs %>%
mutate (
CHG = case_when (
! is.na (AVAL) & ! is.na (BASE) ~ AVAL - BASE,
TRUE ~ NA_real_
),
PCHG = case_when (
! is.na (CHG) & BASE != 0 ~ (CHG / BASE) * 100 ,
TRUE ~ NA_real_
)
)
cat (" \n Change from baseline calculated \n " )
Change from baseline calculated
# Summary by parameter
advs %>%
filter (! is.na (CHG)) %>%
group_by (PARAMCD) %>%
summarise (
N = n (),
Mean_CHG = round (mean (CHG), 1 ),
SD_CHG = round (sd (CHG), 1 ),
.groups = "drop"
) %>%
head (5 )
# A tibble: 2 × 4
PARAMCD N Mean_CHG SD_CHG
<chr> <int> <dbl> <dbl>
1 HEIGHT 254 0 0
2 WEIGHT 6 -0.7 1.5
Step 10: Derive Reference Range Indicators
# Create reference range indicators for BP and HR
advs <- advs %>%
mutate (
ANRIND = case_when (
PARAMCD == "SYSBP" & ! is.na (AVAL) ~ case_when (
AVAL < 90 ~ "LOW" ,
AVAL > 140 ~ "HIGH" ,
TRUE ~ "NORMAL"
),
PARAMCD == "DIABP" & ! is.na (AVAL) ~ case_when (
AVAL < 60 ~ "LOW" ,
AVAL > 90 ~ "HIGH" ,
TRUE ~ "NORMAL"
),
PARAMCD == "PULSE" & ! is.na (AVAL) ~ case_when (
AVAL < 60 ~ "LOW" ,
AVAL > 100 ~ "HIGH" ,
TRUE ~ "NORMAL"
),
TRUE ~ NA_character_
),
BNRIND = case_when (
PARAMCD == "SYSBP" & ! is.na (BASE) ~ case_when (
BASE < 90 ~ "LOW" ,
BASE > 140 ~ "HIGH" ,
TRUE ~ "NORMAL"
),
PARAMCD == "DIABP" & ! is.na (BASE) ~ case_when (
BASE < 60 ~ "LOW" ,
BASE > 90 ~ "HIGH" ,
TRUE ~ "NORMAL"
),
PARAMCD == "PULSE" & ! is.na (BASE) ~ case_when (
BASE < 60 ~ "LOW" ,
BASE > 100 ~ "HIGH" ,
TRUE ~ "NORMAL"
),
TRUE ~ NA_character_
)
)
cat (" \n Reference range indicators derived \n " )
Reference range indicators derived
# Distribution
advs %>%
count (PARAMCD, ANRIND) %>%
head (10 )
# A tibble: 10 × 3
PARAMCD ANRIND n
<chr> <chr> <int>
1 DIABP HIGH 467
2 DIABP LOW 386
3 DIABP NORMAL 7352
4 DIABP <NA> 2
5 HEIGHT <NA> 254
6 PULSE HIGH 47
7 PULSE LOW 589
8 PULSE NORMAL 7565
9 PULSE <NA> 3
10 SYSBP HIGH 2559
Clinical Ranges (Simplified):
Systolic BP: <90 LOW, 90-140 NORMAL, >140 HIGH
Diastolic BP: <60 LOW, 60-90 NORMAL, >90 HIGH
Pulse: <60 LOW, 60-100 NORMAL, >100 HIGH
Step 11: Derive Analysis Sequence
# Create sequence number
advs <- advs %>%
arrange (USUBJID, PARAMCD, AVISITN, ATPTN, ADT) %>%
group_by (USUBJID) %>%
mutate (ASEQ = row_number ()) %>%
ungroup ()
cat (" \n Analysis sequence derived \n " )
Analysis sequence derived
Validation
cat (" \n === ADVS Validation === \n\n " )
# Check 1: One ABLFL per subject-parameter
check1 <- advs %>%
filter (ABLFL == "Y" ) %>%
count (USUBJID, PARAMCD) %>%
filter (n > 1 )
cat ("Check 1 - Multiple ABLFL per subject-PARAMCD:" , nrow (check1), " \n " )
Check 1 - Multiple ABLFL per subject-PARAMCD: 0
# Check 2: CHG is NA at baseline
check2 <- advs %>%
filter (ABLFL == "Y" , ! is.na (CHG))
cat ("Check 2 - CHG not NA at baseline:" , nrow (check2), " \n " )
Check 2 - CHG not NA at baseline: 255
# Check 3: BASE consistency
check3 <- advs %>%
filter (! is.na (BASE)) %>%
group_by (USUBJID, PARAMCD) %>%
summarise (n_distinct_base = n_distinct (BASE), .groups = "drop" ) %>%
filter (n_distinct_base > 1 )
cat ("Check 3 - Inconsistent BASE within subject-PARAMCD:" , nrow (check3), " \n " )
Check 3 - Inconsistent BASE within subject-PARAMCD: 0
# Check 4: Visit ordering
check4 <- advs %>%
filter (! is.na (AVISITN)) %>%
group_by (AVISIT) %>%
summarise (n_distinct_avisitn = n_distinct (AVISITN), .groups = "drop" ) %>%
filter (n_distinct_avisitn > 1 )
cat ("Check 4 - Multiple AVISITN per AVISIT:" , nrow (check4), " \n " )
Check 4 - Multiple AVISITN per AVISIT: 0
cat (" \n ✓ Validation complete \n " )
Summary
cat (" \n === ADVS Summary === \n\n " )
cat ("Total records:" , nrow (advs), " \n " )
cat ("Variables:" , ncol (advs), " \n\n " )
# By parameter
advs %>%
group_by (PARAMCD, PARAM) %>%
summarise (
N_Records = n (),
N_Subjects = n_distinct (USUBJID),
N_Baseline = sum (ABLFL == "Y" , na.rm = TRUE ),
.groups = "drop"
) %>%
head (10 )
# A tibble: 6 × 5
PARAMCD PARAM N_Records N_Subjects N_Baseline
<chr> <chr> <int> <int> <int>
1 DIABP Diastolic Blood Pressure 8207 254 254
2 HEIGHT Height 254 254 254
3 PULSE Pulse Rate 8204 254 254
4 SYSBP Systolic Blood Pressure 8208 254 254
5 TEMP Temperature 2720 254 254
6 WEIGHT Weight 2050 254 254
# By visit
advs %>%
count (AVISIT, AVISITN) %>%
arrange (AVISITN)
# A tibble: 16 × 3
AVISIT AVISITN n
<chr> <dbl> <int>
1 Baseline 0 2783
2 SCREENING 1 1 3044
3 SCREENING 2 2 2496
4 Week 2 2 2736
5 UNSCHEDULED 3.1 3.1 10
6 AMBUL ECG PLACEMENT 3.5 2060
7 Week 4 4 2495
8 AMBUL ECG REMOVAL 6 1890
9 WEEK 6 7 2296
10 Week 8 8 2077
11 WEEK 12 9 1881
12 WEEK 16 10 1616
13 WEEK 20 11 1407
14 WEEK 24 12 1272
15 WEEK 26 13 1220
16 RETRIEVAL 201 360
Export
# Add labels
attr (advs$ PARAMCD, "label" ) <- "Parameter Code"
attr (advs$ PARAM, "label" ) <- "Parameter"
attr (advs$ AVAL, "label" ) <- "Analysis Value"
attr (advs$ AVALU, "label" ) <- "Analysis Value Unit"
attr (advs$ ADT, "label" ) <- "Analysis Date"
attr (advs$ ADY, "label" ) <- "Analysis Relative Day"
attr (advs$ AVISIT, "label" ) <- "Analysis Visit"
attr (advs$ AVISITN, "label" ) <- "Analysis Visit (N)"
attr (advs$ ATPT, "label" ) <- "Analysis Timepoint"
attr (advs$ ATPTN, "label" ) <- "Analysis Timepoint (N)"
attr (advs$ ABLFL, "label" ) <- "Baseline Record Flag"
attr (advs$ BASE, "label" ) <- "Baseline Value"
attr (advs$ CHG, "label" ) <- "Change from Baseline"
attr (advs$ PCHG, "label" ) <- "Percent Change from Baseline"
attr (advs$ ANRIND, "label" ) <- "Analysis Reference Range Indicator"
attr (advs$ BNRIND, "label" ) <- "Baseline Reference Range Indicator"
attr (advs$ ASEQ, "label" ) <- "Analysis Sequence Number"
# Export
xportr_write (advs, path = "advs.xpt" , domain = "ADVS" )
cat (" \n ✓ ADVS exported to: advs.xpt \n " )
✓ ADVS exported to: advs.xpt
Key Takeaways
ADVS follows same BDS structure as ADLB
Visit mapping: VISIT → AVISIT/AVISITN
Multiple readings handled via ATPT/ATPTN (timepoint)
ABLFL logic same as ADLB (last value ≤ TRTSDT)
Clinical ranges differ by vital sign parameter
Position (SITTING/STANDING) captured in original VS
Next Steps
Day 21: ADTTE - Time-to-Event Analysis
Day 22: ADCM and ADRS
Day 23: TLF Production