Welcome to ADLB
ADLB is your first BDS (Basic Data Structure) dataset. Unlike ADSL (one row per subject) and ADAE (one row per event), a BDS dataset has one row per subject, per parameter, per timepoint.
Today’s focus: Build ADLB with baseline flags, change from baseline, and toxicity grades.
Setup
library (admiral)
library (pharmaversesdtm)
library (pharmaverseadam)
library (dplyr)
library (lubridate)
library (stringr)
library (xportr)
# Input data: the subject-level ADaM dataset plus the LB and VS SDTM
# datasets shipped with the pharmaverse example packages.
adsl <- pharmaverseadam::adsl
lb <- pharmaversesdtm::lb
vs <- pharmaversesdtm::vs

# Report the size of the two inputs used to build ADLB.
cat("Loaded LB:", nrow(lb), "records \n ")
cat("Loaded ADSL:", nrow(adsl), "subjects \n ")
Loaded ADSL: 306 subjects
Step 1: Merge ADSL Variables
# Seed ADLB from LB and bring in TRTSDT, TRTEDT and TRT01A from ADSL,
# matching records on STUDYID and USUBJID.
adlb <- admiral::derive_vars_merged(
  lb,
  dataset_add = adsl,
  by_vars = exprs(STUDYID, USUBJID),
  new_vars = exprs(TRTSDT, TRTEDT, TRT01A)
)

cat(" \n ADLB initialized:", nrow(adlb), "records \n ")
ADLB initialized: 59580 records
# Eyeball the first five rows to confirm TRTSDT arrived on the LB records.
head(select(adlb, USUBJID, LBTESTCD, LBDTC, TRTSDT), n = 5)
# A tibble: 5 × 4
USUBJID LBTESTCD LBDTC TRTSDT
<chr> <chr> <chr> <date>
1 01-701-1015 ALB 2013-12-26T14:45 2014-01-02
2 01-701-1015 ALB 2014-01-16T13:17 2014-01-02
3 01-701-1015 ALB 2014-01-30T08:50 2014-01-02
4 01-701-1015 ALB 2014-02-12T12:56 2014-01-02
5 01-701-1015 ALB 2014-03-05T12:25 2014-01-02
Step 2: Derive Analysis Date
# Convert the character collection date/time (LBDTC) into a date column.
# The "A" prefix makes the new variable ADT.
adlb <- admiral::derive_vars_dt(
  adlb,
  dtc = LBDTC,
  new_vars_prefix = "A"
)

cat(" \n Analysis date derived \n ")
# Compare the source LBDTC with the derived ADT on the first five rows.
head(select(adlb, USUBJID, LBTESTCD, LBDTC, ADT), n = 5)
# A tibble: 5 × 4
USUBJID LBTESTCD LBDTC ADT
<chr> <chr> <chr> <date>
1 01-701-1015 ALB 2013-12-26T14:45 2013-12-26
2 01-701-1015 ALB 2014-01-16T13:17 2014-01-16
3 01-701-1015 ALB 2014-01-30T08:50 2014-01-30
4 01-701-1015 ALB 2014-02-12T12:56 2014-02-12
5 01-701-1015 ALB 2014-03-05T12:25 2014-03-05
Step 3: Derive Study Day
# Express ADT as a study day (ADY) relative to treatment start (TRTSDT).
adlb <- admiral::derive_vars_dy(
  adlb,
  reference_date = TRTSDT,
  source_vars = exprs(ADT)
)

cat(" \n Study day derived \n ")
# Show the first five records that actually received a study day.
adlb %>%
  select(USUBJID, LBTESTCD, TRTSDT, ADT, ADY) %>%
  filter(!is.na(ADY)) %>%
  head(n = 5)
# A tibble: 5 × 5
USUBJID LBTESTCD TRTSDT ADT ADY
<chr> <chr> <date> <date> <dbl>
1 01-701-1015 ALB 2014-01-02 2013-12-26 -7
2 01-701-1015 ALB 2014-01-02 2014-01-16 15
3 01-701-1015 ALB 2014-01-02 2014-01-30 29
4 01-701-1015 ALB 2014-01-02 2014-02-12 42
5 01-701-1015 ALB 2014-01-02 2014-03-05 63
Step 4: Create PARAM and AVAL
# Copy the LB test identifiers and standardized result columns into the
# BDS parameter and analysis-value variables.
adlb <- mutate(
  adlb,
  PARAMCD = LBTESTCD, # parameter code
  PARAM = LBTEST,     # parameter description
  AVAL = LBSTRESN,    # numeric analysis value
  AVALU = LBSTRESU    # unit of the analysis value
)

cat(" \n PARAM and AVAL created \n ")
# List the first ten parameter code/description pairs with record counts.
head(count(adlb, PARAMCD, PARAM), n = 10)
# A tibble: 10 × 3
PARAMCD PARAM n
<chr> <chr> <int>
1 ALB Albumin 1814
2 ALP Alkaline Phosphatase 1824
3 ALT Alanine Aminotransferase 1814
4 ANISO Anisocytes 158
5 AST Aspartate Aminotransferase 1814
6 BASO Basophils 1796
7 BASOLE Basophils/Leukocytes 12
8 BILI Bilirubin 1814
9 BUN Blood Urea Nitrogen 1828
10 CA Calcium 1828
BDS Key Variables:
PARAMCD: Parameter code (ALT, AST, etc.)
PARAM: Parameter description
AVAL: Analysis value (numeric)
AVALU: Analysis value unit
Step 5: Derive Baseline Flag
# Baseline flag: within each subject/parameter, mark the LAST record that
# has a non-missing AVAL and was collected on or before treatment start.
#
# The non-missing test is part of the position search itself. Previously
# the position was "last row on or before TRTSDT" regardless of AVAL, so a
# missing result at the final pre-treatment visit left the whole
# subject/parameter without any baseline instead of falling back to the
# previous non-missing value.
adlb <- adlb %>%
  arrange(USUBJID, PARAMCD, ADT) %>%
  group_by(USUBJID, PARAMCD) %>%
  mutate(
    ABLFL = case_when(
      # which() drops NA comparisons (missing ADT/TRTSDT). The leading 0L
      # keeps max() defined, and warning-free, for groups that have no
      # eligible pre-treatment record; row 0 never exists, so nothing is
      # flagged in that case.
      row_number() == max(0L, which(!is.na(AVAL) & ADT <= TRTSDT)) ~ "Y",
      TRUE ~ NA_character_
    )
  ) %>%
  ungroup()

cat(" \n Baseline flag derived \n ")
# Inspect the first five flagged baseline records.
adlb %>%
  filter(ABLFL == "Y") %>%
  select(USUBJID, PARAMCD, ADT, TRTSDT, AVAL, ABLFL) %>%
  head(n = 5)
# A tibble: 5 × 6
USUBJID PARAMCD ADT TRTSDT AVAL ABLFL
<chr> <chr> <date> <date> <dbl> <chr>
1 01-701-1015 ALB 2013-12-26 2014-01-02 38 Y
2 01-701-1015 ALP 2013-12-26 2014-01-02 34 Y
3 01-701-1015 ALT 2013-12-26 2014-01-02 27 Y
4 01-701-1015 ANISO 2013-12-26 2014-01-02 1 Y
5 01-701-1015 AST 2013-12-26 2014-01-02 40 Y
ABLFL Logic:
Last non-missing value on or before TRTSDT
One ABLFL=“Y” per subject per parameter
Step 6: Derive Baseline Value
# Baseline value: copy the AVAL of the flagged baseline record onto every
# record of the same subject/parameter.
adlb <- adlb %>%
  group_by(USUBJID, PARAMCD) %>%
  mutate(
    # ABLFL is NA on non-baseline rows, so `ABLFL == "Y"` is a logical
    # vector full of NAs. Indexing AVAL with it returns an NA element for
    # each NA, and `[1]` then picks NA whenever the baseline row is not the
    # first row of the group. which() keeps only the TRUE position, and
    # `[1]` still yields NA when the group has no baseline record.
    BASE = AVAL[which(ABLFL == "Y")][1]
  ) %>%
  ungroup()

cat(" \n Baseline value derived \n ")
# Confirm BASE repeats the flagged AVAL across a subject/parameter.
head(select(adlb, USUBJID, PARAMCD, ADT, AVAL, ABLFL, BASE), n = 10)
# A tibble: 10 × 6
USUBJID PARAMCD ADT AVAL ABLFL BASE
<chr> <chr> <date> <dbl> <chr> <dbl>
1 01-701-1015 ALB 2013-12-26 38 Y 38
2 01-701-1015 ALB 2014-01-16 39 <NA> 38
3 01-701-1015 ALB 2014-01-30 38 <NA> 38
4 01-701-1015 ALB 2014-02-12 37 <NA> 38
5 01-701-1015 ALB 2014-03-05 38 <NA> 38
6 01-701-1015 ALB 2014-03-26 38 <NA> 38
7 01-701-1015 ALB 2014-05-07 37 <NA> 38
8 01-701-1015 ALB 2014-05-21 37 <NA> 38
9 01-701-1015 ALB 2014-06-18 38 <NA> 38
10 01-701-1015 ALB 2014-07-02 38 <NA> 38
Step 7: Derive Change from Baseline
# Change (CHG) and percent change (PCHG) from baseline.
#
# CHG is left missing on the baseline record itself (ABLFL == "Y"), which
# is the rule this document states and the one validation Check 2 tests.
# Without that guard every baseline row received CHG = 0.
# NOTE(review): rows collected before the baseline record still receive a
# CHG; restrict further if only post-baseline changes are wanted.
adlb <- adlb %>%
  mutate(
    CHG = case_when(
      ABLFL %in% "Y" ~ NA_real_,
      !is.na(AVAL) & !is.na(BASE) ~ AVAL - BASE,
      TRUE ~ NA_real_
    ),
    PCHG = case_when(
      # A zero baseline would divide by zero, so PCHG stays missing there.
      !is.na(CHG) & BASE != 0 ~ (CHG / BASE) * 100,
      TRUE ~ NA_real_
    )
  )

cat(" \n Change from baseline calculated \n ")
Change from baseline calculated
# Per-parameter count and mean of the non-missing changes (first five
# parameters only).
adlb %>%
  filter(!is.na(CHG)) %>%
  group_by(PARAMCD) %>%
  summarise(
    N = n(),
    Mean_CHG = round(mean(CHG), digits = 2),
    .groups = "drop"
  ) %>%
  head(n = 5)
# A tibble: 5 × 3
PARAMCD N Mean_CHG
<chr> <int> <dbl>
1 ALB 1626 -0.71
2 ALP 1635 0.13
3 ALT 1626 0.55
4 ANISO 58 0
5 AST 1626 0.11
Change Variables:
CHG = AVAL - BASE
PCHG = (CHG / BASE) * 100
Step 8: Derive Analysis Visit
# Map the collected VISIT onto the analysis visit (AVISIT) and its numeric
# counterpart (AVISITN).
adlb <- adlb %>%
  mutate(
    AVISIT = case_when(
      # Screening visits are recorded with a suffix (e.g. "SCREENING 1"),
      # so an exact match on "SCREENING" never fired; match on the prefix.
      str_detect(VISIT, "^SCREENING") ~ "Baseline",
      VISIT == "BASELINE" ~ "Baseline",
      TRUE ~ VISIT
    ),
    AVISITN = case_when(
      AVISIT == "Baseline" ~ 0,
      # Every "WEEK n" visit maps to its week number. Previously only
      # weeks 2, 4 and 8 did; other weeks fell through to VISITNUM, which
      # is on a different scale.
      str_detect(VISIT, "^WEEK \\d+$") ~ as.numeric(str_extract(VISIT, "\\d+")),
      # NOTE(review): remaining visits (unscheduled, retrieval, ...) keep
      # VISITNUM and are therefore not on the week scale.
      TRUE ~ as.numeric(VISITNUM)
    )
  )

cat(" \n Analysis visit derived \n ")
# Record counts for the first five AVISIT/AVISITN combinations.
head(count(adlb, AVISIT, AVISITN), n = 5)
# A tibble: 5 × 3
AVISIT AVISITN n
<chr> <dbl> <int>
1 AMBUL ECG PLACEMENT 3.5 5
2 AMBUL ECG REMOVAL 6 52
3 Baseline 0 12
4 RETRIEVAL 201 35
5 SCREENING 1 1 9233
Step 9: Derive Reference Range Indicators
# Reference range indicators for the analysis value (ANRIND) and the
# baseline value (BNRIND), judged against LBSTNRLO / LBSTNRHI.
#
# "NORMAL" is assigned only when at least one limit is available. A value
# with no reference range at all cannot be classified and stays missing;
# previously such rows were reported as "NORMAL".
adlb <- adlb %>%
  mutate(
    ANRIND = case_when(
      is.na(AVAL) ~ NA_character_,
      !is.na(LBSTNRLO) & AVAL < LBSTNRLO ~ "LOW",
      !is.na(LBSTNRHI) & AVAL > LBSTNRHI ~ "HIGH",
      !is.na(LBSTNRLO) | !is.na(LBSTNRHI) ~ "NORMAL",
      TRUE ~ NA_character_
    ),
    # BNRIND applies the same rules to BASE, using the limits found on the
    # current row.
    BNRIND = case_when(
      is.na(BASE) ~ NA_character_,
      !is.na(LBSTNRLO) & BASE < LBSTNRLO ~ "LOW",
      !is.na(LBSTNRHI) & BASE > LBSTNRHI ~ "HIGH",
      !is.na(LBSTNRLO) | !is.na(LBSTNRHI) ~ "NORMAL",
      TRUE ~ NA_character_
    )
  )

cat(" \n Reference range indicators derived \n ")
Reference range indicators derived
# Record counts per parameter and range indicator (first ten rows).
head(count(adlb, PARAMCD, ANRIND), n = 10)
# A tibble: 10 × 3
PARAMCD ANRIND n
<chr> <chr> <int>
1 ALB HIGH 7
2 ALB LOW 76
3 ALB NORMAL 1731
4 ALP HIGH 85
5 ALP LOW 43
6 ALP NORMAL 1696
7 ALT HIGH 83
8 ALT LOW 10
9 ALT NORMAL 1721
10 ANISO NORMAL 158
Reference Range:
ANRIND: Analysis reference range (LOW/NORMAL/HIGH)
BNRIND: Baseline reference range
Step 10: Derive Toxicity Grades
# Example toxicity grading, implemented for ALT only.
# A grade is produced only when the graded value and the upper normal
# limit (LBSTNRHI) are both present; all other rows are left missing.
# Thresholds are multiples of LBSTNRHI, checked from the highest down:
#   > 20x -> "4", > 5x -> "3", > 3x -> "2", > 1x -> "1", otherwise "0".
adlb <- mutate(
  adlb,
  # Grade of the analysis value.
  ATOXGR = if_else(
    PARAMCD == "ALT" & !is.na(AVAL) & !is.na(LBSTNRHI),
    case_when(
      AVAL > 20 * LBSTNRHI ~ "4",
      AVAL > 5 * LBSTNRHI ~ "3",
      AVAL > 3 * LBSTNRHI ~ "2",
      AVAL > LBSTNRHI ~ "1",
      TRUE ~ "0"
    ),
    NA_character_
  ),
  # Grade of the baseline value, using the same thresholds.
  BTOXGR = if_else(
    PARAMCD == "ALT" & !is.na(BASE) & !is.na(LBSTNRHI),
    case_when(
      BASE > 20 * LBSTNRHI ~ "4",
      BASE > 5 * LBSTNRHI ~ "3",
      BASE > 3 * LBSTNRHI ~ "2",
      BASE > LBSTNRHI ~ "1",
      TRUE ~ "0"
    ),
    NA_character_
  )
)

cat(" \n Toxicity grades derived (ALT only) \n ")
Toxicity grades derived (ALT only)
# Distribution of the graded ALT records.
count(filter(adlb, PARAMCD == "ALT" & !is.na(ATOXGR)), ATOXGR)
# A tibble: 3 × 2
ATOXGR n
<chr> <int>
1 0 1731
2 1 79
3 2 4
NCI CTCAE Grading (ALT Example):
Grade 0: Normal
Grade 1: >ULN - 3×ULN
Grade 2: >3×ULN - 5×ULN
Grade 3: >5×ULN - 20×ULN
Grade 4: >20×ULN
Step 11: Derive Analysis Sequence
# Analysis sequence: order records by subject, parameter and date, then
# number them 1..n within each subject.
adlb <- adlb %>%
  arrange(USUBJID, PARAMCD, ADT) %>%
  group_by(USUBJID) %>%
  mutate(ASEQ = seq_len(n())) %>%
  ungroup()

cat(" \n Analysis sequence derived \n ")
Analysis sequence derived
Validation
cat(" \n === ADLB Validation === \n\n ")

# Check 1: no subject/parameter may carry more than one baseline flag.
# check1 holds the offending subject/parameter pairs (zero rows = pass).
check1 <- filter(
  count(filter(adlb, ABLFL == "Y"), USUBJID, PARAMCD),
  n > 1
)
cat("Check 1 - Multiple ABLFL per subject-PARAMCD:", nrow(check1), " \n ")
Check 1 - Multiple ABLFL per subject-PARAMCD: 0
# Check 2: baseline records should not carry a change from baseline.
# check2 holds the baseline rows with a populated CHG (zero rows = pass).
check2 <- filter(adlb, ABLFL == "Y" & !is.na(CHG))
cat("Check 2 - CHG not NA at baseline:", nrow(check2), " \n ")
Check 2 - CHG not NA at baseline: 8405
# Check 3: BASE must take a single value within each subject/parameter.
# check3 holds the pairs with more than one distinct non-missing BASE.
check3 <- adlb %>%
  filter(!is.na(BASE)) %>%
  group_by(USUBJID, PARAMCD) %>%
  summarise(
    n_distinct_base = n_distinct(BASE),
    .groups = "drop"
  ) %>%
  filter(n_distinct_base > 1)
cat("Check 3 - Inconsistent BASE within subject-PARAMCD:", nrow(check3), " \n ")
Check 3 - Inconsistent BASE within subject-PARAMCD: 0
# Closing banner for the validation section.
cat(" \n ✓ Validation complete \n ")
Summary
# Overall size of the finished dataset.
cat(" \n === ADLB Summary === \n\n ")
cat("Total records:", nrow(adlb), " \n ")
cat("Variables:", ncol(adlb), " \n\n ")

# Per-parameter record, subject and baseline-record counts (first ten
# parameters).
adlb %>%
  group_by(PARAMCD) %>%
  summarise(
    N_Records = n(),
    N_Subjects = length(unique(USUBJID)),
    # %in% treats the NA flags on non-baseline rows as FALSE.
    N_Baseline = sum(ABLFL %in% "Y"),
    .groups = "drop"
  ) %>%
  head(n = 10)
# A tibble: 10 × 4
PARAMCD N_Records N_Subjects N_Baseline
<chr> <int> <int> <int>
1 ALB 1814 254 254
2 ALP 1824 254 253
3 ALT 1814 254 254
4 ANISO 158 73 18
5 AST 1814 254 254
6 BASO 1796 254 248
7 BASOLE 12 6 6
8 BILI 1814 254 254
9 BUN 1828 254 254
10 CA 1828 254 254
# Record counts per analysis visit, ordered by the numeric visit.
arrange(count(adlb, AVISIT, AVISITN), AVISITN)
# A tibble: 27 × 3
AVISIT AVISITN n
<chr> <dbl> <int>
1 Baseline 0 12
2 SCREENING 1 1 9233
3 UNSCHEDULED 1.1 1.1 781
4 UNSCHEDULED 1.2 1.2 157
5 UNSCHEDULED 1.3 1.3 73
6 WEEK 2 2 8425
7 AMBUL ECG PLACEMENT 3.5 5
8 WEEK 4 4 6801
9 UNSCHEDULED 4.1 4.1 104
10 UNSCHEDULED 4.2 4.2 15
# ℹ 17 more rows
Export
# Attach a "label" attribute to each derived variable before export.
# BTOXGR is labelled as well: it is derived alongside ATOXGR but was
# previously left without a label.
attr(adlb$PARAMCD, "label") <- "Parameter Code"
attr(adlb$PARAM, "label") <- "Parameter"
attr(adlb$AVAL, "label") <- "Analysis Value"
attr(adlb$AVALU, "label") <- "Analysis Value Unit"
attr(adlb$ADT, "label") <- "Analysis Date"
attr(adlb$ADY, "label") <- "Analysis Relative Day"
attr(adlb$AVISIT, "label") <- "Analysis Visit"
attr(adlb$AVISITN, "label") <- "Analysis Visit (N)"
attr(adlb$ABLFL, "label") <- "Baseline Record Flag"
attr(adlb$BASE, "label") <- "Baseline Value"
attr(adlb$CHG, "label") <- "Change from Baseline"
attr(adlb$PCHG, "label") <- "Percent Change from Baseline"
attr(adlb$ANRIND, "label") <- "Analysis Reference Range Indicator"
attr(adlb$BNRIND, "label") <- "Baseline Reference Range Indicator"
attr(adlb$ATOXGR, "label") <- "Analysis Toxicity Grade"
attr(adlb$BTOXGR, "label") <- "Baseline Toxicity Grade"
attr(adlb$ASEQ, "label") <- "Analysis Sequence Number"

# Write the dataset to adlb.xpt in the working directory.
xportr_write(adlb, path = "adlb.xpt", domain = "ADLB")
cat(" \n ✓ ADLB exported to: adlb.xpt \n ")
✓ ADLB exported to: adlb.xpt
Key Takeaways
BDS structure: One row per subject-parameter-timepoint
ABLFL = last value on or before TRTSDT (one per subject-PARAMCD)
BASE is constant across all records for same subject-PARAMCD
CHG = AVAL - BASE, must be NA at baseline rows
ANRIND compares to reference range (LOW/NORMAL/HIGH)
Toxicity grades based on multiples of ULN (Upper Limit of Normal)
Next Steps
Day 20: ADVS - Vitals Analysis Dataset
Day 21: ADTTE - Time-to-Event Analysis
Day 22: ADCM and ADRS