The hardware and bandwidth for this mirror are donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.

ksformat Usage Examples

ksformat hex icon: a hex-sticker-style icon for the ksformat package, showing a formatting pipeline that turns the values A, I and P into the labels Active, Inactive and Pending.

The ksformat package provides SAS PROC FORMAT-like functionality for R. This vignette walks through the most common use cases.

Example 1: Basic Discrete Formatting

Create a format for gender codes (auto-stored in library as “sex”):

# Define a discrete (code -> label) format and register it under the name "sex".
# .missing supplies the label for NA input and .other the label for any code
# without an explicit mapping (see rows 4 and 5 of the output below).
fnew(
  "M" = "Male",
  "F" = "Female",
  .missing = "Unknown",
  .other = "Other Gender",
  name = "sex"
)

# The input deliberately contains an NA and an unmapped code ("X").
gender_codes <- c("M", "F", "M", NA, "X", "F")
# Apply the format by its registered name; one label is returned per element.
formatted_genders <- fput(gender_codes, "sex")

data.frame(
  code = gender_codes,
  label = formatted_genders
)
#>   code        label
#> 1    M         Male
#> 2    F       Female
#> 3    M         Male
#> 4 <NA>      Unknown
#> 5    X Other Gender
#> 6    F       Female

fprint("sex")
#> KS Format:sex
#> Type: character 
#> Mappings:
#>   M => Male
#>   F => Female
#>   .missing => Unknown
#>   .other => Other Gender

Example 2: Numeric Range Formatting

Define formats in SAS-like text (auto-registered):

# Register a numeric range format named "age" from a SAS-like text definition.
# Square brackets mark an inclusive bound and round brackets an exclusive one,
# so 18 lands in "Adult" and 65 in "Senior" (see the output below). HIGH
# presumably leaves the last range without an upper limit (85 maps to "Senior").
fparse(text = '
VALUE age (numeric)
  [0, 18)     = "Child"
  [18, 65)    = "Adult"
  [65, HIGH]  = "Senior"
  .missing    = "Age Unknown"
;
')

# Test values sit on and around the range boundaries, plus one NA for .missing.
ages <- c(5, 15.3, 17.9, 18, 45, 64.99, 65, 85, NA)
# Apply the numeric format by its registered name.
age_groups <- fputn(ages, "age")

data.frame(
  age = ages,
  group = age_groups
)
#>     age       group
#> 1  5.00       Child
#> 2 15.30       Child
#> 3 17.90       Child
#> 4 18.00       Adult
#> 5 45.00       Adult
#> 6 64.99       Adult
#> 7 65.00      Senior
#> 8 85.00      Senior
#> 9    NA Age Unknown

Example 3: Decimal Ranges (BMI Categories)

fparse(text = '
VALUE bmi (numeric)
  [0, 18.5)    = "Underweight"
  [18.5, 25)   = "Normal"
  [25, 30)     = "Overweight"
  [30, HIGH]   = "Obese"
  .missing     = "No data"
;
')

bmi_values <- c(16.2, 18.5, 22.7, 25, 29.9, 35.1, NA)
bmi_labels <- fputn(bmi_values, "bmi")

data.frame(
  bmi = bmi_values,
  category = bmi_labels
)
#>    bmi    category
#> 1 16.2 Underweight
#> 2 18.5      Normal
#> 3 22.7      Normal
#> 4 25.0  Overweight
#> 5 29.9  Overweight
#> 6 35.1       Obese
#> 7   NA     No data

Example 4: Exclusive/Inclusive Bounds

# Ranges with an exclusive lower and an inclusive upper bound: 0 itself is not
# covered by "(0, 50]" and 50 belongs to "Low", not "High". Values matched by
# no range (0 and 101 below) receive the .other label.
fparse(text = '
VALUE score (numeric)
  (0, 50]    = "Low"
  (50, 100]  = "High"
  .other     = "Out of range"
;
')

# Test values sit exactly on and just outside each bound.
scores <- c(0, 1, 50, 51, 100, 101)
score_labels <- fputn(scores, "score")

data.frame(
  score = scores,
  label = score_labels
)
#>   score        label
#> 1     0 Out of range
#> 2     1          Low
#> 3    50          Low
#> 4    51         High
#> 5   100         High
#> 6   101 Out of range

Example 5: Reverse Formatting with Invalue

Invalues convert labels back to values. The default target_type is "numeric", and a label without a mapping comes back as NA (see "Unknown" in the output below):

finput(
  "Male" = 1,
  "Female" = 2,
  name = "sex_inv"
)
#> KS Invalue: sex_inv 
#> Target Type: numeric 
#> Mappings:
#>   Male => 1
#>   Female => 2

labels <- c("Male", "Female", "Male", "Unknown", "Female")
codes <- finputn(labels, "sex_inv")

data.frame(
  label = labels,
  code = codes
)
#>     label code
#> 1    Male    1
#> 2  Female    2
#> 3    Male    1
#> 4 Unknown   NA
#> 5  Female    2

Example 6: Bidirectional Formatting

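fnew_bid() creates both a format and an invalue at once. With name = "status", the format is registered as "status" and the matching invalue as "status_inv":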

status_bi <- fnew_bid(
  "A" = "Active",
  "I" = "Inactive",
  "P" = "Pending",
  name = "status"
)

# Forward: code -> label
status_codes <- c("A", "I", "P", "A")
status_labels <- fputc(status_codes, "status")
data.frame(code = status_codes, label = status_labels)
#>   code    label
#> 1    A   Active
#> 2    I Inactive
#> 3    P  Pending
#> 4    A   Active

# Reverse: label -> code
# The invalue is looked up as "status_inv": the name passed to fnew_bid()
# ("status") plus an "_inv" suffix. That name is never set explicitly in this
# example, so it is presumably generated by fnew_bid().
test_labels <- c("Active", "Pending", "Inactive")
test_codes <- finputc(test_labels, "status_inv")
data.frame(label = test_labels, code = test_codes)
#>      label code
#> 1   Active    A
#> 2  Pending    P
#> 3 Inactive    I

Example 7: Parse Multiple Formats from Text

fparse(text = '
// Study format definitions

VALUE race (character)
  "W" = "White"
  "B" = "Black"
  "A" = "Asian"
  .missing = "Unknown"
;

INVALUE race_inv
  "White" = 1
  "Black" = 2
  "Asian" = 3
;
')

fprint()
#> Registered formats:
#>   age - VALUE (numeric), 3 mapping(s)
#>   bmi - VALUE (numeric), 4 mapping(s)
#>   race - VALUE (character), 3 mapping(s)
#>   race_inv - INVALUE (numeric), 3 mapping(s)
#>   score - VALUE (numeric), 2 mapping(s)
#>   sex - VALUE (character), 2 mapping(s)
#>   sex_inv - INVALUE (numeric), 2 mapping(s)
#>   status - VALUE (character), 3 mapping(s)
#>   status_inv - INVALUE (character), 3 mapping(s)

Example 8: Export Formats Back to Text

bmi_fmt <- format_get("bmi")
cat(fexport(bmi = bmi_fmt))
#> VALUE bmi (numeric)
#>   [0, 18.5) = "Underweight"
#>   [18.5, 25) = "Normal"
#>   [25, 30) = "Overweight"
#>   [30, HIGH] = "Obese"
#>   .missing = "No data"
#> ;

Example 9: SAS-like PUT/INPUT Functions

# fputn — apply numeric format by name
fputn(c(5, 30, 70), "age")
#> [1] "Child"  "Adult"  "Senior"

# fputc — apply character format by name
fputc(c("M", "F"), "sex")
#> [1] "Male"   "Female"

# finputn — apply numeric invalue by name
finputn(c("White", "Black"), "race_inv")
#> [1] 1 2

Example 10: Data Frame Formatting

df <- data.frame(
  id = 1:6,
  sex = c("M", "F", "M", "F", NA, "X"),
  age = c(15, 25, 45, 70, 35, NA),
  stringsAsFactors = FALSE
)

sex_f <- format_get("sex")
age_f <- format_get("age")

df_formatted <- fput_df(
  df,
  sex = sex_f,
  age = age_f,
  suffix = "_label"
)

df_formatted
#>   id  sex age    sex_label   age_label
#> 1  1    M  15         Male       Child
#> 2  2    F  25       Female       Adult
#> 3  3    M  45         Male       Adult
#> 4  4    F  70       Female      Senior
#> 5  5 <NA>  35      Unknown       Adult
#> 6  6    X  NA Other Gender Age Unknown

Example 11: Missing Value Handling

# With .missing label
fput(c("M", "F", NA), "sex")
#> [1] "Male"    "Female"  "Unknown"

# With keep_na = TRUE
fput(c("M", "F", NA), sex_f, keep_na = TRUE)
#> [1] "Male"   "Female" NA

# is_missing() checks
is_missing(NA)
#> [1] TRUE
is_missing(NaN)
#> [1] TRUE
is_missing("")   # TRUE — empty strings are treated as missing
#> [1] TRUE

Example 12: Date/Time Formats (SAS-style)

SAS Date Formats

SAS date format names are auto-resolved — no pre-creation needed:

today <- Sys.Date()

data.frame(
  format = c("DATE9.", "MMDDYY10.", "DDMMYY10.", "YYMMDD10.",
             "MONYY7.", "WORDDATE.", "YEAR4.", "QTR."),
  result = c(
    fputn(today, "DATE9."),
    fputn(today, "MMDDYY10."),
    fputn(today, "DDMMYY10."),
    fputn(today, "YYMMDD10."),
    fputn(today, "MONYY7."),
    fputn(today, "WORDDATE."),
    fputn(today, "YEAR4."),
    fputn(today, "QTR.")
  )
)
#>      format         result
#> 1    DATE9.      28MAR2026
#> 2 MMDDYY10.     03/28/2026
#> 3 DDMMYY10.     28/03/2026
#> 4 YYMMDD10.     2026-03-28
#> 5   MONYY7.        MAR2026
#> 6 WORDDATE. March 28, 2026
#> 7    YEAR4.           2026
#> 8      QTR.              1

# Multiple dates
dates <- as.Date(c("2020-01-15", "2020-06-30", "2020-12-25"))
fputn(dates, "DATE9.")
#> [1] "15JAN2020" "30JUN2020" "25DEC2020"

R Numeric Dates (Days Since 1970-01-01)

r_days <- as.numeric(as.Date("2025-01-01"))
r_days
#> [1] 20089
fputn(r_days, "DATE9.")
#> [1] "01JAN2025"
fputn(r_days, "MMDDYY10.")
#> [1] "01/01/2025"

Time Formats

Time is represented as seconds since midnight:

seconds <- c(0, 3600, 45000, 86399)

data.frame(
  seconds = seconds,
  TIME8 = fputn(seconds, "TIME8."),
  TIME5 = fputn(seconds, "TIME5."),
  HHMM = fputn(seconds, "HHMM.")
)
#>   seconds    TIME8 TIME5  HHMM
#> 1       0  0:00:00  0:00 00:00
#> 2    3600  1:00:00  1:00 01:00
#> 3   45000 12:30:00 12:30 12:30
#> 4   86399 23:59:59 23:59 23:59

Datetime Formats

now <- Sys.time()

data.frame(
  format = c("DATETIME20.", "DATETIME13.", "DTDATE.", "DTYYMMDD."),
  result = c(
    fputn(now, "DATETIME20."),
    fputn(now, "DATETIME13."),
    fputn(now, "DTDATE."),
    fputn(now, "DTYYMMDD.")
  )
)
#>        format             result
#> 1 DATETIME20. 28MAR2026:13:58:42
#> 2 DATETIME13.      28MAR26:13:58
#> 3     DTDATE.          28MAR2026
#> 4   DTYYMMDD.         2026-03-28

# From numeric R-epoch seconds
r_secs <- as.numeric(as.POSIXct("2025-06-15 14:30:00", tz = "UTC"))
fputn(r_secs, "DATETIME20.")
#> [1] "15JUN2025:14:30:00"

Custom Date Formats with fnew_date()

# SAS-named format
fnew_date("DATE9.", name = "bday_fmt")
#> KS Format:bday_fmt
#> Type: date 
#> Pattern: %d%b%Y (DATE9.)
birthdays <- as.Date(c("1990-03-25", "1985-11-03", "2000-07-14"))
fput(birthdays, "bday_fmt")
#> [1] "25MAR1990" "03NOV1985" "14JUL2000"

# Custom strftime pattern (e.g. DD.MM.YYYY)
fnew_date("%d.%m.%Y", name = "ru_date", type = "date")
#> KS Format:ru_date
#> Type: date 
#> Pattern: %d.%m.%Y
fput(birthdays, "ru_date")
#> [1] "25.03.1990" "03.11.1985" "14.07.2000"

# SAS-named format (MMDDYY10.) with a label for missing dates
fnew_date("MMDDYY10.", name = "us_date", .missing = "NO DATE")
#> KS Format:us_date
#> Type: date 
#> Pattern: %m/%d/%Y (MMDDYY10.) 
#>   .missing => NO DATE
mixed <- c(as.Date("2025-01-01"), NA, as.Date("2025-12-31"))
fput(mixed, "us_date")
#> [1] "01/01/2025" "NO DATE"    "12/31/2025"

fprint("bday_fmt")
#> KS Format:bday_fmt
#> Type: date 
#> Pattern: %d%b%Y (DATE9.)

Date Formats in Data Frames

patients <- data.frame(
  id = 1:4,
  visit_date = as.Date(c("2025-01-10", "2025-02-15", "2025-03-20", NA)),
  stringsAsFactors = FALSE
)

visit_fmt <- fnew_date("DATE9.", name = "visit_fmt", .missing = "NOT RECORDED")
fput_df(patients, visit_date = visit_fmt)
#>   id visit_date visit_date_fmt
#> 1  1 2025-01-10      10JAN2025
#> 2  2 2025-02-15      15FEB2025
#> 3  3 2025-03-20      20MAR2025
#> 4  4       <NA>   NOT RECORDED

Parse Date Formats from Text

fparse(text = '
VALUE enrldt (date)
  pattern = "DATE9."
  .missing = "Not Enrolled"
;

VALUE visit_time (time)
  pattern = "TIME8."
;

VALUE stamp (datetime)
  pattern = "DATETIME20."
;
')

fput(as.Date("2025-03-01"), "enrldt")
#> [1] "01MAR2025"
fput(36000, "visit_time")
#> [1] "10:00:00"
fput(as.POSIXct("2025-03-01 10:00:00", tz = "UTC"), "stamp")
#> [1] "01MAR2025:10:00:00"

# Export back to text
enrl_obj <- format_get("enrldt")
cat(fexport(enrldt = enrl_obj))
#> VALUE enrldt (date)
#>   pattern = "DATE9."
#>   .missing = "Not Enrolled"
#> ;

fclear()
#> All formats cleared from library.

Example 13: Multilabel Formats

Overlapping Age Categories

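With multilabel formats, a single value can match multiple labels. In fnew(), each numeric range is written as a "low,high,TRUE/FALSE,TRUE/FALSE" key; the two flags presumably state whether the lower and upper bounds are inclusive, so "0,5,TRUE,TRUE" would correspond to [0, 5]: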

fnew(
  "0,5,TRUE,TRUE"    = "Infant",
  "6,11,TRUE,TRUE"   = "Child",
  "12,17,TRUE,TRUE"  = "Adolescent",
  "0,17,TRUE,TRUE"   = "Pediatric",
  "18,64,TRUE,TRUE"  = "Adult",
  "65,Inf,TRUE,TRUE" = "Elderly",
  "18,Inf,TRUE,TRUE" = "Non-Pediatric",
  name = "age_categories",
  type = "numeric",
  multilabel = TRUE
)

ages <- c(3, 14, 25, 70)

# fput returns first match only
fput(ages, "age_categories")
#> [1] "Infant"     "Adolescent" "Adult"      "Elderly"

# fput_all returns ALL matching labels
all_labels <- fput_all(ages, "age_categories")
for (i in seq_along(ages)) {
  cat("Age", ages[i], "->", paste(all_labels[[i]], collapse = ", "), "\n")
}
#> Age 3 -> Infant, Pediatric 
#> Age 14 -> Adolescent, Pediatric 
#> Age 25 -> Adult, Non-Pediatric 
#> Age 70 -> Elderly, Non-Pediatric

Multilabel with Missing Values

fnew(
  "0,100,TRUE,TRUE"   = "Valid Score",
  "0,49,TRUE,TRUE"    = "Below Average",
  "50,100,TRUE,TRUE"  = "Above Average",
  "90,100,TRUE,TRUE"  = "Excellent",
  .missing = "No Score",
  .other = "Out of Range",
  name = "score_ml",
  type = "numeric",
  multilabel = TRUE
)

# Inputs: two scores that fall into several overlapping ranges, one missing
# value, and one value outside every range.
scores <- c(95, 45, NA, 150)
ml_result <- fput_all(scores, "score_ml")

# ml_result is indexed per input element; each entry is collapsed into a
# single comma-separated line.
for (i in seq_along(scores)) {
  # scores[i] is a single value, so a plain if/else is used instead of the
  # vectorised ifelse(); the printed text is the same.
  cat("Score", if (is.na(scores[i])) "NA" else scores[i],
      "->", paste(ml_result[[i]], collapse = ", "), "\n")
}
#> Score 95 -> Valid Score, Above Average, Excellent 
#> Score 45 -> Valid Score, Below Average 
#> Score NA -> No Score 
#> Score 150 -> Out of Range

Parse Multilabel from Text

fparse(text = '
VALUE risk (numeric, multilabel)
  [0, 3]   = "Low Risk"
  [0, 7]   = "Monitored"
  (3, 7]   = "Medium Risk"
  (7, 10]  = "High Risk"
;
')

risk_scores <- c(2, 5, 9)
risk_labels <- fput_all(risk_scores, "risk")
for (i in seq_along(risk_scores)) {
  cat("Score", risk_scores[i], "->",
      paste(risk_labels[[i]], collapse = " | "), "\n")
}
#> Score 2 -> Low Risk | Monitored 
#> Score 5 -> Monitored | Medium Risk 
#> Score 9 -> High Risk

Multilabel Export

risk_obj <- format_get("risk")
cat(fexport(risk = risk_obj))
#> VALUE risk (numeric, multilabel)
#>   [0, 3] = "Low Risk"
#>   [0, 7] = "Monitored"
#>   (3, 7] = "Medium Risk"
#>   (7, 10] = "High Risk"
#> ;

fprint("risk")
#> KS Format:risk (multilabel)
#> Type: numeric 
#> Mappings:
#>   [0, 3] => Low Risk
#>   [0, 7] => Monitored
#>   (3, 7] => Medium Risk
#>   (7, 10] => High Risk

Practical Example: Adverse Event Severity Grading

fnew(
  "1,1,TRUE,TRUE" = "Mild",
  "2,2,TRUE,TRUE" = "Moderate",
  "3,3,TRUE,TRUE" = "Severe",
  "4,4,TRUE,TRUE" = "Life-threatening",
  "5,5,TRUE,TRUE" = "Fatal",
  "3,5,TRUE,TRUE" = "Serious",
  "1,2,TRUE,TRUE" = "Non-serious",
  name = "ae_grade",
  type = "numeric",
  multilabel = TRUE
)

grades <- c(1, 2, 3, 4, 5)
ae_labels <- fput_all(grades, "ae_grade")
for (i in seq_along(grades)) {
  cat("Grade", grades[i], ":",
      paste(ae_labels[[i]], collapse = " + "), "\n")
}
#> Grade 1 : Mild + Non-serious 
#> Grade 2 : Moderate + Non-serious 
#> Grade 3 : Severe + Serious 
#> Grade 4 : Life-threatening + Serious 
#> Grade 5 : Fatal + Serious

fclear()
#> All formats cleared from library.

Example 14: Case-Insensitive Matching

sex_nc <- fnew(
  "M" = "Male",
  "F" = "Female",
  .missing = "Unknown",
  name = "sex_nc",
  type = "character",
  ignore_case = TRUE
)

input <- c("m", "F", "M", "f", NA)
fput(input, sex_nc)
#> [1] "Male"    "Female"  "Male"    "Female"  "Unknown"

# Note the (nocase) flag after the format name
fprint("sex_nc")
#> KS Format:sex_nc (nocase)
#> Type: character 
#> Mappings:
#>   M => Male
#>   F => Female
#>   .missing => Unknown

# Also works with fputc
fputc("m", "sex_nc")
#> [1] "Male"

fclear()
#> All formats cleared from library.

Example 15: Expression Labels in Formats

Expression labels contain .x1, .x2, etc., which reference extra arguments passed to fput(). This lets you compute labels dynamically.

Simple sprintf Expression

stat_fmt <- fnew(
  "n"   = "sprintf('%s', .x1)",
  "pct" = "sprintf('%.1f%%', .x1 * 100)",
  name = "stat",
  type = "character"
)

types  <- c("n",  "pct",  "n",   "pct")
values <- c(42,   0.053,  100,   0.255)

fput(types, stat_fmt, values)
#> [1] "42"    "5.3%"  "100"   "25.5%"

Two Extra Arguments (.x1, .x2)

ratio_fmt <- fnew(
  "ratio" = "sprintf('%s/%s', .x1, .x2)",
  name = "ratio",
  type = "character"
)

fput("ratio", ratio_fmt, 3, 10)
#> [1] "3/10"
fput(c("ratio", "ratio"), ratio_fmt, c(3, 7), c(10, 20))
#> [1] "3/10" "7/20"

ifelse Expression

sign_fmt <- fnew(
  "val" = "ifelse(.x1 > 0, paste0('+', .x1), as.character(.x1))",
  name = "sign",
  type = "character"
)

nums <- c(5, 0, -3)
fput(rep("val", 3), sign_fmt, nums)
#> [1] "+5" "0"  "-3"

Mixed Static and Expression Labels

mixed_fmt <- fnew(
  "header" = "HEADER",
  "n"      = "sprintf('N=%s', .x1)",
  "pct"    = "sprintf('%.1f%%', .x1 * 100)",
  name = "mixed",
  type = "character"
)

keys <- c("header", "n", "pct", "header", "n")
vals <- c(0,        42,  0.15,  0,        100)
fput(keys, mixed_fmt, vals)
#> [1] "HEADER" "N=42"   "15.0%"  "HEADER" "N=100"

Expression in .other Fallback

known_fmt <- fnew(
  "ok" = "OK",
  .other = "sprintf('Error(%s)', .x1)",
  name = "err_fmt",
  type = "character"
)

codes   <- c("ok", "E01", "ok", "E99")
details <- c("",   "timeout", "", "overflow")
fput(codes, known_fmt, details)
#> [1] "OK"              "Error(timeout)"  "OK"              "Error(overflow)"

Scalar Recycling

label_fmt <- fnew(
  "val" = "sprintf('%s (N=%s)', .x1, .x2)",
  name = "recycle",
  type = "character"
)

fput(c("val", "val"), label_fmt, c(42, 55), 100)
#> [1] "42 (N=100)" "55 (N=100)"

Statistical Table Format with Computed Labels

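A realistic clinical-trial example: e() marks labels as expressions evaluated at apply-time, .x1 references the extra argument, and multiline dplyr::case_when shows complex conditional formatting. Note that this format is registered under the name "stat", the same name used in the "Simple sprintf Expression" section above, so from here on "stat" refers to the definition below.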

# Per-population totals, used as denominators by the percentage labels
n.trt <- data.frame(
  pop = c("fas", "pps", "saf"),
  ntot = c(34, 30, 36)
)

# Return the `ntot` value(s) of the n.trt rows whose `pop` equals the argument.
# A population code that is not present yields a zero-length result.
get_n <- function(pop) {
  is_requested <- n.trt[["pop"]] == pop
  n.trt[["ntot"]][is_requested]
}

fnew(
  "n_fas" = e("get_n('fas')"),
  "n_pps" = e("get_n('pps')"),
  "n_saf" = e("get_n('saf')"),
  "n"   = "sprintf('%d', .x1)",
  "n_pct_fas" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('fas'))",
  "n_pct_pps" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('pps'))",
  "n_pct_saf" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('saf'))",
  "pct" = "dplyr::case_when(
               .x1>0 & .x1<0.1 ~ sprintf('%5s', ' <0.1%'),
               .x1>=0.1 | .x1==0 ~ sprintf(paste0('%5.', 1 ,'f%%'), .x1)
           )",
  "pval" = "dplyr::case_when(
                .x1>=0 & .x1<0.001 ~ sprintf('%s', '<0.001'),
                .x1>=0.001 & .x1<=0.999 ~ sprintf(paste0('%.', 3 ,'f'), .x1),
                .x1>0.999 ~ sprintf('%s', '>0.999'), .default = '--'
           )",
  name = "stat",
  type = "character"
)

The same format can be created via fparse(). Note that multiline expressions must be collapsed to single lines in the text block, and (eval) marks evaluated labels:

fmt <- '
  VALUE stat_01 (character)
     "n_fas" = "get_n(\'fas\')" (eval)
     "n_pps" = "get_n(\'pps\')" (eval)
     "n_saf" = "get_n(\'saf\')" (eval)
     "n"     = "sprintf(\'%d\', .x1)"
     "pct"   = "dplyr::case_when(.x1>0 & .x1<0.1 ~ sprintf(\'%5s\', \' <0.1%\'), .x1>=0.1 | .x1==0 ~ sprintf(paste0(\'%5.\', 1 ,\'f%%\'), .x1))"
     "n_pct_fas" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'fas\'))"
     "n_pct_pps" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'pps\'))"
     "n_pct_saf" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'saf\'))"
     "pval"  = "dplyr::case_when(.x1>=0 & .x1<0.001 ~ sprintf(\'%s\', \'<0.001\'), .x1>=0.001 & .x1<=0.999 ~ sprintf(paste0(\'%.\', 3 ,\'f\'), .x1), .x1>0.999 ~ sprintf(\'%s\', \'>0.999\'), .default = \'--\')"
;'
# Pass the definition through the named `text` argument, as every other
# fparse() call in this document does, rather than relying on position.
fparse(text = fmt)

Both stat (via fnew) and stat_01 (via fparse) produce identical results:

df <- data.frame(
  types = c("n_fas", "n_pps", "n_saf", "n", "pct", "pct", "n", "pval", "pval",
            "n_pct_fas", "n_pct_pps", "n_pct_saf"),
  values = c(NA, NA, NA, 42, 0.053, 0.0008, 100, 0.255, 0.0003, 22, 22, 22)
)

df$fmt    <- fput(df$types, "stat",    df$values)
df$fmt_01 <- fput(df$types, "stat_01", df$values)
print(df)
#>        types   values         fmt      fmt_01
#> 1      n_fas       NA          34          34
#> 2      n_pps       NA          30          30
#> 3      n_saf       NA          36          36
#> 4          n  42.0000          42          42
#> 5        pct   0.0530       <0.1%       <0.1%
#> 6        pct   0.0008       <0.1%       <0.1%
#> 7          n 100.0000         100         100
#> 8       pval   0.2550       0.255       0.255
#> 9       pval   0.0003      <0.001      <0.001
#> 10 n_pct_fas  22.0000 22 ( 64.7%) 22 ( 64.7%)
#> 11 n_pct_pps  22.0000 22 ( 73.3%) 22 ( 73.3%)
#> 12 n_pct_saf  22.0000 22 ( 61.1%) 22 ( 61.1%)

fclear()
#> All formats cleared from library.

Example 16: Vectorized Format Names (SAS PUTC-style)

Each element can use a different format, determined by a vector of format names:

# Dispatch format: maps type code to format name
fnew("1" = "groupx", "2" = "groupy", "3" = "groupz",
     name = "typefmt", type = "numeric")

# Per-group character formats
fnew("positive" = "agree",  "negative" = "disagree", "neutral" = "notsure",
     name = "groupx", type = "character")
fnew("positive" = "accept", "negative" = "reject",   "neutral" = "possible",
     name = "groupy", type = "character")
fnew("positive" = "pass",   "negative" = "fail",     "neutral" = "retest",
     name = "groupz", type = "character")

type     <- c(1, 1, 1, 2, 2, 2, 3, 3, 3)
response <- c("positive", "negative", "neutral",
              "positive", "negative", "neutral",
              "positive", "negative", "neutral")

# Step 1: map type -> format name
respfmt <- fput(type, "typefmt")

# Step 2: apply per-element format
word <- fputc(response, respfmt)

data.frame(type = type, response = response, respfmt = respfmt, word = word)
#>   type response respfmt     word
#> 1    1 positive  groupx    agree
#> 2    1 negative  groupx disagree
#> 3    1  neutral  groupx  notsure
#> 4    2 positive  groupy   accept
#> 5    2 negative  groupy   reject
#> 6    2  neutral  groupy possible
#> 7    3 positive  groupz     pass
#> 8    3 negative  groupz     fail
#> 9    3  neutral  groupz   retest

fclear()
#> All formats cleared from library.

Example 17: Working with Dates and Formats — PUTN

A SAS-style workflow where format names are looked up dynamically per observation:

# Format that maps key codes to date format names
fnew("1" = "date9.", "2" = "mmddyy10.",
     name = "writfmt", type = "numeric")

fnew_date("date9.")
#> KS Format:DATE9.
#> Type: date 
#> Pattern: %d%b%Y (DATE9.)
fnew_date("mmddyy10.")
#> KS Format:MMDDYY10.
#> Type: date 
#> Pattern: %m/%d/%Y (MMDDYY10.)

# Input data (R date numbers = days since 1970-01-01)
number <- c(12103, 10899)
key    <- c(1, 2)

# Look up format name per observation
# (yields one format name per key: "date9." for key 1, "mmddyy10." for key 2)
datefmt <- fputn(key, "writfmt")

# Apply per-element date format
# datefmt holds lowercase names, whereas fnew_date() above printed the formats
# as DATE9. and MMDDYY10.; the lookup evidently tolerates the case difference,
# since both dates are formatted in the output below.
date <- fputn(number, datefmt)

data.frame(number = number, key = key, datefmt = datefmt, date = date)
#>   number key   datefmt       date
#> 1  12103   1    date9.  20FEB2003
#> 2  10899   2 mmddyy10. 11/04/1999

fclear()
#> All formats cleared from library.

Example 18: Import SAS Formats from CNTLOUT CSV

The fimport() function reads a CSV file exported from a SAS format catalogue (PROC FORMAT ... CNTLOUT=):

csv_path <- system.file("extdata", "test_cntlout.csv", package = "ksformat")
imported <- fimport(csv_path)
#> Warning: Skipping PICTURE format: "PICFMT"
#> ℹ TYPE="P" is not supported by ksformat.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.A' (HLO='S') has no R equivalent.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.B' (HLO='S') has no R equivalent.
#> ✔ Imported 4 formats and 1 invalue from
#>   '/tmp/RtmpZPeVsJ/Rinst1e13e97bade264/ksformat/extdata/test_cntlout.csv'.
names(imported)
#> [1] "AGEGRP"   "BMICAT"   "GENDER"   "RACEIN"   "SMISSING"

fprint()
#> Registered formats:
#>   AGEGRP - VALUE (numeric), 3 mapping(s)
#>   BMICAT - VALUE (numeric), 4 mapping(s)
#>   GENDER - VALUE (character), 2 mapping(s)
#>   RACEIN - INVALUE (numeric), 3 mapping(s)
#>   SMISSING - VALUE (numeric), 1 mapping(s)

Use Imported Formats

# Character format (GENDER)
gender_codes <- c("M", "F", NA, "X")
data.frame(
  code = gender_codes,
  label = fputc(gender_codes, "GENDER")
)
#>   code   label
#> 1    M    Male
#> 2    F  Female
#> 3 <NA> Unknown
#> 4    X       X

# Numeric format (AGEGRP)
ages <- c(5, 17, 18, 45, 65, 100, NA, -1)
data.frame(
  age = ages,
  group = fputn(ages, "AGEGRP")
)
#>   age       group
#> 1   5       Child
#> 2  17       Child
#> 3  18       Adult
#> 4  45       Adult
#> 5  65      Senior
#> 6 100      Senior
#> 7  NA Missing Age
#> 8  -1       Other

# Numeric format (BMICAT)
bmi_values <- c(15.0, 18.5, 22.3, 25.0, 28.7, 30.0, 35.5)
data.frame(
  bmi = bmi_values,
  category = fputn(bmi_values, "BMICAT")
)
#>    bmi    category
#> 1 15.0 Underweight
#> 2 18.5      Normal
#> 3 22.3      Normal
#> 4 25.0  Overweight
#> 5 28.7  Overweight
#> 6 30.0       Obese
#> 7 35.5       Obese

# Invalue (RACEIN)
race_labels <- c("White", "Black", "Asian", "Other")
data.frame(
  label = race_labels,
  code = finputn(race_labels, "RACEIN")
)
#>   label code
#> 1 White    1
#> 2 Black    2
#> 3 Asian    3
#> 4 Other   NA

Apply to Data Frame

df <- data.frame(
  id = 1:5,
  sex = c("M", "F", "M", NA, "F"),
  age = c(10, 30, 70, NA, 50),
  stringsAsFactors = FALSE
)

gender_fmt <- imported[["GENDER"]]
age_fmt    <- imported[["AGEGRP"]]

fput_df(df, sex = gender_fmt, age = age_fmt, suffix = "_label")
#>   id  sex age sex_label   age_label
#> 1  1    M  10      Male       Child
#> 2  2    F  30    Female       Adult
#> 3  3    M  70      Male      Senior
#> 4  4 <NA>  NA   Unknown Missing Age
#> 5  5    F  50    Female       Adult

Export Imported Format

cat(fexport(AGEGRP = age_fmt))
#> VALUE AGEGRP (numeric)
#>   [0, 17] = "Child"
#>   [18, 64] = "Adult"
#>   [65, HIGH] = "Senior"
#>   .missing = "Missing Age"
#>   .other = "Other"
#> ;
cat(fexport(GENDER = gender_fmt))
#> VALUE GENDER (character)
#>   "M" = "Male"
#>   "F" = "Female"
#>   .missing = "Unknown"
#> ;

Selective Import (No Auto-register)

fclear()
#> All formats cleared from library.

manual <- fimport(csv_path, register = FALSE)
#> Warning: Skipping PICTURE format: "PICFMT"
#> ℹ TYPE="P" is not supported by ksformat.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.A' (HLO='S') has no R equivalent.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.B' (HLO='S') has no R equivalent.
#> ✔ Imported 4 formats and 1 invalue from
#>   '/tmp/RtmpZPeVsJ/Rinst1e13e97bade264/ksformat/extdata/test_cntlout.csv'.

# Library should be empty
fprint()
#> Format library is empty

# Use directly from returned list
fput(c("M", "F"), manual[["GENDER"]])
#> [1] "Male"   "Female"

fclear()
#> All formats cleared from library.

Example 19: Bilingual Format

Expression labels can select between languages at apply-time using an extra argument:

# Single format, language selected via .x1 extra argument
sex_bi <- fnew(
  "M" = "ifelse(.x1 == 'en', 'Male', 'Homme')",
  "F" = "ifelse(.x1 == 'en', 'Female', 'Femme')",
  .missing = "Unknown",
  name = "sex_bi"
)

# .x1 = language code per observation
fput(c("M", "F", "M"), sex_bi, c("en", "fr", "en"))
#> [1] "Male"  "Femme" "Male"
# -> "Male" "Femme" "Male"

# Alternative: one format per language, selected at apply-time
fnew("M" = "Male",  "F" = "Female",  .missing = "Unknown", name = "sex_en")
fnew("M" = "Homme", "F" = "Femme",   .missing = "Inconnu", name = "sex_fr")

lang <- "fr"
fput(c("M", "F", NA), paste0("sex_", lang))
#> [1] "Homme"   "Femme"   "Inconnu"
# -> "Homme" "Femme" "Inconnu"

fclear()
#> All formats cleared from library.

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.