The hardware and bandwidth for this mirror are donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
The ksformat package provides SAS PROC FORMAT-like functionality for R. This vignette walks through the most common use cases.
Create a format for gender codes (auto-stored in library as “sex”):
fnew(
"M" = "Male",
"F" = "Female",
.missing = "Unknown",
.other = "Other Gender",
name = "sex"
)
gender_codes <- c("M", "F", "M", NA, "X", "F")
formatted_genders <- fput(gender_codes, "sex")
data.frame(
code = gender_codes,
label = formatted_genders
)
#> code label
#> 1 M Male
#> 2 F Female
#> 3 M Male
#> 4 <NA> Unknown
#> 5 X Other Gender
#> 6 F Female
fprint("sex")
#> KS Format:sex
#> Type: character
#> Mappings:
#> M => Male
#> F => Female
#> .missing => Unknown
#> .other => Other Gender
Define formats in SAS-like text (auto-registered):
fparse(text = '
VALUE age (numeric)
[0, 18) = "Child"
[18, 65) = "Adult"
[65, HIGH] = "Senior"
.missing = "Age Unknown"
;
')
ages <- c(5, 15.3, 17.9, 18, 45, 64.99, 65, 85, NA)
age_groups <- fputn(ages, "age")
data.frame(
age = ages,
group = age_groups
)
#> age group
#> 1 5.00 Child
#> 2 15.30 Child
#> 3 17.90 Child
#> 4 18.00 Adult
#> 5 45.00 Adult
#> 6 64.99 Adult
#> 7 65.00 Senior
#> 8 85.00 Senior
#> 9 NA Age Unknown
# Register a numeric format named "bmi" from SAS-like text. Intervals written
# as [a, b) include the lower bound and exclude the upper one, so 18.5 is
# labelled "Normal" and 25 "Overweight" in the output shown below.
fparse(text = '
VALUE bmi (numeric)
[0, 18.5) = "Underweight"
[18.5, 25) = "Normal"
[25, 30) = "Overweight"
[30, HIGH] = "Obese"
.missing = "No data"
;
')
bmi_values <- c(16.2, 18.5, 22.7, 25, 29.9, 35.1, NA)
bmi_labels <- fputn(bmi_values, "bmi")
data.frame(
bmi = bmi_values,
category = bmi_labels
)
#> bmi category
#> 1 16.2 Underweight
#> 2 18.5 Normal
#> 3 22.7 Normal
#> 4 25.0 Overweight
#> 5 29.9 Overweight
#> 6 35.1 Obese
#> 7 NA No data
# Register a numeric format named "score". Intervals written as (a, b] exclude
# the lower bound and include the upper one; values matching no interval
# (0 and 101 in the output shown below) receive the .other label.
fparse(text = '
VALUE score (numeric)
(0, 50] = "Low"
(50, 100] = "High"
.other = "Out of range"
;
')
scores <- c(0, 1, 50, 51, 100, 101)
score_labels <- fputn(scores, "score")
data.frame(
score = scores,
label = score_labels
)
#> score label
#> 1 0 Out of range
#> 2 1 Low
#> 3 50 Low
#> 4 51 High
#> 5 100 High
#> 6 101 Out of range
Invalues convert labels back to values. The default target_type is "numeric":
finput(
"Male" = 1,
"Female" = 2,
name = "sex_inv"
)
#> KS Invalue: sex_inv
#> Target Type: numeric
#> Mappings:
#> Male => 1
#> Female => 2
labels <- c("Male", "Female", "Male", "Unknown", "Female")
codes <- finputn(labels, "sex_inv")
data.frame(
label = labels,
code = codes
)
#> label code
#> 1 Male 1
#> 2 Female 2
#> 3 Male 1
#> 4 Unknown NA
#> 5 Female 2
fnew_bid() creates both a format and an invalue at once:
status_bi <- fnew_bid(
"A" = "Active",
"I" = "Inactive",
"P" = "Pending",
name = "status"
)
# Forward: code -> label
status_codes <- c("A", "I", "P", "A")
status_labels <- fputc(status_codes, "status")
data.frame(code = status_codes, label = status_labels)
#> code label
#> 1 A Active
#> 2 I Inactive
#> 3 P Pending
#> 4 A Active
# Reverse: label -> code
test_labels <- c("Active", "Pending", "Inactive")
test_codes <- finputc(test_labels, "status_inv")
data.frame(label = test_labels, code = test_codes)
#> label code
#> 1 Active A
#> 2 Pending P
#> 3 Inactive I
# A single text block can define several formats: here a character VALUE
# format ("race") and an INVALUE ("race_inv"). Both appear in the fprint()
# listing that follows.
fparse(text = '
// Study format definitions
VALUE race (character)
"W" = "White"
"B" = "Black"
"A" = "Asian"
.missing = "Unknown"
;
INVALUE race_inv
"White" = 1
"Black" = 2
"Asian" = 3
;
')
fprint()
#> Registered formats:
#> age - VALUE (numeric), 3 mapping(s)
#> bmi - VALUE (numeric), 4 mapping(s)
#> race - VALUE (character), 3 mapping(s)
#> race_inv - INVALUE (numeric), 3 mapping(s)
#> score - VALUE (numeric), 2 mapping(s)
#> sex - VALUE (character), 2 mapping(s)
#> sex_inv - INVALUE (numeric), 2 mapping(s)
#> status - VALUE (character), 3 mapping(s)
#> status_inv - INVALUE (character), 3 mapping(s)
# Sample data: `sex` contains a missing value and a code without an explicit
# mapping ("X"); `age` contains a missing value.
df <- data.frame(
  id = 1:6,
  sex = c("M", "F", "M", "F", NA, "X"),
  age = c(15, 25, 45, 70, 35, NA),
  stringsAsFactors = FALSE
)
sex_f <- format_get("sex")
age_f <- format_get("age")
df_formatted <- fput_df(
df,
sex = sex_f,
age = age_f,
suffix = "_label"
)
df_formatted
#> id sex age sex_label age_label
#> 1 1 M 15 Male Child
#> 2 2 F 25 Female Adult
#> 3 3 M 45 Male Adult
#> 4 4 F 70 Female Senior
#> 5 5 <NA> 35 Unknown Adult
#> 6 6 X NA Other Gender Age Unknown
# With .missing label
fput(c("M", "F", NA), "sex")
#> [1] "Male" "Female" "Unknown"
# With keep_na = TRUE
fput(c("M", "F", NA), sex_f, keep_na = TRUE)
#> [1] "Male" "Female" NA
# is_missing() checks
is_missing(NA)
#> [1] TRUE
is_missing(NaN)
#> [1] TRUE
is_missing("") # TRUE — empty strings are treated as missing
#> [1] TRUE
SAS date format names are auto-resolved — no pre-creation needed:
today <- Sys.Date()
data.frame(
format = c("DATE9.", "MMDDYY10.", "DDMMYY10.", "YYMMDD10.",
"MONYY7.", "WORDDATE.", "YEAR4.", "QTR."),
result = c(
fputn(today, "DATE9."),
fputn(today, "MMDDYY10."),
fputn(today, "DDMMYY10."),
fputn(today, "YYMMDD10."),
fputn(today, "MONYY7."),
fputn(today, "WORDDATE."),
fputn(today, "YEAR4."),
fputn(today, "QTR.")
)
)
#> format result
#> 1 DATE9. 28MAR2026
#> 2 MMDDYY10. 03/28/2026
#> 3 DDMMYY10. 28/03/2026
#> 4 YYMMDD10. 2026-03-28
#> 5 MONYY7. MAR2026
#> 6 WORDDATE. March 28, 2026
#> 7 YEAR4. 2026
#> 8 QTR. 1
# Multiple dates
dates <- as.Date(c("2020-01-15", "2020-06-30", "2020-12-25"))
fputn(dates, "DATE9.")
#> [1] "15JAN2020" "30JUN2020" "25DEC2020"
Time is represented as seconds since midnight:
seconds <- c(0, 3600, 45000, 86399)
data.frame(
seconds = seconds,
TIME8 = fputn(seconds, "TIME8."),
TIME5 = fputn(seconds, "TIME5."),
HHMM = fputn(seconds, "HHMM.")
)
#> seconds TIME8 TIME5 HHMM
#> 1 0 0:00:00 0:00 00:00
#> 2 3600 1:00:00 1:00 01:00
#> 3 45000 12:30:00 12:30 12:30
#> 4 86399 23:59:59 23:59 23:59
# Format the current timestamp with several SAS datetime format names and
# show the results side by side.
now <- Sys.time()
data.frame(
  format = c("DATETIME20.", "DATETIME13.", "DTDATE.", "DTYYMMDD."),
  result = c(
    fputn(now, "DATETIME20."),
    fputn(now, "DATETIME13."),
    fputn(now, "DTDATE."),
    fputn(now, "DTYYMMDD.")
  )
)
#> format result
#> 1 DATETIME20. 28MAR2026:13:58:42
#> 2 DATETIME13. 28MAR26:13:58
#> 3 DTDATE. 28MAR2026
#> 4 DTYYMMDD. 2026-03-28
# From numeric R-epoch seconds
r_secs <- as.numeric(as.POSIXct("2025-06-15 14:30:00", tz = "UTC"))
fputn(r_secs, "DATETIME20.")
#> [1] "15JUN2025:14:30:00"
Creating date formats with fnew_date():
# SAS-named format
fnew_date("DATE9.", name = "bday_fmt")
#> KS Format:bday_fmt
#> Type: date
#> Pattern: %d%b%Y (DATE9.)
birthdays <- as.Date(c("1990-03-25", "1985-11-03", "2000-07-14"))
fput(birthdays, "bday_fmt")
#> [1] "25MAR1990" "03NOV1985" "14JUL2000"
# Custom strftime pattern (e.g. DD.MM.YYYY)
fnew_date("%d.%m.%Y", name = "ru_date", type = "date")
#> KS Format:ru_date
#> Type: date
#> Pattern: %d.%m.%Y
fput(birthdays, "ru_date")
#> [1] "25.03.1990" "03.11.1985" "14.07.2000"
# SAS-named format with a missing label
fnew_date("MMDDYY10.", name = "us_date", .missing = "NO DATE")
#> KS Format:us_date
#> Type: date
#> Pattern: %m/%d/%Y (MMDDYY10.)
#> .missing => NO DATE
mixed <- c(as.Date("2025-01-01"), NA, as.Date("2025-12-31"))
fput(mixed, "us_date")
#> [1] "01/01/2025" "NO DATE" "12/31/2025"
fprint("bday_fmt")
#> KS Format:bday_fmt
#> Type: date
#> Pattern: %d%b%Y (DATE9.)
# Example visit data; the last visit_date is missing.
patients <- data.frame(
  id = 1:4,
  visit_date = as.Date(c("2025-01-10", "2025-02-15", "2025-03-20", NA)),
  stringsAsFactors = FALSE
)
visit_fmt <- fnew_date("DATE9.", name = "visit_fmt", .missing = "NOT RECORDED")
fput_df(patients, visit_date = visit_fmt)
#> id visit_date visit_date_fmt
#> 1 1 2025-01-10 10JAN2025
#> 2 2 2025-02-15 15FEB2025
#> 3 3 2025-03-20 20MAR2025
#> 4 4 <NA> NOT RECORDED
# Date, time and datetime formats can also be declared in text form by giving
# a pattern entry instead of value mappings.
fparse(text = '
VALUE enrldt (date)
pattern = "DATE9."
.missing = "Not Enrolled"
;
VALUE visit_time (time)
pattern = "TIME8."
;
VALUE stamp (datetime)
pattern = "DATETIME20."
;
')
fput(as.Date("2025-03-01"), "enrldt")
#> [1] "01MAR2025"
fput(36000, "visit_time")
#> [1] "10:00:00"
fput(as.POSIXct("2025-03-01 10:00:00", tz = "UTC"), "stamp")
#> [1] "01MAR2025:10:00:00"
# Export back to text
enrl_obj <- format_get("enrldt")
cat(fexport(enrldt = enrl_obj))
#> VALUE enrldt (date)
#> pattern = "DATE9."
#> .missing = "Not Enrolled"
#> ;
fclear()
#> All formats cleared from library.
With multilabel formats, a single value can match multiple labels:
fnew(
"0,5,TRUE,TRUE" = "Infant",
"6,11,TRUE,TRUE" = "Child",
"12,17,TRUE,TRUE" = "Adolescent",
"0,17,TRUE,TRUE" = "Pediatric",
"18,64,TRUE,TRUE" = "Adult",
"65,Inf,TRUE,TRUE" = "Elderly",
"18,Inf,TRUE,TRUE" = "Non-Pediatric",
name = "age_categories",
type = "numeric",
multilabel = TRUE
)
ages <- c(3, 14, 25, 70)
# fput returns first match only
fput(ages, "age_categories")
#> [1] "Infant" "Adolescent" "Adult" "Elderly"
# fput_all returns ALL matching labels
all_labels <- fput_all(ages, "age_categories")
for (i in seq_along(ages)) {
cat("Age", ages[i], "->", paste(all_labels[[i]], collapse = ", "), "\n")
}
#> Age 3 -> Infant, Pediatric
#> Age 14 -> Adolescent, Pediatric
#> Age 25 -> Adult, Non-Pediatric
#> Age 70 -> Elderly, Non-Pediatric
# Multilabel format with overlapping ranges plus .missing and .other labels.
# NOTE(review): the "low,high,TRUE,TRUE" keys presumably encode the bounds
# and whether each bound is inclusive; confirm against the fnew() docs.
fnew(
  "0,100,TRUE,TRUE" = "Valid Score",
  "0,49,TRUE,TRUE" = "Below Average",
  "50,100,TRUE,TRUE" = "Above Average",
  "90,100,TRUE,TRUE" = "Excellent",
  .missing = "No Score",
  .other = "Out of Range",
  name = "score_ml",
  type = "numeric",
  multilabel = TRUE
)
scores <- c(95, 45, NA, 150)
ml_result <- fput_all(scores, "score_ml")
for (i in seq_along(scores)) {
cat("Score", ifelse(is.na(scores[i]), "NA", scores[i]),
"->", paste(ml_result[[i]], collapse = ", "), "\n")
}
#> Score 95 -> Valid Score, Above Average, Excellent
#> Score 45 -> Valid Score, Below Average
#> Score NA -> No Score
#> Score 150 -> Out of Range
# The multilabel option can also be declared in text form by adding
# "multilabel" to the VALUE header.
fparse(text = '
VALUE risk (numeric, multilabel)
[0, 3] = "Low Risk"
[0, 7] = "Monitored"
(3, 7] = "Medium Risk"
(7, 10] = "High Risk"
;
')
risk_scores <- c(2, 5, 9)
risk_labels <- fput_all(risk_scores, "risk")
for (i in seq_along(risk_scores)) {
cat("Score", risk_scores[i], "->",
paste(risk_labels[[i]], collapse = " | "), "\n")
}
#> Score 2 -> Low Risk | Monitored
#> Score 5 -> Monitored | Medium Risk
#> Score 9 -> High Risk
# Fetch the registered "risk" format and export it back to text form.
risk_obj <- format_get("risk")
cat(fexport(risk = risk_obj))
#> VALUE risk (numeric, multilabel)
#> [0, 3] = "Low Risk"
#> [0, 7] = "Monitored"
#> (3, 7] = "Medium Risk"
#> (7, 10] = "High Risk"
#> ;
fprint("risk")
#> KS Format:risk (multilabel)
#> Type: numeric
#> Mappings:
#> [0, 3] => Low Risk
#> [0, 7] => Monitored
#> (3, 7] => Medium Risk
#> (7, 10] => High Risk
# Adverse-event grades: each grade has its own label, and the overlapping
# ranges 1-2 and 3-5 add a "Non-serious" or "Serious" label on top.
fnew(
  "1,1,TRUE,TRUE" = "Mild",
  "2,2,TRUE,TRUE" = "Moderate",
  "3,3,TRUE,TRUE" = "Severe",
  "4,4,TRUE,TRUE" = "Life-threatening",
  "5,5,TRUE,TRUE" = "Fatal",
  "3,5,TRUE,TRUE" = "Serious",
  "1,2,TRUE,TRUE" = "Non-serious",
  name = "ae_grade",
  type = "numeric",
  multilabel = TRUE
)
grades <- c(1, 2, 3, 4, 5)
ae_labels <- fput_all(grades, "ae_grade")
for (i in seq_along(grades)) {
cat("Grade", grades[i], ":",
paste(ae_labels[[i]], collapse = " + "), "\n")
}
#> Grade 1 : Mild + Non-serious
#> Grade 2 : Moderate + Non-serious
#> Grade 3 : Severe + Serious
#> Grade 4 : Life-threatening + Serious
#> Grade 5 : Fatal + Serious
fclear()
#> All formats cleared from library.
# Case-insensitive character format: with ignore_case = TRUE, the inputs "m"
# and "f" are matched to the "M" and "F" keys (see the output below).
sex_nc <- fnew(
  "M" = "Male",
  "F" = "Female",
  .missing = "Unknown",
  name = "sex_nc",
  type = "character",
  ignore_case = TRUE
)
input <- c("m", "F", "M", "f", NA)
fput(input, sex_nc)
#> [1] "Male" "Female" "Male" "Female" "Unknown"
# Note the (nocase) flag in the header
fprint("sex_nc")
#> KS Format:sex_nc (nocase)
#> Type: character
#> Mappings:
#> M => Male
#> F => Female
#> .missing => Unknown
# Also works with fputc
fputc("m", "sex_nc")
#> [1] "Male"
fclear()
#> All formats cleared from library.
Expression labels contain .x1, .x2, etc., which reference extra arguments passed to fput(). This lets you compute labels dynamically.
sprintf Expression
.x1, .x2)
ifelse Expression
mixed_fmt <- fnew(
"header" = "HEADER",
"n" = "sprintf('N=%s', .x1)",
"pct" = "sprintf('%.1f%%', .x1 * 100)",
name = "mixed",
type = "character"
)
keys <- c("header", "n", "pct", "header", "n")
vals <- c(0, 42, 0.15, 0, 100)
fput(keys, mixed_fmt, vals)
#> [1] "HEADER" "N=42" "15.0%" "HEADER" "N=100"
.other Fallback
A realistic clinical-trial example: e() marks labels as expressions evaluated at apply-time, .x1 references the extra argument, and a multiline dplyr::case_when() call shows complex conditional formatting.
# Denominators: total number of subjects in each analysis population
n.trt <- data.frame(
  pop = c("fas", "pps", "saf"),
  ntot = c(34, 30, 36)
)

# Return the total count stored in n.trt for the population code `pop`.
# A code that is not present in n.trt yields a zero-length result.
get_n <- function(pop) {
  is_requested <- n.trt[["pop"]] == pop
  n.trt[["ntot"]][is_requested]
}
fnew(
"n_fas" = e("get_n('fas')"),
"n_pps" = e("get_n('pps')"),
"n_saf" = e("get_n('saf')"),
"n" = "sprintf('%d', .x1)",
"n_pct_fas" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('fas'))",
"n_pct_pps" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('pps'))",
"n_pct_saf" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('saf'))",
"pct" = "dplyr::case_when(
.x1>0 & .x1<0.1 ~ sprintf('%5s', ' <0.1%'),
.x1>=0.1 | .x1==0 ~ sprintf(paste0('%5.', 1 ,'f%%'), .x1)
)",
"pval" = "dplyr::case_when(
.x1>=0 & .x1<0.001 ~ sprintf('%s', '<0.001'),
.x1>=0.001 & .x1<=0.999 ~ sprintf(paste0('%.', 3 ,'f'), .x1),
.x1>0.999 ~ sprintf('%s', '>0.999'), .default = '--'
)",
name = "stat",
type = "character"
)
The same format can be created via fparse(). Note that multiline expressions must be collapsed to single lines in the text block, and (eval) marks evaluated labels:
fmt <- '
VALUE stat_01 (character)
"n_fas" = "get_n(\'fas\')" (eval)
"n_pps" = "get_n(\'pps\')" (eval)
"n_saf" = "get_n(\'saf\')" (eval)
"n" = "sprintf(\'%d\', .x1)"
"pct" = "dplyr::case_when(.x1>0 & .x1<0.1 ~ sprintf(\'%5s\', \' <0.1%\'), .x1>=0.1 | .x1==0 ~ sprintf(paste0(\'%5.\', 1 ,\'f%%\'), .x1))"
"n_pct_fas" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'fas\'))"
"n_pct_pps" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'pps\'))"
"n_pct_saf" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'saf\'))"
"pval" = "dplyr::case_when(.x1>=0 & .x1<0.001 ~ sprintf(\'%s\', \'<0.001\'), .x1>=0.001 & .x1<=0.999 ~ sprintf(paste0(\'%.\', 3 ,\'f\'), .x1), .x1>0.999 ~ sprintf(\'%s\', \'>0.999\'), .default = \'--\')"
;'
fparse(fmt)
Both stat (via fnew) and stat_01 (via fparse) produce identical results:
df <- data.frame(
types = c("n_fas", "n_pps", "n_saf", "n", "pct", "pct", "n", "pval", "pval",
"n_pct_fas", "n_pct_pps", "n_pct_saf"),
values = c(NA, NA, NA, 42, 0.053, 0.0008, 100, 0.255, 0.0003, 22, 22, 22)
)
df$fmt <- fput(df$types, "stat", df$values)
df$fmt_01 <- fput(df$types, "stat_01", df$values)
print(df)
#> types values fmt fmt_01
#> 1 n_fas NA 34 34
#> 2 n_pps NA 30 30
#> 3 n_saf NA 36 36
#> 4 n 42.0000 42 42
#> 5 pct 0.0530 <0.1% <0.1%
#> 6 pct 0.0008 <0.1% <0.1%
#> 7 n 100.0000 100 100
#> 8 pval 0.2550 0.255 0.255
#> 9 pval 0.0003 <0.001 <0.001
#> 10 n_pct_fas 22.0000 22 ( 64.7%) 22 ( 64.7%)
#> 11 n_pct_pps 22.0000 22 ( 73.3%) 22 ( 73.3%)
#> 12 n_pct_saf 22.0000 22 ( 61.1%) 22 ( 61.1%)
fclear()
#> All formats cleared from library.
Each element can use a different format, determined by a vector of format names:
# Dispatch format: maps type code to format name
fnew("1" = "groupx", "2" = "groupy", "3" = "groupz",
name = "typefmt", type = "numeric")
# Per-group character formats
fnew("positive" = "agree", "negative" = "disagree", "neutral" = "notsure",
name = "groupx", type = "character")
fnew("positive" = "accept", "negative" = "reject", "neutral" = "possible",
name = "groupy", type = "character")
fnew("positive" = "pass", "negative" = "fail", "neutral" = "retest",
name = "groupz", type = "character")
type <- c(1, 1, 1, 2, 2, 2, 3, 3, 3)
response <- c("positive", "negative", "neutral",
"positive", "negative", "neutral",
"positive", "negative", "neutral")
# Step 1: map type -> format name
respfmt <- fput(type, "typefmt")
# Step 2: apply per-element format
word <- fputc(response, respfmt)
data.frame(type = type, response = response, respfmt = respfmt, word = word)
#> type response respfmt word
#> 1 1 positive groupx agree
#> 2 1 negative groupx disagree
#> 3 1 neutral groupx notsure
#> 4 2 positive groupy accept
#> 5 2 negative groupy reject
#> 6 2 neutral groupy possible
#> 7 3 positive groupz pass
#> 8 3 negative groupz fail
#> 9 3 neutral groupz retest
fclear()
#> All formats cleared from library.
A SAS-style workflow where format names are looked up dynamically per observation:
# Format that maps key codes to date format names
fnew("1" = "date9.", "2" = "mmddyy10.",
name = "writfmt", type = "numeric")
fnew_date("date9.")
#> KS Format:DATE9.
#> Type: date
#> Pattern: %d%b%Y (DATE9.)
fnew_date("mmddyy10.")
#> KS Format:MMDDYY10.
#> Type: date
#> Pattern: %m/%d/%Y (MMDDYY10.)
# Input data (R date numbers = days since 1970-01-01)
number <- c(12103, 10899)
key <- c(1, 2)
# Look up format name per observation
datefmt <- fputn(key, "writfmt")
# Apply per-element date format
date <- fputn(number, datefmt)
data.frame(number = number, key = key, datefmt = datefmt, date = date)
#> number key datefmt date
#> 1 12103 1 date9. 20FEB2003
#> 2 10899 2 mmddyy10. 11/04/1999
fclear()
#> All formats cleared from library.
The fimport() function reads a CSV file exported from a SAS format catalogue (PROC FORMAT ... CNTLOUT=):
imported <- fimport(csv_path)
#> Warning: Skipping PICTURE format: "PICFMT"
#> ℹ TYPE="P" is not supported by ksformat.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.A' (HLO='S') has no R equivalent.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.B' (HLO='S') has no R equivalent.
#> ✔ Imported 4 formats and 1 invalue from
#> '/tmp/RtmpZPeVsJ/Rinst1e13e97bade264/ksformat/extdata/test_cntlout.csv'.
names(imported)
#> [1] "AGEGRP" "BMICAT" "GENDER" "RACEIN" "SMISSING"
fprint()
#> Registered formats:
#> AGEGRP - VALUE (numeric), 3 mapping(s)
#> BMICAT - VALUE (numeric), 4 mapping(s)
#> GENDER - VALUE (character), 2 mapping(s)
#> RACEIN - INVALUE (numeric), 3 mapping(s)
#> SMISSING - VALUE (numeric), 1 mapping(s)
# Character format (GENDER)
gender_codes <- c("M", "F", NA, "X")
data.frame(
code = gender_codes,
label = fputc(gender_codes, "GENDER")
)
#> code label
#> 1 M Male
#> 2 F Female
#> 3 <NA> Unknown
#> 4 X X
# Numeric format (AGEGRP)
ages <- c(5, 17, 18, 45, 65, 100, NA, -1)
data.frame(
age = ages,
group = fputn(ages, "AGEGRP")
)
#> age group
#> 1 5 Child
#> 2 17 Child
#> 3 18 Adult
#> 4 45 Adult
#> 5 65 Senior
#> 6 100 Senior
#> 7 NA Missing Age
#> 8 -1 Other
# Numeric format (BMICAT)
bmi_values <- c(15.0, 18.5, 22.3, 25.0, 28.7, 30.0, 35.5)
data.frame(
bmi = bmi_values,
category = fputn(bmi_values, "BMICAT")
)
#> bmi category
#> 1 15.0 Underweight
#> 2 18.5 Normal
#> 3 22.3 Normal
#> 4 25.0 Overweight
#> 5 28.7 Overweight
#> 6 30.0 Obese
#> 7 35.5 Obese
# Invalue (RACEIN)
race_labels <- c("White", "Black", "Asian", "Other")
data.frame(
label = race_labels,
code = finputn(race_labels, "RACEIN")
)
#> label code
#> 1 White 1
#> 2 Black 2
#> 3 Asian 3
#> 4 Other NA
# Sample data with one row in which both sex and age are missing.
df <- data.frame(
  id = 1:5,
  sex = c("M", "F", "M", NA, "F"),
  age = c(10, 30, 70, NA, 50),
  stringsAsFactors = FALSE
)
gender_fmt <- imported[["GENDER"]]
age_fmt <- imported[["AGEGRP"]]
fput_df(df, sex = gender_fmt, age = age_fmt, suffix = "_label")
#> id sex age sex_label age_label
#> 1 1 M 10 Male Child
#> 2 2 F 30 Female Adult
#> 3 3 M 70 Male Senior
#> 4 4 <NA> NA Unknown Missing Age
#> 5 5 F 50 Female Adult
# Export the imported AGEGRP format back to text form.
cat(fexport(AGEGRP = age_fmt))
#> VALUE AGEGRP (numeric)
#> [0, 17] = "Child"
#> [18, 64] = "Adult"
#> [65, HIGH] = "Senior"
#> .missing = "Missing Age"
#> .other = "Other"
#> ;
cat(fexport(GENDER = gender_fmt))
#> VALUE GENDER (character)
#> "M" = "Male"
#> "F" = "Female"
#> .missing = "Unknown"
#> ;
# Empty the format library before importing again with register = FALSE.
fclear()
#> All formats cleared from library.
manual <- fimport(csv_path, register = FALSE)
#> Warning: Skipping PICTURE format: "PICFMT"
#> ℹ TYPE="P" is not supported by ksformat.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.A' (HLO='S') has no R equivalent.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.B' (HLO='S') has no R equivalent.
#> ✔ Imported 4 formats and 1 invalue from
#> '/tmp/RtmpZPeVsJ/Rinst1e13e97bade264/ksformat/extdata/test_cntlout.csv'.
# Library should be empty
fprint()
#> Format library is empty
# Use directly from returned list
fput(c("M", "F"), manual[["GENDER"]])
#> [1] "Male" "Female"
fclear()
#> All formats cleared from library.
Expression labels can select between languages at apply-time using an extra argument:
# Single format, language selected via .x1 extra argument
sex_bi <- fnew(
"M" = "ifelse(.x1 == 'en', 'Male', 'Homme')",
"F" = "ifelse(.x1 == 'en', 'Female', 'Femme')",
.missing = "Unknown",
name = "sex_bi"
)
# .x1 = language code per observation
fput(c("M", "F", "M"), sex_bi, c("en", "fr", "en"))
#> [1] "Male" "Femme" "Male"
# -> "Male" "Femme" "Male"
# Alternative: one format per language, selected at apply-time
fnew("M" = "Male", "F" = "Female", .missing = "Unknown", name = "sex_en")
fnew("M" = "Homme", "F" = "Femme", .missing = "Inconnu", name = "sex_fr")
lang <- "fr"
fput(c("M", "F", NA), paste0("sex_", lang))
#> [1] "Homme" "Femme" "Inconnu"
# -> "Homme" "Femme" "Inconnu"
fclear()
#> All formats cleared from library.
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.