tableone: Configuration

Configuration and formatting options

This vignette provides examples of some of the formatting options. To demonstrate them we will use the survival::cgd dataset:

# Prepare the example data set from survival::cgd.
gcd <- survival::cgd %>%
  # keep only the rows for the first visit (enum == 1)
  dplyr::filter(enum == 1) %>%
  # coerce the steroids and propylac columns to logical values;
  # see later for a better way of doing this.
  dplyr::mutate(
    steroids = as.logical(steroids),
    propylac = as.logical(propylac)
  )
  

# The formula lists the variables that appear in the table on its
# right-hand side.
formula <- Surv(tstart, tstop, status) ~
  treat + sex + age + height + weight + inherit + steroids + hos.cat

# A basic unstratified population description table:
gcd %>% compare_population(formula)
placeborIFN-g
VariableCharacteristicValue (N=65)Value (N=63)P value
sexmale % [95% CI] (n)81.5% [70.4%—89.1%] (53)81.0% [69.6%—88.8%] (51)1 †
female % [95% CI] (n)18.5% [10.9%—29.6%] (12)19.0% [11.2%—30.4%] (12)
ageMedian [IQR]14 [7—24]12 [7—19.5]0.56 ††
heightMedian [IQR]143 [115—171]139 [119—167]0.45 †††
weightMedian [IQR]36.1 [21.6—63.7]34.4 [20.6—53.7]0.4 †††
inheritX-linked % [95% CI] (n)63.1% [50.9%—73.8%] (41)71.4% [59.3%—81.1%] (45)0.35 †
autosomal % [95% CI] (n)36.9% [26.2%—49.1%] (24)28.6% [18.9%—40.7%] (18)
steroidsfalse % [95% CI] (n)96.9% [89.5%—99.2%] (63)98.4% [91.5%—99.7%] (62)1 †
true % [95% CI] (n)3.1% [0.8%—10.5%] (2)1.6% [0.3%—8.5%] (1)
hos.catUS:NIH % [95% CI] (n)16.9% [9.7%—27.8%] (11)23.8% [15.0%—35.6%] (15)0.7 †
US:other % [95% CI] (n)49.2% [37.5%—61.1%] (32)49.2% [37.3%—61.2%] (31)
Europe:Amsterdam % [95% CI] (n)15.4% [8.6%—26.1%] (10)14.3% [7.7%—25.0%] (9)
Europe:other % [95% CI] (n)18.5% [10.9%—29.6%] (12)12.7% [6.6%—23.1%] (8)
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sample Kolmogorov-Smirnov test (continuous)
Normal distributions determined by the Anderson-Darling test (P>0.005)
An adjusted P value of 0.00714 may be considered significant.

Column labelling

  • A custom labeller function can be defined for the table.
# Table relabelling function: takes a character vector of column names and
# returns the display label for each one.
rename_cols <- function(col) {
  # default label: the column name converted to sentence case
  fallback <- stringr::str_to_sentence(col)
  dplyr::case_when(
    col == "hos.cat" ~ "Location",
    col == "steroids" ~ "Steroid treatment",
    TRUE ~ fallback
  )
}

# Register the labeller through an option. It is deliberately not reset
# afterwards, because all the subsequent examples use it:
options(tableone.labeller = rename_cols)

gcd %>% compare_population(formula)
placeborIFN-g
VariableCharacteristicValue (N=65)Value (N=63)P value
Sexmale % [95% CI] (n)81.5% [70.4%—89.1%] (53)81.0% [69.6%—88.8%] (51)1 †
female % [95% CI] (n)18.5% [10.9%—29.6%] (12)19.0% [11.2%—30.4%] (12)
AgeMedian [IQR]14 [7—24]12 [7—19.5]0.56 ††
HeightMedian [IQR]143 [115—171]139 [119—167]0.45 †††
WeightMedian [IQR]36.1 [21.6—63.7]34.4 [20.6—53.7]0.4 †††
InheritX-linked % [95% CI] (n)63.1% [50.9%—73.8%] (41)71.4% [59.3%—81.1%] (45)0.35 †
autosomal % [95% CI] (n)36.9% [26.2%—49.1%] (24)28.6% [18.9%—40.7%] (18)
Steroid treatmentfalse % [95% CI] (n)96.9% [89.5%—99.2%] (63)98.4% [91.5%—99.7%] (62)1 †
true % [95% CI] (n)3.1% [0.8%—10.5%] (2)1.6% [0.3%—8.5%] (1)
LocationUS:NIH % [95% CI] (n)16.9% [9.7%—27.8%] (11)23.8% [15.0%—35.6%] (15)0.7 †
US:other % [95% CI] (n)49.2% [37.5%—61.1%] (32)49.2% [37.3%—61.2%] (31)
Europe:Amsterdam % [95% CI] (n)15.4% [8.6%—26.1%] (10)14.3% [7.7%—25.0%] (9)
Europe:other % [95% CI] (n)18.5% [10.9%—29.6%] (12)12.7% [6.6%—23.1%] (8)
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sample Kolmogorov-Smirnov test (continuous)
Normal distributions determined by the Anderson-Darling test (P>0.005)
An adjusted P value of 0.00714 may be considered significant.

Content format

  • Change the decimal point
  • Change the font and font size
  • Change the labelling of the p-value column
  • Change the format of the p-value
  • Hide the daggers for the method for the p-value
# Change the formatting options, keeping the previous values in `old` so
# that they can be restored afterwards.
old <- options(
  # use a mid point as the decimal point
  tableone.dp = "\u00B7",
  tableone.font = "Arial Narrow",
  tableone.font_size = 12,
  tableone.pvalue_column_name = "p-value",
  # the p-value formatter must be a function that takes a vector of numbers
  # and returns a vector of characters. The example here is a function that
  # returns such a function.
  tableone.pvalue_formatter =
    scales::label_pvalue(accuracy = 0.01, decimal.mark = "\u00B7"),
  # hide the daggers that mark the method used for each p-value
  tableone.show_pvalue_method = FALSE
)

gcd %>% compare_population(formula)
placeborIFN-g
VariableCharacteristicValue (N=65)Value (N=63)p-value
Sexmale % [95% CI] (n)81·5% [70·4%—89·1%] (53)81·0% [69·6%—88·8%] (51)>0·99
female % [95% CI] (n)18·5% [10·9%—29·6%] (12)19·0% [11·2%—30·4%] (12)
AgeMedian [IQR]14 [7—24]12 [7—19·5]0·56
HeightMedian [IQR]143 [115—171]139 [119—167]0·45
WeightMedian [IQR]36·1 [21·6—63·7]34·4 [20·6—53·7]0·40
InheritX-linked % [95% CI] (n)63·1% [50·9%—73·8%] (41)71·4% [59·3%—81·1%] (45)0·35
autosomal % [95% CI] (n)36·9% [26·2%—49·1%] (24)28·6% [18·9%—40·7%] (18)
Steroid treatmentfalse % [95% CI] (n)96·9% [89·5%—99·2%] (63)98·4% [91·5%—99·7%] (62)>0·99
true % [95% CI] (n)3·1% [0·8%—10·5%] (2)1·6% [0·3%—8·5%] (1)
LocationUS:NIH % [95% CI] (n)16·9% [9·7%—27·8%] (11)23·8% [15·0%—35·6%] (15)0·69
US:other % [95% CI] (n)49·2% [37·5%—61·1%] (32)49·2% [37·3%—61·2%] (31)
Europe:Amsterdam % [95% CI] (n)15·4% [8·6%—26·1%] (10)14·3% [7·7%—25·0%] (9)
Europe:other % [95% CI] (n)18·5% [10·9%—29·6%] (12)12·7% [6·6%—23·1%] (8)
Significance determined using Fisher's exact test (categorical variables) or 2 sample Wilcoxon Rank Sum test, 2 sample Kolmogorov-Smirnov test (continuous variables)
Normal distributions determined by the Anderson-Darling test (P>0.005)
An adjusted p-value of 0.00714 may be considered significant.
# reset the options to the previous values saved in `old`
options(old)

Summary types

The default statistics may not suit the data, particularly the choice between presenting a mean or a median, which depends on whether the data is detected to be normally distributed. The presentation can be overridden by supplying a named list to override_type, where the names are the original column names to override. This does not change the significance test that is used, which still depends on the detection of normality. The test type and significance level used to detect normality are also configurable.

# override_type - a named list mapping original column names to the summary
# type to present

# With this looser definition of normality (i.e. less likely to reject the
# null that the data is normally distributed), height and weight are found to
# be normally distributed and hence the t-test is used.
old <- options(
  tableone.normality_test = "lillie",
  tableone.normality_significance = 0.00001
)

gcd %>%
  compare_population(
    formula,
    # age is still not normally distributed but we can override it to be
    # presented as a mean and SD.
    override_type = list(age = "mean_sd")
  )
placeborIFN-g
VariableCharacteristicValue (N=65)Value (N=63)P value
Sexmale % [95% CI] (n)81.5% [70.4%—89.1%] (53)81.0% [69.6%—88.8%] (51)1 †
female % [95% CI] (n)18.5% [10.9%—29.6%] (12)19.0% [11.2%—30.4%] (12)
AgeMean ± SD15 ± 9.6414.3 ± 10.10.56 ††
HeightMean ± SD141 ± 34.1140 ± 27.20.86 †††
WeightMean ± SD42.3 ± 24.338.8 ± 19.90.37 †††
InheritX-linked % [95% CI] (n)63.1% [50.9%—73.8%] (41)71.4% [59.3%—81.1%] (45)0.35 †
autosomal % [95% CI] (n)36.9% [26.2%—49.1%] (24)28.6% [18.9%—40.7%] (18)
Steroid treatmentfalse % [95% CI] (n)96.9% [89.5%—99.2%] (63)98.4% [91.5%—99.7%] (62)1 †
true % [95% CI] (n)3.1% [0.8%—10.5%] (2)1.6% [0.3%—8.5%] (1)
LocationUS:NIH % [95% CI] (n)16.9% [9.7%—27.8%] (11)23.8% [15.0%—35.6%] (15)0.7 †
US:other % [95% CI] (n)49.2% [37.5%—61.1%] (32)49.2% [37.3%—61.2%] (31)
Europe:Amsterdam % [95% CI] (n)15.4% [8.6%—26.1%] (10)14.3% [7.7%—25.0%] (9)
Europe:other % [95% CI] (n)18.5% [10.9%—29.6%] (12)12.7% [6.6%—23.1%] (8)
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sided student's t-test (continuous)
Normal distributions determined by the Lilliefors (Kolmogorov-Smirnov) test (P>1e-05)
An adjusted P value of 0.00714 may be considered significant.
# restore the options to the previous values saved in `old`
options(old)
# the following option also controls which non-parametric test is chosen
# (between the Wilcoxon and KS tests):
# options("tableone.tolerance_to_ties"=0.25)

Customising the number of decimal places

The number of decimal places can be changed on a column-by-column basis (e.g. here for the real-valued columns, using a named list) or on a systematic basis (e.g. for all percentages). The specification can be given either as fixed decimal places (e.g. “2f”) or as significant figures (e.g. “3g”). N.b. this setting is independent of the p-value formatter.

gcd %>%
  compare_population(
    formula,
    # decimal places can be given per column as a named list, using either
    # the "5f" syntax (5 decimal places, fixed) or the "6g" syntax
    # (6 significant figures):
    override_real_dp = list(age = "0f", height = "0f", weight = "2f"),
    # or as a plain set of numbers. If the option is unnamed it is applied
    # to all the variables:
    override_percent_dp = 0
  )
placeborIFN-g
VariableCharacteristicValue (N=65)Value (N=63)P value
Sexmale % [95% CI] (n)82% [70%—89%] (53)81% [70%—89%] (51)1 †
female % [95% CI] (n)18% [11%—30%] (12)19% [11%—30%] (12)
AgeMedian [IQR]14 [7—24]12 [7—20]0.56 ††
HeightMedian [IQR]143 [115—171]139 [119—167]0.45 †††
WeightMedian [IQR]36.10 [21.60—63.70]34.40 [20.65—53.65]0.4 †††
InheritX-linked % [95% CI] (n)63% [51%—74%] (41)71% [59%—81%] (45)0.35 †
autosomal % [95% CI] (n)37% [26%—49%] (24)29% [19%—41%] (18)
Steroid treatmentfalse % [95% CI] (n)97% [89%—99%] (63)98% [92%—100%] (62)1 †
true % [95% CI] (n)3% [1%—11%] (2)2% [0%—8%] (1)
LocationUS:NIH % [95% CI] (n)17% [10%—28%] (11)24% [15%—36%] (15)0.7 †
US:other % [95% CI] (n)49% [37%—61%] (32)49% [37%—61%] (31)
Europe:Amsterdam % [95% CI] (n)15% [9%—26%] (10)14% [8%—25%] (9)
Europe:other % [95% CI] (n)18% [11%—30%] (12)13% [7%—23%] (8)
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sample Kolmogorov-Smirnov test (continuous)
Normal distributions determined by the Anderson-Darling test (P>0.005)
An adjusted P value of 0.00714 may be considered significant.

Summary format customisation

Several standard layouts are defined (“relaxed”, “compact”, “micro”, “simple”, “single” and “missing”). Any of these can be passed as the layout parameter to give a particular format to the columns and content of the table.

# use the standard "relaxed" layout
gcd %>% compare_population(formula, layout = "relaxed")
placeborIFN-g
VariableCharacteristicValueCount (N=65)ValueCount (N=63)P value
Sexmale % [95% CI]81.5% [70.4%—89.1%]53/6581.0% [69.6%—88.8%]51/631 †
female % [95% CI]18.5% [10.9%—29.6%]12/6519.0% [11.2%—30.4%]12/63
AgeMedian [IQR]14 [7—24]6512 [7—19.5]630.56 ††
HeightMedian [IQR]143 [115—171]65139 [119—167]630.45 †††
WeightMedian [IQR]36.1 [21.6—63.7]6534.4 [20.6—53.7]630.4 †††
InheritX-linked % [95% CI]63.1% [50.9%—73.8%]41/6571.4% [59.3%—81.1%]45/630.35 †
autosomal % [95% CI]36.9% [26.2%—49.1%]24/6528.6% [18.9%—40.7%]18/63
Steroid treatmentfalse % [95% CI]96.9% [89.5%—99.2%]63/6598.4% [91.5%—99.7%]62/631 †
true % [95% CI]3.1% [0.8%—10.5%]2/651.6% [0.3%—8.5%]1/63
LocationUS:NIH % [95% CI]16.9% [9.7%—27.8%]11/6523.8% [15.0%—35.6%]15/630.7 †
US:other % [95% CI]49.2% [37.5%—61.1%]32/6549.2% [37.3%—61.2%]31/63
Europe:Amsterdam % [95% CI]15.4% [8.6%—26.1%]10/6514.3% [7.7%—25.0%]9/63
Europe:other % [95% CI]18.5% [10.9%—29.6%]12/6512.7% [6.6%—23.1%]8/63
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sample Kolmogorov-Smirnov test (continuous)
Normal distributions determined by the Anderson-Darling test (P>0.005)
An adjusted P value of 0.00714 may be considered significant.

Custom layouts

The “relaxed” standard layout is defined using a list. This is shown below:

# One entry per summary type; each entry maps a column name to the template
# used to fill that column.
list(
  subtype_count = list(
    characteristic = "{level} % [95% CI]",
    Value = "{.sprintf_na('%1.1f%% [%1.1f%%—%1.1f%%]',prob.0.5*100,prob.0.025*100,prob.0.975*100)}",
    `Count (N={N})` = "{.sprintf_na('%d/%d',x,n)}"
  ),
  median_iqr = list(
    characteristic = "Median [IQR]",
    Value = "{.sprintf_na('%1.3g %s [%1.3g—%1.3g]',q.0.5,unit,q.0.25,q.0.75)}",
    `Count (N={N})` = "{.sprintf_na('%d',n)}"
  ),
  mean_sd = list(
    characteristic = "Mean ± SD",
    Value = "{.sprintf_na('%1.3g %s ± %1.3g',mean,unit,sd)}",
    `Count (N={N})` = "{.sprintf_na('%d',n)}"
  ),
  skipped = list(
    characteristic = "—",
    Value = "—",
    `Count (N={N})` = "{.sprintf_na('%d',n)}"
  )
)

We can produce a customised list based on this and supply it to a formatting function as the layout parameter. The named list defines the column names and the column contents. At the moment, one item in this list must be named characteristic. The column contents can refer to the following variables:

  • subtype_count can use {level}, {prob.0.5}, {prob.0.025}, {prob.0.975}, {x}, {n}, {N} - x is subgroup count, n is data count excluding missing, N includes missing.
  • median_iqr can use {q.0.5}, {q.0.25}, …, {unit}, {n}, {N} - n excludes missing, N does not.
  • mean_sd can use {mean}, {sd}, {unit}, {n}, {N} - n excludes missing, N does not.
  • skipped can use {unit}, {n}, {N} - n excludes missing, N does not.

Other than the characteristic column, the column names are derived from the names of the custom configuration list. The names can also be configured using glue and this can use intervention level data like {N} for the subgroup counts or data level variables such as {N_total} which is the number of items across all groups or {N_missing} for example.

There are a few useful formatting functions that the spec can also use beyond the usual text processing functions:

  • .sprintf_na - sprintfs a set of numbers replacing the output with getOption("tableone.na","\u2014") if all values are missing, and if some values are missing replacing each individual missing value with
    getOption("tableone.missing","<?>")
  • .sprintf_no_na - sprintfs a set of numbers replacing the output with getOption("tableone.na","\u2014") if any values are missing
  • .maybe - returns a string if it is present or “” if NA
# Custom layout: a characteristic column plus a single value column whose
# header is itself a template using {N} and {N_total}.
custom <- list(
  subtype_count = list(
    characteristic = "{level}",
    `Value (N={N}/{N_total})` = "{.sprintf_na('%1.1f%% (%d/%d)',prob.0.5*100,x,n)}"
  ),
  median_iqr = list(
    characteristic = "Median (N)",
    `Value (N={N}/{N_total})` = "{.sprintf_na('%1.3g (%d)',q.0.5,n)}"
  ),
  mean_sd = list(
    characteristic = "Mean (N)",
    `Value (N={N}/{N_total})` = "{.sprintf_na('%1.3g (%d)',mean,n)}"
  ),
  skipped = list(
    characteristic = "(N)",
    `Value (N={N}/{N_total})` = "{.sprintf_na('— (%d)',n)}"
  )
)

# Printing control: the following options control how missing values
# produced by the .sprintf_na function are shown:
# getOption("tableone.missing","<?>")
# getOption("tableone.na","\u2014")

# use the custom layout defined above
gcd %>% compare_population(formula, layout = custom)
placeborIFN-g
VariableCharacteristicValue (N=65/128)Value (N=63/128)P value
Sexmale81.5% (53/65)81.0% (51/63)1 †
female18.5% (12/65)19.0% (12/63)
AgeMedian (N)14 (65)12 (63)0.56 ††
HeightMedian (N)143 (65)139 (63)0.45 †††
WeightMedian (N)36.1 (65)34.4 (63)0.4 †††
InheritX-linked63.1% (41/65)71.4% (45/63)0.35 †
autosomal36.9% (24/65)28.6% (18/63)
Steroid treatmentfalse96.9% (63/65)98.4% (62/63)1 †
true3.1% (2/65)1.6% (1/63)
LocationUS:NIH16.9% (11/65)23.8% (15/63)0.7 †
US:other49.2% (32/65)49.2% (31/63)
Europe:Amsterdam15.4% (10/65)14.3% (9/63)
Europe:other18.5% (12/65)12.7% (8/63)
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sample Kolmogorov-Smirnov test (continuous)
Normal distributions determined by the Anderson-Darling test (P>0.005)
An adjusted P value of 0.00714 may be considered significant.