This vignette provides examples of some of the formatting options. To
demonstrate them we will use the `survival::cgd` dataset:
# Set up the example data from survival::cgd.
gcd <- survival::cgd %>%
  # keep only the rows for the first visit (enum == 1)
  dplyr::filter(enum == 1) %>%
  # convert the steroids and propylac columns into logical values;
  # see later for a better way of doing this.
  dplyr::mutate(dplyr::across(c(steroids, propylac), as.logical))
# A basic unstratified population description table is as follows.
# The object is named `formula` because the later examples reuse it.
formula <- Surv(tstart, tstop, status) ~
  treat + sex + age + height + weight + inherit + steroids + hos.cat

gcd %>%
  compare_population(formula)
placebo | rIFN-g | |||
---|---|---|---|---|
Variable | Characteristic | Value (N=65) | Value (N=63) | P value |
sex | male % [95% CI] (n) | 81.5% [70.4%—89.1%] (53) | 81.0% [69.6%—88.8%] (51) | 1 † |
female % [95% CI] (n) | 18.5% [10.9%—29.6%] (12) | 19.0% [11.2%—30.4%] (12) | ||
age | Median [IQR] | 14 [7—24] | 12 [7—19.5] | 0.56 †† |
height | Median [IQR] | 143 [115—171] | 139 [119—167] | 0.45 ††† |
weight | Median [IQR] | 36.1 [21.6—63.7] | 34.4 [20.6—53.7] | 0.4 ††† |
inherit | X-linked % [95% CI] (n) | 63.1% [50.9%—73.8%] (41) | 71.4% [59.3%—81.1%] (45) | 0.35 † |
autosomal % [95% CI] (n) | 36.9% [26.2%—49.1%] (24) | 28.6% [18.9%—40.7%] (18) | ||
steroids | false % [95% CI] (n) | 96.9% [89.5%—99.2%] (63) | 98.4% [91.5%—99.7%] (62) | 1 † |
true % [95% CI] (n) | 3.1% [0.8%—10.5%] (2) | 1.6% [0.3%—8.5%] (1) | ||
hos.cat | US:NIH % [95% CI] (n) | 16.9% [9.7%—27.8%] (11) | 23.8% [15.0%—35.6%] (15) | 0.7 † |
US:other % [95% CI] (n) | 49.2% [37.5%—61.1%] (32) | 49.2% [37.3%—61.2%] (31) | ||
Europe:Amsterdam % [95% CI] (n) | 15.4% [8.6%—26.1%] (10) | 14.3% [7.7%—25.0%] (9) | ||
Europe:other % [95% CI] (n) | 18.5% [10.9%—29.6%] (12) | 12.7% [6.6%—23.1%] (8) | ||
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sample Kolmogorov-Smirnov test (continuous) Normal distributions determined by the Anderson-Darling test (P>0.005) An adjusted P value of 0.00714 may be considered significant. |
# Table relabelling function.
# `col` is a vector of column names; the result is a character vector of
# display labels of the same length.
rename_cols <- function(col) {
  # default label: the column name converted to sentence case
  sentence_case <- stringr::str_to_sentence(col)
  # two columns get hand-written labels, everything else uses the default
  dplyr::case_when(
    col == "steroids" ~ "Steroid treatment",
    col == "hos.cat" ~ "Location",
    TRUE ~ sentence_case
  )
}
# Register the relabelling function through an option.
# It is deliberately not reset, because all the subsequent examples use it:
options(tableone.labeller = rename_cols)

gcd %>%
  compare_population(formula)
placebo | rIFN-g | |||
---|---|---|---|---|
Variable | Characteristic | Value (N=65) | Value (N=63) | P value |
Sex | male % [95% CI] (n) | 81.5% [70.4%—89.1%] (53) | 81.0% [69.6%—88.8%] (51) | 1 † |
female % [95% CI] (n) | 18.5% [10.9%—29.6%] (12) | 19.0% [11.2%—30.4%] (12) | ||
Age | Median [IQR] | 14 [7—24] | 12 [7—19.5] | 0.56 †† |
Height | Median [IQR] | 143 [115—171] | 139 [119—167] | 0.45 ††† |
Weight | Median [IQR] | 36.1 [21.6—63.7] | 34.4 [20.6—53.7] | 0.4 ††† |
Inherit | X-linked % [95% CI] (n) | 63.1% [50.9%—73.8%] (41) | 71.4% [59.3%—81.1%] (45) | 0.35 † |
autosomal % [95% CI] (n) | 36.9% [26.2%—49.1%] (24) | 28.6% [18.9%—40.7%] (18) | ||
Steroid treatment | false % [95% CI] (n) | 96.9% [89.5%—99.2%] (63) | 98.4% [91.5%—99.7%] (62) | 1 † |
true % [95% CI] (n) | 3.1% [0.8%—10.5%] (2) | 1.6% [0.3%—8.5%] (1) | ||
Location | US:NIH % [95% CI] (n) | 16.9% [9.7%—27.8%] (11) | 23.8% [15.0%—35.6%] (15) | 0.7 † |
US:other % [95% CI] (n) | 49.2% [37.5%—61.1%] (32) | 49.2% [37.3%—61.2%] (31) | ||
Europe:Amsterdam % [95% CI] (n) | 15.4% [8.6%—26.1%] (10) | 14.3% [7.7%—25.0%] (9) | ||
Europe:other % [95% CI] (n) | 18.5% [10.9%—29.6%] (12) | 12.7% [6.6%—23.1%] (8) | ||
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sample Kolmogorov-Smirnov test (continuous) Normal distributions determined by the Anderson-Darling test (P>0.005) An adjusted P value of 0.00714 may be considered significant. |
# Change the table formatting options for this example only. `options()`
# returns the previous values of the options it sets; they are kept in `old`
# so that they can be restored once the table has been produced.
old <- options(
  # set a mid point as decimal point
  "tableone.dp" = "\u00B7",
  "tableone.font" = "Arial Narrow",
  "tableone.font_size" = 12,
  "tableone.pvalue_column_name" = "p-value",
  # the p-value formatter must be a function that takes a vector of numbers
  # and returns a vector of characters. The example here is a function that
  # returns a function.
  "tableone.pvalue_formatter" =
    scales::label_pvalue(accuracy = 0.01, decimal.mark = "\u00B7"),
  "tableone.show_pvalue_method" = FALSE
)

gcd %>% compare_population(formula)

# Restore the previous values so these settings do not leak into the
# following examples, whose tables are shown with the default formatting.
options(old)
placebo | rIFN-g | |||
---|---|---|---|---|
Variable | Characteristic | Value (N=65) | Value (N=63) | p-value |
Sex | male % [95% CI] (n) | 81·5% [70·4%—89·1%] (53) | 81·0% [69·6%—88·8%] (51) | >0·99 |
female % [95% CI] (n) | 18·5% [10·9%—29·6%] (12) | 19·0% [11·2%—30·4%] (12) | ||
Age | Median [IQR] | 14 [7—24] | 12 [7—19·5] | 0·56 |
Height | Median [IQR] | 143 [115—171] | 139 [119—167] | 0·45 |
Weight | Median [IQR] | 36·1 [21·6—63·7] | 34·4 [20·6—53·7] | 0·40 |
Inherit | X-linked % [95% CI] (n) | 63·1% [50·9%—73·8%] (41) | 71·4% [59·3%—81·1%] (45) | 0·35 |
autosomal % [95% CI] (n) | 36·9% [26·2%—49·1%] (24) | 28·6% [18·9%—40·7%] (18) | ||
Steroid treatment | false % [95% CI] (n) | 96·9% [89·5%—99·2%] (63) | 98·4% [91·5%—99·7%] (62) | >0·99 |
true % [95% CI] (n) | 3·1% [0·8%—10·5%] (2) | 1·6% [0·3%—8·5%] (1) | ||
Location | US:NIH % [95% CI] (n) | 16·9% [9·7%—27·8%] (11) | 23·8% [15·0%—35·6%] (15) | 0·69 |
US:other % [95% CI] (n) | 49·2% [37·5%—61·1%] (32) | 49·2% [37·3%—61·2%] (31) | ||
Europe:Amsterdam % [95% CI] (n) | 15·4% [8·6%—26·1%] (10) | 14·3% [7·7%—25·0%] (9) | ||
Europe:other % [95% CI] (n) | 18·5% [10·9%—29·6%] (12) | 12·7% [6·6%—23·1%] (8) | ||
Significance determined using Fisher's exact test (categorical variables) or 2 sample Wilcoxon Rank Sum test, 2 sample Kolmogorov-Smirnov test (continuous variables) Normal distributions determined by the Anderson-Darling test (P>0.005) An adjusted p-value of 0.00714 may be considered significant. |
The default statistics may seem wrong for the data, particularly the
decision around whether to present mean or median, which depends on the
detection of normality in the data. The presentation can be overridden
by supplying a named list to `override_type`, the names here being the
original column names to override. This won’t change the statistical test
used to determine significance, which still depends on the detection of
normality. The normality test and its significance level are also
configurable.
# With this looser definition of normality (i.e. less likely to reject the
# null that the data is normally distributed), height and weight are found to
# be normally distributed and hence the t-test is used.
# The previous option values are kept in `old` so they can be restored below.
old <- options(
  "tableone.normality_test" = "lillie",
  "tableone.normality_significance" = 0.00001
)

gcd %>% compare_population(
  formula,
  # override_type - a named list of column names and the summary type to
  # present for each of them.
  # age is still not normally distributed but we can override it to be
  # presented as a mean and SD.
  override_type = list(age = "mean_sd")
)

# Restore the previous normality settings so that the following examples use
# the default normality test again.
options(old)
placebo | rIFN-g | |||
---|---|---|---|---|
Variable | Characteristic | Value (N=65) | Value (N=63) | P value |
Sex | male % [95% CI] (n) | 81.5% [70.4%—89.1%] (53) | 81.0% [69.6%—88.8%] (51) | 1 † |
female % [95% CI] (n) | 18.5% [10.9%—29.6%] (12) | 19.0% [11.2%—30.4%] (12) | ||
Age | Mean ± SD | 15 ± 9.64 | 14.3 ± 10.1 | 0.56 †† |
Height | Mean ± SD | 141 ± 34.1 | 140 ± 27.2 | 0.86 ††† |
Weight | Mean ± SD | 42.3 ± 24.3 | 38.8 ± 19.9 | 0.37 ††† |
Inherit | X-linked % [95% CI] (n) | 63.1% [50.9%—73.8%] (41) | 71.4% [59.3%—81.1%] (45) | 0.35 † |
autosomal % [95% CI] (n) | 36.9% [26.2%—49.1%] (24) | 28.6% [18.9%—40.7%] (18) | ||
Steroid treatment | false % [95% CI] (n) | 96.9% [89.5%—99.2%] (63) | 98.4% [91.5%—99.7%] (62) | 1 † |
true % [95% CI] (n) | 3.1% [0.8%—10.5%] (2) | 1.6% [0.3%—8.5%] (1) | ||
Location | US:NIH % [95% CI] (n) | 16.9% [9.7%—27.8%] (11) | 23.8% [15.0%—35.6%] (15) | 0.7 † |
US:other % [95% CI] (n) | 49.2% [37.5%—61.1%] (32) | 49.2% [37.3%—61.2%] (31) | ||
Europe:Amsterdam % [95% CI] (n) | 15.4% [8.6%—26.1%] (10) | 14.3% [7.7%—25.0%] (9) | ||
Europe:other % [95% CI] (n) | 18.5% [10.9%—29.6%] (12) | 12.7% [6.6%—23.1%] (8) | ||
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sided student's t-test (continuous) Normal distributions determined by the Lilliefors (Kolmogorov-Smirnov) test (P>1e-05) An adjusted P value of 0.00714 may be considered significant. |
The number of decimal places may need to be changed, either on a column-by-column basis (e.g. here for real values, using a named list) or on a systematic basis (e.g. here for percentages). The specification can be given either as fixed decimal places (e.g. “2f”) or as significant figures (e.g. “3g”). N.b. this setting is independent of the p-value formatter.
gcd %>%
  compare_population(
    formula,
    # precision for real values, per column: can supply either the "5f" (for
    # 5 digits floating point) or "6g" (for 6 significant figures) syntax:
    override_real_dp = list(age = "0f", height = "0f", weight = "2f"),
    # or a plain set of numbers. If the option is unnamed it is applied to
    # all the variables:
    override_percent_dp = 0
  )
placebo | rIFN-g | |||
---|---|---|---|---|
Variable | Characteristic | Value (N=65) | Value (N=63) | P value |
Sex | male % [95% CI] (n) | 82% [70%—89%] (53) | 81% [70%—89%] (51) | 1 † |
female % [95% CI] (n) | 18% [11%—30%] (12) | 19% [11%—30%] (12) | ||
Age | Median [IQR] | 14 [7—24] | 12 [7—20] | 0.56 †† |
Height | Median [IQR] | 143 [115—171] | 139 [119—167] | 0.45 ††† |
Weight | Median [IQR] | 36.10 [21.60—63.70] | 34.40 [20.65—53.65] | 0.4 ††† |
Inherit | X-linked % [95% CI] (n) | 63% [51%—74%] (41) | 71% [59%—81%] (45) | 0.35 † |
autosomal % [95% CI] (n) | 37% [26%—49%] (24) | 29% [19%—41%] (18) | ||
Steroid treatment | false % [95% CI] (n) | 97% [89%—99%] (63) | 98% [92%—100%] (62) | 1 † |
true % [95% CI] (n) | 3% [1%—11%] (2) | 2% [0%—8%] (1) | ||
Location | US:NIH % [95% CI] (n) | 17% [10%—28%] (11) | 24% [15%—36%] (15) | 0.7 † |
US:other % [95% CI] (n) | 49% [37%—61%] (32) | 49% [37%—61%] (31) | ||
Europe:Amsterdam % [95% CI] (n) | 15% [9%—26%] (10) | 14% [8%—25%] (9) | ||
Europe:other % [95% CI] (n) | 18% [11%—30%] (12) | 13% [7%—23%] (8) | ||
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sample Kolmogorov-Smirnov test (continuous) Normal distributions determined by the Anderson-Darling test (P>0.005) An adjusted P value of 0.00714 may be considered significant. |
Several standard layouts are defined: “relaxed”, “compact”, “micro”, “simple”, “single” and “missing”. These can be used in the `layout` parameter to give a particular format to the columns and content of the table.
placebo | rIFN-g | |||||
---|---|---|---|---|---|---|
Variable | Characteristic | Value | Count (N=65) | Value | Count (N=63) | P value |
Sex | male % [95% CI] | 81.5% [70.4%—89.1%] | 53/65 | 81.0% [69.6%—88.8%] | 51/63 | 1 † |
female % [95% CI] | 18.5% [10.9%—29.6%] | 12/65 | 19.0% [11.2%—30.4%] | 12/63 | ||
Age | Median [IQR] | 14 [7—24] | 65 | 12 [7—19.5] | 63 | 0.56 †† |
Height | Median [IQR] | 143 [115—171] | 65 | 139 [119—167] | 63 | 0.45 ††† |
Weight | Median [IQR] | 36.1 [21.6—63.7] | 65 | 34.4 [20.6—53.7] | 63 | 0.4 ††† |
Inherit | X-linked % [95% CI] | 63.1% [50.9%—73.8%] | 41/65 | 71.4% [59.3%—81.1%] | 45/63 | 0.35 † |
autosomal % [95% CI] | 36.9% [26.2%—49.1%] | 24/65 | 28.6% [18.9%—40.7%] | 18/63 | ||
Steroid treatment | false % [95% CI] | 96.9% [89.5%—99.2%] | 63/65 | 98.4% [91.5%—99.7%] | 62/63 | 1 † |
true % [95% CI] | 3.1% [0.8%—10.5%] | 2/65 | 1.6% [0.3%—8.5%] | 1/63 | ||
Location | US:NIH % [95% CI] | 16.9% [9.7%—27.8%] | 11/65 | 23.8% [15.0%—35.6%] | 15/63 | 0.7 † |
US:other % [95% CI] | 49.2% [37.5%—61.1%] | 32/65 | 49.2% [37.3%—61.2%] | 31/63 | ||
Europe:Amsterdam % [95% CI] | 15.4% [8.6%—26.1%] | 10/65 | 14.3% [7.7%—25.0%] | 9/63 | ||
Europe:other % [95% CI] | 18.5% [10.9%—29.6%] | 12/65 | 12.7% [6.6%—23.1%] | 8/63 | ||
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sample Kolmogorov-Smirnov test (continuous) Normal distributions determined by the Anderson-Darling test (P>0.005) An adjusted P value of 0.00714 may be considered significant. |
The “relaxed” standard layout is defined using a list. This is shown below:
# The "relaxed" layout: one entry per summary type. Within each entry the
# names give the table columns and the values give the column contents.
list(
  # categorical summaries
  subtype_count = list(
    characteristic = "{level} % [95% CI]",
    Value = "{.sprintf_na('%1.1f%% [%1.1f%%—%1.1f%%]',prob.0.5*100,prob.0.025*100,prob.0.975*100)}",
    "Count (N={N})" = "{.sprintf_na('%d/%d',x,n)}"
  ),
  # median and inter-quartile range summaries
  median_iqr = list(
    characteristic = "Median [IQR]",
    Value = "{.sprintf_na('%1.3g %s [%1.3g—%1.3g]',q.0.5,unit,q.0.25,q.0.75)}",
    "Count (N={N})" = "{.sprintf_na('%d',n)}"
  ),
  # mean and standard deviation summaries
  mean_sd = list(
    characteristic = "Mean ± SD",
    Value = "{.sprintf_na('%1.3g %s ± %1.3g',mean,unit,sd)}",
    "Count (N={N})" = "{.sprintf_na('%d',n)}"
  ),
  # skipped summaries: only the count is reported
  skipped = list(
    characteristic = "—",
    Value = "—",
    "Count (N={N})" = "{.sprintf_na('%d',n)}"
  )
)
We can produce a customised list based on this and supply it to a
formatting function as the `layout` parameter. The named list defines the
column name and the column contents; at the moment one item in this list
must be named `characteristic`. The column contents can refer to the
following variables:
- `subtype_count` can use `{level}`, `{prob.0.5}`, `{prob.0.025}`, `{prob.0.975}`, `{x}`, `{n}`, `{N}` - `x` is subgroup count, `n` is data count excluding missing, `N` includes missing.
- `median_iqr` can use `{q.0.5}`, `{q.0.25}`, …, `{unit}`, `{n}`, `{N}` - `n` excludes missing, `N` does not.
- `mean_sd` can use `{mean}`, `{sd}`, `{unit}`, `{n}`, `{N}` - `n` excludes missing, `N` does not.
- `skipped` can use `{unit}`, `{n}`, `{N}` - `n` excludes missing, `N` does not.

Other than the characteristic column, the column names are derived
from the names of the custom configuration list. The names can also be
configured using `glue` and this can use intervention level data like
`{N}` for the subgroup counts or data level variables such as
`{N_total}`, which is the number of items across all groups, or
`{N_missing}` for example.
There are a few useful formatting functions that the spec can also use beyond the usual text processing functions:

- `.sprintf_na` - `sprintf`s a set of numbers, replacing the output with `getOption("tableone.na","\u2014")` if all values are missing, and if some values are missing replacing each individual missing value with `getOption("tableone.missing","<?>")`.
- `.sprintf_no_na` - `sprintf`s a set of numbers, replacing the output with `getOption("tableone.na","\u2014")` if any values are missing.
- `maybe` - returns a string if it is present or “” if `NA`.

custom = list(
subtype_count = list(
characteristic = "{level}",
"Value (N={N}/{N_total})" = "{.sprintf_na('%1.1f%% (%d/%d)',prob.0.5*100,x,n)}"
),
median_iqr = list(
characteristic = "Median (N)",
"Value (N={N}/{N_total})" = "{.sprintf_na('%1.3g (%d)',q.0.5,n)}"
),
mean_sd = list(
characteristic = "Mean (N)",
"Value (N={N}/{N_total})" = "{.sprintf_na('%1.3g (%d)',mean,n)}"
),
skipped = list(
characteristic = "(N)",
"Value (N={N}/{N_total})" = "{.sprintf_na('— (%d)',n)}"
)
)
# Printing control: the following options control how missing values
# produced by the .sprintf_na function are shown:
# getOption("tableone.missing","<?>")
# getOption("tableone.na","\u2014")
gcd %>%
  compare_population(formula, layout = custom)
placebo | rIFN-g | |||
---|---|---|---|---|
Variable | Characteristic | Value (N=65/128) | Value (N=63/128) | P value |
Sex | male | 81.5% (53/65) | 81.0% (51/63) | 1 † |
female | 18.5% (12/65) | 19.0% (12/63) | ||
Age | Median (N) | 14 (65) | 12 (63) | 0.56 †† |
Height | Median (N) | 143 (65) | 139 (63) | 0.45 ††† |
Weight | Median (N) | 36.1 (65) | 34.4 (63) | 0.4 ††† |
Inherit | X-linked | 63.1% (41/65) | 71.4% (45/63) | 0.35 † |
autosomal | 36.9% (24/65) | 28.6% (18/63) | ||
Steroid treatment | false | 96.9% (63/65) | 98.4% (62/63) | 1 † |
true | 3.1% (2/65) | 1.6% (1/63) | ||
Location | US:NIH | 16.9% (11/65) | 23.8% (15/63) | 0.7 † |
US:other | 49.2% (32/65) | 49.2% (31/63) | ||
Europe:Amsterdam | 15.4% (10/65) | 14.3% (9/63) | ||
Europe:other | 18.5% (12/65) | 12.7% (8/63) | ||
†, Fisher's exact test (categorical); ††, 2 sample Wilcoxon Rank Sum test (continuous); †††, 2 sample Kolmogorov-Smirnov test (continuous) Normal distributions determined by the Anderson-Darling test (P>0.005) An adjusted P value of 0.00714 may be considered significant. |