library(dplyr)
library(explore)
A classic `count()` returns the number of observations.
# Load the penguins example data and count the observations per island.
data <- use_data_penguins()
data %>% count(island)
#> # A tibble: 3 × 2
#> island n
#> <fct> <int>
#> 1 Biscoe 168
#> 2 Dream 124
#> 3 Torgersen 52
To add percent values, simply use `count_pct()` from {explore}.
# Same count per island, with the overall total and the percentage added.
data %>% count_pct(island)
#> # A tibble: 3 × 4
#> island n total pct
#> <fct> <int> <int> <dbl>
#> 1 Biscoe 168 344 48.8
#> 2 Dream 124 344 36.0
#> 3 Torgersen 52 344 15.1
# Inspect the columns of the penguins data before adding an id.
data %>% glimpse()
#> Rows: 344
#> Columns: 8
#> $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adel…
#> $ island <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgerse…
#> $ bill_length_mm <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, …
#> $ bill_depth_mm <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, …
#> $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186…
#> $ body_mass_g <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, …
#> $ sex <fct> male, female, female, NA, female, male, female, male…
#> $ year <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007…
To add an id variable, simply use `add_var_id()` from {explore}.
# Prepend a running integer id column, then inspect the result.
data %>%
  add_var_id() %>%
  glimpse()
#> Rows: 344
#> Columns: 9
#> $ id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
#> $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adel…
#> $ island <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgerse…
#> $ bill_length_mm <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, …
#> $ bill_depth_mm <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, …
#> $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186…
#> $ body_mass_g <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, …
#> $ sex <fct> male, female, female, NA, female, male, female, male…
#> $ year <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007…
Create a user defined report (RMarkdown template) to explore your own data.
# Generate the RMarkdown notebook template in a temporary directory.
create_notebook_explore(
  output_file = "notebook-explore.Rmd",
  output_dir = tempdir()
)
Create a Data Dictionary of a data set (Markdown File data_dict.md)
# Write a Markdown data dictionary for iris into a temporary directory.
iris %>% data_dict_md(output_dir = tempdir())
Add title, detailed descriptions and change default filename
# Per-variable descriptions to include in the data dictionary.
description <- data.frame(
  variable = c("Species"),
  description = c("Species of Iris flower")
)

# Data dictionary with a custom title, descriptions and output file name.
data_dict_md(
  iris,
  title = "iris flower data set",
  description = description,
  output_file = "data_dict_iris.md",
  output_dir = tempdir()
)
# Load the Titanic example data with count = FALSE and inspect it.
data <- use_data_titanic(count = FALSE)
glimpse(data)
#> Rows: 2,201
#> Columns: 4
#> $ Class <chr> "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd"…
#> $ Sex <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male…
#> $ Age <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"…
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "…
# Rename the Age column to age; the values are left untouched.
data <- data %>% clean_var(Age, name = "age")
glimpse(data)
#> Rows: 2,201
#> Columns: 4
#> $ Class <chr> "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd"…
#> $ Sex <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male…
#> $ age <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"…
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "…
# Load the beer example data; energy_kcal_100ml contains missing values.
data <- use_data_beer()
data %>% describe(energy_kcal_100ml)
#> variable = energy_kcal_100ml
#> type = double
#> na = 11 of 161 (6.8%)
#> unique = 34
#> min|max = 20 | 62
#> q05|q95 = 24 | 56.65
#> q25|q75 = 37 | 44
#> median = 42
#> mean = 39.89333
# Replace the missing values with 42 (the median reported above).
data <- data %>% clean_var(energy_kcal_100ml, na = 42)
data %>% describe(energy_kcal_100ml)
#> variable = energy_kcal_100ml
#> type = double
#> na = 0 of 161 (0%)
#> unique = 33
#> min|max = 20 | 62
#> q05|q95 = 24 | 55
#> q25|q75 = 38 | 44
#> median = 42
#> mean = 40.03727
# Create the person example data and summarise the age variable.
data <- create_data_person()
data %>% describe(age)
#> variable = age
#> type = integer
#> na = 0 of 1 000 (0%)
#> unique = 80
#> min|max = 16 | 95
#> q05|q95 = 21 | 92
#> q25|q75 = 37 | 76
#> median = 55
#> mean = 55.845
# Limit age to the range 20..80; the row count stays at 1 000, so values
# outside the range are capped rather than dropped.
data <- data %>% clean_var(age, min_val = 20, max_val = 80)
data %>% describe(age)
#> variable = age
#> type = integer
#> na = 0 of 1 000 (0%)
#> unique = 61
#> min|max = 20 | 80
#> q05|q95 = 21 | 80
#> q25|q75 = 37 | 76
#> median = 55
#> mean = 54.276
# Summarise income on its original scale before rescaling.
data %>% describe(income)
#> variable = income
#> type = double
#> na = 0 of 1 000 (0%)
#> unique = 228
#> min|max = 0 | 150
#> q05|q95 = 6 | 123.025
#> q25|q75 = 35 | 88.625
#> median = 62
#> mean = 61.5875
# Rescale income to the interval 0..1.
data <- data %>% clean_var(income, rescale01 = TRUE)
data %>% describe(income)
#> variable = income
#> type = double
#> na = 0 of 1 000 (0%)
#> unique = 228
#> min|max = 0 | 1
#> q05|q95 = 0.04 | 0.820167
#> q25|q75 = 0.233333 | 0.590833
#> median = 0.4
#> mean = 0.410583
# Introduce inconsistent spellings of "Android" (extra spaces, different case).
data[1, "handset"] <- " android "
data[2, "handset"] <- "ANDROID"
data %>% describe(handset)
#> variable = handset
#> type = character
#> na = 0 of 1 000 (0%)
#> unique = 5
#> android = 1 (0.1%)
#> ANDROID = 1 (0.1%)
#> Android = 471 (47.1%)
#> Apple = 430 (43%)
#> Other = 97 (9.7%)
# Simplify the text so the spelling variants collapse into one upper-case
# value per handset.
data <- data %>% clean_var(handset, simplify_text = TRUE)
data %>% describe(handset)
#> variable = handset
#> type = character
#> na = 0 of 1 000 (0%)
#> unique = 3
#> ANDROID = 473 (47.3%)
#> APPLE = 430 (43%)
#> OTHER = 97 (9.7%)