class: middle, right, title-slide

.title[
# Tidy data reshaping & summaries
]
.subtitle[
## demonstration
]
.author[
### Athanasia Monika Mowinckel
]

---
layout: true

<div class="my-sidebar"></div>

---
class: inverse, middle, center
name: pivots

# pivots

---

```r
# Stack every column whose name contains "_" into name/value pairs
penguins |>
  pivot_longer(cols = contains("_"))
```

```
## # A tibble: 1,376 × 6
##    species island    sex     year name    value
##    <fct>   <fct>     <fct>  <int> <chr>   <dbl>
##  1 Adelie  Torgersen male    2007 bill_l…   39.1
##  2 Adelie  Torgersen male    2007 bill_d…   18.7
##  3 Adelie  Torgersen male    2007 flippe…  181
##  4 Adelie  Torgersen male    2007 body_m… 3750
##  5 Adelie  Torgersen female  2007 bill_l…   39.5
##  6 Adelie  Torgersen female  2007 bill_d…   17.4
##  7 Adelie  Torgersen female  2007 flippe…  186
##  8 Adelie  Torgersen female  2007 body_m… 3800
##  9 Adelie  Torgersen female  2007 bill_l…   40.3
## 10 Adelie  Torgersen female  2007 bill_d…   18
## # … with 1,366 more rows
```

---

```r
penguins |>
  pivot_longer(
    cols      = contains("_"),                    # select the columns
    names_to  = c("body_part", "measure", "unit"), # break them into these columns
    names_sep = "_"                               # break the column names on this character
  )
```

```
## # A tibble: 1,376 × 8
##    species island    sex    year body_…¹ measure
##    <fct>   <fct>     <fct> <int> <chr>   <chr>
##  1 Adelie  Torgersen male   2007 bill    length
##  2 Adelie  Torgersen male   2007 bill    depth
##  3 Adelie  Torgersen male   2007 flipper length
##  4 Adelie  Torgersen male   2007 body    mass
##  5 Adelie  Torgersen fema…  2007 bill    length
##  6 Adelie  Torgersen fema…  2007 bill    depth
##  7 Adelie  Torgersen fema…  2007 flipper length
##  8 Adelie  Torgersen fema…  2007 body    mass
##  9 Adelie  Torgersen fema…  2007 bill    length
## 10 Adelie  Torgersen fema…  2007 bill    depth
## # … with 1,366 more rows, 2 more variables:
## #   unit <chr>, value <dbl>, and abbreviated
## #   variable name ¹body_part
```

---

```r
# Long format lets one density panel per body part be drawn with a single facet
penguins |>
  pivot_longer(
    cols      = contains("_"),
    names_to  = c("body_part", "measure", "unit"),
    names_sep = "_"
  ) |>
  ggplot(aes(x = value, fill = species)) +
  geom_density() +
  facet_wrap(~ body_part, scales = "free") +
  scale_fill_viridis_d(alpha = 0.5) +
  theme(legend.position = "bottom")
```

![](002-tidy-summaries-demo_files/figure-html/unnamed-chunk-3-1.png)<!-- -->

---

```r
penguins_long <- penguins |>
  pivot_longer(
    cols      = contains("_"),
    names_to  = c("body_part", "measure", "unit"),
    names_sep = "_"
  )

penguins_long |>
  pivot_wider(
    names_from  = c(body_part, measure, unit), # pivot these columns
    values_from = value,                       # take the values from here
    names_sep   = "_"                          # separate names_from with this character
  )
```

```
## # A tibble: 35 × 8
##    species island    sex    year bill_…¹ bill_…²
##    <fct>   <fct>     <fct> <int> <list>  <list>
##  1 Adelie  Torgersen male   2007 <dbl>   <dbl>
##  2 Adelie  Torgersen fema…  2007 <dbl>   <dbl>
##  3 Adelie  Torgersen <NA>   2007 <dbl>   <dbl>
##  4 Adelie  Biscoe    fema…  2007 <dbl>   <dbl>
##  5 Adelie  Biscoe    male   2007 <dbl>   <dbl>
##  6 Adelie  Dream     fema…  2007 <dbl>   <dbl>
##  7 Adelie  Dream     male   2007 <dbl>   <dbl>
##  8 Adelie  Dream     <NA>   2007 <dbl>   <dbl>
##  9 Adelie  Biscoe    fema…  2008 <dbl>   <dbl>
## 10 Adelie  Biscoe    male   2008 <dbl>   <dbl>
## # … with 25 more rows, 2 more variables:
## #   flipper_length_mm <list>,
## #   body_mass_g <list>, and abbreviated
## #   variable names ¹bill_length_mm,
## #   ²bill_depth_mm
```

---
class: inverse, middle, center
name: summaries

# summaries

---

```r
# An unnamed summary uses the expression text as the column name
penguins |>
  summarise(mean(bill_length_mm, na.rm = TRUE))
```

```
## # A tibble: 1 × 1
##   `mean(bill_length_mm, na.rm = TRUE)`
##                                  <dbl>
## 1                                 43.9
```

---

```r
# One row per species, with a named summary column
penguins |>
  group_by(species) |>
  summarise(m_bill_length = mean(bill_length_mm, na.rm = TRUE))
```

```
## # A tibble: 3 × 2
##   species   m_bill_length
##   <fct>             <dbl>
## 1 Adelie             38.8
## 2 Chinstrap          48.8
## 3 Gentoo             47.5
```

---

```r
# One row per species/island combination
penguins |>
  group_by(species, island) |>
  summarise(m_bill_length = mean(bill_length_mm, na.rm = TRUE))
```

```
## # A tibble: 5 × 3
## # Groups:   species [3]
##   species   island    m_bill_length
##   <fct>     <fct>             <dbl>
## 1 Adelie    Biscoe             39.0
## 2 Adelie    Dream              38.5
## 3 Adelie    Torgersen          39.0
## 4 Chinstrap Dream              48.8
## 5
Gentoo    Biscoe             47.5
```

---

```r
# No na.rm here: groups that contain missing values come back as NA
penguins |>
  group_by(species, island) |>
  summarise(across(bill_length_mm, mean))
```

```
## # A tibble: 5 × 3
## # Groups:   species [3]
##   species   island    bill_length_mm
##   <fct>     <fct>              <dbl>
## 1 Adelie    Biscoe              39.0
## 2 Adelie    Dream               38.5
## 3 Adelie    Torgersen           NA
## 4 Chinstrap Dream               48.8
## 5 Gentoo    Biscoe              NA
```

---

```r
# An unnamed list of functions gets numeric suffixes (_1, _2)
penguins |>
  group_by(species, island) |>
  summarise(across(bill_length_mm, list(mean, sd)))
```

```
## # A tibble: 5 × 4
## # Groups:   species [3]
##   species   island    bill_length_mm_1 bill_le…¹
##   <fct>     <fct>                <dbl>     <dbl>
## 1 Adelie    Biscoe                39.0      2.48
## 2 Adelie    Dream                 38.5      2.47
## 3 Adelie    Torgersen             NA       NA
## 4 Chinstrap Dream                 48.8      3.34
## 5 Gentoo    Biscoe                NA       NA
## # … with abbreviated variable name
## #   ¹bill_length_mm_2
```

---

```r
# A named list uses the names as suffixes instead
penguins |>
  group_by(species, island) |>
  summarise(across(bill_length_mm, list(mean = mean, sd = sd)))
```

```
## # A tibble: 5 × 4
## # Groups:   species [3]
##   species   island    bill_length_mm_m…¹ bill_…²
##   <fct>     <fct>                  <dbl>   <dbl>
## 1 Adelie    Biscoe                  39.0    2.48
## 2 Adelie    Dream                   38.5    2.47
## 3 Adelie    Torgersen               NA     NA
## 4 Chinstrap Dream                   48.8    3.34
## 5 Gentoo    Biscoe                  NA     NA
## # … with abbreviated variable names
## #   ¹bill_length_mm_mean, ²bill_length_mm_sd
```

---

```r
# .names controls the output column names; "{.fn}" keeps only the function name
penguins |>
  group_by(species, island) |>
  summarise(across(bill_length_mm,
                   list(mean = mean, sd = sd),
                   .names = "{.fn}"))
```

```
## # A tibble: 5 × 4
## # Groups:   species [3]
##   species   island     mean    sd
##   <fct>     <fct>     <dbl> <dbl>
## 1 Adelie    Biscoe     39.0  2.48
## 2 Adelie    Dream      38.5  2.47
## 3 Adelie    Torgersen  NA   NA
## 4 Chinstrap Dream      48.8  3.34
## 5 Gentoo    Biscoe     NA   NA
```

---

```r
# Summarise every measurement column with four statistics.
# na.rm = TRUE is set inside each lambda: passing extra arguments through
# across()'s `...` is deprecated since dplyr 1.1.0.
penguins |>
  group_by(species, island) |>
  summarise(across(
    contains("_"),
    list(
      Mean = \(x) mean(x, na.rm = TRUE),
      SD   = \(x) sd(x, na.rm = TRUE),
      Min  = \(x) min(x, na.rm = TRUE),
      Max  = \(x) max(x, na.rm = TRUE)
    )
  ))
```

```
## # A tibble: 5 × 18
## # Groups:   species [3]
##   species island bill_…¹ bill_…² bill_…³ bill_…⁴
##   <fct>   <fct>    <dbl>   <dbl>   <dbl>   <dbl>
## 1 Adelie  Biscoe    39.0    2.48    34.5    45.6
## 2 Adelie  Dream     38.5    2.47    32.1    44.1
## 3 Adelie  Torge…    39.0    3.03    33.5    46
## 4 Chinst… Dream     48.8    3.34    40.9    58
## 5 Gentoo  Biscoe    47.5    3.08    40.9    59.6
## # … with 12 more variables:
## #   bill_depth_mm_Mean <dbl>,
## #   bill_depth_mm_SD <dbl>,
## #   bill_depth_mm_Min <dbl>,
## #   bill_depth_mm_Max <dbl>,
## #   flipper_length_mm_Mean <dbl>,
## #   flipper_length_mm_SD <dbl>, …
```

---

```r
# Same summary, but put the statistic first in the column name.
# na.rm = TRUE is set inside each lambda: passing extra arguments through
# across()'s `...` is deprecated since dplyr 1.1.0.
penguins |>
  group_by(species, island) |>
  summarise(across(
    contains("_"),
    list(
      Mean = \(x) mean(x, na.rm = TRUE),
      SD   = \(x) sd(x, na.rm = TRUE),
      Min  = \(x) min(x, na.rm = TRUE),
      Max  = \(x) max(x, na.rm = TRUE)
    ),
    .names = "{.fn}_{.col}"
  ))
```

```
## # A tibble: 5 × 18
## # Groups:   species [3]
##   species island Mean_…¹ SD_bi…² Min_b…³ Max_b…⁴
##   <fct>   <fct>    <dbl>   <dbl>   <dbl>   <dbl>
## 1 Adelie  Biscoe    39.0    2.48    34.5    45.6
## 2 Adelie  Dream     38.5    2.47    32.1    44.1
## 3 Adelie  Torge…    39.0    3.03    33.5    46
## 4 Chinst… Dream     48.8    3.34    40.9    58
## 5 Gentoo  Biscoe    47.5    3.08    40.9    59.6
## # … with 12 more variables:
## #   Mean_bill_depth_mm <dbl>,
## #   SD_bill_depth_mm <dbl>,
## #   Min_bill_depth_mm <dbl>,
## #   Max_bill_depth_mm <dbl>,
## #   Mean_flipper_length_mm <dbl>,
## #   SD_flipper_length_mm <dbl>, …
```

---

```r
penguins |>
  group_by(species, island) |>
  # Summarise first; the default names look like bill_length_mm_Mean
  summarise(across(
    contains("_"),
    list(
      Mean = \(x) mean(x, na.rm = TRUE),
      SD   = \(x) sd(x, na.rm = TRUE),
      Min  = \(x) min(x, na.rm = TRUE),
      Max  = \(x) max(x, na.rm = TRUE)
    )
  )) |>
  # The summarised names have four "_"-separated parts, hence four names_to
  pivot_longer(contains("_"),
               names_to = c("body_part", "measure", "unit", "stat"),
               names_sep = "_") |>
  # Spread the statistics back out into one column each
  pivot_wider(names_from = stat,
              values_from = value)
```

```
## # A tibble: 20 × 9
## # Groups:   species [3]
##    species   island body_…¹ measure unit    Mean
##    <fct>     <fct>  <chr>   <chr>   <chr>  <dbl>
##  1 Adelie    Biscoe bill    length  mm      39.0
##  2 Adelie    Biscoe bill    depth   mm      18.4
##  3 Adelie    Biscoe flipper length  mm     189.
##  4 Adelie    Biscoe body    mass    g     3710.
##  5 Adelie    Dream  bill    length  mm      38.5
##  6 Adelie    Dream  bill    depth   mm      18.3
##  7 Adelie    Dream  flipper length  mm     190.
##  8 Adelie    Dream  body    mass    g     3688.
##  9 Adelie    Torge… bill    length  mm      39.0
## 10 Adelie    Torge… bill    depth   mm      18.4
## 11 Adelie    Torge… flipper length  mm     191.
## 12 Adelie    Torge… body    mass    g     3706.
## 13 Chinstrap Dream  bill    length  mm      48.8
## 14 Chinstrap Dream  bill    depth   mm      18.4
## 15 Chinstrap Dream  flipper length  mm     196.
## 16 Chinstrap Dream  body    mass    g     3733.
## 17 Gentoo    Biscoe bill    length  mm      47.5
## 18 Gentoo    Biscoe bill    depth   mm      15.0
## 19 Gentoo    Biscoe flipper length  mm     217.
## 20 Gentoo    Biscoe body    mass    g     5076.
## # … with 3 more variables: SD <dbl>, Min <dbl>,
## #   Max <dbl>, and abbreviated variable name
## #   ¹body_part
```

---

```r
penguins |>
  # pivot all the columns we want to summarise
  # (the raw names, e.g. bill_length_mm, split into exactly three parts,
  #  so names_to takes three names; there is no "stat" part yet)
  pivot_longer(contains("_"),
               names_to = c("body_part", "measure", "unit"),
               names_sep = "_",
               values_drop_na = TRUE) |>
  # Group by wanted grouping variables, including names of columns we made above
  group_by(species, island, body_part, measure, unit) |>
  # Summarise and give columns just function name
  summarise(across(value,
                   list(Mean = mean,
                        SD = sd,
                        Min = min,
                        Max = max),
                   .names = "{.fn}"))
```

```
## # A tibble: 20 × 9
## # Groups:   species, island, body_part, measure
## #   [20]
##    species   island body_…¹ measure unit    Mean
##    <fct>     <fct>  <chr>   <chr>   <chr>  <dbl>
##  1 Adelie    Biscoe bill    depth   mm      18.4
##  2 Adelie    Biscoe bill    length  mm      39.0
##  3 Adelie    Biscoe body    mass    g     3710.
##  4 Adelie    Biscoe flipper length  mm     189.
##  5 Adelie    Dream  bill    depth   mm      18.3
##  6 Adelie    Dream  bill    length  mm      38.5
##  7 Adelie    Dream  body    mass    g     3688.
##  8 Adelie    Dream  flipper length  mm     190.
##  9 Adelie    Torge… bill    depth   mm      18.4
## 10 Adelie    Torge… bill    length  mm      39.0
## 11 Adelie    Torge… body    mass    g     3706.
## 12 Adelie    Torge… flipper length  mm     191.
## 13 Chinstrap Dream  bill    depth   mm      18.4
## 14 Chinstrap Dream  bill    length  mm      48.8
## 15 Chinstrap Dream  body    mass    g     3733.
## 16 Chinstrap Dream  flipper length  mm     196.
## 17 Gentoo    Biscoe bill    depth   mm      15.0
## 18 Gentoo    Biscoe bill    length  mm      47.5
## 19 Gentoo    Biscoe body    mass    g     5076.
## 20 Gentoo    Biscoe flipper length  mm     217.
## # … with 3 more variables: SD <dbl>, Min <dbl>,
## #   Max <dbl>, and abbreviated variable name
## #   ¹body_part
```

---
class: inverse, middle, center
name: nested

# nested data

---

```r
# One row per species/island, with the remaining columns packed into `data`
penguins |>
  nest_by(species, island)
```

```
## # A tibble: 5 × 3
## # Rowwise:  species, island
##   species   island                  data
##   <fct>     <fct>     <list<tibble[,6]>>
## 1 Adelie    Biscoe              [44 × 6]
## 2 Adelie    Dream               [56 × 6]
## 3 Adelie    Torgersen           [52 × 6]
## 4 Chinstrap Dream               [68 × 6]
## 5 Gentoo    Biscoe             [124 × 6]
```

---

```r
# Fit one linear model per row; list() stores each fit in a list-column
penguins |>
  nest_by(species, island) |>
  mutate(
    lm_model = list(
      lm(bill_length_mm ~ bill_depth_mm, data = data)
    )
  )
```

```
## # A tibble: 5 × 4
## # Rowwise:  species, island
##   species   island                  data lm_mo…¹
##   <fct>     <fct>     <list<tibble[,6]>> <list>
## 1 Adelie    Biscoe              [44 × 6] <lm>
## 2 Adelie    Dream               [56 × 6] <lm>
## 3 Adelie    Torgersen           [52 × 6] <lm>
## 4 Chinstrap Dream               [68 × 6] <lm>
## 5 Gentoo    Biscoe             [124 × 6] <lm>
## # … with abbreviated variable name ¹lm_model
```

---

```r
# Keep the fits and add a tidy coefficient table for each of them
model_penguins <- penguins |>
  nest_by(species, island) |>
  mutate(
    lm_model = list(
      lm(bill_length_mm ~ bill_depth_mm, data = data)
    ),
    table = list(broom::tidy(lm_model))
  )

model_penguins
```

```
## # A tibble: 5 × 5
## # Rowwise:  species, island
##   species   island    data      lm_mo…¹ table
##   <fct>     <fct>     <list<ti> <list>  <list>
## 1 Adelie    Biscoe    [44 × 6]  <lm>    <tibble>
## 2 Adelie    Dream     [56 × 6]  <lm>    <tibble>
## 3 Adelie    Torgersen [52 × 6]  <lm>    <tibble>
## 4 Chinstrap Dream     [68 × 6]  <lm>    <tibble>
## 5 Gentoo    Biscoe    [124 × 6] <lm>    <tibble>
## # … with abbreviated variable name ¹lm_model
```

---

```r
# Unpack the nested observations again
model_penguins |>
  unnest(cols = data)
```

```
## # A tibble: 344 × 10
## # Groups:   species, island [5]
##    species island bill_length_mm bill_…¹ flipp…²
##    <fct>   <fct>           <dbl>   <dbl>   <int>
##  1 Adelie  Biscoe           37.8    18.3     174
##  2 Adelie  Biscoe           37.7    18.7     180
##  3 Adelie  Biscoe           35.9    19.2     189
##  4 Adelie  Biscoe           38.2    18.1     185
##  5 Adelie  Biscoe           38.8    17.2     180
##  6 Adelie  Biscoe           35.3    18.9     187
##  7 Adelie  Biscoe           40.6    18.6     183
##  8 Adelie  Biscoe           40.5    17.9     187
##  9 Adelie  Biscoe           37.9    18.6     172
## 10 Adelie  Biscoe           40.5    18.9     180
## # … with 334 more rows, 5 more variables:
## #   body_mass_g <int>, sex <fct>, year <int>,
## #   lm_model <list>, table <list>, and
## #   abbreviated variable names ¹bill_depth_mm,
## #   ²flipper_length_mm
```

---

```r
# Unpack the coefficient tables and drop the remaining list-columns
model_penguins |>
  unnest(cols = table) |>
  select(-c(lm_model, data))
```

```
## # A tibble: 10 × 7
## # Groups:   species, island [5]
##    species  island term  estim…¹ std.e…² stati…³
##    <fct>    <fct>  <chr>   <dbl>   <dbl>   <dbl>
##  1 Adelie   Biscoe (Int…  21.0     5.24     4.01
##  2 Adelie   Biscoe bill…   0.977   0.285    3.43
##  3 Adelie   Dream  (Int…  18.3     4.66     3.93
##  4 Adelie   Dream  bill…   1.10    0.255    4.33
##  5 Adelie   Torge… (Int…  28.6     5.77     4.95
##  6 Adelie   Torge… bill…   0.562   0.313    1.80
##  7 Chinstr… Dream  (Int…  13.4     5.06     2.66
##  8 Chinstr… Dream  bill…   1.92    0.274    7.01
##  9 Gentoo   Biscoe (Int…  17.2     3.28     5.25
## 10 Gentoo   Biscoe bill…   2.02    0.219    9.24
## # … with 1 more variable: p.value <dbl>, and
## #   abbreviated variable names ¹estimate,
## #   ²std.error, ³statistic
```