library(tidyverse)
#> ── Attaching core tidyverse packages ───────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.2 ✔ readr 2.1.4
#> ✔ forcats 1.0.0 ✔ stringr 1.5.0
#> ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
#> ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
#> ✔ purrr 1.0.1
#> ── Conflicts ─────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
5 Workflow: code style
Prerequisites
5.1 Exercises 5.6
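- The pipelines from the exercise, restyled with one pipe step per line and one argument per line inside multi-argument calls, are given below.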
# Per-day summary of flights whose destination is IAH: the number of flights
# (`n`) and the mean of `arr_delay`, keeping only days with more than 10
# such flights.
flights |>
  filter(dest == "IAH") |>
  group_by(year, month, day) |>
  summarize(
    n = n(),
    # na.rm = TRUE drops rows with a missing arr_delay before averaging.
    delay = mean(arr_delay, na.rm = TRUE)
  ) |>
  filter(n > 10)
#> `summarise()` has grouped output by 'year', 'month'. You can override using
#> the `.groups` argument.
#> # A tibble: 365 × 5
#> # Groups:   year, month [12]
#>    year month   day     n delay
#>   <int> <int> <int> <int> <dbl>
#> 1  2013     1     1    20 17.8
#> 2  2013     1     2    20  7
#> 3  2013     1     3    19 18.3
#> 4  2013     1     4    20 -3.2
#> 5  2013     1     5    13 20.2
#> 6  2013     1     6    18  9.28
#> # ℹ 359 more rows
# Per-flight-number summary of UA flights to IAH or HOU scheduled to depart
# after 0900 and arrive before 2000: the mean of `arr_delay`, the count of
# rows with a missing `arr_delay`, and the number of flights. Only flight
# numbers with more than 10 flights are kept.
flights |>
  filter(
    carrier == "UA",
    dest %in% c("IAH", "HOU"),
    # R parses 0900 as the number 900; the leading zero only makes the
    # literal read like a clock time (presumably HHMM-encoded; confirm
    # against the nycflights13 documentation).
    sched_dep_time > 0900,
    sched_arr_time < 2000
  ) |>
  group_by(flight) |>
  summarize(
    delay = mean(arr_delay, na.rm = TRUE),
    # A missing arr_delay is counted as a cancelled flight.
    cancelled = sum(is.na(arr_delay)),
    n = n()
  ) |>
  filter(n > 10)
#> # A tibble: 74 × 4
#>   flight delay cancelled     n
#>    <int> <dbl>     <int> <int>
#> 1     53 12.5          2    18
#> 2    112 14.1          0    14
#> 3    205 -1.71         0    14
#> 4    235 -5.36         0    14
#> 5    255 -9.47         0    15
#> 6    268 38.6          1    15
#> # ℹ 68 more rows