---
title: "Lesson 8"
format: html
---

```{r}
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(tidyverse)
library(nycflights13)

# Remove underscores from the column names (columns 3-6, first 3 rows only).
flights |>
  select(3:6) |>
  head(3) |>
  rename_with(\(nm) gsub("_", "", nm, fixed = TRUE))

# Upper-case the names of the 2nd to 4th of the selected columns.
flights |>
  select(3:6) |>
  head(3) |>
  rename_with(toupper, .cols = 2:4)

# Flights on the 10th-15th of each month with dep_delay > 100.
flights |>
  filter(between(day, 10, 15), dep_delay > 100)

# For each month, the origin with the longest total departure delay.
# The total is a sum, not a mean; "drop_last" keeps the result grouped by
# month so that slice_max() picks one origin per month.
flights |>
  group_by(month, origin) |>
  summarize(
    n = n(),
    total_dep_delay = sum(dep_delay, na.rm = TRUE),
    .groups = "drop_last"
  ) |>
  slice_max(total_dep_delay) |>
  ungroup()
# Alternative view: arrange(month, origin, desc(total_dep_delay))

# Variant: five random days per month, then three random flights from each
# of those days. Nesting collapses every day into one row so that
# slice_sample() draws days rather than individual flights.
flights |>
  tidyr::nest(ymddf = -c(year, month, day)) |>
  group_by(year, month) |>
  slice_sample(n = 5) |>
  unnest(ymddf) |>
  group_by(year, month, day) |>
  slice_sample(n = 3) |>
  ungroup()

# One random day per month, three random flights from that day.
# The sampled flights stay in the list-column `ymddf`.
flights |>
  tidyr::nest(ymddf = -c(year, month, day)) |>
  group_by(year, month) |>
  slice_sample(n = 1) |>
  mutate(
    ymddf = purrr::map(ymddf, \(day_df) slice_sample(day_df, n = 3))
  ) |>
  ungroup()

# For each month and origin, the 3 flight numbers with the largest difference
# between the mean workday dep_delay and the mean weekend dep_delay.
flights |>
  mutate(
    date = ymd(paste(year, month, day)),
    # Pin Sunday = 1 so that 2..6 is Monday..Friday regardless of the
    # session's `lubridate.week.start` option.
    weekday = wday(date, week_start = 7),
    isworkday = if_else(between(weekday, 2, 6), "Yes", "No")
  ) |>
  group_by(year, month, origin, flight, isworkday) |>
  summarize(
    mean_delay = mean(dep_delay, na.rm = TRUE),
    .groups = "drop_last"
  ) |>
  tidyr::nest(diffdelaydf = c(isworkday, mean_delay)) |>
  # Keep only flights observed on both workdays and weekends.
  filter(purrr::map_int(diffdelaydf, nrow) > 1L) |>
  mutate(
    # Sorted by isworkday the rows are "No" then "Yes", so diff() yields
    # workday mean minus weekend mean.
    diffdelay = purrr::map_dbl(diffdelaydf, \(delay_df) {
      delay_df |>
        arrange(isworkday) |>
        pull(mean_delay) |>
        diff()
    })
  ) |>
  group_by(year, month, origin) |>
  slice_max(diffdelay, n = 3) |>
  ungroup()

# Weekday number of today (1 = Sunday with week_start = 7).
wday(today(), week_start = 7)

# One random flight per month.
flights |>
  group_by(month) |>
  slice_sample(n = 1) |>
  ungroup()
```

```{r}
library(patchwork)

# Number of flights per month, drawn as a line with red filled points.
p1 <- flights |>
  group_by(year, month) |>
  summarize(n = n(), .groups = "drop") |>
  ggplot(aes(month, n)) +
  geom_point(shape = 21, size = 6, color = "black", fill = "red") +
  geom_line()

# Pass the plot explicitly instead of relying on last_plot().
ggsave("./a.pdf", plot = p1)
ggsave("./a.png", plot = p1)

# patchwork: stack the plot on top of itself.
p1 / p1
```