RWEP/SD/2.3_datatransform/_demo.qmd

116 lines
2.1 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

---
title: "Lesson 8"
format: html
---
```{r}
# library() stops immediately when a package is missing; require() would only
# return FALSE and let the chunk fail later with a less obvious error.
library(tidyverse)
library(nycflights13)

# Remove underscores from the names of columns 3-6 (first three rows only).
# rename_with() is the current replacement for the superseded rename_all().
flights |>
  select(3:6) |>
  head(3) |>
  rename_with(\(nm) gsub("_", "", nm, fixed = TRUE))

# Upper-case the names of the 2nd to 4th of the selected columns.
flights |>
  select(3:6) |>
  head(3) |>
  rename_with(toupper, .cols = 2:4)
# Exercise: flights on the 10th-15th of each month with dep_delay > 100.
# (No solution is written for this exercise in the demo.)

# Exercise: for each month, which origin has the largest total dep_delay?
flights |>
  group_by(month, origin) |>
  summarize(
    n = n(),
    # sum(), not mean(): the exercise asks for the total delay per origin.
    total_dep_delay = sum(dep_delay, na.rm = TRUE),
    # Keep the month grouping so slice_max() returns one winner per month.
    .groups = "drop_last"
  ) |>
  slice_max(total_dep_delay)
# Alternative: list every origin instead of only the top one.
# arrange(month, origin, desc(total_dep_delay))
# Exercise: for each month pick a random day, then three random flights.
# Nesting collapses each (year, month, day) into a single row, so sampling
# rows within a month samples whole days; unnest() then restores the flights
# of the chosen days so three can be drawn per day.
# NOTE(review): this variant draws 5 days per month although the exercise
# text says one day - confirm which is intended.
flights |>
  tidyr::nest(ymddf = -c(year, month, day)) |>
  group_by(year, month) |>
  slice_sample(n = 5) |>
  unnest(ymddf) |>
  group_by(year, month, day) |>
  slice_sample(n = 3)
# For each month draw one random day, then cut that day's nested table down
# to three randomly chosen flights. The result stays nested: one row per
# month with the sampled flights in the `ymddf` list-column.
flights |>
  tidyr::nest(ymddf = -c(year, month, day)) |>
  group_by(year, month) |>
  slice_sample(n = 1) |>
  mutate(
    ymddf = purrr::map(
      ymddf,
      \(day_flights) slice_sample(day_flights, n = 3)
    )
  )
# Exercise: per month and origin, the three flight numbers with the largest
# difference between mean workday dep_delay and mean weekend dep_delay.
flights |>
  mutate(
    # Build the Date straight from its integer parts instead of pasting them
    # into a string and parsing that string back.
    date = lubridate::make_date(year, month, day),
    # week_start = 7 pins Sunday = 1 ... Saturday = 7, so 2-6 means Mon-Fri
    # regardless of the `lubridate.week.start` option.
    weekday = lubridate::wday(date, week_start = 7),
    isworkday = if_else(between(weekday, 2, 6), "Yes", "No")
  ) |>
  group_by(year, month, origin, flight, isworkday) |>
  summarize(
    mean_delay = mean(dep_delay, na.rm = TRUE),
    .groups = "drop_last"
  ) |>
  # One row per (year, month, origin, flight) holding its "Yes"/"No" means.
  tidyr::nest(diffdelaydf = c(isworkday, mean_delay)) |>
  # Keep only flights observed on both workdays and weekends; with a single
  # row there is nothing to subtract. map_int() yields an integer vector, so
  # the comparison does not depend on comparing a list with a number.
  filter(purrr::map_int(diffdelaydf, nrow) > 1L) |>
  mutate(
    diffdelay = purrr::map_dbl(diffdelaydf, \(x) {
      # Sorting puts "No" before "Yes", so diff() is workday minus weekend.
      x |>
        arrange(isworkday) |>
        pull(mean_delay) |>
        diff()
    })
  ) |>
  group_by(year, month, origin) |>
  # NOTE(review): ranks by the signed difference; if the exercise means the
  # largest gap in either direction, rank by abs(diffdelay) - confirm.
  slice_max(diffdelay, n = 3)
# Scratch check: numeric day of the week for today. lubridate's default
# numbering starts the week on Sunday (Sunday = 1).
lubridate::wday(lubridate::today())

# One random flight per month.
flights |>
  group_by(month) |>
  slice_sample(n = 1)
```
```{r}
# library() fails loudly if patchwork is not installed; require() would only
# return FALSE and leave `p1 / p1` to fail with an unrelated-looking error.
library(patchwork)

# Number of flights per month, drawn as red-filled points joined by a line.
p1 <- flights |>
  count(year, month) |>
  ggplot(aes(month, n)) +
  geom_point(shape = 21, size = 6, color = "black", fill = "red") +
  geom_line()

# Pass the plot explicitly: p1 is never printed before saving, so do not rely
# on ggsave()'s implicit last_plot() default.
ggsave("./a.pdf", plot = p1)
ggsave("./a.png", plot = p1)

# patchwork's `/` operator stacks the two copies vertically.
p1 / p1