RWEP/SD/20240328_9_课后作业/第8次课后作业_模板.qmd

119 lines
3.2 KiB
Plaintext
Raw Normal View History

2024-03-25 21:48:30 +08:00
---
2024-03-28 08:15:02 +08:00
title: 课后作业8
2024-03-25 21:48:30 +08:00
author: 姓名
format: html
---
2024-03-28 12:25:13 +08:00
# 下载airquality.xlsx并读取数据
2024-03-25 21:48:30 +08:00
```{r}
2024-04-02 02:04:22 +08:00
#| eval: false
#| execute: false
2024-03-28 12:25:13 +08:00
# Download the workbook into the session's temporary directory.
tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
# mode = "wb" forces a binary transfer. An .xlsx file is a zip archive, and on
# Windows download.file() would otherwise use a text-mode transfer for this
# extension, which corrupts the file.
download.file(
  url = "https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
  destfile = tmpxlsxpath,
  mode = "wb"
)
2024-04-02 02:04:22 +08:00
# Read both worksheets of the downloaded workbook:
# sheet 1 -> metadf (provides the site / Area columns used in the joins below),
# sheet 2 -> airqualitydf (provides the datetime / site / AQI columns).
metadf <- readxl::read_xlsx(path = tmpxlsxpath, sheet = 1)
airqualitydf <- readxl::read_xlsx(path = tmpxlsxpath, sheet = 2)
2024-03-25 21:48:30 +08:00
```
2024-03-26 16:58:06 +08:00
# 根据`airquality.xlsx`，按采样点统计白天（8:00-20:00）与夜晚（20:00-8:00）中空气质量指数（AQI）中位数；按城市统计低于所有采样点AQI 30%分位值的采样点占比，列出上述占比最高的10个城市（不考虑采样点数低于5个的城市）。
2024-03-25 21:48:30 +08:00
2024-04-02 02:04:22 +08:00
```{r}
#| eval: false
#| execute: false
# library() fails loudly when the package is missing; require() would only
# return FALSE and let the pipeline below fail with a confusing error.
library(tidyverse)

# AQI value below which 30% of all non-missing readings fall. Computed once
# here instead of once per city inside summarize().
aqi_q30 <- quantile(airqualitydf$AQI, probs = 0.3, na.rm = TRUE)

# Site-level ranking:
#   1. median AQI per sampling site (missing readings dropped);
#   2. attach each site's city (column `Area` of metadf);
#   3. keep cities with at least 5 sampling sites;
#   4. per city, share of sites whose median AQI is below the 30% quantile;
#   5. keep the 10 cities with the largest share (ties are kept).
airqualitydf |>
  select(datetime, site, AQI) |>
  filter(!is.na(AQI)) |>
  group_by(site) |>
  summarize(AQI.median = median(AQI, na.rm = TRUE)) |>
  left_join(metadf |> select(site, city = Area), by = "site") |>
  group_by(city) |>
  # ">= 5": only cities with fewer than 5 sites are excluded.
  filter(n() >= 5) |>
  summarize(p = mean(AQI.median < aqi_q30)) |>
  slice_max(p, n = 10)
2024-03-25 21:48:30 +08:00
2024-04-02 02:04:22 +08:00
# Median AQI of every sampling site, computed on non-missing readings only.
airqualitydf |>
  select(datetime, site, AQI) |>
  drop_na(AQI) |>
  group_by(site) |>
  summarise(AQI.median = median(AQI))
2024-03-25 21:48:30 +08:00
2024-04-02 02:04:22 +08:00
# AQI value below which 30% of all non-missing readings fall. Computed once
# here instead of once per city inside summarize().
aqi_q30 <- quantile(airqualitydf$AQI, probs = 0.3, na.rm = TRUE)

# Reading-level ranking: for every city with at least 5 distinct sampling
# sites, the share of non-missing AQI readings that fall below the 30%
# quantile; the 10 cities with the largest share are kept (ties are kept).
airqualitydf |>
  select(datetime, site, AQI) |>
  filter(!is.na(AQI)) |>
  left_join(metadf |> select(site, city = Area), by = "site") |>
  group_by(city) |>
  filter(n_distinct(site) >= 5) |>
  summarize(p = mean(AQI < aqi_q30)) |>
  slice_max(p, n = 10)
```
2024-03-28 12:25:13 +08:00
2024-03-26 16:58:06 +08:00
# 按照不同城市分组统计白天与夜晚AQI中位数是否具有显著差异。
2024-04-02 02:04:22 +08:00
```{r}
#| eval: false
# Per-city permutation test of the day-vs-night difference in median AQI.
# The whole block is disabled by `if (FALSE)`; flip the condition to run it.
if (FALSE) {
  # library() fails loudly if infer is not installed; require() would not.
  library(infer)

  testdf <- airqualitydf |>
    select(datetime, site, AQI) |>
    filter(!is.na(AQI)) |>
    left_join(metadf |> select(site, city = Area), by = "site") |>
    # Keep only cities that have at least 5 distinct sampling sites.
    group_by(city) |>
    filter(n_distinct(site) >= 5) |>
    ungroup() |>
    mutate(
      # Day is the half-open interval [8:00, 20:00). between(hour, 8, 20) is
      # inclusive on both ends and would label 20:00-20:59 readings as "day".
      dayornight = factor(
        ifelse(hour(datetime) >= 8 & hour(datetime) < 20, "day", "night"),
        levels = c("day", "night")
      )
    ) |>
    # One row per city; that city's readings live in the list-column `citydf`.
    nest(citydf = -city) |>
    # Observed statistic: median(day) - median(night).
    mutate(median_diff = purrr::map_dbl(citydf, \(d) {
      d |>
        specify(AQI ~ dayornight) |>
        calculate(stat = "diff in medians", order = c("day", "night")) |>
        pull(stat)
    })) |>
    # slice_sample(n = 12) |>
    # Null distribution: 1000 permutations of the day/night labels.
    mutate(null_dist = purrr::map(citydf, \(d) {
      d |>
        specify(AQI ~ dayornight) |>
        hypothesize(null = "independence") |>
        generate(reps = 1000, type = "permute") |>
        calculate(stat = "diff in medians", order = c("day", "night"))
    })) |>
    # One plot per city: null distribution with the two-sided p-value shaded.
    mutate(fig = purrr::pmap(
      list(null_dist, median_diff, city),
      \(dist, obs, nm) {
        visualize(dist) +
          shade_p_value(obs_stat = obs, direction = "both") +
          ggtitle(nm)
      }
    )) |>
    # Two-sided p-value of the observed difference under the null.
    mutate(p_value = purrr::map2_dbl(null_dist, median_diff, \(dist, obs) {
      get_p_value(dist, obs_stat = obs, direction = "both") |>
        pull(p_value)
    })) |>
    arrange(p_value) |>
    # Cities with p < 0.01 are labelled as significantly different.
    mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异"))

  # Table of the per-city verdicts.
  testdf |>
    select(city, sigdiff) |>
    knitr::kable()

  # NOTE(review): `lang` is presumably read by the dwfun helpers below --
  # confirm against the dwfun package.
  lang <- "cn"

  # 3-column grid of the plots of 9 randomly chosen cities, then save it.
  (testdf |>
    slice_sample(n = 9) |>
    pull(fig)) |>
    patchwork::wrap_plots(ncol = 3) +
    dwfun::theme_sci(5, 5)
  dwfun::ggsavep("./testdf.pdf")
}
```