Files
su2026rwep/SD/3.9_课后作业8/第8次课后作业_模板.qmd
T
2026-05-21 01:33:30 +08:00

175 lines
4.2 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
---
title: 课后作业8
author: 姓名
format: html
---
# 数据
下载airquality.xlsx,并读取数据。
```{r}
#| message: false
#| warning: false
# 下载至临时文件
if (FALSE) {
tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
download.file(
"https://git.drwater.net/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
destfile = tmpxlsxpath
)
airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
saveRDS(airqualitydf, "./airqualitydf.RDS")
saveRDS(metadf, "./metadf.RDS")
}
airqualitydf <- readRDS("./airqualitydf.RDS")
metadf <- readRDS("./metadf.RDS")
```
# 描述统计
根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。
```{r}
#| message: false
#| warning: false
require(tidyverse)
airqualitydf |>
select(datetime, site, AQI) |>
filter(!is.na(AQI)) |>
group_by(site) |>
summarize(AQI.median = median(AQI, na.rm = TRUE)) |>
left_join(metadf |> select(site, city = Area)) |>
group_by(city) |>
filter(n() > 5) |>
summarize(
p = sum(
AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)
) /
n()
) |>
top_n(10, p)
airqualitydf |>
select(datetime, site, AQI) |>
filter(!is.na(AQI)) |>
group_by(site) |>
summarize(AQI.median = median(AQI, na.rm = TRUE))
airqualitydf |>
select(datetime, site, AQI) |>
filter(!is.na(AQI)) |>
left_join(metadf |> select(site, city = Area)) |>
group_by(city) |>
filter(length(unique(site)) >= 5) |>
summarize(
p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2, na.rm = TRUE)) / n()
) |>
slice_max(p, n = 10) |>
knitr::kable()
```
# 统计检验
按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。
```{r}
#| message: false
#| warning: false
if (FALSE) {
require(infer)
require(tidyverse)
testdf <- airqualitydf |>
select(datetime, site, AQI) |>
filter(!is.na(AQI)) |>
left_join(metadf |> select(site, city = Area)) |>
group_by(city) |>
filter(length(unique(site)) >= 5) |>
mutate(
dayornight = factor(
ifelse(between(hour(datetime), 8, 20), "day", "night"),
levels = c("day", "night")
)
) |>
group_by(city) |>
nest(citydf = -city) |>
mutate(
median_diff = purrr::map_dbl(
citydf,
~ .x |>
specify(AQI ~ dayornight) |>
calculate(stat = "diff in medians", order = c("day", "night")) |>
pull(stat)
)
) |>
ungroup() |>
# slice_sample(n = 12) |>
mutate(
null_dist = purrr::map(
citydf,
~ .x |>
specify(AQI ~ dayornight) |>
hypothesize(null = "independence") |>
generate(reps = 1000, type = "permute") |>
calculate(stat = "diff in medians", order = c("day", "night"))
)
) |>
mutate(
p_value = purrr::map2_dbl(
null_dist,
median_diff,
~ get_p_value(.x, obs_stat = .y, direction = "both") |>
pull(p_value)
)
) |>
mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) |>
mutate(
fig = purrr::pmap(
list(null_dist, median_diff, city, sigdiff),
~ visualize(..1) +
shade_p_value(obs_stat = ..2, direction = "both") +
ggtitle(paste0(..3, "", ..4)) +
theme_sci(2, 2)
)
) |>
arrange(p_value)
saveRDS(testdf, "./testdf.RDS")
}
if (FALSE) {
lang <- "cn"
require(dwfun)
require(rmdify)
require(drwateR)
dwfun::init()
rmdify::rmd_init()
testdf <- readRDS("./testdf.RDS")
require(tidyverse)
testdf |>
select(city, median_diff, p_value, sigdiff) |>
knitr::kable()
testdf |>
mutate(grp = (row_number() - 1) %/% 12) |>
group_by(grp) |>
nest(grpdf = -grp) |>
ungroup() |>
# slice(1) |>
mutate(
gp = purrr::map(
grpdf,
~ (.x |>
pull(fig)) |>
patchwork::wrap_plots(ncol = 3) +
dwfun::theme_sci(5, 7)
)
) |>
pull(gp)
}
```