第9次课
This commit is contained in:
		@@ -7,18 +7,112 @@ format: html
 | 
			
		||||
# 下载airquality.xlsx,并读取数据
 | 
			
		||||
 | 
			
		||||
```{r}
 | 
			
		||||
#| eval: false
 | 
			
		||||
#| execute: false
 | 
			
		||||
# 下载至临时文件
 | 
			
		||||
tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
 | 
			
		||||
download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
 | 
			
		||||
  destfile = tmpxlsxpath)
 | 
			
		||||
airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
 | 
			
		||||
metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
 | 
			
		||||
airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
 | 
			
		||||
metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
# 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。
 | 
			
		||||
 | 
			
		||||
```{r}
 | 
			
		||||
#| eval: false
 | 
			
		||||
#| execute: false
 | 
			
		||||
require(tidyverse)
 | 
			
		||||
airqualitydf |>
 | 
			
		||||
  select(datetime, site, AQI) |>
 | 
			
		||||
  filter(!is.na(AQI)) |>
 | 
			
		||||
  group_by(site) |>
 | 
			
		||||
  summarize(AQI.median = median(AQI, na.rm = TRUE)) |>
 | 
			
		||||
  left_join(metadf |> select(site, city = Area)) |>
 | 
			
		||||
  group_by(city) |>
 | 
			
		||||
  filter(n() > 5) |>
 | 
			
		||||
  summarize(p = sum(AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)) / n()) |>
 | 
			
		||||
  top_n(10, p)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
airqualitydf |>
 | 
			
		||||
  select(datetime, site, AQI) |>
 | 
			
		||||
  filter(!is.na(AQI)) |>
 | 
			
		||||
  group_by(site) |>
 | 
			
		||||
  summarize(AQI.median = median(AQI, na.rm = TRUE))
 | 
			
		||||
 | 
			
		||||
airqualitydf |>
 | 
			
		||||
  select(datetime, site, AQI) |>
 | 
			
		||||
  filter(!is.na(AQI)) |>
 | 
			
		||||
  left_join(metadf |> select(site, city = Area)) |>
 | 
			
		||||
  group_by(city) |>
 | 
			
		||||
  filter(length(unique(site)) >= 5) |>
 | 
			
		||||
  summarize(p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2,
 | 
			
		||||
    na.rm = TRUE)) / n()) |>
 | 
			
		||||
  slice_max(p, n = 10)
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。
 | 
			
		||||
 | 
			
		||||
```{r}
 | 
			
		||||
#| eval: false
 | 
			
		||||
 | 
			
		||||
if (FALSE) {
 | 
			
		||||
 | 
			
		||||
require(infer)
 | 
			
		||||
testdf <- airqualitydf |>
 | 
			
		||||
  select(datetime, site, AQI) |>
 | 
			
		||||
  filter(!is.na(AQI)) |>
 | 
			
		||||
  left_join(metadf |> select(site, city = Area)) |>
 | 
			
		||||
  group_by(city) |>
 | 
			
		||||
  filter(length(unique(site)) >= 5) |>
 | 
			
		||||
  mutate(dayornight = factor(ifelse(between(hour(datetime), 8, 20), "day", "night"),
 | 
			
		||||
    levels = c("day", "night"))
 | 
			
		||||
  ) |>
 | 
			
		||||
  group_by(city) |>
 | 
			
		||||
  nest(citydf = -city) |>
 | 
			
		||||
  mutate(median_diff = purrr::map_dbl(citydf, ~
 | 
			
		||||
    .x |>
 | 
			
		||||
      specify(AQI ~ dayornight) |>
 | 
			
		||||
      calculate(stat = "diff in medians", order = c("day", "night")) |>
 | 
			
		||||
      pull(stat)
 | 
			
		||||
)) |>
 | 
			
		||||
  ungroup() |>
 | 
			
		||||
#  slice_sample(n = 12) |>
 | 
			
		||||
  mutate(null_dist = purrr::map(citydf, ~
 | 
			
		||||
    .x |>
 | 
			
		||||
      specify(AQI ~ dayornight) |>
 | 
			
		||||
   hypothesize(null = "independence") |>
 | 
			
		||||
   generate(reps = 1000, type = "permute") |>
 | 
			
		||||
      calculate(stat = "diff in medians", order = c("day", "night"))
 | 
			
		||||
)) |>
 | 
			
		||||
  mutate(fig = purrr::pmap(list(null_dist, median_diff, city),
 | 
			
		||||
    ~ visualize(..1) +
 | 
			
		||||
  shade_p_value(obs_stat = ..2, direction = "both") +
 | 
			
		||||
      ggtitle(..3)
 | 
			
		||||
  )) |>
 | 
			
		||||
  mutate(p_value = purrr::map2_dbl(null_dist, median_diff, 
 | 
			
		||||
    ~  get_p_value(.x, obs_stat = .y, direction = "both") |>
 | 
			
		||||
      pull(p_value)
 | 
			
		||||
  )) |>
 | 
			
		||||
  arrange(p_value) |>
 | 
			
		||||
  mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异"))
 | 
			
		||||
 | 
			
		||||
testdf |>
 | 
			
		||||
  select(city, sigdiff) |>
 | 
			
		||||
knitr::kable()
 | 
			
		||||
 | 
			
		||||
  lang <- "cn"
 | 
			
		||||
(testdf |>
 | 
			
		||||
  slice_sample(n = 9) |>
 | 
			
		||||
  pull(fig)) |>
 | 
			
		||||
patchwork::wrap_plots(ncol = 3) +
 | 
			
		||||
dwfun::theme_sci(5, 5)
 | 
			
		||||
 | 
			
		||||
dwfun::ggsavep("./testdf.pdf")
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user