diff --git a/SD/20240328_9_课后作业/第8次课后作业_模板.qmd b/SD/20240328_9_课后作业/第8次课后作业_模板.qmd
index 0c79e4e..4a09163 100644
--- a/SD/20240328_9_课后作业/第8次课后作业_模板.qmd
+++ b/SD/20240328_9_课后作业/第8次课后作业_模板.qmd
@@ -7,18 +7,112 @@ format: html
# 下载airquality.xlsx,并读取数据
```{r}
+#| eval: false
+#| execute: false
# 下载至临时文件
tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
destfile = tmpxlsxpath)
-airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
-metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
+airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) # AQI table (later code uses datetime, site, AQI) now read from sheet 2 - NOTE(review): confirm the workbook's sheet order
+metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) # site table (later code uses site and Area) now read from sheet 1 - NOTE(review): confirm the workbook's sheet order
```
# 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。
+```{r}
+#| eval: false
+#| execute: false
+library(tidyverse) # library() stops with an error if the package is missing; require() only returns FALSE
+aqi_q30 <- quantile(airqualitydf$AQI, probs = 0.3, na.rm = TRUE) # 30% quantile of all AQI readings, as stated in the task; computed once
+airqualitydf |> # per city: share of sites whose median AQI is below the 30% quantile
+  filter(!is.na(AQI)) |>
+  group_by(site) |>
+  summarize(AQI.median = median(AQI)) |> # NAs were already removed above
+  left_join(metadf |> select(site, city = Area), by = "site") |> # explicit key instead of a natural join
+  group_by(city) |>
+  filter(n() >= 5) |> # one row per site here, so this drops only cities with fewer than 5 sites
+  summarize(p = mean(AQI.median < aqi_q30)) |> # mean of a logical = proportion
+  slice_max(p, n = 10) # replaces the superseded top_n(); result is sorted by p, ties are kept
+airqualitydf |> # per site: median AQI for day (08:00-20:00) and night (20:00-08:00)
+  filter(!is.na(AQI)) |>
+  mutate(dayornight = ifelse(between(lubridate::hour(datetime), 8, 19), "day", "night")) |> # between() is inclusive: hours 8..19 cover 08:00-19:59
+  group_by(site, dayornight) |>
+  summarize(AQI.median = median(AQI), .groups = "drop")
+
+# Observation-level variant: per city, share of individual readings below the 30% quantile
+airqualitydf |>
+  select(datetime, site, AQI) |>
+  filter(!is.na(AQI)) |>
+  left_join(metadf |> select(site, city = Area), by = "site") |>
+  group_by(city) |>
+  filter(n_distinct(site) >= 5) |> # rows are readings here, so count distinct sites
+  summarize(p = mean(AQI < aqi_q30)) |>
+  slice_max(p, n = 10)
+```
# 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。
+
+```{r}
+#| eval: false
+
+if (FALSE) {
+
+library(infer) # library() stops with an error if infer is missing; require() only returns FALSE
+testdf <- airqualitydf |>
+  select(datetime, site, AQI) |>
+  filter(!is.na(AQI)) |>
+  left_join(metadf |> select(site, city = Area), by = "site") |> # explicit key instead of a natural join
+  group_by(city) |>
+  filter(length(unique(site)) >= 5) |> # keep cities with at least 5 distinct sites
+  mutate(dayornight = factor(ifelse(between(hour(datetime), 8, 19), "day", "night"), # between() is inclusive: 8..19 = 08:00-19:59, so 20:00-20:59 counts as night
+    levels = c("day", "night"))
+  ) |>
+  group_by(city) |>
+  nest(citydf = -city) |> # one row per city; its readings are stored in the list-column citydf
+  mutate(median_diff = purrr::map_dbl(citydf, ~
+    .x |>
+      specify(AQI ~ dayornight) |>
+      calculate(stat = "diff in medians", order = c("day", "night")) |> # observed statistic: median(day) - median(night)
+      pull(stat)
+  )) |>
+  ungroup() |>
+# slice_sample(n = 12) |>
+  mutate(null_dist = purrr::map(citydf, ~
+    .x |>
+      specify(AQI ~ dayornight) |>
+      hypothesize(null = "independence") |>
+      generate(reps = 1000, type = "permute") |> # 1000 permutation resamples per city
+      calculate(stat = "diff in medians", order = c("day", "night"))
+  )) |>
+  mutate(fig = purrr::pmap(list(null_dist, median_diff, city),
+    ~ visualize(..1) +
+      shade_p_value(obs_stat = ..2, direction = "both") +
+      ggtitle(..3) # plot title is the city name
+  )) |>
+  mutate(p_value = purrr::map2_dbl(null_dist, median_diff,
+    ~ get_p_value(.x, obs_stat = .y, direction = "both") |> # two-sided p-value
+      pull(p_value)
+  )) |>
+  arrange(p_value) |>
+  mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) # significance threshold is 0.01
+
+testdf |>
+  select(city, sigdiff) |>
+  knitr::kable()
+
+lang <- "cn" # NOTE(review): presumably read by the dwfun helpers below - confirm
+(testdf |>
+  slice_sample(n = 9) |> # random 9 cities for a 3 x 3 panel
+  pull(fig)) |>
+  patchwork::wrap_plots(ncol = 3) +
+  dwfun::theme_sci(5, 5)
+
+dwfun::ggsavep("./testdf.pdf")
+
+}
+
+```
+
diff --git a/SD/20240328_2_正则表达式/_extensions b/SD/20240402_1_datavisualize/_extensions
similarity index 100%
rename from SD/20240328_2_正则表达式/_extensions
rename to SD/20240402_1_datavisualize/_extensions
diff --git a/SD/20240328_3_datavisualize/index.qmd b/SD/20240402_1_datavisualize/index.qmd
similarity index 100%
rename from SD/20240328_3_datavisualize/index.qmd
rename to SD/20240402_1_datavisualize/index.qmd
diff --git a/SD/20240328_3_datavisualize/mpg-plot.png b/SD/20240402_1_datavisualize/mpg-plot.png
similarity index 100%
rename from SD/20240328_3_datavisualize/mpg-plot.png
rename to SD/20240402_1_datavisualize/mpg-plot.png
diff --git a/SD/20240328_3_datavisualize/_extensions b/SD/20240402_2_正则表达式/_extensions
similarity index 100%
rename from SD/20240328_3_datavisualize/_extensions
rename to SD/20240402_2_正则表达式/_extensions
diff --git a/SD/20240328_2_正则表达式/index.qmd b/SD/20240402_2_正则表达式/index.qmd
similarity index 100%
rename from SD/20240328_2_正则表达式/index.qmd
rename to SD/20240402_2_正则表达式/index.qmd
diff --git a/SD/20240328_4_实践部分/_extensions b/SD/20240402_4_实践部分/_extensions
similarity index 100%
rename from SD/20240328_4_实践部分/_extensions
rename to SD/20240402_4_实践部分/_extensions
diff --git a/SD/20240328_4_实践部分/index.qmd b/SD/20240402_4_实践部分/index.qmd
similarity index 100%
rename from SD/20240328_4_实践部分/index.qmd
rename to SD/20240402_4_实践部分/index.qmd
diff --git a/SD/20240402_9_课后作业/_extensions b/SD/20240402_9_课后作业/_extensions
new file mode 120000
index 0000000..74119e3
--- /dev/null
+++ b/SD/20240402_9_课后作业/_extensions
@@ -0,0 +1 @@
+../../_extensions
\ No newline at end of file
diff --git a/SD/20240402_9_课后作业/index.qmd b/SD/20240402_9_课后作业/index.qmd
new file mode 100644
index 0000000..db0948e
--- /dev/null
+++ b/SD/20240402_9_课后作业/index.qmd
@@ -0,0 +1,49 @@
+---
+title: "课后作业9"
+subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
+author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
+date: today
+lang: zh
+format:
+ revealjs:
+ theme: dark
+ slide-number: true
+ chalkboard:
+ buttons: true
+ preview-links: auto
+ lang: zh
+ toc: true
+ toc-depth: 1
+ toc-title: 大纲
+ logo: ./_extensions/inst/img/ucaslogo.png
+ css: ./_extensions/inst/css/revealjs.css
+ pointer:
+ key: "p"
+ color: "#32cd32"
+ pointerSize: 18
+revealjs-plugins:
+ - pointer
+filters:
+ - d2
+---
+
+```{r}
+#| include: false
+#| cache: false
+lang <- "cn" # NOTE(review): not referenced in this file's visible code; presumably read by the slide helpers - confirm
+library(tidyverse) # library() stops the render if the package is missing; require() would only return FALSE
+library(learnr)
+```
+
+## 第9次课后作业
+
+自选数据集,使用R语言开展不同因子(如年份、季节、处理方式等)间某指标的差异分析,采用图表方式形成简要报告。
+
+作业模板:[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd)
+
+
+## 欢迎讨论!{.center}
+
+
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+
diff --git a/SD/20240402_9_课后作业/第9次课后作业_模板.qmd b/SD/20240402_9_课后作业/第9次课后作业_模板.qmd
new file mode 100644
index 0000000..90dde6c
--- /dev/null
+++ b/SD/20240402_9_课后作业/第9次课后作业_模板.qmd
@@ -0,0 +1,8 @@
+---
+title: 课后作业9
+author: 姓名
+format: html
+---
+
+要求:自选数据集,使用R语言开展不同因子间(如年份、季节、处理方式等)某指标的差异分析,采用图表+文字说明等方式形成简要报告。
+
diff --git a/data/writexldemo.xlsx b/data/writexldemo.xlsx
index 55c9a59..2677682 100644
Binary files a/data/writexldemo.xlsx and b/data/writexldemo.xlsx differ