diff --git a/SD/20240328_9_课后作业/第8次课后作业_模板.qmd b/SD/20240328_9_课后作业/第8次课后作业_模板.qmd index 0c79e4e..4a09163 100644 --- a/SD/20240328_9_课后作业/第8次课后作业_模板.qmd +++ b/SD/20240328_9_课后作业/第8次课后作业_模板.qmd @@ -7,18 +7,112 @@ format: html # 下载airquality.xlsx,并读取数据 ```{r} +#| eval: false +#| execute: false # 下载至临时文件 tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx") download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx", destfile = tmpxlsxpath) -airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) -metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) +airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) +metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) ``` # 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。 +```{r} +#| eval: false +#| execute: false +require(tidyverse) +airqualitydf |> + select(datetime, site, AQI) |> + filter(!is.na(AQI)) |> + group_by(site) |> + summarize(AQI.median = median(AQI, na.rm = TRUE)) |> + left_join(metadf |> select(site, city = Area)) |> + group_by(city) |> + filter(n() > 5) |> + summarize(p = sum(AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)) / n()) |> + top_n(10, p) +airqualitydf |> + select(datetime, site, AQI) |> + filter(!is.na(AQI)) |> + group_by(site) |> + summarize(AQI.median = median(AQI, na.rm = TRUE)) + +airqualitydf |> + select(datetime, site, AQI) |> + filter(!is.na(AQI)) |> + left_join(metadf |> select(site, city = Area)) |> + group_by(city) |> + filter(length(unique(site)) >= 5) |> + summarize(p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2, + na.rm = TRUE)) / n()) |> + slice_max(p, n = 10) +``` # 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。 + +```{r} +#| eval: false + +if (FALSE) { + +require(infer) +testdf <- airqualitydf |> + select(datetime, site, AQI) |> + filter(!is.na(AQI)) |> + left_join(metadf |> select(site, city = Area)) |> + group_by(city) |> + filter(length(unique(site)) >= 5) |> + mutate(dayornight = factor(ifelse(between(hour(datetime), 8, 20), "day", "night"), + levels = c("day", "night")) + ) |> + group_by(city) |> + nest(citydf = -city) |> + mutate(median_diff = purrr::map_dbl(citydf, ~ + .x |> + specify(AQI ~ dayornight) |> + calculate(stat = "diff in medians", order = c("day", "night")) |> + pull(stat) +)) |> + ungroup() |> +# slice_sample(n = 12) |> + mutate(null_dist = purrr::map(citydf, ~ + .x |> + specify(AQI ~ dayornight) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> + calculate(stat = "diff in medians", order = c("day", "night")) +)) |> + mutate(fig = purrr::pmap(list(null_dist, median_diff, city), + ~ visualize(..1) + + shade_p_value(obs_stat = ..2, direction = "both") + + ggtitle(..3) + )) |> + mutate(p_value = purrr::map2_dbl(null_dist, median_diff, + ~ get_p_value(.x, obs_stat = .y, direction = "both") |> + pull(p_value) + )) |> + arrange(p_value) |> + mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) + +testdf |> + select(city, sigdiff) |> +knitr::kable() + + lang <- "cn" +(testdf |> + slice_sample(n = 9) |> + pull(fig)) |> +patchwork::wrap_plots(ncol = 3) + +dwfun::theme_sci(5, 5) + +dwfun::ggsavep("./testdf.pdf") + +} + +``` + diff --git a/SD/20240328_2_正则表达式/_extensions b/SD/20240402_1_datavisualize/_extensions similarity index 100% rename from SD/20240328_2_正则表达式/_extensions rename to SD/20240402_1_datavisualize/_extensions diff --git a/SD/20240328_3_datavisualize/index.qmd b/SD/20240402_1_datavisualize/index.qmd similarity index 100% rename from SD/20240328_3_datavisualize/index.qmd rename to SD/20240402_1_datavisualize/index.qmd diff --git a/SD/20240328_3_datavisualize/mpg-plot.png b/SD/20240402_1_datavisualize/mpg-plot.png similarity index 100% rename from SD/20240328_3_datavisualize/mpg-plot.png rename to SD/20240402_1_datavisualize/mpg-plot.png diff --git a/SD/20240328_3_datavisualize/_extensions b/SD/20240402_2_正则表达式/_extensions similarity index 100% rename from SD/20240328_3_datavisualize/_extensions rename to SD/20240402_2_正则表达式/_extensions diff --git a/SD/20240328_2_正则表达式/index.qmd b/SD/20240402_2_正则表达式/index.qmd similarity index 100% rename from SD/20240328_2_正则表达式/index.qmd rename to SD/20240402_2_正则表达式/index.qmd diff --git a/SD/20240328_4_实践部分/_extensions b/SD/20240402_4_实践部分/_extensions similarity index 100% rename from SD/20240328_4_实践部分/_extensions rename to SD/20240402_4_实践部分/_extensions diff --git a/SD/20240328_4_实践部分/index.qmd b/SD/20240402_4_实践部分/index.qmd similarity index 100% rename from SD/20240328_4_实践部分/index.qmd rename to SD/20240402_4_实践部分/index.qmd diff --git a/SD/20240402_9_课后作业/_extensions b/SD/20240402_9_课后作业/_extensions new file mode 120000 index 0000000..74119e3 --- /dev/null +++ b/SD/20240402_9_课后作业/_extensions @@ -0,0 +1 @@ +../../_extensions \ No newline at end of file diff --git a/SD/20240402_9_课后作业/index.qmd b/SD/20240402_9_课后作业/index.qmd new file mode 100644 index 0000000..db0948e --- /dev/null +++ b/SD/20240402_9_课后作业/index.qmd @@ -0,0 +1,49 @@ +--- +title: "课后作业9" +subtitle: 《区域水环境污染数据分析实践》
Data analysis practice of regional water environment pollution +author: 苏命、王为东
中国科学院大学资源与环境学院
中国科学院生态环境研究中心 +date: today +lang: zh +format: + revealjs: + theme: dark + slide-number: true + chalkboard: + buttons: true + preview-links: auto + lang: zh + toc: true + toc-depth: 1 + toc-title: 大纲 + logo: ./_extensions/inst/img/ucaslogo.png + css: ./_extensions/inst/css/revealjs.css + pointer: + key: "p" + color: "#32cd32" + pointerSize: 18 +revealjs-plugins: + - pointer +filters: + - d2 +--- + +```{r} +#| include: false +#| cache: false +lang <- "cn" +require(tidyverse) +require(learnr) +``` + +## 第9次课后作业 + +自选数据集,使用R语言开展不同因子(如年份、季节、处理方式等)间某指标的差异分析,采用图表方式形成简要报告。 + +作业模板:[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd) + + +## 欢迎讨论!{.center} + + +`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` + diff --git a/SD/20240402_9_课后作业/第9次课后作业_模板.qmd b/SD/20240402_9_课后作业/第9次课后作业_模板.qmd new file mode 100644 index 0000000..90dde6c --- /dev/null +++ b/SD/20240402_9_课后作业/第9次课后作业_模板.qmd @@ -0,0 +1,8 @@ +--- +title: 课后作业9 +author: 姓名 +format: html +--- + +要求:自选数据集,使用R语言开展不同因子间(如年份、季节、处理方式等)某指标的差异分析,采用图表+文字说明等方式形成简要报告。 + diff --git a/data/writexldemo.xlsx b/data/writexldemo.xlsx index 55c9a59..2677682 100644 Binary files a/data/writexldemo.xlsx and b/data/writexldemo.xlsx differ