From 7c553379109de45d249efb0bc6f6b378ba3cbfcb Mon Sep 17 00:00:00 2001 From: ming Date: Tue, 2 Apr 2024 02:04:22 +0800 Subject: [PATCH] =?UTF-8?q?=E7=AC=AC9=E6=AC=A1=E8=AF=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../第8次课后作业_模板.qmd | 98 +++++++++++++++++- .../_extensions | 0 .../index.qmd | 0 .../mpg-plot.png | Bin .../_extensions | 0 .../index.qmd | 0 .../_extensions | 0 .../index.qmd | 0 SD/20240402_9_课后作业/_extensions | 1 + SD/20240402_9_课后作业/index.qmd | 49 +++++++++ .../第9次课后作业_模板.qmd | 8 ++ data/writexldemo.xlsx | Bin 5361 -> 5360 bytes 12 files changed, 154 insertions(+), 2 deletions(-) rename SD/{20240328_2_正则表达式 => 20240402_1_datavisualize}/_extensions (100%) rename SD/{20240328_3_datavisualize => 20240402_1_datavisualize}/index.qmd (100%) rename SD/{20240328_3_datavisualize => 20240402_1_datavisualize}/mpg-plot.png (100%) rename SD/{20240328_3_datavisualize => 20240402_2_正则表达式}/_extensions (100%) rename SD/{20240328_2_正则表达式 => 20240402_2_正则表达式}/index.qmd (100%) rename SD/{20240328_4_实践部分 => 20240402_4_实践部分}/_extensions (100%) rename SD/{20240328_4_实践部分 => 20240402_4_实践部分}/index.qmd (100%) create mode 120000 SD/20240402_9_课后作业/_extensions create mode 100644 SD/20240402_9_课后作业/index.qmd create mode 100644 SD/20240402_9_课后作业/第9次课后作业_模板.qmd diff --git a/SD/20240328_9_课后作业/第8次课后作业_模板.qmd b/SD/20240328_9_课后作业/第8次课后作业_模板.qmd index 0c79e4e..4a09163 100644 --- a/SD/20240328_9_课后作业/第8次课后作业_模板.qmd +++ b/SD/20240328_9_课后作业/第8次课后作业_模板.qmd @@ -7,18 +7,112 @@ format: html # 下载airquality.xlsx,并读取数据 ```{r} +#| eval: false +#| execute: false # 下载至临时文件 tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx") download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx", destfile = tmpxlsxpath) -airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) -metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) +airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) +metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) ``` # 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。 +```{r} +#| eval: false +#| execute: false +require(tidyverse) +airqualitydf |> + select(datetime, site, AQI) |> + filter(!is.na(AQI)) |> + group_by(site) |> + summarize(AQI.median = median(AQI, na.rm = TRUE)) |> + left_join(metadf |> select(site, city = Area)) |> + group_by(city) |> + filter(n() > 5) |> + summarize(p = sum(AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)) / n()) |> + top_n(10, p) +airqualitydf |> + select(datetime, site, AQI) |> + filter(!is.na(AQI)) |> + group_by(site) |> + summarize(AQI.median = median(AQI, na.rm = TRUE)) + +airqualitydf |> + select(datetime, site, AQI) |> + filter(!is.na(AQI)) |> + left_join(metadf |> select(site, city = Area)) |> + group_by(city) |> + filter(length(unique(site)) >= 5) |> + summarize(p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2, + na.rm = TRUE)) / n()) |> + slice_max(p, n = 10) +``` # 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。 + +```{r} +#| eval: false + +if (FALSE) { + +require(infer) +testdf <- airqualitydf |> + select(datetime, site, AQI) |> + filter(!is.na(AQI)) |> + left_join(metadf |> select(site, city = Area)) |> + group_by(city) |> + filter(length(unique(site)) >= 5) |> + mutate(dayornight = factor(ifelse(between(hour(datetime), 8, 20), "day", "night"), + levels = c("day", "night")) + ) |> + group_by(city) |> + nest(citydf = -city) |> + mutate(median_diff = purrr::map_dbl(citydf, ~ + .x |> + specify(AQI ~ dayornight) |> + calculate(stat = "diff in medians", order = c("day", "night")) |> + pull(stat) +)) |> + ungroup() |> +# slice_sample(n = 12) |> + mutate(null_dist = purrr::map(citydf, ~ + .x |> + specify(AQI ~ dayornight) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> + calculate(stat = "diff in medians", order = c("day", "night")) +)) |> + mutate(fig = purrr::pmap(list(null_dist, median_diff, city), + ~ visualize(..1) + + shade_p_value(obs_stat = ..2, direction = "both") + + ggtitle(..3) + )) |> + mutate(p_value = purrr::map2_dbl(null_dist, median_diff, + ~ get_p_value(.x, obs_stat = .y, direction = "both") |> + pull(p_value) + )) |> + arrange(p_value) |> + mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) + +testdf |> + select(city, sigdiff) |> +knitr::kable() + + lang <- "cn" +(testdf |> + slice_sample(n = 9) |> + pull(fig)) |> +patchwork::wrap_plots(ncol = 3) + +dwfun::theme_sci(5, 5) + +dwfun::ggsavep("./testdf.pdf") + +} + +``` + diff --git a/SD/20240328_2_正则表达式/_extensions b/SD/20240402_1_datavisualize/_extensions similarity index 100% rename from SD/20240328_2_正则表达式/_extensions rename to SD/20240402_1_datavisualize/_extensions diff --git a/SD/20240328_3_datavisualize/index.qmd b/SD/20240402_1_datavisualize/index.qmd similarity index 100% rename from SD/20240328_3_datavisualize/index.qmd rename to SD/20240402_1_datavisualize/index.qmd diff --git a/SD/20240328_3_datavisualize/mpg-plot.png b/SD/20240402_1_datavisualize/mpg-plot.png similarity index 100% rename from SD/20240328_3_datavisualize/mpg-plot.png rename to SD/20240402_1_datavisualize/mpg-plot.png diff --git a/SD/20240328_3_datavisualize/_extensions b/SD/20240402_2_正则表达式/_extensions similarity index 100% rename from SD/20240328_3_datavisualize/_extensions rename to SD/20240402_2_正则表达式/_extensions diff --git a/SD/20240328_2_正则表达式/index.qmd b/SD/20240402_2_正则表达式/index.qmd similarity index 100% rename from SD/20240328_2_正则表达式/index.qmd rename to SD/20240402_2_正则表达式/index.qmd diff --git a/SD/20240328_4_实践部分/_extensions b/SD/20240402_4_实践部分/_extensions similarity index 100% rename from SD/20240328_4_实践部分/_extensions rename to SD/20240402_4_实践部分/_extensions diff --git a/SD/20240328_4_实践部分/index.qmd b/SD/20240402_4_实践部分/index.qmd similarity index 100% rename from SD/20240328_4_实践部分/index.qmd rename to SD/20240402_4_实践部分/index.qmd diff --git a/SD/20240402_9_课后作业/_extensions b/SD/20240402_9_课后作业/_extensions new file mode 120000 index 0000000..74119e3 --- /dev/null +++ b/SD/20240402_9_课后作业/_extensions @@ -0,0 +1 @@ +../../_extensions \ No newline at end of file diff --git a/SD/20240402_9_课后作业/index.qmd b/SD/20240402_9_课后作业/index.qmd new file mode 100644 index 0000000..db0948e --- /dev/null +++ b/SD/20240402_9_课后作业/index.qmd @@ -0,0 +1,49 @@ +--- +title: "课后作业9" +subtitle: 《区域水环境污染数据分析实践》
Data analysis practice of regional water environment pollution +author: 苏命、王为东
中国科学院大学资源与环境学院
中国科学院生态环境研究中心 +date: today +lang: zh +format: + revealjs: + theme: dark + slide-number: true + chalkboard: + buttons: true + preview-links: auto + lang: zh + toc: true + toc-depth: 1 + toc-title: 大纲 + logo: ./_extensions/inst/img/ucaslogo.png + css: ./_extensions/inst/css/revealjs.css + pointer: + key: "p" + color: "#32cd32" + pointerSize: 18 +revealjs-plugins: + - pointer +filters: + - d2 +--- + +```{r} +#| include: false +#| cache: false +lang <- "cn" +require(tidyverse) +require(learnr) +``` + +## 第9次课后作业 + +自选数据集,使用R语言开展不同因子(如年份、季节、处理方式等)间某指标的差异分析,采用图表方式形成简要报告。 + +作业模板:[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd) + + +## 欢迎讨论!{.center} + + +`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` + diff --git a/SD/20240402_9_课后作业/第9次课后作业_模板.qmd b/SD/20240402_9_课后作业/第9次课后作业_模板.qmd new file mode 100644 index 0000000..90dde6c --- /dev/null +++ b/SD/20240402_9_课后作业/第9次课后作业_模板.qmd @@ -0,0 +1,8 @@ +--- +title: 课后作业9 +author: 姓名 +format: html +--- + +要求:自选数据集,使用R语言开展不同因子间(如年份、季节、处理方式等)某指标的差异分析,采用图表+文字说明等方式形成简要报告。 + diff --git a/data/writexldemo.xlsx b/data/writexldemo.xlsx index 55c9a599870e54ac57de2d9eb6150651822f7ff9..2677682b3c1a25081ff247b47a0cb3595423b4a3 100644 GIT binary patch delta 338 zcmV-Y0j>V=Dex(a7DG6@)B-Okl#*Xcwbn7zeQf%|nqKCW#yCHIM1)f4A6i z@y)k@XM{Xp4UtP8IX(h&HxkhaDaD9%Kfdh&QebF7gNXstg5rxeg4bcPpjWP(TE&Bd zN$Xi0-TRSN7JFpW@He%UeHcuFP>&0#H*)!Gs4Mt#2xPeMyUJd5EjD;*A3N zcu4xh+i8%Y4rG=Bup#bQ`6!>BB-cyS%*gCMC+W~NoikE0LcfkJZ|W~VeQNS=;u$dH kzWoHvAG7!nb^`_6Dc-y!lXDbQ0r!*06e9+l5&!@I0Jch<@c;k- delta 339 zcmV-Z0j&P;De)<=vJVBXAyxw=leZ5Zf6HpaFc3xe{R*SKE!#;;5Viwt$*O@uQ(C$c z(!>$99!P4P{CyR_l7LrTj_#c^cLo*fqimvIuuf=IqAW>K1d3@cR9&L?-6me4$a$(b zZM1?Coxq`WbyqM$n6~h0wE^n|xF~>>Bg~ZO;JqOjJ9Yp`T@oBBhRkT z(;9G|rVm(xr<{6<&%oGBL^L2dn~>(mHXVR73=K$7-r+3C@Wt!FN;likDOXM@{K>$q zbrdJ}(TT~}wr$cbI%~+E;m_^MTeoT~)cI5lP*re7m<8&!tqOcygu^w|`E5AyUI2eQ z1%2jiSfuX*9)$p;k2@+Ki>K$^B?eV_nlIyY5$CI2x+M8MS>|8okvIJppd5z$n>Ygc l+_#^g`UA7~5OxCvuOU_gC6jU#Q~~&t#}p$5oe}^5000?Uol^h+