Compare commits

...

20 Commits

Author SHA1 Message Date
6412cc5560 render compile 2025-03-25 00:46:18 +08:00
bc628545b6 update leture 2 2025-03-25 00:45:07 +08:00
1ead84ac70 render compile 2025-03-20 09:37:17 +08:00
10ffdd46f3 add 3.1 2025-03-20 09:36:34 +08:00
987a3eaea9 render compile 2025-03-20 09:35:47 +08:00
c292d0ffab add 2.3 2025-03-20 09:35:13 +08:00
cb2e22fde5 update 2025-03-20 09:34:24 +08:00
8a31a565a8 add some lesson for lesson6 2025-03-20 09:33:27 +08:00
6e3f134635 update 2025-03-19 16:41:27 +08:00
c883d6df6a update 2025-03-19 16:40:59 +08:00
fda04d79f1 update 2025-03-19 16:33:40 +08:00
a4596a9836 update 2025-03-19 16:31:50 +08:00
7902f28a7a update gitignore 2025-03-17 20:35:44 +08:00
654fb2f024 render compile 2025-03-17 20:32:11 +08:00
352ca2b1ce render compile 2025-03-17 20:30:20 +08:00
82248cb24b render compile 2025-03-17 20:23:18 +08:00
a21430385f update 2025-03-17 20:16:08 +08:00
78196d49ed render compile 2025-03-17 20:06:33 +08:00
30dd043c3b render compile 2025-03-17 19:50:08 +08:00
2db87d5e89 update 2025-03-17 19:20:50 +08:00
112 changed files with 1569 additions and 227 deletions

1
.gitignore vendored
View File

@@ -7,3 +7,4 @@ _variables.yml
_freeze/ _freeze/
*_cache/ *_cache/
*_files/ *_files/
SD/_*/

1
.source_state Normal file
View File

@@ -0,0 +1 @@
454c267b732cda3ca58332ab3883bec6

382
Makefile Normal file
View File

@@ -0,0 +1,382 @@
# Makefile for Quarto Project Automation
# Detect OS
HOSTNAME := $(shell hostname)
OS := $(shell uname | tr A-Z a-z)
ifeq ($(OS), darwin)
SEDI := sed -i ''
OS := OSX
else ifeq ($(OS), linux)
SEDI := sed -i
OS := linux
else
$(error Unknown operating system)
endif
# Fetch Git branch and project details
branchname := $(shell git branch --show-current)
reponame := $(shell basename $(shell git rev-parse --show-toplevel))
projtype := $(shell basename $(shell dirname $(shell git rev-parse --show-toplevel)))
pubtype := $(if $(findstring PUB,$(branchname)),public,protected)
remotedir := dwuser@drwater.net:/home/www/drc/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
outputdir := $(shell awk -F': *' '/^ *output-dir:/ {print $$2 "/" }' ./_quarto.yml)
siteurl := https://drc.drwater.net/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
branchnames := "TX\|FJ\|YF\|ZY\|WW\|JB\|YY\|YJ\|DYF"
# Variables for colors and port
bcolor := grey
port := 4199
# Set background color based on branch name
ifeq ($(findstring R1,$(branchname)),R1)
bcolor := orange
else ifeq ($(findstring R2,$(branchname)),R2)
bcolor := lightblue
else ifeq ($(findstring R3,$(branchname)),R3)
bcolor := lightgreen
else ifeq ($(findstring PUB,$(branchname)),PUB)
bcolor := light
endif
# Set port based on branch name
branch_ports := main:4200 SM:4201 TX:4202 FJ:4203 YF:4204 ZY:4205 WW:4206 JB:4207 YY:4208 YJ:4209 DYF:4210
port := $(shell echo $(branch_ports) | tr " " "\n" | grep -E "^$(branchname):" | sed -E 's/^$(branchname):([0-9]+)/\1/')
port := $(if $(port),$(port),4199)
# Define the state file
STATE_FILE := .source_state
# checkfile
GREPSTR := " \|(\|)\|^submit\|^analysis\|_cache\|_freeze\|^site_libs\|^www"
# Default target
.PHONY: all preview readme rsync local clean upload fix_links force check_git_status
all: local upload clean commit push
force: updrefbib check_git_status updvariable render
local: updrefbib check_git_status updvariable lazyrender
updmakefile:
@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \
echo "基于 $$HOME/bin/publish2dw.Makefile 更新本项目 Makefile..."; \
cp "$$HOME/bin/publish2dw.Makefile" "./Makefile"; \
git add "Makefile" && git commit -m "Update Makefile"; \
echo "本项目Makefile更新完成."; \
else \
echo "Makefile 无需在本系统上更新."; \
fi
updrefbib:
@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \
echo "更新本项目参考文献..."; \
cp "$$HOME/literature/Ref.bib" "./BB/"; \
echo "推送本地参考文献到远程服务器..."; \
rsync -azvu --progress "$$HOME/literature/Ref.bib" "drwater.net:/home/www/drc/datapool/public/BB/Ref.bib"; \
echo "本项目参考文献更新完成."; \
else \
echo "检查网络连通性..."; \
if ping -c 1 -W 1 drc.drwater.net > /dev/null 2>&1; then \
echo "网络正常,更新本项目参考文献..."; \
wget -O BB/Ref.bib "https://drc.drwater.net/datapool/public/BB/Ref.bib"; \
echo "本项目参考文献更新完成."; \
else \
echo "网络不可用,跳过参考文献更新."; \
fi; \
fi; \
git add BB/Ref.bib; \
if [ "$$(git diff --cached)" ]; then \
git commit -m "Update Ref.bib"; \
fi
check_git_status:
@uncommitted=$$(git status --porcelain); \
if [ -n "$$uncommitted" ]; then \
git status; \
read -p "当前存在未提交的修改(如上),是否要提交?(Y/N, default is N): " answer; \
answer=$${answer:-N}; \
if [ "$$answer" = "Y" ] || [ "$$answer" = "y" ]; then \
read -p "请输入修改说明: " message; \
git add . && git commit -m "$$message"; \
else \
echo "未提交如后续操作为pull则无法继续"; \
fi; \
fi
# Lazy render with hash checking
lazyrender:
@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
if [ ! -f $(STATE_FILE) ] || [ "$$current_hash" != "$$(cat $(STATE_FILE))" ]; then \
echo "源文件发生变化, 重新编译..."; \
echo "$$current_hash" > $(STATE_FILE); \
$(MAKE) render; \
else \
echo "源文件无变化, 跳过编译..."; \
exit 0; \
fi
# Render target
render:
@quarto render
commit:
@echo "提交修改(commit)..."; \
git add .; \
if [ -n "$$(git diff --cached)" ]; then \
git commit -m "render compile"; \
else \
echo "没有修改记录,跳过."; \
fi; \
# Pull changes from the specified branch based on the current branch
pull:
@echo "从远程拉取项目更新..."; \
$(MAKE) check_git_status; \
git pull; \
current_branch=$$(git rev-parse --abbrev-ref HEAD); \
if [ "$$current_branch" = main ]; then \
echo "当前分枝为$$current_branch."; \
remote_branch=$$(git branch --remote | grep -v 'main' | grep $(branchnames) | awk '{print $$1}' | sed 's/origin\///' | head -n 1); \
if [ -n "$$remote_branch" ]; then \
echo "尝试从远程分枝$$remote_branch 拉取更新..."; \
git pull --rebase origin $$remote_branch; \
else \
echo "远程无可用分支$$remote_branch."; \
fi; \
else \
echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
git pull --rebase origin main; \
fi
# Pull changes from the main branch
pullmain:
$(MAKE) check_git_status; \
@current_branch=$$(git rev-parse --abbrev-ref HEAD); \
@echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
git pull --rebase origin main; \
push:
@echo "推送到远程..."; \
git push
filehash:
@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
echo "$$current_hash" > $(STATE_FILE)
# Preview the site on a specific port
preview:
@quarto preview --port $(port)
# Generate README.md
readme:
@quarto render index.qmd -t markdown -o README.md
@sed -e '/^---/,/^---/d' "$(outputdir)/README.md" > README.md
@rm "$(outputdir)/README.md"
# Sync files with remote server
rsync:
@rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"
# Open local site
open:
@if [ "$(OS)" = "OSX" ]; then open "$(outputdir)/index.html"; fi
# Clean unnecessary files
clean:
@rm -f ./*.spl ./*.bbl ./*.blg ./*.log ./*.tex ./*.bcf ./*.tex.sedbak ./*.fdb_latexmk
# Upload files to server and fix links
upload: backupdocx
@mkdir -p "$(outputdir)" && chmod -R 2775 "$(outputdir)"
@$(MAKE) fix_links
@if rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; then \
if [ "$(OS)" = "OSX" ]; then \
open "$(siteurl)" 2>/dev/null; \
fi; \
else \
echo "Rsync failed. Attempting alternative upload method..."; \
mkdir -p "$(reponame)"; \
rsync -azvu --progress --delete -r "$(reponame)" "$(dir $(remotedir))"; \
rm -rf "$(reponame)"; \
rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; \
if [ "$(OS)" = "OSX" ]; then \
open "$(siteurl)" 2>/dev/null; \
fi; \
fi
backupdocx:
@echo "备份MS.docx文件..."; \
currentcommithash=$$(git rev-parse --short HEAD); \
datetime=$$(git show -s --format=%ci $$currentcommithash | sed 's/[-: ]//g' | cut -c3-12); \
mkdir -p TC/MS/; \
existing_file=$$(find TC/MS -name "MS*.docx" -exec cmp -s www/MS/MS.docx {} \; -print -quit); \
if [ -n "$$existing_file" ]; then \
echo "与www/MS/MS.docx 内容相同的备份文件已存在: $$existing_file"; \
echo "无需备份."; \
else \
if [ ! -e TC/MS/MS$${datetime}_$${currentcommithash}.docx ]; then \
cp www/MS/MS.docx TC/MS/MS$${datetime}_$${currentcommithash}.docx; \
echo "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx完成."; \
git add TC/MS/MS$${datetime}_$${currentcommithash}.docx; \
if [ "$$(git diff --cached)" ]; then \
git commit -m "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx"; \
fi; \
else \
echo "TC/MS/MS$${datetime}_$${currentcommithash}.docx已存在无需备份."; \
fi; \
fi;
trackchange:
@if [ "$(projtype)" != "manuscript" ]; then \
exit 0; \
fi; \
echo "选择两个提交以比较文档..."; \
hashes=$$(git log --pretty=format:'%h: %s BY %an (%ar)' \
| grep -E "$$(ls TC/MS/*.docx | xargs -n1 basename | sed -E 's/MS.*_([0-9a-f]+)\.docx/\1/' | tr '\n' '|')SMT_】" \
| fzf --multi --reverse --preview="echo {}" ); \
echo $$hashes; \
hash1=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | tail -n 1 | awk '{print $$1}' | tr -d ':'); \
hash1=$$(git rev-parse --short $${hash1}^); \
datetime1=$$(git show -s --format=%ci $$hash1 | sed 's/[-: ]//g' | cut -c3-12); \
hash2=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | head -n 1 | awk '{print $$1}' | tr -d ':'); \
hash2=$$(git rev-parse --short $${hash2}^); \
datetime2=$$(git show -s --format=%ci $$hash2 | sed 's/[-: ]//g' | cut -c3-12); \
if [ -z "$$hash1" ] || [ -z "$$hash2" ]; then \
echo "必须选择两个提交."; \
exit 1; \
fi; \
doc1="TC/MS/MS$${datetime1}_$$hash1.docx"; \
echo "$$doc1"; \
doc2="TC/MS/MS$${datetime2}_$$hash2.docx"; \
echo "$$doc2"; \
if [ -f "$$doc1" ] && [ -f "$$doc2" ] && [ "$$doc1" != "$$doc2" ]; then \
echo "打开文件: $$doc1 和 $$doc2"; \
open "$$doc1" "$$doc2"; \
printf "MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}" | pbcopy; \
echo "请在word中对比两个版本形成带修改痕迹的版本并保存至TC/MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}.docx!"; \
else \
echo "一个或两个文件不存在: $$doc1, $$doc2"; \
exit 1; \
fi
# Fix links in www directory
fix_links:
@find ./www -type f -name "*.html" -exec sed -i.bak \
-e "s/{{< var branch >}}/$(branchname)/g" \
-e "s/{{< var pubtype >}}/$(pubtype)/g" \
-e "s/{{< var projtype >}}/$(projtype)/g" \
-e "s/{{< var reponame >}}/$(reponame)/g" \
-e "s/$(reponame)\/blob/$(reponame)\/raw\/branch/g" \
-e "s/$(reponame)\/edit/$(reponame)\/_edit/g" {} +
@find ./www -type f -name "*.bak" -exec rm {} +
updvariable:
@touch _variables.yml # 如果文件不存在则创建
@grep -q '^reponame:' _variables.yml || echo "reponame: $(reponame)" >> _variables.yml
@if grep -q '^reponame:' _variables.yml; then \
$(SEDI) 's/^reponame:.*/reponame: $(reponame)/' _variables.yml; \
else \
echo "reponame: $(reponame)" >> _variables.yml; \
fi
@grep -q '^projtype:' _variables.yml || echo "projtype: $(projtype)" >> _variables.yml
@if grep -q '^projtype:' _variables.yml; then \
$(SEDI) 's/^projtype:.*/projtype: $(projtype)/' _variables.yml; \
else \
echo "projtype: $(projtype)" >> _variables.yml; \
fi
@grep -q '^branch:' _variables.yml || echo "branch: $(branchname)" >> _variables.yml
@if grep -q '^branch:' _variables.yml; then \
$(SEDI) 's/^branch:.*/branch: $(branchname)/' _variables.yml; \
else \
echo "branch: $(branchname)" >> _variables.yml; \
fi
@grep -q '^pubtype:' _variables.yml || echo "pubtype: $(pubtype)" >> _variables.yml
@if grep -q '^pubtype:' _variables.yml; then \
$(SEDI) 's/^pubtype:.*/pubtype: $(pubtype)/' _variables.yml; \
else \
echo "pubtype: $(pubtype)" >> _variables.yml; \
fi
@grep -q '^nwAB:' _variables.yml || echo "nwAB: $(nwAB)" >> _variables.yml
@if grep -q '^nwAB:' _variables.yml; then \
$(SEDI) 's/^nwAB:.*/nwAB: $(nwAB)/' _variables.yml; \
else \
echo "nwAB: $(nwAB)" >> _variables.yml; \
fi
@grep -q '^nwMS:' _variables.yml || echo "nwMS: $(nwMS)" >> _variables.yml
@if grep -q '^nwMS:' _variables.yml; then \
$(SEDI) 's/^nwMS:.*/nwMS: $(nwMS)/' _variables.yml; \
else \
echo "nwMS: $(nwMS)" >> _variables.yml; \
fi
@grep -q '^figtblMS:' _variables.yml || echo "figtblMS: $(figtblMS)" >> _variables.yml
@if grep -q '^figtblMS:' _variables.yml; then \
$(SEDI) 's/^figtblMS:.*/figtblMS: $(figtblMS)/' _variables.yml; \
else \
echo "figtblMS: $(figtblMS)" >> _variables.yml; \
fi
@grep -q '^figtblSM:' _variables.yml || echo "figtblSM: $(figtblSM)" >> _variables.yml
@if grep -q '^figtblSM:' _variables.yml; then \
$(SEDI) 's/^figtblSM:.*/figtblSM: $(figtblSM)/' _variables.yml; \
else \
echo "figtblSM: $(figtblSM)" >> _variables.yml; \
fi
@mkpapervar
# Help: list all available commands with descriptions (English and Chinese)
help:
@echo "Makefile for Quarto Project Automation"
@echo "======================================="
@echo "Available targets (English):"
@echo ""
@echo " make all - Execute local build, upload, clean, and commit"
@echo " make force - Force render, hash update, upload, clean, and commit"
@echo " make local - Check git status and perform a lazy render if changes detected"
@echo " make check_git_status - Check for uncommitted changes and ask to commit them"
@echo " make lazyrender - Render if source files have changed based on hash comparison"
@echo " make render - Force Quarto to render the project"
@echo " make commit - Commit changes if no previous uncommitted changes"
@echo " make filehash - Generate and store the file hash of source files"
@echo " make preview - Preview the site locally on the specific port (default: 4199)"
@echo " make readme - Render README.md from Quarto index.qmd"
@echo " make rsync - Sync output files with the remote server"
@echo " make open - Open the generated site locally in the browser"
@echo " make clean - Clean up unnecessary files"
@echo " make upload - Upload files to the server and fix links"
@echo " make fix_links - Fix HTML links in the 'www' directory for the remote server"
@echo " make updmakefile - Update the Makefile"
@echo " make help - Display this help message"
@echo ""
@echo "Available targets (中文):"
@echo ""
@echo " make all - 执行本地构建、上传、清理和提交"
@echo " make force - 强制渲染、更新哈希、上传、清理并提交"
@echo " make local - 检查Git状态若检测到更改则进行懒惰渲染"
@echo " make check_git_status - 检查未提交的更改,并询问是否提交"
@echo " make lazyrender - 如果源文件发生更改,则根据哈希比较进行渲染"
@echo " make render - 强制 Quarto 渲染项目"
@echo " make commit - 如果没有未提交的更改则提交"
@echo " make filehash - 生成并存储源文件的哈希值"
@echo " make preview - 本地在特定端口预览网站 (默认: 4199)"
@echo " make readme - 从 Quarto 的 index.qmd 生成 README.md"
@echo " make rsync - 将输出文件同步到远程服务器"
@echo " make open - 在浏览器中打开生成的网站"
@echo " make clean - 清理不必要的文件"
@echo " make upload - 上传文件到服务器并修复链接"
@echo " make fix_links - 修复 'www' 目录中的 HTML 链接"
@echo " make updmakefile - 更新本项目 Makefile"
@echo " make help - 显示此帮助信息"
@echo ""
@echo "Environment variables (English and 中文):"
@echo " bcolor - Background color based on branch name (基于分支名的背景颜色)"
@echo " port - Port number based on branch name (基于分支名的端口号)"
@echo " STATE_FILE - File for storing hash state of source files (用于存储源文件哈希状态的文件)"
@echo " siteurl - The URL where the site will be hosted (网站托管的 URL)"
@echo ""

View File

@@ -47,9 +47,8 @@ knitr::opts_chunk$set(echo = TRUE)
### 课件 ### 课件
- 采用`R语言`+`quarto`完成 - 采用`R语言`+`quarto`完成
- 网页公开:[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html) - 网页公开:[https://drc.drwater.net/course/public/RWEP/PUB/index.html](https://drc.drwater.net/course/public/RWEP/PUB/index.html)
- 课件代码:[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git) - 课件代码:[https://git.drwater.net/course/RWEP.git](https://git.drwater.net/course/RWEP.git)
- 代码web界面[https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/](https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/)
## 如何学习接下来的内容? ## 如何学习接下来的内容?
@@ -67,7 +66,7 @@ knitr::opts_chunk$set(echo = TRUE)
## Rstudio Server使用 ## Rstudio Server使用
- 服务网址:[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/) - 服务网址:[https://rs1.drwater.net/](https://rs1.drwater.net/)
- 每位同学使用1个账号随机生成 - 每位同学使用1个账号随机生成
- 密码:**** - 密码:****
- 后面的实践课程可在该服务器上完成 - 后面的实践课程可在该服务器上完成

View File

@@ -0,0 +1,138 @@
---
title: "Lesson 6"
format: html
---
```{r}
https://rs1.drwater.net
username:
- ruser01
- ruser02
- ruser03
- ruser04
- ruser05
- ruser06
RWEP2025
```
# 安装包
```{r}
install.packages("tidyverse")
x <- c(1:10, NA)
hist(x)
mean(x, na.rm = TRUE)
median(x, na.rm = TRUE)
sd(x, na.rm = TRUE)
for(i in 1:10){
print(i)
}
x + y + x * y
myfunc <- function(x, y = 3) {
x + y + x * y
}
myfunc(1, 2)
myfunc(10)
c(FALSE, 2, 1:3, 3)
c(FALSE, 2, 1:3, 3) > 1
all(c(FALSE, 2, 1:3, 3) > 1)
c(1L,2L,3L)
any(c(FALSE, 2, 1:3, 3) > 1)
x <- 10
sin(x) = ?
paste("sin(x) = ", sin(x), sep = " ")
paste0("sin(x) = ", sin(x))
substr("Monday", 1, 3)
```
# tidy
```{r}
require(readxl)
aqdf <-readxl::read_xlsx("../../data/airquality.xlsx", sheet = "metadf")
# install.packages("skimr")
aqdf |>
skimr::skim()
# base
# tidyverse
aqdf |>
dplyr::group_by(Area) |>
dplyr::summarize(
n = n(),
lon.mean = mean(lon, na.rm = TRUE),
lon.sd = sd(lat, na.rm = TRUE)
) |>
dplyr::filter(Area %in% c("北京市", "天津市", "上海市", "重庆市")) |>
ggplot(aes(x = n, y = lon.mean)) +
geom_point() +
geom_line() +
geom_errorbar(
aes(ymin = lon.mean - lon.sd,
ymax = lon.mean + lon.sd)
)
readxl::read_xlsx("./airquality.xlsx")
flights|>
filter(dest=="IAH")|>
group_by(year,month,day)|>summarize(n=n(),
delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10)
```

View File

@@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz")
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

View File

@@ -264,6 +264,9 @@ t.test(x, y)
wilcox.test(x, y) wilcox.test(x, y)
``` ```
### [什么是 Wilcoxon-Mann-Whitney检验](https://zhuanlan.zhihu.com/p/613524533
## 统计函数 ## 统计函数
### 创建向量的直方图 ### 创建向量的直方图
@@ -792,4 +795,4 @@ names(Y) <- c("colA", "colB", "colC")
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

View File

@@ -37,7 +37,7 @@ require(learnr)
## 下载excel文件 ## 下载excel文件
[https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx) [https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx](https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx)
## Tidy data ## Tidy data

View File

@@ -0,0 +1,21 @@
name,age,score
Alice,25,85
Bob,30,92
Charlie,28,89
David,22,95
Eva,35,87
Frank,27,91
Grace,29,88
Helen,26,93
Ivan,31,86
Jack,24,94
Kelly,32,89
Lily,28,90
Mike,33,85
Nancy,27,92
Olivia,34,88
Peter,29,93
Queen,25,89
Ryan,30,94
Samantha,26,91
Tom,31,87
1 name age score
2 Alice 25 85
3 Bob 30 92
4 Charlie 28 89
5 David 22 95
6 Eva 35 87
7 Frank 27 91
8 Grace 29 88
9 Helen 26 93
10 Ivan 31 86
11 Jack 24 94
12 Kelly 32 89
13 Lily 28 90
14 Mike 33 85
15 Nancy 27 92
16 Olivia 34 88
17 Peter 29 93
18 Queen 25 89
19 Ryan 30 94
20 Samantha 26 91
21 Tom 31 87

View File

@@ -79,5 +79,5 @@ Tom,31,87
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

View File

@@ -321,5 +321,4 @@ if (FALSE) {
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

View File

@@ -323,5 +323,5 @@ flights |>
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

View File

@@ -1,49 +0,0 @@
---
title: "课后作业9"
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
date: today
lang: zh
format:
revealjs:
theme: dark
slide-number: true
chalkboard:
buttons: true
preview-links: auto
lang: zh
toc: true
toc-depth: 1
toc-title: 大纲
logo: ./_extensions/inst/img/ucaslogo.png
css: ./_extensions/inst/css/revealjs.css
pointer:
key: "p"
color: "#32cd32"
pointerSize: 18
revealjs-plugins:
- pointer
filters:
- d2
---
```{r}
#| include: false
#| cache: false
lang <- "cn"
require(tidyverse)
require(learnr)
```
## 第9次课后作业
自选数据集使用R语言开展不同因子如年份、季节、处理方式等间某指标的差异分析采用图表方式形成简要报告。
作业模板:[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd)
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

@@ -1,8 +0,0 @@
---
title: 课后作业9
author: 姓名
format: html
---
要求自选数据集使用R语言开展不同因子间如年份、季节、处理方式等某指标的差异分析采用图表+文字说明等方式形成简要报告。

View File

@@ -1 +0,0 @@
../../_extensions

View File

@@ -945,5 +945,5 @@ semi_join(df1, df2, by = "id")
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

BIN
SD/3.9_课后作业8/.RData Normal file

Binary file not shown.

Binary file not shown.

View File

@@ -43,7 +43,7 @@ require(learnr)
1. 根据`airqualitydf.xlsx`按采样点统计白天8:00-20:00与夜晚20:00-8:00中空气质量指数AQI中位数按城市统计低于所有采样点AQI30%分位值的采样点占比列出上述占比最高的10个城市不考虑采样点数低于5个的城市 1. 根据`airqualitydf.xlsx`按采样点统计白天8:00-20:00与夜晚20:00-8:00中空气质量指数AQI中位数按城市统计低于所有采样点AQI30%分位值的采样点占比列出上述占比最高的10个城市不考虑采样点数低于5个的城市
2. 按照不同城市分组统计白天与夜晚AQI中位数是否具有显著差异。 2. 按照不同城市分组统计白天与夜晚AQI中位数是否具有显著差异。
作业模板:[第8次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd) 作业模板:[第8次课后作业_模板.qmd](https://git.drwater.net/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)
## 示例代码 ## 示例代码
@@ -62,5 +62,5 @@ require(learnr)
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

Binary file not shown.

Binary file not shown.

View File

@@ -14,7 +14,7 @@ format: html
# 下载至临时文件 # 下载至临时文件
if (FALSE) { if (FALSE) {
tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx") tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx", download.file("https://git.drwater.net/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
destfile = tmpxlsxpath) destfile = tmpxlsxpath)
airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
@@ -118,6 +118,8 @@ if (FALSE) {
saveRDS(testdf, "./testdf.RDS") saveRDS(testdf, "./testdf.RDS")
} }
if (FALSE) {
lang <- "cn" lang <- "cn"
require(dwfun) require(dwfun)
require(rmdify) require(rmdify)
@@ -144,5 +146,7 @@ testdf |>
pull(gp) pull(gp)
}
``` ```

View File

@@ -3934,5 +3934,5 @@ p
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

View File

Before

Width:  |  Height:  |  Size: 61 KiB

After

Width:  |  Height:  |  Size: 61 KiB

View File

@@ -100,5 +100,4 @@ geom_bar(position = "fill")
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

View File

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View File

Before

Width:  |  Height:  |  Size: 272 KiB

After

Width:  |  Height:  |  Size: 272 KiB

View File

Before

Width:  |  Height:  |  Size: 345 KiB

After

Width:  |  Height:  |  Size: 345 KiB

View File

Before

Width:  |  Height:  |  Size: 210 KiB

After

Width:  |  Height:  |  Size: 210 KiB

View File

Before

Width:  |  Height:  |  Size: 470 KiB

After

Width:  |  Height:  |  Size: 470 KiB

View File

Before

Width:  |  Height:  |  Size: 41 KiB

After

Width:  |  Height:  |  Size: 41 KiB

View File

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 37 KiB

View File

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 37 KiB

View File

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

View File

Before

Width:  |  Height:  |  Size: 5.4 KiB

After

Width:  |  Height:  |  Size: 5.4 KiB

View File

Before

Width:  |  Height:  |  Size: 5.5 KiB

After

Width:  |  Height:  |  Size: 5.5 KiB

View File

Before

Width:  |  Height:  |  Size: 209 KiB

After

Width:  |  Height:  |  Size: 209 KiB

View File

Before

Width:  |  Height:  |  Size: 7.8 KiB

After

Width:  |  Height:  |  Size: 7.8 KiB

View File

Before

Width:  |  Height:  |  Size: 3.5 KiB

After

Width:  |  Height:  |  Size: 3.5 KiB

View File

Before

Width:  |  Height:  |  Size: 177 KiB

After

Width:  |  Height:  |  Size: 177 KiB

View File

Before

Width:  |  Height:  |  Size: 8.0 KiB

After

Width:  |  Height:  |  Size: 8.0 KiB

View File

Before

Width:  |  Height:  |  Size: 569 KiB

After

Width:  |  Height:  |  Size: 569 KiB

View File

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

View File

Before

Width:  |  Height:  |  Size: 100 KiB

After

Width:  |  Height:  |  Size: 100 KiB

View File

Before

Width:  |  Height:  |  Size: 7.5 KiB

After

Width:  |  Height:  |  Size: 7.5 KiB

View File

Before

Width:  |  Height:  |  Size: 131 KiB

After

Width:  |  Height:  |  Size: 131 KiB

View File

Before

Width:  |  Height:  |  Size: 41 KiB

After

Width:  |  Height:  |  Size: 41 KiB

View File

Before

Width:  |  Height:  |  Size: 60 KiB

After

Width:  |  Height:  |  Size: 60 KiB

View File

Before

Width:  |  Height:  |  Size: 83 KiB

After

Width:  |  Height:  |  Size: 83 KiB

View File

Before

Width:  |  Height:  |  Size: 96 KiB

After

Width:  |  Height:  |  Size: 96 KiB

View File

Before

Width:  |  Height:  |  Size: 87 KiB

After

Width:  |  Height:  |  Size: 87 KiB

View File

Before

Width:  |  Height:  |  Size: 91 KiB

After

Width:  |  Height:  |  Size: 91 KiB

View File

Before

Width:  |  Height:  |  Size: 869 KiB

After

Width:  |  Height:  |  Size: 869 KiB

View File

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 3.8 KiB

View File

Before

Width:  |  Height:  |  Size: 638 KiB

After

Width:  |  Height:  |  Size: 638 KiB

View File

Before

Width:  |  Height:  |  Size: 408 KiB

After

Width:  |  Height:  |  Size: 408 KiB

View File

Before

Width:  |  Height:  |  Size: 410 KiB

After

Width:  |  Height:  |  Size: 410 KiB

View File

Before

Width:  |  Height:  |  Size: 154 KiB

After

Width:  |  Height:  |  Size: 154 KiB

View File

Before

Width:  |  Height:  |  Size: 8.7 KiB

After

Width:  |  Height:  |  Size: 8.7 KiB

View File

Before

Width:  |  Height:  |  Size: 159 KiB

After

Width:  |  Height:  |  Size: 159 KiB

View File

Before

Width:  |  Height:  |  Size: 184 KiB

After

Width:  |  Height:  |  Size: 184 KiB

View File

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

View File

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

View File

Before

Width:  |  Height:  |  Size: 8.0 KiB

After

Width:  |  Height:  |  Size: 8.0 KiB

View File

Before

Width:  |  Height:  |  Size: 69 KiB

After

Width:  |  Height:  |  Size: 69 KiB

View File

Before

Width:  |  Height:  |  Size: 96 KiB

After

Width:  |  Height:  |  Size: 96 KiB

View File

Before

Width:  |  Height:  |  Size: 117 KiB

After

Width:  |  Height:  |  Size: 117 KiB

View File

Before

Width:  |  Height:  |  Size: 137 KiB

After

Width:  |  Height:  |  Size: 137 KiB

View File

Before

Width:  |  Height:  |  Size: 10 KiB

After

Width:  |  Height:  |  Size: 10 KiB

View File

Before

Width:  |  Height:  |  Size: 7.1 KiB

After

Width:  |  Height:  |  Size: 7.1 KiB

View File

Before

Width:  |  Height:  |  Size: 54 KiB

After

Width:  |  Height:  |  Size: 54 KiB

View File

Before

Width:  |  Height:  |  Size: 7.6 KiB

After

Width:  |  Height:  |  Size: 7.6 KiB

View File

Before

Width:  |  Height:  |  Size: 134 KiB

After

Width:  |  Height:  |  Size: 134 KiB

View File

Before

Width:  |  Height:  |  Size: 150 KiB

After

Width:  |  Height:  |  Size: 150 KiB

View File

Before

Width:  |  Height:  |  Size: 160 KiB

After

Width:  |  Height:  |  Size: 160 KiB

View File

Before

Width:  |  Height:  |  Size: 120 KiB

After

Width:  |  Height:  |  Size: 120 KiB

View File

@@ -47,7 +47,8 @@ hexes <- function(..., size = 64) {
right <- (seq_along(x) - 1) * size right <- (seq_along(x) - 1) * size
res <- glue::glue( res <- glue::glue(
'![](hexes/<x>.png){.absolute top=-20 right=<right> width="<size>" height="<size * 1.16>"}', '![](hexes/<x>.png){.absolute top=-20 right=<right> width="<size>" height="<size * 1.16>"}',
.open = "<", .close = ">" .open = "<",
.close = ">"
) )
paste0(res, collapse = " ") paste0(res, collapse = " ")
} }
@@ -65,11 +66,10 @@ theme_set(theme_bw())
options(cli.width = 70, ggplot2.discrete.fill = c("#7e96d5", "#de6c4e")) options(cli.width = 70, ggplot2.discrete.fill = c("#7e96d5", "#de6c4e"))
train_color <- "#1a162d" train_color <- "#1a162d"
test_color <- "#cd4173" test_color <- "#cd4173"
data_color <- "#767381" data_color <- "#767381"
assess_color <- "#84cae1" assess_color <- "#84cae1"
splits_pal <- c(data_color, train_color, test_color) splits_pal <- c(data_color, train_color, test_color)
``` ```
@@ -152,10 +152,27 @@ knitr::include_graphics("images/whole-game-final-performance.jpg")
# Install the packages for the workshop # Install the packages for the workshop
pkgs <- pkgs <-
c("bonsai", "doParallel", "embed", "finetune", "lightgbm", "lme4", c(
"plumber", "probably", "ranger", "rpart", "rpart.plot", "rules", "bonsai",
"splines2", "stacks", "text2vec", "textrecipes", "tidymodels", "doParallel",
"vetiver", "remotes") "embed",
"finetune",
"lightgbm",
"lme4",
"plumber",
"probably",
"ranger",
"rpart",
"rpart.plot",
"rules",
"splines2",
"stacks",
"text2vec",
"textrecipes",
"tidymodels",
"vetiver",
"remotes"
)
install.packages(pkgs) install.packages(pkgs)
``` ```
@@ -197,26 +214,34 @@ taxi
#| #|
set.seed(123) set.seed(123)
library(forcats) library(forcats)
one_split <- slice(taxi, 1:30) %>% require(tidymodels)
initial_split() %>% require(tidyverse)
tidy() %>% one_split <- taxi |>
add_row(Row = 1:30, Data = "Original") %>% dplyr::slice(1:30) |>
mutate(Data = case_when( rsample::initial_split() |>
Data == "Analysis" ~ "Training", generics::tidy() |>
Data == "Assessment" ~ "Testing", tibble::add_row(Row = 1:30, Data = "Original") |>
TRUE ~ Data dplyr::mutate(
)) %>% Data = case_when(
mutate(Data = factor(Data, levels = c("Original", "Training", "Testing"))) Data == "Analysis" ~ "Training",
Data == "Assessment" ~ "Testing",
TRUE ~ Data
)
) |>
dplyr::mutate(
Data = factor(Data, levels = c("Original", "Training", "Testing"))
)
all_split <- all_split <-
ggplot(one_split, aes(x = Row, y = fct_rev(Data), fill = Data)) + ggplot(one_split, aes(x = Row, y = fct_rev(Data), fill = Data)) +
geom_tile(color = "white", geom_tile(color = "white", linewidth = 1) +
linewidth = 1) +
scale_fill_manual(values = splits_pal, guide = "none") + scale_fill_manual(values = splits_pal, guide = "none") +
theme_minimal() + theme_minimal() +
theme(axis.text.y = element_text(size = rel(2)), theme(
axis.text.x = element_blank(), axis.text.y = element_text(size = rel(2)),
legend.position = "top", axis.text.x = element_blank(),
panel.grid = element_blank()) + legend.position = "top",
panel.grid = element_blank()
) +
coord_equal(ratio = 1) + coord_equal(ratio = 1) +
labs(x = NULL, y = NULL) labs(x = NULL, y = NULL)
all_split all_split
@@ -645,9 +670,18 @@ augment(taxi_fit, new_data = taxi_train) %>%
augment(taxi_fit, new_data = taxi_train) %>% augment(taxi_fit, new_data = taxi_train) %>%
roc_curve(truth = tip, .pred_yes) %>% roc_curve(truth = tip, .pred_yes) %>%
filter(is.finite(.threshold)) %>% filter(is.finite(.threshold)) %>%
pivot_longer(c(specificity, sensitivity), names_to = "statistic", values_to = "value") %>% pivot_longer(
c(specificity, sensitivity),
names_to = "statistic",
values_to = "value"
) %>%
rename(`event threshold` = .threshold) %>% rename(`event threshold` = .threshold) %>%
ggplot(aes(x = `event threshold`, y = value, col = statistic, group = statistic)) + ggplot(aes(
x = `event threshold`,
y = value,
col = statistic,
group = statistic
)) +
geom_line() + geom_line() +
scale_color_brewer(palette = "Dark2") + scale_color_brewer(palette = "Dark2") +
labs(y = NULL) + labs(y = NULL) +
@@ -1010,34 +1044,39 @@ collect_metrics(final_fit)
```{r} ```{r}
require(tidyverse) require(tidyverse)
sitedf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv") |> sitedf <- readr::read_csv(
select(SITE_ID, "https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv"
) |>
select(
SITE_ID,
lon = LON_DD, lon = LON_DD,
lat = LAT_DD, lat = LAT_DD,
name = LAKENAME, name = LAKENAME,
area = LAKEAREA, area = LAKEAREA,
zmax = DEPTHMAX zmax = DEPTHMAX
) |> ) |>
group_by(SITE_ID) |> group_by(SITE_ID) |>
summarize(lon = mean(lon, na.rm = TRUE), summarize(
lon = mean(lon, na.rm = TRUE),
lat = mean(lat, na.rm = TRUE), lat = mean(lat, na.rm = TRUE),
name = unique(name), name = unique(name),
area = mean(area, na.rm = TRUE), area = mean(area, na.rm = TRUE),
zmax = mean(zmax, na.rm = TRUE)) zmax = mean(zmax, na.rm = TRUE)
)
visitdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv") |> visitdf <- readr::read_csv(
select(SITE_ID, "https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
date = DATE_PROFILE, ) |>
year = YEAR, select(SITE_ID, date = DATE_PROFILE, year = YEAR, visit = VISIT_NO) |>
visit = VISIT_NO
) |>
distinct() distinct()
waterchemdf <- readr::read_csv(
waterchemdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv") |> "https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
select(SITE_ID, ) |>
select(
SITE_ID,
date = DATE_PROFILE, date = DATE_PROFILE,
depth = DEPTH, depth = DEPTH,
temp = TEMP_FIELD, temp = TEMP_FIELD,
@@ -1046,38 +1085,43 @@ waterchemdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/
cond = COND_FIELD, cond = COND_FIELD,
) )
sddf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv") |> sddf <- readr::read_csv(
select(SITE_ID, "https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv"
) |>
select(
SITE_ID,
date = DATE_SECCHI, date = DATE_SECCHI,
sd = SECMEAN, sd = SECMEAN,
clear_to_bottom = CLEAR_TO_BOTTOM clear_to_bottom = CLEAR_TO_BOTTOM
) )
trophicdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv") |> trophicdf <- readr::read_csv(
select(SITE_ID, "https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv"
visit = VISIT_NO, ) |>
tp = PTL, select(SITE_ID, visit = VISIT_NO, tp = PTL, tn = NTL, chla = CHLA) |>
tn = NTL,
chla = CHLA) |>
left_join(visitdf, by = c("SITE_ID", "visit")) |> left_join(visitdf, by = c("SITE_ID", "visit")) |>
select(-year, -visit) |> select(-year, -visit) |>
group_by(SITE_ID, date) |> group_by(SITE_ID, date) |>
summarize(tp = mean(tp, na.rm = TRUE), summarize(
tp = mean(tp, na.rm = TRUE),
tn = mean(tn, na.rm = TRUE), tn = mean(tn, na.rm = TRUE),
chla = mean(chla, na.rm = TRUE) chla = mean(chla, na.rm = TRUE)
) )
phytodf <- readr::read_csv(
phytodf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv") |> "https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv"
select(SITE_ID, ) |>
select(
SITE_ID,
date = DATEPHYT, date = DATEPHYT,
depth = SAMPLE_DEPTH, depth = SAMPLE_DEPTH,
phyta = DIVISION, phyta = DIVISION,
genus = GENUS, genus = GENUS,
species = SPECIES, species = SPECIES,
tax = TAXANAME, tax = TAXANAME,
abund = ABUND) |> abund = ABUND
) |>
mutate(phyta = gsub(" .*$", "", phyta)) |> mutate(phyta = gsub(" .*$", "", phyta)) |>
filter(!is.na(genus)) |> filter(!is.na(genus)) |>
group_by(SITE_ID, date, depth, phyta, genus) |> group_by(SITE_ID, date, depth, phyta, genus) |>
@@ -1088,7 +1132,7 @@ envdf <- waterchemdf |>
filter(depth < 2) |> filter(depth < 2) |>
select(-depth) |> select(-depth) |>
group_by(SITE_ID, date) |> group_by(SITE_ID, date) |>
summarise_all(~mean(., na.rm = TRUE)) |> summarise_all(~ mean(., na.rm = TRUE)) |>
ungroup() |> ungroup() |>
left_join(sddf, by = c("SITE_ID", "date")) |> left_join(sddf, by = c("SITE_ID", "date")) |>
left_join(trophicdf, by = c("SITE_ID", "date")) left_join(trophicdf, by = c("SITE_ID", "date"))
@@ -1097,19 +1141,21 @@ nla <- envdf |>
left_join(phytodf) |> left_join(phytodf) |>
left_join(sitedf, by = "SITE_ID") |> left_join(sitedf, by = "SITE_ID") |>
filter(!purrr::map_lgl(phytodf, is.null)) |> filter(!purrr::map_lgl(phytodf, is.null)) |>
mutate(cyanophyta = purrr::map(phytodf, ~ .x |> mutate(
dplyr::filter(phyta == "Cyanophyta") |> cyanophyta = purrr::map(
summarize(cyanophyta = sum(abund, na.rm = TRUE)) phytodf,
)) |> ~ .x |>
dplyr::filter(phyta == "Cyanophyta") |>
summarize(cyanophyta = sum(abund, na.rm = TRUE))
)
) |>
unnest(cyanophyta) |> unnest(cyanophyta) |>
select(-phyta) |> select(-phyta) |>
mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE)) mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE))
# library(rmdify) # library(rmdify)
# library(dwfun) # library(dwfun)
# dwfun::init() # dwfun::init()
``` ```
@@ -1127,18 +1173,20 @@ skimr::skim(nla)
nla |> nla |>
filter(tp > 1) |> filter(tp > 1) |>
ggplot(aes(tn, tp)) + ggplot(aes(tn, tp)) +
geom_point() + geom_point() +
geom_smooth(method = "lm") + geom_smooth(method = "lm") +
scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), scale_x_log10(
labels = scales::trans_format("log10", scales::math_format(10^.x))) + breaks = scales::trans_breaks("log10", function(x) 10^x),
scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x))
labels = scales::trans_format("log10", scales::math_format(10^.x))) ) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)
m1 <- lm(log10(tp) ~ log10(tn), data = nla) m1 <- lm(log10(tp) ~ log10(tn), data = nla)
summary(m1) summary(m1)
``` ```
## 复杂指标 ## 复杂指标
@@ -1147,18 +1195,20 @@ summary(m1)
nla |> nla |>
filter(tp > 1) |> filter(tp > 1) |>
ggplot(aes(tp, cyanophyta)) + ggplot(aes(tp, cyanophyta)) +
geom_point() + geom_point() +
geom_smooth(method = "lm") + geom_smooth(method = "lm") +
scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), scale_x_log10(
labels = scales::trans_format("log10", scales::math_format(10^.x))) + breaks = scales::trans_breaks("log10", function(x) 10^x),
scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x))
labels = scales::trans_format("log10", scales::math_format(10^.x))) ) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)
m2 <- lm(log10(cyanophyta) ~ log10(tp), data = nla) m2 <- lm(log10(cyanophyta) ~ log10(tp), data = nla)
summary(m2) summary(m2)
``` ```
@@ -1170,13 +1220,14 @@ summary(m2)
(nla_split <- rsample::initial_split(nla, prop = 0.7, strata = zmax)) (nla_split <- rsample::initial_split(nla, prop = 0.7, strata = zmax))
(nla_train <- training(nla_split)) (nla_train <- training(nla_split))
(nla_test <- testing(nla_split)) (nla_test <- testing(nla_split))
``` ```
## tidymodels - recipe ## tidymodels - recipe
```{r} ```{r}
nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom") nla_formula <- as.formula(
"cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom"
)
# nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn") # nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn")
nla_recipe <- recipes::recipe(nla_formula, data = nla_train) |> nla_recipe <- recipes::recipe(nla_formula, data = nla_train) |>
recipes::step_string2factor(all_nominal()) |> recipes::step_string2factor(all_nominal()) |>
@@ -1191,9 +1242,9 @@ nla_recipe
```{r} ```{r}
nla_cv <- recipes::bake( nla_cv <- recipes::bake(
nla_recipe, nla_recipe,
new_data = training(nla_split) new_data = training(nla_split)
) |> ) |>
rsample::vfold_cv(v = 10) rsample::vfold_cv(v = 10)
nla_cv nla_cv
``` ```
@@ -1260,7 +1311,7 @@ if (FALSE) {
metrics = yardstick::metric_set(rmse, rsq, mae), metrics = yardstick::metric_set(rmse, rsq, mae),
control = tune::control_grid(verbose = TRUE) control = tune::control_grid(verbose = TRUE)
) )
saveRDS(xgboost_tuned, "./xgboost_tuned.RDS") saveRDS(xgboost_tuned, "./xgboost_tuned.RDS")
} }
xgboost_tuned <- readRDS("./xgboost_tuned.RDS") xgboost_tuned <- readRDS("./xgboost_tuned.RDS")
``` ```
@@ -1298,7 +1349,7 @@ xgboost_tuned |>
```{r} ```{r}
xgboost_best_params <- xgboost_tuned |> xgboost_best_params <- xgboost_tuned |>
tune::select_best("rmse") tune::select_best(metric = "rmse")
knitr::kable(xgboost_best_params) knitr::kable(xgboost_best_params)
``` ```
@@ -1327,18 +1378,19 @@ train_prediction <- xgboost_model_final |>
# fit the model on all the training data # fit the model on all the training data
fit( fit(
formula = nla_formula, formula = nla_formula,
data = train_processed data = train_processed
) |> ) |>
# predict the sale prices for the training data # predict the sale prices for the training data
predict(new_data = train_processed) |> predict(new_data = train_processed) |>
bind_cols(nla_train |> bind_cols(
mutate(.obs = log10(cyanophyta))) nla_train |>
mutate(.obs = log10(cyanophyta))
)
xgboost_score_train <- xgboost_score_train <-
train_prediction |> train_prediction |>
yardstick::metrics(.obs, .pred) |> yardstick::metrics(.obs, .pred) |>
mutate(.estimate = format(round(.estimate, 2), big.mark = ",")) mutate(.estimate = format(round(.estimate, 2), big.mark = ","))
knitr::kable(xgboost_score_train) knitr::kable(xgboost_score_train)
``` ```
## tidymodels - train evaluation ## tidymodels - train evaluation
@@ -1349,10 +1401,8 @@ knitr::kable(xgboost_score_train)
#| out-width: "80%" #| out-width: "80%"
train_prediction |> train_prediction |>
ggplot(aes(.pred, .obs)) + ggplot(aes(.pred, .obs)) +
geom_point() + geom_point() +
geom_smooth(method = "lm") geom_smooth(method = "lm")
``` ```
@@ -1360,18 +1410,20 @@ geom_smooth(method = "lm")
```{r} ```{r}
test_processed <- bake(nla_recipe, new_data = nla_test) test_processed <- bake(nla_recipe, new_data = nla_test)
test_prediction <- xgboost_model_final |> test_prediction <- xgboost_model_final |>
# fit the model on all the training data # fit the model on all the training data
fit( fit(
formula = nla_formula, formula = nla_formula,
data = train_processed data = train_processed
) |> ) |>
# use the training model fit to predict the test data # use the training model fit to predict the test data
predict(new_data = test_processed) |> predict(new_data = test_processed) |>
bind_cols(nla_test |> bind_cols(
mutate(.obs = log10(cyanophyta))) nla_test |>
mutate(.obs = log10(cyanophyta))
)
# measure the accuracy of our model using `yardstick` # measure the accuracy of our model using `yardstick`
xgboost_score <- test_prediction |> xgboost_score <- test_prediction |>
@@ -1394,7 +1446,7 @@ cyanophyta_prediction_residual <- test_prediction |>
select(.pred, residual_pct) select(.pred, residual_pct)
cyanophyta_prediction_residual |> cyanophyta_prediction_residual |>
ggplot(aes(x = .pred, y = residual_pct)) + ggplot(aes(x = .pred, y = residual_pct)) +
geom_point() + geom_point() +
xlab("Predicted Cyanophyta") + xlab("Predicted Cyanophyta") +
ylab("Residual (%)") ylab("Residual (%)")
@@ -1411,9 +1463,8 @@ ggplot(aes(x = .pred, y = residual_pct)) +
#| out-width: "80%" #| out-width: "80%"
test_prediction |> test_prediction |>
ggplot(aes(.pred, .obs)) + ggplot(aes(.pred, .obs)) +
geom_point() + geom_point() +
geom_smooth(method = "lm", colour = "black") geom_smooth(method = "lm", colour = "black")
``` ```
@@ -1421,5 +1472,4 @@ geom_smooth(method = "lm", colour = "black")
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

View File

Before

Width:  |  Height:  |  Size: 61 KiB

After

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More