Compare commits

..

1 Commits

Author SHA1 Message Date
43c660af96 Merge branch 'PUB'
* PUB:
  minor update
  第10次课
2024-04-09 00:46:05 +08:00
112 changed files with 227 additions and 1569 deletions

1
.gitignore vendored
View File

@@ -7,4 +7,3 @@ _variables.yml
_freeze/
*_cache/
*_files/
SD/_*/

View File

@@ -1 +0,0 @@
454c267b732cda3ca58332ab3883bec6

382
Makefile
View File

@@ -1,382 +0,0 @@
# Makefile for Quarto Project Automation
# Detect OS
HOSTNAME := $(shell hostname)
OS := $(shell uname | tr A-Z a-z)
ifeq ($(OS), darwin)
SEDI := sed -i ''
OS := OSX
else ifeq ($(OS), linux)
SEDI := sed -i
OS := linux
else
$(error Unknown operating system)
endif
# Fetch Git branch and project details
branchname := $(shell git branch --show-current)
reponame := $(shell basename $(shell git rev-parse --show-toplevel))
projtype := $(shell basename $(shell dirname $(shell git rev-parse --show-toplevel)))
pubtype := $(if $(findstring PUB,$(branchname)),public,protected)
remotedir := dwuser@drwater.net:/home/www/drc/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
outputdir := $(shell awk -F': *' '/^ *output-dir:/ {print $$2 "/" }' ./_quarto.yml)
siteurl := https://drc.drwater.net/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
branchnames := "TX\|FJ\|YF\|ZY\|WW\|JB\|YY\|YJ\|DYF"
# Variables for colors and port
bcolor := grey
port := 4199
# Set background color based on branch name
ifeq ($(findstring R1,$(branchname)),R1)
bcolor := orange
else ifeq ($(findstring R2,$(branchname)),R2)
bcolor := lightblue
else ifeq ($(findstring R3,$(branchname)),R3)
bcolor := lightgreen
else ifeq ($(findstring PUB,$(branchname)),PUB)
bcolor := light
endif
# Set port based on branch name
branch_ports := main:4200 SM:4201 TX:4202 FJ:4203 YF:4204 ZY:4205 WW:4206 JB:4207 YY:4208 YJ:4209 DYF:4210
port := $(shell echo $(branch_ports) | tr " " "\n" | grep -E "^$(branchname):" | sed -E 's/^$(branchname):([0-9]+)/\1/')
port := $(if $(port),$(port),4199)
# Define the state file
STATE_FILE := .source_state
# checkfile
GREPSTR := " \|(\|)\|^submit\|^analysis\|_cache\|_freeze\|^site_libs\|^www"
# Default target
.PHONY: all preview readme rsync local clean upload fix_links force check_git_status
all: local upload clean commit push
force: updrefbib check_git_status updvariable render
local: updrefbib check_git_status updvariable lazyrender
updmakefile:
@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \
echo "基于 $$HOME/bin/publish2dw.Makefile 更新本项目 Makefile..."; \
cp "$$HOME/bin/publish2dw.Makefile" "./Makefile"; \
git add "Makefile" && git commit -m "Update Makefile"; \
echo "本项目Makefile更新完成."; \
else \
echo "Makefile 无需在本系统上更新."; \
fi
updrefbib:
@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \
echo "更新本项目参考文献..."; \
cp "$$HOME/literature/Ref.bib" "./BB/"; \
echo "推送本地参考文献到远程服务器..."; \
rsync -azvu --progress "$$HOME/literature/Ref.bib" "drwater.net:/home/www/drc/datapool/public/BB/Ref.bib"; \
echo "本项目参考文献更新完成."; \
else \
echo "检查网络连通性..."; \
if ping -c 1 -W 1 drc.drwater.net > /dev/null 2>&1; then \
echo "网络正常,更新本项目参考文献..."; \
wget -O BB/Ref.bib "https://drc.drwater.net/datapool/public/BB/Ref.bib"; \
echo "本项目参考文献更新完成."; \
else \
echo "网络不可用,跳过参考文献更新."; \
fi; \
fi; \
git add BB/Ref.bib; \
if [ "$$(git diff --cached)" ]; then \
git commit -m "Update Ref.bib"; \
fi
check_git_status:
@uncommitted=$$(git status --porcelain); \
if [ -n "$$uncommitted" ]; then \
git status; \
read -p "当前存在未提交的修改(如上),是否要提交?(Y/N, default is N): " answer; \
answer=$${answer:-N}; \
if [ "$$answer" = "Y" ] || [ "$$answer" = "y" ]; then \
read -p "请输入修改说明: " message; \
git add . && git commit -m "$$message"; \
else \
echo "未提交如后续操作为pull则无法继续"; \
fi; \
fi
# Lazy render with hash checking
lazyrender:
@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
if [ ! -f $(STATE_FILE) ] || [ "$$current_hash" != "$$(cat $(STATE_FILE))" ]; then \
echo "源文件发生变化, 重新编译..."; \
echo "$$current_hash" > $(STATE_FILE); \
$(MAKE) render; \
else \
echo "源文件无变化, 跳过编译..."; \
exit 0; \
fi
# Render target
render:
@quarto render
commit:
@echo "提交修改(commit)..."; \
git add .; \
if [ -n "$$(git diff --cached)" ]; then \
git commit -m "render compile"; \
else \
echo "没有修改记录,跳过."; \
fi; \
# Pull changes from the specified branch based on the current branch
pull:
@echo "从远程拉取项目更新..."; \
$(MAKE) check_git_status; \
git pull; \
current_branch=$$(git rev-parse --abbrev-ref HEAD); \
if [ "$$current_branch" = main ]; then \
echo "当前分枝为$$current_branch."; \
remote_branch=$$(git branch --remote | grep -v 'main' | grep $(branchnames) | awk '{print $$1}' | sed 's/origin\///' | head -n 1); \
if [ -n "$$remote_branch" ]; then \
echo "尝试从远程分枝$$remote_branch 拉取更新..."; \
git pull --rebase origin $$remote_branch; \
else \
echo "远程无可用分支$$remote_branch."; \
fi; \
else \
echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
git pull --rebase origin main; \
fi
# Pull changes from the main branch
pullmain:
$(MAKE) check_git_status; \
@current_branch=$$(git rev-parse --abbrev-ref HEAD); \
@echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
git pull --rebase origin main; \
push:
@echo "推送到远程..."; \
git push
filehash:
@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
echo "$$current_hash" > $(STATE_FILE)
# Preview the site on a specific port
preview:
@quarto preview --port $(port)
# Generate README.md
readme:
@quarto render index.qmd -t markdown -o README.md
@sed -e '/^---/,/^---/d' "$(outputdir)/README.md" > README.md
@rm "$(outputdir)/README.md"
# Sync files with remote server
rsync:
@rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"
# Open local site
open:
@if [ "$(OS)" = "OSX" ]; then open "$(outputdir)/index.html"; fi
# Clean unnecessary files
clean:
@rm -f ./*.spl ./*.bbl ./*.blg ./*.log ./*.tex ./*.bcf ./*.tex.sedbak ./*.fdb_latexmk
# Upload files to server and fix links
upload: backupdocx
@mkdir -p "$(outputdir)" && chmod -R 2775 "$(outputdir)"
@$(MAKE) fix_links
@if rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; then \
if [ "$(OS)" = "OSX" ]; then \
open "$(siteurl)" 2>/dev/null; \
fi; \
else \
echo "Rsync failed. Attempting alternative upload method..."; \
mkdir -p "$(reponame)"; \
rsync -azvu --progress --delete -r "$(reponame)" "$(dir $(remotedir))"; \
rm -rf "$(reponame)"; \
rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; \
if [ "$(OS)" = "OSX" ]; then \
open "$(siteurl)" 2>/dev/null; \
fi; \
fi
backupdocx:
@echo "备份MS.docx文件..."; \
currentcommithash=$$(git rev-parse --short HEAD); \
datetime=$$(git show -s --format=%ci $$currentcommithash | sed 's/[-: ]//g' | cut -c3-12); \
mkdir -p TC/MS/; \
existing_file=$$(find TC/MS -name "MS*.docx" -exec cmp -s www/MS/MS.docx {} \; -print -quit); \
if [ -n "$$existing_file" ]; then \
echo "与www/MS/MS.docx 内容相同的备份文件已存在: $$existing_file"; \
echo "无需备份."; \
else \
if [ ! -e TC/MS/MS$${datetime}_$${currentcommithash}.docx ]; then \
cp www/MS/MS.docx TC/MS/MS$${datetime}_$${currentcommithash}.docx; \
echo "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx完成."; \
git add TC/MS/MS$${datetime}_$${currentcommithash}.docx; \
if [ "$$(git diff --cached)" ]; then \
git commit -m "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx"; \
fi; \
else \
echo "TC/MS/MS$${datetime}_$${currentcommithash}.docx已存在无需备份."; \
fi; \
fi;
trackchange:
@if [ "$(projtype)" != "manuscript" ]; then \
exit 0; \
fi; \
echo "选择两个提交以比较文档..."; \
hashes=$$(git log --pretty=format:'%h: %s BY %an (%ar)' \
| grep -E "$$(ls TC/MS/*.docx | xargs -n1 basename | sed -E 's/MS.*_([0-9a-f]+)\.docx/\1/' | tr '\n' '|')SMT_】" \
| fzf --multi --reverse --preview="echo {}" ); \
echo $$hashes; \
hash1=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | tail -n 1 | awk '{print $$1}' | tr -d ':'); \
hash1=$$(git rev-parse --short $${hash1}^); \
datetime1=$$(git show -s --format=%ci $$hash1 | sed 's/[-: ]//g' | cut -c3-12); \
hash2=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | head -n 1 | awk '{print $$1}' | tr -d ':'); \
hash2=$$(git rev-parse --short $${hash2}^); \
datetime2=$$(git show -s --format=%ci $$hash2 | sed 's/[-: ]//g' | cut -c3-12); \
if [ -z "$$hash1" ] || [ -z "$$hash2" ]; then \
echo "必须选择两个提交."; \
exit 1; \
fi; \
doc1="TC/MS/MS$${datetime1}_$$hash1.docx"; \
echo "$$doc1"; \
doc2="TC/MS/MS$${datetime2}_$$hash2.docx"; \
echo "$$doc2"; \
if [ -f "$$doc1" ] && [ -f "$$doc2" ] && [ "$$doc1" != "$$doc2" ]; then \
echo "打开文件: $$doc1 和 $$doc2"; \
open "$$doc1" "$$doc2"; \
printf "MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}" | pbcopy; \
echo "请在word中对比两个版本形成带修改痕迹的版本并保存至TC/MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}.docx!"; \
else \
echo "一个或两个文件不存在: $$doc1, $$doc2"; \
exit 1; \
fi
# Fix links in www directory
fix_links:
@find ./www -type f -name "*.html" -exec sed -i.bak \
-e "s/{{< var branch >}}/$(branchname)/g" \
-e "s/{{< var pubtype >}}/$(pubtype)/g" \
-e "s/{{< var projtype >}}/$(projtype)/g" \
-e "s/{{< var reponame >}}/$(reponame)/g" \
-e "s/$(reponame)\/blob/$(reponame)\/raw\/branch/g" \
-e "s/$(reponame)\/edit/$(reponame)\/_edit/g" {} +
@find ./www -type f -name "*.bak" -exec rm {} +
updvariable:
@touch _variables.yml # 如果文件不存在则创建
@grep -q '^reponame:' _variables.yml || echo "reponame: $(reponame)" >> _variables.yml
@if grep -q '^reponame:' _variables.yml; then \
$(SEDI) 's/^reponame:.*/reponame: $(reponame)/' _variables.yml; \
else \
echo "reponame: $(reponame)" >> _variables.yml; \
fi
@grep -q '^projtype:' _variables.yml || echo "projtype: $(projtype)" >> _variables.yml
@if grep -q '^projtype:' _variables.yml; then \
$(SEDI) 's/^projtype:.*/projtype: $(projtype)/' _variables.yml; \
else \
echo "projtype: $(projtype)" >> _variables.yml; \
fi
@grep -q '^branch:' _variables.yml || echo "branch: $(branchname)" >> _variables.yml
@if grep -q '^branch:' _variables.yml; then \
$(SEDI) 's/^branch:.*/branch: $(branchname)/' _variables.yml; \
else \
echo "branch: $(branchname)" >> _variables.yml; \
fi
@grep -q '^pubtype:' _variables.yml || echo "pubtype: $(pubtype)" >> _variables.yml
@if grep -q '^pubtype:' _variables.yml; then \
$(SEDI) 's/^pubtype:.*/pubtype: $(pubtype)/' _variables.yml; \
else \
echo "pubtype: $(pubtype)" >> _variables.yml; \
fi
@grep -q '^nwAB:' _variables.yml || echo "nwAB: $(nwAB)" >> _variables.yml
@if grep -q '^nwAB:' _variables.yml; then \
$(SEDI) 's/^nwAB:.*/nwAB: $(nwAB)/' _variables.yml; \
else \
echo "nwAB: $(nwAB)" >> _variables.yml; \
fi
@grep -q '^nwMS:' _variables.yml || echo "nwMS: $(nwMS)" >> _variables.yml
@if grep -q '^nwMS:' _variables.yml; then \
$(SEDI) 's/^nwMS:.*/nwMS: $(nwMS)/' _variables.yml; \
else \
echo "nwMS: $(nwMS)" >> _variables.yml; \
fi
@grep -q '^figtblMS:' _variables.yml || echo "figtblMS: $(figtblMS)" >> _variables.yml
@if grep -q '^figtblMS:' _variables.yml; then \
$(SEDI) 's/^figtblMS:.*/figtblMS: $(figtblMS)/' _variables.yml; \
else \
echo "figtblMS: $(figtblMS)" >> _variables.yml; \
fi
@grep -q '^figtblSM:' _variables.yml || echo "figtblSM: $(figtblSM)" >> _variables.yml
@if grep -q '^figtblSM:' _variables.yml; then \
$(SEDI) 's/^figtblSM:.*/figtblSM: $(figtblSM)/' _variables.yml; \
else \
echo "figtblSM: $(figtblSM)" >> _variables.yml; \
fi
@mkpapervar
# Help: list all available commands with descriptions (English and Chinese)
help:
@echo "Makefile for Quarto Project Automation"
@echo "======================================="
@echo "Available targets (English):"
@echo ""
@echo " make all - Execute local build, upload, clean, and commit"
@echo " make force - Force render, hash update, upload, clean, and commit"
@echo " make local - Check git status and perform a lazy render if changes detected"
@echo " make check_git_status - Check for uncommitted changes and ask to commit them"
@echo " make lazyrender - Render if source files have changed based on hash comparison"
@echo " make render - Force Quarto to render the project"
@echo " make commit - Commit changes if no previous uncommitted changes"
@echo " make filehash - Generate and store the file hash of source files"
@echo " make preview - Preview the site locally on the specific port (default: 4199)"
@echo " make readme - Render README.md from Quarto index.qmd"
@echo " make rsync - Sync output files with the remote server"
@echo " make open - Open the generated site locally in the browser"
@echo " make clean - Clean up unnecessary files"
@echo " make upload - Upload files to the server and fix links"
@echo " make fix_links - Fix HTML links in the 'www' directory for the remote server"
@echo " make updmakefile - Update the Makefile"
@echo " make help - Display this help message"
@echo ""
@echo "Available targets (中文):"
@echo ""
@echo " make all - 执行本地构建、上传、清理和提交"
@echo " make force - 强制渲染、更新哈希、上传、清理并提交"
@echo " make local - 检查Git状态若检测到更改则进行懒惰渲染"
@echo " make check_git_status - 检查未提交的更改,并询问是否提交"
@echo " make lazyrender - 如果源文件发生更改,则根据哈希比较进行渲染"
@echo " make render - 强制 Quarto 渲染项目"
@echo " make commit - 如果没有未提交的更改则提交"
@echo " make filehash - 生成并存储源文件的哈希值"
@echo " make preview - 本地在特定端口预览网站 (默认: 4199)"
@echo " make readme - 从 Quarto 的 index.qmd 生成 README.md"
@echo " make rsync - 将输出文件同步到远程服务器"
@echo " make open - 在浏览器中打开生成的网站"
@echo " make clean - 清理不必要的文件"
@echo " make upload - 上传文件到服务器并修复链接"
@echo " make fix_links - 修复 'www' 目录中的 HTML 链接"
@echo " make updmakefile - 更新本项目 Makefile"
@echo " make help - 显示此帮助信息"
@echo ""
@echo "Environment variables (English and 中文):"
@echo " bcolor - Background color based on branch name (基于分支名的背景颜色)"
@echo " port - Port number based on branch name (基于分支名的端口号)"
@echo " STATE_FILE - File for storing hash state of source files (用于存储源文件哈希状态的文件)"
@echo " siteurl - The URL where the site will be hosted (网站托管的 URL)"
@echo ""

View File

@@ -1,138 +0,0 @@
---
title: "Lesson 6"
format: html
---
```{r}
https://rs1.drwater.net
username:
- ruser01
- ruser02
- ruser03
- ruser04
- ruser05
- ruser06
RWEP2025
```
# 安装包
```{r}
install.packages("tidyverse")
x <- c(1:10, NA)
hist(x)
mean(x, na.rm = TRUE)
median(x, na.rm = TRUE)
sd(x, na.rm = TRUE)
for(i in 1:10){
print(i)
}
x + y + x * y
myfunc <- function(x, y = 3) {
x + y + x * y
}
myfunc(1, 2)
myfunc(10)
c(FALSE, 2, 1:3, 3)
c(FALSE, 2, 1:3, 3) > 1
all(c(FALSE, 2, 1:3, 3) > 1)
c(1L,2L,3L)
any(c(FALSE, 2, 1:3, 3) > 1)
x <- 10
sin(x) = ?
paste("sin(x) = ", sin(x), sep = " ")
paste0("sin(x) = ", sin(x))
substr("Monday", 1, 3)
```
# tidy
```{r}
require(readxl)
aqdf <-readxl::read_xlsx("../../data/airquality.xlsx", sheet = "metadf")
# install.packages("skimr")
aqdf |>
skimr::skim()
# base
# tidyverse
aqdf |>
dplyr::group_by(Area) |>
dplyr::summarize(
n = n(),
lon.mean = mean(lon, na.rm = TRUE),
lon.sd = sd(lat, na.rm = TRUE)
) |>
dplyr::filter(Area %in% c("北京市", "天津市", "上海市", "重庆市")) |>
ggplot(aes(x = n, y = lon.mean)) +
geom_point() +
geom_line() +
geom_errorbar(
aes(ymin = lon.mean - lon.sd,
ymax = lon.mean + lon.sd)
)
readxl::read_xlsx("./airquality.xlsx")
flights|>
filter(dest=="IAH")|>
group_by(year,month,day)|>summarize(n=n(),
delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10)
```

View File

@@ -1,21 +0,0 @@
name,age,score
Alice,25,85
Bob,30,92
Charlie,28,89
David,22,95
Eva,35,87
Frank,27,91
Grace,29,88
Helen,26,93
Ivan,31,86
Jack,24,94
Kelly,32,89
Lily,28,90
Mike,33,85
Nancy,27,92
Olivia,34,88
Peter,29,93
Queen,25,89
Ryan,30,94
Samantha,26,91
Tom,31,87
1 name age score
2 Alice 25 85
3 Bob 30 92
4 Charlie 28 89
5 David 22 95
6 Eva 35 87
7 Frank 27 91
8 Grace 29 88
9 Helen 26 93
10 Ivan 31 86
11 Jack 24 94
12 Kelly 32 89
13 Lily 28 90
14 Mike 33 85
15 Nancy 27 92
16 Olivia 34 88
17 Peter 29 93
18 Queen 25 89
19 Ryan 30 94
20 Samantha 26 91
21 Tom 31 87

View File

@@ -47,8 +47,9 @@ knitr::opts_chunk$set(echo = TRUE)
### 课件
- 采用`R语言`+`quarto`完成
- 网页公开:[https://drc.drwater.net/course/public/RWEP/PUB/index.html](https://drc.drwater.net/course/public/RWEP/PUB/index.html)
- 课件代码:[https://git.drwater.net/course/RWEP.git](https://git.drwater.net/course/RWEP.git)
- 网页公开:[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html)
- 课件代码:[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git)
- 代码web界面[https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/](https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/)
## 如何学习接下来的内容?
@@ -66,7 +67,7 @@ knitr::opts_chunk$set(echo = TRUE)
## Rstudio Server使用
- 服务网址:[https://rs1.drwater.net/](https://rs1.drwater.net/)
- 服务网址:[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/)
- 每位同学使用1个账号随机生成
- 密码:****
- 后面的实践课程可在该服务器上完成

View File

@@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz")
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`

View File

@@ -264,9 +264,6 @@ t.test(x, y)
wilcox.test(x, y)
```
### [什么是 Wilcoxon-Mann-Whitney检验](https://zhuanlan.zhihu.com/p/613524533
## 统计函数
### 创建向量的直方图
@@ -795,4 +792,4 @@ names(Y) <- c("colA", "colB", "colC")
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`

View File

@@ -37,7 +37,7 @@ require(learnr)
## 下载excel文件
[https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx](https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx)
[https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx)
## Tidy data

View File

@@ -79,5 +79,5 @@ Tom,31,87
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

@@ -321,4 +321,5 @@ if (FALSE) {
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`

View File

@@ -323,5 +323,5 @@ flights |>
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

@@ -945,5 +945,5 @@ semi_join(df1, df2, by = "id")
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

@@ -43,7 +43,7 @@ require(learnr)
1. 根据`airqualitydf.xlsx`按采样点统计白天8:00-20:00与夜晚20:00-8:00中空气质量指数AQI中位数按城市统计低于所有采样点AQI30%分位值的采样点占比列出上述占比最高的10个城市不考虑采样点数低于5个的城市
2. 按照不同城市分组统计白天与夜晚AQI中位数是否具有显著差异。
作业模板:[第8次课后作业_模板.qmd](https://git.drwater.net/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)
作业模板:[第8次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)
## 示例代码
@@ -62,5 +62,5 @@ require(learnr)
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

@@ -14,7 +14,7 @@ format: html
# 下载至临时文件
if (FALSE) {
tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
download.file("https://git.drwater.net/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
destfile = tmpxlsxpath)
airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
@@ -118,8 +118,6 @@ if (FALSE) {
saveRDS(testdf, "./testdf.RDS")
}
if (FALSE) {
lang <- "cn"
require(dwfun)
require(rmdify)
@@ -146,7 +144,5 @@ testdf |>
pull(gp)
}
```

View File

@@ -3934,5 +3934,5 @@ p
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

Before

Width:  |  Height:  |  Size: 61 KiB

After

Width:  |  Height:  |  Size: 61 KiB

View File

@@ -100,4 +100,5 @@ geom_bar(position = "fill")
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

@@ -0,0 +1,49 @@
---
title: "课后作业9"
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
date: today
lang: zh
format:
revealjs:
theme: dark
slide-number: true
chalkboard:
buttons: true
preview-links: auto
lang: zh
toc: true
toc-depth: 1
toc-title: 大纲
logo: ./_extensions/inst/img/ucaslogo.png
css: ./_extensions/inst/css/revealjs.css
pointer:
key: "p"
color: "#32cd32"
pointerSize: 18
revealjs-plugins:
- pointer
filters:
- d2
---
```{r}
#| include: false
#| cache: false
lang <- "cn"
require(tidyverse)
require(learnr)
```
## 第9次课后作业
自选数据集使用R语言开展不同因子如年份、季节、处理方式等间某指标的差异分析采用图表方式形成简要报告。
作业模板:[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd)
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

@@ -0,0 +1,8 @@
---
title: 课后作业9
author: 姓名
format: html
---
要求自选数据集使用R语言开展不同因子间如年份、季节、处理方式等某指标的差异分析采用图表+文字说明等方式形成简要报告。

View File

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View File

Before

Width:  |  Height:  |  Size: 272 KiB

After

Width:  |  Height:  |  Size: 272 KiB

View File

Before

Width:  |  Height:  |  Size: 345 KiB

After

Width:  |  Height:  |  Size: 345 KiB

View File

Before

Width:  |  Height:  |  Size: 210 KiB

After

Width:  |  Height:  |  Size: 210 KiB

View File

Before

Width:  |  Height:  |  Size: 470 KiB

After

Width:  |  Height:  |  Size: 470 KiB

View File

Before

Width:  |  Height:  |  Size: 41 KiB

After

Width:  |  Height:  |  Size: 41 KiB

View File

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 37 KiB

View File

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 37 KiB

View File

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

View File

Before

Width:  |  Height:  |  Size: 5.4 KiB

After

Width:  |  Height:  |  Size: 5.4 KiB

View File

Before

Width:  |  Height:  |  Size: 5.5 KiB

After

Width:  |  Height:  |  Size: 5.5 KiB

View File

Before

Width:  |  Height:  |  Size: 209 KiB

After

Width:  |  Height:  |  Size: 209 KiB

View File

Before

Width:  |  Height:  |  Size: 7.8 KiB

After

Width:  |  Height:  |  Size: 7.8 KiB

View File

Before

Width:  |  Height:  |  Size: 3.5 KiB

After

Width:  |  Height:  |  Size: 3.5 KiB

View File

Before

Width:  |  Height:  |  Size: 177 KiB

After

Width:  |  Height:  |  Size: 177 KiB

View File

Before

Width:  |  Height:  |  Size: 8.0 KiB

After

Width:  |  Height:  |  Size: 8.0 KiB

View File

Before

Width:  |  Height:  |  Size: 569 KiB

After

Width:  |  Height:  |  Size: 569 KiB

View File

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

View File

Before

Width:  |  Height:  |  Size: 100 KiB

After

Width:  |  Height:  |  Size: 100 KiB

View File

Before

Width:  |  Height:  |  Size: 7.5 KiB

After

Width:  |  Height:  |  Size: 7.5 KiB

View File

Before

Width:  |  Height:  |  Size: 131 KiB

After

Width:  |  Height:  |  Size: 131 KiB

View File

Before

Width:  |  Height:  |  Size: 41 KiB

After

Width:  |  Height:  |  Size: 41 KiB

View File

Before

Width:  |  Height:  |  Size: 60 KiB

After

Width:  |  Height:  |  Size: 60 KiB

View File

Before

Width:  |  Height:  |  Size: 83 KiB

After

Width:  |  Height:  |  Size: 83 KiB

View File

Before

Width:  |  Height:  |  Size: 96 KiB

After

Width:  |  Height:  |  Size: 96 KiB

View File

Before

Width:  |  Height:  |  Size: 87 KiB

After

Width:  |  Height:  |  Size: 87 KiB

View File

Before

Width:  |  Height:  |  Size: 91 KiB

After

Width:  |  Height:  |  Size: 91 KiB

View File

Before

Width:  |  Height:  |  Size: 869 KiB

After

Width:  |  Height:  |  Size: 869 KiB

View File

Before

Width:  |  Height:  |  Size: 3.8 KiB

After

Width:  |  Height:  |  Size: 3.8 KiB

View File

Before

Width:  |  Height:  |  Size: 638 KiB

After

Width:  |  Height:  |  Size: 638 KiB

View File

Before

Width:  |  Height:  |  Size: 408 KiB

After

Width:  |  Height:  |  Size: 408 KiB

View File

Before

Width:  |  Height:  |  Size: 410 KiB

After

Width:  |  Height:  |  Size: 410 KiB

View File

Before

Width:  |  Height:  |  Size: 154 KiB

After

Width:  |  Height:  |  Size: 154 KiB

View File

Before

Width:  |  Height:  |  Size: 8.7 KiB

After

Width:  |  Height:  |  Size: 8.7 KiB

View File

Before

Width:  |  Height:  |  Size: 159 KiB

After

Width:  |  Height:  |  Size: 159 KiB

View File

Before

Width:  |  Height:  |  Size: 184 KiB

After

Width:  |  Height:  |  Size: 184 KiB

View File

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

View File

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

View File

Before

Width:  |  Height:  |  Size: 8.0 KiB

After

Width:  |  Height:  |  Size: 8.0 KiB

View File

Before

Width:  |  Height:  |  Size: 69 KiB

After

Width:  |  Height:  |  Size: 69 KiB

View File

Before

Width:  |  Height:  |  Size: 96 KiB

After

Width:  |  Height:  |  Size: 96 KiB

View File

Before

Width:  |  Height:  |  Size: 117 KiB

After

Width:  |  Height:  |  Size: 117 KiB

View File

Before

Width:  |  Height:  |  Size: 137 KiB

After

Width:  |  Height:  |  Size: 137 KiB

View File

Before

Width:  |  Height:  |  Size: 10 KiB

After

Width:  |  Height:  |  Size: 10 KiB

View File

Before

Width:  |  Height:  |  Size: 7.1 KiB

After

Width:  |  Height:  |  Size: 7.1 KiB

View File

Before

Width:  |  Height:  |  Size: 54 KiB

After

Width:  |  Height:  |  Size: 54 KiB

View File

Before

Width:  |  Height:  |  Size: 7.6 KiB

After

Width:  |  Height:  |  Size: 7.6 KiB

View File

Before

Width:  |  Height:  |  Size: 134 KiB

After

Width:  |  Height:  |  Size: 134 KiB

View File

Before

Width:  |  Height:  |  Size: 150 KiB

After

Width:  |  Height:  |  Size: 150 KiB

View File

Before

Width:  |  Height:  |  Size: 160 KiB

After

Width:  |  Height:  |  Size: 160 KiB

View File

Before

Width:  |  Height:  |  Size: 120 KiB

After

Width:  |  Height:  |  Size: 120 KiB

View File

@@ -47,8 +47,7 @@ hexes <- function(..., size = 64) {
right <- (seq_along(x) - 1) * size
res <- glue::glue(
'![](hexes/<x>.png){.absolute top=-20 right=<right> width="<size>" height="<size * 1.16>"}',
.open = "<",
.close = ">"
.open = "<", .close = ">"
)
paste0(res, collapse = " ")
}
@@ -70,6 +69,7 @@ test_color <- "#cd4173"
data_color <- "#767381"
assess_color <- "#84cae1"
splits_pal <- c(data_color, train_color, test_color)
```
@@ -152,27 +152,10 @@ knitr::include_graphics("images/whole-game-final-performance.jpg")
# Install the packages for the workshop
pkgs <-
c(
"bonsai",
"doParallel",
"embed",
"finetune",
"lightgbm",
"lme4",
"plumber",
"probably",
"ranger",
"rpart",
"rpart.plot",
"rules",
"splines2",
"stacks",
"text2vec",
"textrecipes",
"tidymodels",
"vetiver",
"remotes"
)
c("bonsai", "doParallel", "embed", "finetune", "lightgbm", "lme4",
"plumber", "probably", "ranger", "rpart", "rpart.plot", "rules",
"splines2", "stacks", "text2vec", "textrecipes", "tidymodels",
"vetiver", "remotes")
install.packages(pkgs)
```
@@ -214,34 +197,26 @@ taxi
#|
set.seed(123)
library(forcats)
require(tidymodels)
require(tidyverse)
one_split <- taxi |>
dplyr::slice(1:30) |>
rsample::initial_split() |>
generics::tidy() |>
tibble::add_row(Row = 1:30, Data = "Original") |>
dplyr::mutate(
Data = case_when(
one_split <- slice(taxi, 1:30) %>%
initial_split() %>%
tidy() %>%
add_row(Row = 1:30, Data = "Original") %>%
mutate(Data = case_when(
Data == "Analysis" ~ "Training",
Data == "Assessment" ~ "Testing",
TRUE ~ Data
)
) |>
dplyr::mutate(
Data = factor(Data, levels = c("Original", "Training", "Testing"))
)
)) %>%
mutate(Data = factor(Data, levels = c("Original", "Training", "Testing")))
all_split <-
ggplot(one_split, aes(x = Row, y = fct_rev(Data), fill = Data)) +
geom_tile(color = "white", linewidth = 1) +
geom_tile(color = "white",
linewidth = 1) +
scale_fill_manual(values = splits_pal, guide = "none") +
theme_minimal() +
theme(
axis.text.y = element_text(size = rel(2)),
theme(axis.text.y = element_text(size = rel(2)),
axis.text.x = element_blank(),
legend.position = "top",
panel.grid = element_blank()
) +
panel.grid = element_blank()) +
coord_equal(ratio = 1) +
labs(x = NULL, y = NULL)
all_split
@@ -670,18 +645,9 @@ augment(taxi_fit, new_data = taxi_train) %>%
augment(taxi_fit, new_data = taxi_train) %>%
roc_curve(truth = tip, .pred_yes) %>%
filter(is.finite(.threshold)) %>%
pivot_longer(
c(specificity, sensitivity),
names_to = "statistic",
values_to = "value"
) %>%
pivot_longer(c(specificity, sensitivity), names_to = "statistic", values_to = "value") %>%
rename(`event threshold` = .threshold) %>%
ggplot(aes(
x = `event threshold`,
y = value,
col = statistic,
group = statistic
)) +
ggplot(aes(x = `event threshold`, y = value, col = statistic, group = statistic)) +
geom_line() +
scale_color_brewer(palette = "Dark2") +
labs(y = NULL) +
@@ -1044,11 +1010,8 @@ collect_metrics(final_fit)
```{r}
require(tidyverse)
sitedf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv"
) |>
select(
SITE_ID,
sitedf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv") |>
select(SITE_ID,
lon = LON_DD,
lat = LAT_DD,
name = LAKENAME,
@@ -1056,27 +1019,25 @@ sitedf <- readr::read_csv(
zmax = DEPTHMAX
) |>
group_by(SITE_ID) |>
summarize(
lon = mean(lon, na.rm = TRUE),
summarize(lon = mean(lon, na.rm = TRUE),
lat = mean(lat, na.rm = TRUE),
name = unique(name),
area = mean(area, na.rm = TRUE),
zmax = mean(zmax, na.rm = TRUE)
)
zmax = mean(zmax, na.rm = TRUE))
visitdf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
) |>
select(SITE_ID, date = DATE_PROFILE, year = YEAR, visit = VISIT_NO) |>
visitdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv") |>
select(SITE_ID,
date = DATE_PROFILE,
year = YEAR,
visit = VISIT_NO
) |>
distinct()
waterchemdf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
) |>
select(
SITE_ID,
waterchemdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv") |>
select(SITE_ID,
date = DATE_PROFILE,
depth = DEPTH,
temp = TEMP_FIELD,
@@ -1085,43 +1046,38 @@ waterchemdf <- readr::read_csv(
cond = COND_FIELD,
)
sddf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv"
) |>
select(
SITE_ID,
sddf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv") |>
select(SITE_ID,
date = DATE_SECCHI,
sd = SECMEAN,
clear_to_bottom = CLEAR_TO_BOTTOM
)
trophicdf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv"
) |>
select(SITE_ID, visit = VISIT_NO, tp = PTL, tn = NTL, chla = CHLA) |>
trophicdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv") |>
select(SITE_ID,
visit = VISIT_NO,
tp = PTL,
tn = NTL,
chla = CHLA) |>
left_join(visitdf, by = c("SITE_ID", "visit")) |>
select(-year, -visit) |>
group_by(SITE_ID, date) |>
summarize(
tp = mean(tp, na.rm = TRUE),
summarize(tp = mean(tp, na.rm = TRUE),
tn = mean(tn, na.rm = TRUE),
chla = mean(chla, na.rm = TRUE)
)
phytodf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv"
) |>
select(
SITE_ID,
phytodf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv") |>
select(SITE_ID,
date = DATEPHYT,
depth = SAMPLE_DEPTH,
phyta = DIVISION,
genus = GENUS,
species = SPECIES,
tax = TAXANAME,
abund = ABUND
) |>
abund = ABUND) |>
mutate(phyta = gsub(" .*$", "", phyta)) |>
filter(!is.na(genus)) |>
group_by(SITE_ID, date, depth, phyta, genus) |>
@@ -1132,7 +1088,7 @@ envdf <- waterchemdf |>
filter(depth < 2) |>
select(-depth) |>
group_by(SITE_ID, date) |>
summarise_all(~ mean(., na.rm = TRUE)) |>
summarise_all(~mean(., na.rm = TRUE)) |>
ungroup() |>
left_join(sddf, by = c("SITE_ID", "date")) |>
left_join(trophicdf, by = c("SITE_ID", "date"))
@@ -1141,21 +1097,19 @@ nla <- envdf |>
left_join(phytodf) |>
left_join(sitedf, by = "SITE_ID") |>
filter(!purrr::map_lgl(phytodf, is.null)) |>
mutate(
cyanophyta = purrr::map(
phytodf,
~ .x |>
mutate(cyanophyta = purrr::map(phytodf, ~ .x |>
dplyr::filter(phyta == "Cyanophyta") |>
summarize(cyanophyta = sum(abund, na.rm = TRUE))
)
) |>
)) |>
unnest(cyanophyta) |>
select(-phyta) |>
mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE))
# library(rmdify)
# library(dwfun)
# dwfun::init()
```
@@ -1173,20 +1127,18 @@ skimr::skim(nla)
nla |>
filter(tp > 1) |>
ggplot(aes(tn, tp)) +
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))) +
scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x)))
m1 <- lm(log10(tp) ~ log10(tn), data = nla)
summary(m1)
```
## 复杂指标
@@ -1195,20 +1147,18 @@ summary(m1)
nla |>
filter(tp > 1) |>
ggplot(aes(tp, cyanophyta)) +
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))) +
scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x)))
m2 <- lm(log10(cyanophyta) ~ log10(tp), data = nla)
summary(m2)
```
@@ -1220,14 +1170,13 @@ summary(m2)
(nla_split <- rsample::initial_split(nla, prop = 0.7, strata = zmax))
(nla_train <- training(nla_split))
(nla_test <- testing(nla_split))
```
## tidymodels - recipe
```{r}
nla_formula <- as.formula(
"cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom"
)
nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom")
# nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn")
nla_recipe <- recipes::recipe(nla_formula, data = nla_train) |>
recipes::step_string2factor(all_nominal()) |>
@@ -1244,7 +1193,7 @@ nla_recipe
nla_cv <- recipes::bake(
nla_recipe,
new_data = training(nla_split)
) |>
) |>
rsample::vfold_cv(v = 10)
nla_cv
```
@@ -1311,7 +1260,7 @@ if (FALSE) {
metrics = yardstick::metric_set(rmse, rsq, mae),
control = tune::control_grid(verbose = TRUE)
)
saveRDS(xgboost_tuned, "./xgboost_tuned.RDS")
saveRDS(xgboost_tuned, "./xgboost_tuned.RDS")
}
xgboost_tuned <- readRDS("./xgboost_tuned.RDS")
```
@@ -1349,7 +1298,7 @@ xgboost_tuned |>
```{r}
xgboost_best_params <- xgboost_tuned |>
tune::select_best(metric = "rmse")
tune::select_best("rmse")
knitr::kable(xgboost_best_params)
```
@@ -1382,15 +1331,14 @@ train_prediction <- xgboost_model_final |>
) |>
# predict the sale prices for the training data
predict(new_data = train_processed) |>
bind_cols(
nla_train |>
mutate(.obs = log10(cyanophyta))
)
bind_cols(nla_train |>
mutate(.obs = log10(cyanophyta)))
xgboost_score_train <-
train_prediction |>
yardstick::metrics(.obs, .pred) |>
mutate(.estimate = format(round(.estimate, 2), big.mark = ","))
knitr::kable(xgboost_score_train)
```
## tidymodels - train evaluation
@@ -1401,8 +1349,10 @@ knitr::kable(xgboost_score_train)
#| out-width: "80%"
train_prediction |>
ggplot(aes(.pred, .obs)) +
geom_point() +
geom_smooth(method = "lm")
geom_point() +
geom_smooth(method = "lm")
```
@@ -1420,10 +1370,8 @@ test_prediction <- xgboost_model_final |>
) |>
# use the training model fit to predict the test data
predict(new_data = test_processed) |>
bind_cols(
nla_test |>
mutate(.obs = log10(cyanophyta))
)
bind_cols(nla_test |>
mutate(.obs = log10(cyanophyta)))
# measure the accuracy of our model using `yardstick`
xgboost_score <- test_prediction |>
@@ -1446,7 +1394,7 @@ cyanophyta_prediction_residual <- test_prediction |>
select(.pred, residual_pct)
cyanophyta_prediction_residual |>
ggplot(aes(x = .pred, y = residual_pct)) +
ggplot(aes(x = .pred, y = residual_pct)) +
geom_point() +
xlab("Predicted Cyanophyta") +
ylab("Residual (%)")
@@ -1463,8 +1411,9 @@ cyanophyta_prediction_residual |>
#| out-width: "80%"
test_prediction |>
ggplot(aes(.pred, .obs)) +
geom_point() +
geom_smooth(method = "lm", colour = "black")
geom_point() +
geom_smooth(method = "lm", colour = "black")
```
@@ -1472,4 +1421,5 @@ test_prediction |>
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

Before

Width:  |  Height:  |  Size: 61 KiB

After

Width:  |  Height:  |  Size: 61 KiB

View File

@@ -0,0 +1 @@
../../_extensions

View File

@@ -163,4 +163,5 @@ babynames |>
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More