render compile

update
render compile
2025-04-09 22:31:53 +08:00 · 2025-04-09 22:28:17 +08:00 · 2025-03-25 00:46:18 +08:00 · 2025-03-25 00:45:07 +08:00 · 2025-03-20 09:37:17 +08:00 · 2025-03-20 09:36:34 +08:00
131 changed files with 6778 additions and 464 deletions
@@ -7,3 +7,5 @@ _variables.yml
 _freeze/
 *_cache/
 *_files/
 SD/_*/
 homework/
@@ -0,0 +1 @@
 daa3ba71b87598e29019c2f370272767
@@ -0,0 +1,382 @@
 # Makefile for Quarto Project Automation
 #
 # Host detection.
 #   HOSTNAME - output of `hostname`
 #   OS       - "OSX" on darwin, "linux" on linux; any other uname aborts parsing
 #   SEDI     - in-place sed invocation for the platform (the darwin form passes
 #              an explicit empty backup suffix)
 HOSTNAME := $(shell hostname)
 OS := $(shell uname | tr A-Z a-z)
 ifeq ($(OS),linux)
   SEDI := sed -i
 else ifeq ($(OS),darwin)
   SEDI := sed -i ''
   OS := OSX
 else
   $(error Unknown operating system)
 endif
 # Fetch Git branch and project details
 # The repository root is queried once. reponame is its last path component and
 # projtype is the name of the directory that contains the repository.
 branchname := $(shell git branch --show-current)
 toplevel := $(shell git rev-parse --show-toplevel)
 reponame := $(notdir $(toplevel))
 projtype := $(notdir $(patsubst %/,%,$(dir $(toplevel))))
 # Branches whose name contains "PUB" map to "public", all others to "protected".
 pubtype := $(if $(findstring PUB,$(branchname)),public,protected)
 remotedir := dwuser@drwater.net:/home/www/drc/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
 # Value of the `output-dir:` key in _quarto.yml with a trailing "/" appended.
 outputdir := $(shell awk -F': *' '/^ *output-dir:/ {print $$2 "/" }' ./_quarto.yml)
 siteurl := https://drc.drwater.net/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
 # grep pattern (the quotes are part of the value) used by `pull` to pick a remote branch.
 branchnames := "TX\|FJ\|YF\|ZY\|WW\|JB\|YY\|YJ\|DYF"
 # Variables for colors and port
 # Background color: the first marker found in the branch name wins, checked in
 # the order R1, R2, R3, PUB; grey when none is present.
 bcolor := grey
 ifneq ($(findstring R1,$(branchname)),)
   bcolor := orange
 else ifneq ($(findstring R2,$(branchname)),)
   bcolor := lightblue
 else ifneq ($(findstring R3,$(branchname)),)
   bcolor := lightgreen
 else ifneq ($(findstring PUB,$(branchname)),)
   bcolor := light
 endif
 # Preview port: exact lookup of the branch name in the name:port table below,
 # falling back to 4199 for branches that are not listed. The lookup uses make's
 # own text functions, so the branch name is matched literally and no shell
 # pipeline is spawned at parse time.
 branch_ports := main:4200 SM:4201 TX:4202 FJ:4203 YF:4204 ZY:4205 WW:4206 JB:4207 YY:4208 YJ:4209 DYF:4210
 port := $(patsubst $(branchname):%,%,$(filter $(branchname):%,$(branch_ports)))
 port := $(if $(port),$(port),4199)
 # State file: holds the source hash written by the lazyrender and filehash targets.
 STATE_FILE := .source_state
 # Pattern passed to `grep -v` when building the list of files to hash: drops
 # paths containing a space or parenthesis, paths starting with submit, analysis,
 # site_libs or www, and paths containing _cache or _freeze.
 GREPSTR := " \|(\|)\|^submit\|^analysis\|_cache\|_freeze\|^site_libs\|^www"
 # Default target
 # Every command-style target is declared phony so that a file or directory
 # that happens to share a target's name (e.g. "open", "render", "commit")
 # cannot make that target look up to date and silently skip its recipe.
 .PHONY: all force local updmakefile updrefbib check_git_status lazyrender \
         render commit pull pullmain push filehash preview readme rsync open \
         clean upload backupdocx trackchange fix_links updvariable help
 # Full publish cycle: build if needed, upload, tidy up, commit, push.
 all: local upload clean commit push
 # Like `local`, but always renders instead of consulting the source hash.
 force: updrefbib check_git_status updvariable render
 # Refresh references and variables, then render only when sources changed.
 local: updrefbib check_git_status updvariable lazyrender
 # Replace this project's Makefile with $HOME/bin/publish2dw.Makefile and commit
 # it. Only acts when OS is "OSX" and the host name is "max"; on any other
 # machine it just reports that no update is needed.
 updmakefile:
 	@if [ "$(OS)" != "OSX" ] || [ "$(HOSTNAME)" != "max" ]; then \
 		echo "Makefile 无需在本系统上更新."; \
 	else \
 		echo "基于 $$HOME/bin/publish2dw.Makefile 更新本项目 Makefile..."; \
 		cp "$$HOME/bin/publish2dw.Makefile" "./Makefile"; \
 		git add "Makefile" && git commit -m "Update Makefile"; \
 		echo "本项目Makefile更新完成."; \
 	fi
 # Refresh BB/Ref.bib and commit it when something is staged afterwards.
 # - On the OSX host "max": copy $HOME/literature/Ref.bib into BB/ and push the
 #   same file to the server with rsync.
 # - Elsewhere: if drc.drwater.net answers a ping, download the server copy.
 #   The download goes to a temporary file that replaces BB/Ref.bib only when
 #   wget succeeds, so a failed or interrupted transfer can never leave a
 #   truncated bibliography behind to be staged and committed.
 updrefbib:
 	@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \
 		echo "更新本项目参考文献..."; \
 		cp "$$HOME/literature/Ref.bib" "./BB/"; \
 		echo "推送本地参考文献到远程服务器..."; \
 		rsync -azvu --progress "$$HOME/literature/Ref.bib" "drwater.net:/home/www/drc/datapool/public/BB/Ref.bib"; \
 		echo "本项目参考文献更新完成."; \
 	else \
 		echo "检查网络连通性..."; \
 		if ping -c 1 -W 1 drc.drwater.net > /dev/null 2>&1; then \
 			echo "网络正常，更新本项目参考文献..."; \
 			if wget -O BB/Ref.bib.tmp "https://drc.drwater.net/datapool/public/BB/Ref.bib"; then \
 				mv -f BB/Ref.bib.tmp BB/Ref.bib; \
 				echo "本项目参考文献更新完成."; \
 			else \
 				rm -f BB/Ref.bib.tmp; \
 				echo "参考文献下载失败，保留现有 BB/Ref.bib."; \
 			fi; \
 		else \
 			echo "网络不可用，跳过参考文献更新."; \
 		fi; \
 	fi; \
 	git add BB/Ref.bib; \
 	if [ "$$(git diff --cached)" ]; then \
 		git commit -m "Update Ref.bib"; \
 	fi
 # Show uncommitted changes, if any, and offer to commit them interactively.
 # Answering Y/y prompts for a message and commits everything (`git add .`);
 # any other answer (default N) leaves the working tree untouched.
 # The prompts are printed with printf and read with plain `read`, because
 # recipes run under /bin/sh and `read -p` is not available in every sh.
 check_git_status:
 	@uncommitted=$$(git status --porcelain); \
 	if [ -n "$$uncommitted" ]; then \
 		git status; \
 		printf '%s' "当前存在未提交的修改（如上），是否要提交？(Y/N, default is N): "; \
 		read answer; \
 		answer=$${answer:-N}; \
 		if [ "$$answer" = "Y" ] || [ "$$answer" = "y" ]; then \
 			printf '%s' "请输入修改说明: "; \
 			read message; \
 			git add . && git commit -m "$$message"; \
 		else \
 			echo "未提交，如后续操作为pull，则无法继续"; \
 		fi; \
 	fi
 # Lazy render with hash checking
 # Hashes the tracked *.qmd, _*.yml, *.pdf, *.svg and *.png files (minus the
 # paths excluded by GREPSTR) and renders only when the hash differs from the
 # one stored in $(STATE_FILE) or no state file exists yet.
 # The new hash is recorded only after `render` succeeds; a failed render
 # therefore stays "dirty" and is retried on the next run instead of being
 # skipped as unchanged.
 lazyrender:
 	@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
 	if [ -f $(STATE_FILE) ] && [ "$$current_hash" = "$$(cat $(STATE_FILE))" ]; then \
 		echo "源文件无变化, 跳过编译..."; \
 	else \
 		echo "源文件发生变化, 重新编译..."; \
 		$(MAKE) render && echo "$$current_hash" > $(STATE_FILE); \
 	fi
 # Render target
 # Runs `quarto render` with no arguments, unconditionally (no hash check).
 render:
 	@quarto render
 # Stage everything and commit it as "render compile"; reports and skips when
 # nothing is staged. The recipe ends on `fi` with no trailing line
 # continuation, so it cannot absorb whatever line follows it in the file.
 commit:
 	@echo "提交修改(commit)..."; \
 	git add .; \
 	if [ -n "$$(git diff --cached)" ]; then \
 		git commit -m "render compile"; \
 	else \
 		echo "没有修改记录，跳过."; \
 	fi
 # Pull changes from the specified branch based on the current branch
 # After the commit check and a plain `git pull`:
 #   - on any branch other than main, rebase onto origin/main;
 #   - on main, rebase onto the first remote branch whose name matches
 #     $(branchnames) (remote branches containing "main" are ignored), or
 #     report that no such branch exists.
 pull:
 	@echo "从远程拉取项目更新..."; \
 	$(MAKE) check_git_status; \
 	git pull; \
 	current_branch=$$(git rev-parse --abbrev-ref HEAD); \
 	if [ "$$current_branch" != main ]; then \
 		echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
 		git pull --rebase origin main; \
 	else \
 		echo "当前分枝为$$current_branch."; \
 		remote_branch=$$(git branch --remote | grep -v 'main' | grep $(branchnames) | awk '{print $$1}' | sed 's/origin\///' | head -n 1); \
 		if [ -z "$$remote_branch" ]; then \
 			echo "远程无可用分支$$remote_branch."; \
 		else \
 			echo "尝试从远程分枝$$remote_branch 拉取更新..."; \
 			git pull --rebase origin $$remote_branch; \
 		fi; \
 	fi
 # Pull changes from the main branch
 # Runs the commit check, then rebases the current branch onto origin/main.
 # The whole recipe is one shell command: `@` appears only at the very start
 # (inside a continued line the shell would receive it as part of the command
 # name), and the last line carries no backslash so the rule that follows this
 # one is not pulled into the recipe.
 pullmain:
 	@$(MAKE) check_git_status; \
 	current_branch=$$(git rev-parse --abbrev-ref HEAD); \
 	echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
 	git pull --rebase origin main
 # Push the current branch with a plain `git push` (default remote/upstream).
 push:
 	@echo "推送到远程..."; \
 		git push
 # Store the current source hash in $(STATE_FILE) without rendering. The hash
 # covers the same file list as lazyrender: tracked *.qmd, _*.yml, *.pdf, *.svg
 # and *.png files minus the paths excluded by GREPSTR.
 filehash:
 	@digest=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
 	printf '%s\n' "$$digest" > $(STATE_FILE)
 # Preview the site on a specific port
 # $(port) comes from the branch_ports table (4199 for unlisted branches).
 preview:
 	@quarto preview --port $(port)
 # Generate README.md
 # Renders index.qmd to markdown, then copies the result from
 # $(outputdir)/README.md to ./README.md with every block delimited by lines
 # starting with "---" (the front matter) removed, and deletes the rendered copy.
 readme:
 	@quarto render index.qmd -t markdown -o README.md
 	@sed -e '/^---/,/^---/d' "$(outputdir)/README.md" > README.md
 	@rm "$(outputdir)/README.md"
 # Sync files with remote server
 # Mirrors $(outputdir) to $(remotedir); --delete removes remote files that no
 # longer exist locally. Unlike `upload`, no link fixing or fallback is done.
 rsync:
 	@rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"
 # Open local site
 # Opens $(outputdir)/index.html with `open` when OS is "OSX"; does nothing
 # (and succeeds) on any other OS.
 open:
 	@[ "$(OS)" != "OSX" ] || open "$(outputdir)/index.html"
 # Clean unnecessary files
 # Removes LaTeX/bibliography by-products from the project root only.
 clean:
 	@rm -f \
 		./*.spl \
 		./*.bbl \
 		./*.blg \
 		./*.log \
 		./*.tex \
 		./*.bcf \
 		./*.tex.sedbak \
 		./*.fdb_latexmk
 # Upload files to server and fix links
 # Steps (after the backupdocx prerequisite):
 #   1. make sure $(outputdir) exists and set mode 2775 on it recursively;
 #   2. run fix_links;
 #   3. mirror $(outputdir) to $(remotedir) with rsync --delete and, on OSX,
 #      open $(siteurl).
 # If the first rsync fails, an empty local directory named $(reponame) is
 # created, synced to the parent of $(remotedir) and removed again, and the
 # upload is retried once.
 # NOTE(review): the fallback presumably exists to create a missing remote
 # parent directory — confirm. It also runs `rm -rf "$(reponame)"` in the
 # project root, which would delete a pre-existing directory of that name.
 upload: backupdocx
 	@mkdir -p "$(outputdir)" && chmod -R 2775 "$(outputdir)"
 	@$(MAKE) fix_links
 	@if rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; then \
 		if [ "$(OS)" = "OSX" ]; then \
 			open "$(siteurl)" 2>/dev/null; \
 		fi; \
 	else \
 		echo "Rsync failed. Attempting alternative upload method..."; \
 		mkdir -p "$(reponame)"; \
 		rsync -azvu --progress --delete -r "$(reponame)" "$(dir $(remotedir))"; \
 		rm -rf "$(reponame)"; \
 		rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; \
 		if [ "$(OS)" = "OSX" ]; then \
 			open "$(siteurl)" 2>/dev/null; \
 		fi; \
 	fi
 # Keep a dated copy of www/MS/MS.docx under TC/MS/ and commit it.
 # The copy is named MS<yymmddHHMM>_<short hash>.docx after the date and hash of
 # HEAD. Nothing is copied when an identical file (byte comparison with cmp)
 # already exists in TC/MS/ or when the target name is already taken.
 # When www/MS/MS.docx does not exist the target reports that and stops
 # successfully, instead of failing in cp and still announcing a backup.
 backupdocx:
 	@echo "备份MS.docx文件..."; \
 	if [ ! -f www/MS/MS.docx ]; then \
 		echo "未找到www/MS/MS.docx，跳过备份."; \
 		exit 0; \
 	fi; \
 	currentcommithash=$$(git rev-parse --short HEAD); \
 	datetime=$$(git show -s --format=%ci $$currentcommithash | sed 's/[-: ]//g' | cut -c3-12); \
 	mkdir -p TC/MS/; \
 	existing_file=$$(find TC/MS -name "MS*.docx" -exec cmp -s www/MS/MS.docx {} \; -print -quit); \
 	if [ -n "$$existing_file" ]; then \
 		echo "与www/MS/MS.docx 内容相同的备份文件已存在: $$existing_file"; \
 		echo "无需备份."; \
 	else \
 		if [ ! -e TC/MS/MS$${datetime}_$${currentcommithash}.docx ]; then \
 			cp www/MS/MS.docx TC/MS/MS$${datetime}_$${currentcommithash}.docx; \
 			echo "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx完成."; \
 			git add TC/MS/MS$${datetime}_$${currentcommithash}.docx; \
 			if [ "$$(git diff --cached)" ]; then \
 				git commit -m "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx"; \
 			fi; \
 		else \
 			echo "TC/MS/MS$${datetime}_$${currentcommithash}.docx已存在，无需备份."; \
 		fi; \
 	fi
 # Open two backed-up manuscript versions side by side for a Word comparison.
 # Only acts when projtype is "manuscript"; otherwise exits successfully.
 #   1. Lists the git log as "<hash>: <subject> BY <author> (<age>)", keeps the
 #      lines matching the short hash of any TC/MS/*.docx backup (or the literal
 #      "SMT_】"), and lets the user pick entries in fzf (multi-select).
 #   2. hash1 is taken from the last selected line and hash2 from the first;
 #      each is then replaced by the short hash of its PARENT commit, and the
 #      matching yymmddHHMM stamp is derived from that commit's date.
 #   3. If both TC/MS/MS<stamp>_<hash>.docx files exist and differ in name, they
 #      are opened with `open` and the suggested name of the tracked-changes
 #      file is copied to the clipboard with `pbcopy`; otherwise the recipe
 #      fails.
 # NOTE(review): `open` and `pbcopy` are presumably the macOS tools, so this
 # target is expected to work on OSX only — confirm.
 trackchange:
 	@if [ "$(projtype)" != "manuscript" ]; then \
 	exit 0; \
 	fi; \
 	echo "选择两个提交以比较文档..."; \
 	hashes=$$(git log --pretty=format:'%h: %s BY %an (%ar)' \
 	| grep -E "$$(ls TC/MS/*.docx | xargs -n1 basename | sed -E 's/MS.*_([0-9a-f]+)\.docx/\1/' | tr '\n' '|')SMT_】" \
 	| fzf --multi --reverse --preview="echo {}" ); \
 	echo $$hashes; \
 	hash1=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | tail -n 1 | awk '{print $$1}' | tr -d ':'); \
 	hash1=$$(git rev-parse --short $${hash1}^); \
 	datetime1=$$(git show -s --format=%ci $$hash1 | sed 's/[-: ]//g' | cut -c3-12); \
 	hash2=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | head -n 1 | awk '{print $$1}' | tr -d ':'); \
 	hash2=$$(git rev-parse --short $${hash2}^); \
 	datetime2=$$(git show -s --format=%ci $$hash2 | sed 's/[-: ]//g' | cut -c3-12); \
 	if [ -z "$$hash1" ] || [ -z "$$hash2" ]; then \
 	echo "必须选择两个提交."; \
 	exit 1; \
 	fi; \
 	doc1="TC/MS/MS$${datetime1}_$$hash1.docx"; \
 	echo "$$doc1"; \
 	doc2="TC/MS/MS$${datetime2}_$$hash2.docx"; \
 	echo "$$doc2"; \
 	if [ -f "$$doc1" ] && [ -f "$$doc2" ] && [ "$$doc1" != "$$doc2" ]; then \
 	echo "打开文件: $$doc1 和 $$doc2"; \
 	open "$$doc1" "$$doc2"; \
 	printf "MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}" | pbcopy; \
 	echo "请在word中对比两个版本形成带修改痕迹的版本，并保存至TC/MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}.docx!"; \
 	else \
 	echo "一个或两个文件不存在: $$doc1, $$doc2"; \
 	exit 1; \
 	fi
 # Fix links in www directory
 # Rewrites every *.html file under ./www in place:
 #   - unexpanded `{{< var branch|pubtype|projtype|reponame >}}` shortcodes are
 #     replaced by the corresponding make variable;
 #   - "<reponame>/blob" becomes "<reponame>/raw/branch" and "<reponame>/edit"
 #     becomes "<reponame>/_edit".
 # sed is run with -i.bak (a form accepted by both GNU and BSD sed); the .bak
 # copies it leaves behind are deleted by the second command.
 # NOTE(review): the directory is hard-coded as ./www rather than $(outputdir).
 fix_links:
 	@find ./www -type f -name "*.html" -exec sed -i.bak \
 		-e "s/{{< var branch >}}/$(branchname)/g" \
 		-e "s/{{< var pubtype >}}/$(pubtype)/g" \
 		-e "s/{{< var projtype >}}/$(projtype)/g" \
 		-e "s/{{< var reponame >}}/$(reponame)/g" \
 		-e "s/$(reponame)\/blob/$(reponame)\/raw\/branch/g" \
 		-e "s/$(reponame)\/edit/$(reponame)\/_edit/g" {} +
 	@find ./www -type f -name "*.bak" -exec rm {} +
 # Bring _variables.yml up to date with the values known to this Makefile.
 # The file is created when missing. For every key, a "key: value" line is
 # appended when the key is absent, then the key's line is rewritten in place
 # so an existing entry gets the current value. Because the append guarantees
 # the key is present, the sed call needs no fallback branch.
 # Finally `mkpapervar` is run as a shell command (it is not defined in this
 # file).
 # NOTE(review): nwAB, nwMS, figtblMS and figtblSM are not assigned anywhere in
 # the visible Makefile — presumably they come from the environment or the
 # command line; confirm.
 updvariable:
 	@touch _variables.yml
 	@grep -q '^reponame:' _variables.yml || echo "reponame: $(reponame)" >> _variables.yml
 	@$(SEDI) 's/^reponame:.*/reponame: $(reponame)/' _variables.yml
 	@grep -q '^projtype:' _variables.yml || echo "projtype: $(projtype)" >> _variables.yml
 	@$(SEDI) 's/^projtype:.*/projtype: $(projtype)/' _variables.yml
 	@grep -q '^branch:' _variables.yml || echo "branch: $(branchname)" >> _variables.yml
 	@$(SEDI) 's/^branch:.*/branch: $(branchname)/' _variables.yml
 	@grep -q '^pubtype:' _variables.yml || echo "pubtype: $(pubtype)" >> _variables.yml
 	@$(SEDI) 's/^pubtype:.*/pubtype: $(pubtype)/' _variables.yml
 	@grep -q '^nwAB:' _variables.yml || echo "nwAB: $(nwAB)" >> _variables.yml
 	@$(SEDI) 's/^nwAB:.*/nwAB: $(nwAB)/' _variables.yml
 	@grep -q '^nwMS:' _variables.yml || echo "nwMS: $(nwMS)" >> _variables.yml
 	@$(SEDI) 's/^nwMS:.*/nwMS: $(nwMS)/' _variables.yml
 	@grep -q '^figtblMS:' _variables.yml || echo "figtblMS: $(figtblMS)" >> _variables.yml
 	@$(SEDI) 's/^figtblMS:.*/figtblMS: $(figtblMS)/' _variables.yml
 	@grep -q '^figtblSM:' _variables.yml || echo "figtblSM: $(figtblSM)" >> _variables.yml
 	@$(SEDI) 's/^figtblSM:.*/figtblSM: $(figtblSM)/' _variables.yml
 	@mkpapervar
 # Help: list all available commands with descriptions (English and Chinese)
 # The text is static: it is not generated from the rules, so it has to be
 # edited by hand whenever a target is added or changed.
 # NOTE(review): some entries no longer match the rules in this file — `all`
 # also runs `push`, `force` does not upload, clean or commit, and targets such
 # as pull, pullmain, push, backupdocx, trackchange, updrefbib and updvariable
 # are not listed.
 help:
 	@echo "Makefile for Quarto Project Automation"
 	@echo "======================================="
 	@echo "Available targets (English):"
 	@echo ""
 	@echo "  make all           - Execute local build, upload, clean, and commit"
 	@echo "  make force         - Force render, hash update, upload, clean, and commit"
 	@echo "  make local         - Check git status and perform a lazy render if changes detected"
 	@echo "  make check_git_status - Check for uncommitted changes and ask to commit them"
 	@echo "  make lazyrender    - Render if source files have changed based on hash comparison"
 	@echo "  make render        - Force Quarto to render the project"
 	@echo "  make commit        - Commit changes if no previous uncommitted changes"
 	@echo "  make filehash      - Generate and store the file hash of source files"
 	@echo "  make preview       - Preview the site locally on the specific port (default: 4199)"
 	@echo "  make readme        - Render README.md from Quarto index.qmd"
 	@echo "  make rsync         - Sync output files with the remote server"
 	@echo "  make open          - Open the generated site locally in the browser"
 	@echo "  make clean         - Clean up unnecessary files"
 	@echo "  make upload        - Upload files to the server and fix links"
 	@echo "  make fix_links     - Fix HTML links in the 'www' directory for the remote server"
 	@echo "  make updmakefile   - Update the Makefile"
 	@echo "  make help          - Display this help message"
 	@echo ""
 	@echo "Available targets (中文):"
 	@echo ""
 	@echo "  make all           - 执行本地构建、上传、清理和提交"
 	@echo "  make force         - 强制渲染、更新哈希、上传、清理并提交"
 	@echo "  make local         - 检查Git状态，若检测到更改则进行懒惰渲染"
 	@echo "  make check_git_status - 检查未提交的更改，并询问是否提交"
 	@echo "  make lazyrender    - 如果源文件发生更改，则根据哈希比较进行渲染"
 	@echo "  make render        - 强制 Quarto 渲染项目"
 	@echo "  make commit        - 如果没有未提交的更改则提交"
 	@echo "  make filehash      - 生成并存储源文件的哈希值"
 	@echo "  make preview       - 本地在特定端口预览网站 (默认: 4199)"
 	@echo "  make readme        - 从 Quarto 的 index.qmd 生成 README.md"
 	@echo "  make rsync         - 将输出文件同步到远程服务器"
 	@echo "  make open          - 在浏览器中打开生成的网站"
 	@echo "  make clean         - 清理不必要的文件"
 	@echo "  make upload        - 上传文件到服务器并修复链接"
 	@echo "  make fix_links     - 修复 'www' 目录中的 HTML 链接"
 	@echo "  make updmakefile   - 更新本项目 Makefile"
 	@echo "  make help          - 显示此帮助信息"
 	@echo ""
 	@echo "Environment variables (English and 中文):"
 	@echo "  bcolor             - Background color based on branch name (基于分支名的背景颜色)"
 	@echo "  port               - Port number based on branch name (基于分支名的端口号)"
 	@echo "  STATE_FILE         - File for storing hash state of source files (用于存储源文件哈希状态的文件)"
 	@echo "  siteurl            - The URL where the site will be hosted (网站托管的 URL)"
 	@echo ""
@@ -47,9 +47,8 @@ knitr::opts_chunk$set(echo = TRUE)
 ### 课件
 - 采用`R语言`+`quarto`完成
- 网页公开：[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html)
+- 网页公开：[https://drc.drwater.net/course/public/RWEP/PUB/index.html](https://drc.drwater.net/course/public/RWEP/PUB/index.html)
- 课件代码：[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git)
+- 课件代码：[https://git.drwater.net/course/RWEP.git](https://git.drwater.net/course/RWEP.git)
 - 代码web界面：[https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/](https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/)
 ## 如何学习接下来的内容？
@@ -67,7 +66,7 @@ knitr::opts_chunk$set(echo = TRUE)
 ## Rstudio Server使用
- 服务网址：[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/)
+- 服务网址：[https://rs1.drwater.net/](https://rs1.drwater.net/)
 - 每位同学使用1个账号，随机生成
 - 密码：****
 - 后面的实践课程可在该服务器上完成
@@ -0,0 +1,138 @@
 ---
 title: "Lesson 6"
 format: html
 ---
 ```{r}
 https://rs1.drwater.net
 username: 
  - ruser01
  - ruser02
  - ruser03
  - ruser04
  - ruser05
  - ruser06
 RWEP2025
 ```
 # 安装包
 ```{r}
 install.packages("tidyverse")
 x <- c(1:10, NA)
 hist(x)
 mean(x, na.rm = TRUE)
 median(x, na.rm = TRUE)
 sd(x, na.rm = TRUE)
 for(i in 1:10){
  print(i)
 }
 x + y + x * y
 myfunc <- function(x, y = 3) {
  x + y + x * y
 }
 myfunc(1, 2)
 myfunc(10)
 c(FALSE, 2, 1:3, 3) 
 c(FALSE, 2, 1:3, 3) > 1
 all(c(FALSE, 2, 1:3, 3) > 1)
 c(1L,2L,3L)
 any(c(FALSE, 2, 1:3, 3) > 1)
 x <- 10
 sin(x) = ?
 paste("sin(x) = ", sin(x), sep = " ")
 paste0("sin(x) = ", sin(x))
 substr("Monday", 1, 3)
 ```
 # tidy
 ```{r}
 require(readxl)
 aqdf <-readxl::read_xlsx("../../data/airquality.xlsx", sheet = "metadf")
 # install.packages("skimr")
 aqdf |>
  skimr::skim()
 # base
 # tidyverse
 # Per-area station count with mean and standard deviation of longitude,
 # restricted to the four municipalities and drawn as points, a line and error
 # bars of mean +/- sd. The sd is taken from `lon` so that it belongs to the
 # same variable as `lon.mean`.
 aqdf |>
  dplyr::group_by(Area) |>
  dplyr::summarize(
    n = n(),
    lon.mean = mean(lon, na.rm = TRUE),
    lon.sd = sd(lon, na.rm = TRUE)
  ) |>
  dplyr::filter(Area %in% c("北京市", "天津市", "上海市", "重庆市")) |>
  ggplot(aes(x = n, y = lon.mean)) +
  geom_point() +
  geom_line() +
  geom_errorbar(
    aes(ymin = lon.mean - lon.sd,
      ymax = lon.mean + lon.sd)
  )
 readxl::read_xlsx("./airquality.xlsx")
 flights|>
  filter(dest=="IAH")|>
  group_by(year,month,day)|>summarize(n=n(),
 delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10)
 ```
@@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz")
 ## 欢迎讨论！{.center}
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
@@ -264,6 +264,9 @@ t.test(x, y)
 wilcox.test(x, y)
 ```
 ### [什么是 Wilcoxon-Mann-Whitney检验？](https://zhuanlan.zhihu.com/p/613524533)
 ## 统计函数
 ### 创建向量的直方图
@@ -792,4 +795,4 @@ names(Y) <- c("colA", "colB", "colC")
 ## 欢迎讨论！{.center}
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
@@ -37,7 +37,7 @@ require(learnr)
 ## 下载excel文件
-[https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx)
+[https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx](https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx)
 ## Tidy data
@@ -0,0 +1,21 @@
 name,age,score
 Alice,25,85
 Bob,30,92
 Charlie,28,89
 David,22,95
 Eva,35,87
 Frank,27,91
 Grace,29,88
 Helen,26,93
 Ivan,31,86
 Jack,24,94
 Kelly,32,89
 Lily,28,90
 Mike,33,85
 Nancy,27,92
 Olivia,34,88
 Peter,29,93
 Queen,25,89
 Ryan,30,94
 Samantha,26,91
 Tom,31,87
@@ -79,5 +79,5 @@ Tom,31,87
 ## 欢迎讨论！{.center}
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
@@ -0,0 +1,154 @@
 ---
 title: "Lesson 7"
 format: html
 ---
 ```{r}
 require(tidyverse)
 files <- c(
  "../../data/01-sales.csv",
  "../../data/02-sales.csv",
  "../../data/03-sales.csv"
 )
 files <- dir("../../data", pattern = "sales.csv", full.names = TRUE)
 readr::read_csv(files, id = "file") |>
  mutate(file = basename(file))
 ```
 ```{r}
 require(tidyverse)
 library(tidyverse)
 files <- dir(
  "../../data/gapminder",
  pattern = "^[12][09][0-9][0-9].xlsx$",
  full.names = TRUE
 )
 alldf <- tibble::tibble()
 for (file in files) {
  alldf <- alldf |>
    bind_rows(
      readxl::read_xlsx(file) |>
        mutate(year = parse_number(basename(file)))
    )
 }
 alldf |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  facet_wrap(~year, ncol = 4, scale = "fixed")
 ```
 # slope
 ```{r}
 file <- files[1]
 lm(y ~ x, data)
 m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file))
 summary(m)
 coef(m)[2]
 slopes <- c()
 years <- c()
 for (file in files) {
  m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file))
  years <- c(years, parse_number(basename(file)))
  slopes <- c(slopes, coef(m)[2])
 }
 years
 slopes
 plot(years, as.numeric(slopes), type = "b")
 ```
 # purrr
 ```{r}
 require(tidyverse)
 df <- tibble(
  filename = dir(
    "../../data/gapminder",
    pattern = "^[12][09][0-9][0-9].xlsx$",
    full.names = TRUE
  )
 ) |>
  dplyr::mutate(
    data = purrr::map(
      filename,
      \(x) readxl::read_xlsx(x)
    )
  ) |>
  mutate(year = parse_number(basename(filename))) |>
  mutate(
    m = purrr::map(data, \(xxxx) lm(log10(gdpPercap) ~ lifeExp, data = xxxx))
  ) |>
  mutate(slope = purrr::map_dbl(m, \(yyyy) coef(yyyy)[2])) |>
  unnest(data)
 pvalue <- summary(df$m[[1]])$coefficients[2, 4]
 rsq <- summary(df$m[[1]])$r.squared
 require(tidymodels)
 generics::tidy(df$m[[1]])
 df |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  facet_wrap(~year, ncol = 4, scale = "fixed")
 df |>
  ggplot(aes(x = year, y = slope)) +
  geom_line() +
  geom_point()
 df
 df$slope[[1]]
 coef(df$m[[1]])[2]
 lm(y ~ x, data)
 # GDP per capita (log10 axis) against life expectancy, one panel per year.
 # `df` was already unnested when it was built, so it no longer has a `data`
 # list-column and is plotted directly.
 df |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  facet_wrap(~year, ncol = 4, scale = "fixed")
 ```
@@ -74,7 +74,7 @@ read_csv("../../data/students.csv") |>
 ## 列名不要有空格
 ```{r}
-students |> 
+students |>
  rename(
    student_id = `Student ID`,
    full_name = `Full Name`
@@ -158,7 +158,7 @@ x,y,z
 1,2,3"
 read_csv(
-  another_csv, 
+  another_csv,
  col_types = cols(.default = col_character())
 )
 read_csv(
@@ -197,9 +197,11 @@ annoying <- tibble(
 ```{r}
 #| message: false
-sales_files <- c("../../data/01-sales.csv",
+sales_files <- c(
  "../../data/01-sales.csv",
  "../../data/02-sales.csv",
-  "../../data/03-sales.csv")
+  "../../data/03-sales.csv"
 )
 read_csv(sales_files, id = "file")
 ```
@@ -223,8 +225,11 @@ read_csv(sales_files, id = "file")
 ## 批量读取
 ```{r}
-sales_files <- list.files("../../data",
+sales_files <- list.files(
-  pattern = "sales\\.csv$", full.names = TRUE)
+  "../../data",
  pattern = "sales\\.csv$",
  full.names = TRUE
 )
 sales_files
 ```
@@ -260,7 +265,7 @@ if (FALSE) {
 ```{r}
 if (FALSE) {
  conn <- cctdb::get_dbconn("nationalairquality")
-  metadf <- tbl(conn, "metadf") |> 
+  metadf <- tbl(conn, "metadf") |>
    head(100) |>
    collect()
  DBI::dbDisconnect(conn)
@@ -270,8 +275,8 @@ metadf <- readRDS(file = "../../data/metadfdemo.RDS")
 lang <- "cn"
 metadf |>
  ggplot(aes(lon, lat)) +
-geom_point(aes(fill = Area)) +
+  geom_point(aes(fill = Area)) +
-dwfun::theme_sci()
+  dwfun::theme_sci()
 ```
@@ -283,8 +288,13 @@ metadf <- readxl::read_xlsx("../../data/airquality.xlsx")
 dir.create("../../data/metacity2/")
 metadf |>
  nest(sitedf = -site) |>
-  mutate(flag = purrr::map2(site, sitedf,
+  mutate(
-    ~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx"))))
+    flag = purrr::map2(
      site,
      sitedf,
      ~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx"))
    )
  )
 ```
@@ -296,7 +306,7 @@ metadf |>
 if (FALSE) {
  require(tidyverse)
  conn <- cctdb::get_dbconn("nationalairquality")
-  metadf <- tbl(conn, "metadf") |> 
+  metadf <- tbl(conn, "metadf") |>
    collect()
  DBI::dbDisconnect(conn)
  metanestdf <- metadf |>
@@ -305,10 +315,16 @@ if (FALSE) {
  writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx")
  dir.create("../../data/metacity/")
  metanestdf |>
-    mutate(flag = purrr::map2(Area, citydf,
+    mutate(
-      ~ writexl::write_xlsx(.y,
+      flag = purrr::map2(
-        path = paste0("../../data/metacity/", .x, ".xlsx")
+        Area,
-      )))
+        citydf,
        ~ writexl::write_xlsx(
          .y,
          path = paste0("../../data/metacity/", .x, ".xlsx")
        )
      )
    )
 }
 ```
@@ -321,5 +337,4 @@ if (FALSE) {
 ## 欢迎讨论！{.center}
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
@@ -0,0 +1,115 @@
 ---
 title: "Lesson 8"
 format: html
 ---
 ```{r}
 require(tidyverse)
 library(nycflights13)
 flights |>
  select(3:6) |>
  head(3) |>
  rename_all(~ gsub("_", "", .))
 flights |>
  select(3:6) |>
  head(3) |>
  rename_with(toupper, .cols = 2:4)
 # 每月10号-15号，dep_delay > 100 的航班
 # 每月哪个出发地origin的 dep_delay总时长最长
 # Total departure delay per month and origin, keeping for each month the
 # origin with the largest total (summarize() drops the `origin` grouping, so
 # slice_max() works within each month).
 flights |>
  group_by(month, origin) |>
  summarize(n = n(), total_dep_delay = sum(dep_delay, na.rm = TRUE)) |>
  slice_max(total_dep_delay)
 # arrange(month, origin, desc(total_dep_delay))
 # 每月随机抽取一天，随机抽取三个航班
 slice_sample(n = 1)
 flights |>
  tidyr::nest(ymddf = -c(year, month, day)) |>
  group_by(year, month) |>
  slice_sample(n = 5) |>
  unnest(ymddf) |>
  group_by(year, month, day) |>
  slice_sample(n = 3)
 flights |>
  tidyr::nest(ymddf = -c(year, month, day)) |>
  group_by(year, month) |>
  slice_sample(n = 1) |>
  mutate(
    ymddf = purrr::map(ymddf, \(x) {
      x |>
        slice_sample(n = 3)
    })
  )
 # 每月 每个出发地 周末的平均dep_delay 与 工作日的平均dep_delay 差值最大的3个航班
 flights |>
  mutate(date = ymd(paste(year, month, day))) |>
  mutate(weekday = wday(date)) |>
  mutate(isworkday = if_else(between(weekday, 2, 6), "Yes", "No")) |>
  group_by(year, month, origin, flight, isworkday) |>
  summarize(mean_delay = mean(dep_delay, na.rm = TRUE)) |>
  tidyr::nest(diffdelaydf = c(isworkday, mean_delay)) |>
  filter(
    purrr::map(diffdelaydf, \(x) {
      nrow(x)
    }) >
      1
  ) |>
  mutate(
    diffdelay = purrr::map_dbl(diffdelaydf, \(x) {
      x |>
        arrange(isworkday) |>
        pull(mean_delay) |>
        diff()
    })
  ) |>
  group_by(year, month, origin) |>
  slice_max(diffdelay, n = 3)
 wday(today())
 weekday()
 slice_sample(n = 1)
 flights |>
  group_by(month) |>
  slice_sample(n = 1)
 ```
 ```{r}
 p1 <- flights |>
  group_by(year, month) |>
  summarize(n = n()) |>
  ggplot(aes(month, n)) +
  geom_point(shape = 21, size = 6, color = "black", fill = "red") +
  geom_line()
 ggsave("./a.pdf")
 ggsave("./a.png")
 require(patchwork)
 p1 / p1
 ```
@@ -323,5 +323,5 @@ flights |>
 ## 欢迎讨论！{.center}
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
@@ -1,49 +0,0 @@
 ---
 title: "课后作业9"
 subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
 author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
 date: today
 lang: zh
 format:
  revealjs:
    theme: dark
    slide-number: true
    chalkboard:
      buttons: true
    preview-links: auto
    lang: zh
    toc: true
    toc-depth: 1
    toc-title: 大纲
    logo: ./_extensions/inst/img/ucaslogo.png
    css: ./_extensions/inst/css/revealjs.css
    pointer:
      key: "p"
      color: "#32cd32"
      pointerSize: 18
 revealjs-plugins:
  - pointer
 filters:
  - d2
 ---
 ```{r}
 #| include: false
 #| cache: false
 lang <- "cn"
 require(tidyverse)
 require(learnr)
 ```
 ## 第9次课后作业
 自选数据集，使用R语言开展不同因子（如年份、季节、处理方式等）间某指标的差异分析，采用图表方式形成简要报告。
 作业模板：[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd)
 ## 欢迎讨论！{.center}
 `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
@@ -1,8 +0,0 @@
 ---
 title: 课后作业9
 author: 姓名
 format: html
 ---
 要求：自选数据集，使用R语言开展不同因子间（如年份、季节、处理方式等）某指标的差异分析，采用图表+文字说明等方式形成简要报告。
@@ -1,167 +0,0 @@
 ---
 title: "大数据分析工具"
 subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
 author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
 date: today
 lang: zh
 format:
  revealjs:
    theme: dark
    slide-number: true
    chalkboard:
      buttons: true
    preview-links: auto
    lang: zh
    toc: true
    toc-depth: 1
    toc-title: 大纲
    logo: ./_extensions/inst/img/ucaslogo.png
    css: ./_extensions/inst/css/revealjs.css
    pointer:
      key: "p"
      color: "#32cd32"
      pointerSize: 18
 revealjs-plugins:
  - pointer
 filters:
  - d2
 ---
 ```{r}
 #| echo: false
 knitr::opts_chunk$set(echo = TRUE)
 source("../../coding/_common.R")
 library(nycflights13)
 library(tidyverse)
 ```
 ## 匹配数字
 ### 匹配数字：
 - \d：匹配任意数字字符。
 - \d+：匹配一个或多个数字字符。
 - [0-9]: 匹配数字
 ### 匹配字母：
 - \w：匹配任意字母、数字或下划线字符。
 - \w+：匹配一个或多个字母、数字或下划线字符。
 ## 匹配数字
 ### 匹配空白字符：
 - \s：匹配任意空白字符，包括空格、制表符、换行符等。
 - \s+：匹配一个或多个空白字符。
 ### 匹配特定字符：
 - [abc]：匹配字符 a、b 或 c 中的任意一个。
 - [a-z]：匹配任意小写字母。
 - [A-Z]：匹配任意大写字母。
 - [0-9]：匹配任意数字。
 ## 匹配数字
 ### 匹配重复次数：
 - {n}：匹配前一个字符恰好 n 次。
 - {n,}：匹配前一个字符至少 n 次。
 - {n,m}：匹配前一个字符至少 n 次，但不超过 m 次。
 ### 匹配边界：
 - ^：匹配字符串的开头。
 - $：匹配字符串的结尾。
 ## 匹配数字
 ### 匹配特殊字符：
 - \：转义特殊字符，使其按字面意义匹配。
 - .：匹配任意单个字符。
 - |：表示“或”关系，匹配两个或多个表达式之一。
 #### 匹配次数：
 - *：匹配前一个字符零次或多次。
 - +：匹配前一个字符一次或多次。
 - ?：匹配前一个字符零次或一次。
 ## 匹配数字
 ### 分组和捕获：
 - ()：将一系列模式组合成一个单元，可与特殊字符一起使用。
 ### 预定义字符集：
 - \d：任意数字，相当于 [0-9]。
 - \w：任意字母、数字或下划线字符，相当于 [a-zA-Z0-9_]。
 - \s：任意空白字符，相当于 [ \t\n\r\f\v]。
 ## 实例
 ```{r}
 library(babynames)
 (x <- c("apple", "apppple", "abc123def"))
 x[str_detect(x, "[0-9]")]
 x[str_detect(x, "abc[0-9]+")]
 x[str_detect(x, "pp")]
 x[str_detect(x, "p{4}")]
 x[str_detect(x, "p{4}")]
 x[str_detect("apple", "ap*")]
 x[str_detect("apple", "app*")]
 x[str_detect("apple", "a..le")]
 ```
 ## 练习
 找出`babyname`中名字含有ar的行
 ```{r}
 #| echo: false
 babynames |>
  filter(str_detect(name, "ar"))
 ```
 ## 练习
 找出`babyname`中名字含有ar或者以ry结尾的行。
 ```{r}
 #| echo: false
 babynames |>
  filter(str_detect(name, "ar"))
 ```
 ![](../../image/data-science/transform.png) 
 ## GNU/Linux服务器
 - `ssh`, `scp`
 - `bash`
  - grep
  - sed
  - awk
  - find
  - xargs
 - `Editor`
  - `Virtual Studio Code`
  - `Vim`
  - `Emacs`
 ## 欢迎讨论！{.center}
 `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
@@ -945,5 +945,5 @@ semi_join(df1, df2, by = "id")
 ## 欢迎讨论！{.center}
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
@@ -43,7 +43,7 @@ require(learnr)
 1. 根据`airqualitydf.xlsx`，按采样点统计白天（8:00-20:00）与夜晚（20:00-8:00）中空气质量指数（AQI）中位数，按城市统计低于所有采样点AQI30%分位值的采样点占比，列出上述占比最高的10个城市（不考虑采样点数低于5个的城市）。
 2. 按照不同城市分组，统计白天与夜晚AQI中位数是否具有显著差异。
-作业模板：[第8次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)
+作业模板：[第8次课后作业_模板.qmd](https://git.drwater.net/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)
 ## 示例代码
@@ -62,5 +62,5 @@ require(learnr)
 ## 欢迎讨论！{.center}
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
@@ -14,8 +14,10 @@ format: html
 # 下载至临时文件
 if (FALSE) {
  tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
-  download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
+  download.file(
-    destfile = tmpxlsxpath)
+    "https://git.drwater.net/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
    destfile = tmpxlsxpath
  )
  airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
  metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
  saveRDS(airqualitydf, "./airqualitydf.RDS")
@@ -41,7 +43,12 @@ airqualitydf |>
  left_join(metadf |> select(site, city = Area)) |>
  group_by(city) |>
  filter(n() > 5) |>
-  summarize(p = sum(AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)) / n()) |>
+  summarize(
    p = sum(
      AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)
    ) /
      n()
  ) |>
  top_n(10, p)
@@ -57,12 +64,11 @@ airqualitydf |>
  left_join(metadf |> select(site, city = Area)) |>
  group_by(city) |>
  filter(length(unique(site)) >= 5) |>
-  summarize(p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2,
+  summarize(
-    na.rm = TRUE)) / n()) |>
+    p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2, na.rm = TRUE)) / n()
  ) |>
  slice_max(p, n = 10) |>
-knitr::kable()
+  knitr::kable()
 ```
@@ -83,66 +89,86 @@ if (FALSE) {
    left_join(metadf |> select(site, city = Area)) |>
    group_by(city) |>
    filter(length(unique(site)) >= 5) |>
-    mutate(dayornight = factor(ifelse(between(hour(datetime), 8, 20), "day", "night"),
+    mutate(
-      levels = c("day", "night"))
+      dayornight = factor(
        ifelse(between(hour(datetime), 8, 20), "day", "night"),
        levels = c("day", "night")
      )
    ) |>
    group_by(city) |>
    nest(citydf = -city) |>
-    mutate(median_diff = purrr::map_dbl(citydf, ~
+    mutate(
-      .x |>
+      median_diff = purrr::map_dbl(
-        specify(AQI ~ dayornight) |>
+        citydf,
-        calculate(stat = "diff in medians", order = c("day", "night")) |>
+        ~ .x |>
-        pull(stat)
+          specify(AQI ~ dayornight) |>
-    )) |>
+          calculate(stat = "diff in medians", order = c("day", "night")) |>
          pull(stat)
      )
    ) |>
    ungroup() |>
    #  slice_sample(n = 12) |>
-    mutate(null_dist = purrr::map(citydf, ~
+    mutate(
-      .x |>
+      null_dist = purrr::map(
-        specify(AQI ~ dayornight) |>
+        citydf,
-        hypothesize(null = "independence") |>
+        ~ .x |>
-        generate(reps = 1000, type = "permute") |>
+          specify(AQI ~ dayornight) |>
-        calculate(stat = "diff in medians", order = c("day", "night"))
+          hypothesize(null = "independence") |>
-    )) |>
+          generate(reps = 1000, type = "permute") |>
-    mutate(p_value = purrr::map2_dbl(null_dist, median_diff, 
+          calculate(stat = "diff in medians", order = c("day", "night"))
-      ~  get_p_value(.x, obs_stat = .y, direction = "both") |>
+      )
-        pull(p_value)
+    ) |>
-    )) |>
+    mutate(
      p_value = purrr::map2_dbl(
        null_dist,
        median_diff,
        ~ get_p_value(.x, obs_stat = .y, direction = "both") |>
          pull(p_value)
      )
    ) |>
    mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) |>
-    mutate(fig = purrr::pmap(list(null_dist, median_diff, city, sigdiff),
+    mutate(
-      ~ visualize(..1) +
+      fig = purrr::pmap(
-      shade_p_value(obs_stat = ..2, direction = "both") +
+        list(null_dist, median_diff, city, sigdiff),
-      ggtitle(paste0(..3, "：", ..4)) +
+        ~ visualize(..1) +
-      theme_sci(2, 2)
+          shade_p_value(obs_stat = ..2, direction = "both") +
-    )) |>
+          ggtitle(paste0(..3, "：", ..4)) +
          theme_sci(2, 2)
      )
    ) |>
    arrange(p_value)
  saveRDS(testdf, "./testdf.RDS")
 }
-lang <- "cn"
+if (FALSE) {
-require(dwfun)
+  lang <- "cn"
-require(rmdify)
+  require(dwfun)
-require(drwateR)
+  require(rmdify)
-dwfun::init()
+  require(drwateR)
-rmdify::rmd_init()
+  dwfun::init()
-
+  rmdify::rmd_init()
 testdf <- readRDS("./testdf.RDS")
 require(tidyverse)
 testdf |>
  select(city, median_diff, p_value, sigdiff) |>
  knitr::kable()
 testdf |>
  mutate(grp = (row_number() - 1)%/% 12) |>
  group_by(grp) |>
  nest(grpdf = -grp) |>
  ungroup() |>
 #  slice(1) |>
  mutate(gp = purrr::map(grpdf,
    ~(.x |>
      pull(fig)) |>
      patchwork::wrap_plots(ncol = 3) +
      dwfun::theme_sci(5, 7))) |>
  pull(gp)
  testdf <- readRDS("./testdf.RDS")
  require(tidyverse)
  testdf |>
    select(city, median_diff, p_value, sigdiff) |>
    knitr::kable()
  testdf |>
    mutate(grp = (row_number() - 1) %/% 12) |>
    group_by(grp) |>
    nest(grpdf = -grp) |>
    ungroup() |>
    #  slice(1) |>
    mutate(
      gp = purrr::map(
        grpdf,
        ~ (.x |>
          pull(fig)) |>
          patchwork::wrap_plots(ncol = 3) +
          dwfun::theme_sci(5, 7)
      )
    ) |>
    pull(gp)
 }
 ```
@@ -3934,5 +3934,5 @@ p
 ## 欢迎讨论！{.center}
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
@@ -100,5 +100,4 @@ geom_bar(position = "fill")
 ## 欢迎讨论！{.center}
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
@@ -0,0 +1,254 @@
 ---
 title: "Lesson 9"
 format: html
 ---
 ```{r}
 # install.packages("tidymodels")
 require(tidymodels)
 # Print taxi to inspect it before modelling.
 taxi
 # Seed the RNG so the random 80/20 split, and everything fitted on it, is the
 # same on every render.
 set.seed(123)
 taxisplit <- initial_split(taxi, prop = 0.8)
 taxi_train <- training(taxisplit)
 taxi_test <- testing(taxisplit)
 # Decision-tree classification specification with cost_complexity = 0.002.
 tree_spec <-
  decision_tree(cost_complexity = 0.002) %>%
  set_mode("classification")
 # Workflow modelling tip from all other columns, fitted on the training set.
 taxi_fit <- workflow() %>%
  add_formula(tip ~ .) %>%
  add_model(tree_spec) %>%
  fit(data = taxi_train)
 ```
 ```{r}
 # Training-set predictions, computed once and reused by every summary below.
 taxi_aug <- augment(taxi_fit, new_data = taxi_train)
 taxi_aug %>%
  relocate(tip, .pred_class, .pred_yes, .pred_no)
 # Confusion matrix and individual class metrics.
 taxi_aug %>%
  conf_mat(truth = tip, estimate = .pred_class)
 taxi_aug %>%
  accuracy(truth = tip, estimate = .pred_class)
 taxi_aug %>%
  sensitivity(truth = tip, estimate = .pred_class)
 taxi_aug %>%
  specificity(truth = tip, estimate = .pred_class)
 # Bundle the three metrics so they can be computed in one call.
 taxi_metrics <- metric_set(accuracy, specificity, sensitivity)
 taxi_aug %>%
  taxi_metrics(truth = tip, estimate = .pred_class)
 # Same metric set, computed separately for each value of local.
 taxi_aug %>%
  group_by(local) %>%
  taxi_metrics(truth = tip, estimate = .pred_class)
 # ROC curve drawn by yardstick's autoplot method.
 taxi_aug %>%
  roc_curve(truth = tip, .pred_yes) %>%
  autoplot()
 taxi_aug
 # Hand-built ROC curve: false-positive rate (1 - specificity) on x and
 # sensitivity on y. geom_path keeps the points in threshold order, whereas
 # geom_line would re-sort them by x.
 taxi_aug %>%
  roc_curve(truth = tip, .pred_yes) |>
  ggplot(aes(1 - specificity, sensitivity)) +
  geom_point() +
  geom_path() +
  geom_abline(slope = 1)
 ```
 # Cross Validation
 ```{r}
 # First split of a 10-fold cross-validation of the training data.
 vfold_cv(taxi_train, v = 10) |>
  pull(splits) |>
  nth(1)
 # vfold_cv() called without v; show the first three splits.
 taxi_folds <- vfold_cv(taxi_train)
 taxi_folds$splits[1:3]
 # Same resampling, stratified by tip.
 vfold_cv(taxi_train, strata = tip)
 # Seed the RNG so the folds used for the fits below are reproducible.
 set.seed(123)
 taxi_folds <- vfold_cv(taxi_train, v = 10, strata = tip)
 taxi_folds
 # Workflow combining the formula and tree_spec; it is not fitted yet.
 taxi_wflow <- workflow() %>%
  add_formula(tip ~ .) %>%
  add_model(tree_spec)
 # Fit the workflow on the folds.
 taxi_res <- fit_resamples(taxi_wflow, taxi_folds)
 taxi_res
 # Look inside the first resample: its metrics, the split object, and the
 # analysis and assessment data of that split.
 taxi_res$.metrics[[1]]
 taxi_res$splits[[1]]
 analysis(taxi_res$splits[[1]])
 assessment(taxi_res$splits[[1]])
 # Metrics collected from the resampling results.
 taxi_res %>%
  collect_metrics()
 taxi_res %>%
  collect_metrics() %>%
  select(.metric, mean, n)
 # Save the assessment set results
 ctrl_taxi <- control_resamples(save_pred = TRUE)
 taxi_res <- fit_resamples(taxi_wflow, taxi_folds, control = ctrl_taxi)
 taxi_res
 ```
 # NLA2007 cyanophyta model
 ```{r}
 require(tidyverse)
 # Site table: selected columns collapsed to one row per SITE_ID.
 sitedf <- readr::read_csv(
  "https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv"
 ) |>
  select(
    SITE_ID,
    lon = LON_DD,
    lat = LAT_DD,
    name = LAKENAME,
    area = LAKEAREA,
    zmax = DEPTHMAX
  ) |>
  group_by(SITE_ID) |>
  summarize(
    lon = mean(lon, na.rm = TRUE),
    lat = mean(lat, na.rm = TRUE),
    # first() always yields one value per group; unique() would return several
    # rows for a site that appears under more than one name.
    name = first(name),
    area = mean(area, na.rm = TRUE),
    zmax = mean(zmax, na.rm = TRUE)
  )
 # The profile file feeds both the visit table and the water-chemistry table,
 # so it is downloaded once and reused.
 profiledf <- readr::read_csv(
  "https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
 )
 # Distinct (site, date, year, visit) combinations.
 visitdf <- profiledf |>
  select(SITE_ID, date = DATE_PROFILE, year = YEAR, visit = VISIT_NO) |>
  distinct()
 # Field measurements recorded at each profile depth.
 waterchemdf <- profiledf |>
  select(
    SITE_ID,
    date = DATE_PROFILE,
    depth = DEPTH,
    temp = TEMP_FIELD,
    do = DO_FIELD,
    ph = PH_FIELD,
    cond = COND_FIELD
  )
 # Secchi table: SECMEAN and the CLEAR_TO_BOTTOM flag per site and date.
 sddf <- readr::read_csv(
  "https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv"
 ) |>
  select(
    SITE_ID,
    date = DATE_SECCHI,
    sd = SECMEAN,
    clear_to_bottom = CLEAR_TO_BOTTOM
  )
 # PTL, NTL and CHLA, keyed by visit number in the source file; visitdf
 # supplies the date so the values can be averaged per site and date.
 trophicdf <- readr::read_csv(
  "https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv"
 ) |>
  select(SITE_ID, visit = VISIT_NO, tp = PTL, tn = NTL, chla = CHLA) |>
  left_join(visitdf, by = c("SITE_ID", "visit")) |>
  select(-year, -visit) |>
  group_by(SITE_ID, date) |>
  summarize(
    tp = mean(tp, na.rm = TRUE),
    tn = mean(tn, na.rm = TRUE),
    chla = mean(chla, na.rm = TRUE),
    .groups = "drop"
  )
 # Phytoplankton counts summed per site, date, depth, division and genus, then
 # nested so that each (SITE_ID, date) pair carries its own table.
 phytodf <- readr::read_csv(
  "https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv"
 ) |>
  select(
    SITE_ID,
    date = DATEPHYT,
    depth = SAMPLE_DEPTH,
    phyta = DIVISION,
    genus = GENUS,
    species = SPECIES,
    tax = TAXANAME,
    abund = ABUND
  ) |>
  # Keep only the text before the first space of the division name.
  mutate(phyta = gsub(" .*$", "", phyta)) |>
  filter(!is.na(genus)) |>
  group_by(SITE_ID, date, depth, phyta, genus) |>
  summarize(abund = sum(abund, na.rm = TRUE), .groups = "drop") |>
  nest(phytodf = -c(SITE_ID, date))
 phytodf$phytodf[[1]]
 # Mean of the field measurements taken at depth < 2, per site and date,
 # joined with the Secchi and trophic tables.
 envdf <- waterchemdf |>
  filter(depth < 2) |>
  select(-depth) |>
  group_by(SITE_ID, date) |>
  summarize(
    across(everything(), \(v) mean(v, na.rm = TRUE)),
    .groups = "drop"
  ) |>
  left_join(sddf, by = c("SITE_ID", "date")) |>
  left_join(trophicdf, by = c("SITE_ID", "date"))
 # Modelling table: environment and site columns plus total Cyanophyta abundance.
 nla <- envdf |>
  left_join(phytodf, by = c("SITE_ID", "date")) |>
  left_join(sitedf, by = "SITE_ID") |>
  # Rows without a phytoplankton match have NULL in the list column.
  filter(!purrr::map_lgl(phytodf, is.null)) |>
  mutate(
    cyanophyta = purrr::map(
      phytodf,
      \(x) {
        x |>
          dplyr::filter(phyta == "Cyanophyta") |>
          summarize(cyanophyta = sum(abund, na.rm = TRUE))
      }
    )
  ) |>
  unnest(cyanophyta) |>
  # Drop the nested phytoplankton tables. phyta exists only inside them, so
  # select(-phyta) fails on the outer table.
  select(-phytodf) |>
  # NOTE(review): this yields TRUE where CLEAR_TO_BOTTOM is missing, which is
  # what the original ifelse() did. If a missing flag means "not clear to
  # bottom" the test should be !is.na(); confirm against the data dictionary.
  mutate(clear_to_bottom = is.na(clear_to_bottom))
 # library(rmdify)
 # library(dwfun)
 # dwfun::init()
 ```
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
ming	133cb1533d	render compile	2025-04-09 22:31:53 +08:00
ming	9ecb11b788	update	2025-04-09 22:28:17 +08:00
ming	6412cc5560	render compile	2025-03-25 00:46:18 +08:00
ming	bc628545b6	update leture 2	2025-03-25 00:45:07 +08:00
ming	1ead84ac70	render compile	2025-03-20 09:37:17 +08:00
ming	10ffdd46f3	add 3.1	2025-03-20 09:36:34 +08:00
ming	987a3eaea9	render compile	2025-03-20 09:35:47 +08:00
ming	c292d0ffab	add 2.3	2025-03-20 09:35:13 +08:00
ming	cb2e22fde5	update	2025-03-20 09:34:24 +08:00
ming	8a31a565a8	add some lesson for lesson6	2025-03-20 09:33:27 +08:00
ming	6e3f134635	update	2025-03-19 16:41:27 +08:00
ming	c883d6df6a	update	2025-03-19 16:40:59 +08:00
ming	fda04d79f1	update	2025-03-19 16:33:40 +08:00
ming	a4596a9836	update	2025-03-19 16:31:50 +08:00
ming	7902f28a7a	update gitignore	2025-03-17 20:35:44 +08:00
ming	654fb2f024	render compile	2025-03-17 20:32:11 +08:00
ming	352ca2b1ce	render compile	2025-03-17 20:30:20 +08:00
ming	82248cb24b	render compile	2025-03-17 20:23:18 +08:00
ming	a21430385f	update	2025-03-17 20:16:08 +08:00
ming	78196d49ed	render compile	2025-03-17 20:06:33 +08:00
ming	30dd043c3b	render compile	2025-03-17 19:50:08 +08:00
ming	2db87d5e89	update	2025-03-17 19:20:50 +08:00
`@@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz")`
	`## 欢迎讨论！{.center}`	`## 欢迎讨论！{.center}`


	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`@@ -79,5 +79,5 @@ Tom,31,87`
	`## 欢迎讨论！{.center}`	`## 欢迎讨论！{.center}`


	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`@@ -323,5 +323,5 @@ flights \|>`
	`## 欢迎讨论！{.center}`	`## 欢迎讨论！{.center}`


	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`@@ -945,5 +945,5 @@ semi_join(df1, df2, by = "id")`
	`## 欢迎讨论！{.center}`	`## 欢迎讨论！{.center}`


	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`@@ -3934,5 +3934,5 @@ p`
	`## 欢迎讨论！{.center}`	`## 欢迎讨论！{.center}`


	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
`@@ -100,5 +100,4 @@ geom_bar(position = "fill")`
	`## 欢迎讨论！{.center}`	`## 欢迎讨论！{.center}`


	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`	`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`