render compile

update
render compile
2025-04-09 22:31:53 +08:00 · 2025-04-09 22:28:17 +08:00 · 2025-03-25 00:46:18 +08:00 · 2025-03-25 00:45:07 +08:00 · 2025-03-20 09:37:17 +08:00 · 2025-03-20 09:36:34 +08:00
131 changed files with 6778 additions and 464 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,5 @@ _variables.yml
 _freeze/
 *_cache/
 *_files/
+SD/_*/
+homework/
--- a/.source_state
+++ b/.source_state
@@ -0,0 +1 @@
+daa3ba71b87598e29019c2f370272767
--- a/382
+++ b/382
@@ -0,0 +1,382 @@
+# Makefile for Quarto Project Automation
+# Detect the host name and operating system once, at parse time.
+# HOSTNAME is compared against "max" by updmakefile and updrefbib.
+# OS starts as the lower-cased `uname` output and is then normalised to
+# "OSX" or "linux"; SEDI holds the in-place sed invocation for that platform
+# (`sed -i ''` on Darwin, `sed -i` on Linux).
+# Any other `uname` value aborts parsing of the whole Makefile.
+HOSTNAME := $(shell hostname)
+OS := $(shell uname | tr A-Z a-z)
+ifeq ($(OS), darwin)
+    SEDI := sed -i ''
+    OS := OSX
+else ifeq ($(OS), linux)
+    SEDI := sed -i
+    OS := linux
+else
+    $(error Unknown operating system)
+endif
+
+# Fetch Git branch and project details (all evaluated once, at parse time).
+#   branchname - current branch as reported by `git branch --show-current`
+#   reponame   - base name of the repository top-level directory
+#   projtype   - name of the directory that contains the repository
+#   pubtype    - "public" when the branch name contains "PUB", else "protected"
+#   remotedir  - rsync destination built from projtype/pubtype/reponame/branchname
+#   outputdir  - value of the `output-dir:` key in ./_quarto.yml with a trailing
+#                "/" appended (empty when the key is absent)
+#   siteurl    - public URL that mirrors remotedir
+branchname := $(shell git branch --show-current)
+reponame := $(shell basename $(shell git rev-parse --show-toplevel))
+projtype := $(shell basename $(shell dirname $(shell git rev-parse --show-toplevel)))
+pubtype := $(if $(findstring PUB,$(branchname)),public,protected)
+remotedir := dwuser@drwater.net:/home/www/drc/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
+outputdir := $(shell awk -F': *' '/^ *output-dir:/ {print $$2 "/" }' ./_quarto.yml)
+siteurl := https://drc.drwater.net/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
+
+# grep basic-regex alternation (already shell-quoted) used by `pull` to pick a
+# remote branch when the current branch is main.
+branchnames := "TX\|FJ\|YF\|ZY\|WW\|JB\|YY\|YJ\|DYF"
+
+# Variables for colors and port: defaults used when the branch name matches
+# none of the cases below. `port` is recomputed from the branch name further
+# down in this file.
+bcolor := grey
+port := 4199
+
+# Set background color based on branch name: the first of the substrings
+# R1, R2, R3, PUB found in the branch name wins.
+# NOTE(review): bcolor is not referenced by any rule visible in this file
+# (only named in the help text) — confirm it is consumed elsewhere.
+ifeq ($(findstring R1,$(branchname)),R1)
+    bcolor := orange
+else ifeq ($(findstring R2,$(branchname)),R2)
+    bcolor := lightblue
+else ifeq ($(findstring R3,$(branchname)),R3)
+    bcolor := lightgreen
+else ifeq ($(findstring PUB,$(branchname)),PUB)
+    bcolor := light
+endif
+
+# Set port based on branch name.
+# branch_ports is a list of "<branch>:<port>" words. The lookup uses Make's own
+# filter/patsubst, so the branch name is matched literally (it is never
+# interpreted as a regular expression) and no shell pipeline is forked.
+# Branches without an entry, or an empty branch name, fall back to 4199.
+branch_ports := main:4200 SM:4201 TX:4202 FJ:4203 YF:4204 ZY:4205 WW:4206 JB:4207 YY:4208 YJ:4209 DYF:4210
+port := $(patsubst $(branchname):%,%,$(filter $(branchname):%,$(branch_ports)))
+port := $(if $(port),$(port),4199)
+
+# Define the state file: it stores the md5 digest of the source files written
+# by `lazyrender` and `filehash`.
+STATE_FILE := .source_state
+
+# Exclusion filter (grep basic regex, already shell-quoted) applied to the
+# `git ls-files` output before hashing: drops paths that contain a space or
+# a parenthesis, contain _cache or _freeze, or start with submit, analysis,
+# site_libs or www.
+GREPSTR := " \|(\|)\|^submit\|^analysis\|_cache\|_freeze\|^site_libs\|^www"
+
+# None of the targets in this file creates a file named after itself, so every
+# one of them is declared phony; otherwise a stray file or directory called
+# e.g. "render", "open" or "help" would silently disable the target.
+.PHONY: all force local updmakefile updrefbib check_git_status lazyrender \
+        render commit pull pullmain push filehash preview readme rsync open \
+        clean upload backupdocx trackchange fix_links updvariable help
+
+# The prerequisite lists below encode a sequence (e.g. commit before push), so
+# they must never be run concurrently by `make -j`.
+.NOTPARALLEL:
+
+# Default target: lazy local build, upload, clean-up, commit and push.
+all: local upload clean commit push
+
+# force: same preparation as `local`, but always renders.
+force: updrefbib check_git_status updvariable render
+
+# local: refresh the bibliography, offer to commit pending changes, update
+# _variables.yml, then render only when the source hash changed.
+local: updrefbib check_git_status updvariable lazyrender
+
+# updmakefile: on the machine where OS is OSX and the host name is "max", copy
+# $HOME/bin/publish2dw.Makefile over ./Makefile and commit it; on any other
+# machine only print a notice.
+updmakefile:
+	@if [ "$(OS)" != "OSX" ] || [ "$(HOSTNAME)" != "max" ]; then \
+		echo "Makefile 无需在本系统上更新."; \
+	else \
+		echo "基于 $$HOME/bin/publish2dw.Makefile 更新本项目 Makefile..."; \
+		cp "$$HOME/bin/publish2dw.Makefile" "./Makefile"; \
+		git add "Makefile" && git commit -m "Update Makefile"; \
+		echo "本项目Makefile更新完成."; \
+	fi
+
+
+# updrefbib: refresh BB/Ref.bib and commit it when it changed.
+#  - On host "max" under OSX the file is copied from $HOME/literature and also
+#    pushed to the server with rsync.
+#  - Elsewhere it is downloaded from the server when the host answers a ping.
+#    The download goes to a temporary file first and only replaces BB/Ref.bib
+#    when wget succeeded and the result is non-empty, so a failed download can
+#    no longer truncate (and then commit) the bibliography.
+#  - The final diff check and the commit are restricted to BB/Ref.bib, so
+#    unrelated staged changes are not committed under "Update Ref.bib".
+updrefbib:
+	@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \
+		echo "更新本项目参考文献..."; \
+		cp "$$HOME/literature/Ref.bib" "./BB/"; \
+		echo "推送本地参考文献到远程服务器..."; \
+		rsync -azvu --progress "$$HOME/literature/Ref.bib" "drwater.net:/home/www/drc/datapool/public/BB/Ref.bib"; \
+		echo "本项目参考文献更新完成."; \
+	else \
+		echo "检查网络连通性..."; \
+		if ping -c 1 -W 1 drc.drwater.net > /dev/null 2>&1; then \
+			echo "网络正常，更新本项目参考文献..."; \
+			if wget -O BB/Ref.bib.tmp "https://drc.drwater.net/datapool/public/BB/Ref.bib" && [ -s BB/Ref.bib.tmp ]; then \
+				mv -f BB/Ref.bib.tmp BB/Ref.bib; \
+				echo "本项目参考文献更新完成."; \
+			else \
+				rm -f BB/Ref.bib.tmp; \
+				echo "参考文献下载失败，保留现有 BB/Ref.bib."; \
+			fi; \
+		else \
+			echo "网络不可用，跳过参考文献更新."; \
+		fi; \
+	fi; \
+	git add BB/Ref.bib; \
+	if ! git diff --cached --quiet -- BB/Ref.bib; then \
+		git commit -m "Update Ref.bib" -- BB/Ref.bib; \
+	fi
+
+# check_git_status: when the working tree has uncommitted changes, show them
+# and ask whether to commit everything (default answer: N). On "Y"/"y" a commit
+# message is requested and `git add . && git commit` is run.
+# The prompts use printf + plain `read` rather than `read -p`, which is a bash
+# extension and fails under a POSIX /bin/sh such as dash.
+check_git_status:
+	@uncommitted=$$(git status --porcelain); \
+	if [ -n "$$uncommitted" ]; then \
+		git status; \
+		printf '%s' "当前存在未提交的修改（如上），是否要提交？(Y/N, default is N): "; \
+		read -r answer; \
+		answer=$${answer:-N}; \
+		if [ "$$answer" = "Y" ] || [ "$$answer" = "y" ]; then \
+			printf '%s' "请输入修改说明: "; \
+			read -r message; \
+			git add . && git commit -m "$$message"; \
+		else \
+			echo "未提交，如后续操作为pull，则无法继续"; \
+		fi; \
+	fi
+
+
+
+# Lazy render with hash checking.
+# The digest is the md5 of the concatenated contents of all git-tracked
+# *.qmd, _*.yml, *.pdf, *.svg and *.png files that survive the GREPSTR filter.
+# When it differs from the one stored in STATE_FILE (or no state file exists)
+# the project is rendered. The new digest is stored only after a successful
+# render, so a failed render is retried on the next run instead of being
+# skipped as "unchanged".
+lazyrender:
+	@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
+	if [ -f $(STATE_FILE) ] && [ "$$current_hash" = "$$(cat $(STATE_FILE))" ]; then \
+		echo "源文件无变化, 跳过编译..."; \
+	else \
+		echo "源文件发生变化, 重新编译..."; \
+		$(MAKE) render && echo "$$current_hash" > $(STATE_FILE); \
+	fi
+
+# Render target: unconditionally runs `quarto render` with no arguments
+# (no hash check, STATE_FILE is not touched).
+render:
+	@quarto render
+
+# commit: stage every change in the working tree and commit it with the fixed
+# message "render compile"; prints a notice and does nothing when the index
+# contains no change. The recipe ends on `fi` without a trailing line
+# continuation, so a line added below can never be merged into it.
+commit:
+	@echo "提交修改(commit)..."; \
+	git add .; \
+	if git diff --cached --quiet; then \
+		echo "没有修改记录，跳过."; \
+	else \
+		git commit -m "render compile"; \
+	fi
+
+# Pull changes from the specified branch based on the current branch.
+# Steps, all in one shell invocation:
+#   1. run check_git_status (offers to commit pending changes), then `git pull`;
+#   2. on main: take the first remote branch whose name does not contain
+#      "main" and matches $(branchnames), and `git pull --rebase` from it;
+#      print a notice when no such branch exists;
+#   3. on any other branch: `git pull --rebase origin main`.
+# Commands are chained with `;`, so a failing step does not stop later ones.
+pull:
+	@echo "从远程拉取项目更新..."; \
+		$(MAKE) check_git_status; \
+		git pull; \
+		current_branch=$$(git rev-parse --abbrev-ref HEAD); \
+		if [ "$$current_branch" = main ]; then \
+		  echo "当前分枝为$$current_branch."; \
+			remote_branch=$$(git branch --remote | grep -v 'main' | grep $(branchnames) | awk '{print $$1}' | sed 's/origin\///' | head -n 1); \
+			if [ -n "$$remote_branch" ]; then \
+		    echo "尝试从远程分枝$$remote_branch 拉取更新..."; \
+				git pull --rebase origin $$remote_branch; \
+			else \
+				echo "远程无可用分支$$remote_branch."; \
+			fi; \
+			else \
+			echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
+			git pull --rebase origin main; \
+		fi
+
+# Pull changes from the main branch: run check_git_status, then rebase the
+# current branch on origin/main.
+# `@` is only a Make prefix at the very start of a recipe line; inside a
+# continued line it is handed to the shell, which then tries to run
+# "@current_branch=..." and "@echo" as commands. Only the first line carries
+# the prefix here, and the recipe no longer ends with a dangling continuation.
+pullmain:
+	@$(MAKE) check_git_status; \
+	current_branch=$$(git rev-parse --abbrev-ref HEAD); \
+	echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
+	git pull --rebase origin main
+
+# push: announce the step, then run `git push` with no arguments.
+push:
+	@echo "推送到远程..."
+	@git push
+
+# filehash: recompute the md5 digest of the tracked source files (same file
+# selection as lazyrender) and store it in STATE_FILE without rendering.
+filehash:
+	@digest=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
+	printf '%s\n' "$$digest" > $(STATE_FILE)
+
+# Preview the site on a specific port: $(port) is looked up from the branch
+# name near the top of this file and defaults to 4199.
+preview:
+	@quarto preview --port $(port)
+
+# Generate README.md: render index.qmd to markdown, copy the result from the
+# output directory to ./README.md while deleting every block delimited by
+# lines starting with `---` (the YAML front matter), then remove the rendered
+# copy. $(outputdir) already ends in "/", so the paths contain a harmless "//".
+readme:
+	@quarto render index.qmd -t markdown -o README.md
+	@sed -e '/^---/,/^---/d' "$(outputdir)/README.md" > README.md
+	@rm "$(outputdir)/README.md"
+
+# Sync files with remote server: mirror $(outputdir) to $(remotedir).
+# --delete removes remote files that no longer exist locally; unlike `upload`
+# this runs neither backupdocx nor fix_links and has no fallback.
+rsync:
+	@rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"
+
+# Open local site: opens index.html from the output directory with `open`.
+# Only acts when OS is OSX; on other systems the target silently does nothing.
+open:
+	@if [ "$(OS)" = "OSX" ]; then open "$(outputdir)/index.html"; fi
+
+# Clean unnecessary files: removes files with the listed extensions from the
+# project root only (no recursion). Note that ./*.tex is included, so any
+# .tex file kept in the root is deleted as well.
+clean:
+	@rm -f ./*.spl  ./*.bbl ./*.blg ./*.log ./*.tex ./*.bcf ./*.tex.sedbak ./*.fdb_latexmk
+
+# Upload files to server and fix links.
+#   1. backupdocx runs first (prerequisite).
+#   2. The output directory is created if needed and made group-writable with
+#      the setgid bit (mode 2775); fix_links then rewrites the HTML.
+#   3. The output directory is mirrored to $(remotedir). If that fails, an
+#      empty directory named $(reponame) is pushed to the parent of
+#      $(remotedir) and the mirror is retried once.
+#      The empty directory is created under a mktemp scratch directory, so a
+#      real directory called $(reponame) in the project root is never removed.
+#   4. If the retry fails too, the recipe fails instead of continuing silently.
+#   5. On OSX the published site is opened in the browser.
+upload: backupdocx
+	@mkdir -p "$(outputdir)" && chmod -R 2775 "$(outputdir)"
+	@$(MAKE) fix_links
+	@sync_site() { rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; }; \
+	if ! sync_site; then \
+		echo "Rsync failed. Attempting alternative upload method..."; \
+		stagedir=$$(mktemp -d) || exit 1; \
+		mkdir -p "$$stagedir/$(reponame)"; \
+		rsync -azvu --progress --delete -r "$$stagedir/$(reponame)" "$(dir $(remotedir))"; \
+		rm -rf "$$stagedir"; \
+		sync_site || { echo "Upload failed." >&2; exit 1; }; \
+	fi; \
+	if [ "$(OS)" = "OSX" ]; then \
+		open "$(siteurl)" 2>/dev/null; \
+	fi
+
+# backupdocx: keep a dated copy of www/MS/MS.docx under TC/MS/.
+#  - Projects without www/MS/MS.docx are skipped up front (previously cp and
+#    git add printed errors on every upload of such a project).
+#  - The backup is named MS<yymmddHHMM>_<short hash>.docx after the current
+#    HEAD commit; nothing is copied when an identical backup (byte comparison
+#    with cmp) or a file of that name already exists.
+#  - The commit is restricted to the new backup file, so unrelated staged
+#    changes are not committed along with it.
+backupdocx:
+	@if [ ! -f www/MS/MS.docx ]; then \
+		echo "未找到www/MS/MS.docx，跳过备份."; \
+		exit 0; \
+	fi; \
+	echo "备份MS.docx文件..."; \
+	currentcommithash=$$(git rev-parse --short HEAD); \
+	datetime=$$(git show -s --format=%ci $$currentcommithash | sed 's/[-: ]//g' | cut -c3-12); \
+	backup="TC/MS/MS$${datetime}_$${currentcommithash}.docx"; \
+	mkdir -p TC/MS/; \
+	existing_file=$$(find TC/MS -name "MS*.docx" -exec cmp -s www/MS/MS.docx {} \; -print -quit); \
+	if [ -n "$$existing_file" ]; then \
+		echo "与www/MS/MS.docx 内容相同的备份文件已存在: $$existing_file"; \
+		echo "无需备份."; \
+	elif [ -e "$${backup}" ]; then \
+		echo "$${backup}已存在，无需备份."; \
+	else \
+		cp www/MS/MS.docx "$${backup}"; \
+		echo "备份$${backup}完成."; \
+		git add "$${backup}"; \
+		if ! git diff --cached --quiet -- "$${backup}"; then \
+			git commit -m "备份$${backup}" -- "$${backup}"; \
+		fi; \
+	fi
+
+
+# trackchange: pick two commits and open their MS.docx backups side by side.
+#  - Exits successfully without doing anything unless projtype is "manuscript".
+#  - The candidate list is `git log` reduced to lines matching the short hash
+#    embedded in a TC/MS/*.docx file name (or the literal text "SMT_】");
+#    commits are chosen interactively with fzf.
+#  - hash1 comes from the last selected line and hash2 from the first; each is
+#    replaced by its parent commit (`^`) before the backup file name
+#    TC/MS/MS<yymmddHHMM>_<hash>.docx is built from that commit's date.
+#  - When both files exist and differ in name they are opened with `open`, a
+#    suggested name for the compared document is put on the clipboard with
+#    pbcopy, and the user is asked to save the comparison from Word.
+#    Otherwise the recipe fails with exit status 1.
+# NOTE(review): `open` and `pbcopy` are presumably macOS-only, but the recipe
+# has no OS guard — confirm this target is only run on macOS.
+trackchange:
+	@if [ "$(projtype)" != "manuscript" ]; then \
+	exit 0; \
+	fi; \
+	echo "选择两个提交以比较文档..."; \
+	hashes=$$(git log --pretty=format:'%h: %s BY %an (%ar)' \
+	| grep -E "$$(ls TC/MS/*.docx | xargs -n1 basename | sed -E 's/MS.*_([0-9a-f]+)\.docx/\1/' | tr '\n' '|')SMT_】" \
+	| fzf --multi --reverse --preview="echo {}" ); \
+	echo $$hashes; \
+	hash1=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | tail -n 1 | awk '{print $$1}' | tr -d ':'); \
+	hash1=$$(git rev-parse --short $${hash1}^); \
+	datetime1=$$(git show -s --format=%ci $$hash1 | sed 's/[-: ]//g' | cut -c3-12); \
+	hash2=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | head -n 1 | awk '{print $$1}' | tr -d ':'); \
+	hash2=$$(git rev-parse --short $${hash2}^); \
+	datetime2=$$(git show -s --format=%ci $$hash2 | sed 's/[-: ]//g' | cut -c3-12); \
+	if [ -z "$$hash1" ] || [ -z "$$hash2" ]; then \
+	echo "必须选择两个提交."; \
+	exit 1; \
+	fi; \
+	doc1="TC/MS/MS$${datetime1}_$$hash1.docx"; \
+	echo "$$doc1"; \
+	doc2="TC/MS/MS$${datetime2}_$$hash2.docx"; \
+	echo "$$doc2"; \
+	if [ -f "$$doc1" ] && [ -f "$$doc2" ] && [ "$$doc1" != "$$doc2" ]; then \
+	echo "打开文件: $$doc1 和 $$doc2"; \
+	open "$$doc1" "$$doc2"; \
+	printf "MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}" | pbcopy; \
+	echo "请在word中对比两个版本形成带修改痕迹的版本，并保存至TC/MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}.docx!"; \
+	else \
+	echo "一个或两个文件不存在: $$doc1, $$doc2"; \
+	exit 1; \
+	fi
+
+# Fix links in www directory.
+# Every *.html file under ./www is edited in place:
+#   - unexpanded Quarto shortcodes {{< var branch|pubtype|projtype|reponame >}}
+#     are replaced by the values computed in this Makefile;
+#   - "<reponame>/blob" becomes "<reponame>/raw/branch" and "<reponame>/edit"
+#     becomes "<reponame>/_edit".
+# The sed expressions use "|" as delimiter so that a branch or repository name
+# containing "/" (e.g. feature/x) cannot break the substitution.
+# `-i.bak` is the in-place form accepted by both GNU and BSD sed; the backup
+# files it leaves behind are removed afterwards.
+fix_links:
+	@find ./www -type f -name "*.html" -exec sed -i.bak \
+		-e "s|{{< var branch >}}|$(branchname)|g" \
+		-e "s|{{< var pubtype >}}|$(pubtype)|g" \
+		-e "s|{{< var projtype >}}|$(projtype)|g" \
+		-e "s|{{< var reponame >}}|$(reponame)|g" \
+		-e "s|$(reponame)/blob|$(reponame)/raw/branch|g" \
+		-e "s|$(reponame)/edit|$(reponame)/_edit|g" {} +
+	@find ./www -type f -name "*.bak" -exec rm {} +
+
+# upsert_var: canned recipe, used as $(call upsert_var,KEY,VALUE).
+# Makes sure _variables.yml contains exactly "KEY: VALUE": the key is appended
+# when missing, then the line is rewritten in place with the current value.
+define upsert_var
+@grep -q '^$(1):' _variables.yml || echo "$(1): $(2)" >> _variables.yml
+@$(SEDI) 's/^$(1):.*/$(1): $(2)/' _variables.yml
+endef
+
+# updvariable: synchronise _variables.yml (created when missing) with the
+# values computed in this Makefile, then run the external `mkpapervar` command.
+# nwAB, nwMS, figtblMS and figtblSM are not assigned anywhere in the visible
+# part of this Makefile, so they are written as empty values unless supplied
+# on the command line or through the environment.
+# NOTE(review): presumably mkpapervar fills those four keys in — confirm.
+updvariable:
+	@touch _variables.yml
+	$(call upsert_var,reponame,$(reponame))
+	$(call upsert_var,projtype,$(projtype))
+	$(call upsert_var,branch,$(branchname))
+	$(call upsert_var,pubtype,$(pubtype))
+	$(call upsert_var,nwAB,$(nwAB))
+	$(call upsert_var,nwMS,$(nwMS))
+	$(call upsert_var,figtblMS,$(figtblMS))
+	$(call upsert_var,figtblSM,$(figtblSM))
+	@mkpapervar
+
+
+# Help: list all available commands with descriptions (English and Chinese).
+# The descriptions mirror what the rules in this file actually do; keep both
+# language sections in step with the rules when targets are added or changed.
+help:
+	@echo "Makefile for Quarto Project Automation"
+	@echo "======================================="
+	@echo "Available targets (English):"
+	@echo ""
+	@echo "  make all           - Run local, upload, clean, commit and push in sequence"
+	@echo "  make force         - Update Ref.bib, check git status, update variables, then always render"
+	@echo "  make local         - Update Ref.bib, check git status, update variables, then lazy render"
+	@echo "  make check_git_status - Check for uncommitted changes and ask to commit them"
+	@echo "  make lazyrender    - Render if source files have changed based on hash comparison"
+	@echo "  make render        - Force Quarto to render the project"
+	@echo "  make commit        - Stage all changes and commit them (message: render compile) if anything changed"
+	@echo "  make pull          - Pull, then rebase on a matching remote branch (on main) or on origin/main"
+	@echo "  make pullmain      - Rebase the current branch on origin/main"
+	@echo "  make push          - Push to the remote"
+	@echo "  make filehash      - Generate and store the file hash of source files"
+	@echo "  make preview       - Preview the site locally on the specific port (default: 4199)"
+	@echo "  make readme        - Render README.md from Quarto index.qmd"
+	@echo "  make rsync         - Sync output files with the remote server"
+	@echo "  make open          - Open the generated site locally in the browser"
+	@echo "  make clean         - Clean up unnecessary files"
+	@echo "  make upload        - Back up MS.docx, fix links and upload files to the server"
+	@echo "  make backupdocx    - Back up www/MS/MS.docx into TC/MS/ and commit the copy"
+	@echo "  make trackchange   - Pick two commits and open their MS.docx backups for comparison"
+	@echo "  make fix_links     - Fix HTML links in the 'www' directory for the remote server"
+	@echo "  make updrefbib     - Update BB/Ref.bib and commit it if it changed"
+	@echo "  make updvariable   - Update _variables.yml with the current project values"
+	@echo "  make updmakefile   - Update the Makefile"
+	@echo "  make help          - Display this help message"
+	@echo ""
+	@echo "Available targets (中文):"
+	@echo ""
+	@echo "  make all           - 依次执行 local、upload、clean、commit 和 push"
+	@echo "  make force         - 更新参考文献、检查Git状态、更新变量后强制渲染"
+	@echo "  make local         - 更新参考文献、检查Git状态、更新变量后进行懒惰渲染"
+	@echo "  make check_git_status - 检查未提交的更改，并询问是否提交"
+	@echo "  make lazyrender    - 如果源文件发生更改，则根据哈希比较进行渲染"
+	@echo "  make render        - 强制 Quarto 渲染项目"
+	@echo "  make commit        - 暂存全部更改，如有更改则提交（提交说明: render compile）"
+	@echo "  make pull          - 拉取更新，并变基到匹配的远程分支（main 分支时）或 origin/main"
+	@echo "  make pullmain      - 将当前分支变基到 origin/main"
+	@echo "  make push          - 推送到远程"
+	@echo "  make filehash      - 生成并存储源文件的哈希值"
+	@echo "  make preview       - 本地在特定端口预览网站 (默认: 4199)"
+	@echo "  make readme        - 从 Quarto 的 index.qmd 生成 README.md"
+	@echo "  make rsync         - 将输出文件同步到远程服务器"
+	@echo "  make open          - 在浏览器中打开生成的网站"
+	@echo "  make clean         - 清理不必要的文件"
+	@echo "  make upload        - 备份 MS.docx、修复链接并上传文件到服务器"
+	@echo "  make backupdocx    - 将 www/MS/MS.docx 备份到 TC/MS/ 并提交"
+	@echo "  make trackchange   - 选择两个提交并打开对应的 MS.docx 备份进行对比"
+	@echo "  make fix_links     - 修复 'www' 目录中的 HTML 链接"
+	@echo "  make updrefbib     - 更新 BB/Ref.bib，如有变化则提交"
+	@echo "  make updvariable   - 用当前项目信息更新 _variables.yml"
+	@echo "  make updmakefile   - 更新本项目 Makefile"
+	@echo "  make help          - 显示此帮助信息"
+	@echo ""
+	@echo "Makefile variables (English and 中文):"
+	@echo "  bcolor             - Background color based on branch name (基于分支名的背景颜色)"
+	@echo "  port               - Port number based on branch name (基于分支名的端口号)"
+	@echo "  STATE_FILE         - File for storing hash state of source files (用于存储源文件哈希状态的文件)"
+	@echo "  siteurl            - The URL where the site will be hosted (网站托管的 URL)"
+	@echo ""
+
+
--- a/SD/20240321_0_R实践课程/_extensions
+++ b/SD/20240321_0_R实践课程/_extensions
--- a/SD/20240321_0_R实践课程/index.qmd
+++ b/SD/20240321_0_R实践课程/index.qmd
@@ -47,9 +47,8 @@ knitr::opts_chunk$set(echo = TRUE)
 ### 课件

 - 采用`R语言`+`quarto`完成
- 网页公开：[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html)
- 课件代码：[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git)
- 代码web界面：[https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/](https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/)
+- 网页公开：[https://drc.drwater.net/course/public/RWEP/PUB/index.html](https://drc.drwater.net/course/public/RWEP/PUB/index.html)
+- 课件代码：[https://git.drwater.net/course/RWEP.git](https://git.drwater.net/course/RWEP.git)

 ## 如何学习接下来的内容？

@@ -67,7 +66,7 @@ knitr::opts_chunk$set(echo = TRUE)

 ## Rstudio Server使用

- 服务网址：[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/)
+- 服务网址：[https://rs1.drwater.net/](https://rs1.drwater.net/)
 - 每位同学使用1个账号，随机生成
 - 密码：****
 - 后面的实践课程可在该服务器上完成
--- a/SD/1.1_R语言介绍/_demo.qmd
+++ b/SD/1.1_R语言介绍/_demo.qmd
@@ -0,0 +1,138 @@
+---
+title: "Lesson 6"
+format: html
+---
+
+
+
+
+```text
+
+https://rs1.drwater.net
+
+username: 
+  - ruser01
+  - ruser02
+  - ruser03
+  - ruser04
+  - ruser05
+  - ruser06
+
+RWEP2025
+
+```
+
+
+# 安装包
+
+
+```{r}
+install.packages("tidyverse")
+
+x <- c(1:10, NA)
+
+hist(x)
+
+mean(x, na.rm = TRUE)
+
+median(x, na.rm = TRUE)
+
+sd(x, na.rm = TRUE)
+
+
+for(i in 1:10){
+  print(i)
+}
+
+
+x + y + x * y
+
+myfunc <- function(x, y = 3) {
+  x + y + x * y
+}
+
+
+myfunc(1, 2)
+
+
+myfunc(10)
+
+
+c(FALSE, 2, 1:3, 3) 
+
+c(FALSE, 2, 1:3, 3) > 1
+
+all(c(FALSE, 2, 1:3, 3) > 1)
+
+
+c(1L,2L,3L)
+
+any(c(FALSE, 2, 1:3, 3) > 1)
+
+
+x <- 10
+
+sin(x) = ?
+
+paste("sin(x) = ", sin(x), sep = " ")
+
+paste0("sin(x) = ", sin(x))
+
+
+substr("Monday", 1, 3)
+
+
+
+```
+
+
+# tidy
+
+
+```{r}
+require(readxl)
+
+aqdf <-readxl::read_xlsx("../../data/airquality.xlsx", sheet = "metadf")
+
+# install.packages("skimr")
+
+aqdf |>
+  skimr::skim()
+
+# base
+
+# tidyverse
+
+# Per-area site count with the mean and standard deviation of longitude for
+# the four municipalities, drawn as points with error bars.
+# lon.sd is computed from `lon` (it was computed from `lat`), so the error
+# bars describe the same variable as lon.mean.
+aqdf |>
+  dplyr::group_by(Area) |>
+  dplyr::summarize(
+    n = n(),
+    lon.mean = mean(lon, na.rm = TRUE),
+    lon.sd = sd(lon, na.rm = TRUE)
+  ) |>
+  dplyr::filter(Area %in% c("北京市", "天津市", "上海市", "重庆市")) |>
+  ggplot(aes(x = n, y = lon.mean)) +
+  geom_point() +
+  geom_line() +
+  geom_errorbar(
+    aes(ymin = lon.mean - lon.sd,
+      ymax = lon.mean + lon.sd)
+  )
+
+
+
+
+
+
+
+readxl::read_xlsx("./airquality.xlsx")
+
+flights|>
+  filter(dest=="IAH")|>
+  group_by(year,month,day)|>summarize(n=n(),
+delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10)
+
+```
+
+
+
--- a/SD/20240321_1_R语言介绍/_extensions
+++ b/SD/20240321_1_R语言介绍/_extensions
--- a/SD/20240321_1_R语言介绍/index.qmd
+++ b/SD/20240321_1_R语言介绍/index.qmd
@@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz")
 ## 欢迎讨论！{.center}


-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
--- a/SD/20240321_2_R语言语法基础/_extensions
+++ b/SD/20240321_2_R语言语法基础/_extensions
--- a/SD/20240321_2_R语言语法基础/index.qmd
+++ b/SD/20240321_2_R语言语法基础/index.qmd
@@ -264,6 +264,9 @@ t.test(x, y)
 wilcox.test(x, y)
 ```

+
+### [什么是 Wilcoxon-Mann-Whitney检验？](https://zhuanlan.zhihu.com/p/613524533)
+
 ## 统计函数

 ### 创建向量的直方图
@@ -792,4 +795,4 @@ names(Y) <- c("colA", "colB", "colC")
 ## 欢迎讨论！{.center}


-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
--- a/SD/20240321_3_Excel基础/_extensions
+++ b/SD/20240321_3_Excel基础/_extensions
--- a/SD/20240321_3_Excel基础/index.qmd
+++ b/SD/20240321_3_Excel基础/index.qmd
@@ -37,7 +37,7 @@ require(learnr)

 ## 下载excel文件

-[https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx)
+[https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx](https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx)


 ## Tidy data
--- a/SD/20240321_9_课后作业/_extensions
+++ b/SD/20240321_9_课后作业/_extensions
--- a/SD/1.9_课后作业6/data.csv
+++ b/SD/1.9_课后作业6/data.csv
@@ -0,0 +1,21 @@
+name,age,score
+Alice,25,85
+Bob,30,92
+Charlie,28,89
+David,22,95
+Eva,35,87
+Frank,27,91
+Grace,29,88
+Helen,26,93
+Ivan,31,86
+Jack,24,94
+Kelly,32,89
+Lily,28,90
+Mike,33,85
+Nancy,27,92
+Olivia,34,88
+Peter,29,93
+Queen,25,89
+Ryan,30,94
+Samantha,26,91
+Tom,31,87
--- a/SD/20240321_9_课后作业/index.qmd
+++ b/SD/20240321_9_课后作业/index.qmd
@@ -79,5 +79,5 @@ Tom,31,87
 ## 欢迎讨论！{.center}


-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

--- a/SD/20240321_9_课后作业/第6次课后作业_模板.qmd
+++ b/SD/20240321_9_课后作业/第6次课后作业_模板.qmd
--- a/SD/20240326_1_codestyle/_extensions
+++ b/SD/20240326_1_codestyle/_extensions
--- a/SD/20240326_1_codestyle/index.qmd
+++ b/SD/20240326_1_codestyle/index.qmd
--- a/SD/2.2_dataimport/_demo.qmd
+++ b/SD/2.2_dataimport/_demo.qmd
@@ -0,0 +1,154 @@
+---
+title: "Lesson 7"
+format: html
+---
+
+
+```{r}
+require(tidyverse)
+
+
+files <- c(
+  "../../data/01-sales.csv",
+  "../../data/02-sales.csv",
+  "../../data/03-sales.csv"
+)
+
+files <- dir("../../data", pattern = "sales.csv", full.names = TRUE)
+
+
+readr::read_csv(files, id = "file") |>
+  mutate(file = basename(file))
+```
+
+
+```{r}
+require(tidyverse)
+library(tidyverse)
+
+files <- dir(
+  "../../data/gapminder",
+  pattern = "^[12][09][0-9][0-9].xlsx$",
+  full.names = TRUE
+)
+
+
+alldf <- tibble::tibble()
+for (file in files) {
+  alldf <- alldf |>
+    bind_rows(
+      readxl::read_xlsx(file) |>
+        mutate(year = parse_number(basename(file)))
+    )
+}
+
+alldf |>
+  ggplot(aes(x = lifeExp, y = gdpPercap)) +
+  geom_point(aes(color = factor(year))) +
+  geom_smooth(method = "lm", se = FALSE) +
+  scale_y_log10(
+    breaks = scales::trans_breaks("log10", function(x) 10^x),
+    labels = scales::trans_format("log10", scales::math_format(10^.x))
+  ) +
+  facet_wrap(~year, ncol = 4, scale = "fixed")
+```
+
+# slope
+
+```{r}
+file <- files[1]
+
+lm(y ~ x, data)
+
+m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file))
+
+summary(m)
+
+coef(m)[2]
+
+slopes <- c()
+years <- c()
+for (file in files) {
+  m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file))
+  years <- c(years, parse_number(basename(file)))
+  slopes <- c(slopes, coef(m)[2])
+}
+years
+slopes
+
+
+plot(years, as.numeric(slopes), type = "b")
+```
+
+# purrr
+
+
+
+```{r}
+require(tidyverse)
+df <- tibble(
+  filename = dir(
+    "../../data/gapminder",
+    pattern = "^[12][09][0-9][0-9].xlsx$",
+    full.names = TRUE
+  )
+) |>
+  dplyr::mutate(
+    data = purrr::map(
+      filename,
+      \(x) readxl::read_xlsx(x)
+    )
+  ) |>
+  mutate(year = parse_number(basename(filename))) |>
+  mutate(
+    m = purrr::map(data, \(xxxx) lm(log10(gdpPercap) ~ lifeExp, data = xxxx))
+  ) |>
+  mutate(slope = purrr::map_dbl(m, \(yyyy) coef(yyyy)[2])) |>
+  unnest(data)
+
+pvalue <- summary(df$m[[1]])$coefficients[2, 4]
+rsq <- summary(df$m[[1]])$r.squared
+
+require(tidymodels)
+generics::tidy(df$m[[1]])
+
+df |>
+  ggplot(aes(x = lifeExp, y = gdpPercap)) +
+  geom_point(aes(color = factor(year))) +
+  geom_smooth(method = "lm", se = FALSE) +
+  scale_y_log10(
+    breaks = scales::trans_breaks("log10", function(x) 10^x),
+    labels = scales::trans_format("log10", scales::math_format(10^.x))
+  ) +
+  facet_wrap(~year, ncol = 4, scale = "fixed")
+
+
+df |>
+  ggplot(aes(x = year, y = slope)) +
+  geom_line() +
+  geom_point()
+
+
+df
+df$slope[[1]]
+
+coef(df$m[[1]])[2]
+
+lm(y ~ x, data)
+
+
+# Faceted GDP-vs-life-expectancy plot, one panel per year.
+# `df` was already unnested when it was built above, so its `data` column no
+# longer exists; calling unnest(data) again fails. Plot `df` directly.
+df |>
+  ggplot(aes(x = lifeExp, y = gdpPercap)) +
+  geom_point(aes(color = factor(year))) +
+  geom_smooth(method = "lm", se = FALSE) +
+  scale_y_log10(
+    breaks = scales::trans_breaks("log10", function(x) 10^x),
+    labels = scales::trans_format("log10", scales::math_format(10^.x))
+  ) +
+  facet_wrap(~year, ncol = 4, scale = "fixed")
+```
+
+
+
+
--- a/SD/20240326_2_dataimport/_extensions
+++ b/SD/20240326_2_dataimport/_extensions
--- a/SD/20240326_2_dataimport/index.qmd
+++ b/SD/20240326_2_dataimport/index.qmd
@@ -197,9 +197,11 @@ annoying <- tibble(
 ```{r}
 #| message: false

-sales_files <- c("../../data/01-sales.csv",
+sales_files <- c(
+  "../../data/01-sales.csv",
  "../../data/02-sales.csv",
-  "../../data/03-sales.csv")
+  "../../data/03-sales.csv"
+)
 read_csv(sales_files, id = "file")
 ```

@@ -223,8 +225,11 @@ read_csv(sales_files, id = "file")
 ## 批量读取

 ```{r}
-sales_files <- list.files("../../data",
-  pattern = "sales\\.csv$", full.names = TRUE)
+sales_files <- list.files(
+  "../../data",
+  pattern = "sales\\.csv$",
+  full.names = TRUE
+)
 sales_files
 ```

@@ -283,8 +288,13 @@ metadf <- readxl::read_xlsx("../../data/airquality.xlsx")
 dir.create("../../data/metacity2/")
 metadf |>
  nest(sitedf = -site) |>
-  mutate(flag = purrr::map2(site, sitedf,
-    ~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx"))))
+  mutate(
+    flag = purrr::map2(
+      site,
+      sitedf,
+      ~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx"))
+    )
+  )
 ```


@@ -305,10 +315,16 @@ if (FALSE) {
  writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx")
  dir.create("../../data/metacity/")
  metanestdf |>
-    mutate(flag = purrr::map2(Area, citydf,
-      ~ writexl::write_xlsx(.y,
+    mutate(
+      flag = purrr::map2(
+        Area,
+        citydf,
+        ~ writexl::write_xlsx(
+          .y,
          path = paste0("../../data/metacity/", .x, ".xlsx")
-      )))
+        )
+      )
+    )
 }
 ```

@@ -321,5 +337,4 @@ if (FALSE) {
 ## 欢迎讨论！{.center}


-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
-
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
--- a/SD/20240326_2_dataimport/students-2.csv
+++ b/SD/20240326_2_dataimport/students-2.csv
--- a/SD/2.3_datatransform/_demo.qmd
+++ b/SD/2.3_datatransform/_demo.qmd
@@ -0,0 +1,115 @@
+---
+title: "Lesson 8"
+format: html
+---
+
+
+```{r}
+require(tidyverse)
+library(nycflights13)
+
+flights |>
+  select(3:6) |>
+  head(3) |>
+  rename_all(~ gsub("_", "", .))
+
+
+flights |>
+  select(3:6) |>
+  head(3) |>
+  rename_with(toupper, .cols = 2:4)
+
+# 每月10号-15号，dep_delay > 100 的航班
+
+# For each month, which origin has the largest total dep_delay?
+# The total is a sum (it was computed with mean()). summarize() drops the
+# `origin` grouping level, so slice_max() picks one origin per month.
+
+flights |>
+  group_by(month, origin) |>
+  summarize(n = n(), total_dep_delay = sum(dep_delay, na.rm = TRUE)) |>
+  slice_max(total_dep_delay)
+
+# arrange(month, origin, desc(total_dep_delay))
+
+# 每月随机抽取一天，随机抽取三个航班
+
+slice_sample(n = 1)
+
+flights |>
+  tidyr::nest(ymddf = -c(year, month, day)) |>
+  group_by(year, month) |>
+  slice_sample(n = 5) |>
+  unnest(ymddf) |>
+  group_by(year, month, day) |>
+  slice_sample(n = 3)
+
+
+flights |>
+  tidyr::nest(ymddf = -c(year, month, day)) |>
+  group_by(year, month) |>
+  slice_sample(n = 1) |>
+  mutate(
+    ymddf = purrr::map(ymddf, \(x) {
+      x |>
+        slice_sample(n = 3)
+    })
+  )
+
+# 每月 每个出发地 周末的平均dep_delay 与 工作日的平均dep_delay 差值最大的3个航班
+
+flights |>
+  mutate(date = ymd(paste(year, month, day))) |>
+  mutate(weekday = wday(date)) |>
+  mutate(isworkday = if_else(between(weekday, 2, 6), "Yes", "No")) |>
+  group_by(year, month, origin, flight, isworkday) |>
+  summarize(mean_delay = mean(dep_delay, na.rm = TRUE)) |>
+  tidyr::nest(diffdelaydf = c(isworkday, mean_delay)) |>
+  filter(
+    purrr::map(diffdelaydf, \(x) {
+      nrow(x)
+    }) >
+      1
+  ) |>
+  mutate(
+    diffdelay = purrr::map_dbl(diffdelaydf, \(x) {
+      x |>
+        arrange(isworkday) |>
+        pull(mean_delay) |>
+        diff()
+    })
+  ) |>
+  group_by(year, month, origin) |>
+  slice_max(diffdelay, n = 3)
+
+
+wday(today())
+
+weekday()
+
+
+slice_sample(n = 1)
+
+
+flights |>
+  group_by(month) |>
+  slice_sample(n = 1)
+```
+
+
+
+```{r}
+# Number of flights per month, saved as PDF and PNG.
+# p1 is only assigned, never printed, so it is not the "last plot"; it must be
+# passed to ggsave() explicitly, otherwise ggsave() saves whatever plot was
+# displayed last (or fails when there is none).
+p1 <- flights |>
+  group_by(year, month) |>
+  summarize(n = n()) |>
+  ggplot(aes(month, n)) +
+  geom_point(shape = 21, size = 6, color = "black", fill = "red") +
+  geom_line()
+
+ggsave("./a.pdf", plot = p1)
+ggsave("./a.png", plot = p1)
+
+require(patchwork)
+
+p1 / p1
+```
+
+
--- a/SD/20240326_3_datatransform/_extensions
+++ b/SD/20240326_3_datatransform/_extensions
--- a/SD/2.3_datatransform/a.pdf
+++ b/SD/2.3_datatransform/a.pdf
--- a/SD/2.3_datatransform/a.png
+++ b/SD/2.3_datatransform/a.png
--- a/SD/20240326_3_datatransform/index.qmd
+++ b/SD/20240326_3_datatransform/index.qmd
@@ -323,5 +323,5 @@ flights |>
 ## 欢迎讨论！{.center}


-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

--- a/SD/20240402_9_课后作业/index.qmd
+++ b/SD/20240402_9_课后作业/index.qmd
@@ -1,49 +0,0 @@
---
-title: "课后作业9"
-subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
-author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
-date: today
-lang: zh
-format:
-  revealjs:
-    theme: dark
-    slide-number: true
-    chalkboard:
-      buttons: true
-    preview-links: auto
-    lang: zh
-    toc: true
-    toc-depth: 1
-    toc-title: 大纲
-    logo: ./_extensions/inst/img/ucaslogo.png
-    css: ./_extensions/inst/css/revealjs.css
-    pointer:
-      key: "p"
-      color: "#32cd32"
-      pointerSize: 18
-revealjs-plugins:
-  - pointer
-filters:
-  - d2
---
-
-```{r}
-#| include: false
-#| cache: false
-lang <- "cn"
-require(tidyverse)
-require(learnr)
-```
-
-## 第9次课后作业
-
-自选数据集，使用R语言开展不同因子（如年份、季节、处理方式等）间某指标的差异分析，采用图表方式形成简要报告。
-
-作业模板：[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd)
-
-
-## 欢迎讨论！{.center}
-
-
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
-
--- a/SD/20240402_9_课后作业/第9次课后作业_模板.qmd
+++ b/SD/20240402_9_课后作业/第9次课后作业_模板.qmd
@@ -1,8 +0,0 @@
---
-title: 课后作业9
-author: 姓名
-format: html
---
-
-要求：自选数据集，使用R语言开展不同因子间（如年份、季节、处理方式等）某指标的差异分析，采用图表+文字说明等方式形成简要报告。
-
--- a/SD/20240409_2_大数据分析工具/index.qmd
+++ b/SD/20240409_2_大数据分析工具/index.qmd
@@ -1,167 +0,0 @@
---
-title: "大数据分析工具"
-subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
-author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
-date: today
-lang: zh
-format:
-  revealjs:
-    theme: dark
-    slide-number: true
-    chalkboard:
-      buttons: true
-    preview-links: auto
-    lang: zh
-    toc: true
-    toc-depth: 1
-    toc-title: 大纲
-    logo: ./_extensions/inst/img/ucaslogo.png
-    css: ./_extensions/inst/css/revealjs.css
-    pointer:
-      key: "p"
-      color: "#32cd32"
-      pointerSize: 18
-revealjs-plugins:
-  - pointer
-filters:
-  - d2
---
-
-```{r}
-#| echo: false
-knitr::opts_chunk$set(echo = TRUE)
-source("../../coding/_common.R")
-library(nycflights13)
-library(tidyverse)
-
-```
-
-## 匹配数字
-
-### 匹配数字：
-
- \d：匹配任意数字字符。
- \d+：匹配一个或多个数字字符。
- [0-9]: 匹配数字
-
-### 匹配字母：
-
- \w：匹配任意字母、数字或下划线字符。
- \w+：匹配一个或多个字母、数字或下划线字符。
-
-## 匹配数字
-
-### 匹配空白字符：
-
- \s：匹配任意空白字符，包括空格、制表符、换行符等。
- \s+：匹配一个或多个空白字符。
-
-### 匹配特定字符：
-
- [abc]：匹配字符 a、b 或 c 中的任意一个。
- [a-z]：匹配任意小写字母。
- [A-Z]：匹配任意大写字母。
- [0-9]：匹配任意数字。
-
-## 匹配数字
-
-### 匹配重复次数：
-
- {n}：匹配前一个字符恰好 n 次。
- {n,}：匹配前一个字符至少 n 次。
- {n,m}：匹配前一个字符至少 n 次，但不超过 m 次。
-
-### 匹配边界：
-
- ^：匹配字符串的开头。
- $：匹配字符串的结尾。
-
-## 匹配数字
-
-### 匹配特殊字符：
-
- \：转义特殊字符，使其按字面意义匹配。
- .：匹配任意单个字符。
- |：表示“或”关系，匹配两个或多个表达式之一。
-
-#### 匹配次数：
-
- *：匹配前一个字符零次或多次。
- +：匹配前一个字符一次或多次。
- ?：匹配前一个字符零次或一次。
-
-## 匹配数字
-
-### 分组和捕获：
-
- ()：将一系列模式组合成一个单元，可与特殊字符一起使用。
-
-### 预定义字符集：
-
- \d：任意数字，相当于 [0-9]。
- \w：任意字母、数字或下划线字符，相当于 [a-zA-Z0-9_]。
- \s：任意空白字符，相当于 [ \t\n\r\f\v]。
-
-
-
-## 实例
-
-```{r}
-library(babynames)
-(x <- c("apple", "apppple", "abc123def"))
-x[str_detect(x, "[0-9]")]
-x[str_detect(x, "abc[0-9]+")]
-x[str_detect(x, "pp")]
-x[str_detect(x, "p{4}")]
-x[str_detect(x, "p{4}")]
-x[str_detect("apple", "ap*")]
-x[str_detect("apple", "app*")]
-x[str_detect("apple", "a..le")]
-```
-
-## 练习
-
-
-找出`babyname`中名字含有ar的行
-
-```{r}
-#| echo: false
-babynames |>
-  filter(str_detect(name, "ar"))
-```
-
-## 练习
-
-
-找出`babyname`中名字含有ar或者以ry结尾的行。
-
-```{r}
-#| echo: false
-babynames |>
-  filter(str_detect(name, "ar"))
-```
-
-![](../../image/data-science/transform.png) 
-
-
-## GNU/Linux服务器
-
- `ssh`, `scp`
- `bash`
-  - grep
-  - sed
-  - awk
-  - find
-  - xargs
- `Editor`
-  - `Virtual Studio Code`
-  - `Vim`
-  - `Emacs`
-
-
-
-## 欢迎讨论！{.center}
-
-
-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
-
--- a/SD/20240328_1_datatransform/_extensions
+++ b/SD/20240328_1_datatransform/_extensions
--- a/SD/20240328_1_datatransform/index.qmd
+++ b/SD/20240328_1_datatransform/index.qmd
@@ -945,5 +945,5 @@ semi_join(df1, df2, by = "id")
 ## 欢迎讨论！{.center}


-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

--- a/SD/3.9_课后作业8/.RData
+++ b/SD/3.9_课后作业8/.RData
--- a/SD/20240328_9_课后作业/_extensions
+++ b/SD/20240328_9_课后作业/_extensions
--- a/SD/3.9_课后作业8/airqualitydf.RDS
+++ b/SD/3.9_课后作业8/airqualitydf.RDS
--- a/SD/20240328_9_课后作业/airqualitymedianoutrow5.pdf
+++ b/SD/20240328_9_课后作业/airqualitymedianoutrow5.pdf
--- a/SD/20240328_9_课后作业/freq.pdf
+++ b/SD/20240328_9_课后作业/freq.pdf
--- a/SD/20240328_9_课后作业/index.qmd
+++ b/SD/20240328_9_课后作业/index.qmd
@@ -43,7 +43,7 @@ require(learnr)
 1. 根据`airqualitydf.xlsx`，按采样点统计白天（8:00-20:00）与夜晚（20:00-8:00）中空气质量指数（AQI）中位数，按城市统计低于所有采样点AQI30%分位值的采样点占比，列出上述占比最高的10个城市（不考虑采样点数低于5个的城市）。
 2. 按照不同城市分组，统计白天与夜晚AQI中位数是否具有显著差异。

-作业模板：[第8次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)
+作业模板：[第8次课后作业_模板.qmd](https://git.drwater.net/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)

 ## 示例代码

@@ -62,5 +62,5 @@ require(learnr)
 ## 欢迎讨论！{.center}


-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

--- a/SD/20240328_9_课后作业/median.pdf
+++ b/SD/20240328_9_课后作业/median.pdf
--- a/SD/3.9_课后作业8/metadf.RDS
+++ b/SD/3.9_课后作业8/metadf.RDS
--- a/SD/20240328_9_课后作业/npar1wayConover.pdf
+++ b/SD/20240328_9_课后作业/npar1wayConover.pdf
--- a/SD/3.9_课后作业8/testdf.RDS
+++ b/SD/3.9_课后作业8/testdf.RDS
--- a/SD/20240328_9_课后作业/第8次课后作业_模板.qmd
+++ b/SD/20240328_9_课后作业/第8次课后作业_模板.qmd
@@ -14,8 +14,10 @@ format: html
 # 下载至临时文件
 if (FALSE) {
  tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
-  download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
-    destfile = tmpxlsxpath)
+  download.file(
+    "https://git.drwater.net/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
+    destfile = tmpxlsxpath
+  )
  airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
  metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
  saveRDS(airqualitydf, "./airqualitydf.RDS")
@@ -41,7 +43,12 @@ airqualitydf |>
  left_join(metadf |> select(site, city = Area)) |>
  group_by(city) |>
  filter(n() > 5) |>
-  summarize(p = sum(AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)) / n()) |>
+  summarize(
+    p = sum(
+      AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)
+    ) /
+      n()
+  ) |>
  top_n(10, p)


@@ -57,12 +64,11 @@ airqualitydf |>
  left_join(metadf |> select(site, city = Area)) |>
  group_by(city) |>
  filter(length(unique(site)) >= 5) |>
-  summarize(p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2,
-    na.rm = TRUE)) / n()) |>
+  summarize(
+    p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2, na.rm = TRUE)) / n()
+  ) |>
  slice_max(p, n = 10) |>
  knitr::kable()
-
-
 ```


@@ -83,41 +89,58 @@ if (FALSE) {
    left_join(metadf |> select(site, city = Area)) |>
    group_by(city) |>
    filter(length(unique(site)) >= 5) |>
-    mutate(dayornight = factor(ifelse(between(hour(datetime), 8, 20), "day", "night"),
-      levels = c("day", "night"))
+    mutate(
+      dayornight = factor(
+        ifelse(between(hour(datetime), 8, 20), "day", "night"),
+        levels = c("day", "night")
+      )
    ) |>
    group_by(city) |>
    nest(citydf = -city) |>
-    mutate(median_diff = purrr::map_dbl(citydf, ~
-      .x |>
+    mutate(
+      median_diff = purrr::map_dbl(
+        citydf,
+        ~ .x |>
          specify(AQI ~ dayornight) |>
          calculate(stat = "diff in medians", order = c("day", "night")) |>
          pull(stat)
-    )) |>
+      )
+    ) |>
    ungroup() |>
    #  slice_sample(n = 12) |>
-    mutate(null_dist = purrr::map(citydf, ~
-      .x |>
+    mutate(
+      null_dist = purrr::map(
+        citydf,
+        ~ .x |>
          specify(AQI ~ dayornight) |>
          hypothesize(null = "independence") |>
          generate(reps = 1000, type = "permute") |>
          calculate(stat = "diff in medians", order = c("day", "night"))
-    )) |>
-    mutate(p_value = purrr::map2_dbl(null_dist, median_diff, 
+      )
+    ) |>
+    mutate(
+      p_value = purrr::map2_dbl(
+        null_dist,
+        median_diff,
        ~ get_p_value(.x, obs_stat = .y, direction = "both") |>
          pull(p_value)
-    )) |>
+      )
+    ) |>
    mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) |>
-    mutate(fig = purrr::pmap(list(null_dist, median_diff, city, sigdiff),
+    mutate(
+      fig = purrr::pmap(
+        list(null_dist, median_diff, city, sigdiff),
        ~ visualize(..1) +
          shade_p_value(obs_stat = ..2, direction = "both") +
          ggtitle(paste0(..3, "：", ..4)) +
          theme_sci(2, 2)
-    )) |>
+      )
+    ) |>
    arrange(p_value)
  saveRDS(testdf, "./testdf.RDS")
 }

+if (FALSE) {
  lang <- "cn"
  require(dwfun)
  require(rmdify)
@@ -136,13 +159,16 @@ testdf |>
    nest(grpdf = -grp) |>
    ungroup() |>
    #  slice(1) |>
-  mutate(gp = purrr::map(grpdf,
+    mutate(
+      gp = purrr::map(
+        grpdf,
        ~ (.x |>
          pull(fig)) |>
          patchwork::wrap_plots(ncol = 3) +
-      dwfun::theme_sci(5, 7))) |>
+          dwfun::theme_sci(5, 7)
+      )
+    ) |>
    pull(gp)
-
-
+}
 ```

--- a/SD/20240328_9_课后作业/第8次课后作业_模板.sas
+++ b/SD/20240328_9_课后作业/第8次课后作业_模板.sas
--- a/SD/20240402_1_datavisualize/_extensions
+++ b/SD/20240402_1_datavisualize/_extensions
--- a/SD/20240402_1_datavisualize/index.qmd
+++ b/SD/20240402_1_datavisualize/index.qmd
@@ -3934,5 +3934,5 @@ p
 ## 欢迎讨论！{.center}


-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`

--- a/SD/20240402_1_datavisualize/mpg-plot.png
+++ b/SD/20240402_1_datavisualize/mpg-plot.png
--- a/SD/20240402_2_实践部分/_extensions
+++ b/SD/20240402_2_实践部分/_extensions
--- a/SD/20240402_2_实践部分/index.qmd
+++ b/SD/20240402_2_实践部分/index.qmd
@@ -100,5 +100,4 @@ geom_bar(position = "fill")
 ## 欢迎讨论！{.center}


-`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
-
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
--- a/SD/5.1_model/_demo.qmd
+++ b/SD/5.1_model/_demo.qmd
@@ -0,0 +1,254 @@
+---
+title: "Lesson 9"
+format: html
+---
+
+
+```{r}
+# install.packages("tidymodels")
+# library() stops at once if the package is missing; require() would only warn.
+library(tidymodels)
+taxi
+# Seed the RNG so the random 80/20 split is identical on every render.
+set.seed(123)
+taxisplit <- initial_split(taxi, prop = 0.8)
+taxi_train <- training(taxisplit)
+taxi_test <- testing(taxisplit)
+# Classification tree with a fixed cost-complexity value.
+tree_spec <- decision_tree(cost_complexity = 0.002) %>%
+  set_mode("classification")
+# Model `tip` from all other columns, fitted on the training set only.
+taxi_fit <- workflow() %>%
+  add_formula(tip ~ .) %>%
+  add_model(tree_spec) %>%
+  fit(data = taxi_train)
+```
+
+
+```{r}
+# Predict on the training set once and reuse the result for every metric below.
+taxi_aug <- augment(taxi_fit, new_data = taxi_train)
+taxi_aug %>%
+  relocate(tip, .pred_class, .pred_yes, .pred_no)
+
+taxi_aug %>%
+  conf_mat(truth = tip, estimate = .pred_class)
+
+taxi_aug %>%
+  accuracy(truth = tip, estimate = .pred_class)
+
+taxi_aug %>%
+  sensitivity(truth = tip, estimate = .pred_class)
+
+taxi_aug %>%
+  specificity(truth = tip, estimate = .pred_class)
+
+# Bundle the three metrics so that one call reports all of them.
+taxi_metrics <- metric_set(accuracy, specificity, sensitivity)
+taxi_aug %>%
+  taxi_metrics(truth = tip, estimate = .pred_class)
+
+# Same metrics, computed separately for each level of `local`.
+taxi_aug %>%
+  group_by(local) %>%
+  taxi_metrics(truth = tip, estimate = .pred_class)
+
+# ROC curve built from the `.pred_yes` column.
+taxi_aug %>%
+  roc_curve(truth = tip, .pred_yes) %>%
+  autoplot()
+taxi_aug
+# Hand-drawn ROC curve: x is the false-positive rate (1 - specificity), y is
+# sensitivity. geom_path() keeps the threshold order; geom_line() re-sorts by x.
+augment(taxi_fit, new_data = taxi_train) %>%
+  roc_curve(truth = tip, .pred_yes) %>%
+  ggplot(aes(x = 1 - specificity, y = sensitivity)) +
+  geom_point() +
+  geom_path() +
+  geom_abline(slope = 1)
+```
+
+
+# Cross Validation
+
+```{r}
+# Peek at the first split of a throw-away 10-fold resampling.
+vfold_cv(taxi_train, v = 10) |>
+  pull(splits) |>
+  nth(1)
+taxi_folds <- vfold_cv(taxi_train)
+taxi_folds$splits[1:3]
+
+vfold_cv(taxi_train, strata = tip)
+
+# The folds used from here on: seeded, 10-fold, stratified by `tip`.
+set.seed(123)
+taxi_folds <- vfold_cv(taxi_train, v = 10, strata = tip)
+taxi_folds
+
+# Same formula and tree spec as the single fit, left unfitted for resampling.
+taxi_wflow <- workflow() %>%
+  add_formula(tip ~ .) %>%
+  add_model(tree_spec)
+
+# Fit the workflow on every fold.
+taxi_res <- fit_resamples(taxi_wflow, taxi_folds)
+taxi_res
+
+# Inspect the first fold: its metrics, its split, and the two data partitions.
+taxi_res$.metrics[[1]]
+
+taxi_res$splits[[1]]
+
+analysis(taxi_res$splits[[1]])
+assessment(taxi_res$splits[[1]])
+
+# Metrics aggregated over all folds.
+taxi_res %>%
+  collect_metrics()
+
+taxi_res %>%
+  collect_metrics() %>%
+  select(.metric, mean, n)
+
+# Refit, this time keeping the assessment-set predictions (save_pred = TRUE).
+ctrl_taxi <- control_resamples(save_pred = TRUE)
+taxi_res <- fit_resamples(taxi_wflow, taxi_folds, control = ctrl_taxi)
+
+taxi_res
+```
+
+
+
+
+
+
+
+# NLA2007 cyanophyta model
+
+
+```{r}
+require(tidyverse)
+# Site metadata collapsed to one row per SITE_ID: numeric columns are averaged
+# and a single lake name is kept.
+sitedf <- readr::read_csv(
+  "https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv"
+) |>
+  select(
+    SITE_ID, lon = LON_DD, lat = LAT_DD,
+    name = LAKENAME, area = LAKEAREA, zmax = DEPTHMAX
+  ) |>
+  group_by(SITE_ID) |>
+  summarize(
+    lon = mean(lon, na.rm = TRUE),
+    lat = mean(lat, na.rm = TRUE),
+    # unique() can return several values for one site, which makes summarize()
+    # emit several rows for it; first() guarantees exactly one row per SITE_ID.
+    name = dplyr::first(name),
+    area = mean(area, na.rm = TRUE),
+    zmax = mean(zmax, na.rm = TRUE)
+  )
+
+
+# visitdf and waterchemdf are both cut from the same profile CSV, so download
+# it once instead of fetching the identical URL twice.
+profiledf <- readr::read_csv(
+  "https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
+)
+
+# Distinct (site, date, year, visit) combinations; joined to trophicdf by visit.
+visitdf <- profiledf |>
+  select(SITE_ID, date = DATE_PROFILE, year = YEAR, visit = VISIT_NO) |>
+  distinct()
+
+# Field measurements per depth reading; averaged into envdf further down.
+waterchemdf <- profiledf |>
+  select(
+    SITE_ID,
+    date = DATE_PROFILE,
+    depth = DEPTH,
+    temp = TEMP_FIELD, do = DO_FIELD, ph = PH_FIELD, cond = COND_FIELD
+  )
+# Secchi table: per site and date keep the SECMEAN reading (as `sd`) and the
+# CLEAR_TO_BOTTOM flag.
+sddf <-
+  "https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv" |>
+  readr::read_csv() |>
+  select(
+    SITE_ID,
+    date = DATE_SECCHI, sd = SECMEAN,
+    clear_to_bottom = CLEAR_TO_BOTTOM
+  )
+# Mean tp, tn and chla per site and sampling date (dates come from visitdf).
+trophicdf <- readr::read_csv(
+  "https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv"
+) |>
+  select(SITE_ID, visit = VISIT_NO, tp = PTL, tn = NTL, chla = CHLA) |>
+  left_join(visitdf, by = c("SITE_ID", "visit")) |>
+  select(-year, -visit) |>
+  group_by(SITE_ID, date) |>
+  summarize(
+    tp = mean(tp, na.rm = TRUE), tn = mean(tn, na.rm = TRUE),
+    chla = mean(chla, na.rm = TRUE),
+    .groups = "drop" # return an ungrouped tibble (and no regrouping message)
+  )
+
+
+# Genus-level phytoplankton abundance, nested into one tibble per site and
+# sampling date.
+phytodf <- readr::read_csv(
+  "https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv"
+) |>
+  select(
+    SITE_ID, date = DATEPHYT, depth = SAMPLE_DEPTH,
+    phyta = DIVISION, genus = GENUS, species = SPECIES,
+    tax = TAXANAME, abund = ABUND
+  ) |>
+  # Keep only the text before the first space of the division name.
+  mutate(phyta = gsub(" .*$", "", phyta)) |>
+  filter(!is.na(genus)) |>
+  group_by(SITE_ID, date, depth, phyta, genus) |>
+  # Drop the grouping after summing so nest() receives a plain tibble.
+  summarize(abund = sum(abund, na.rm = TRUE), .groups = "drop") |>
+  # One row per (SITE_ID, date); all other columns go into list-column phytodf.
+  nest(phytodf = -c(SITE_ID, date))
+
+phytodf$phytodf[[1]]
+# Average the readings taken at depth < 2 per site/date, then attach sddf and trophicdf.
+envdf <- waterchemdf |>
+  filter(depth < 2) |>
+  select(-depth) |>
+  group_by(SITE_ID, date) |>
+  summarise(
+    across(everything(), \(v) mean(v, na.rm = TRUE)), .groups = "drop"
+  ) |>
+  left_join(sddf, by = c("SITE_ID", "date")) |>
+  left_join(trophicdf, by = c("SITE_ID", "date"))
+# Join env, phytoplankton and site tables; sum Cyanophyta abundance per row.
+nla <- envdf |>
+  left_join(phytodf, by = c("SITE_ID", "date")) |>
+  left_join(sitedf, by = "SITE_ID") |>
+  filter(!purrr::map_lgl(phytodf, is.null)) |>
+  mutate(
+    cyanophyta = purrr::map(
+      phytodf,
+      \(x) x |>
+        dplyr::filter(phyta == "Cyanophyta") |>
+        summarize(cyanophyta = sum(abund, na.rm = TRUE))
+    )
+  ) |>
+  unnest(cyanophyta) |>
+  # `phyta` exists only inside the nested tibbles; drop the list-column itself.
+  select(-phytodf) |>
+  # NOTE(review): assumes the source flag is non-missing when the bottom is visible.
+  mutate(clear_to_bottom = !is.na(clear_to_bottom))
+
+# library(rmdify)
+# library(dwfun)
+# dwfun::init()
+```
+
+
+
+
+
--- a/SD/20240402_9_课后作业/_extensions
+++ b/SD/20240402_9_课后作业/_extensions
--- a/SD/20240409_1_model/images/10-Fold-CV.svg
+++ b/SD/20240409_1_model/images/10-Fold-CV.svg
--- a/SD/20240409_1_model/images/Ac_2tads.jpg
+++ b/SD/20240409_1_model/images/Ac_2tads.jpg
--- a/SD/20240409_1_model/images/Hatching-process.jpg
+++ b/SD/20240409_1_model/images/Hatching-process.jpg
--- a/SD/20240409_1_model/images/bad_workflow.png
+++ b/SD/20240409_1_model/images/bad_workflow.png
--- a/SD/20240409_1_model/images/cap.png
+++ b/SD/20240409_1_model/images/cap.png
--- a/SD/20240409_1_model/images/confusion-matrix-accuracy.png
+++ b/SD/20240409_1_model/images/confusion-matrix-accuracy.png
--- a/SD/20240409_1_model/images/confusion-matrix-sensitivity.png
+++ b/SD/20240409_1_model/images/confusion-matrix-sensitivity.png
--- a/SD/20240409_1_model/images/confusion-matrix-specificity.png
+++ b/SD/20240409_1_model/images/confusion-matrix-specificity.png
--- a/SD/20240409_1_model/images/confusion-matrix.png
+++ b/SD/20240409_1_model/images/confusion-matrix.png
--- a/SD/20240409_1_model/images/fe_venn.svg
+++ b/SD/20240409_1_model/images/fe_venn.svg
--- a/SD/20240409_1_model/images/fe_venn_info.svg
+++ b/SD/20240409_1_model/images/fe_venn_info.svg
--- a/SD/20240409_1_model/images/good_workflow.png
+++ b/SD/20240409_1_model/images/good_workflow.png
--- a/SD/20240409_1_model/images/grid_points.svg
+++ b/SD/20240409_1_model/images/grid_points.svg
--- a/SD/20240409_1_model/images/initial-split.svg
+++ b/SD/20240409_1_model/images/initial-split.svg
--- a/SD/20240409_1_model/images/ml_illustration.jpg
+++ b/SD/20240409_1_model/images/ml_illustration.jpg
--- a/SD/20240409_1_model/images/model-optimization.svg
+++ b/SD/20240409_1_model/images/model-optimization.svg
--- a/SD/20240409_1_model/images/parsnip-flagger.jpg
+++ b/SD/20240409_1_model/images/parsnip-flagger.jpg
--- a/SD/20240409_1_model/images/pointing.svg
+++ b/SD/20240409_1_model/images/pointing.svg
--- a/SD/20240409_1_model/images/rolling.svg
+++ b/SD/20240409_1_model/images/rolling.svg
--- a/SD/20240409_1_model/images/small_init.svg
+++ b/SD/20240409_1_model/images/small_init.svg
--- a/SD/20240409_1_model/images/snake.png
+++ b/SD/20240409_1_model/images/snake.png
--- a/SD/20240409_1_model/images/stack_01.png
+++ b/SD/20240409_1_model/images/stack_01.png
--- a/SD/20240409_1_model/images/stack_02.png
+++ b/SD/20240409_1_model/images/stack_02.png
--- a/SD/20240409_1_model/images/stack_03.png
+++ b/SD/20240409_1_model/images/stack_03.png
--- a/SD/20240409_1_model/images/stack_04.png
+++ b/SD/20240409_1_model/images/stack_04.png
--- a/SD/20240409_1_model/images/stack_05.png
+++ b/SD/20240409_1_model/images/stack_05.png
--- a/SD/20240409_1_model/images/steve.gif
+++ b/SD/20240409_1_model/images/steve.gif
--- a/SD/20240409_1_model/images/taxi.png
+++ b/SD/20240409_1_model/images/taxi.png
--- a/SD/20240409_1_model/images/taxi_spinning.svg
+++ b/SD/20240409_1_model/images/taxi_spinning.svg
--- a/SD/20240409_1_model/images/tm-org.png
+++ b/SD/20240409_1_model/images/tm-org.png
--- a/SD/20240409_1_model/images/tuning-overfitting-test-1.svg
+++ b/SD/20240409_1_model/images/tuning-overfitting-test-1.svg
--- a/SD/20240409_1_model/images/tuning-overfitting-train-1.svg
+++ b/SD/20240409_1_model/images/tuning-overfitting-train-1.svg
--- a/SD/20240409_1_model/images/what_is_ml.jpg
+++ b/SD/20240409_1_model/images/what_is_ml.jpg
--- a/SD/20240409_1_model/images/whole-game-boost.svg
+++ b/SD/20240409_1_model/images/whole-game-boost.svg
--- a/SD/20240409_1_model/images/whole-game-final-fit.jpg
+++ b/SD/20240409_1_model/images/whole-game-final-fit.jpg
--- a/SD/20240409_1_model/images/whole-game-final-performance.jpg
+++ b/SD/20240409_1_model/images/whole-game-final-performance.jpg
--- a/SD/20240409_1_model/images/whole-game-final-resamples.svg
+++ b/SD/20240409_1_model/images/whole-game-final-resamples.svg
--- a/SD/20240409_1_model/images/whole-game-final.svg
+++ b/SD/20240409_1_model/images/whole-game-final.svg
--- a/SD/20240409_1_model/images/whole-game-logistic.svg
+++ b/SD/20240409_1_model/images/whole-game-logistic.svg
--- a/SD/20240409_1_model/images/whole-game-model-1.jpg
+++ b/SD/20240409_1_model/images/whole-game-model-1.jpg
--- a/SD/20240409_1_model/images/whole-game-model-n.jpg
+++ b/SD/20240409_1_model/images/whole-game-model-n.jpg
--- a/SD/20240409_1_model/images/whole-game-resamples.jpg
+++ b/SD/20240409_1_model/images/whole-game-resamples.jpg
--- a/SD/20240409_1_model/images/whole-game-select.jpg
+++ b/SD/20240409_1_model/images/whole-game-select.jpg
--- a/SD/20240409_1_model/images/whole-game-select.svg
+++ b/SD/20240409_1_model/images/whole-game-select.svg
--- a/SD/20240409_1_model/images/whole-game-split-short.svg
+++ b/SD/20240409_1_model/images/whole-game-split-short.svg
--- a/SD/20240409_1_model/images/whole-game-split.jpg
+++ b/SD/20240409_1_model/images/whole-game-split.jpg
--- a/SD/20240409_1_model/images/whole-game-split.svg
+++ b/SD/20240409_1_model/images/whole-game-split.svg
--- a/SD/20240409_1_model/images/whole-game-transparent-model-1.jpg
+++ b/SD/20240409_1_model/images/whole-game-transparent-model-1.jpg
--- a/SD/20240409_1_model/images/whole-game-transparent-resamples.jpg
+++ b/SD/20240409_1_model/images/whole-game-transparent-resamples.jpg
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
ming	133cb1533d	render compile	2025-04-09 22:31:53 +08:00
ming	9ecb11b788	update	2025-04-09 22:28:17 +08:00
ming	6412cc5560	render compile	2025-03-25 00:46:18 +08:00
ming	bc628545b6	update leture 2	2025-03-25 00:45:07 +08:00
ming	1ead84ac70	render compile	2025-03-20 09:37:17 +08:00
ming	10ffdd46f3	add 3.1	2025-03-20 09:36:34 +08:00
ming	987a3eaea9	render compile	2025-03-20 09:35:47 +08:00
ming	c292d0ffab	add 2.3	2025-03-20 09:35:13 +08:00
ming	cb2e22fde5	update	2025-03-20 09:34:24 +08:00
ming	8a31a565a8	add some lesson for lesson6	2025-03-20 09:33:27 +08:00
ming	6e3f134635	update	2025-03-19 16:41:27 +08:00
ming	c883d6df6a	update	2025-03-19 16:40:59 +08:00
ming	fda04d79f1	update	2025-03-19 16:33:40 +08:00
ming	a4596a9836	update	2025-03-19 16:31:50 +08:00
ming	7902f28a7a	update gitignore	2025-03-17 20:35:44 +08:00
ming	654fb2f024	render compile	2025-03-17 20:32:11 +08:00
ming	352ca2b1ce	render compile	2025-03-17 20:30:20 +08:00
ming	82248cb24b	render compile	2025-03-17 20:23:18 +08:00
ming	a21430385f	update	2025-03-17 20:16:08 +08:00
ming	78196d49ed	render compile	2025-03-17 20:06:33 +08:00
ming	30dd043c3b	render compile	2025-03-17 19:50:08 +08:00
ming	2db87d5e89	update	2025-03-17 19:20:50 +08:00