Compare commits
	
		
			20 Commits
		
	
	
		
			main
			...
			6412cc5560
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 6412cc5560 | |||
| bc628545b6 | |||
| 1ead84ac70 | |||
| 10ffdd46f3 | |||
| 987a3eaea9 | |||
| c292d0ffab | |||
| cb2e22fde5 | |||
| 8a31a565a8 | |||
| 6e3f134635 | |||
| c883d6df6a | |||
| fda04d79f1 | |||
| a4596a9836 | |||
| 7902f28a7a | |||
| 654fb2f024 | |||
| 352ca2b1ce | |||
| 82248cb24b | |||
| a21430385f | |||
| 78196d49ed | |||
| 30dd043c3b | |||
| 2db87d5e89 | 
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						| @@ -7,3 +7,4 @@ _variables.yml | |||||||
| _freeze/ | _freeze/ | ||||||
| *_cache/ | *_cache/ | ||||||
| *_files/ | *_files/ | ||||||
|  | SD/_*/ | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								.source_state
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1 @@ | |||||||
|  | 454c267b732cda3ca58332ab3883bec6 | ||||||
							
								
								
									
										382
									
								
								Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,382 @@ | |||||||
|  | # Makefile for Quarto Project Automation | ||||||
|  | # Detect OS | ||||||
|  | HOSTNAME := $(shell hostname) | ||||||
|  | OS := $(shell uname | tr A-Z a-z) | ||||||
|  | ifeq ($(OS), darwin) | ||||||
|  |     SEDI := sed -i '' | ||||||
|  |     OS := OSX | ||||||
|  | else ifeq ($(OS), linux) | ||||||
|  |     SEDI := sed -i | ||||||
|  |     OS := linux | ||||||
|  | else | ||||||
|  |     $(error Unknown operating system) | ||||||
|  | endif | ||||||
|  |  | ||||||
|  | # Fetch Git branch and project details | ||||||
|  | branchname := $(shell git branch --show-current) | ||||||
|  | reponame := $(shell basename $(shell git rev-parse --show-toplevel)) | ||||||
|  | projtype := $(shell basename $(shell dirname $(shell git rev-parse --show-toplevel))) | ||||||
|  | pubtype := $(if $(findstring PUB,$(branchname)),public,protected) | ||||||
|  | remotedir := dwuser@drwater.net:/home/www/drc/$(projtype)/$(pubtype)/$(reponame)/$(branchname) | ||||||
|  | outputdir := $(shell awk -F': *' '/^ *output-dir:/ {print $$2 "/" }' ./_quarto.yml) | ||||||
|  | siteurl := https://drc.drwater.net/$(projtype)/$(pubtype)/$(reponame)/$(branchname) | ||||||
|  |  | ||||||
|  | branchnames := "TX\|FJ\|YF\|ZY\|WW\|JB\|YY\|YJ\|DYF" | ||||||
|  |  | ||||||
|  | # Variables for colors and port | ||||||
|  | bcolor := grey | ||||||
|  | port := 4199 | ||||||
|  |  | ||||||
|  | # Set background color based on branch name | ||||||
|  | ifeq ($(findstring R1,$(branchname)),R1) | ||||||
|  |     bcolor := orange | ||||||
|  | else ifeq ($(findstring R2,$(branchname)),R2) | ||||||
|  |     bcolor := lightblue | ||||||
|  | else ifeq ($(findstring R3,$(branchname)),R3) | ||||||
|  |     bcolor := lightgreen | ||||||
|  | else ifeq ($(findstring PUB,$(branchname)),PUB) | ||||||
|  |     bcolor := light | ||||||
|  | endif | ||||||
|  |  | ||||||
|  | # Set port based on branch name | ||||||
|  | branch_ports := main:4200 SM:4201 TX:4202 FJ:4203 YF:4204 ZY:4205 WW:4206 JB:4207 YY:4208 YJ:4209 DYF:4210 | ||||||
|  | port := $(shell echo $(branch_ports) | tr " " "\n" | grep -E "^$(branchname):" | sed -E 's/^$(branchname):([0-9]+)/\1/') | ||||||
|  | port := $(if $(port),$(port),4199) | ||||||
|  |  | ||||||
|  | # Define the state file | ||||||
|  | STATE_FILE := .source_state | ||||||
|  |  | ||||||
|  | # checkfile | ||||||
|  | GREPSTR := " \|(\|)\|^submit\|^analysis\|_cache\|_freeze\|^site_libs\|^www" | ||||||
|  |  | ||||||
|  | # Default target | ||||||
|  | .PHONY: all preview readme rsync local clean upload fix_links force check_git_status | ||||||
|  |  | ||||||
|  | all: local upload clean commit push | ||||||
|  | 	 | ||||||
|  | force: updrefbib check_git_status updvariable render | ||||||
|  |  | ||||||
|  | local: updrefbib check_git_status updvariable lazyrender | ||||||
|  |  | ||||||
|  | updmakefile: | ||||||
|  | 	@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \ | ||||||
|  | 		echo "基于 $$HOME/bin/publish2dw.Makefile 更新本项目 Makefile..."; \ | ||||||
|  | 		cp "$$HOME/bin/publish2dw.Makefile" "./Makefile"; \ | ||||||
|  | 		git add "Makefile" && git commit -m "Update Makefile"; \ | ||||||
|  | 		echo "本项目Makefile更新完成."; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "Makefile 无需在本系统上更新."; \ | ||||||
|  | 	fi | ||||||
|  |  | ||||||
|  |  | ||||||
|  | updrefbib: | ||||||
|  | 	@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \ | ||||||
|  | 		echo "更新本项目参考文献..."; \ | ||||||
|  | 		cp "$$HOME/literature/Ref.bib" "./BB/"; \ | ||||||
|  | 		echo "推送本地参考文献到远程服务器..."; \ | ||||||
|  | 		rsync -azvu --progress "$$HOME/literature/Ref.bib" "drwater.net:/home/www/drc/datapool/public/BB/Ref.bib"; \ | ||||||
|  | 		echo "本项目参考文献更新完成."; \ | ||||||
|  | 	else \ | ||||||
|  | 	  echo "检查网络连通性..."; \ | ||||||
|  | 		if ping -c 1 -W 1 drc.drwater.net > /dev/null 2>&1; then \ | ||||||
|  | 			echo "网络正常,更新本项目参考文献..."; \ | ||||||
|  | 			wget -O BB/Ref.bib "https://drc.drwater.net/datapool/public/BB/Ref.bib"; \ | ||||||
|  | 			echo "本项目参考文献更新完成."; \ | ||||||
|  | 		else \ | ||||||
|  | 			echo "网络不可用,跳过参考文献更新."; \ | ||||||
|  | 		fi; \ | ||||||
|  | 	fi; \ | ||||||
|  | 	git add BB/Ref.bib; \ | ||||||
|  | 	if [ "$$(git diff --cached)" ]; then \ | ||||||
|  | 		git commit -m "Update Ref.bib"; \ | ||||||
|  | 	fi | ||||||
|  |  | ||||||
|  | check_git_status: | ||||||
|  | 	@uncommitted=$$(git status --porcelain); \ | ||||||
|  | 	if [ -n "$$uncommitted" ]; then \ | ||||||
|  | 	  git status; \ | ||||||
|  | 		read -p "当前存在未提交的修改(如上),是否要提交?(Y/N, default is N): " answer; \ | ||||||
|  | 		answer=$${answer:-N}; \ | ||||||
|  | 		if [ "$$answer" = "Y" ] || [ "$$answer" = "y" ]; then \ | ||||||
|  | 			read -p "请输入修改说明: " message; \ | ||||||
|  | 			git add . && git commit -m "$$message"; \ | ||||||
|  | 		else \ | ||||||
|  | 			echo "未提交,如后续操作为pull,则无法继续"; \ | ||||||
|  | 		fi; \ | ||||||
|  | 	fi | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Lazy render with hash checking | ||||||
|  | lazyrender: | ||||||
|  | 	@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \ | ||||||
|  | 	if [ ! -f $(STATE_FILE) ] || [ "$$current_hash" != "$$(cat $(STATE_FILE))" ]; then \ | ||||||
|  | 		echo "源文件发生变化, 重新编译..."; \ | ||||||
|  | 		echo "$$current_hash" > $(STATE_FILE); \ | ||||||
|  | 		$(MAKE) render; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "源文件无变化, 跳过编译..."; \ | ||||||
|  | 		exit 0; \ | ||||||
|  | 	fi | ||||||
|  |  | ||||||
|  | # Render target | ||||||
|  | render: | ||||||
|  | 	@quarto render | ||||||
|  |  | ||||||
|  | commit: | ||||||
|  | 	@echo "提交修改(commit)..."; \ | ||||||
|  | 		git add .; \ | ||||||
|  | 		if [ -n "$$(git diff --cached)" ]; then \ | ||||||
|  | 		git commit -m "render compile"; \ | ||||||
|  | 		else \ | ||||||
|  | 		echo "没有修改记录,跳过."; \ | ||||||
|  | 		fi; \ | ||||||
|  |  | ||||||
|  | # Pull changes from the specified branch based on the current branch | ||||||
|  | pull: | ||||||
|  | 	@echo "从远程拉取项目更新..."; \ | ||||||
|  | 		$(MAKE) check_git_status; \ | ||||||
|  | 		git pull; \ | ||||||
|  | 		current_branch=$$(git rev-parse --abbrev-ref HEAD); \ | ||||||
|  | 		if [ "$$current_branch" = main ]; then \ | ||||||
|  | 		  echo "当前分枝为$$current_branch."; \ | ||||||
|  | 			remote_branch=$$(git branch --remote | grep -v 'main' | grep $(branchnames) | awk '{print $$1}' | sed 's/origin\///' | head -n 1); \ | ||||||
|  | 			if [ -n "$$remote_branch" ]; then \ | ||||||
|  | 		    echo "尝试从远程分枝$$remote_branch 拉取更新..."; \ | ||||||
|  | 				git pull --rebase origin $$remote_branch; \ | ||||||
|  | 			else \ | ||||||
|  | 				echo "远程无可用分支$$remote_branch."; \ | ||||||
|  | 			fi; \ | ||||||
|  | 			else \ | ||||||
|  | 			echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \ | ||||||
|  | 			git pull --rebase origin main; \ | ||||||
|  | 		fi | ||||||
|  |  | ||||||
|  | # Pull changes from the main branch | ||||||
|  | pullmain: | ||||||
|  | 	$(MAKE) check_git_status; \ | ||||||
|  | 		@current_branch=$$(git rev-parse --abbrev-ref HEAD); \ | ||||||
|  | 		@echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \ | ||||||
|  | 		git pull --rebase origin main; \ | ||||||
|  |  | ||||||
|  | push: | ||||||
|  | 	@echo "推送到远程..."; \ | ||||||
|  | 		git push | ||||||
|  |  | ||||||
|  | filehash: | ||||||
|  | 	@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \ | ||||||
|  | 	echo "$$current_hash" > $(STATE_FILE) | ||||||
|  |  | ||||||
|  | # Preview the site on a specific port | ||||||
|  | preview: | ||||||
|  | 	@quarto preview --port $(port) | ||||||
|  |  | ||||||
|  | # Generate README.md | ||||||
|  | readme: | ||||||
|  | 	@quarto render index.qmd -t markdown -o README.md | ||||||
|  | 	@sed -e '/^---/,/^---/d' "$(outputdir)/README.md" > README.md | ||||||
|  | 	@rm "$(outputdir)/README.md" | ||||||
|  |  | ||||||
|  | # Sync files with remote server | ||||||
|  | rsync: | ||||||
|  | 	@rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)" | ||||||
|  |  | ||||||
|  | # Open local site | ||||||
|  | open: | ||||||
|  | 	@if [ "$(OS)" = "OSX" ]; then open "$(outputdir)/index.html"; fi | ||||||
|  |  | ||||||
|  | # Clean unnecessary files | ||||||
|  | clean: | ||||||
|  | 	@rm -f ./*.spl  ./*.bbl ./*.blg ./*.log ./*.tex ./*.bcf ./*.tex.sedbak ./*.fdb_latexmk | ||||||
|  |  | ||||||
|  | # Upload files to server and fix links | ||||||
|  | upload: backupdocx | ||||||
|  | 	@mkdir -p "$(outputdir)" && chmod -R 2775 "$(outputdir)" | ||||||
|  | 	@$(MAKE) fix_links | ||||||
|  | 	@if rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; then \ | ||||||
|  | 		if [ "$(OS)" = "OSX" ]; then \ | ||||||
|  | 			open "$(siteurl)" 2>/dev/null; \ | ||||||
|  | 		fi; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "Rsync failed. Attempting alternative upload method..."; \ | ||||||
|  | 		mkdir -p "$(reponame)"; \ | ||||||
|  | 		rsync -azvu --progress --delete -r "$(reponame)" "$(dir $(remotedir))"; \ | ||||||
|  | 		rm -rf "$(reponame)"; \ | ||||||
|  | 		rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; \ | ||||||
|  | 		if [ "$(OS)" = "OSX" ]; then \ | ||||||
|  | 			open "$(siteurl)" 2>/dev/null; \ | ||||||
|  | 		fi; \ | ||||||
|  | 	fi | ||||||
|  |  | ||||||
|  | backupdocx: | ||||||
|  | 	@echo "备份MS.docx文件..."; \ | ||||||
|  | 	currentcommithash=$$(git rev-parse --short HEAD); \ | ||||||
|  | 	datetime=$$(git show -s --format=%ci $$currentcommithash | sed 's/[-: ]//g' | cut -c3-12); \ | ||||||
|  | 	mkdir -p TC/MS/; \ | ||||||
|  | 	existing_file=$$(find TC/MS -name "MS*.docx" -exec cmp -s www/MS/MS.docx {} \; -print -quit); \ | ||||||
|  | 	if [ -n "$$existing_file" ]; then \ | ||||||
|  | 		echo "与www/MS/MS.docx 内容相同的备份文件已存在: $$existing_file"; \ | ||||||
|  | 		echo "无需备份."; \ | ||||||
|  | 	else \ | ||||||
|  | 		if [ ! -e TC/MS/MS$${datetime}_$${currentcommithash}.docx ]; then \ | ||||||
|  | 			cp www/MS/MS.docx TC/MS/MS$${datetime}_$${currentcommithash}.docx; \ | ||||||
|  | 			echo "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx完成."; \ | ||||||
|  | 			git add TC/MS/MS$${datetime}_$${currentcommithash}.docx; \ | ||||||
|  | 			if [ "$$(git diff --cached)" ]; then \ | ||||||
|  | 				git commit -m "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx"; \ | ||||||
|  | 			fi; \ | ||||||
|  | 		else \ | ||||||
|  | 			echo "TC/MS/MS$${datetime}_$${currentcommithash}.docx已存在,无需备份."; \ | ||||||
|  | 		fi; \ | ||||||
|  | 	fi; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | trackchange: | ||||||
|  | 	@if [ "$(projtype)" != "manuscript" ]; then \ | ||||||
|  | 	exit 0; \ | ||||||
|  | 	fi; \ | ||||||
|  | 	echo "选择两个提交以比较文档..."; \ | ||||||
|  | 	hashes=$$(git log --pretty=format:'%h: %s BY %an (%ar)' \ | ||||||
|  | 	| grep -E "$$(ls TC/MS/*.docx | xargs -n1 basename | sed -E 's/MS.*_([0-9a-f]+)\.docx/\1/' | tr '\n' '|')SMT_】" \ | ||||||
|  | 	| fzf --multi --reverse --preview="echo {}" ); \ | ||||||
|  | 	echo $$hashes; \ | ||||||
|  | 	hash1=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | tail -n 1 | awk '{print $$1}' | tr -d ':'); \ | ||||||
|  | 	hash1=$$(git rev-parse --short $${hash1}^); \ | ||||||
|  | 	datetime1=$$(git show -s --format=%ci $$hash1 | sed 's/[-: ]//g' | cut -c3-12); \ | ||||||
|  | 	hash2=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | head -n 1 | awk '{print $$1}' | tr -d ':'); \ | ||||||
|  | 	hash2=$$(git rev-parse --short $${hash2}^); \ | ||||||
|  | 	datetime2=$$(git show -s --format=%ci $$hash2 | sed 's/[-: ]//g' | cut -c3-12); \ | ||||||
|  | 	if [ -z "$$hash1" ] || [ -z "$$hash2" ]; then \ | ||||||
|  | 	echo "必须选择两个提交."; \ | ||||||
|  | 	exit 1; \ | ||||||
|  | 	fi; \ | ||||||
|  | 	doc1="TC/MS/MS$${datetime1}_$$hash1.docx"; \ | ||||||
|  | 	echo "$$doc1"; \ | ||||||
|  | 	doc2="TC/MS/MS$${datetime2}_$$hash2.docx"; \ | ||||||
|  | 	echo "$$doc2"; \ | ||||||
|  | 	if [ -f "$$doc1" ] && [ -f "$$doc2" ] && [ "$$doc1" != "$$doc2" ]; then \ | ||||||
|  | 	echo "打开文件: $$doc1 和 $$doc2"; \ | ||||||
|  | 	open "$$doc1" "$$doc2"; \ | ||||||
|  | 	printf "MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}" | pbcopy; \ | ||||||
|  | 	echo "请在word中对比两个版本形成带修改痕迹的版本,并保存至TC/MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}.docx!"; \ | ||||||
|  | 	else \ | ||||||
|  | 	echo "一个或两个文件不存在: $$doc1, $$doc2"; \ | ||||||
|  | 	exit 1; \ | ||||||
|  | 	fi | ||||||
|  |  | ||||||
|  | # Fix links in www directory | ||||||
|  | fix_links: | ||||||
|  | 	@find ./www -type f -name "*.html" -exec sed -i.bak \ | ||||||
|  | 		-e "s/{{< var branch >}}/$(branchname)/g" \ | ||||||
|  | 		-e "s/{{< var pubtype >}}/$(pubtype)/g" \ | ||||||
|  | 		-e "s/{{< var projtype >}}/$(projtype)/g" \ | ||||||
|  | 		-e "s/{{< var reponame >}}/$(reponame)/g" \ | ||||||
|  | 		-e "s/$(reponame)\/blob/$(reponame)\/raw\/branch/g" \ | ||||||
|  | 		-e "s/$(reponame)\/edit/$(reponame)\/_edit/g" {} + | ||||||
|  | 	@find ./www -type f -name "*.bak" -exec rm {} + | ||||||
|  |  | ||||||
|  | updvariable: | ||||||
|  | 	@touch _variables.yml  # 如果文件不存在则创建 | ||||||
|  | 	@grep -q '^reponame:' _variables.yml || echo "reponame: $(reponame)" >> _variables.yml | ||||||
|  | 	@if grep -q '^reponame:' _variables.yml; then \ | ||||||
|  | 		$(SEDI) 's/^reponame:.*/reponame: $(reponame)/' _variables.yml; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "reponame: $(reponame)" >> _variables.yml; \ | ||||||
|  | 	fi | ||||||
|  | 	@grep -q '^projtype:' _variables.yml || echo "projtype: $(projtype)" >> _variables.yml | ||||||
|  | 	@if grep -q '^projtype:' _variables.yml; then \ | ||||||
|  | 		$(SEDI) 's/^projtype:.*/projtype: $(projtype)/' _variables.yml; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "projtype: $(projtype)" >> _variables.yml; \ | ||||||
|  | 	fi | ||||||
|  | 	@grep -q '^branch:' _variables.yml || echo "branch: $(branchname)" >> _variables.yml | ||||||
|  | 	@if grep -q '^branch:' _variables.yml; then \ | ||||||
|  | 		$(SEDI) 's/^branch:.*/branch: $(branchname)/' _variables.yml; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "branch: $(branchname)" >> _variables.yml; \ | ||||||
|  | 	fi | ||||||
|  | 	@grep -q '^pubtype:' _variables.yml || echo "pubtype: $(pubtype)" >> _variables.yml | ||||||
|  | 	@if grep -q '^pubtype:' _variables.yml; then \ | ||||||
|  | 		$(SEDI) 's/^pubtype:.*/pubtype: $(pubtype)/' _variables.yml; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "pubtype: $(pubtype)" >> _variables.yml; \ | ||||||
|  | 	fi | ||||||
|  | 	@grep -q '^nwAB:' _variables.yml || echo "nwAB: $(nwAB)" >> _variables.yml | ||||||
|  | 	@if grep -q '^nwAB:' _variables.yml; then \ | ||||||
|  | 		$(SEDI) 's/^nwAB:.*/nwAB: $(nwAB)/' _variables.yml; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "nwAB: $(nwAB)" >> _variables.yml; \ | ||||||
|  | 	fi | ||||||
|  | 	@grep -q '^nwMS:' _variables.yml || echo "nwMS: $(nwMS)" >> _variables.yml | ||||||
|  | 	@if grep -q '^nwMS:' _variables.yml; then \ | ||||||
|  | 		$(SEDI) 's/^nwMS:.*/nwMS: $(nwMS)/' _variables.yml; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "nwMS: $(nwMS)" >> _variables.yml; \ | ||||||
|  | 	fi | ||||||
|  | 	@grep -q '^figtblMS:' _variables.yml || echo "figtblMS: $(figtblMS)" >> _variables.yml | ||||||
|  | 	@if grep -q '^figtblMS:' _variables.yml; then \ | ||||||
|  | 		$(SEDI) 's/^figtblMS:.*/figtblMS: $(figtblMS)/' _variables.yml; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "figtblMS: $(figtblMS)" >> _variables.yml; \ | ||||||
|  | 	fi | ||||||
|  | 	@grep -q '^figtblSM:' _variables.yml || echo "figtblSM: $(figtblSM)" >> _variables.yml | ||||||
|  | 	@if grep -q '^figtblSM:' _variables.yml; then \ | ||||||
|  | 		$(SEDI) 's/^figtblSM:.*/figtblSM: $(figtblSM)/' _variables.yml; \ | ||||||
|  | 	else \ | ||||||
|  | 		echo "figtblSM: $(figtblSM)" >> _variables.yml; \ | ||||||
|  | 	fi | ||||||
|  | 	@mkpapervar | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Help: list all available commands with descriptions (English and Chinese) | ||||||
|  | help: | ||||||
|  | 	@echo "Makefile for Quarto Project Automation" | ||||||
|  | 	@echo "=======================================" | ||||||
|  | 	@echo "Available targets (English):" | ||||||
|  | 	@echo "" | ||||||
|  | 	@echo "  make all           - Execute local build, upload, clean, and commit" | ||||||
|  | 	@echo "  make force         - Force render, hash update, upload, clean, and commit" | ||||||
|  | 	@echo "  make local         - Check git status and perform a lazy render if changes detected" | ||||||
|  | 	@echo "  make check_git_status - Check for uncommitted changes and ask to commit them" | ||||||
|  | 	@echo "  make lazyrender    - Render if source files have changed based on hash comparison" | ||||||
|  | 	@echo "  make render        - Force Quarto to render the project" | ||||||
|  | 	@echo "  make commit        - Commit changes if no previous uncommitted changes" | ||||||
|  | 	@echo "  make filehash      - Generate and store the file hash of source files" | ||||||
|  | 	@echo "  make preview       - Preview the site locally on the specific port (default: 4199)" | ||||||
|  | 	@echo "  make readme        - Render README.md from Quarto index.qmd" | ||||||
|  | 	@echo "  make rsync         - Sync output files with the remote server" | ||||||
|  | 	@echo "  make open          - Open the generated site locally in the browser" | ||||||
|  | 	@echo "  make clean         - Clean up unnecessary files" | ||||||
|  | 	@echo "  make upload        - Upload files to the server and fix links" | ||||||
|  | 	@echo "  make fix_links     - Fix HTML links in the 'www' directory for the remote server" | ||||||
|  | 	@echo "  make updmakefile   - Update the Makefile" | ||||||
|  | 	@echo "  make help          - Display this help message" | ||||||
|  | 	@echo "" | ||||||
|  | 	@echo "Available targets (中文):" | ||||||
|  | 	@echo "" | ||||||
|  | 	@echo "  make all           - 执行本地构建、上传、清理和提交" | ||||||
|  | 	@echo "  make force         - 强制渲染、更新哈希、上传、清理并提交" | ||||||
|  | 	@echo "  make local         - 检查Git状态,若检测到更改则进行懒惰渲染" | ||||||
|  | 	@echo "  make check_git_status - 检查未提交的更改,并询问是否提交" | ||||||
|  | 	@echo "  make lazyrender    - 如果源文件发生更改,则根据哈希比较进行渲染" | ||||||
|  | 	@echo "  make render        - 强制 Quarto 渲染项目" | ||||||
|  | 	@echo "  make commit        - 如果没有未提交的更改则提交" | ||||||
|  | 	@echo "  make filehash      - 生成并存储源文件的哈希值" | ||||||
|  | 	@echo "  make preview       - 本地在特定端口预览网站 (默认: 4199)" | ||||||
|  | 	@echo "  make readme        - 从 Quarto 的 index.qmd 生成 README.md" | ||||||
|  | 	@echo "  make rsync         - 将输出文件同步到远程服务器" | ||||||
|  | 	@echo "  make open          - 在浏览器中打开生成的网站" | ||||||
|  | 	@echo "  make clean         - 清理不必要的文件" | ||||||
|  | 	@echo "  make upload        - 上传文件到服务器并修复链接" | ||||||
|  | 	@echo "  make fix_links     - 修复 'www' 目录中的 HTML 链接" | ||||||
|  | 	@echo "  make updmakefile   - 更新本项目 Makefile" | ||||||
|  | 	@echo "  make help          - 显示此帮助信息" | ||||||
|  | 	@echo "" | ||||||
|  | 	@echo "Environment variables (English and 中文):" | ||||||
|  | 	@echo "  bcolor             - Background color based on branch name (基于分支名的背景颜色)" | ||||||
|  | 	@echo "  port               - Port number based on branch name (基于分支名的端口号)" | ||||||
|  | 	@echo "  STATE_FILE         - File for storing hash state of source files (用于存储源文件哈希状态的文件)" | ||||||
|  | 	@echo "  siteurl            - The URL where the site will be hosted (网站托管的 URL)" | ||||||
|  | 	@echo "" | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -47,9 +47,8 @@ knitr::opts_chunk$set(echo = TRUE) | |||||||
| ### 课件 | ### 课件 | ||||||
| 
 | 
 | ||||||
| - 采用`R语言`+`quarto`完成 | - 采用`R语言`+`quarto`完成 | ||||||
| - 网页公开:[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html) | - 网页公开:[https://drc.drwater.net/course/public/RWEP/PUB/index.html](https://drc.drwater.net/course/public/RWEP/PUB/index.html) | ||||||
| - 课件代码:[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git) | - 课件代码:[https://git.drwater.net/course/RWEP.git](https://git.drwater.net/course/RWEP.git) | ||||||
| - 代码web界面:[https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/](https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/) |  | ||||||
| 
 | 
 | ||||||
| ## 如何学习接下来的内容? | ## 如何学习接下来的内容? | ||||||
| 
 | 
 | ||||||
| @@ -67,7 +66,7 @@ knitr::opts_chunk$set(echo = TRUE) | |||||||
| 
 | 
 | ||||||
| ## Rstudio Server使用 | ## Rstudio Server使用 | ||||||
| 
 | 
 | ||||||
| - 服务网址:[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/) | - 服务网址:[https://rs1.drwater.net/](https://rs1.drwater.net/) | ||||||
| - 每位同学使用1个账号,随机生成 | - 每位同学使用1个账号,随机生成 | ||||||
| - 密码:**** | - 密码:**** | ||||||
| - 后面的实践课程可在该服务器上完成 | - 后面的实践课程可在该服务器上完成 | ||||||
							
								
								
									
										138
									
								
								SD/1.1_R语言介绍/_demo.qmd
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,138 @@ | |||||||
|  | --- | ||||||
|  | title: "Lesson 6" | ||||||
|  | format: html | ||||||
|  | --- | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ```{r} | ||||||
|  |  | ||||||
|  | https://rs1.drwater.net | ||||||
|  |  | ||||||
|  | username:  | ||||||
|  |   - ruser01 | ||||||
|  |   - ruser02 | ||||||
|  |   - ruser03 | ||||||
|  |   - ruser04 | ||||||
|  |   - ruser05 | ||||||
|  |   - ruser06 | ||||||
|  |  | ||||||
|  | RWEP2025 | ||||||
|  |  | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # 安装包 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ```{r} | ||||||
|  | install.packages("tidyverse") | ||||||
|  |  | ||||||
|  | x <- c(1:10, NA) | ||||||
|  |  | ||||||
|  | hist(x) | ||||||
|  |  | ||||||
|  | mean(x, na.rm = TRUE) | ||||||
|  |  | ||||||
|  | median(x, na.rm = TRUE) | ||||||
|  |  | ||||||
|  | sd(x, na.rm = TRUE) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | for(i in 1:10){ | ||||||
|  |   print(i) | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | x + y + x * y | ||||||
|  |  | ||||||
|  | myfunc <- function(x, y = 3) { | ||||||
|  |   x + y + x * y | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | myfunc(1, 2) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | myfunc(10) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | c(FALSE, 2, 1:3, 3)  | ||||||
|  |  | ||||||
|  | c(FALSE, 2, 1:3, 3) > 1 | ||||||
|  |  | ||||||
|  | all(c(FALSE, 2, 1:3, 3) > 1) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | c(1L,2L,3L) | ||||||
|  |  | ||||||
|  | any(c(FALSE, 2, 1:3, 3) > 1) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | x <- 10 | ||||||
|  |  | ||||||
|  | sin(x) = ? | ||||||
|  |  | ||||||
|  | paste("sin(x) = ", sin(x), sep = " ") | ||||||
|  |  | ||||||
|  | paste0("sin(x) = ", sin(x)) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | substr("Monday", 1, 3) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # tidy | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ```{r} | ||||||
|  | require(readxl) | ||||||
|  |  | ||||||
|  | aqdf <-readxl::read_xlsx("../../data/airquality.xlsx", sheet = "metadf") | ||||||
|  |  | ||||||
|  | # install.packages("skimr") | ||||||
|  |  | ||||||
|  | aqdf |> | ||||||
|  |   skimr::skim() | ||||||
|  |  | ||||||
|  | # base | ||||||
|  |  | ||||||
|  | # tidyverse | ||||||
|  |  | ||||||
|  | aqdf |> | ||||||
|  |   dplyr::group_by(Area) |> | ||||||
|  |   dplyr::summarize( | ||||||
|  |     n = n(), | ||||||
|  |     lon.mean = mean(lon, na.rm = TRUE), | ||||||
|  |     lon.sd = sd(lat, na.rm = TRUE) | ||||||
|  |   ) |> | ||||||
|  |   dplyr::filter(Area %in% c("北京市", "天津市", "上海市", "重庆市")) |> | ||||||
|  |   ggplot(aes(x = n, y = lon.mean)) + | ||||||
|  |   geom_point() + | ||||||
|  |   geom_line() + | ||||||
|  |   geom_errorbar( | ||||||
|  |     aes(ymin = lon.mean - lon.sd, | ||||||
|  |       ymax = lon.mean + lon.sd) | ||||||
|  |   ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | readxl::read_xlsx("./airquality.xlsx") | ||||||
|  |  | ||||||
|  | flights|> | ||||||
|  |   filter(dest=="IAH")|> | ||||||
|  |   group_by(year,month,day)|>summarize(n=n(), | ||||||
|  | delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10) | ||||||
|  |  | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz") | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||||
| @@ -264,6 +264,9 @@ t.test(x, y) | |||||||
| wilcox.test(x, y) | wilcox.test(x, y) | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | ### [什么是 Wilcoxon-Mann-Whitney检验?](https://zhuanlan.zhihu.com/p/613524533) | ||||||
|  | 
 | ||||||
| ## 统计函数 | ## 统计函数 | ||||||
| 
 | 
 | ||||||
| ### 创建向量的直方图 | ### 创建向量的直方图 | ||||||
| @@ -792,4 +795,4 @@ names(Y) <- c("colA", "colB", "colC") | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||||
| @@ -37,7 +37,7 @@ require(learnr) | |||||||
| 
 | 
 | ||||||
| ## 下载excel文件 | ## 下载excel文件 | ||||||
| 
 | 
 | ||||||
| [https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx) | [https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx](https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| ## Tidy data | ## Tidy data | ||||||
							
								
								
									
										21
									
								
								SD/1.9_课后作业6/data.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,21 @@ | |||||||
|  | name,age,score | ||||||
|  | Alice,25,85 | ||||||
|  | Bob,30,92 | ||||||
|  | Charlie,28,89 | ||||||
|  | David,22,95 | ||||||
|  | Eva,35,87 | ||||||
|  | Frank,27,91 | ||||||
|  | Grace,29,88 | ||||||
|  | Helen,26,93 | ||||||
|  | Ivan,31,86 | ||||||
|  | Jack,24,94 | ||||||
|  | Kelly,32,89 | ||||||
|  | Lily,28,90 | ||||||
|  | Mike,33,85 | ||||||
|  | Nancy,27,92 | ||||||
|  | Olivia,34,88 | ||||||
|  | Peter,29,93 | ||||||
|  | Queen,25,89 | ||||||
|  | Ryan,30,94 | ||||||
|  | Samantha,26,91 | ||||||
|  | Tom,31,87 | ||||||
| 
 | 
| @@ -79,5 +79,5 @@ Tom,31,87 | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||||
| 
 | 
 | ||||||
| @@ -321,5 +321,4 @@ if (FALSE) { | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||||
| 
 |  | ||||||
| @@ -323,5 +323,5 @@ flights |> | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||||
| 
 | 
 | ||||||
| @@ -1,49 +0,0 @@ | |||||||
| --- |  | ||||||
| title: "课后作业9" |  | ||||||
| subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution |  | ||||||
| author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心 |  | ||||||
| date: today |  | ||||||
| lang: zh |  | ||||||
| format: |  | ||||||
|   revealjs: |  | ||||||
|     theme: dark |  | ||||||
|     slide-number: true |  | ||||||
|     chalkboard: |  | ||||||
|       buttons: true |  | ||||||
|     preview-links: auto |  | ||||||
|     lang: zh |  | ||||||
|     toc: true |  | ||||||
|     toc-depth: 1 |  | ||||||
|     toc-title: 大纲 |  | ||||||
|     logo: ./_extensions/inst/img/ucaslogo.png |  | ||||||
|     css: ./_extensions/inst/css/revealjs.css |  | ||||||
|     pointer: |  | ||||||
|       key: "p" |  | ||||||
|       color: "#32cd32" |  | ||||||
|       pointerSize: 18 |  | ||||||
| revealjs-plugins: |  | ||||||
|   - pointer |  | ||||||
| filters: |  | ||||||
|   - d2 |  | ||||||
| --- |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| #| include: false |  | ||||||
| #| cache: false |  | ||||||
| lang <- "cn" |  | ||||||
| require(tidyverse) |  | ||||||
| require(learnr) |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| ## 第9次课后作业 |  | ||||||
|  |  | ||||||
| 自选数据集,使用R语言开展不同因子(如年份、季节、处理方式等)间某指标的差异分析,采用图表方式形成简要报告。 |  | ||||||
|  |  | ||||||
| 作业模板:[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ## 欢迎讨论!{.center} |  | ||||||
|  |  | ||||||
|  |  | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` |  | ||||||
|  |  | ||||||
| @@ -1,8 +0,0 @@ | |||||||
| --- |  | ||||||
| title: 课后作业9 |  | ||||||
| author: 姓名 |  | ||||||
| format: html |  | ||||||
| --- |  | ||||||
|  |  | ||||||
| 要求:自选数据集,使用R语言开展不同因子间(如年份、季节、处理方式等)某指标的差异分析,采用图表+文字说明等方式形成简要报告。 |  | ||||||
|  |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../../_extensions |  | ||||||
| @@ -945,5 +945,5 @@ semi_join(df1, df2, by = "id") | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||||
| 
 | 
 | ||||||
							
								
								
									
										
											BIN
										
									
								
								SD/3.9_课后作业8/.RData
									
									
									
									
									
										Normal file
									
								
							
							
						
						
							
								
								
									
										
											BIN
										
									
								
								SD/3.9_课后作业8/airqualitydf.RDS
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -43,7 +43,7 @@ require(learnr) | |||||||
| 1. 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。 | 1. 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。 | ||||||
| 2. 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。 | 2. 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。 | ||||||
| 
 | 
 | ||||||
| 作业模板:[第8次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd) | 作业模板:[第8次课后作业_模板.qmd](https://git.drwater.net/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd) | ||||||
| 
 | 
 | ||||||
| ## 示例代码 | ## 示例代码 | ||||||
| 
 | 
 | ||||||
| @@ -62,5 +62,5 @@ require(learnr) | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||||
| 
 | 
 | ||||||
							
								
								
									
										
											BIN
										
									
								
								SD/3.9_课后作业8/metadf.RDS
									
									
									
									
									
										Normal file
									
								
							
							
						
						
							
								
								
									
										
											BIN
										
									
								
								SD/3.9_课后作业8/testdf.RDS
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -14,7 +14,7 @@ format: html | |||||||
| # 下载至临时文件 | # 下载至临时文件 | ||||||
| if (FALSE) { | if (FALSE) { | ||||||
|   tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx") |   tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx") | ||||||
|   download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx", |   download.file("https://git.drwater.net/course/RWEP/raw/branch/PUB/data/airquality.xlsx", | ||||||
|     destfile = tmpxlsxpath) |     destfile = tmpxlsxpath) | ||||||
|   airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) |   airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) | ||||||
|   metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) |   metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) | ||||||
| @@ -118,6 +118,8 @@ if (FALSE) { | |||||||
|   saveRDS(testdf, "./testdf.RDS") |   saveRDS(testdf, "./testdf.RDS") | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | if (FALSE) { | ||||||
|  | 
 | ||||||
| lang <- "cn" | lang <- "cn" | ||||||
| require(dwfun) | require(dwfun) | ||||||
| require(rmdify) | require(rmdify) | ||||||
| @@ -144,5 +146,7 @@ testdf |> | |||||||
|   pull(gp) |   pull(gp) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | } | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| @@ -3934,5 +3934,5 @@ p | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||||
| 
 | 
 | ||||||
| Before Width: | Height: | Size: 61 KiB After Width: | Height: | Size: 61 KiB | 
| @@ -100,5 +100,4 @@ geom_bar(position = "fill") | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||||
| 
 |  | ||||||
| Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB | 
| Before Width: | Height: | Size: 272 KiB After Width: | Height: | Size: 272 KiB | 
| Before Width: | Height: | Size: 345 KiB After Width: | Height: | Size: 345 KiB | 
| Before Width: | Height: | Size: 210 KiB After Width: | Height: | Size: 210 KiB | 
| Before Width: | Height: | Size: 470 KiB After Width: | Height: | Size: 470 KiB | 
| Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB | 
| Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB | 
| Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB | 
| Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB | 
| Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB | 
| Before Width: | Height: | Size: 5.5 KiB After Width: | Height: | Size: 5.5 KiB | 
| Before Width: | Height: | Size: 209 KiB After Width: | Height: | Size: 209 KiB | 
| Before Width: | Height: | Size: 7.8 KiB After Width: | Height: | Size: 7.8 KiB | 
| Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB | 
| Before Width: | Height: | Size: 177 KiB After Width: | Height: | Size: 177 KiB | 
| Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB | 
| Before Width: | Height: | Size: 569 KiB After Width: | Height: | Size: 569 KiB | 
| Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.5 KiB | 
| Before Width: | Height: | Size: 100 KiB After Width: | Height: | Size: 100 KiB | 
| Before Width: | Height: | Size: 7.5 KiB After Width: | Height: | Size: 7.5 KiB | 
| Before Width: | Height: | Size: 131 KiB After Width: | Height: | Size: 131 KiB | 
| Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB | 
| Before Width: | Height: | Size: 60 KiB After Width: | Height: | Size: 60 KiB | 
| Before Width: | Height: | Size: 83 KiB After Width: | Height: | Size: 83 KiB | 
| Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB | 
| Before Width: | Height: | Size: 87 KiB After Width: | Height: | Size: 87 KiB | 
| Before Width: | Height: | Size: 91 KiB After Width: | Height: | Size: 91 KiB | 
| Before Width: | Height: | Size: 869 KiB After Width: | Height: | Size: 869 KiB | 
| Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 3.8 KiB | 
| Before Width: | Height: | Size: 638 KiB After Width: | Height: | Size: 638 KiB | 
| Before Width: | Height: | Size: 408 KiB After Width: | Height: | Size: 408 KiB | 
| Before Width: | Height: | Size: 410 KiB After Width: | Height: | Size: 410 KiB | 
| Before Width: | Height: | Size: 154 KiB After Width: | Height: | Size: 154 KiB | 
| Before Width: | Height: | Size: 8.7 KiB After Width: | Height: | Size: 8.7 KiB | 
| Before Width: | Height: | Size: 159 KiB After Width: | Height: | Size: 159 KiB | 
| Before Width: | Height: | Size: 184 KiB After Width: | Height: | Size: 184 KiB | 
| Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB | 
| Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB | 
| Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB | 
| Before Width: | Height: | Size: 69 KiB After Width: | Height: | Size: 69 KiB | 
| Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB | 
| Before Width: | Height: | Size: 117 KiB After Width: | Height: | Size: 117 KiB | 
| Before Width: | Height: | Size: 137 KiB After Width: | Height: | Size: 137 KiB | 
| Before Width: | Height: | Size: 10 KiB After Width: | Height: | Size: 10 KiB | 
| Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 7.1 KiB | 
| Before Width: | Height: | Size: 54 KiB After Width: | Height: | Size: 54 KiB | 
| Before Width: | Height: | Size: 7.6 KiB After Width: | Height: | Size: 7.6 KiB | 
| Before Width: | Height: | Size: 134 KiB After Width: | Height: | Size: 134 KiB | 
| Before Width: | Height: | Size: 150 KiB After Width: | Height: | Size: 150 KiB | 
| Before Width: | Height: | Size: 160 KiB After Width: | Height: | Size: 160 KiB | 
| Before Width: | Height: | Size: 120 KiB After Width: | Height: | Size: 120 KiB | 
| @@ -47,7 +47,8 @@ hexes <- function(..., size = 64) { | |||||||
|   right <- (seq_along(x) - 1) * size |   right <- (seq_along(x) - 1) * size | ||||||
|   res <- glue::glue( |   res <- glue::glue( | ||||||
|     '{.absolute top=-20 right=<right> width="<size>" height="<size * 1.16>"}', |     '{.absolute top=-20 right=<right> width="<size>" height="<size * 1.16>"}', | ||||||
|     .open = "<", .close = ">" |     .open = "<", | ||||||
|  |     .close = ">" | ||||||
|   ) |   ) | ||||||
|   paste0(res, collapse = " ") |   paste0(res, collapse = " ") | ||||||
| } | } | ||||||
| @@ -69,7 +70,6 @@ test_color  <- "#cd4173" | |||||||
| data_color <- "#767381" | data_color <- "#767381" | ||||||
| assess_color <- "#84cae1" | assess_color <- "#84cae1" | ||||||
| splits_pal <- c(data_color, train_color, test_color) | splits_pal <- c(data_color, train_color, test_color) | ||||||
| 
 |  | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -152,10 +152,27 @@ knitr::include_graphics("images/whole-game-final-performance.jpg") | |||||||
| 
 | 
 | ||||||
| # Install the packages for the workshop | # Install the packages for the workshop | ||||||
| pkgs <- | pkgs <- | ||||||
|   c("bonsai", "doParallel", "embed", "finetune", "lightgbm", "lme4", |   c( | ||||||
|     "plumber", "probably", "ranger", "rpart", "rpart.plot", "rules", |     "bonsai", | ||||||
|     "splines2", "stacks", "text2vec", "textrecipes", "tidymodels",  |     "doParallel", | ||||||
|     "vetiver", "remotes") |     "embed", | ||||||
|  |     "finetune", | ||||||
|  |     "lightgbm", | ||||||
|  |     "lme4", | ||||||
|  |     "plumber", | ||||||
|  |     "probably", | ||||||
|  |     "ranger", | ||||||
|  |     "rpart", | ||||||
|  |     "rpart.plot", | ||||||
|  |     "rules", | ||||||
|  |     "splines2", | ||||||
|  |     "stacks", | ||||||
|  |     "text2vec", | ||||||
|  |     "textrecipes", | ||||||
|  |     "tidymodels", | ||||||
|  |     "vetiver", | ||||||
|  |     "remotes" | ||||||
|  |   ) | ||||||
| 
 | 
 | ||||||
| install.packages(pkgs) | install.packages(pkgs) | ||||||
| ``` | ``` | ||||||
| @@ -197,26 +214,34 @@ taxi | |||||||
| #| | #| | ||||||
| set.seed(123) | set.seed(123) | ||||||
| library(forcats) | library(forcats) | ||||||
| one_split <- slice(taxi, 1:30) %>%  | require(tidymodels) | ||||||
|   initial_split() %>%  | require(tidyverse) | ||||||
|   tidy() %>%  | one_split <- taxi |> | ||||||
|   add_row(Row = 1:30, Data = "Original") %>%  |   dplyr::slice(1:30) |> | ||||||
|   mutate(Data = case_when( |   rsample::initial_split() |> | ||||||
|  |   generics::tidy() |> | ||||||
|  |   tibble::add_row(Row = 1:30, Data = "Original") |> | ||||||
|  |   dplyr::mutate( | ||||||
|  |     Data = case_when( | ||||||
|       Data == "Analysis" ~ "Training", |       Data == "Analysis" ~ "Training", | ||||||
|       Data == "Assessment" ~ "Testing", |       Data == "Assessment" ~ "Testing", | ||||||
|       TRUE ~ Data |       TRUE ~ Data | ||||||
|   )) %>%  |     ) | ||||||
|   mutate(Data = factor(Data, levels = c("Original", "Training", "Testing"))) |   ) |> | ||||||
|  |   dplyr::mutate( | ||||||
|  |     Data = factor(Data, levels = c("Original", "Training", "Testing")) | ||||||
|  |   ) | ||||||
| all_split <- | all_split <- | ||||||
|   ggplot(one_split, aes(x = Row, y = fct_rev(Data), fill = Data)) + |   ggplot(one_split, aes(x = Row, y = fct_rev(Data), fill = Data)) + | ||||||
|   geom_tile(color = "white", |   geom_tile(color = "white", linewidth = 1) + | ||||||
|             linewidth = 1) +  |  | ||||||
|   scale_fill_manual(values = splits_pal, guide = "none") + |   scale_fill_manual(values = splits_pal, guide = "none") + | ||||||
|   theme_minimal() + |   theme_minimal() + | ||||||
|   theme(axis.text.y = element_text(size = rel(2)), |   theme( | ||||||
|  |     axis.text.y = element_text(size = rel(2)), | ||||||
|     axis.text.x = element_blank(), |     axis.text.x = element_blank(), | ||||||
|     legend.position = "top", |     legend.position = "top", | ||||||
|         panel.grid = element_blank()) + |     panel.grid = element_blank() | ||||||
|  |   ) + | ||||||
|   coord_equal(ratio = 1) + |   coord_equal(ratio = 1) + | ||||||
|   labs(x = NULL, y = NULL) |   labs(x = NULL, y = NULL) | ||||||
| all_split | all_split | ||||||
| @@ -645,9 +670,18 @@ augment(taxi_fit, new_data = taxi_train) %>% | |||||||
| augment(taxi_fit, new_data = taxi_train) %>% | augment(taxi_fit, new_data = taxi_train) %>% | ||||||
|   roc_curve(truth = tip, .pred_yes) %>% |   roc_curve(truth = tip, .pred_yes) %>% | ||||||
|   filter(is.finite(.threshold)) %>% |   filter(is.finite(.threshold)) %>% | ||||||
|   pivot_longer(c(specificity, sensitivity), names_to = "statistic", values_to = "value") %>%  |   pivot_longer( | ||||||
|  |     c(specificity, sensitivity), | ||||||
|  |     names_to = "statistic", | ||||||
|  |     values_to = "value" | ||||||
|  |   ) %>% | ||||||
|   rename(`event threshold` = .threshold) %>% |   rename(`event threshold` = .threshold) %>% | ||||||
|   ggplot(aes(x = `event threshold`, y = value, col = statistic, group = statistic)) +  |   ggplot(aes( | ||||||
|  |     x = `event threshold`, | ||||||
|  |     y = value, | ||||||
|  |     col = statistic, | ||||||
|  |     group = statistic | ||||||
|  |   )) + | ||||||
|   geom_line() + |   geom_line() + | ||||||
|   scale_color_brewer(palette = "Dark2") + |   scale_color_brewer(palette = "Dark2") + | ||||||
|   labs(y = NULL) + |   labs(y = NULL) + | ||||||
| @@ -1010,8 +1044,11 @@ collect_metrics(final_fit) | |||||||
| 
 | 
 | ||||||
| ```{r} | ```{r} | ||||||
| require(tidyverse) | require(tidyverse) | ||||||
| sitedf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv") |> | sitedf <- readr::read_csv( | ||||||
|   select(SITE_ID, |   "https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv" | ||||||
|  | ) |> | ||||||
|  |   select( | ||||||
|  |     SITE_ID, | ||||||
|     lon = LON_DD, |     lon = LON_DD, | ||||||
|     lat = LAT_DD, |     lat = LAT_DD, | ||||||
|     name = LAKENAME, |     name = LAKENAME, | ||||||
| @@ -1019,25 +1056,27 @@ sitedf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-01/nla20 | |||||||
|     zmax = DEPTHMAX |     zmax = DEPTHMAX | ||||||
|   ) |> |   ) |> | ||||||
|   group_by(SITE_ID) |> |   group_by(SITE_ID) |> | ||||||
|   summarize(lon = mean(lon, na.rm = TRUE), |   summarize( | ||||||
|  |     lon = mean(lon, na.rm = TRUE), | ||||||
|     lat = mean(lat, na.rm = TRUE), |     lat = mean(lat, na.rm = TRUE), | ||||||
|     name = unique(name), |     name = unique(name), | ||||||
|     area = mean(area, na.rm = TRUE), |     area = mean(area, na.rm = TRUE), | ||||||
|     zmax = mean(zmax, na.rm = TRUE)) |     zmax = mean(zmax, na.rm = TRUE) | ||||||
|  |   ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| visitdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv") |> | visitdf <- readr::read_csv( | ||||||
|   select(SITE_ID, |   "https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv" | ||||||
|     date = DATE_PROFILE, | ) |> | ||||||
|     year = YEAR, |   select(SITE_ID, date = DATE_PROFILE, year = YEAR, visit = VISIT_NO) |> | ||||||
|     visit = VISIT_NO |  | ||||||
|   ) |> |  | ||||||
|   distinct() |   distinct() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | waterchemdf <- readr::read_csv( | ||||||
| waterchemdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv") |> |   "https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv" | ||||||
|   select(SITE_ID, | ) |> | ||||||
|  |   select( | ||||||
|  |     SITE_ID, | ||||||
|     date = DATE_PROFILE, |     date = DATE_PROFILE, | ||||||
|     depth = DEPTH, |     depth = DEPTH, | ||||||
|     temp = TEMP_FIELD, |     temp = TEMP_FIELD, | ||||||
| @@ -1046,38 +1085,43 @@ waterchemdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/ | |||||||
|     cond = COND_FIELD, |     cond = COND_FIELD, | ||||||
|   ) |   ) | ||||||
| 
 | 
 | ||||||
| sddf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv") |> | sddf <- readr::read_csv( | ||||||
|   select(SITE_ID,  |   "https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv" | ||||||
|  | ) |> | ||||||
|  |   select( | ||||||
|  |     SITE_ID, | ||||||
|     date = DATE_SECCHI, |     date = DATE_SECCHI, | ||||||
|     sd = SECMEAN, |     sd = SECMEAN, | ||||||
|     clear_to_bottom = CLEAR_TO_BOTTOM |     clear_to_bottom = CLEAR_TO_BOTTOM | ||||||
|   ) |   ) | ||||||
| 
 | 
 | ||||||
| trophicdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv") |> | trophicdf <- readr::read_csv( | ||||||
|   select(SITE_ID, |   "https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv" | ||||||
|     visit = VISIT_NO, | ) |> | ||||||
|     tp = PTL, |   select(SITE_ID, visit = VISIT_NO, tp = PTL, tn = NTL, chla = CHLA) |> | ||||||
|     tn = NTL, |  | ||||||
|     chla = CHLA) |> |  | ||||||
|   left_join(visitdf, by = c("SITE_ID", "visit")) |> |   left_join(visitdf, by = c("SITE_ID", "visit")) |> | ||||||
|   select(-year, -visit) |> |   select(-year, -visit) |> | ||||||
|   group_by(SITE_ID, date) |> |   group_by(SITE_ID, date) |> | ||||||
|   summarize(tp = mean(tp, na.rm = TRUE), |   summarize( | ||||||
|  |     tp = mean(tp, na.rm = TRUE), | ||||||
|     tn = mean(tn, na.rm = TRUE), |     tn = mean(tn, na.rm = TRUE), | ||||||
|     chla = mean(chla, na.rm = TRUE) |     chla = mean(chla, na.rm = TRUE) | ||||||
|   ) |   ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | phytodf <- readr::read_csv( | ||||||
| phytodf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv") |> |   "https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv" | ||||||
|   select(SITE_ID, | ) |> | ||||||
|  |   select( | ||||||
|  |     SITE_ID, | ||||||
|     date = DATEPHYT, |     date = DATEPHYT, | ||||||
|     depth = SAMPLE_DEPTH, |     depth = SAMPLE_DEPTH, | ||||||
|     phyta = DIVISION, |     phyta = DIVISION, | ||||||
|     genus = GENUS, |     genus = GENUS, | ||||||
|     species = SPECIES, |     species = SPECIES, | ||||||
|     tax = TAXANAME, |     tax = TAXANAME, | ||||||
|     abund = ABUND) |> |     abund = ABUND | ||||||
|  |   ) |> | ||||||
|   mutate(phyta = gsub(" .*$", "", phyta)) |> |   mutate(phyta = gsub(" .*$", "", phyta)) |> | ||||||
|   filter(!is.na(genus)) |> |   filter(!is.na(genus)) |> | ||||||
|   group_by(SITE_ID, date, depth, phyta, genus) |> |   group_by(SITE_ID, date, depth, phyta, genus) |> | ||||||
| @@ -1088,7 +1132,7 @@ envdf <- waterchemdf |> | |||||||
|   filter(depth < 2) |> |   filter(depth < 2) |> | ||||||
|   select(-depth) |> |   select(-depth) |> | ||||||
|   group_by(SITE_ID, date) |> |   group_by(SITE_ID, date) |> | ||||||
|   summarise_all(~mean(., na.rm = TRUE)) |> |   summarise_all(~ mean(., na.rm = TRUE)) |> | ||||||
|   ungroup() |> |   ungroup() |> | ||||||
|   left_join(sddf, by = c("SITE_ID", "date")) |> |   left_join(sddf, by = c("SITE_ID", "date")) |> | ||||||
|   left_join(trophicdf, by = c("SITE_ID", "date")) |   left_join(trophicdf, by = c("SITE_ID", "date")) | ||||||
| @@ -1097,19 +1141,21 @@ nla <- envdf |> | |||||||
|   left_join(phytodf) |> |   left_join(phytodf) |> | ||||||
|   left_join(sitedf, by = "SITE_ID") |> |   left_join(sitedf, by = "SITE_ID") |> | ||||||
|   filter(!purrr::map_lgl(phytodf, is.null)) |> |   filter(!purrr::map_lgl(phytodf, is.null)) |> | ||||||
|   mutate(cyanophyta = purrr::map(phytodf, ~ .x |> |   mutate( | ||||||
|  |     cyanophyta = purrr::map( | ||||||
|  |       phytodf, | ||||||
|  |       ~ .x |> | ||||||
|         dplyr::filter(phyta == "Cyanophyta") |> |         dplyr::filter(phyta == "Cyanophyta") |> | ||||||
|         summarize(cyanophyta = sum(abund, na.rm = TRUE)) |         summarize(cyanophyta = sum(abund, na.rm = TRUE)) | ||||||
|   )) |> |     ) | ||||||
|  |   ) |> | ||||||
|   unnest(cyanophyta) |> |   unnest(cyanophyta) |> | ||||||
|   select(-phyta) |> |   select(-phyta) |> | ||||||
|   mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE)) |   mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE)) | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| # library(rmdify) | # library(rmdify) | ||||||
| # library(dwfun) | # library(dwfun) | ||||||
| # dwfun::init() | # dwfun::init() | ||||||
| 
 |  | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -1127,18 +1173,20 @@ skimr::skim(nla) | |||||||
| nla |> | nla |> | ||||||
|   filter(tp > 1) |> |   filter(tp > 1) |> | ||||||
|   ggplot(aes(tn, tp)) + |   ggplot(aes(tn, tp)) + | ||||||
| geom_point() + |   geom_point() + | ||||||
| geom_smooth(method = "lm") + |   geom_smooth(method = "lm") + | ||||||
| scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), |   scale_x_log10( | ||||||
|                labels = scales::trans_format("log10", scales::math_format(10^.x))) + |     breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||||
| scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), |     labels = scales::trans_format("log10", scales::math_format(10^.x)) | ||||||
|                labels = scales::trans_format("log10", scales::math_format(10^.x))) |   ) + | ||||||
|  |   scale_y_log10( | ||||||
|  |     breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||||
|  |     labels = scales::trans_format("log10", scales::math_format(10^.x)) | ||||||
|  |   ) | ||||||
| 
 | 
 | ||||||
| m1 <- lm(log10(tp) ~ log10(tn), data = nla) | m1 <- lm(log10(tp) ~ log10(tn), data = nla) | ||||||
| 
 | 
 | ||||||
| summary(m1) | summary(m1) | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## 复杂指标 | ## 复杂指标 | ||||||
| @@ -1147,18 +1195,20 @@ summary(m1) | |||||||
| nla |> | nla |> | ||||||
|   filter(tp > 1) |> |   filter(tp > 1) |> | ||||||
|   ggplot(aes(tp, cyanophyta)) + |   ggplot(aes(tp, cyanophyta)) + | ||||||
| geom_point() + |   geom_point() + | ||||||
| geom_smooth(method = "lm") + |   geom_smooth(method = "lm") + | ||||||
| scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), |   scale_x_log10( | ||||||
|                labels = scales::trans_format("log10", scales::math_format(10^.x))) + |     breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||||
| scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), |     labels = scales::trans_format("log10", scales::math_format(10^.x)) | ||||||
|                labels = scales::trans_format("log10", scales::math_format(10^.x))) |   ) + | ||||||
|  |   scale_y_log10( | ||||||
|  |     breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||||
|  |     labels = scales::trans_format("log10", scales::math_format(10^.x)) | ||||||
|  |   ) | ||||||
| 
 | 
 | ||||||
| m2 <- lm(log10(cyanophyta) ~ log10(tp), data = nla) | m2 <- lm(log10(cyanophyta) ~ log10(tp), data = nla) | ||||||
| 
 | 
 | ||||||
| summary(m2) | summary(m2) | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -1170,13 +1220,14 @@ summary(m2) | |||||||
| (nla_split <- rsample::initial_split(nla, prop = 0.7, strata = zmax)) | (nla_split <- rsample::initial_split(nla, prop = 0.7, strata = zmax)) | ||||||
| (nla_train <- training(nla_split)) | (nla_train <- training(nla_split)) | ||||||
| (nla_test <- testing(nla_split)) | (nla_test <- testing(nla_split)) | ||||||
| 
 |  | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## tidymodels - recipe | ## tidymodels - recipe | ||||||
| 
 | 
 | ||||||
| ```{r} | ```{r} | ||||||
| nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom") | nla_formula <- as.formula( | ||||||
|  |   "cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom" | ||||||
|  | ) | ||||||
| # nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn") | # nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn") | ||||||
| nla_recipe <- recipes::recipe(nla_formula, data = nla_train) |> | nla_recipe <- recipes::recipe(nla_formula, data = nla_train) |> | ||||||
|   recipes::step_string2factor(all_nominal()) |> |   recipes::step_string2factor(all_nominal()) |> | ||||||
| @@ -1193,7 +1244,7 @@ nla_recipe | |||||||
| nla_cv <- recipes::bake( | nla_cv <- recipes::bake( | ||||||
|   nla_recipe, |   nla_recipe, | ||||||
|   new_data = training(nla_split) |   new_data = training(nla_split) | ||||||
|   ) |> | ) |> | ||||||
|   rsample::vfold_cv(v = 10) |   rsample::vfold_cv(v = 10) | ||||||
| nla_cv | nla_cv | ||||||
| ``` | ``` | ||||||
| @@ -1260,7 +1311,7 @@ if (FALSE) { | |||||||
|     metrics = yardstick::metric_set(rmse, rsq, mae), |     metrics = yardstick::metric_set(rmse, rsq, mae), | ||||||
|     control = tune::control_grid(verbose = TRUE) |     control = tune::control_grid(verbose = TRUE) | ||||||
|   ) |   ) | ||||||
| saveRDS(xgboost_tuned, "./xgboost_tuned.RDS") |   saveRDS(xgboost_tuned, "./xgboost_tuned.RDS") | ||||||
| } | } | ||||||
| xgboost_tuned <- readRDS("./xgboost_tuned.RDS") | xgboost_tuned <- readRDS("./xgboost_tuned.RDS") | ||||||
| ``` | ``` | ||||||
| @@ -1298,7 +1349,7 @@ xgboost_tuned |> | |||||||
| 
 | 
 | ||||||
| ```{r} | ```{r} | ||||||
| xgboost_best_params <- xgboost_tuned |> | xgboost_best_params <- xgboost_tuned |> | ||||||
|   tune::select_best("rmse") |   tune::select_best(metric = "rmse") | ||||||
| 
 | 
 | ||||||
| knitr::kable(xgboost_best_params) | knitr::kable(xgboost_best_params) | ||||||
| ``` | ``` | ||||||
| @@ -1331,14 +1382,15 @@ train_prediction <- xgboost_model_final |> | |||||||
|   ) |> |   ) |> | ||||||
|   # predict the sale prices for the training data |   # predict the sale prices for the training data | ||||||
|   predict(new_data = train_processed) |> |   predict(new_data = train_processed) |> | ||||||
|   bind_cols(nla_train |> |   bind_cols( | ||||||
|   mutate(.obs = log10(cyanophyta))) |     nla_train |> | ||||||
|  |       mutate(.obs = log10(cyanophyta)) | ||||||
|  |   ) | ||||||
| xgboost_score_train <- | xgboost_score_train <- | ||||||
|   train_prediction |> |   train_prediction |> | ||||||
|   yardstick::metrics(.obs, .pred) |> |   yardstick::metrics(.obs, .pred) |> | ||||||
|   mutate(.estimate = format(round(.estimate, 2), big.mark = ",")) |   mutate(.estimate = format(round(.estimate, 2), big.mark = ",")) | ||||||
| knitr::kable(xgboost_score_train) | knitr::kable(xgboost_score_train) | ||||||
| 
 |  | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## tidymodels - train evaluation | ## tidymodels - train evaluation | ||||||
| @@ -1349,10 +1401,8 @@ knitr::kable(xgboost_score_train) | |||||||
| #| out-width: "80%" | #| out-width: "80%" | ||||||
| train_prediction |> | train_prediction |> | ||||||
|   ggplot(aes(.pred, .obs)) + |   ggplot(aes(.pred, .obs)) + | ||||||
| geom_point() + |   geom_point() + | ||||||
| geom_smooth(method = "lm") |   geom_smooth(method = "lm") | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -1370,8 +1420,10 @@ test_prediction <- xgboost_model_final |> | |||||||
|   ) |> |   ) |> | ||||||
|   # use the training model fit to predict the test data |   # use the training model fit to predict the test data | ||||||
|   predict(new_data = test_processed) |> |   predict(new_data = test_processed) |> | ||||||
|   bind_cols(nla_test |> |   bind_cols( | ||||||
|   mutate(.obs = log10(cyanophyta))) |     nla_test |> | ||||||
|  |       mutate(.obs = log10(cyanophyta)) | ||||||
|  |   ) | ||||||
| 
 | 
 | ||||||
| # measure the accuracy of our model using `yardstick` | # measure the accuracy of our model using `yardstick` | ||||||
| xgboost_score <- test_prediction |> | xgboost_score <- test_prediction |> | ||||||
| @@ -1394,7 +1446,7 @@ cyanophyta_prediction_residual <- test_prediction |> | |||||||
|   select(.pred, residual_pct) |   select(.pred, residual_pct) | ||||||
| 
 | 
 | ||||||
| cyanophyta_prediction_residual |> | cyanophyta_prediction_residual |> | ||||||
| ggplot(aes(x = .pred, y = residual_pct)) + |   ggplot(aes(x = .pred, y = residual_pct)) + | ||||||
|   geom_point() + |   geom_point() + | ||||||
|   xlab("Predicted Cyanophyta") + |   xlab("Predicted Cyanophyta") + | ||||||
|   ylab("Residual (%)") |   ylab("Residual (%)") | ||||||
| @@ -1411,9 +1463,8 @@ ggplot(aes(x = .pred, y = residual_pct)) + | |||||||
| #| out-width: "80%" | #| out-width: "80%" | ||||||
| test_prediction |> | test_prediction |> | ||||||
|   ggplot(aes(.pred, .obs)) + |   ggplot(aes(.pred, .obs)) + | ||||||
| geom_point() + |   geom_point() + | ||||||
| geom_smooth(method = "lm", colour = "black") |   geom_smooth(method = "lm", colour = "black") | ||||||
| 
 |  | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -1421,5 +1472,4 @@ geom_smooth(method = "lm", colour = "black") | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||||
| 
 |  | ||||||
| Before Width: | Height: | Size: 61 KiB After Width: | Height: | Size: 61 KiB |