Compare commits
	
		
			1 Commits
		
	
	
		
			133cb1533d
			...
			main
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 43c660af96 | 
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						| @@ -7,5 +7,3 @@ _variables.yml | |||||||
| _freeze/ | _freeze/ | ||||||
| *_cache/ | *_cache/ | ||||||
| *_files/ | *_files/ | ||||||
| SD/_*/ |  | ||||||
| homework/ |  | ||||||
|   | |||||||
| @@ -1 +0,0 @@ | |||||||
| daa3ba71b87598e29019c2f370272767 |  | ||||||
							
								
								
									
										382
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						| @@ -1,382 +0,0 @@ | |||||||
| # Makefile for Quarto Project Automation |  | ||||||
| # Detect OS |  | ||||||
| HOSTNAME := $(shell hostname) |  | ||||||
| OS := $(shell uname | tr A-Z a-z) |  | ||||||
| ifeq ($(OS), darwin) |  | ||||||
|     SEDI := sed -i '' |  | ||||||
|     OS := OSX |  | ||||||
| else ifeq ($(OS), linux) |  | ||||||
|     SEDI := sed -i |  | ||||||
|     OS := linux |  | ||||||
| else |  | ||||||
|     $(error Unknown operating system) |  | ||||||
| endif |  | ||||||
|  |  | ||||||
| # Fetch Git branch and project details |  | ||||||
| branchname := $(shell git branch --show-current) |  | ||||||
| reponame := $(shell basename $(shell git rev-parse --show-toplevel)) |  | ||||||
| projtype := $(shell basename $(shell dirname $(shell git rev-parse --show-toplevel))) |  | ||||||
| pubtype := $(if $(findstring PUB,$(branchname)),public,protected) |  | ||||||
| remotedir := dwuser@drwater.net:/home/www/drc/$(projtype)/$(pubtype)/$(reponame)/$(branchname) |  | ||||||
| outputdir := $(shell awk -F': *' '/^ *output-dir:/ {print $$2 "/" }' ./_quarto.yml) |  | ||||||
| siteurl := https://drc.drwater.net/$(projtype)/$(pubtype)/$(reponame)/$(branchname) |  | ||||||
|  |  | ||||||
| branchnames := "TX\|FJ\|YF\|ZY\|WW\|JB\|YY\|YJ\|DYF" |  | ||||||
|  |  | ||||||
| # Variables for colors and port |  | ||||||
| bcolor := grey |  | ||||||
| port := 4199 |  | ||||||
|  |  | ||||||
| # Set background color based on branch name |  | ||||||
| ifeq ($(findstring R1,$(branchname)),R1) |  | ||||||
|     bcolor := orange |  | ||||||
| else ifeq ($(findstring R2,$(branchname)),R2) |  | ||||||
|     bcolor := lightblue |  | ||||||
| else ifeq ($(findstring R3,$(branchname)),R3) |  | ||||||
|     bcolor := lightgreen |  | ||||||
| else ifeq ($(findstring PUB,$(branchname)),PUB) |  | ||||||
|     bcolor := light |  | ||||||
| endif |  | ||||||
|  |  | ||||||
| # Set port based on branch name |  | ||||||
| branch_ports := main:4200 SM:4201 TX:4202 FJ:4203 YF:4204 ZY:4205 WW:4206 JB:4207 YY:4208 YJ:4209 DYF:4210 |  | ||||||
| port := $(shell echo $(branch_ports) | tr " " "\n" | grep -E "^$(branchname):" | sed -E 's/^$(branchname):([0-9]+)/\1/') |  | ||||||
| port := $(if $(port),$(port),4199) |  | ||||||
|  |  | ||||||
| # Define the state file |  | ||||||
| STATE_FILE := .source_state |  | ||||||
|  |  | ||||||
| # checkfile |  | ||||||
| GREPSTR := " \|(\|)\|^submit\|^analysis\|_cache\|_freeze\|^site_libs\|^www" |  | ||||||
|  |  | ||||||
| # Default target |  | ||||||
| .PHONY: all preview readme rsync local clean upload fix_links force check_git_status |  | ||||||
|  |  | ||||||
| all: local upload clean commit push |  | ||||||
| 	 |  | ||||||
| force: updrefbib check_git_status updvariable render |  | ||||||
|  |  | ||||||
| local: updrefbib check_git_status updvariable lazyrender |  | ||||||
|  |  | ||||||
| updmakefile: |  | ||||||
| 	@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \ |  | ||||||
| 		echo "基于 $$HOME/bin/publish2dw.Makefile 更新本项目 Makefile..."; \ |  | ||||||
| 		cp "$$HOME/bin/publish2dw.Makefile" "./Makefile"; \ |  | ||||||
| 		git add "Makefile" && git commit -m "Update Makefile"; \ |  | ||||||
| 		echo "本项目Makefile更新完成."; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "Makefile 无需在本系统上更新."; \ |  | ||||||
| 	fi |  | ||||||
|  |  | ||||||
|  |  | ||||||
| updrefbib: |  | ||||||
| 	@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \ |  | ||||||
| 		echo "更新本项目参考文献..."; \ |  | ||||||
| 		cp "$$HOME/literature/Ref.bib" "./BB/"; \ |  | ||||||
| 		echo "推送本地参考文献到远程服务器..."; \ |  | ||||||
| 		rsync -azvu --progress "$$HOME/literature/Ref.bib" "drwater.net:/home/www/drc/datapool/public/BB/Ref.bib"; \ |  | ||||||
| 		echo "本项目参考文献更新完成."; \ |  | ||||||
| 	else \ |  | ||||||
| 	  echo "检查网络连通性..."; \ |  | ||||||
| 		if ping -c 1 -W 1 drc.drwater.net > /dev/null 2>&1; then \ |  | ||||||
| 			echo "网络正常,更新本项目参考文献..."; \ |  | ||||||
| 			wget -O BB/Ref.bib "https://drc.drwater.net/datapool/public/BB/Ref.bib"; \ |  | ||||||
| 			echo "本项目参考文献更新完成."; \ |  | ||||||
| 		else \ |  | ||||||
| 			echo "网络不可用,跳过参考文献更新."; \ |  | ||||||
| 		fi; \ |  | ||||||
| 	fi; \ |  | ||||||
| 	git add BB/Ref.bib; \ |  | ||||||
| 	if [ "$$(git diff --cached)" ]; then \ |  | ||||||
| 		git commit -m "Update Ref.bib"; \ |  | ||||||
| 	fi |  | ||||||
|  |  | ||||||
| check_git_status: |  | ||||||
| 	@uncommitted=$$(git status --porcelain); \ |  | ||||||
| 	if [ -n "$$uncommitted" ]; then \ |  | ||||||
| 	  git status; \ |  | ||||||
| 		read -p "当前存在未提交的修改(如上),是否要提交?(Y/N, default is N): " answer; \ |  | ||||||
| 		answer=$${answer:-N}; \ |  | ||||||
| 		if [ "$$answer" = "Y" ] || [ "$$answer" = "y" ]; then \ |  | ||||||
| 			read -p "请输入修改说明: " message; \ |  | ||||||
| 			git add . && git commit -m "$$message"; \ |  | ||||||
| 		else \ |  | ||||||
| 			echo "未提交,如后续操作为pull,则无法继续"; \ |  | ||||||
| 		fi; \ |  | ||||||
| 	fi |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # Lazy render with hash checking |  | ||||||
| lazyrender: |  | ||||||
| 	@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \ |  | ||||||
| 	if [ ! -f $(STATE_FILE) ] || [ "$$current_hash" != "$$(cat $(STATE_FILE))" ]; then \ |  | ||||||
| 		echo "源文件发生变化, 重新编译..."; \ |  | ||||||
| 		echo "$$current_hash" > $(STATE_FILE); \ |  | ||||||
| 		$(MAKE) render; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "源文件无变化, 跳过编译..."; \ |  | ||||||
| 		exit 0; \ |  | ||||||
| 	fi |  | ||||||
|  |  | ||||||
| # Render target |  | ||||||
| render: |  | ||||||
| 	@quarto render |  | ||||||
|  |  | ||||||
| commit: |  | ||||||
| 	@echo "提交修改(commit)..."; \ |  | ||||||
| 		git add .; \ |  | ||||||
| 		if [ -n "$$(git diff --cached)" ]; then \ |  | ||||||
| 		git commit -m "render compile"; \ |  | ||||||
| 		else \ |  | ||||||
| 		echo "没有修改记录,跳过."; \ |  | ||||||
| 		fi; \ |  | ||||||
|  |  | ||||||
| # Pull changes from the specified branch based on the current branch |  | ||||||
| pull: |  | ||||||
| 	@echo "从远程拉取项目更新..."; \ |  | ||||||
| 		$(MAKE) check_git_status; \ |  | ||||||
| 		git pull; \ |  | ||||||
| 		current_branch=$$(git rev-parse --abbrev-ref HEAD); \ |  | ||||||
| 		if [ "$$current_branch" = main ]; then \ |  | ||||||
| 		  echo "当前分枝为$$current_branch."; \ |  | ||||||
| 			remote_branch=$$(git branch --remote | grep -v 'main' | grep $(branchnames) | awk '{print $$1}' | sed 's/origin\///' | head -n 1); \ |  | ||||||
| 			if [ -n "$$remote_branch" ]; then \ |  | ||||||
| 		    echo "尝试从远程分枝$$remote_branch 拉取更新..."; \ |  | ||||||
| 				git pull --rebase origin $$remote_branch; \ |  | ||||||
| 			else \ |  | ||||||
| 				echo "远程无可用分支$$remote_branch."; \ |  | ||||||
| 			fi; \ |  | ||||||
| 			else \ |  | ||||||
| 			echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \ |  | ||||||
| 			git pull --rebase origin main; \ |  | ||||||
| 		fi |  | ||||||
|  |  | ||||||
| # Pull changes from the main branch |  | ||||||
| pullmain: |  | ||||||
| 	$(MAKE) check_git_status; \ |  | ||||||
| 		@current_branch=$$(git rev-parse --abbrev-ref HEAD); \ |  | ||||||
| 		@echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \ |  | ||||||
| 		git pull --rebase origin main; \ |  | ||||||
|  |  | ||||||
| push: |  | ||||||
| 	@echo "推送到远程..."; \ |  | ||||||
| 		git push |  | ||||||
|  |  | ||||||
| filehash: |  | ||||||
| 	@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \ |  | ||||||
| 	echo "$$current_hash" > $(STATE_FILE) |  | ||||||
|  |  | ||||||
| # Preview the site on a specific port |  | ||||||
| preview: |  | ||||||
| 	@quarto preview --port $(port) |  | ||||||
|  |  | ||||||
| # Generate README.md |  | ||||||
| readme: |  | ||||||
| 	@quarto render index.qmd -t markdown -o README.md |  | ||||||
| 	@sed -e '/^---/,/^---/d' "$(outputdir)/README.md" > README.md |  | ||||||
| 	@rm "$(outputdir)/README.md" |  | ||||||
|  |  | ||||||
| # Sync files with remote server |  | ||||||
| rsync: |  | ||||||
| 	@rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)" |  | ||||||
|  |  | ||||||
| # Open local site |  | ||||||
| open: |  | ||||||
| 	@if [ "$(OS)" = "OSX" ]; then open "$(outputdir)/index.html"; fi |  | ||||||
|  |  | ||||||
| # Clean unnecessary files |  | ||||||
| clean: |  | ||||||
| 	@rm -f ./*.spl  ./*.bbl ./*.blg ./*.log ./*.tex ./*.bcf ./*.tex.sedbak ./*.fdb_latexmk |  | ||||||
|  |  | ||||||
| # Upload files to server and fix links |  | ||||||
| upload: backupdocx |  | ||||||
| 	@mkdir -p "$(outputdir)" && chmod -R 2775 "$(outputdir)" |  | ||||||
| 	@$(MAKE) fix_links |  | ||||||
| 	@if rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; then \ |  | ||||||
| 		if [ "$(OS)" = "OSX" ]; then \ |  | ||||||
| 			open "$(siteurl)" 2>/dev/null; \ |  | ||||||
| 		fi; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "Rsync failed. Attempting alternative upload method..."; \ |  | ||||||
| 		mkdir -p "$(reponame)"; \ |  | ||||||
| 		rsync -azvu --progress --delete -r "$(reponame)" "$(dir $(remotedir))"; \ |  | ||||||
| 		rm -rf "$(reponame)"; \ |  | ||||||
| 		rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; \ |  | ||||||
| 		if [ "$(OS)" = "OSX" ]; then \ |  | ||||||
| 			open "$(siteurl)" 2>/dev/null; \ |  | ||||||
| 		fi; \ |  | ||||||
| 	fi |  | ||||||
|  |  | ||||||
| backupdocx: |  | ||||||
| 	@echo "备份MS.docx文件..."; \ |  | ||||||
| 	currentcommithash=$$(git rev-parse --short HEAD); \ |  | ||||||
| 	datetime=$$(git show -s --format=%ci $$currentcommithash | sed 's/[-: ]//g' | cut -c3-12); \ |  | ||||||
| 	mkdir -p TC/MS/; \ |  | ||||||
| 	existing_file=$$(find TC/MS -name "MS*.docx" -exec cmp -s www/MS/MS.docx {} \; -print -quit); \ |  | ||||||
| 	if [ -n "$$existing_file" ]; then \ |  | ||||||
| 		echo "与www/MS/MS.docx 内容相同的备份文件已存在: $$existing_file"; \ |  | ||||||
| 		echo "无需备份."; \ |  | ||||||
| 	else \ |  | ||||||
| 		if [ ! -e TC/MS/MS$${datetime}_$${currentcommithash}.docx ]; then \ |  | ||||||
| 			cp www/MS/MS.docx TC/MS/MS$${datetime}_$${currentcommithash}.docx; \ |  | ||||||
| 			echo "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx完成."; \ |  | ||||||
| 			git add TC/MS/MS$${datetime}_$${currentcommithash}.docx; \ |  | ||||||
| 			if [ "$$(git diff --cached)" ]; then \ |  | ||||||
| 				git commit -m "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx"; \ |  | ||||||
| 			fi; \ |  | ||||||
| 		else \ |  | ||||||
| 			echo "TC/MS/MS$${datetime}_$${currentcommithash}.docx已存在,无需备份."; \ |  | ||||||
| 		fi; \ |  | ||||||
| 	fi; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| trackchange: |  | ||||||
| 	@if [ "$(projtype)" != "manuscript" ]; then \ |  | ||||||
| 	exit 0; \ |  | ||||||
| 	fi; \ |  | ||||||
| 	echo "选择两个提交以比较文档..."; \ |  | ||||||
| 	hashes=$$(git log --pretty=format:'%h: %s BY %an (%ar)' \ |  | ||||||
| 	| grep -E "$$(ls TC/MS/*.docx | xargs -n1 basename | sed -E 's/MS.*_([0-9a-f]+)\.docx/\1/' | tr '\n' '|')SMT_】" \ |  | ||||||
| 	| fzf --multi --reverse --preview="echo {}" ); \ |  | ||||||
| 	echo $$hashes; \ |  | ||||||
| 	hash1=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | tail -n 1 | awk '{print $$1}' | tr -d ':'); \ |  | ||||||
| 	hash1=$$(git rev-parse --short $${hash1}^); \ |  | ||||||
| 	datetime1=$$(git show -s --format=%ci $$hash1 | sed 's/[-: ]//g' | cut -c3-12); \ |  | ||||||
| 	hash2=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | head -n 1 | awk '{print $$1}' | tr -d ':'); \ |  | ||||||
| 	hash2=$$(git rev-parse --short $${hash2}^); \ |  | ||||||
| 	datetime2=$$(git show -s --format=%ci $$hash2 | sed 's/[-: ]//g' | cut -c3-12); \ |  | ||||||
| 	if [ -z "$$hash1" ] || [ -z "$$hash2" ]; then \ |  | ||||||
| 	echo "必须选择两个提交."; \ |  | ||||||
| 	exit 1; \ |  | ||||||
| 	fi; \ |  | ||||||
| 	doc1="TC/MS/MS$${datetime1}_$$hash1.docx"; \ |  | ||||||
| 	echo "$$doc1"; \ |  | ||||||
| 	doc2="TC/MS/MS$${datetime2}_$$hash2.docx"; \ |  | ||||||
| 	echo "$$doc2"; \ |  | ||||||
| 	if [ -f "$$doc1" ] && [ -f "$$doc2" ] && [ "$$doc1" != "$$doc2" ]; then \ |  | ||||||
| 	echo "打开文件: $$doc1 和 $$doc2"; \ |  | ||||||
| 	open "$$doc1" "$$doc2"; \ |  | ||||||
| 	printf "MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}" | pbcopy; \ |  | ||||||
| 	echo "请在word中对比两个版本形成带修改痕迹的版本,并保存至TC/MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}.docx!"; \ |  | ||||||
| 	else \ |  | ||||||
| 	echo "一个或两个文件不存在: $$doc1, $$doc2"; \ |  | ||||||
| 	exit 1; \ |  | ||||||
| 	fi |  | ||||||
|  |  | ||||||
| # Fix links in www directory |  | ||||||
| fix_links: |  | ||||||
| 	@find ./www -type f -name "*.html" -exec sed -i.bak \ |  | ||||||
| 		-e "s/{{< var branch >}}/$(branchname)/g" \ |  | ||||||
| 		-e "s/{{< var pubtype >}}/$(pubtype)/g" \ |  | ||||||
| 		-e "s/{{< var projtype >}}/$(projtype)/g" \ |  | ||||||
| 		-e "s/{{< var reponame >}}/$(reponame)/g" \ |  | ||||||
| 		-e "s/$(reponame)\/blob/$(reponame)\/raw\/branch/g" \ |  | ||||||
| 		-e "s/$(reponame)\/edit/$(reponame)\/_edit/g" {} + |  | ||||||
| 	@find ./www -type f -name "*.bak" -exec rm {} + |  | ||||||
|  |  | ||||||
| updvariable: |  | ||||||
| 	@touch _variables.yml  # 如果文件不存在则创建 |  | ||||||
| 	@grep -q '^reponame:' _variables.yml || echo "reponame: $(reponame)" >> _variables.yml |  | ||||||
| 	@if grep -q '^reponame:' _variables.yml; then \ |  | ||||||
| 		$(SEDI) 's/^reponame:.*/reponame: $(reponame)/' _variables.yml; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "reponame: $(reponame)" >> _variables.yml; \ |  | ||||||
| 	fi |  | ||||||
| 	@grep -q '^projtype:' _variables.yml || echo "projtype: $(projtype)" >> _variables.yml |  | ||||||
| 	@if grep -q '^projtype:' _variables.yml; then \ |  | ||||||
| 		$(SEDI) 's/^projtype:.*/projtype: $(projtype)/' _variables.yml; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "projtype: $(projtype)" >> _variables.yml; \ |  | ||||||
| 	fi |  | ||||||
| 	@grep -q '^branch:' _variables.yml || echo "branch: $(branchname)" >> _variables.yml |  | ||||||
| 	@if grep -q '^branch:' _variables.yml; then \ |  | ||||||
| 		$(SEDI) 's/^branch:.*/branch: $(branchname)/' _variables.yml; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "branch: $(branchname)" >> _variables.yml; \ |  | ||||||
| 	fi |  | ||||||
| 	@grep -q '^pubtype:' _variables.yml || echo "pubtype: $(pubtype)" >> _variables.yml |  | ||||||
| 	@if grep -q '^pubtype:' _variables.yml; then \ |  | ||||||
| 		$(SEDI) 's/^pubtype:.*/pubtype: $(pubtype)/' _variables.yml; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "pubtype: $(pubtype)" >> _variables.yml; \ |  | ||||||
| 	fi |  | ||||||
| 	@grep -q '^nwAB:' _variables.yml || echo "nwAB: $(nwAB)" >> _variables.yml |  | ||||||
| 	@if grep -q '^nwAB:' _variables.yml; then \ |  | ||||||
| 		$(SEDI) 's/^nwAB:.*/nwAB: $(nwAB)/' _variables.yml; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "nwAB: $(nwAB)" >> _variables.yml; \ |  | ||||||
| 	fi |  | ||||||
| 	@grep -q '^nwMS:' _variables.yml || echo "nwMS: $(nwMS)" >> _variables.yml |  | ||||||
| 	@if grep -q '^nwMS:' _variables.yml; then \ |  | ||||||
| 		$(SEDI) 's/^nwMS:.*/nwMS: $(nwMS)/' _variables.yml; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "nwMS: $(nwMS)" >> _variables.yml; \ |  | ||||||
| 	fi |  | ||||||
| 	@grep -q '^figtblMS:' _variables.yml || echo "figtblMS: $(figtblMS)" >> _variables.yml |  | ||||||
| 	@if grep -q '^figtblMS:' _variables.yml; then \ |  | ||||||
| 		$(SEDI) 's/^figtblMS:.*/figtblMS: $(figtblMS)/' _variables.yml; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "figtblMS: $(figtblMS)" >> _variables.yml; \ |  | ||||||
| 	fi |  | ||||||
| 	@grep -q '^figtblSM:' _variables.yml || echo "figtblSM: $(figtblSM)" >> _variables.yml |  | ||||||
| 	@if grep -q '^figtblSM:' _variables.yml; then \ |  | ||||||
| 		$(SEDI) 's/^figtblSM:.*/figtblSM: $(figtblSM)/' _variables.yml; \ |  | ||||||
| 	else \ |  | ||||||
| 		echo "figtblSM: $(figtblSM)" >> _variables.yml; \ |  | ||||||
| 	fi |  | ||||||
| 	@mkpapervar |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # Help: list all available commands with descriptions (English and Chinese) |  | ||||||
| help: |  | ||||||
| 	@echo "Makefile for Quarto Project Automation" |  | ||||||
| 	@echo "=======================================" |  | ||||||
| 	@echo "Available targets (English):" |  | ||||||
| 	@echo "" |  | ||||||
| 	@echo "  make all           - Execute local build, upload, clean, and commit" |  | ||||||
| 	@echo "  make force         - Force render, hash update, upload, clean, and commit" |  | ||||||
| 	@echo "  make local         - Check git status and perform a lazy render if changes detected" |  | ||||||
| 	@echo "  make check_git_status - Check for uncommitted changes and ask to commit them" |  | ||||||
| 	@echo "  make lazyrender    - Render if source files have changed based on hash comparison" |  | ||||||
| 	@echo "  make render        - Force Quarto to render the project" |  | ||||||
| 	@echo "  make commit        - Commit changes if no previous uncommitted changes" |  | ||||||
| 	@echo "  make filehash      - Generate and store the file hash of source files" |  | ||||||
| 	@echo "  make preview       - Preview the site locally on the specific port (default: 4199)" |  | ||||||
| 	@echo "  make readme        - Render README.md from Quarto index.qmd" |  | ||||||
| 	@echo "  make rsync         - Sync output files with the remote server" |  | ||||||
| 	@echo "  make open          - Open the generated site locally in the browser" |  | ||||||
| 	@echo "  make clean         - Clean up unnecessary files" |  | ||||||
| 	@echo "  make upload        - Upload files to the server and fix links" |  | ||||||
| 	@echo "  make fix_links     - Fix HTML links in the 'www' directory for the remote server" |  | ||||||
| 	@echo "  make updmakefile   - Update the Makefile" |  | ||||||
| 	@echo "  make help          - Display this help message" |  | ||||||
| 	@echo "" |  | ||||||
| 	@echo "Available targets (中文):" |  | ||||||
| 	@echo "" |  | ||||||
| 	@echo "  make all           - 执行本地构建、上传、清理和提交" |  | ||||||
| 	@echo "  make force         - 强制渲染、更新哈希、上传、清理并提交" |  | ||||||
| 	@echo "  make local         - 检查Git状态,若检测到更改则进行懒惰渲染" |  | ||||||
| 	@echo "  make check_git_status - 检查未提交的更改,并询问是否提交" |  | ||||||
| 	@echo "  make lazyrender    - 如果源文件发生更改,则根据哈希比较进行渲染" |  | ||||||
| 	@echo "  make render        - 强制 Quarto 渲染项目" |  | ||||||
| 	@echo "  make commit        - 如果没有未提交的更改则提交" |  | ||||||
| 	@echo "  make filehash      - 生成并存储源文件的哈希值" |  | ||||||
| 	@echo "  make preview       - 本地在特定端口预览网站 (默认: 4199)" |  | ||||||
| 	@echo "  make readme        - 从 Quarto 的 index.qmd 生成 README.md" |  | ||||||
| 	@echo "  make rsync         - 将输出文件同步到远程服务器" |  | ||||||
| 	@echo "  make open          - 在浏览器中打开生成的网站" |  | ||||||
| 	@echo "  make clean         - 清理不必要的文件" |  | ||||||
| 	@echo "  make upload        - 上传文件到服务器并修复链接" |  | ||||||
| 	@echo "  make fix_links     - 修复 'www' 目录中的 HTML 链接" |  | ||||||
| 	@echo "  make updmakefile   - 更新本项目 Makefile" |  | ||||||
| 	@echo "  make help          - 显示此帮助信息" |  | ||||||
| 	@echo "" |  | ||||||
| 	@echo "Environment variables (English and 中文):" |  | ||||||
| 	@echo "  bcolor             - Background color based on branch name (基于分支名的背景颜色)" |  | ||||||
| 	@echo "  port               - Port number based on branch name (基于分支名的端口号)" |  | ||||||
| 	@echo "  STATE_FILE         - File for storing hash state of source files (用于存储源文件哈希状态的文件)" |  | ||||||
| 	@echo "  siteurl            - The URL where the site will be hosted (网站托管的 URL)" |  | ||||||
| 	@echo "" |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1,138 +0,0 @@ | |||||||
| --- |  | ||||||
| title: "Lesson 6" |  | ||||||
| format: html |  | ||||||
| --- |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
|  |  | ||||||
| https://rs1.drwater.net |  | ||||||
|  |  | ||||||
| username:  |  | ||||||
|   - ruser01 |  | ||||||
|   - ruser02 |  | ||||||
|   - ruser03 |  | ||||||
|   - ruser04 |  | ||||||
|   - ruser05 |  | ||||||
|   - ruser06 |  | ||||||
|  |  | ||||||
| RWEP2025 |  | ||||||
|  |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # 安装包 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| install.packages("tidyverse") |  | ||||||
|  |  | ||||||
| x <- c(1:10, NA) |  | ||||||
|  |  | ||||||
| hist(x) |  | ||||||
|  |  | ||||||
| mean(x, na.rm = TRUE) |  | ||||||
|  |  | ||||||
| median(x, na.rm = TRUE) |  | ||||||
|  |  | ||||||
| sd(x, na.rm = TRUE) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| for(i in 1:10){ |  | ||||||
|   print(i) |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| x + y + x * y |  | ||||||
|  |  | ||||||
| myfunc <- function(x, y = 3) { |  | ||||||
|   x + y + x * y |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| myfunc(1, 2) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| myfunc(10) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| c(FALSE, 2, 1:3, 3)  |  | ||||||
|  |  | ||||||
| c(FALSE, 2, 1:3, 3) > 1 |  | ||||||
|  |  | ||||||
| all(c(FALSE, 2, 1:3, 3) > 1) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| c(1L,2L,3L) |  | ||||||
|  |  | ||||||
| any(c(FALSE, 2, 1:3, 3) > 1) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| x <- 10 |  | ||||||
|  |  | ||||||
| sin(x) = ? |  | ||||||
|  |  | ||||||
| paste("sin(x) = ", sin(x), sep = " ") |  | ||||||
|  |  | ||||||
| paste0("sin(x) = ", sin(x)) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| substr("Monday", 1, 3) |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # tidy |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| require(readxl) |  | ||||||
|  |  | ||||||
| aqdf <-readxl::read_xlsx("../../data/airquality.xlsx", sheet = "metadf") |  | ||||||
|  |  | ||||||
| # install.packages("skimr") |  | ||||||
|  |  | ||||||
| aqdf |> |  | ||||||
|   skimr::skim() |  | ||||||
|  |  | ||||||
| # base |  | ||||||
|  |  | ||||||
| # tidyverse |  | ||||||
|  |  | ||||||
| aqdf |> |  | ||||||
|   dplyr::group_by(Area) |> |  | ||||||
|   dplyr::summarize( |  | ||||||
|     n = n(), |  | ||||||
|     lon.mean = mean(lon, na.rm = TRUE), |  | ||||||
|     lon.sd = sd(lat, na.rm = TRUE) |  | ||||||
|   ) |> |  | ||||||
|   dplyr::filter(Area %in% c("北京市", "天津市", "上海市", "重庆市")) |> |  | ||||||
|   ggplot(aes(x = n, y = lon.mean)) + |  | ||||||
|   geom_point() + |  | ||||||
|   geom_line() + |  | ||||||
|   geom_errorbar( |  | ||||||
|     aes(ymin = lon.mean - lon.sd, |  | ||||||
|       ymax = lon.mean + lon.sd) |  | ||||||
|   ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| readxl::read_xlsx("./airquality.xlsx") |  | ||||||
|  |  | ||||||
| flights|> |  | ||||||
|   filter(dest=="IAH")|> |  | ||||||
|   group_by(year,month,day)|>summarize(n=n(), |  | ||||||
| delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10) |  | ||||||
|  |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1,21 +0,0 @@ | |||||||
| name,age,score |  | ||||||
| Alice,25,85 |  | ||||||
| Bob,30,92 |  | ||||||
| Charlie,28,89 |  | ||||||
| David,22,95 |  | ||||||
| Eva,35,87 |  | ||||||
| Frank,27,91 |  | ||||||
| Grace,29,88 |  | ||||||
| Helen,26,93 |  | ||||||
| Ivan,31,86 |  | ||||||
| Jack,24,94 |  | ||||||
| Kelly,32,89 |  | ||||||
| Lily,28,90 |  | ||||||
| Mike,33,85 |  | ||||||
| Nancy,27,92 |  | ||||||
| Olivia,34,88 |  | ||||||
| Peter,29,93 |  | ||||||
| Queen,25,89 |  | ||||||
| Ryan,30,94 |  | ||||||
| Samantha,26,91 |  | ||||||
| Tom,31,87 |  | ||||||
| 
 | 
| @@ -1,154 +0,0 @@ | |||||||
| --- |  | ||||||
| title: "Lesson 7" |  | ||||||
| format: html |  | ||||||
| --- |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| require(tidyverse) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| files <- c( |  | ||||||
|   "../../data/01-sales.csv", |  | ||||||
|   "../../data/02-sales.csv", |  | ||||||
|   "../../data/03-sales.csv" |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| files <- dir("../../data", pattern = "sales.csv", full.names = TRUE) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| readr::read_csv(files, id = "file") |> |  | ||||||
|   mutate(file = basename(file)) |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| require(tidyverse) |  | ||||||
| library(tidyverse) |  | ||||||
|  |  | ||||||
| files <- dir( |  | ||||||
|   "../../data/gapminder", |  | ||||||
|   pattern = "^[12][09][0-9][0-9].xlsx$", |  | ||||||
|   full.names = TRUE |  | ||||||
| ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| alldf <- tibble::tibble() |  | ||||||
| for (file in files) { |  | ||||||
|   alldf <- alldf |> |  | ||||||
|     bind_rows( |  | ||||||
|       readxl::read_xlsx(file) |> |  | ||||||
|         mutate(year = parse_number(basename(file))) |  | ||||||
|     ) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| alldf |> |  | ||||||
|   ggplot(aes(x = lifeExp, y = gdpPercap)) + |  | ||||||
|   geom_point(aes(color = factor(year))) + |  | ||||||
|   geom_smooth(method = "lm", se = FALSE) + |  | ||||||
|   scale_y_log10( |  | ||||||
|     breaks = scales::trans_breaks("log10", function(x) 10^x), |  | ||||||
|     labels = scales::trans_format("log10", scales::math_format(10^.x)) |  | ||||||
|   ) + |  | ||||||
|   facet_wrap(~year, ncol = 4, scale = "fixed") |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| # slope |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| file <- files[1] |  | ||||||
|  |  | ||||||
| lm(y ~ x, data) |  | ||||||
|  |  | ||||||
| m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file)) |  | ||||||
|  |  | ||||||
| summary(m) |  | ||||||
|  |  | ||||||
| coef(m)[2] |  | ||||||
|  |  | ||||||
| slopes <- c() |  | ||||||
| years <- c() |  | ||||||
| for (file in files) { |  | ||||||
|   m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file)) |  | ||||||
|   years <- c(years, parse_number(basename(file))) |  | ||||||
|   slopes <- c(slopes, coef(m)[2]) |  | ||||||
| } |  | ||||||
| years |  | ||||||
| slopes |  | ||||||
|  |  | ||||||
|  |  | ||||||
| plot(years, as.numeric(slopes), type = "b") |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| # purrr |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| require(tidyverse) |  | ||||||
| df <- tibble( |  | ||||||
|   filename = dir( |  | ||||||
|     "../../data/gapminder", |  | ||||||
|     pattern = "^[12][09][0-9][0-9].xlsx$", |  | ||||||
|     full.names = TRUE |  | ||||||
|   ) |  | ||||||
| ) |> |  | ||||||
|   dplyr::mutate( |  | ||||||
|     data = purrr::map( |  | ||||||
|       filename, |  | ||||||
|       \(x) readxl::read_xlsx(x) |  | ||||||
|     ) |  | ||||||
|   ) |> |  | ||||||
|   mutate(year = parse_number(basename(filename))) |> |  | ||||||
|   mutate( |  | ||||||
|     m = purrr::map(data, \(xxxx) lm(log10(gdpPercap) ~ lifeExp, data = xxxx)) |  | ||||||
|   ) |> |  | ||||||
|   mutate(slope = purrr::map_dbl(m, \(yyyy) coef(yyyy)[2])) |> |  | ||||||
|   unnest(data) |  | ||||||
|  |  | ||||||
| pvalue <- summary(df$m[[1]])$coefficients[2, 4] |  | ||||||
| rsq <- summary(df$m[[1]])$r.squared |  | ||||||
|  |  | ||||||
| require(tidymodels) |  | ||||||
| generics::tidy(df$m[[1]]) |  | ||||||
|  |  | ||||||
| df |> |  | ||||||
|   ggplot(aes(x = lifeExp, y = gdpPercap)) + |  | ||||||
|   geom_point(aes(color = factor(year))) + |  | ||||||
|   geom_smooth(method = "lm", se = FALSE) + |  | ||||||
|   scale_y_log10( |  | ||||||
|     breaks = scales::trans_breaks("log10", function(x) 10^x), |  | ||||||
|     labels = scales::trans_format("log10", scales::math_format(10^.x)) |  | ||||||
|   ) + |  | ||||||
|   facet_wrap(~year, ncol = 4, scale = "fixed") |  | ||||||
|  |  | ||||||
|  |  | ||||||
| df |> |  | ||||||
|   ggplot(aes(x = year, y = slope)) + |  | ||||||
|   geom_line() + |  | ||||||
|   geom_point() |  | ||||||
|  |  | ||||||
|  |  | ||||||
| df |  | ||||||
| df$slope[[1]] |  | ||||||
|  |  | ||||||
| coef(df$m[[1]])[2] |  | ||||||
|  |  | ||||||
| lm(y ~ x, data) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| df |> |  | ||||||
|   unnest(data) |> |  | ||||||
|   ggplot(aes(x = lifeExp, y = gdpPercap)) + |  | ||||||
|   geom_point(aes(color = factor(year))) + |  | ||||||
|   geom_smooth(method = "lm", se = FALSE) + |  | ||||||
|   scale_y_log10( |  | ||||||
|     breaks = scales::trans_breaks("log10", function(x) 10^x), |  | ||||||
|     labels = scales::trans_format("log10", scales::math_format(10^.x)) |  | ||||||
|   ) + |  | ||||||
|   facet_wrap(~year, ncol = 4, scale = "fixed") |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1,115 +0,0 @@ | |||||||
| --- |  | ||||||
| title: "Lesson 8" |  | ||||||
| format: html |  | ||||||
| --- |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| require(tidyverse) |  | ||||||
| library(nycflights13) |  | ||||||
|  |  | ||||||
| flights |> |  | ||||||
|   select(3:6) |> |  | ||||||
|   head(3) |> |  | ||||||
|   rename_all(~ gsub("_", "", .)) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| flights |> |  | ||||||
|   select(3:6) |> |  | ||||||
|   head(3) |> |  | ||||||
|   rename_with(toupper, .cols = 2:4) |  | ||||||
|  |  | ||||||
| # 每月10号-15号,dep_delay > 100 的航班 |  | ||||||
|  |  | ||||||
| # 每月哪个出发地origin的 dep_delay总时长最长 |  | ||||||
|  |  | ||||||
| flights |> |  | ||||||
|   group_by(month, origin) |> |  | ||||||
|   summarize(n = n(), total_dep_delay = mean(dep_delay, na.rm = TRUE)) |> |  | ||||||
|   slice_max(total_dep_delay) |  | ||||||
|  |  | ||||||
| # arrange(month, origin, desc(total_dep_delay)) |  | ||||||
|  |  | ||||||
| # 每月随机抽取一天,随机抽取三个航班 |  | ||||||
|  |  | ||||||
| slice_sample(n = 1) |  | ||||||
|  |  | ||||||
| flights |> |  | ||||||
|   tidyr::nest(ymddf = -c(year, month, day)) |> |  | ||||||
|   group_by(year, month) |> |  | ||||||
|   slice_sample(n = 5) |> |  | ||||||
|   unnest(ymddf) |> |  | ||||||
|   group_by(year, month, day) |> |  | ||||||
|   slice_sample(n = 3) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| flights |> |  | ||||||
|   tidyr::nest(ymddf = -c(year, month, day)) |> |  | ||||||
|   group_by(year, month) |> |  | ||||||
|   slice_sample(n = 1) |> |  | ||||||
|   mutate( |  | ||||||
|     ymddf = purrr::map(ymddf, \(x) { |  | ||||||
|       x |> |  | ||||||
|         slice_sample(n = 3) |  | ||||||
|     }) |  | ||||||
|   ) |  | ||||||
|  |  | ||||||
| # 每月 每个出发地 周末的平均dep_delay 与 工作日的平均dep_delay 差值最大的3个航班 |  | ||||||
|  |  | ||||||
| flights |> |  | ||||||
|   mutate(date = ymd(paste(year, month, day))) |> |  | ||||||
|   mutate(weekday = wday(date)) |> |  | ||||||
|   mutate(isworkday = if_else(between(weekday, 2, 6), "Yes", "No")) |> |  | ||||||
|   group_by(year, month, origin, flight, isworkday) |> |  | ||||||
|   summarize(mean_delay = mean(dep_delay, na.rm = TRUE)) |> |  | ||||||
|   tidyr::nest(diffdelaydf = c(isworkday, mean_delay)) |> |  | ||||||
|   filter( |  | ||||||
|     purrr::map(diffdelaydf, \(x) { |  | ||||||
|       nrow(x) |  | ||||||
|     }) > |  | ||||||
|       1 |  | ||||||
|   ) |> |  | ||||||
|   mutate( |  | ||||||
|     diffdelay = purrr::map_dbl(diffdelaydf, \(x) { |  | ||||||
|       x |> |  | ||||||
|         arrange(isworkday) |> |  | ||||||
|         pull(mean_delay) |> |  | ||||||
|         diff() |  | ||||||
|     }) |  | ||||||
|   ) |> |  | ||||||
|   group_by(year, month, origin) |> |  | ||||||
|   slice_max(diffdelay, n = 3) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| wday(today()) |  | ||||||
|  |  | ||||||
| weekday() |  | ||||||
|  |  | ||||||
|  |  | ||||||
| slice_sample(n = 1) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| flights |> |  | ||||||
|   group_by(month) |> |  | ||||||
|   slice_sample(n = 1) |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ```{r} |  | ||||||
| p1 <- flights |> |  | ||||||
|   group_by(year, month) |> |  | ||||||
|   summarize(n = n()) |> |  | ||||||
|   ggplot(aes(month, n)) + |  | ||||||
|   geom_point(shape = 21, size = 6, color = "black", fill = "red") + |  | ||||||
|   geom_line() |  | ||||||
|  |  | ||||||
| ggsave("./a.pdf") |  | ||||||
| ggsave("./a.png") |  | ||||||
|  |  | ||||||
| require(patchwork) |  | ||||||
|  |  | ||||||
| p1 / p1 |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
|  |  | ||||||
| Before Width: | Height: | Size: 271 KiB | 
| @@ -47,8 +47,9 @@ knitr::opts_chunk$set(echo = TRUE) | |||||||
| ### 课件 | ### 课件 | ||||||
| 
 | 
 | ||||||
| - 采用`R语言`+`quarto`完成 | - 采用`R语言`+`quarto`完成 | ||||||
| - 网页公开:[https://drc.drwater.net/course/public/RWEP/PUB/index.html](https://drc.drwater.net/course/public/RWEP/PUB/index.html) | - 网页公开:[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html) | ||||||
| - 课件代码:[https://git.drwater.net/course/RWEP.git](https://git.drwater.net/course/RWEP.git) | - 课件代码:[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git) | ||||||
|  | - 代码web界面:[https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/](https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/) | ||||||
| 
 | 
 | ||||||
| ## 如何学习接下来的内容? | ## 如何学习接下来的内容? | ||||||
| 
 | 
 | ||||||
| @@ -66,7 +67,7 @@ knitr::opts_chunk$set(echo = TRUE) | |||||||
| 
 | 
 | ||||||
| ## Rstudio Server使用 | ## Rstudio Server使用 | ||||||
| 
 | 
 | ||||||
| - 服务网址:[https://rs1.drwater.net/](https://rs1.drwater.net/) | - 服务网址:[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/) | ||||||
| - 每位同学使用1个账号,随机生成 | - 每位同学使用1个账号,随机生成 | ||||||
| - 密码:**** | - 密码:**** | ||||||
| - 后面的实践课程可在该服务器上完成 | - 后面的实践课程可在该服务器上完成 | ||||||
| @@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz") | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` | ||||||
| @@ -264,9 +264,6 @@ t.test(x, y) | |||||||
| wilcox.test(x, y) | wilcox.test(x, y) | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| ### [什么是 Wilcoxon-Mann-Whitney检验?](https://zhuanlan.zhihu.com/p/613524533) |  | ||||||
| 
 |  | ||||||
| ## 统计函数 | ## 统计函数 | ||||||
| 
 | 
 | ||||||
| ### 创建向量的直方图 | ### 创建向量的直方图 | ||||||
| @@ -795,4 +792,4 @@ names(Y) <- c("colA", "colB", "colC") | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` | ||||||
| @@ -37,7 +37,7 @@ require(learnr) | |||||||
| 
 | 
 | ||||||
| ## 下载excel文件 | ## 下载excel文件 | ||||||
| 
 | 
 | ||||||
| [https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx](https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx) | [https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| ## Tidy data | ## Tidy data | ||||||
| @@ -79,5 +79,5 @@ Tom,31,87 | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||||
| 
 | 
 | ||||||
| @@ -197,11 +197,9 @@ annoying <- tibble( | |||||||
| ```{r} | ```{r} | ||||||
| #| message: false | #| message: false | ||||||
| 
 | 
 | ||||||
| sales_files <- c( | sales_files <- c("../../data/01-sales.csv", | ||||||
|   "../../data/01-sales.csv", |  | ||||||
|   "../../data/02-sales.csv", |   "../../data/02-sales.csv", | ||||||
|   "../../data/03-sales.csv" |   "../../data/03-sales.csv") | ||||||
| ) |  | ||||||
| read_csv(sales_files, id = "file") | read_csv(sales_files, id = "file") | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| @@ -225,11 +223,8 @@ read_csv(sales_files, id = "file") | |||||||
| ## 批量读取 | ## 批量读取 | ||||||
| 
 | 
 | ||||||
| ```{r} | ```{r} | ||||||
| sales_files <- list.files( | sales_files <- list.files("../../data", | ||||||
|   "../../data", |   pattern = "sales\\.csv$", full.names = TRUE) | ||||||
|   pattern = "sales\\.csv$", |  | ||||||
|   full.names = TRUE |  | ||||||
| ) |  | ||||||
| sales_files | sales_files | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| @@ -275,8 +270,8 @@ metadf <- readRDS(file = "../../data/metadfdemo.RDS") | |||||||
| lang <- "cn" | lang <- "cn" | ||||||
| metadf |> | metadf |> | ||||||
|   ggplot(aes(lon, lat)) + |   ggplot(aes(lon, lat)) + | ||||||
|   geom_point(aes(fill = Area)) + | geom_point(aes(fill = Area)) + | ||||||
|   dwfun::theme_sci() | dwfun::theme_sci() | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -288,13 +283,8 @@ metadf <- readxl::read_xlsx("../../data/airquality.xlsx") | |||||||
| dir.create("../../data/metacity2/") | dir.create("../../data/metacity2/") | ||||||
| metadf |> | metadf |> | ||||||
|   nest(sitedf = -site) |> |   nest(sitedf = -site) |> | ||||||
|   mutate( |   mutate(flag = purrr::map2(site, sitedf, | ||||||
|     flag = purrr::map2( |     ~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx")))) | ||||||
|       site, |  | ||||||
|       sitedf, |  | ||||||
|       ~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx")) |  | ||||||
|     ) |  | ||||||
|   ) |  | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -315,16 +305,10 @@ if (FALSE) { | |||||||
|   writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx") |   writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx") | ||||||
|   dir.create("../../data/metacity/") |   dir.create("../../data/metacity/") | ||||||
|   metanestdf |> |   metanestdf |> | ||||||
|     mutate( |     mutate(flag = purrr::map2(Area, citydf, | ||||||
|       flag = purrr::map2( |       ~ writexl::write_xlsx(.y, | ||||||
|         Area, |  | ||||||
|         citydf, |  | ||||||
|         ~ writexl::write_xlsx( |  | ||||||
|           .y, |  | ||||||
|         path = paste0("../../data/metacity/", .x, ".xlsx") |         path = paste0("../../data/metacity/", .x, ".xlsx") | ||||||
|         ) |       ))) | ||||||
|       ) |  | ||||||
|     ) |  | ||||||
| } | } | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| @@ -337,4 +321,5 @@ if (FALSE) { | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` | ||||||
|  | 
 | ||||||
| @@ -323,5 +323,5 @@ flights |> | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||||
| 
 | 
 | ||||||
| @@ -945,5 +945,5 @@ semi_join(df1, df2, by = "id") | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||||
| 
 | 
 | ||||||
| @@ -43,7 +43,7 @@ require(learnr) | |||||||
| 1. 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。 | 1. 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。 | ||||||
| 2. 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。 | 2. 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。 | ||||||
| 
 | 
 | ||||||
| 作业模板:[第8次课后作业_模板.qmd](https://git.drwater.net/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd) | 作业模板:[第8次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd) | ||||||
| 
 | 
 | ||||||
| ## 示例代码 | ## 示例代码 | ||||||
| 
 | 
 | ||||||
| @@ -62,5 +62,5 @@ require(learnr) | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||||
| 
 | 
 | ||||||
| @@ -14,10 +14,8 @@ format: html | |||||||
| # 下载至临时文件 | # 下载至临时文件 | ||||||
| if (FALSE) { | if (FALSE) { | ||||||
|   tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx") |   tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx") | ||||||
|   download.file( |   download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx", | ||||||
|     "https://git.drwater.net/course/RWEP/raw/branch/PUB/data/airquality.xlsx", |     destfile = tmpxlsxpath) | ||||||
|     destfile = tmpxlsxpath |  | ||||||
|   ) |  | ||||||
|   airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) |   airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) | ||||||
|   metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) |   metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) | ||||||
|   saveRDS(airqualitydf, "./airqualitydf.RDS") |   saveRDS(airqualitydf, "./airqualitydf.RDS") | ||||||
| @@ -43,12 +41,7 @@ airqualitydf |> | |||||||
|   left_join(metadf |> select(site, city = Area)) |> |   left_join(metadf |> select(site, city = Area)) |> | ||||||
|   group_by(city) |> |   group_by(city) |> | ||||||
|   filter(n() > 5) |> |   filter(n() > 5) |> | ||||||
|   summarize( |   summarize(p = sum(AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)) / n()) |> | ||||||
|     p = sum( |  | ||||||
|       AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE) |  | ||||||
|     ) / |  | ||||||
|       n() |  | ||||||
|   ) |> |  | ||||||
|   top_n(10, p) |   top_n(10, p) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -64,11 +57,12 @@ airqualitydf |> | |||||||
|   left_join(metadf |> select(site, city = Area)) |> |   left_join(metadf |> select(site, city = Area)) |> | ||||||
|   group_by(city) |> |   group_by(city) |> | ||||||
|   filter(length(unique(site)) >= 5) |> |   filter(length(unique(site)) >= 5) |> | ||||||
|   summarize( |   summarize(p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2, | ||||||
|     p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2, na.rm = TRUE)) / n() |     na.rm = TRUE)) / n()) |> | ||||||
|   ) |> |  | ||||||
|   slice_max(p, n = 10) |> |   slice_max(p, n = 10) |> | ||||||
|   knitr::kable() | knitr::kable() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -89,86 +83,66 @@ if (FALSE) { | |||||||
|     left_join(metadf |> select(site, city = Area)) |> |     left_join(metadf |> select(site, city = Area)) |> | ||||||
|     group_by(city) |> |     group_by(city) |> | ||||||
|     filter(length(unique(site)) >= 5) |> |     filter(length(unique(site)) >= 5) |> | ||||||
|     mutate( |     mutate(dayornight = factor(ifelse(between(hour(datetime), 8, 20), "day", "night"), | ||||||
|       dayornight = factor( |       levels = c("day", "night")) | ||||||
|         ifelse(between(hour(datetime), 8, 20), "day", "night"), |  | ||||||
|         levels = c("day", "night") |  | ||||||
|       ) |  | ||||||
|     ) |> |     ) |> | ||||||
|     group_by(city) |> |     group_by(city) |> | ||||||
|     nest(citydf = -city) |> |     nest(citydf = -city) |> | ||||||
|     mutate( |     mutate(median_diff = purrr::map_dbl(citydf, ~ | ||||||
|       median_diff = purrr::map_dbl( |       .x |> | ||||||
|         citydf, |  | ||||||
|         ~ .x |> |  | ||||||
|         specify(AQI ~ dayornight) |> |         specify(AQI ~ dayornight) |> | ||||||
|         calculate(stat = "diff in medians", order = c("day", "night")) |> |         calculate(stat = "diff in medians", order = c("day", "night")) |> | ||||||
|         pull(stat) |         pull(stat) | ||||||
|       ) |     )) |> | ||||||
|     ) |> |  | ||||||
|     ungroup() |> |     ungroup() |> | ||||||
|     #  slice_sample(n = 12) |> |     #  slice_sample(n = 12) |> | ||||||
|     mutate( |     mutate(null_dist = purrr::map(citydf, ~ | ||||||
|       null_dist = purrr::map( |       .x |> | ||||||
|         citydf, |  | ||||||
|         ~ .x |> |  | ||||||
|         specify(AQI ~ dayornight) |> |         specify(AQI ~ dayornight) |> | ||||||
|         hypothesize(null = "independence") |> |         hypothesize(null = "independence") |> | ||||||
|         generate(reps = 1000, type = "permute") |> |         generate(reps = 1000, type = "permute") |> | ||||||
|         calculate(stat = "diff in medians", order = c("day", "night")) |         calculate(stat = "diff in medians", order = c("day", "night")) | ||||||
|       ) |     )) |> | ||||||
|     ) |> |     mutate(p_value = purrr::map2_dbl(null_dist, median_diff,  | ||||||
|     mutate( |  | ||||||
|       p_value = purrr::map2_dbl( |  | ||||||
|         null_dist, |  | ||||||
|         median_diff, |  | ||||||
|       ~  get_p_value(.x, obs_stat = .y, direction = "both") |> |       ~  get_p_value(.x, obs_stat = .y, direction = "both") |> | ||||||
|         pull(p_value) |         pull(p_value) | ||||||
|       ) |     )) |> | ||||||
|     ) |> |  | ||||||
|     mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) |> |     mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) |> | ||||||
|     mutate( |     mutate(fig = purrr::pmap(list(null_dist, median_diff, city, sigdiff), | ||||||
|       fig = purrr::pmap( |  | ||||||
|         list(null_dist, median_diff, city, sigdiff), |  | ||||||
|       ~ visualize(..1) + |       ~ visualize(..1) + | ||||||
|       shade_p_value(obs_stat = ..2, direction = "both") + |       shade_p_value(obs_stat = ..2, direction = "both") + | ||||||
|       ggtitle(paste0(..3, ":", ..4)) + |       ggtitle(paste0(..3, ":", ..4)) + | ||||||
|       theme_sci(2, 2) |       theme_sci(2, 2) | ||||||
|       ) |     )) |> | ||||||
|     ) |> |  | ||||||
|     arrange(p_value) |     arrange(p_value) | ||||||
|   saveRDS(testdf, "./testdf.RDS") |   saveRDS(testdf, "./testdf.RDS") | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| if (FALSE) { | lang <- "cn" | ||||||
|   lang <- "cn" | require(dwfun) | ||||||
|   require(dwfun) | require(rmdify) | ||||||
|   require(rmdify) | require(drwateR) | ||||||
|   require(drwateR) | dwfun::init() | ||||||
|   dwfun::init() | rmdify::rmd_init() | ||||||
|   rmdify::rmd_init() |  | ||||||
| 
 | 
 | ||||||
|   testdf <- readRDS("./testdf.RDS") | testdf <- readRDS("./testdf.RDS") | ||||||
|   require(tidyverse) | require(tidyverse) | ||||||
|   testdf |> | testdf |> | ||||||
|   select(city, median_diff, p_value, sigdiff) |> |   select(city, median_diff, p_value, sigdiff) |> | ||||||
|   knitr::kable() |   knitr::kable() | ||||||
|   testdf |> | testdf |> | ||||||
|     mutate(grp = (row_number() - 1) %/% 12) |> |   mutate(grp = (row_number() - 1)%/% 12) |> | ||||||
|   group_by(grp) |> |   group_by(grp) |> | ||||||
|   nest(grpdf = -grp) |> |   nest(grpdf = -grp) |> | ||||||
|   ungroup() |> |   ungroup() |> | ||||||
|     #  slice(1) |> | #  slice(1) |> | ||||||
|     mutate( |   mutate(gp = purrr::map(grpdf, | ||||||
|       gp = purrr::map( |     ~(.x |> | ||||||
|         grpdf, |  | ||||||
|         ~ (.x |> |  | ||||||
|       pull(fig)) |> |       pull(fig)) |> | ||||||
|       patchwork::wrap_plots(ncol = 3) + |       patchwork::wrap_plots(ncol = 3) + | ||||||
|           dwfun::theme_sci(5, 7) |       dwfun::theme_sci(5, 7))) |> | ||||||
|       ) |  | ||||||
|     ) |> |  | ||||||
|   pull(gp) |   pull(gp) | ||||||
| } | 
 | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| @@ -3934,5 +3934,5 @@ p | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||||
| 
 | 
 | ||||||
| Before Width: | Height: | Size: 61 KiB After Width: | Height: | Size: 61 KiB | 
| @@ -100,4 +100,5 @@ geom_bar(position = "fill") | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||||
|  | 
 | ||||||
							
								
								
									
										49
									
								
								SD/20240402_9_课后作业/index.qmd
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,49 @@ | |||||||
|  | --- | ||||||
|  | title: "课后作业9" | ||||||
|  | subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution | ||||||
|  | author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心 | ||||||
|  | date: today | ||||||
|  | lang: zh | ||||||
|  | format: | ||||||
|  |   revealjs: | ||||||
|  |     theme: dark | ||||||
|  |     slide-number: true | ||||||
|  |     chalkboard: | ||||||
|  |       buttons: true | ||||||
|  |     preview-links: auto | ||||||
|  |     lang: zh | ||||||
|  |     toc: true | ||||||
|  |     toc-depth: 1 | ||||||
|  |     toc-title: 大纲 | ||||||
|  |     logo: ./_extensions/inst/img/ucaslogo.png | ||||||
|  |     css: ./_extensions/inst/css/revealjs.css | ||||||
|  |     pointer: | ||||||
|  |       key: "p" | ||||||
|  |       color: "#32cd32" | ||||||
|  |       pointerSize: 18 | ||||||
|  | revealjs-plugins: | ||||||
|  |   - pointer | ||||||
|  | filters: | ||||||
|  |   - d2 | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | ```{r} | ||||||
|  | #| include: false | ||||||
|  | #| cache: false | ||||||
|  | lang <- "cn" | ||||||
|  | require(tidyverse) | ||||||
|  | require(learnr) | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | ## 第9次课后作业 | ||||||
|  |  | ||||||
|  | 自选数据集,使用R语言开展不同因子(如年份、季节、处理方式等)间某指标的差异分析,采用图表方式形成简要报告。 | ||||||
|  |  | ||||||
|  | 作业模板:[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## 欢迎讨论!{.center} | ||||||
|  |  | ||||||
|  |  | ||||||
|  | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||||
|  |  | ||||||
							
								
								
									
										8
									
								
								SD/20240402_9_课后作业/第9次课后作业_模板.qmd
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,8 @@ | |||||||
|  | --- | ||||||
|  | title: 课后作业9 | ||||||
|  | author: 姓名 | ||||||
|  | format: html | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | 要求:自选数据集,使用R语言开展不同因子间(如年份、季节、处理方式等)某指标的差异分析,采用图表+文字说明等方式形成简要报告。 | ||||||
|  |  | ||||||
| Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB | 
| Before Width: | Height: | Size: 272 KiB After Width: | Height: | Size: 272 KiB | 
| Before Width: | Height: | Size: 345 KiB After Width: | Height: | Size: 345 KiB | 
| Before Width: | Height: | Size: 210 KiB After Width: | Height: | Size: 210 KiB | 
| Before Width: | Height: | Size: 470 KiB After Width: | Height: | Size: 470 KiB | 
| Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB | 
| Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB | 
| Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB | 
| Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB | 
| Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB | 
| Before Width: | Height: | Size: 5.5 KiB After Width: | Height: | Size: 5.5 KiB | 
| Before Width: | Height: | Size: 209 KiB After Width: | Height: | Size: 209 KiB | 
| Before Width: | Height: | Size: 7.8 KiB After Width: | Height: | Size: 7.8 KiB | 
| Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB | 
| Before Width: | Height: | Size: 177 KiB After Width: | Height: | Size: 177 KiB | 
| Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB | 
| Before Width: | Height: | Size: 569 KiB After Width: | Height: | Size: 569 KiB | 
| Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.5 KiB | 
| Before Width: | Height: | Size: 100 KiB After Width: | Height: | Size: 100 KiB | 
| Before Width: | Height: | Size: 7.5 KiB After Width: | Height: | Size: 7.5 KiB | 
| Before Width: | Height: | Size: 131 KiB After Width: | Height: | Size: 131 KiB | 
| Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB | 
| Before Width: | Height: | Size: 60 KiB After Width: | Height: | Size: 60 KiB | 
| Before Width: | Height: | Size: 83 KiB After Width: | Height: | Size: 83 KiB | 
| Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB | 
| Before Width: | Height: | Size: 87 KiB After Width: | Height: | Size: 87 KiB | 
| Before Width: | Height: | Size: 91 KiB After Width: | Height: | Size: 91 KiB | 
| Before Width: | Height: | Size: 869 KiB After Width: | Height: | Size: 869 KiB | 
| Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 3.8 KiB | 
| Before Width: | Height: | Size: 638 KiB After Width: | Height: | Size: 638 KiB | 
| Before Width: | Height: | Size: 408 KiB After Width: | Height: | Size: 408 KiB | 
| Before Width: | Height: | Size: 410 KiB After Width: | Height: | Size: 410 KiB | 
| Before Width: | Height: | Size: 154 KiB After Width: | Height: | Size: 154 KiB | 
| Before Width: | Height: | Size: 8.7 KiB After Width: | Height: | Size: 8.7 KiB | 
| Before Width: | Height: | Size: 159 KiB After Width: | Height: | Size: 159 KiB | 
| Before Width: | Height: | Size: 184 KiB After Width: | Height: | Size: 184 KiB | 
| Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB | 
| Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB | 
| Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB | 
| Before Width: | Height: | Size: 69 KiB After Width: | Height: | Size: 69 KiB | 
| Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB | 
| Before Width: | Height: | Size: 117 KiB After Width: | Height: | Size: 117 KiB | 
| Before Width: | Height: | Size: 137 KiB After Width: | Height: | Size: 137 KiB | 
| Before Width: | Height: | Size: 10 KiB After Width: | Height: | Size: 10 KiB | 
| Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 7.1 KiB | 
| Before Width: | Height: | Size: 54 KiB After Width: | Height: | Size: 54 KiB | 
| Before Width: | Height: | Size: 7.6 KiB After Width: | Height: | Size: 7.6 KiB | 
| Before Width: | Height: | Size: 134 KiB After Width: | Height: | Size: 134 KiB | 
| Before Width: | Height: | Size: 150 KiB After Width: | Height: | Size: 150 KiB | 
| Before Width: | Height: | Size: 160 KiB After Width: | Height: | Size: 160 KiB | 
| Before Width: | Height: | Size: 120 KiB After Width: | Height: | Size: 120 KiB | 
| @@ -47,8 +47,7 @@ hexes <- function(..., size = 64) { | |||||||
|   right <- (seq_along(x) - 1) * size |   right <- (seq_along(x) - 1) * size | ||||||
|   res <- glue::glue( |   res <- glue::glue( | ||||||
|     '{.absolute top=-20 right=<right> width="<size>" height="<size * 1.16>"}', |     '{.absolute top=-20 right=<right> width="<size>" height="<size * 1.16>"}', | ||||||
|     .open = "<", |     .open = "<", .close = ">" | ||||||
|     .close = ">" |  | ||||||
|   ) |   ) | ||||||
|   paste0(res, collapse = " ") |   paste0(res, collapse = " ") | ||||||
| } | } | ||||||
| @@ -70,6 +69,7 @@ test_color <- "#cd4173" | |||||||
| data_color  <- "#767381" | data_color  <- "#767381" | ||||||
| assess_color <- "#84cae1" | assess_color <- "#84cae1" | ||||||
| splits_pal <- c(data_color, train_color, test_color) | splits_pal <- c(data_color, train_color, test_color) | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -152,27 +152,10 @@ knitr::include_graphics("images/whole-game-final-performance.jpg") | |||||||
| 
 | 
 | ||||||
| # Install the packages for the workshop | # Install the packages for the workshop | ||||||
| pkgs <-  | pkgs <-  | ||||||
|   c( |   c("bonsai", "doParallel", "embed", "finetune", "lightgbm", "lme4", | ||||||
|     "bonsai", |     "plumber", "probably", "ranger", "rpart", "rpart.plot", "rules", | ||||||
|     "doParallel", |     "splines2", "stacks", "text2vec", "textrecipes", "tidymodels",  | ||||||
|     "embed", |     "vetiver", "remotes") | ||||||
|     "finetune", |  | ||||||
|     "lightgbm", |  | ||||||
|     "lme4", |  | ||||||
|     "plumber", |  | ||||||
|     "probably", |  | ||||||
|     "ranger", |  | ||||||
|     "rpart", |  | ||||||
|     "rpart.plot", |  | ||||||
|     "rules", |  | ||||||
|     "splines2", |  | ||||||
|     "stacks", |  | ||||||
|     "text2vec", |  | ||||||
|     "textrecipes", |  | ||||||
|     "tidymodels", |  | ||||||
|     "vetiver", |  | ||||||
|     "remotes" |  | ||||||
|   ) |  | ||||||
| 
 | 
 | ||||||
| install.packages(pkgs) | install.packages(pkgs) | ||||||
| ``` | ``` | ||||||
| @@ -214,34 +197,26 @@ taxi | |||||||
| #|  | #|  | ||||||
| set.seed(123) | set.seed(123) | ||||||
| library(forcats) | library(forcats) | ||||||
| require(tidymodels) | one_split <- slice(taxi, 1:30) %>%  | ||||||
| require(tidyverse) |   initial_split() %>%  | ||||||
| one_split <- taxi |> |   tidy() %>%  | ||||||
|   dplyr::slice(1:30) |> |   add_row(Row = 1:30, Data = "Original") %>%  | ||||||
|   rsample::initial_split() |> |   mutate(Data = case_when( | ||||||
|   generics::tidy() |> |  | ||||||
|   tibble::add_row(Row = 1:30, Data = "Original") |> |  | ||||||
|   dplyr::mutate( |  | ||||||
|     Data = case_when( |  | ||||||
|     Data == "Analysis" ~ "Training", |     Data == "Analysis" ~ "Training", | ||||||
|     Data == "Assessment" ~ "Testing", |     Data == "Assessment" ~ "Testing", | ||||||
|     TRUE ~ Data |     TRUE ~ Data | ||||||
|     ) |   )) %>%  | ||||||
|   ) |> |   mutate(Data = factor(Data, levels = c("Original", "Training", "Testing"))) | ||||||
|   dplyr::mutate( |  | ||||||
|     Data = factor(Data, levels = c("Original", "Training", "Testing")) |  | ||||||
|   ) |  | ||||||
| all_split <- | all_split <- | ||||||
|   ggplot(one_split, aes(x = Row, y = fct_rev(Data), fill = Data)) +  |   ggplot(one_split, aes(x = Row, y = fct_rev(Data), fill = Data)) +  | ||||||
|   geom_tile(color = "white", linewidth = 1) + |   geom_tile(color = "white", | ||||||
|  |             linewidth = 1) +  | ||||||
|   scale_fill_manual(values = splits_pal, guide = "none") + |   scale_fill_manual(values = splits_pal, guide = "none") + | ||||||
|   theme_minimal() + |   theme_minimal() + | ||||||
|   theme( |   theme(axis.text.y = element_text(size = rel(2)), | ||||||
|     axis.text.y = element_text(size = rel(2)), |  | ||||||
|         axis.text.x = element_blank(), |         axis.text.x = element_blank(), | ||||||
|         legend.position = "top", |         legend.position = "top", | ||||||
|     panel.grid = element_blank() |         panel.grid = element_blank()) + | ||||||
|   ) + |  | ||||||
|   coord_equal(ratio = 1) + |   coord_equal(ratio = 1) + | ||||||
|   labs(x = NULL, y = NULL) |   labs(x = NULL, y = NULL) | ||||||
| all_split | all_split | ||||||
| @@ -670,18 +645,9 @@ augment(taxi_fit, new_data = taxi_train) %>% | |||||||
| augment(taxi_fit, new_data = taxi_train) %>%  | augment(taxi_fit, new_data = taxi_train) %>%  | ||||||
|   roc_curve(truth = tip, .pred_yes) %>%  |   roc_curve(truth = tip, .pred_yes) %>%  | ||||||
|   filter(is.finite(.threshold)) %>%  |   filter(is.finite(.threshold)) %>%  | ||||||
|   pivot_longer( |   pivot_longer(c(specificity, sensitivity), names_to = "statistic", values_to = "value") %>%  | ||||||
|     c(specificity, sensitivity), |  | ||||||
|     names_to = "statistic", |  | ||||||
|     values_to = "value" |  | ||||||
|   ) %>% |  | ||||||
|   rename(`event threshold` = .threshold) %>%  |   rename(`event threshold` = .threshold) %>%  | ||||||
|   ggplot(aes( |   ggplot(aes(x = `event threshold`, y = value, col = statistic, group = statistic)) +  | ||||||
|     x = `event threshold`, |  | ||||||
|     y = value, |  | ||||||
|     col = statistic, |  | ||||||
|     group = statistic |  | ||||||
|   )) + |  | ||||||
|   geom_line() + |   geom_line() + | ||||||
|   scale_color_brewer(palette = "Dark2") + |   scale_color_brewer(palette = "Dark2") + | ||||||
|   labs(y = NULL) + |   labs(y = NULL) + | ||||||
| @@ -1044,11 +1010,8 @@ collect_metrics(final_fit) | |||||||
| 
 | 
 | ||||||
| ```{r} | ```{r} | ||||||
| require(tidyverse) | require(tidyverse) | ||||||
| sitedf <- readr::read_csv( | sitedf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv") |> | ||||||
|   "https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv" |   select(SITE_ID, | ||||||
| ) |> |  | ||||||
|   select( |  | ||||||
|     SITE_ID, |  | ||||||
|     lon = LON_DD,  |     lon = LON_DD,  | ||||||
|     lat = LAT_DD,  |     lat = LAT_DD,  | ||||||
|     name = LAKENAME,  |     name = LAKENAME,  | ||||||
| @@ -1056,27 +1019,25 @@ sitedf <- readr::read_csv( | |||||||
|     zmax = DEPTHMAX |     zmax = DEPTHMAX | ||||||
|     ) |> |     ) |> | ||||||
|   group_by(SITE_ID) |> |   group_by(SITE_ID) |> | ||||||
|   summarize( |   summarize(lon = mean(lon, na.rm = TRUE), | ||||||
|     lon = mean(lon, na.rm = TRUE), |  | ||||||
|     lat = mean(lat, na.rm = TRUE), |     lat = mean(lat, na.rm = TRUE), | ||||||
|     name = unique(name), |     name = unique(name), | ||||||
|     area = mean(area, na.rm = TRUE), |     area = mean(area, na.rm = TRUE), | ||||||
|     zmax = mean(zmax, na.rm = TRUE) |     zmax = mean(zmax, na.rm = TRUE)) | ||||||
|   ) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| visitdf <- readr::read_csv( | visitdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv") |> | ||||||
|   "https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv" |   select(SITE_ID, | ||||||
| ) |> |     date = DATE_PROFILE, | ||||||
|   select(SITE_ID, date = DATE_PROFILE, year = YEAR, visit = VISIT_NO) |> |     year = YEAR, | ||||||
|  |     visit = VISIT_NO | ||||||
|  |   ) |> | ||||||
|   distinct() |   distinct() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| waterchemdf <- readr::read_csv( | 
 | ||||||
|   "https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv" | waterchemdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv") |> | ||||||
| ) |> |   select(SITE_ID, | ||||||
|   select( |  | ||||||
|     SITE_ID, |  | ||||||
|     date = DATE_PROFILE, |     date = DATE_PROFILE, | ||||||
|     depth = DEPTH, |     depth = DEPTH, | ||||||
|     temp = TEMP_FIELD, |     temp = TEMP_FIELD, | ||||||
| @@ -1085,43 +1046,38 @@ waterchemdf <- readr::read_csv( | |||||||
|     cond = COND_FIELD, |     cond = COND_FIELD, | ||||||
|   ) |   ) | ||||||
| 
 | 
 | ||||||
| sddf <- readr::read_csv( | sddf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv") |> | ||||||
|   "https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv" |   select(SITE_ID,  | ||||||
| ) |> |  | ||||||
|   select( |  | ||||||
|     SITE_ID, |  | ||||||
|     date = DATE_SECCHI,  |     date = DATE_SECCHI,  | ||||||
|     sd = SECMEAN, |     sd = SECMEAN, | ||||||
|     clear_to_bottom = CLEAR_TO_BOTTOM |     clear_to_bottom = CLEAR_TO_BOTTOM | ||||||
|   ) |   ) | ||||||
| 
 | 
 | ||||||
| trophicdf <- readr::read_csv( | trophicdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv") |> | ||||||
|   "https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv" |   select(SITE_ID, | ||||||
| ) |> |     visit = VISIT_NO, | ||||||
|   select(SITE_ID, visit = VISIT_NO, tp = PTL, tn = NTL, chla = CHLA) |> |     tp = PTL, | ||||||
|  |     tn = NTL, | ||||||
|  |     chla = CHLA) |> | ||||||
|   left_join(visitdf, by = c("SITE_ID", "visit")) |> |   left_join(visitdf, by = c("SITE_ID", "visit")) |> | ||||||
|   select(-year, -visit) |> |   select(-year, -visit) |> | ||||||
|   group_by(SITE_ID, date) |> |   group_by(SITE_ID, date) |> | ||||||
|   summarize( |   summarize(tp = mean(tp, na.rm = TRUE), | ||||||
|     tp = mean(tp, na.rm = TRUE), |  | ||||||
|     tn = mean(tn, na.rm = TRUE), |     tn = mean(tn, na.rm = TRUE), | ||||||
|     chla = mean(chla, na.rm = TRUE) |     chla = mean(chla, na.rm = TRUE) | ||||||
|   ) |   ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| phytodf <- readr::read_csv( | 
 | ||||||
|   "https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv" | phytodf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv") |> | ||||||
| ) |> |   select(SITE_ID, | ||||||
|   select( |  | ||||||
|     SITE_ID, |  | ||||||
|     date = DATEPHYT, |     date = DATEPHYT, | ||||||
|     depth = SAMPLE_DEPTH, |     depth = SAMPLE_DEPTH, | ||||||
|     phyta = DIVISION, |     phyta = DIVISION, | ||||||
|     genus = GENUS, |     genus = GENUS, | ||||||
|     species = SPECIES, |     species = SPECIES, | ||||||
|     tax = TAXANAME, |     tax = TAXANAME, | ||||||
|     abund = ABUND |     abund = ABUND) |> | ||||||
|   ) |> |  | ||||||
|   mutate(phyta = gsub(" .*$", "", phyta)) |> |   mutate(phyta = gsub(" .*$", "", phyta)) |> | ||||||
|   filter(!is.na(genus)) |> |   filter(!is.na(genus)) |> | ||||||
|   group_by(SITE_ID, date, depth, phyta, genus) |> |   group_by(SITE_ID, date, depth, phyta, genus) |> | ||||||
| @@ -1132,7 +1088,7 @@ envdf <- waterchemdf |> | |||||||
|   filter(depth < 2) |> |   filter(depth < 2) |> | ||||||
|   select(-depth) |> |   select(-depth) |> | ||||||
|   group_by(SITE_ID, date) |> |   group_by(SITE_ID, date) |> | ||||||
|   summarise_all(~ mean(., na.rm = TRUE)) |> |   summarise_all(~mean(., na.rm = TRUE)) |> | ||||||
|   ungroup() |> |   ungroup() |> | ||||||
|   left_join(sddf, by = c("SITE_ID", "date")) |> |   left_join(sddf, by = c("SITE_ID", "date")) |> | ||||||
|   left_join(trophicdf, by = c("SITE_ID", "date")) |   left_join(trophicdf, by = c("SITE_ID", "date")) | ||||||
| @@ -1141,21 +1097,19 @@ nla <- envdf |> | |||||||
|   left_join(phytodf) |> |   left_join(phytodf) |> | ||||||
|   left_join(sitedf, by = "SITE_ID") |> |   left_join(sitedf, by = "SITE_ID") |> | ||||||
|   filter(!purrr::map_lgl(phytodf, is.null)) |> |   filter(!purrr::map_lgl(phytodf, is.null)) |> | ||||||
|   mutate( |   mutate(cyanophyta = purrr::map(phytodf, ~ .x |> | ||||||
|     cyanophyta = purrr::map( |  | ||||||
|       phytodf, |  | ||||||
|       ~ .x |> |  | ||||||
|     dplyr::filter(phyta == "Cyanophyta") |> |     dplyr::filter(phyta == "Cyanophyta") |> | ||||||
|     summarize(cyanophyta = sum(abund, na.rm = TRUE)) |     summarize(cyanophyta = sum(abund, na.rm = TRUE)) | ||||||
|     ) |   )) |> | ||||||
|   ) |> |  | ||||||
|   unnest(cyanophyta) |> |   unnest(cyanophyta) |> | ||||||
|   select(-phyta) |> |   select(-phyta) |> | ||||||
|   mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE)) |   mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE)) | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| # library(rmdify) | # library(rmdify) | ||||||
| # library(dwfun) | # library(dwfun) | ||||||
| # dwfun::init() | # dwfun::init() | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -1173,20 +1127,18 @@ skimr::skim(nla) | |||||||
| nla |> | nla |> | ||||||
|   filter(tp > 1) |> |   filter(tp > 1) |> | ||||||
|   ggplot(aes(tn, tp)) + |   ggplot(aes(tn, tp)) + | ||||||
|   geom_point() + | geom_point() + | ||||||
|   geom_smooth(method = "lm") + | geom_smooth(method = "lm") + | ||||||
|   scale_x_log10( | scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||||
|     breaks = scales::trans_breaks("log10", function(x) 10^x), |                labels = scales::trans_format("log10", scales::math_format(10^.x))) + | ||||||
|     labels = scales::trans_format("log10", scales::math_format(10^.x)) | scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||||
|   ) + |                labels = scales::trans_format("log10", scales::math_format(10^.x))) | ||||||
|   scale_y_log10( |  | ||||||
|     breaks = scales::trans_breaks("log10", function(x) 10^x), |  | ||||||
|     labels = scales::trans_format("log10", scales::math_format(10^.x)) |  | ||||||
|   ) |  | ||||||
| 
 | 
 | ||||||
| m1 <- lm(log10(tp) ~ log10(tn), data = nla) | m1 <- lm(log10(tp) ~ log10(tn), data = nla) | ||||||
| 
 | 
 | ||||||
| summary(m1) | summary(m1) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## 复杂指标 | ## 复杂指标 | ||||||
| @@ -1195,20 +1147,18 @@ summary(m1) | |||||||
| nla |> | nla |> | ||||||
|   filter(tp > 1) |> |   filter(tp > 1) |> | ||||||
|   ggplot(aes(tp, cyanophyta)) + |   ggplot(aes(tp, cyanophyta)) + | ||||||
|   geom_point() + | geom_point() + | ||||||
|   geom_smooth(method = "lm") + | geom_smooth(method = "lm") + | ||||||
|   scale_x_log10( | scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||||
|     breaks = scales::trans_breaks("log10", function(x) 10^x), |                labels = scales::trans_format("log10", scales::math_format(10^.x))) + | ||||||
|     labels = scales::trans_format("log10", scales::math_format(10^.x)) | scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||||
|   ) + |                labels = scales::trans_format("log10", scales::math_format(10^.x))) | ||||||
|   scale_y_log10( |  | ||||||
|     breaks = scales::trans_breaks("log10", function(x) 10^x), |  | ||||||
|     labels = scales::trans_format("log10", scales::math_format(10^.x)) |  | ||||||
|   ) |  | ||||||
| 
 | 
 | ||||||
| m2 <- lm(log10(cyanophyta) ~ log10(tp), data = nla) | m2 <- lm(log10(cyanophyta) ~ log10(tp), data = nla) | ||||||
| 
 | 
 | ||||||
| summary(m2) | summary(m2) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -1220,14 +1170,13 @@ summary(m2) | |||||||
| (nla_split <- rsample::initial_split(nla, prop = 0.7, strata = zmax)) | (nla_split <- rsample::initial_split(nla, prop = 0.7, strata = zmax)) | ||||||
| (nla_train <- training(nla_split)) | (nla_train <- training(nla_split)) | ||||||
| (nla_test <- testing(nla_split)) | (nla_test <- testing(nla_split)) | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## tidymodels - recipe | ## tidymodels - recipe | ||||||
| 
 | 
 | ||||||
| ```{r} | ```{r} | ||||||
| nla_formula <- as.formula( | nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom") | ||||||
|   "cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom" |  | ||||||
| ) |  | ||||||
| # nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn") | # nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn") | ||||||
| nla_recipe <- recipes::recipe(nla_formula, data = nla_train) |> | nla_recipe <- recipes::recipe(nla_formula, data = nla_train) |> | ||||||
|   recipes::step_string2factor(all_nominal()) |> |   recipes::step_string2factor(all_nominal()) |> | ||||||
| @@ -1244,7 +1193,7 @@ nla_recipe | |||||||
| nla_cv <- recipes::bake( | nla_cv <- recipes::bake( | ||||||
|     nla_recipe,  |     nla_recipe,  | ||||||
|     new_data = training(nla_split) |     new_data = training(nla_split) | ||||||
| ) |> |   ) |> | ||||||
|   rsample::vfold_cv(v = 10) |   rsample::vfold_cv(v = 10) | ||||||
| nla_cv | nla_cv | ||||||
| ``` | ``` | ||||||
| @@ -1311,7 +1260,7 @@ if (FALSE) { | |||||||
|     metrics = yardstick::metric_set(rmse, rsq, mae), |     metrics = yardstick::metric_set(rmse, rsq, mae), | ||||||
|     control = tune::control_grid(verbose = TRUE) |     control = tune::control_grid(verbose = TRUE) | ||||||
|   ) |   ) | ||||||
|   saveRDS(xgboost_tuned, "./xgboost_tuned.RDS") | saveRDS(xgboost_tuned, "./xgboost_tuned.RDS") | ||||||
| } | } | ||||||
| xgboost_tuned <- readRDS("./xgboost_tuned.RDS") | xgboost_tuned <- readRDS("./xgboost_tuned.RDS") | ||||||
| ``` | ``` | ||||||
| @@ -1349,7 +1298,7 @@ xgboost_tuned |> | |||||||
| 
 | 
 | ||||||
| ```{r} | ```{r} | ||||||
| xgboost_best_params <- xgboost_tuned |> | xgboost_best_params <- xgboost_tuned |> | ||||||
|   tune::select_best(metric = "rmse") |   tune::select_best("rmse") | ||||||
| 
 | 
 | ||||||
| knitr::kable(xgboost_best_params) | knitr::kable(xgboost_best_params) | ||||||
| ``` | ``` | ||||||
| @@ -1382,15 +1331,14 @@ train_prediction <- xgboost_model_final |> | |||||||
|   ) |> |   ) |> | ||||||
|   # predict the sale prices for the training data |   # predict the sale prices for the training data | ||||||
|   predict(new_data = train_processed) |> |   predict(new_data = train_processed) |> | ||||||
|   bind_cols( |   bind_cols(nla_train |> | ||||||
|     nla_train |> |   mutate(.obs = log10(cyanophyta))) | ||||||
|       mutate(.obs = log10(cyanophyta)) |  | ||||||
|   ) |  | ||||||
| xgboost_score_train <-  | xgboost_score_train <-  | ||||||
|   train_prediction |> |   train_prediction |> | ||||||
|   yardstick::metrics(.obs, .pred) |> |   yardstick::metrics(.obs, .pred) |> | ||||||
|   mutate(.estimate = format(round(.estimate, 2), big.mark = ",")) |   mutate(.estimate = format(round(.estimate, 2), big.mark = ",")) | ||||||
| knitr::kable(xgboost_score_train) | knitr::kable(xgboost_score_train) | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## tidymodels - train evaluation | ## tidymodels - train evaluation | ||||||
| @@ -1401,8 +1349,10 @@ knitr::kable(xgboost_score_train) | |||||||
| #| out-width: "80%" | #| out-width: "80%" | ||||||
| train_prediction |> | train_prediction |> | ||||||
|   ggplot(aes(.pred, .obs)) + |   ggplot(aes(.pred, .obs)) + | ||||||
|   geom_point() + | geom_point() + | ||||||
|   geom_smooth(method = "lm") | geom_smooth(method = "lm") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -1420,10 +1370,8 @@ test_prediction <- xgboost_model_final |> | |||||||
|   ) |> |   ) |> | ||||||
|   # use the training model fit to predict the test data |   # use the training model fit to predict the test data | ||||||
|   predict(new_data = test_processed) |> |   predict(new_data = test_processed) |> | ||||||
|   bind_cols( |   bind_cols(nla_test |> | ||||||
|     nla_test |> |   mutate(.obs = log10(cyanophyta))) | ||||||
|       mutate(.obs = log10(cyanophyta)) |  | ||||||
|   ) |  | ||||||
| 
 | 
 | ||||||
| # measure the accuracy of our model using `yardstick` | # measure the accuracy of our model using `yardstick` | ||||||
| xgboost_score <- test_prediction |> | xgboost_score <- test_prediction |> | ||||||
| @@ -1446,7 +1394,7 @@ cyanophyta_prediction_residual <- test_prediction |> | |||||||
|   select(.pred, residual_pct) |   select(.pred, residual_pct) | ||||||
| 
 | 
 | ||||||
| cyanophyta_prediction_residual |> | cyanophyta_prediction_residual |> | ||||||
|   ggplot(aes(x = .pred, y = residual_pct)) + | ggplot(aes(x = .pred, y = residual_pct)) + | ||||||
|   geom_point() + |   geom_point() + | ||||||
|   xlab("Predicted Cyanophyta") + |   xlab("Predicted Cyanophyta") + | ||||||
|   ylab("Residual (%)") |   ylab("Residual (%)") | ||||||
| @@ -1463,8 +1411,9 @@ cyanophyta_prediction_residual |> | |||||||
| #| out-width: "80%" | #| out-width: "80%" | ||||||
| test_prediction |> | test_prediction |> | ||||||
|   ggplot(aes(.pred, .obs)) + |   ggplot(aes(.pred, .obs)) + | ||||||
|   geom_point() + | geom_point() + | ||||||
|   geom_smooth(method = "lm", colour = "black") | geom_smooth(method = "lm", colour = "black") | ||||||
|  | 
 | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @@ -1472,4 +1421,5 @@ test_prediction |> | |||||||
| ## 欢迎讨论!{.center} | ## 欢迎讨论!{.center} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||||
|  | 
 | ||||||
| Before Width: | Height: | Size: 61 KiB After Width: | Height: | Size: 61 KiB |