Compare commits
	
		
			22 Commits
		
	
	
		
			main
			...
			133cb1533d
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 133cb1533d | |||
| 9ecb11b788 | |||
| 6412cc5560 | |||
| bc628545b6 | |||
| 1ead84ac70 | |||
| 10ffdd46f3 | |||
| 987a3eaea9 | |||
| c292d0ffab | |||
| cb2e22fde5 | |||
| 8a31a565a8 | |||
| 6e3f134635 | |||
| c883d6df6a | |||
| fda04d79f1 | |||
| a4596a9836 | |||
| 7902f28a7a | |||
| 654fb2f024 | |||
| 352ca2b1ce | |||
| 82248cb24b | |||
| a21430385f | |||
| 78196d49ed | |||
| 30dd043c3b | |||
| 2db87d5e89 | 
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						| @@ -7,3 +7,5 @@ _variables.yml | ||||
| _freeze/ | ||||
| *_cache/ | ||||
| *_files/ | ||||
| SD/_*/ | ||||
| homework/ | ||||
|   | ||||
							
								
								
									
										1
									
								
								.source_state
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1 @@ | ||||
| daa3ba71b87598e29019c2f370272767 | ||||
							
								
								
									
										382
									
								
								Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,382 @@ | ||||
| # Makefile for Quarto Project Automation | ||||
| # Detect OS | ||||
| HOSTNAME := $(shell hostname) | ||||
| OS := $(shell uname | tr A-Z a-z) | ||||
| ifeq ($(OS), darwin) | ||||
|     SEDI := sed -i '' | ||||
|     OS := OSX | ||||
| else ifeq ($(OS), linux) | ||||
|     SEDI := sed -i | ||||
|     OS := linux | ||||
| else | ||||
|     $(error Unknown operating system) | ||||
| endif | ||||
|  | ||||
| # Fetch Git branch and project details | ||||
| branchname := $(shell git branch --show-current) | ||||
| reponame := $(shell basename $(shell git rev-parse --show-toplevel)) | ||||
| projtype := $(shell basename $(shell dirname $(shell git rev-parse --show-toplevel))) | ||||
| pubtype := $(if $(findstring PUB,$(branchname)),public,protected) | ||||
| remotedir := dwuser@drwater.net:/home/www/drc/$(projtype)/$(pubtype)/$(reponame)/$(branchname) | ||||
| outputdir := $(shell awk -F': *' '/^ *output-dir:/ {print $$2 "/" }' ./_quarto.yml) | ||||
| siteurl := https://drc.drwater.net/$(projtype)/$(pubtype)/$(reponame)/$(branchname) | ||||
|  | ||||
| branchnames := "TX\|FJ\|YF\|ZY\|WW\|JB\|YY\|YJ\|DYF" | ||||
|  | ||||
| # Variables for colors and port | ||||
| bcolor := grey | ||||
| port := 4199 | ||||
|  | ||||
| # Set background color based on branch name | ||||
| ifeq ($(findstring R1,$(branchname)),R1) | ||||
|     bcolor := orange | ||||
| else ifeq ($(findstring R2,$(branchname)),R2) | ||||
|     bcolor := lightblue | ||||
| else ifeq ($(findstring R3,$(branchname)),R3) | ||||
|     bcolor := lightgreen | ||||
| else ifeq ($(findstring PUB,$(branchname)),PUB) | ||||
|     bcolor := light | ||||
| endif | ||||
|  | ||||
| # Set port based on branch name | ||||
| branch_ports := main:4200 SM:4201 TX:4202 FJ:4203 YF:4204 ZY:4205 WW:4206 JB:4207 YY:4208 YJ:4209 DYF:4210 | ||||
| port := $(shell echo $(branch_ports) | tr " " "\n" | grep -E "^$(branchname):" | sed -E 's/^$(branchname):([0-9]+)/\1/') | ||||
| port := $(if $(port),$(port),4199) | ||||
|  | ||||
| # Define the state file | ||||
| STATE_FILE := .source_state | ||||
|  | ||||
| # checkfile | ||||
| GREPSTR := " \|(\|)\|^submit\|^analysis\|_cache\|_freeze\|^site_libs\|^www" | ||||
|  | ||||
| # Default target | ||||
| .PHONY: all preview readme rsync local clean upload fix_links force check_git_status | ||||
|  | ||||
| all: local upload clean commit push | ||||
| 	 | ||||
| force: updrefbib check_git_status updvariable render | ||||
|  | ||||
| local: updrefbib check_git_status updvariable lazyrender | ||||
|  | ||||
| updmakefile: | ||||
| 	@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \ | ||||
| 		echo "基于 $$HOME/bin/publish2dw.Makefile 更新本项目 Makefile..."; \ | ||||
| 		cp "$$HOME/bin/publish2dw.Makefile" "./Makefile"; \ | ||||
| 		git add "Makefile" && git commit -m "Update Makefile"; \ | ||||
| 		echo "本项目Makefile更新完成."; \ | ||||
| 	else \ | ||||
| 		echo "Makefile 无需在本系统上更新."; \ | ||||
| 	fi | ||||
|  | ||||
|  | ||||
| updrefbib: | ||||
| 	@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \ | ||||
| 		echo "更新本项目参考文献..."; \ | ||||
| 		cp "$$HOME/literature/Ref.bib" "./BB/"; \ | ||||
| 		echo "推送本地参考文献到远程服务器..."; \ | ||||
| 		rsync -azvu --progress "$$HOME/literature/Ref.bib" "drwater.net:/home/www/drc/datapool/public/BB/Ref.bib"; \ | ||||
| 		echo "本项目参考文献更新完成."; \ | ||||
| 	else \ | ||||
| 	  echo "检查网络连通性..."; \ | ||||
| 		if ping -c 1 -W 1 drc.drwater.net > /dev/null 2>&1; then \ | ||||
| 			echo "网络正常,更新本项目参考文献..."; \ | ||||
| 			wget -O BB/Ref.bib "https://drc.drwater.net/datapool/public/BB/Ref.bib"; \ | ||||
| 			echo "本项目参考文献更新完成."; \ | ||||
| 		else \ | ||||
| 			echo "网络不可用,跳过参考文献更新."; \ | ||||
| 		fi; \ | ||||
| 	fi; \ | ||||
| 	git add BB/Ref.bib; \ | ||||
| 	if [ "$$(git diff --cached)" ]; then \ | ||||
| 		git commit -m "Update Ref.bib"; \ | ||||
| 	fi | ||||
|  | ||||
| check_git_status: | ||||
| 	@uncommitted=$$(git status --porcelain); \ | ||||
| 	if [ -n "$$uncommitted" ]; then \ | ||||
| 	  git status; \ | ||||
| 		read -p "当前存在未提交的修改(如上),是否要提交?(Y/N, default is N): " answer; \ | ||||
| 		answer=$${answer:-N}; \ | ||||
| 		if [ "$$answer" = "Y" ] || [ "$$answer" = "y" ]; then \ | ||||
| 			read -p "请输入修改说明: " message; \ | ||||
| 			git add . && git commit -m "$$message"; \ | ||||
| 		else \ | ||||
| 			echo "未提交,如后续操作为pull,则无法继续"; \ | ||||
| 		fi; \ | ||||
| 	fi | ||||
|  | ||||
|  | ||||
|  | ||||
| # Lazy render with hash checking | ||||
| lazyrender: | ||||
| 	@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \ | ||||
| 	if [ ! -f $(STATE_FILE) ] || [ "$$current_hash" != "$$(cat $(STATE_FILE))" ]; then \ | ||||
| 		echo "源文件发生变化, 重新编译..."; \ | ||||
| 		echo "$$current_hash" > $(STATE_FILE); \ | ||||
| 		$(MAKE) render; \ | ||||
| 	else \ | ||||
| 		echo "源文件无变化, 跳过编译..."; \ | ||||
| 		exit 0; \ | ||||
| 	fi | ||||
|  | ||||
| # Render target | ||||
| render: | ||||
| 	@quarto render | ||||
|  | ||||
| commit: | ||||
| 	@echo "提交修改(commit)..."; \ | ||||
| 		git add .; \ | ||||
| 		if [ -n "$$(git diff --cached)" ]; then \ | ||||
| 		git commit -m "render compile"; \ | ||||
| 		else \ | ||||
| 		echo "没有修改记录,跳过."; \ | ||||
| 		fi; \ | ||||
|  | ||||
| # Pull changes from the specified branch based on the current branch | ||||
| pull: | ||||
| 	@echo "从远程拉取项目更新..."; \ | ||||
| 		$(MAKE) check_git_status; \ | ||||
| 		git pull; \ | ||||
| 		current_branch=$$(git rev-parse --abbrev-ref HEAD); \ | ||||
| 		if [ "$$current_branch" = main ]; then \ | ||||
| 		  echo "当前分枝为$$current_branch."; \ | ||||
| 			remote_branch=$$(git branch --remote | grep -v 'main' | grep $(branchnames) | awk '{print $$1}' | sed 's/origin\///' | head -n 1); \ | ||||
| 			if [ -n "$$remote_branch" ]; then \ | ||||
| 		    echo "尝试从远程分枝$$remote_branch 拉取更新..."; \ | ||||
| 				git pull --rebase origin $$remote_branch; \ | ||||
| 			else \ | ||||
| 				echo "远程无可用分支$$remote_branch."; \ | ||||
| 			fi; \ | ||||
| 			else \ | ||||
| 			echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \ | ||||
| 			git pull --rebase origin main; \ | ||||
| 		fi | ||||
|  | ||||
| # Pull changes from the main branch | ||||
| pullmain: | ||||
| 	$(MAKE) check_git_status; \ | ||||
| 		@current_branch=$$(git rev-parse --abbrev-ref HEAD); \ | ||||
| 		@echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \ | ||||
| 		git pull --rebase origin main; \ | ||||
|  | ||||
| push: | ||||
| 	@echo "推送到远程..."; \ | ||||
| 		git push | ||||
|  | ||||
| filehash: | ||||
| 	@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \ | ||||
| 	echo "$$current_hash" > $(STATE_FILE) | ||||
|  | ||||
| # Preview the site on a specific port | ||||
| preview: | ||||
| 	@quarto preview --port $(port) | ||||
|  | ||||
| # Generate README.md | ||||
| readme: | ||||
| 	@quarto render index.qmd -t markdown -o README.md | ||||
| 	@sed -e '/^---/,/^---/d' "$(outputdir)/README.md" > README.md | ||||
| 	@rm "$(outputdir)/README.md" | ||||
|  | ||||
| # Sync files with remote server | ||||
| rsync: | ||||
| 	@rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)" | ||||
|  | ||||
| # Open local site | ||||
| open: | ||||
| 	@if [ "$(OS)" = "OSX" ]; then open "$(outputdir)/index.html"; fi | ||||
|  | ||||
| # Clean unnecessary files | ||||
| clean: | ||||
| 	@rm -f ./*.spl  ./*.bbl ./*.blg ./*.log ./*.tex ./*.bcf ./*.tex.sedbak ./*.fdb_latexmk | ||||
|  | ||||
| # Upload files to server and fix links | ||||
| upload: backupdocx | ||||
| 	@mkdir -p "$(outputdir)" && chmod -R 2775 "$(outputdir)" | ||||
| 	@$(MAKE) fix_links | ||||
| 	@if rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; then \ | ||||
| 		if [ "$(OS)" = "OSX" ]; then \ | ||||
| 			open "$(siteurl)" 2>/dev/null; \ | ||||
| 		fi; \ | ||||
| 	else \ | ||||
| 		echo "Rsync failed. Attempting alternative upload method..."; \ | ||||
| 		mkdir -p "$(reponame)"; \ | ||||
| 		rsync -azvu --progress --delete -r "$(reponame)" "$(dir $(remotedir))"; \ | ||||
| 		rm -rf "$(reponame)"; \ | ||||
| 		rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; \ | ||||
| 		if [ "$(OS)" = "OSX" ]; then \ | ||||
| 			open "$(siteurl)" 2>/dev/null; \ | ||||
| 		fi; \ | ||||
| 	fi | ||||
|  | ||||
| backupdocx: | ||||
| 	@echo "备份MS.docx文件..."; \ | ||||
| 	currentcommithash=$$(git rev-parse --short HEAD); \ | ||||
| 	datetime=$$(git show -s --format=%ci $$currentcommithash | sed 's/[-: ]//g' | cut -c3-12); \ | ||||
| 	mkdir -p TC/MS/; \ | ||||
| 	existing_file=$$(find TC/MS -name "MS*.docx" -exec cmp -s www/MS/MS.docx {} \; -print -quit); \ | ||||
| 	if [ -n "$$existing_file" ]; then \ | ||||
| 		echo "与www/MS/MS.docx 内容相同的备份文件已存在: $$existing_file"; \ | ||||
| 		echo "无需备份."; \ | ||||
| 	else \ | ||||
| 		if [ ! -e TC/MS/MS$${datetime}_$${currentcommithash}.docx ]; then \ | ||||
| 			cp www/MS/MS.docx TC/MS/MS$${datetime}_$${currentcommithash}.docx; \ | ||||
| 			echo "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx完成."; \ | ||||
| 			git add TC/MS/MS$${datetime}_$${currentcommithash}.docx; \ | ||||
| 			if [ "$$(git diff --cached)" ]; then \ | ||||
| 				git commit -m "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx"; \ | ||||
| 			fi; \ | ||||
| 		else \ | ||||
| 			echo "TC/MS/MS$${datetime}_$${currentcommithash}.docx已存在,无需备份."; \ | ||||
| 		fi; \ | ||||
| 	fi; | ||||
|  | ||||
|  | ||||
| trackchange: | ||||
| 	@if [ "$(projtype)" != "manuscript" ]; then \ | ||||
| 	exit 0; \ | ||||
| 	fi; \ | ||||
| 	echo "选择两个提交以比较文档..."; \ | ||||
| 	hashes=$$(git log --pretty=format:'%h: %s BY %an (%ar)' \ | ||||
| 	| grep -E "$$(ls TC/MS/*.docx | xargs -n1 basename | sed -E 's/MS.*_([0-9a-f]+)\.docx/\1/' | tr '\n' '|')SMT_】" \ | ||||
| 	| fzf --multi --reverse --preview="echo {}" ); \ | ||||
| 	echo $$hashes; \ | ||||
| 	hash1=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | tail -n 1 | awk '{print $$1}' | tr -d ':'); \ | ||||
| 	hash1=$$(git rev-parse --short $${hash1}^); \ | ||||
| 	datetime1=$$(git show -s --format=%ci $$hash1 | sed 's/[-: ]//g' | cut -c3-12); \ | ||||
| 	hash2=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | head -n 1 | awk '{print $$1}' | tr -d ':'); \ | ||||
| 	hash2=$$(git rev-parse --short $${hash2}^); \ | ||||
| 	datetime2=$$(git show -s --format=%ci $$hash2 | sed 's/[-: ]//g' | cut -c3-12); \ | ||||
| 	if [ -z "$$hash1" ] || [ -z "$$hash2" ]; then \ | ||||
| 	echo "必须选择两个提交."; \ | ||||
| 	exit 1; \ | ||||
| 	fi; \ | ||||
| 	doc1="TC/MS/MS$${datetime1}_$$hash1.docx"; \ | ||||
| 	echo "$$doc1"; \ | ||||
| 	doc2="TC/MS/MS$${datetime2}_$$hash2.docx"; \ | ||||
| 	echo "$$doc2"; \ | ||||
| 	if [ -f "$$doc1" ] && [ -f "$$doc2" ] && [ "$$doc1" != "$$doc2" ]; then \ | ||||
| 	echo "打开文件: $$doc1 和 $$doc2"; \ | ||||
| 	open "$$doc1" "$$doc2"; \ | ||||
| 	printf "MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}" | pbcopy; \ | ||||
| 	echo "请在word中对比两个版本形成带修改痕迹的版本,并保存至TC/MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}.docx!"; \ | ||||
| 	else \ | ||||
| 	echo "一个或两个文件不存在: $$doc1, $$doc2"; \ | ||||
| 	exit 1; \ | ||||
| 	fi | ||||
|  | ||||
| # Fix links in www directory | ||||
| fix_links: | ||||
| 	@find ./www -type f -name "*.html" -exec sed -i.bak \ | ||||
| 		-e "s/{{< var branch >}}/$(branchname)/g" \ | ||||
| 		-e "s/{{< var pubtype >}}/$(pubtype)/g" \ | ||||
| 		-e "s/{{< var projtype >}}/$(projtype)/g" \ | ||||
| 		-e "s/{{< var reponame >}}/$(reponame)/g" \ | ||||
| 		-e "s/$(reponame)\/blob/$(reponame)\/raw\/branch/g" \ | ||||
| 		-e "s/$(reponame)\/edit/$(reponame)\/_edit/g" {} + | ||||
| 	@find ./www -type f -name "*.bak" -exec rm {} + | ||||
|  | ||||
| updvariable: | ||||
| 	@touch _variables.yml  # 如果文件不存在则创建 | ||||
| 	@grep -q '^reponame:' _variables.yml || echo "reponame: $(reponame)" >> _variables.yml | ||||
| 	@if grep -q '^reponame:' _variables.yml; then \ | ||||
| 		$(SEDI) 's/^reponame:.*/reponame: $(reponame)/' _variables.yml; \ | ||||
| 	else \ | ||||
| 		echo "reponame: $(reponame)" >> _variables.yml; \ | ||||
| 	fi | ||||
| 	@grep -q '^projtype:' _variables.yml || echo "projtype: $(projtype)" >> _variables.yml | ||||
| 	@if grep -q '^projtype:' _variables.yml; then \ | ||||
| 		$(SEDI) 's/^projtype:.*/projtype: $(projtype)/' _variables.yml; \ | ||||
| 	else \ | ||||
| 		echo "projtype: $(projtype)" >> _variables.yml; \ | ||||
| 	fi | ||||
| 	@grep -q '^branch:' _variables.yml || echo "branch: $(branchname)" >> _variables.yml | ||||
| 	@if grep -q '^branch:' _variables.yml; then \ | ||||
| 		$(SEDI) 's/^branch:.*/branch: $(branchname)/' _variables.yml; \ | ||||
| 	else \ | ||||
| 		echo "branch: $(branchname)" >> _variables.yml; \ | ||||
| 	fi | ||||
| 	@grep -q '^pubtype:' _variables.yml || echo "pubtype: $(pubtype)" >> _variables.yml | ||||
| 	@if grep -q '^pubtype:' _variables.yml; then \ | ||||
| 		$(SEDI) 's/^pubtype:.*/pubtype: $(pubtype)/' _variables.yml; \ | ||||
| 	else \ | ||||
| 		echo "pubtype: $(pubtype)" >> _variables.yml; \ | ||||
| 	fi | ||||
| 	@grep -q '^nwAB:' _variables.yml || echo "nwAB: $(nwAB)" >> _variables.yml | ||||
| 	@if grep -q '^nwAB:' _variables.yml; then \ | ||||
| 		$(SEDI) 's/^nwAB:.*/nwAB: $(nwAB)/' _variables.yml; \ | ||||
| 	else \ | ||||
| 		echo "nwAB: $(nwAB)" >> _variables.yml; \ | ||||
| 	fi | ||||
| 	@grep -q '^nwMS:' _variables.yml || echo "nwMS: $(nwMS)" >> _variables.yml | ||||
| 	@if grep -q '^nwMS:' _variables.yml; then \ | ||||
| 		$(SEDI) 's/^nwMS:.*/nwMS: $(nwMS)/' _variables.yml; \ | ||||
| 	else \ | ||||
| 		echo "nwMS: $(nwMS)" >> _variables.yml; \ | ||||
| 	fi | ||||
| 	@grep -q '^figtblMS:' _variables.yml || echo "figtblMS: $(figtblMS)" >> _variables.yml | ||||
| 	@if grep -q '^figtblMS:' _variables.yml; then \ | ||||
| 		$(SEDI) 's/^figtblMS:.*/figtblMS: $(figtblMS)/' _variables.yml; \ | ||||
| 	else \ | ||||
| 		echo "figtblMS: $(figtblMS)" >> _variables.yml; \ | ||||
| 	fi | ||||
| 	@grep -q '^figtblSM:' _variables.yml || echo "figtblSM: $(figtblSM)" >> _variables.yml | ||||
| 	@if grep -q '^figtblSM:' _variables.yml; then \ | ||||
| 		$(SEDI) 's/^figtblSM:.*/figtblSM: $(figtblSM)/' _variables.yml; \ | ||||
| 	else \ | ||||
| 		echo "figtblSM: $(figtblSM)" >> _variables.yml; \ | ||||
| 	fi | ||||
| 	@mkpapervar | ||||
|  | ||||
|  | ||||
| # Help: list all available commands with descriptions (English and Chinese) | ||||
| help: | ||||
| 	@echo "Makefile for Quarto Project Automation" | ||||
| 	@echo "=======================================" | ||||
| 	@echo "Available targets (English):" | ||||
| 	@echo "" | ||||
| 	@echo "  make all           - Execute local build, upload, clean, and commit" | ||||
| 	@echo "  make force         - Force render, hash update, upload, clean, and commit" | ||||
| 	@echo "  make local         - Check git status and perform a lazy render if changes detected" | ||||
| 	@echo "  make check_git_status - Check for uncommitted changes and ask to commit them" | ||||
| 	@echo "  make lazyrender    - Render if source files have changed based on hash comparison" | ||||
| 	@echo "  make render        - Force Quarto to render the project" | ||||
| 	@echo "  make commit        - Commit changes if no previous uncommitted changes" | ||||
| 	@echo "  make filehash      - Generate and store the file hash of source files" | ||||
| 	@echo "  make preview       - Preview the site locally on the specific port (default: 4199)" | ||||
| 	@echo "  make readme        - Render README.md from Quarto index.qmd" | ||||
| 	@echo "  make rsync         - Sync output files with the remote server" | ||||
| 	@echo "  make open          - Open the generated site locally in the browser" | ||||
| 	@echo "  make clean         - Clean up unnecessary files" | ||||
| 	@echo "  make upload        - Upload files to the server and fix links" | ||||
| 	@echo "  make fix_links     - Fix HTML links in the 'www' directory for the remote server" | ||||
| 	@echo "  make updmakefile   - Update the Makefile" | ||||
| 	@echo "  make help          - Display this help message" | ||||
| 	@echo "" | ||||
| 	@echo "Available targets (中文):" | ||||
| 	@echo "" | ||||
| 	@echo "  make all           - 执行本地构建、上传、清理和提交" | ||||
| 	@echo "  make force         - 强制渲染、更新哈希、上传、清理并提交" | ||||
| 	@echo "  make local         - 检查Git状态,若检测到更改则进行懒惰渲染" | ||||
| 	@echo "  make check_git_status - 检查未提交的更改,并询问是否提交" | ||||
| 	@echo "  make lazyrender    - 如果源文件发生更改,则根据哈希比较进行渲染" | ||||
| 	@echo "  make render        - 强制 Quarto 渲染项目" | ||||
| 	@echo "  make commit        - 如果没有未提交的更改则提交" | ||||
| 	@echo "  make filehash      - 生成并存储源文件的哈希值" | ||||
| 	@echo "  make preview       - 本地在特定端口预览网站 (默认: 4199)" | ||||
| 	@echo "  make readme        - 从 Quarto 的 index.qmd 生成 README.md" | ||||
| 	@echo "  make rsync         - 将输出文件同步到远程服务器" | ||||
| 	@echo "  make open          - 在浏览器中打开生成的网站" | ||||
| 	@echo "  make clean         - 清理不必要的文件" | ||||
| 	@echo "  make upload        - 上传文件到服务器并修复链接" | ||||
| 	@echo "  make fix_links     - 修复 'www' 目录中的 HTML 链接" | ||||
| 	@echo "  make updmakefile   - 更新本项目 Makefile" | ||||
| 	@echo "  make help          - 显示此帮助信息" | ||||
| 	@echo "" | ||||
| 	@echo "Environment variables (English and 中文):" | ||||
| 	@echo "  bcolor             - Background color based on branch name (基于分支名的背景颜色)" | ||||
| 	@echo "  port               - Port number based on branch name (基于分支名的端口号)" | ||||
| 	@echo "  STATE_FILE         - File for storing hash state of source files (用于存储源文件哈希状态的文件)" | ||||
| 	@echo "  siteurl            - The URL where the site will be hosted (网站托管的 URL)" | ||||
| 	@echo "" | ||||
|  | ||||
|  | ||||
| @@ -47,9 +47,8 @@ knitr::opts_chunk$set(echo = TRUE) | ||||
| ### 课件 | ||||
| 
 | ||||
| - 采用`R语言`+`quarto`完成 | ||||
| - 网页公开:[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html) | ||||
| - 课件代码:[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git) | ||||
| - 代码web界面:[https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/](https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/) | ||||
| - 网页公开:[https://drc.drwater.net/course/public/RWEP/PUB/index.html](https://drc.drwater.net/course/public/RWEP/PUB/index.html) | ||||
| - 课件代码:[https://git.drwater.net/course/RWEP.git](https://git.drwater.net/course/RWEP.git) | ||||
| 
 | ||||
| ## 如何学习接下来的内容? | ||||
| 
 | ||||
| @@ -67,7 +66,7 @@ knitr::opts_chunk$set(echo = TRUE) | ||||
| 
 | ||||
| ## Rstudio Server使用 | ||||
| 
 | ||||
| - 服务网址:[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/) | ||||
| - 服务网址:[https://rs1.drwater.net/](https://rs1.drwater.net/) | ||||
| - 每位同学使用1个账号,随机生成 | ||||
| - 密码:**** | ||||
| - 后面的实践课程可在该服务器上完成 | ||||
							
								
								
									
										138
									
								
								SD/1.1_R语言介绍/_demo.qmd
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,138 @@ | ||||
| --- | ||||
| title: "Lesson 6" | ||||
| format: html | ||||
| --- | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
|  | ||||
| https://rs1.drwater.net | ||||
|  | ||||
| username:  | ||||
|   - ruser01 | ||||
|   - ruser02 | ||||
|   - ruser03 | ||||
|   - ruser04 | ||||
|   - ruser05 | ||||
|   - ruser06 | ||||
|  | ||||
| RWEP2025 | ||||
|  | ||||
| ``` | ||||
|  | ||||
|  | ||||
| # 安装包 | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
| install.packages("tidyverse") | ||||
|  | ||||
| x <- c(1:10, NA) | ||||
|  | ||||
| hist(x) | ||||
|  | ||||
| mean(x, na.rm = TRUE) | ||||
|  | ||||
| median(x, na.rm = TRUE) | ||||
|  | ||||
| sd(x, na.rm = TRUE) | ||||
|  | ||||
|  | ||||
| for(i in 1:10){ | ||||
|   print(i) | ||||
| } | ||||
|  | ||||
|  | ||||
| x + y + x * y | ||||
|  | ||||
| myfunc <- function(x, y = 3) { | ||||
|   x + y + x * y | ||||
| } | ||||
|  | ||||
|  | ||||
| myfunc(1, 2) | ||||
|  | ||||
|  | ||||
| myfunc(10) | ||||
|  | ||||
|  | ||||
| c(FALSE, 2, 1:3, 3)  | ||||
|  | ||||
| c(FALSE, 2, 1:3, 3) > 1 | ||||
|  | ||||
| all(c(FALSE, 2, 1:3, 3) > 1) | ||||
|  | ||||
|  | ||||
| c(1L,2L,3L) | ||||
|  | ||||
| any(c(FALSE, 2, 1:3, 3) > 1) | ||||
|  | ||||
|  | ||||
| x <- 10 | ||||
|  | ||||
| sin(x) = ? | ||||
|  | ||||
| paste("sin(x) = ", sin(x), sep = " ") | ||||
|  | ||||
| paste0("sin(x) = ", sin(x)) | ||||
|  | ||||
|  | ||||
| substr("Monday", 1, 3) | ||||
|  | ||||
|  | ||||
|  | ||||
| ``` | ||||
|  | ||||
|  | ||||
| # tidy | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
| require(readxl) | ||||
|  | ||||
| aqdf <-readxl::read_xlsx("../../data/airquality.xlsx", sheet = "metadf") | ||||
|  | ||||
| # install.packages("skimr") | ||||
|  | ||||
| aqdf |> | ||||
|   skimr::skim() | ||||
|  | ||||
| # base | ||||
|  | ||||
| # tidyverse | ||||
|  | ||||
| aqdf |> | ||||
|   dplyr::group_by(Area) |> | ||||
|   dplyr::summarize( | ||||
|     n = n(), | ||||
|     lon.mean = mean(lon, na.rm = TRUE), | ||||
|     lon.sd = sd(lat, na.rm = TRUE) | ||||
|   ) |> | ||||
|   dplyr::filter(Area %in% c("北京市", "天津市", "上海市", "重庆市")) |> | ||||
|   ggplot(aes(x = n, y = lon.mean)) + | ||||
|   geom_point() + | ||||
|   geom_line() + | ||||
|   geom_errorbar( | ||||
|     aes(ymin = lon.mean - lon.sd, | ||||
|       ymax = lon.mean + lon.sd) | ||||
|   ) | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| readxl::read_xlsx("./airquality.xlsx") | ||||
|  | ||||
| flights|> | ||||
|   filter(dest=="IAH")|> | ||||
|   group_by(year,month,day)|>summarize(n=n(), | ||||
| delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10) | ||||
|  | ||||
| ``` | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz") | ||||
| ## 欢迎讨论!{.center} | ||||
| 
 | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||
| @@ -264,6 +264,9 @@ t.test(x, y) | ||||
| wilcox.test(x, y) | ||||
| ``` | ||||
| 
 | ||||
| 
 | ||||
| ### [什么是 Wilcoxon-Mann-Whitney检验?](https://zhuanlan.zhihu.com/p/613524533) | ||||
| 
 | ||||
| ## 统计函数 | ||||
| 
 | ||||
| ### 创建向量的直方图 | ||||
| @@ -792,4 +795,4 @@ names(Y) <- c("colA", "colB", "colC") | ||||
| ## 欢迎讨论!{.center} | ||||
| 
 | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||
| @@ -37,7 +37,7 @@ require(learnr) | ||||
| 
 | ||||
| ## 下载excel文件 | ||||
| 
 | ||||
| [https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx) | ||||
| [https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx](https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx) | ||||
| 
 | ||||
| 
 | ||||
| ## Tidy data | ||||
							
								
								
									
										21
									
								
								SD/1.9_课后作业6/data.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,21 @@ | ||||
| name,age,score | ||||
| Alice,25,85 | ||||
| Bob,30,92 | ||||
| Charlie,28,89 | ||||
| David,22,95 | ||||
| Eva,35,87 | ||||
| Frank,27,91 | ||||
| Grace,29,88 | ||||
| Helen,26,93 | ||||
| Ivan,31,86 | ||||
| Jack,24,94 | ||||
| Kelly,32,89 | ||||
| Lily,28,90 | ||||
| Mike,33,85 | ||||
| Nancy,27,92 | ||||
| Olivia,34,88 | ||||
| Peter,29,93 | ||||
| Queen,25,89 | ||||
| Ryan,30,94 | ||||
| Samantha,26,91 | ||||
| Tom,31,87 | ||||
| 
 | 
| @@ -79,5 +79,5 @@ Tom,31,87 | ||||
| ## 欢迎讨论!{.center} | ||||
| 
 | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||
| 
 | ||||
							
								
								
									
										154
									
								
								SD/2.2_dataimport/_demo.qmd
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,154 @@ | ||||
| --- | ||||
| title: "Lesson 7" | ||||
| format: html | ||||
| --- | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
| require(tidyverse) | ||||
|  | ||||
|  | ||||
| files <- c( | ||||
|   "../../data/01-sales.csv", | ||||
|   "../../data/02-sales.csv", | ||||
|   "../../data/03-sales.csv" | ||||
| ) | ||||
|  | ||||
| files <- dir("../../data", pattern = "sales.csv", full.names = TRUE) | ||||
|  | ||||
|  | ||||
| readr::read_csv(files, id = "file") |> | ||||
|   mutate(file = basename(file)) | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
| require(tidyverse) | ||||
| library(tidyverse) | ||||
|  | ||||
| files <- dir( | ||||
|   "../../data/gapminder", | ||||
|   pattern = "^[12][09][0-9][0-9].xlsx$", | ||||
|   full.names = TRUE | ||||
| ) | ||||
|  | ||||
|  | ||||
| alldf <- tibble::tibble() | ||||
| for (file in files) { | ||||
|   alldf <- alldf |> | ||||
|     bind_rows( | ||||
|       readxl::read_xlsx(file) |> | ||||
|         mutate(year = parse_number(basename(file))) | ||||
|     ) | ||||
| } | ||||
|  | ||||
| alldf |> | ||||
|   ggplot(aes(x = lifeExp, y = gdpPercap)) + | ||||
|   geom_point(aes(color = factor(year))) + | ||||
|   geom_smooth(method = "lm", se = FALSE) + | ||||
|   scale_y_log10( | ||||
|     breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||
|     labels = scales::trans_format("log10", scales::math_format(10^.x)) | ||||
|   ) + | ||||
|   facet_wrap(~year, ncol = 4, scale = "fixed") | ||||
| ``` | ||||
|  | ||||
| # slope | ||||
|  | ||||
| ```{r} | ||||
| file <- files[1] | ||||
|  | ||||
| lm(y ~ x, data) | ||||
|  | ||||
| m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file)) | ||||
|  | ||||
| summary(m) | ||||
|  | ||||
| coef(m)[2] | ||||
|  | ||||
| slopes <- c() | ||||
| years <- c() | ||||
| for (file in files) { | ||||
|   m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file)) | ||||
|   years <- c(years, parse_number(basename(file))) | ||||
|   slopes <- c(slopes, coef(m)[2]) | ||||
| } | ||||
| years | ||||
| slopes | ||||
|  | ||||
|  | ||||
| plot(years, as.numeric(slopes), type = "b") | ||||
| ``` | ||||
|  | ||||
| # purrr | ||||
|  | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
| require(tidyverse) | ||||
| df <- tibble( | ||||
|   filename = dir( | ||||
|     "../../data/gapminder", | ||||
|     pattern = "^[12][09][0-9][0-9].xlsx$", | ||||
|     full.names = TRUE | ||||
|   ) | ||||
| ) |> | ||||
|   dplyr::mutate( | ||||
|     data = purrr::map( | ||||
|       filename, | ||||
|       \(x) readxl::read_xlsx(x) | ||||
|     ) | ||||
|   ) |> | ||||
|   mutate(year = parse_number(basename(filename))) |> | ||||
|   mutate( | ||||
|     m = purrr::map(data, \(xxxx) lm(log10(gdpPercap) ~ lifeExp, data = xxxx)) | ||||
|   ) |> | ||||
|   mutate(slope = purrr::map_dbl(m, \(yyyy) coef(yyyy)[2])) |> | ||||
|   unnest(data) | ||||
|  | ||||
| pvalue <- summary(df$m[[1]])$coefficients[2, 4] | ||||
| rsq <- summary(df$m[[1]])$r.squared | ||||
|  | ||||
| require(tidymodels) | ||||
| generics::tidy(df$m[[1]]) | ||||
|  | ||||
| df |> | ||||
|   ggplot(aes(x = lifeExp, y = gdpPercap)) + | ||||
|   geom_point(aes(color = factor(year))) + | ||||
|   geom_smooth(method = "lm", se = FALSE) + | ||||
|   scale_y_log10( | ||||
|     breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||
|     labels = scales::trans_format("log10", scales::math_format(10^.x)) | ||||
|   ) + | ||||
|   facet_wrap(~year, ncol = 4, scale = "fixed") | ||||
|  | ||||
|  | ||||
| df |> | ||||
|   ggplot(aes(x = year, y = slope)) + | ||||
|   geom_line() + | ||||
|   geom_point() | ||||
|  | ||||
|  | ||||
| df | ||||
| df$slope[[1]] | ||||
|  | ||||
| coef(df$m[[1]])[2] | ||||
|  | ||||
| lm(y ~ x, data) | ||||
|  | ||||
|  | ||||
| df |> | ||||
|   unnest(data) |> | ||||
|   ggplot(aes(x = lifeExp, y = gdpPercap)) + | ||||
|   geom_point(aes(color = factor(year))) + | ||||
|   geom_smooth(method = "lm", se = FALSE) + | ||||
|   scale_y_log10( | ||||
|     breaks = scales::trans_breaks("log10", function(x) 10^x), | ||||
|     labels = scales::trans_format("log10", scales::math_format(10^.x)) | ||||
|   ) + | ||||
|   facet_wrap(~year, ncol = 4, scale = "fixed") | ||||
| ``` | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -197,9 +197,11 @@ annoying <- tibble( | ||||
| ```{r} | ||||
| #| message: false | ||||
| 
 | ||||
| sales_files <- c("../../data/01-sales.csv", | ||||
| sales_files <- c( | ||||
|   "../../data/01-sales.csv", | ||||
|   "../../data/02-sales.csv", | ||||
|   "../../data/03-sales.csv") | ||||
|   "../../data/03-sales.csv" | ||||
| ) | ||||
| read_csv(sales_files, id = "file") | ||||
| ``` | ||||
| 
 | ||||
| @@ -223,8 +225,11 @@ read_csv(sales_files, id = "file") | ||||
| ## 批量读取 | ||||
| 
 | ||||
| ```{r} | ||||
| sales_files <- list.files("../../data", | ||||
|   pattern = "sales\\.csv$", full.names = TRUE) | ||||
| sales_files <- list.files( | ||||
|   "../../data", | ||||
|   pattern = "sales\\.csv$", | ||||
|   full.names = TRUE | ||||
| ) | ||||
| sales_files | ||||
| ``` | ||||
| 
 | ||||
| @@ -283,8 +288,13 @@ metadf <- readxl::read_xlsx("../../data/airquality.xlsx") | ||||
| dir.create("../../data/metacity2/") | ||||
| metadf |> | ||||
|   nest(sitedf = -site) |> | ||||
|   mutate(flag = purrr::map2(site, sitedf, | ||||
|     ~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx")))) | ||||
|   mutate( | ||||
|     flag = purrr::map2( | ||||
|       site, | ||||
|       sitedf, | ||||
|       ~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx")) | ||||
|     ) | ||||
|   ) | ||||
| ``` | ||||
| 
 | ||||
| 
 | ||||
| @@ -305,10 +315,16 @@ if (FALSE) { | ||||
|   writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx") | ||||
|   dir.create("../../data/metacity/") | ||||
|   metanestdf |> | ||||
|     mutate(flag = purrr::map2(Area, citydf, | ||||
|       ~ writexl::write_xlsx(.y, | ||||
|     mutate( | ||||
|       flag = purrr::map2( | ||||
|         Area, | ||||
|         citydf, | ||||
|         ~ writexl::write_xlsx( | ||||
|           .y, | ||||
|           path = paste0("../../data/metacity/", .x, ".xlsx") | ||||
|       ))) | ||||
|         ) | ||||
|       ) | ||||
|     ) | ||||
| } | ||||
| ``` | ||||
| 
 | ||||
| @@ -321,5 +337,4 @@ if (FALSE) { | ||||
| ## 欢迎讨论!{.center} | ||||
| 
 | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")` | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||
							
								
								
									
										115
									
								
								SD/2.3_datatransform/_demo.qmd
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,115 @@ | ||||
| --- | ||||
| title: "Lesson 8" | ||||
| format: html | ||||
| --- | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
| require(tidyverse) | ||||
| library(nycflights13) | ||||
|  | ||||
| flights |> | ||||
|   select(3:6) |> | ||||
|   head(3) |> | ||||
|   rename_all(~ gsub("_", "", .)) | ||||
|  | ||||
|  | ||||
| flights |> | ||||
|   select(3:6) |> | ||||
|   head(3) |> | ||||
|   rename_with(toupper, .cols = 2:4) | ||||
|  | ||||
| # 每月10号-15号,dep_delay > 100 的航班 | ||||
|  | ||||
| # 每月哪个出发地origin的 dep_delay总时长最长 | ||||
|  | ||||
| flights |> | ||||
|   group_by(month, origin) |> | ||||
|   summarize(n = n(), total_dep_delay = mean(dep_delay, na.rm = TRUE)) |> | ||||
|   slice_max(total_dep_delay) | ||||
|  | ||||
| # arrange(month, origin, desc(total_dep_delay)) | ||||
|  | ||||
| # 每月随机抽取一天,随机抽取三个航班 | ||||
|  | ||||
| slice_sample(n = 1) | ||||
|  | ||||
| flights |> | ||||
|   tidyr::nest(ymddf = -c(year, month, day)) |> | ||||
|   group_by(year, month) |> | ||||
|   slice_sample(n = 5) |> | ||||
|   unnest(ymddf) |> | ||||
|   group_by(year, month, day) |> | ||||
|   slice_sample(n = 3) | ||||
|  | ||||
|  | ||||
| flights |> | ||||
|   tidyr::nest(ymddf = -c(year, month, day)) |> | ||||
|   group_by(year, month) |> | ||||
|   slice_sample(n = 1) |> | ||||
|   mutate( | ||||
|     ymddf = purrr::map(ymddf, \(x) { | ||||
|       x |> | ||||
|         slice_sample(n = 3) | ||||
|     }) | ||||
|   ) | ||||
|  | ||||
| # 每月 每个出发地 周末的平均dep_delay 与 工作日的平均dep_delay 差值最大的3个航班 | ||||
|  | ||||
| flights |> | ||||
|   mutate(date = ymd(paste(year, month, day))) |> | ||||
|   mutate(weekday = wday(date)) |> | ||||
|   mutate(isworkday = if_else(between(weekday, 2, 6), "Yes", "No")) |> | ||||
|   group_by(year, month, origin, flight, isworkday) |> | ||||
|   summarize(mean_delay = mean(dep_delay, na.rm = TRUE)) |> | ||||
|   tidyr::nest(diffdelaydf = c(isworkday, mean_delay)) |> | ||||
|   filter( | ||||
|     purrr::map(diffdelaydf, \(x) { | ||||
|       nrow(x) | ||||
|     }) > | ||||
|       1 | ||||
|   ) |> | ||||
|   mutate( | ||||
|     diffdelay = purrr::map_dbl(diffdelaydf, \(x) { | ||||
|       x |> | ||||
|         arrange(isworkday) |> | ||||
|         pull(mean_delay) |> | ||||
|         diff() | ||||
|     }) | ||||
|   ) |> | ||||
|   group_by(year, month, origin) |> | ||||
|   slice_max(diffdelay, n = 3) | ||||
|  | ||||
|  | ||||
| wday(today()) | ||||
|  | ||||
| weekday() | ||||
|  | ||||
|  | ||||
| slice_sample(n = 1) | ||||
|  | ||||
|  | ||||
| flights |> | ||||
|   group_by(month) |> | ||||
|   slice_sample(n = 1) | ||||
| ``` | ||||
|  | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
| p1 <- flights |> | ||||
|   group_by(year, month) |> | ||||
|   summarize(n = n()) |> | ||||
|   ggplot(aes(month, n)) + | ||||
|   geom_point(shape = 21, size = 6, color = "black", fill = "red") + | ||||
|   geom_line() | ||||
|  | ||||
| ggsave("./a.pdf") | ||||
| ggsave("./a.png") | ||||
|  | ||||
| require(patchwork) | ||||
|  | ||||
| p1 / p1 | ||||
| ``` | ||||
|  | ||||
|  | ||||
							
								
								
									
										
											BIN
										
									
								
								SD/2.3_datatransform/a.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
							
								
								
									
										
											BIN
										
									
								
								SD/2.3_datatransform/a.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 271 KiB | 
| @@ -323,5 +323,5 @@ flights |> | ||||
| ## 欢迎讨论!{.center} | ||||
| 
 | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||
| 
 | ||||
| @@ -1,49 +0,0 @@ | ||||
| --- | ||||
| title: "课后作业9" | ||||
| subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution | ||||
| author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心 | ||||
| date: today | ||||
| lang: zh | ||||
| format: | ||||
|   revealjs: | ||||
|     theme: dark | ||||
|     slide-number: true | ||||
|     chalkboard: | ||||
|       buttons: true | ||||
|     preview-links: auto | ||||
|     lang: zh | ||||
|     toc: true | ||||
|     toc-depth: 1 | ||||
|     toc-title: 大纲 | ||||
|     logo: ./_extensions/inst/img/ucaslogo.png | ||||
|     css: ./_extensions/inst/css/revealjs.css | ||||
|     pointer: | ||||
|       key: "p" | ||||
|       color: "#32cd32" | ||||
|       pointerSize: 18 | ||||
| revealjs-plugins: | ||||
|   - pointer | ||||
| filters: | ||||
|   - d2 | ||||
| --- | ||||
|  | ||||
| ```{r} | ||||
| #| include: false | ||||
| #| cache: false | ||||
| lang <- "cn" | ||||
| require(tidyverse) | ||||
| require(learnr) | ||||
| ``` | ||||
|  | ||||
| ## 第9次课后作业 | ||||
|  | ||||
| 自选数据集,使用R语言开展不同因子(如年份、季节、处理方式等)间某指标的差异分析,采用图表方式形成简要报告。 | ||||
|  | ||||
| 作业模板:[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd) | ||||
|  | ||||
|  | ||||
| ## 欢迎讨论!{.center} | ||||
|  | ||||
|  | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||
|  | ||||
| @@ -1,8 +0,0 @@ | ||||
| --- | ||||
| title: 课后作业9 | ||||
| author: 姓名 | ||||
| format: html | ||||
| --- | ||||
|  | ||||
| 要求:自选数据集,使用R语言开展不同因子间(如年份、季节、处理方式等)某指标的差异分析,采用图表+文字说明等方式形成简要报告。 | ||||
|  | ||||
| @@ -1,167 +0,0 @@ | ||||
| --- | ||||
| title: "大数据分析工具" | ||||
| subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution | ||||
| author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心 | ||||
| date: today | ||||
| lang: zh | ||||
| format: | ||||
|   revealjs: | ||||
|     theme: dark | ||||
|     slide-number: true | ||||
|     chalkboard: | ||||
|       buttons: true | ||||
|     preview-links: auto | ||||
|     lang: zh | ||||
|     toc: true | ||||
|     toc-depth: 1 | ||||
|     toc-title: 大纲 | ||||
|     logo: ./_extensions/inst/img/ucaslogo.png | ||||
|     css: ./_extensions/inst/css/revealjs.css | ||||
|     pointer: | ||||
|       key: "p" | ||||
|       color: "#32cd32" | ||||
|       pointerSize: 18 | ||||
| revealjs-plugins: | ||||
|   - pointer | ||||
| filters: | ||||
|   - d2 | ||||
| --- | ||||
|  | ||||
| ```{r} | ||||
| #| echo: false | ||||
| knitr::opts_chunk$set(echo = TRUE) | ||||
| source("../../coding/_common.R") | ||||
| library(nycflights13) | ||||
| library(tidyverse) | ||||
|  | ||||
| ``` | ||||
|  | ||||
| ## 匹配数字 | ||||
|  | ||||
| ### 匹配数字: | ||||
|  | ||||
| - \d:匹配任意数字字符。 | ||||
| - \d+:匹配一个或多个数字字符。 | ||||
| - [0-9]: 匹配数字 | ||||
|  | ||||
| ### 匹配字母: | ||||
|  | ||||
| - \w:匹配任意字母、数字或下划线字符。 | ||||
| - \w+:匹配一个或多个字母、数字或下划线字符。 | ||||
|  | ||||
| ## 匹配数字 | ||||
|  | ||||
| ### 匹配空白字符: | ||||
|  | ||||
| - \s:匹配任意空白字符,包括空格、制表符、换行符等。 | ||||
| - \s+:匹配一个或多个空白字符。 | ||||
|  | ||||
| ### 匹配特定字符: | ||||
|  | ||||
| - [abc]:匹配字符 a、b 或 c 中的任意一个。 | ||||
| - [a-z]:匹配任意小写字母。 | ||||
| - [A-Z]:匹配任意大写字母。 | ||||
| - [0-9]:匹配任意数字。 | ||||
|  | ||||
| ## 匹配数字 | ||||
|  | ||||
| ### 匹配重复次数: | ||||
|  | ||||
| - {n}:匹配前一个字符恰好 n 次。 | ||||
| - {n,}:匹配前一个字符至少 n 次。 | ||||
| - {n,m}:匹配前一个字符至少 n 次,但不超过 m 次。 | ||||
|  | ||||
| ### 匹配边界: | ||||
|  | ||||
| - ^:匹配字符串的开头。 | ||||
| - $:匹配字符串的结尾。 | ||||
|  | ||||
| ## 匹配数字 | ||||
|  | ||||
| ### 匹配特殊字符: | ||||
|  | ||||
| - \:转义特殊字符,使其按字面意义匹配。 | ||||
| - .:匹配任意单个字符。 | ||||
| - |:表示“或”关系,匹配两个或多个表达式之一。 | ||||
|  | ||||
| #### 匹配次数: | ||||
|  | ||||
| - *:匹配前一个字符零次或多次。 | ||||
| - +:匹配前一个字符一次或多次。 | ||||
| - ?:匹配前一个字符零次或一次。 | ||||
|  | ||||
| ## 匹配数字 | ||||
|  | ||||
| ### 分组和捕获: | ||||
|  | ||||
| - ():将一系列模式组合成一个单元,可与特殊字符一起使用。 | ||||
|  | ||||
| ### 预定义字符集: | ||||
|  | ||||
| - \d:任意数字,相当于 [0-9]。 | ||||
| - \w:任意字母、数字或下划线字符,相当于 [a-zA-Z0-9_]。 | ||||
| - \s:任意空白字符,相当于 [ \t\n\r\f\v]。 | ||||
|  | ||||
|  | ||||
|  | ||||
| ## 实例 | ||||
|  | ||||
| ```{r} | ||||
| library(babynames) | ||||
| (x <- c("apple", "apppple", "abc123def")) | ||||
| x[str_detect(x, "[0-9]")] | ||||
| x[str_detect(x, "abc[0-9]+")] | ||||
| x[str_detect(x, "pp")] | ||||
| x[str_detect(x, "p{4}")] | ||||
| x[str_detect(x, "p{4}")] | ||||
| x[str_detect("apple", "ap*")] | ||||
| x[str_detect("apple", "app*")] | ||||
| x[str_detect("apple", "a..le")] | ||||
| ``` | ||||
|  | ||||
| ## 练习 | ||||
|  | ||||
|  | ||||
| 找出`babyname`中名字含有ar的行 | ||||
|  | ||||
| ```{r} | ||||
| #| echo: false | ||||
| babynames |> | ||||
|   filter(str_detect(name, "ar")) | ||||
| ``` | ||||
|  | ||||
| ## 练习 | ||||
|  | ||||
|  | ||||
| 找出`babyname`中名字含有ar或者以ry结尾的行。 | ||||
|  | ||||
| ```{r} | ||||
| #| echo: false | ||||
| babynames |> | ||||
|   filter(str_detect(name, "ar")) | ||||
| ``` | ||||
|  | ||||
|   | ||||
|  | ||||
|  | ||||
| ## GNU/Linux服务器 | ||||
|  | ||||
| - `ssh`, `scp` | ||||
| - `bash` | ||||
|   - grep | ||||
|   - sed | ||||
|   - awk | ||||
|   - find | ||||
|   - xargs | ||||
| - `Editor` | ||||
|   - `Virtual Studio Code` | ||||
|   - `Vim` | ||||
|   - `Emacs` | ||||
|  | ||||
|  | ||||
|  | ||||
| ## 欢迎讨论!{.center} | ||||
|  | ||||
|  | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||
|  | ||||
| @@ -945,5 +945,5 @@ semi_join(df1, df2, by = "id") | ||||
| ## 欢迎讨论!{.center} | ||||
| 
 | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||
| 
 | ||||
							
								
								
									
										
											BIN
										
									
								
								SD/3.9_课后作业8/.RData
									
									
									
									
									
										Normal file
									
								
							
							
						
						
							
								
								
									
										
											BIN
										
									
								
								SD/3.9_课后作业8/airqualitydf.RDS
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -43,7 +43,7 @@ require(learnr) | ||||
| 1. 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。 | ||||
| 2. 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。 | ||||
| 
 | ||||
| 作业模板:[第8次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd) | ||||
| 作业模板:[第8次课后作业_模板.qmd](https://git.drwater.net/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd) | ||||
| 
 | ||||
| ## 示例代码 | ||||
| 
 | ||||
| @@ -62,5 +62,5 @@ require(learnr) | ||||
| ## 欢迎讨论!{.center} | ||||
| 
 | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||
| 
 | ||||
							
								
								
									
										
											BIN
										
									
								
								SD/3.9_课后作业8/metadf.RDS
									
									
									
									
									
										Normal file
									
								
							
							
						
						
							
								
								
									
										
											BIN
										
									
								
								SD/3.9_课后作业8/testdf.RDS
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -14,8 +14,10 @@ format: html | ||||
| # 下载至临时文件 | ||||
| if (FALSE) { | ||||
|   tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx") | ||||
|   download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx", | ||||
|     destfile = tmpxlsxpath) | ||||
|   download.file( | ||||
|     "https://git.drwater.net/course/RWEP/raw/branch/PUB/data/airquality.xlsx", | ||||
|     destfile = tmpxlsxpath | ||||
|   ) | ||||
|   airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2) | ||||
|   metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1) | ||||
|   saveRDS(airqualitydf, "./airqualitydf.RDS") | ||||
| @@ -41,7 +43,12 @@ airqualitydf |> | ||||
|   left_join(metadf |> select(site, city = Area)) |> | ||||
|   group_by(city) |> | ||||
|   filter(n() > 5) |> | ||||
|   summarize(p = sum(AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)) / n()) |> | ||||
|   summarize( | ||||
|     p = sum( | ||||
|       AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE) | ||||
|     ) / | ||||
|       n() | ||||
|   ) |> | ||||
|   top_n(10, p) | ||||
| 
 | ||||
| 
 | ||||
| @@ -57,12 +64,11 @@ airqualitydf |> | ||||
|   left_join(metadf |> select(site, city = Area)) |> | ||||
|   group_by(city) |> | ||||
|   filter(length(unique(site)) >= 5) |> | ||||
|   summarize(p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2, | ||||
|     na.rm = TRUE)) / n()) |> | ||||
|   summarize( | ||||
|     p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2, na.rm = TRUE)) / n() | ||||
|   ) |> | ||||
|   slice_max(p, n = 10) |> | ||||
|   knitr::kable() | ||||
| 
 | ||||
| 
 | ||||
| ``` | ||||
| 
 | ||||
| 
 | ||||
| @@ -83,41 +89,58 @@ if (FALSE) { | ||||
|     left_join(metadf |> select(site, city = Area)) |> | ||||
|     group_by(city) |> | ||||
|     filter(length(unique(site)) >= 5) |> | ||||
|     mutate(dayornight = factor(ifelse(between(hour(datetime), 8, 20), "day", "night"), | ||||
|       levels = c("day", "night")) | ||||
|     mutate( | ||||
|       dayornight = factor( | ||||
|         ifelse(between(hour(datetime), 8, 20), "day", "night"), | ||||
|         levels = c("day", "night") | ||||
|       ) | ||||
|     ) |> | ||||
|     group_by(city) |> | ||||
|     nest(citydf = -city) |> | ||||
|     mutate(median_diff = purrr::map_dbl(citydf, ~ | ||||
|       .x |> | ||||
|     mutate( | ||||
|       median_diff = purrr::map_dbl( | ||||
|         citydf, | ||||
|         ~ .x |> | ||||
|           specify(AQI ~ dayornight) |> | ||||
|           calculate(stat = "diff in medians", order = c("day", "night")) |> | ||||
|           pull(stat) | ||||
|     )) |> | ||||
|       ) | ||||
|     ) |> | ||||
|     ungroup() |> | ||||
|     #  slice_sample(n = 12) |> | ||||
|     mutate(null_dist = purrr::map(citydf, ~ | ||||
|       .x |> | ||||
|     mutate( | ||||
|       null_dist = purrr::map( | ||||
|         citydf, | ||||
|         ~ .x |> | ||||
|           specify(AQI ~ dayornight) |> | ||||
|           hypothesize(null = "independence") |> | ||||
|           generate(reps = 1000, type = "permute") |> | ||||
|           calculate(stat = "diff in medians", order = c("day", "night")) | ||||
|     )) |> | ||||
|     mutate(p_value = purrr::map2_dbl(null_dist, median_diff,  | ||||
|       ) | ||||
|     ) |> | ||||
|     mutate( | ||||
|       p_value = purrr::map2_dbl( | ||||
|         null_dist, | ||||
|         median_diff, | ||||
|         ~ get_p_value(.x, obs_stat = .y, direction = "both") |> | ||||
|           pull(p_value) | ||||
|     )) |> | ||||
|       ) | ||||
|     ) |> | ||||
|     mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) |> | ||||
|     mutate(fig = purrr::pmap(list(null_dist, median_diff, city, sigdiff), | ||||
|     mutate( | ||||
|       fig = purrr::pmap( | ||||
|         list(null_dist, median_diff, city, sigdiff), | ||||
|         ~ visualize(..1) + | ||||
|           shade_p_value(obs_stat = ..2, direction = "both") + | ||||
|           ggtitle(paste0(..3, ":", ..4)) + | ||||
|           theme_sci(2, 2) | ||||
|     )) |> | ||||
|       ) | ||||
|     ) |> | ||||
|     arrange(p_value) | ||||
|   saveRDS(testdf, "./testdf.RDS") | ||||
| } | ||||
| 
 | ||||
| if (FALSE) { | ||||
|   lang <- "cn" | ||||
|   require(dwfun) | ||||
|   require(rmdify) | ||||
| @@ -136,13 +159,16 @@ testdf |> | ||||
|     nest(grpdf = -grp) |> | ||||
|     ungroup() |> | ||||
|     #  slice(1) |> | ||||
|   mutate(gp = purrr::map(grpdf, | ||||
|     mutate( | ||||
|       gp = purrr::map( | ||||
|         grpdf, | ||||
|         ~ (.x |> | ||||
|           pull(fig)) |> | ||||
|           patchwork::wrap_plots(ncol = 3) + | ||||
|       dwfun::theme_sci(5, 7))) |> | ||||
|           dwfun::theme_sci(5, 7) | ||||
|       ) | ||||
|     ) |> | ||||
|     pull(gp) | ||||
| 
 | ||||
| 
 | ||||
| } | ||||
| ``` | ||||
| 
 | ||||
| @@ -3934,5 +3934,5 @@ p | ||||
| ## 欢迎讨论!{.center} | ||||
| 
 | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||
| 
 | ||||
| Before Width: | Height: | Size: 61 KiB After Width: | Height: | Size: 61 KiB | 
| @@ -100,5 +100,4 @@ geom_bar(position = "fill") | ||||
| ## 欢迎讨论!{.center} | ||||
| 
 | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` | ||||
| 
 | ||||
| `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")` | ||||
							
								
								
									
										254
									
								
								SD/5.1_model/_demo.qmd
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,254 @@ | ||||
| --- | ||||
| title: "Lesson 9" | ||||
| format: html | ||||
| --- | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
| # install.packages("tidymodels") | ||||
|  | ||||
| require(tidymodels) | ||||
| taxi | ||||
|  | ||||
| taxisplit <- initial_split(taxi, prop = 0.8) | ||||
| taxi_train <- training(taxisplit) | ||||
| taxi_test <- testing(taxisplit) | ||||
|  | ||||
| tree_spec <- | ||||
|   decision_tree(cost_complexity = 0.002) %>% | ||||
|   set_mode("classification") | ||||
|  | ||||
| taxi_fit <- workflow() %>% | ||||
|   add_formula(tip ~ .) %>% | ||||
|   add_model(tree_spec) %>% | ||||
|   fit(data = taxi_train) | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
| augment(taxi_fit, new_data = taxi_train) %>% | ||||
|   relocate(tip, .pred_class, .pred_yes, .pred_no) | ||||
|  | ||||
| augment(taxi_fit, new_data = taxi_train) %>% | ||||
|   conf_mat(truth = tip, estimate = .pred_class) | ||||
|  | ||||
| augment(taxi_fit, new_data = taxi_train) %>% | ||||
|   accuracy(truth = tip, estimate = .pred_class) | ||||
|  | ||||
| augment(taxi_fit, new_data = taxi_train) %>% | ||||
|   sensitivity(truth = tip, estimate = .pred_class) | ||||
|  | ||||
| augment(taxi_fit, new_data = taxi_train) %>% | ||||
|   specificity(truth = tip, estimate = .pred_class) | ||||
|  | ||||
| taxi_metrics <- metric_set(accuracy, specificity, sensitivity) | ||||
|  | ||||
| augment(taxi_fit, new_data = taxi_train) %>% | ||||
|   taxi_metrics(truth = tip, estimate = .pred_class) | ||||
|  | ||||
| taxi_metrics <- metric_set(accuracy, specificity, sensitivity) | ||||
|  | ||||
| augment(taxi_fit, new_data = taxi_train) %>% | ||||
|   group_by(local) %>% | ||||
|   taxi_metrics(truth = tip, estimate = .pred_class) | ||||
|  | ||||
| augment(taxi_fit, new_data = taxi_train) %>% | ||||
|   roc_curve(truth = tip, .pred_yes) %>% | ||||
|   autoplot() | ||||
|  | ||||
|  | ||||
| augment(taxi_fit, new_data = taxi_train) | ||||
|  | ||||
|  | ||||
| augment(taxi_fit, new_data = taxi_train) %>% | ||||
|   roc_curve(truth = tip, .pred_yes) |> | ||||
|   ggplot(aes(1 - sensitivity, specificity)) + | ||||
|   geom_point() + | ||||
|   geom_line() + | ||||
|   geom_abline(slope = 1) | ||||
| ``` | ||||
|  | ||||
|  | ||||
| # Cross Validation | ||||
|  | ||||
| ```{r} | ||||
| vfold_cv(taxi_train, v = 10) |> | ||||
|   pull(splits) |> | ||||
|   nth(1) | ||||
| taxi_folds <- vfold_cv(taxi_train) | ||||
| taxi_folds$splits[1:3] | ||||
|  | ||||
| vfold_cv(taxi_train, strata = tip) | ||||
|  | ||||
| set.seed(123) | ||||
| taxi_folds <- vfold_cv(taxi_train, v = 10, strata = tip) | ||||
| taxi_folds | ||||
|  | ||||
| taxi_wflow <- workflow() %>% | ||||
|   add_formula(tip ~ .) %>% | ||||
|   add_model(tree_spec) | ||||
|  | ||||
|  | ||||
| taxi_res <- fit_resamples(taxi_wflow, taxi_folds) | ||||
| taxi_res | ||||
|  | ||||
|  | ||||
| taxi_res$.metrics[[1]] | ||||
|  | ||||
|  | ||||
| taxi_res$splits[[1]] | ||||
|  | ||||
| analysis(taxi_res$splits[[1]]) | ||||
|  | ||||
| analysis(taxi_res$splits[[1]]) | ||||
| assessment(taxi_res$splits[[1]]) | ||||
|  | ||||
|  | ||||
| taxi_res %>% | ||||
|   collect_metrics() | ||||
|  | ||||
| taxi_res %>% | ||||
|   collect_metrics() %>% | ||||
|   select(.metric, mean, n) | ||||
|  | ||||
| # Save the assessment set results | ||||
| ctrl_taxi <- control_resamples(save_pred = TRUE) | ||||
| taxi_res <- fit_resamples(taxi_wflow, taxi_folds, control = ctrl_taxi) | ||||
|  | ||||
| taxi_res | ||||
| ``` | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| # NLA2007 cyanophyta model | ||||
|  | ||||
|  | ||||
| ```{r} | ||||
| require(tidyverse) | ||||
| sitedf <- readr::read_csv( | ||||
|   "https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv" | ||||
| ) |> | ||||
|   select( | ||||
|     SITE_ID, | ||||
|     lon = LON_DD, | ||||
|     lat = LAT_DD, | ||||
|     name = LAKENAME, | ||||
|     area = LAKEAREA, | ||||
|     zmax = DEPTHMAX | ||||
|   ) |> | ||||
|   group_by(SITE_ID) |> | ||||
|   summarize( | ||||
|     lon = mean(lon, na.rm = TRUE), | ||||
|     lat = mean(lat, na.rm = TRUE), | ||||
|     name = unique(name), | ||||
|     area = mean(area, na.rm = TRUE), | ||||
|     zmax = mean(zmax, na.rm = TRUE) | ||||
|   ) | ||||
|  | ||||
|  | ||||
| visitdf <- readr::read_csv( | ||||
|   "https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv" | ||||
| ) |> | ||||
|   select(SITE_ID, date = DATE_PROFILE, year = YEAR, visit = VISIT_NO) |> | ||||
|   distinct() | ||||
|  | ||||
|  | ||||
| waterchemdf <- readr::read_csv( | ||||
|   "https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv" | ||||
| ) |> | ||||
|   select( | ||||
|     SITE_ID, | ||||
|     date = DATE_PROFILE, | ||||
|     depth = DEPTH, | ||||
|     temp = TEMP_FIELD, | ||||
|     do = DO_FIELD, | ||||
|     ph = PH_FIELD, | ||||
|     cond = COND_FIELD, | ||||
|   ) | ||||
|  | ||||
| sddf <- readr::read_csv( | ||||
|   "https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv" | ||||
| ) |> | ||||
|   select( | ||||
|     SITE_ID, | ||||
|     date = DATE_SECCHI, | ||||
|     sd = SECMEAN, | ||||
|     clear_to_bottom = CLEAR_TO_BOTTOM | ||||
|   ) | ||||
|  | ||||
| trophicdf <- readr::read_csv( | ||||
|   "https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv" | ||||
| ) |> | ||||
|   select(SITE_ID, visit = VISIT_NO, tp = PTL, tn = NTL, chla = CHLA) |> | ||||
|   left_join(visitdf, by = c("SITE_ID", "visit")) |> | ||||
|   select(-year, -visit) |> | ||||
|   group_by(SITE_ID, date) |> | ||||
|   summarize( | ||||
|     tp = mean(tp, na.rm = TRUE), | ||||
|     tn = mean(tn, na.rm = TRUE), | ||||
|     chla = mean(chla, na.rm = TRUE) | ||||
|   ) | ||||
|  | ||||
|  | ||||
| phytodf <- readr::read_csv( | ||||
|   "https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv" | ||||
| ) |> | ||||
|   select( | ||||
|     SITE_ID, | ||||
|     date = DATEPHYT, | ||||
|     depth = SAMPLE_DEPTH, | ||||
|     phyta = DIVISION, | ||||
|     genus = GENUS, | ||||
|     species = SPECIES, | ||||
|     tax = TAXANAME, | ||||
|     abund = ABUND | ||||
|   ) |> | ||||
|   mutate(phyta = gsub(" .*$", "", phyta)) |> | ||||
|   filter(!is.na(genus)) |> | ||||
|   group_by(SITE_ID, date, depth, phyta, genus) |> | ||||
|   summarize(abund = sum(abund, na.rm = TRUE)) |> | ||||
|   nest(phytodf = -c(SITE_ID, date)) | ||||
|  | ||||
| phytodf$phytodf[[1]] | ||||
|  | ||||
|  | ||||
| envdf <- waterchemdf |> | ||||
|   filter(depth < 2) |> | ||||
|   select(-depth) |> | ||||
|   group_by(SITE_ID, date) |> | ||||
|   summarise_all(~ mean(., na.rm = TRUE)) |> | ||||
|   ungroup() |> | ||||
|   left_join(sddf, by = c("SITE_ID", "date")) |> | ||||
|   left_join(trophicdf, by = c("SITE_ID", "date")) | ||||
|  | ||||
| nla <- envdf |> | ||||
|   left_join(phytodf, by = c("SITE_ID", "date")) |> | ||||
|   left_join(sitedf, by = "SITE_ID") |> | ||||
|   filter(!purrr::map_lgl(phytodf, is.null)) |> | ||||
|   mutate( | ||||
|     cyanophyta = purrr::map( | ||||
|       phytodf, | ||||
|       \(x) { | ||||
|         x |> | ||||
|           dplyr::filter(phyta == "Cyanophyta") |> | ||||
|           summarize(cyanophyta = sum(abund, na.rm = TRUE)) | ||||
|       } | ||||
|     ) | ||||
|   ) |> | ||||
|   unnest(cyanophyta) |> | ||||
|   select(-phyta) |> | ||||
|   mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE)) | ||||
|  | ||||
| # library(rmdify) | ||||
| # library(dwfun) | ||||
| # dwfun::init() | ||||
| ``` | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB | 
| Before Width: | Height: | Size: 272 KiB After Width: | Height: | Size: 272 KiB | 
| Before Width: | Height: | Size: 345 KiB After Width: | Height: | Size: 345 KiB | 
| Before Width: | Height: | Size: 210 KiB After Width: | Height: | Size: 210 KiB | 
| Before Width: | Height: | Size: 470 KiB After Width: | Height: | Size: 470 KiB | 
| Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB | 
| Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB | 
| Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB | 
| Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB | 
| Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB | 
| Before Width: | Height: | Size: 5.5 KiB After Width: | Height: | Size: 5.5 KiB | 
| Before Width: | Height: | Size: 209 KiB After Width: | Height: | Size: 209 KiB | 
| Before Width: | Height: | Size: 7.8 KiB After Width: | Height: | Size: 7.8 KiB | 
| Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB | 
| Before Width: | Height: | Size: 177 KiB After Width: | Height: | Size: 177 KiB | 
| Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB | 
| Before Width: | Height: | Size: 569 KiB After Width: | Height: | Size: 569 KiB | 
| Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.5 KiB | 
| Before Width: | Height: | Size: 100 KiB After Width: | Height: | Size: 100 KiB | 
| Before Width: | Height: | Size: 7.5 KiB After Width: | Height: | Size: 7.5 KiB | 
| Before Width: | Height: | Size: 131 KiB After Width: | Height: | Size: 131 KiB | 
| Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB | 
| Before Width: | Height: | Size: 60 KiB After Width: | Height: | Size: 60 KiB | 
| Before Width: | Height: | Size: 83 KiB After Width: | Height: | Size: 83 KiB | 
| Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB | 
| Before Width: | Height: | Size: 87 KiB After Width: | Height: | Size: 87 KiB | 
| Before Width: | Height: | Size: 91 KiB After Width: | Height: | Size: 91 KiB | 
| Before Width: | Height: | Size: 869 KiB After Width: | Height: | Size: 869 KiB | 
| Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 3.8 KiB | 
| Before Width: | Height: | Size: 638 KiB After Width: | Height: | Size: 638 KiB | 
| Before Width: | Height: | Size: 408 KiB After Width: | Height: | Size: 408 KiB | 
| Before Width: | Height: | Size: 410 KiB After Width: | Height: | Size: 410 KiB | 
| Before Width: | Height: | Size: 154 KiB After Width: | Height: | Size: 154 KiB | 
| Before Width: | Height: | Size: 8.7 KiB After Width: | Height: | Size: 8.7 KiB | 
| Before Width: | Height: | Size: 159 KiB After Width: | Height: | Size: 159 KiB | 
| Before Width: | Height: | Size: 184 KiB After Width: | Height: | Size: 184 KiB | 
| Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB | 
| Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB | 
| Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB | 
| Before Width: | Height: | Size: 69 KiB After Width: | Height: | Size: 69 KiB | 
| Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB | 
| Before Width: | Height: | Size: 117 KiB After Width: | Height: | Size: 117 KiB | 
| Before Width: | Height: | Size: 137 KiB After Width: | Height: | Size: 137 KiB | 
| Before Width: | Height: | Size: 10 KiB After Width: | Height: | Size: 10 KiB | 
| Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 7.1 KiB | 
| Before Width: | Height: | Size: 54 KiB After Width: | Height: | Size: 54 KiB | 
| Before Width: | Height: | Size: 7.6 KiB After Width: | Height: | Size: 7.6 KiB | 
| Before Width: | Height: | Size: 134 KiB After Width: | Height: | Size: 134 KiB | 
| Before Width: | Height: | Size: 150 KiB After Width: | Height: | Size: 150 KiB |