Compare commits
22 Commits
main
...
133cb1533d
| Author | SHA1 | Date | |
|---|---|---|---|
| 133cb1533d | |||
| 9ecb11b788 | |||
| 6412cc5560 | |||
| bc628545b6 | |||
| 1ead84ac70 | |||
| 10ffdd46f3 | |||
| 987a3eaea9 | |||
| c292d0ffab | |||
| cb2e22fde5 | |||
| 8a31a565a8 | |||
| 6e3f134635 | |||
| c883d6df6a | |||
| fda04d79f1 | |||
| a4596a9836 | |||
| 7902f28a7a | |||
| 654fb2f024 | |||
| 352ca2b1ce | |||
| 82248cb24b | |||
| a21430385f | |||
| 78196d49ed | |||
| 30dd043c3b | |||
| 2db87d5e89 |
2
.gitignore
vendored
@@ -7,3 +7,5 @@ _variables.yml
|
|||||||
_freeze/
|
_freeze/
|
||||||
*_cache/
|
*_cache/
|
||||||
*_files/
|
*_files/
|
||||||
|
SD/_*/
|
||||||
|
homework/
|
||||||
|
|||||||
1
.source_state
Normal file
@@ -0,0 +1 @@
|
|||||||
|
daa3ba71b87598e29019c2f370272767
|
||||||
382
Makefile
Normal file
@@ -0,0 +1,382 @@
|
|||||||
|
# Makefile for Quarto Project Automation

# Host and platform detection.
# HOSTNAME is compared against the maintainer's machine name in some recipes.
# OS is normalised to "OSX" or "linux"; SEDI is the matching in-place sed
# command (the macOS form passes an explicit empty backup suffix to -i).
HOSTNAME := $(shell hostname)
OS := $(shell uname | tr A-Z a-z)

ifeq (darwin,$(OS))
  OS := OSX
  SEDI := sed -i ''
else ifeq (linux,$(OS))
  OS := linux
  SEDI := sed -i
else
  $(error Unknown operating system)
endif
|
||||||
|
|
||||||
|
# Git branch and project details.
# The repository root is resolved once; the repository name and the name of
# its parent directory (the project type) are then derived with make's own
# path functions instead of nested basename/dirname/git subprocesses.
branchname := $(shell git branch --show-current)
repo_toplevel := $(shell git rev-parse --show-toplevel)
reponame := $(notdir $(repo_toplevel))
projtype := $(notdir $(patsubst %/,%,$(dir $(repo_toplevel))))
# Branches whose name contains "PUB" are published under public/, all others
# under protected/.
pubtype := $(if $(findstring PUB,$(branchname)),public,protected)
remotedir := dwuser@drwater.net:/home/www/drc/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
# Value of "output-dir:" in _quarto.yml with a trailing slash appended.
outputdir := $(shell awk -F': *' '/^ *output-dir:/ {print $$2 "/" }' ./_quarto.yml)
siteurl := https://drc.drwater.net/$(projtype)/$(pubtype)/$(reponame)/$(branchname)

# grep alternation (quotes included, it is pasted verbatim into a shell
# command) used by the pull target to select remote branches.
branchnames := "TX\|FJ\|YF\|ZY\|WW\|JB\|YY\|YJ\|DYF"
|
||||||
|
|
||||||
|
# Defaults for the branch-dependent settings below.
bcolor := grey
port := 4199

# Background color: the first marker found in the branch name wins.
ifneq (,$(findstring R1,$(branchname)))
  bcolor := orange
else ifneq (,$(findstring R2,$(branchname)))
  bcolor := lightblue
else ifneq (,$(findstring R3,$(branchname)))
  bcolor := lightgreen
else ifneq (,$(findstring PUB,$(branchname)))
  bcolor := light
endif

# Preview port: exact lookup of the current branch in a "branch:port" table.
# Done with make's text functions so that no subprocess is forked and the
# branch name is never interpreted as a regular expression. Branches that are
# not listed keep the default port set above.
branch_ports := main:4200 SM:4201 TX:4202 FJ:4203 YF:4204 ZY:4205 WW:4206 JB:4207 YY:4208 YJ:4209 DYF:4210
port := $(or $(patsubst $(branchname):%,%,$(filter $(branchname):%,$(branch_ports))),$(port))
|
||||||
|
|
||||||
|
# File in which lazyrender/filehash store the hash of the source files.
STATE_FILE := .source_state

# Pattern handed to `grep -v` to drop entries from the `git ls-files` list
# before hashing (quotes included, it is pasted verbatim into the command).
GREPSTR := " \|(\|)\|^submit\|^analysis\|_cache\|_freeze\|^site_libs\|^www"

# Every target in this Makefile is a command, not a file. Declaring them all
# phony keeps a stray file or directory with the same name (e.g. "open",
# "help", "render") from silently disabling the target.
.PHONY: all force local updmakefile updrefbib check_git_status lazyrender \
        render commit pull pullmain push filehash preview readme rsync open \
        clean upload backupdocx trackchange fix_links updvariable help

# The aggregate targets below are workflows whose steps must run strictly in
# the listed order (render before upload, commit before push), so parallel
# execution is switched off for this Makefile.
.NOTPARALLEL:

# Default target
all: local upload clean commit push

force: updrefbib check_git_status updvariable render

local: updrefbib check_git_status updvariable lazyrender
|
||||||
|
|
||||||
|
# Replace this project's Makefile with $HOME/bin/publish2dw.Makefile and
# commit it. Only acts on the macOS host named "max"; on any other machine it
# just reports that no update is needed.
updmakefile:
	@if [ "$(OS)" != "OSX" ] || [ "$(HOSTNAME)" != "max" ]; then \
		echo "Makefile 无需在本系统上更新."; \
	else \
		echo "基于 $$HOME/bin/publish2dw.Makefile 更新本项目 Makefile..."; \
		cp "$$HOME/bin/publish2dw.Makefile" "./Makefile"; \
		git add "Makefile" && git commit -m "Update Makefile"; \
		echo "本项目Makefile更新完成."; \
	fi
|
||||||
|
|
||||||
|
|
||||||
|
# Refresh BB/Ref.bib and commit it when it changed.
# On the macOS host named "max" the copy in $HOME/literature is used and also
# pushed to the server; on any other machine the file is downloaded from the
# server when it answers a ping.
updrefbib:
	@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \
		echo "更新本项目参考文献..."; \
		cp "$$HOME/literature/Ref.bib" "./BB/"; \
		echo "推送本地参考文献到远程服务器..."; \
		rsync -azvu --progress "$$HOME/literature/Ref.bib" "drwater.net:/home/www/drc/datapool/public/BB/Ref.bib"; \
		echo "本项目参考文献更新完成."; \
	else \
		echo "检查网络连通性..."; \
		if ping -c 1 -W 1 drc.drwater.net > /dev/null 2>&1; then \
			echo "网络正常,更新本项目参考文献..."; \
			mkdir -p BB; \
			if wget -O BB/Ref.bib.tmp "https://drc.drwater.net/datapool/public/BB/Ref.bib" && [ -s BB/Ref.bib.tmp ]; then \
				mv -f BB/Ref.bib.tmp BB/Ref.bib; \
				echo "本项目参考文献更新完成."; \
			else \
				rm -f BB/Ref.bib.tmp; \
				echo "参考文献下载失败,保留现有 BB/Ref.bib."; \
			fi; \
		else \
			echo "网络不可用,跳过参考文献更新."; \
		fi; \
	fi; \
	if [ -f BB/Ref.bib ]; then \
		git add BB/Ref.bib; \
		if ! git diff --cached --quiet -- BB/Ref.bib; then \
			git commit -m "Update Ref.bib" -- BB/Ref.bib; \
		fi; \
	fi
# Notes on the recipe above:
# - wget writes to a temporary file that is moved into place only after a
#   successful, non-empty download, because `wget -O` truncates its output
#   file even when the transfer fails.
# - The commit is restricted to BB/Ref.bib so that unrelated changes that
#   happen to be staged are not swept into the "Update Ref.bib" commit.
|
||||||
|
|
||||||
|
# Show uncommitted changes, if any, and interactively offer to commit them
# (everything is staged with `git add .`). Answering anything other than
# Y/y, or just pressing Enter, leaves the work tree untouched.
# The prompts use printf + `read -r` because `read -p` is a bash extension
# that fails under a POSIX /bin/sh such as dash, which is what make uses by
# default on many Linux systems.
check_git_status:
	@uncommitted=$$(git status --porcelain); \
	if [ -n "$$uncommitted" ]; then \
		git status; \
		printf '%s' "当前存在未提交的修改(如上),是否要提交?(Y/N, default is N): "; \
		read -r answer || answer=""; \
		answer=$${answer:-N}; \
		if [ "$$answer" = "Y" ] || [ "$$answer" = "y" ]; then \
			printf '%s' "请输入修改说明: "; \
			read -r message || message=""; \
			git add . && git commit -m "$$message"; \
		else \
			echo "未提交,如后续操作为pull,则无法继续"; \
		fi; \
	fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Lazy render with hash checking.
# Hashes the concatenated contents of the tracked source files (qmd, _*.yml,
# pdf, svg, png; minus everything matching GREPSTR) and renders only when the
# hash differs from the one stored in STATE_FILE.
# The new hash is stored only after `render` succeeded; recording it before
# the render would make a failed render look up to date on the next run.
lazyrender:
	@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
	if [ -f $(STATE_FILE) ] && [ "$$current_hash" = "$$(cat $(STATE_FILE))" ]; then \
		echo "源文件无变化, 跳过编译..."; \
	else \
		echo "源文件发生变化, 重新编译..."; \
		$(MAKE) render && echo "$$current_hash" > $(STATE_FILE); \
	fi
|
||||||
|
|
||||||
|
# Render target: run `quarto render` unconditionally (no hash check).
render:
	@quarto render
|
||||||
|
|
||||||
|
# Stage everything in the work tree and commit it as "render compile";
# does nothing when the index ends up without changes.
# `git diff --cached --quiet` answers "is anything staged?" through its exit
# status instead of capturing the whole diff text in a shell string.
commit:
	@echo "提交修改(commit)..."; \
	git add .; \
	if git diff --cached --quiet; then \
		echo "没有修改记录,跳过."; \
	else \
		git commit -m "render compile"; \
	fi
|
||||||
|
|
||||||
|
# Pull changes from the specified branch based on the current branch.
# After the uncommitted-changes check and a plain `git pull`:
#   - on any branch other than main, rebase onto origin/main;
#   - on main, rebase onto the first remote branch (other than main) whose
#     name matches $(branchnames), if there is one.
pull:
	@echo "从远程拉取项目更新..."; \
	$(MAKE) check_git_status; \
	git pull; \
	current_branch=$$(git rev-parse --abbrev-ref HEAD); \
	if [ "$$current_branch" != main ]; then \
		echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
		git pull --rebase origin main; \
	else \
		echo "当前分枝为$$current_branch."; \
		remote_branch=$$(git branch --remote | grep -v 'main' | grep $(branchnames) | awk '{print $$1}' | sed 's/origin\///' | head -n 1); \
		if [ -z "$$remote_branch" ]; then \
			echo "远程无可用分支$$remote_branch."; \
		else \
			echo "尝试从远程分枝$$remote_branch 拉取更新..."; \
			git pull --rebase origin $$remote_branch; \
		fi; \
	fi
|
||||||
|
|
||||||
|
# Pull changes from the main branch: check for uncommitted work, then rebase
# the current branch onto origin/main.
# The whole recipe is one shell command line, so the `@` prefix may only
# appear once, at its very start; an `@` on a continuation line is handed to
# the shell as part of the command and fails with "command not found".
pullmain:
	@$(MAKE) check_git_status; \
	current_branch=$$(git rev-parse --abbrev-ref HEAD); \
	echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
	git pull --rebase origin main
|
||||||
|
|
||||||
|
# Announce, then push the current branch with a plain `git push`.
push:
	@echo "推送到远程..." && git push
|
||||||
|
|
||||||
|
# Recompute the source-file hash (same file selection and pipeline as
# lazyrender) and store it in STATE_FILE without rendering anything.
filehash:
	@digest=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
	printf '%s\n' "$$digest" > $(STATE_FILE)
|
||||||
|
|
||||||
|
# Preview the site with `quarto preview` on the port selected for the
# current branch (see branch_ports).
preview:
	@quarto preview --port $(port)
|
||||||
|
|
||||||
|
# Generate README.md from index.qmd.
readme:
	@# 1. Render index.qmd as markdown; the result is picked up from the output directory.
	@quarto render index.qmd -t markdown -o README.md
	@# 2. Copy it to the project root, dropping every "---" ... "---" line range.
	@sed -e '/^---/,/^---/d' "$(outputdir)/README.md" > README.md
	@# 3. Remove the rendered copy from the output directory.
	@rm "$(outputdir)/README.md"
|
||||||
|
|
||||||
|
# Sync the output directory to the remote server; --delete removes remote
# files that no longer exist locally.
rsync:
	@rsync -azvu --progress --delete -r \
		"$(outputdir)" \
		"$(remotedir)"
|
||||||
|
|
||||||
|
# Open the locally rendered index.html; a no-op on anything but macOS.
open:
	@[ "$(OS)" != "OSX" ] || open "$(outputdir)/index.html"
|
||||||
|
|
||||||
|
# Clean unnecessary files: LaTeX/bibliography by-products left in the
# project root. Only the top-level directory is touched.
clean:
	@rm -f \
		./*.spl ./*.bbl ./*.blg ./*.log \
		./*.tex ./*.bcf ./*.tex.sedbak ./*.fdb_latexmk
|
||||||
|
|
||||||
|
# Upload files to server and fix links.
# Steps: make sure the output directory exists with mode 2775, rewrite the
# placeholder links (fix_links), rsync the output directory to $(remotedir),
# and on macOS open the published site.
# If the first rsync fails, an empty directory named after the repository is
# synced into the parent of $(remotedir) and the upload is retried once. That
# empty directory is created under a mktemp scratch directory: creating and
# `rm -rf`-ing "$(reponame)" inside the work tree would delete a real project
# directory that happens to carry the repository's name.
upload: backupdocx
	@mkdir -p "$(outputdir)" && chmod -R 2775 "$(outputdir)"
	@$(MAKE) fix_links
	@sync_site() { rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; }; \
	if ! sync_site; then \
		echo "Rsync failed. Attempting alternative upload method..."; \
		stage=$$(mktemp -d) && mkdir -p "$$stage/$(reponame)" && \
			rsync -azvu --progress --delete -r "$$stage/$(reponame)" "$(dir $(remotedir))"; \
		[ -z "$$stage" ] || rm -rf "$$stage"; \
		sync_site; \
	fi; \
	if [ "$(OS)" = "OSX" ]; then \
		open "$(siteurl)" 2>/dev/null; \
	fi
|
||||||
|
|
||||||
|
# Keep a snapshot of www/MS/MS.docx under TC/MS/, named
# MS<yymmddHHMM>_<short hash>.docx after the date and hash of HEAD.
# Nothing is copied when the document is missing, when some existing snapshot
# already has identical content, or when the snapshot name is already taken.
# The commit is restricted to the new snapshot so that unrelated staged
# changes are not swept into the backup commit.
backupdocx:
	@if [ ! -f www/MS/MS.docx ]; then \
		echo "未找到 www/MS/MS.docx,跳过备份."; \
		exit 0; \
	fi; \
	echo "备份MS.docx文件..."; \
	currentcommithash=$$(git rev-parse --short HEAD); \
	datetime=$$(git show -s --format=%ci $$currentcommithash | sed 's/[-: ]//g' | cut -c3-12); \
	backup="TC/MS/MS$${datetime}_$${currentcommithash}.docx"; \
	mkdir -p TC/MS/; \
	existing_file=$$(find TC/MS -name "MS*.docx" -exec cmp -s www/MS/MS.docx {} \; -print -quit); \
	if [ -n "$$existing_file" ]; then \
		echo "与www/MS/MS.docx 内容相同的备份文件已存在: $$existing_file"; \
		echo "无需备份."; \
	elif [ -e "$${backup}" ]; then \
		echo "$${backup}已存在,无需备份."; \
	else \
		cp www/MS/MS.docx "$${backup}" || exit 1; \
		echo "备份$${backup}完成."; \
		git add "$${backup}"; \
		if ! git diff --cached --quiet -- "$${backup}"; then \
			git commit -m "备份$${backup}" -- "$${backup}"; \
		fi; \
	fi
|
||||||
|
|
||||||
|
|
||||||
|
# Interactively pick two commits (fzf) and open the two matching MS.docx
# snapshots from TC/MS/ for a track-changes comparison in Word; the suggested
# name of the comparison file is put on the clipboard. Only runs when the
# project type is "manuscript"; relies on fzf, open and pbcopy.
# For each picked commit the snapshot of its PARENT commit is used
# (`<hash>^`), as in the original recipe.
# The selection is kept one commit per line (quoted "$$hashes") and read with
# head/tail. Splitting a flattened string with `\n` in a sed replacement is
# not portable (POSIX leaves it unspecified) and assumed 7-character
# abbreviated hashes.
trackchange:
	@if [ "$(projtype)" != "manuscript" ]; then \
		exit 0; \
	fi; \
	echo "选择两个提交以比较文档..."; \
	hashes=$$(git log --pretty=format:'%h: %s BY %an (%ar)' \
		| grep -E "$$(ls TC/MS/*.docx | xargs -n1 basename | sed -E 's/MS.*_([0-9a-f]+)\.docx/\1/' | tr '\n' '|')SMT_】" \
		| fzf --multi --reverse --preview="echo {}" ); \
	echo "$$hashes"; \
	pick1=$$(printf '%s\n' "$$hashes" | tail -n 1 | awk '{print $$1}' | tr -d ':'); \
	pick2=$$(printf '%s\n' "$$hashes" | head -n 1 | awk '{print $$1}' | tr -d ':'); \
	if [ -z "$$pick1" ] || [ -z "$$pick2" ]; then \
		echo "必须选择两个提交."; \
		exit 1; \
	fi; \
	hash1=$$(git rev-parse --short "$${pick1}^"); \
	hash2=$$(git rev-parse --short "$${pick2}^"); \
	if [ -z "$$hash1" ] || [ -z "$$hash2" ]; then \
		echo "必须选择两个提交."; \
		exit 1; \
	fi; \
	datetime1=$$(git show -s --format=%ci $$hash1 | sed 's/[-: ]//g' | cut -c3-12); \
	datetime2=$$(git show -s --format=%ci $$hash2 | sed 's/[-: ]//g' | cut -c3-12); \
	doc1="TC/MS/MS$${datetime1}_$$hash1.docx"; \
	echo "$$doc1"; \
	doc2="TC/MS/MS$${datetime2}_$$hash2.docx"; \
	echo "$$doc2"; \
	if [ -f "$$doc1" ] && [ -f "$$doc2" ] && [ "$$doc1" != "$$doc2" ]; then \
		echo "打开文件: $$doc1 和 $$doc2"; \
		open "$$doc1" "$$doc2"; \
		printf "MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}" | pbcopy; \
		echo "请在word中对比两个版本形成带修改痕迹的版本,并保存至TC/MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}.docx!"; \
	else \
		echo "一个或两个文件不存在: $$doc1, $$doc2"; \
		exit 1; \
	fi
|
||||||
|
|
||||||
|
# Fix links in www directory.
# Replaces the unrendered {{< var ... >}} placeholders in every HTML file
# with the actual branch / publication type / project type / repository name,
# and rewrites "<repo>/blob" to "<repo>/raw/branch" and "<repo>/edit" to
# "<repo>/_edit".
# `|` is used as the sed delimiter so that values containing "/" (branch
# names such as feature/x) do not break the substitution. `-i.bak` is the
# form accepted by both GNU and BSD sed; the backups are removed afterwards
# (note: this removes every *.bak file below ./www).
fix_links:
	@find ./www -type f -name "*.html" -exec sed -i.bak \
		-e "s|{{< var branch >}}|$(branchname)|g" \
		-e "s|{{< var pubtype >}}|$(pubtype)|g" \
		-e "s|{{< var projtype >}}|$(projtype)|g" \
		-e "s|{{< var reponame >}}|$(reponame)|g" \
		-e "s|$(reponame)/blob|$(reponame)/raw/branch|g" \
		-e "s|$(reponame)/edit|$(reponame)/_edit|g" {} +
	@find ./www -type f -name "*.bak" -exec rm {} +
|
||||||
|
|
||||||
|
# Write the project metadata into _variables.yml, then run `mkpapervar`
# (an external command, not defined in this Makefile).
# Each key is "upserted": an existing "key: ..." line is rewritten in place,
# a missing key is appended. One shell helper replaces eight copy-pasted
# stanzas whose `else` branches could never run. The sed delimiter is `|` so
# that values containing "/" (e.g. branch names) are safe.
# nwAB, nwMS, figtblMS and figtblSM are not assigned anywhere in this
# Makefile; unless supplied on the command line or via the environment they
# expand to the empty string.
updvariable:
	@touch _variables.yml
	@upsert() { \
		if grep -q "^$$1:" _variables.yml; then \
			$(SEDI) "s|^$$1:.*|$$1: $$2|" _variables.yml; \
		else \
			echo "$$1: $$2" >> _variables.yml; \
		fi; \
	}; \
	upsert reponame "$(reponame)" && \
	upsert projtype "$(projtype)" && \
	upsert branch "$(branchname)" && \
	upsert pubtype "$(pubtype)" && \
	upsert nwAB "$(nwAB)" && \
	upsert nwMS "$(nwMS)" && \
	upsert figtblMS "$(figtblMS)" && \
	upsert figtblSM "$(figtblSM)"
	@mkpapervar
|
||||||
|
|
||||||
|
|
||||||
|
# Help: list all available commands with descriptions (English and Chinese).
# The descriptions follow what the recipes in this Makefile actually do.
help:
	@echo "Makefile for Quarto Project Automation"
	@echo "======================================="
	@echo "Available targets (English):"
	@echo ""
	@echo "  make all              - Local build, upload, clean, commit, and push"
	@echo "  make force            - Update references and variables, check git status, then always render"
	@echo "  make local            - Update references and variables, check git status, then render only if sources changed"
	@echo "  make check_git_status - Check for uncommitted changes and ask to commit them"
	@echo "  make lazyrender       - Render if source files have changed based on hash comparison"
	@echo "  make render           - Force Quarto to render the project"
	@echo "  make commit           - Stage all changes and commit them if there is anything to commit"
	@echo "  make pull             - Pull from the remote, then rebase onto origin/main (on main: onto the first matching collaborator branch)"
	@echo "  make pullmain         - Rebase the current branch onto origin/main"
	@echo "  make push             - Push to the remote"
	@echo "  make filehash         - Generate and store the file hash of source files"
	@echo "  make preview          - Preview the site locally on the specific port (default: 4199)"
	@echo "  make readme           - Render README.md from Quarto index.qmd"
	@echo "  make rsync            - Sync output files with the remote server"
	@echo "  make open             - Open the generated site locally in the browser (macOS only)"
	@echo "  make clean            - Clean up unnecessary files"
	@echo "  make upload           - Back up MS.docx, fix links, and upload files to the server"
	@echo "  make backupdocx       - Back up www/MS/MS.docx to TC/MS/ and commit the backup"
	@echo "  make trackchange      - Pick two commits and open their MS.docx backups for comparison (manuscript projects)"
	@echo "  make fix_links        - Fix HTML links in the 'www' directory for the remote server"
	@echo "  make updmakefile      - Update the Makefile"
	@echo "  make updrefbib        - Update BB/Ref.bib and commit it if it changed"
	@echo "  make updvariable      - Update the project metadata in _variables.yml"
	@echo "  make help             - Display this help message"
	@echo ""
	@echo "Available targets (中文):"
	@echo ""
	@echo "  make all              - 执行本地构建、上传、清理、提交并推送"
	@echo "  make force            - 更新参考文献和变量、检查Git状态,然后强制渲染"
	@echo "  make local            - 更新参考文献和变量、检查Git状态,若源文件有更改则渲染"
	@echo "  make check_git_status - 检查未提交的更改,并询问是否提交"
	@echo "  make lazyrender       - 如果源文件发生更改,则根据哈希比较进行渲染"
	@echo "  make render           - 强制 Quarto 渲染项目"
	@echo "  make commit           - 暂存所有更改,如有更改则提交"
	@echo "  make pull             - 从远程拉取更新,并变基到 origin/main (在 main 分支上则变基到第一个匹配的协作者分支)"
	@echo "  make pullmain         - 将当前分支变基到 origin/main"
	@echo "  make push             - 推送到远程"
	@echo "  make filehash         - 生成并存储源文件的哈希值"
	@echo "  make preview          - 本地在特定端口预览网站 (默认: 4199)"
	@echo "  make readme           - 从 Quarto 的 index.qmd 生成 README.md"
	@echo "  make rsync            - 将输出文件同步到远程服务器"
	@echo "  make open             - 在浏览器中打开生成的网站 (仅限 macOS)"
	@echo "  make clean            - 清理不必要的文件"
	@echo "  make upload           - 备份 MS.docx、修复链接并上传文件到服务器"
	@echo "  make backupdocx       - 将 www/MS/MS.docx 备份到 TC/MS/ 并提交"
	@echo "  make trackchange      - 选择两个提交并打开对应的 MS.docx 备份进行对比 (仅限 manuscript 项目)"
	@echo "  make fix_links        - 修复 'www' 目录中的 HTML 链接"
	@echo "  make updmakefile      - 更新本项目 Makefile"
	@echo "  make updrefbib        - 更新 BB/Ref.bib,如有变化则提交"
	@echo "  make updvariable      - 更新 _variables.yml 中的项目信息"
	@echo "  make help             - 显示此帮助信息"
	@echo ""
	@echo "Environment variables (English and 中文):"
	@echo "  bcolor                - Background color based on branch name (基于分支名的背景颜色)"
	@echo "  port                  - Port number based on branch name (基于分支名的端口号)"
	@echo "  STATE_FILE            - File for storing hash state of source files (用于存储源文件哈希状态的文件)"
	@echo "  siteurl               - The URL where the site will be hosted (网站托管的 URL)"
	@echo ""
|
||||||
|
|
||||||
|
|
||||||
@@ -47,9 +47,8 @@ knitr::opts_chunk$set(echo = TRUE)
|
|||||||
### 课件
|
### 课件
|
||||||
|
|
||||||
- 采用`R语言`+`quarto`完成
|
- 采用`R语言`+`quarto`完成
|
||||||
- 网页公开:[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html)
|
- 网页公开:[https://drc.drwater.net/course/public/RWEP/PUB/index.html](https://drc.drwater.net/course/public/RWEP/PUB/index.html)
|
||||||
- 课件代码:[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git)
|
- 课件代码:[https://git.drwater.net/course/RWEP.git](https://git.drwater.net/course/RWEP.git)
|
||||||
- 代码web界面:[https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/](https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/)
|
|
||||||
|
|
||||||
## 如何学习接下来的内容?
|
## 如何学习接下来的内容?
|
||||||
|
|
||||||
@@ -67,7 +66,7 @@ knitr::opts_chunk$set(echo = TRUE)
|
|||||||
|
|
||||||
## Rstudio Server使用
|
## Rstudio Server使用
|
||||||
|
|
||||||
- 服务网址:[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/)
|
- 服务网址:[https://rs1.drwater.net/](https://rs1.drwater.net/)
|
||||||
- 每位同学使用1个账号,随机生成
|
- 每位同学使用1个账号,随机生成
|
||||||
- 密码:****
|
- 密码:****
|
||||||
- 后面的实践课程可在该服务器上完成
|
- 后面的实践课程可在该服务器上完成
|
||||||
138
SD/1.1_R语言介绍/_demo.qmd
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
---
|
||||||
|
title: "Lesson 6"
|
||||||
|
format: html
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
```text
|
||||||
|
|
||||||
|
https://rs1.drwater.net
|
||||||
|
|
||||||
|
username:
|
||||||
|
- ruser01
|
||||||
|
- ruser02
|
||||||
|
- ruser03
|
||||||
|
- ruser04
|
||||||
|
- ruser05
|
||||||
|
- ruser06
|
||||||
|
|
||||||
|
RWEP2025
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# 安装包
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
# Install tidyverse only when it is missing, so re-running the chunk does not
# reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}

# Descriptive statistics on a vector that contains a missing value.
x <- c(1:10, NA)

hist(x)
mean(x, na.rm = TRUE)
median(x, na.rm = TRUE)
sd(x, na.rm = TRUE)

# A simple loop.
for (i in 1:10) {
  print(i)
}

# The expression to turn into a function. It is kept as a comment because `y`
# does not exist at top level, so evaluating it here would stop with an error:
#   x + y + x * y
myfunc <- function(x, y = 3) {
  x + y + x * y
}

myfunc(1, 2) # both arguments given
myfunc(10)   # y falls back to its default, 3

# Mixing types in c() coerces to a common type; comparisons are vectorised.
c(FALSE, 2, 1:3, 3)
c(FALSE, 2, 1:3, 3) > 1
all(c(FALSE, 2, 1:3, 3) > 1)

c(1L, 2L, 3L)
any(c(FALSE, 2, 1:3, 3) > 1)

# Goal: build the text "sin(x) = <value>". (The original note `sin(x) = ?` is
# not valid R and prevented the whole chunk from parsing.)
x <- 10
paste("sin(x) = ", sin(x), sep = " ")
paste0("sin(x) = ", sin(x))

substr("Monday", 1, 3)
```
|
||||||
|
|
||||||
|
|
||||||
|
# tidy
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
require(readxl)
# Attach the tidyverse so that ggplot(), aes(), the geoms and filter() used
# unqualified below are available when this chunk is run on its own.
require(tidyverse)

aqdf <- readxl::read_xlsx("../../data/airquality.xlsx", sheet = "metadf")

# install.packages("skimr")
aqdf |>
  skimr::skim()

# base

# tidyverse
# Per-area site count and mean/sd of longitude, plotted for four cities.
aqdf |>
  dplyr::group_by(Area) |>
  dplyr::summarize(
    n = dplyr::n(),
    lon.mean = mean(lon, na.rm = TRUE),
    # Spread of the same variable as the mean; this was computed from `lat`,
    # which made the error bars below meaningless.
    lon.sd = sd(lon, na.rm = TRUE)
  ) |>
  dplyr::filter(Area %in% c("北京市", "天津市", "上海市", "重庆市")) |>
  ggplot(aes(x = n, y = lon.mean)) +
  geom_point() +
  geom_line() +
  geom_errorbar(
    aes(
      ymin = lon.mean - lon.sd,
      ymax = lon.mean + lon.sd
    )
  )

readxl::read_xlsx("./airquality.xlsx")

# NOTE(review): `flights` is not created in this file; presumably it is the
# nycflights13 dataset and must be loaded before running this -- confirm.
flights |>
  filter(dest == "IAH") |>
  group_by(year, month, day) |>
  summarize(
    n = n(),
    delay = mean(arr_delay, na.rm = TRUE)
  ) |>
  filter(n > 10)
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz")
|
|||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||||
@@ -264,6 +264,9 @@ t.test(x, y)
|
|||||||
wilcox.test(x, y)
|
wilcox.test(x, y)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### [什么是 Wilcoxon-Mann-Whitney检验?](https://zhuanlan.zhihu.com/p/613524533)
|
||||||
|
|
||||||
## 统计函数
|
## 统计函数
|
||||||
|
|
||||||
### 创建向量的直方图
|
### 创建向量的直方图
|
||||||
@@ -792,4 +795,4 @@ names(Y) <- c("colA", "colB", "colC")
|
|||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||||
@@ -37,7 +37,7 @@ require(learnr)
|
|||||||
|
|
||||||
## 下载excel文件
|
## 下载excel文件
|
||||||
|
|
||||||
[https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx)
|
[https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx](https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx)
|
||||||
|
|
||||||
|
|
||||||
## Tidy data
|
## Tidy data
|
||||||
21
SD/1.9_课后作业6/data.csv
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
name,age,score
|
||||||
|
Alice,25,85
|
||||||
|
Bob,30,92
|
||||||
|
Charlie,28,89
|
||||||
|
David,22,95
|
||||||
|
Eva,35,87
|
||||||
|
Frank,27,91
|
||||||
|
Grace,29,88
|
||||||
|
Helen,26,93
|
||||||
|
Ivan,31,86
|
||||||
|
Jack,24,94
|
||||||
|
Kelly,32,89
|
||||||
|
Lily,28,90
|
||||||
|
Mike,33,85
|
||||||
|
Nancy,27,92
|
||||||
|
Olivia,34,88
|
||||||
|
Peter,29,93
|
||||||
|
Queen,25,89
|
||||||
|
Ryan,30,94
|
||||||
|
Samantha,26,91
|
||||||
|
Tom,31,87
|
||||||
|
@@ -79,5 +79,5 @@ Tom,31,87
|
|||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||||
|
|
||||||
154
SD/2.2_dataimport/_demo.qmd
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
---
|
||||||
|
title: "Lesson 7"
|
||||||
|
format: html
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
require(tidyverse)

# Variant 1: list the input files by hand.
files <- c(
  "../../data/01-sales.csv",
  "../../data/02-sales.csv",
  "../../data/03-sales.csv"
)

# Variant 2: discover them by pattern (this overwrites the list above).
files <- dir("../../data", pattern = "sales.csv", full.names = TRUE)

# Read all files into one table; `id = "file"` records the source path of
# each row, which is then shortened to the bare file name.
readr::read_csv(files, id = "file") |>
  mutate(file = basename(file))
```
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
require(tidyverse)
library(tidyverse)

# Workbooks whose file name matches the four-digit pattern below.
files <- dir(
  "../../data/gapminder",
  pattern = "^[12][09][0-9][0-9].xlsx$",
  full.names = TRUE
)

# Loop version: read each workbook, tag its rows with the number parsed from
# the file name, and append them to one growing table.
alldf <- tibble::tibble()
for (file in files) {
  alldf <- alldf |>
    bind_rows(
      readxl::read_xlsx(file) |>
        mutate(year = parse_number(basename(file)))
    )
}

# gdpPercap against lifeExp, one panel per year, log10 y axis.
alldf |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  facet_wrap(~year, ncol = 4, scale = "fixed")
```
|
||||||
|
|
||||||
|
# slope
|
||||||
|
|
||||||
|
```{r}
file <- files[1]

# General form of a linear model call (shown as a comment: `y`, `x` and
# `data` are placeholders, so running the line as-is stops with an error):
#   lm(y ~ x, data)

# Fit for a single file, then inspect the model and its slope.
m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file))
summary(m)
coef(m)[2]

# Same fit for every file, collecting the year and the slope as we go.
slopes <- c()
years <- c()
for (file in files) {
  m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file))
  years <- c(years, parse_number(basename(file)))
  slopes <- c(slopes, coef(m)[2])
}
years
slopes

plot(years, as.numeric(slopes), type = "b")
```
|
||||||
|
|
||||||
|
# purrr
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
require(tidyverse)

# One row per workbook: file name, its data (list column), the year parsed
# from the file name, the fitted model (list column) and the model's slope.
# `df` stays nested here and is unnested only where row-level data is needed.
# Unnesting it in this definition removed the `data` column, so the later
# `df |> unnest(data)` failed.
df <- tibble(
  filename = dir(
    "../../data/gapminder",
    pattern = "^[12][09][0-9][0-9].xlsx$",
    full.names = TRUE
  )
) |>
  dplyr::mutate(
    data = purrr::map(
      filename,
      \(x) readxl::read_xlsx(x)
    )
  ) |>
  mutate(year = parse_number(basename(filename))) |>
  mutate(
    m = purrr::map(data, \(xxxx) lm(log10(gdpPercap) ~ lifeExp, data = xxxx))
  ) |>
  mutate(slope = purrr::map_dbl(m, \(yyyy) coef(yyyy)[2]))

# Pulling numbers out of one model by hand ...
pvalue <- summary(df$m[[1]])$coefficients[2, 4]
rsq <- summary(df$m[[1]])$r.squared

# ... or as a tidy table.
require(tidymodels)
generics::tidy(df$m[[1]])

# Row-level plot: needs the observations, hence unnest(data).
df |>
  unnest(data) |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  facet_wrap(~year, ncol = 4, scale = "fixed")

# Model-level plot: one point per year, taken from the nested table.
df |>
  ggplot(aes(x = year, y = slope)) +
  geom_line() +
  geom_point()

df
df$slope[[1]]

coef(df$m[[1]])[2]

# General form of a linear model call (placeholders only, so kept as a comment):
#   lm(y ~ x, data)

df |>
  unnest(data) |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  facet_wrap(~year, ncol = 4, scale = "fixed")
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -74,7 +74,7 @@ read_csv("../../data/students.csv") |>
|
|||||||
## 列名不要有空格
|
## 列名不要有空格
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
students |>
|
students |>
|
||||||
rename(
|
rename(
|
||||||
student_id = `Student ID`,
|
student_id = `Student ID`,
|
||||||
full_name = `Full Name`
|
full_name = `Full Name`
|
||||||
@@ -158,7 +158,7 @@ x,y,z
|
|||||||
1,2,3"
|
1,2,3"
|
||||||
|
|
||||||
read_csv(
|
read_csv(
|
||||||
another_csv,
|
another_csv,
|
||||||
col_types = cols(.default = col_character())
|
col_types = cols(.default = col_character())
|
||||||
)
|
)
|
||||||
read_csv(
|
read_csv(
|
||||||
@@ -197,9 +197,11 @@ annoying <- tibble(
|
|||||||
```{r}
|
```{r}
|
||||||
#| message: false
|
#| message: false
|
||||||
|
|
||||||
sales_files <- c("../../data/01-sales.csv",
|
sales_files <- c(
|
||||||
|
"../../data/01-sales.csv",
|
||||||
"../../data/02-sales.csv",
|
"../../data/02-sales.csv",
|
||||||
"../../data/03-sales.csv")
|
"../../data/03-sales.csv"
|
||||||
|
)
|
||||||
read_csv(sales_files, id = "file")
|
read_csv(sales_files, id = "file")
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -223,8 +225,11 @@ read_csv(sales_files, id = "file")
|
|||||||
## 批量读取
|
## 批量读取
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
sales_files <- list.files("../../data",
|
sales_files <- list.files(
|
||||||
pattern = "sales\\.csv$", full.names = TRUE)
|
"../../data",
|
||||||
|
pattern = "sales\\.csv$",
|
||||||
|
full.names = TRUE
|
||||||
|
)
|
||||||
sales_files
|
sales_files
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -260,7 +265,7 @@ if (FALSE) {
|
|||||||
```{r}
|
```{r}
|
||||||
if (FALSE) {
|
if (FALSE) {
|
||||||
conn <- cctdb::get_dbconn("nationalairquality")
|
conn <- cctdb::get_dbconn("nationalairquality")
|
||||||
metadf <- tbl(conn, "metadf") |>
|
metadf <- tbl(conn, "metadf") |>
|
||||||
head(100) |>
|
head(100) |>
|
||||||
collect()
|
collect()
|
||||||
DBI::dbDisconnect(conn)
|
DBI::dbDisconnect(conn)
|
||||||
@@ -270,8 +275,8 @@ metadf <- readRDS(file = "../../data/metadfdemo.RDS")
|
|||||||
lang <- "cn"
|
lang <- "cn"
|
||||||
metadf |>
|
metadf |>
|
||||||
ggplot(aes(lon, lat)) +
|
ggplot(aes(lon, lat)) +
|
||||||
geom_point(aes(fill = Area)) +
|
geom_point(aes(fill = Area)) +
|
||||||
dwfun::theme_sci()
|
dwfun::theme_sci()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@@ -283,8 +288,13 @@ metadf <- readxl::read_xlsx("../../data/airquality.xlsx")
|
|||||||
dir.create("../../data/metacity2/")
|
dir.create("../../data/metacity2/")
|
||||||
metadf |>
|
metadf |>
|
||||||
nest(sitedf = -site) |>
|
nest(sitedf = -site) |>
|
||||||
mutate(flag = purrr::map2(site, sitedf,
|
mutate(
|
||||||
~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx"))))
|
flag = purrr::map2(
|
||||||
|
site,
|
||||||
|
sitedf,
|
||||||
|
~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx"))
|
||||||
|
)
|
||||||
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@@ -296,7 +306,7 @@ metadf |>
|
|||||||
if (FALSE) {
|
if (FALSE) {
|
||||||
require(tidyverse)
|
require(tidyverse)
|
||||||
conn <- cctdb::get_dbconn("nationalairquality")
|
conn <- cctdb::get_dbconn("nationalairquality")
|
||||||
metadf <- tbl(conn, "metadf") |>
|
metadf <- tbl(conn, "metadf") |>
|
||||||
collect()
|
collect()
|
||||||
DBI::dbDisconnect(conn)
|
DBI::dbDisconnect(conn)
|
||||||
metanestdf <- metadf |>
|
metanestdf <- metadf |>
|
||||||
@@ -305,10 +315,16 @@ if (FALSE) {
|
|||||||
writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx")
|
writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx")
|
||||||
dir.create("../../data/metacity/")
|
dir.create("../../data/metacity/")
|
||||||
metanestdf |>
|
metanestdf |>
|
||||||
mutate(flag = purrr::map2(Area, citydf,
|
mutate(
|
||||||
~ writexl::write_xlsx(.y,
|
flag = purrr::map2(
|
||||||
path = paste0("../../data/metacity/", .x, ".xlsx")
|
Area,
|
||||||
)))
|
citydf,
|
||||||
|
~ writexl::write_xlsx(
|
||||||
|
.y,
|
||||||
|
path = paste0("../../data/metacity/", .x, ".xlsx")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -321,5 +337,4 @@ if (FALSE) {
|
|||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||||
|
|
||||||
115
SD/2.3_datatransform/_demo.qmd
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
---
|
||||||
|
title: "Lesson 8"
|
||||||
|
format: html
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
require(tidyverse)
|
||||||
|
library(nycflights13)
|
||||||
|
|
||||||
|
flights |>
|
||||||
|
select(3:6) |>
|
||||||
|
head(3) |>
|
||||||
|
rename_all(~ gsub("_", "", .))
|
||||||
|
|
||||||
|
|
||||||
|
flights |>
|
||||||
|
select(3:6) |>
|
||||||
|
head(3) |>
|
||||||
|
rename_with(toupper, .cols = 2:4)
|
||||||
|
|
||||||
|
# 每月10号-15号,dep_delay > 100 的航班
|
||||||
|
|
||||||
|
# 每月哪个出发地origin的 dep_delay总时长最长
|
||||||
|
|
||||||
|
# Sum (not average) the departure delay per month/origin; the result stays
|
||||||
|
# grouped by month, so slice_max() keeps the worst origin within each month.
|
||||||
|
flights |>
|
||||||
|
group_by(month, origin) |>
|
||||||
|
summarize(n = n(), total_dep_delay = sum(dep_delay, na.rm = TRUE), .groups = "drop_last") |>
|
||||||
|
slice_max(total_dep_delay)
|
||||||
|
|
||||||
|
# arrange(month, origin, desc(total_dep_delay))
|
||||||
|
|
||||||
|
# 每月随机抽取一天,随机抽取三个航班
|
||||||
|
|
||||||
|
slice_sample(n = 1)
|
||||||
|
|
||||||
|
flights |>
|
||||||
|
tidyr::nest(ymddf = -c(year, month, day)) |>
|
||||||
|
group_by(year, month) |>
|
||||||
|
slice_sample(n = 5) |>
|
||||||
|
unnest(ymddf) |>
|
||||||
|
group_by(year, month, day) |>
|
||||||
|
slice_sample(n = 3)
|
||||||
|
|
||||||
|
|
||||||
|
flights |>
|
||||||
|
tidyr::nest(ymddf = -c(year, month, day)) |>
|
||||||
|
group_by(year, month) |>
|
||||||
|
slice_sample(n = 1) |>
|
||||||
|
mutate(
|
||||||
|
ymddf = purrr::map(ymddf, \(x) {
|
||||||
|
x |>
|
||||||
|
slice_sample(n = 3)
|
||||||
|
})
|
||||||
|
)
|
||||||
|
|
||||||
|
# 每月 每个出发地 周末的平均dep_delay 与 工作日的平均dep_delay 差值最大的3个航班
|
||||||
|
|
||||||
|
flights |>
|
||||||
|
mutate(date = ymd(paste(year, month, day))) |>
|
||||||
|
mutate(weekday = wday(date)) |>
|
||||||
|
mutate(isworkday = if_else(between(weekday, 2, 6), "Yes", "No")) |>
|
||||||
|
group_by(year, month, origin, flight, isworkday) |>
|
||||||
|
summarize(mean_delay = mean(dep_delay, na.rm = TRUE)) |>
|
||||||
|
tidyr::nest(diffdelaydf = c(isworkday, mean_delay)) |>
|
||||||
|
filter(
|
||||||
|
purrr::map(diffdelaydf, \(x) {
|
||||||
|
nrow(x)
|
||||||
|
}) >
|
||||||
|
1
|
||||||
|
) |>
|
||||||
|
mutate(
|
||||||
|
diffdelay = purrr::map_dbl(diffdelaydf, \(x) {
|
||||||
|
x |>
|
||||||
|
arrange(isworkday) |>
|
||||||
|
pull(mean_delay) |>
|
||||||
|
diff()
|
||||||
|
})
|
||||||
|
) |>
|
||||||
|
group_by(year, month, origin) |>
|
||||||
|
slice_max(diffdelay, n = 3)
|
||||||
|
|
||||||
|
|
||||||
|
wday(today())
|
||||||
|
|
||||||
|
weekday()
|
||||||
|
|
||||||
|
|
||||||
|
slice_sample(n = 1)
|
||||||
|
|
||||||
|
|
||||||
|
flights |>
|
||||||
|
group_by(month) |>
|
||||||
|
slice_sample(n = 1)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
p1 <- flights |>
|
||||||
|
group_by(year, month) |>
|
||||||
|
summarize(n = n()) |>
|
||||||
|
ggplot(aes(month, n)) +
|
||||||
|
geom_point(shape = 21, size = 6, color = "black", fill = "red") +
|
||||||
|
geom_line()
|
||||||
|
|
||||||
|
ggsave("./a.pdf")
|
||||||
|
ggsave("./a.png")
|
||||||
|
|
||||||
|
require(patchwork)
|
||||||
|
|
||||||
|
p1 / p1
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
BIN
SD/2.3_datatransform/a.pdf
Normal file
BIN
SD/2.3_datatransform/a.png
Normal file
|
After Width: | Height: | Size: 271 KiB |
@@ -323,5 +323,5 @@ flights |>
|
|||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||||
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
---
|
|
||||||
title: "课后作业9"
|
|
||||||
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
|
|
||||||
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
|
|
||||||
date: today
|
|
||||||
lang: zh
|
|
||||||
format:
|
|
||||||
revealjs:
|
|
||||||
theme: dark
|
|
||||||
slide-number: true
|
|
||||||
chalkboard:
|
|
||||||
buttons: true
|
|
||||||
preview-links: auto
|
|
||||||
lang: zh
|
|
||||||
toc: true
|
|
||||||
toc-depth: 1
|
|
||||||
toc-title: 大纲
|
|
||||||
logo: ./_extensions/inst/img/ucaslogo.png
|
|
||||||
css: ./_extensions/inst/css/revealjs.css
|
|
||||||
pointer:
|
|
||||||
key: "p"
|
|
||||||
color: "#32cd32"
|
|
||||||
pointerSize: 18
|
|
||||||
revealjs-plugins:
|
|
||||||
- pointer
|
|
||||||
filters:
|
|
||||||
- d2
|
|
||||||
---
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| include: false
|
|
||||||
#| cache: false
|
|
||||||
lang <- "cn"
|
|
||||||
require(tidyverse)
|
|
||||||
require(learnr)
|
|
||||||
```
|
|
||||||
|
|
||||||
## 第9次课后作业
|
|
||||||
|
|
||||||
自选数据集,使用R语言开展不同因子(如年份、季节、处理方式等)间某指标的差异分析,采用图表方式形成简要报告。
|
|
||||||
|
|
||||||
作业模板:[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd)
|
|
||||||
|
|
||||||
|
|
||||||
## 欢迎讨论!{.center}
|
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
|
||||||
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
---
|
|
||||||
title: 课后作业9
|
|
||||||
author: 姓名
|
|
||||||
format: html
|
|
||||||
---
|
|
||||||
|
|
||||||
要求:自选数据集,使用R语言开展不同因子间(如年份、季节、处理方式等)某指标的差异分析,采用图表+文字说明等方式形成简要报告。
|
|
||||||
|
|
||||||
@@ -1,167 +0,0 @@
|
|||||||
---
|
|
||||||
title: "大数据分析工具"
|
|
||||||
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
|
|
||||||
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
|
|
||||||
date: today
|
|
||||||
lang: zh
|
|
||||||
format:
|
|
||||||
revealjs:
|
|
||||||
theme: dark
|
|
||||||
slide-number: true
|
|
||||||
chalkboard:
|
|
||||||
buttons: true
|
|
||||||
preview-links: auto
|
|
||||||
lang: zh
|
|
||||||
toc: true
|
|
||||||
toc-depth: 1
|
|
||||||
toc-title: 大纲
|
|
||||||
logo: ./_extensions/inst/img/ucaslogo.png
|
|
||||||
css: ./_extensions/inst/css/revealjs.css
|
|
||||||
pointer:
|
|
||||||
key: "p"
|
|
||||||
color: "#32cd32"
|
|
||||||
pointerSize: 18
|
|
||||||
revealjs-plugins:
|
|
||||||
- pointer
|
|
||||||
filters:
|
|
||||||
- d2
|
|
||||||
---
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| echo: false
|
|
||||||
knitr::opts_chunk$set(echo = TRUE)
|
|
||||||
source("../../coding/_common.R")
|
|
||||||
library(nycflights13)
|
|
||||||
library(tidyverse)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
## 匹配数字
|
|
||||||
|
|
||||||
### 匹配数字:
|
|
||||||
|
|
||||||
- \d:匹配任意数字字符。
|
|
||||||
- \d+:匹配一个或多个数字字符。
|
|
||||||
- [0-9]: 匹配数字
|
|
||||||
|
|
||||||
### 匹配字母:
|
|
||||||
|
|
||||||
- \w:匹配任意字母、数字或下划线字符。
|
|
||||||
- \w+:匹配一个或多个字母、数字或下划线字符。
|
|
||||||
|
|
||||||
## 匹配数字
|
|
||||||
|
|
||||||
### 匹配空白字符:
|
|
||||||
|
|
||||||
- \s:匹配任意空白字符,包括空格、制表符、换行符等。
|
|
||||||
- \s+:匹配一个或多个空白字符。
|
|
||||||
|
|
||||||
### 匹配特定字符:
|
|
||||||
|
|
||||||
- [abc]:匹配字符 a、b 或 c 中的任意一个。
|
|
||||||
- [a-z]:匹配任意小写字母。
|
|
||||||
- [A-Z]:匹配任意大写字母。
|
|
||||||
- [0-9]:匹配任意数字。
|
|
||||||
|
|
||||||
## 匹配数字
|
|
||||||
|
|
||||||
### 匹配重复次数:
|
|
||||||
|
|
||||||
- {n}:匹配前一个字符恰好 n 次。
|
|
||||||
- {n,}:匹配前一个字符至少 n 次。
|
|
||||||
- {n,m}:匹配前一个字符至少 n 次,但不超过 m 次。
|
|
||||||
|
|
||||||
### 匹配边界:
|
|
||||||
|
|
||||||
- ^:匹配字符串的开头。
|
|
||||||
- $:匹配字符串的结尾。
|
|
||||||
|
|
||||||
## 匹配数字
|
|
||||||
|
|
||||||
### 匹配特殊字符:
|
|
||||||
|
|
||||||
- \:转义特殊字符,使其按字面意义匹配。
|
|
||||||
- .:匹配任意单个字符。
|
|
||||||
- |:表示“或”关系,匹配两个或多个表达式之一。
|
|
||||||
|
|
||||||
#### 匹配次数:
|
|
||||||
|
|
||||||
- *:匹配前一个字符零次或多次。
|
|
||||||
- +:匹配前一个字符一次或多次。
|
|
||||||
- ?:匹配前一个字符零次或一次。
|
|
||||||
|
|
||||||
## 匹配数字
|
|
||||||
|
|
||||||
### 分组和捕获:
|
|
||||||
|
|
||||||
- ():将一系列模式组合成一个单元,可与特殊字符一起使用。
|
|
||||||
|
|
||||||
### 预定义字符集:
|
|
||||||
|
|
||||||
- \d:任意数字,相当于 [0-9]。
|
|
||||||
- \w:任意字母、数字或下划线字符,相当于 [a-zA-Z0-9_]。
|
|
||||||
- \s:任意空白字符,相当于 [ \t\n\r\f\v]。
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 实例
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
library(babynames)
|
|
||||||
(x <- c("apple", "apppple", "abc123def"))
|
|
||||||
x[str_detect(x, "[0-9]")]
|
|
||||||
x[str_detect(x, "abc[0-9]+")]
|
|
||||||
x[str_detect(x, "pp")]
|
|
||||||
x[str_detect(x, "p{4}")]
|
|
||||||
x[str_detect(x, "p{4}")]
|
|
||||||
x[str_detect("apple", "ap*")]
|
|
||||||
x[str_detect("apple", "app*")]
|
|
||||||
x[str_detect("apple", "a..le")]
|
|
||||||
```
|
|
||||||
|
|
||||||
## 练习
|
|
||||||
|
|
||||||
|
|
||||||
找出`babynames`中名字含有ar的行
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| echo: false
|
|
||||||
babynames |>
|
|
||||||
filter(str_detect(name, "ar"))
|
|
||||||
```
|
|
||||||
|
|
||||||
## 练习
|
|
||||||
|
|
||||||
|
|
||||||
找出`babynames`中名字含有ar或者以ry结尾的行。
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| echo: false
|
|
||||||
# Keep names that contain "ar" anywhere OR end with "ry" ("$" anchors the end).
|
|
||||||
babynames |>
|
|
||||||
filter(str_detect(name, "ar|ry$"))
|
|
||||||
```
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
|
|
||||||
## GNU/Linux服务器
|
|
||||||
|
|
||||||
- `ssh`, `scp`
|
|
||||||
- `bash`
|
|
||||||
- grep
|
|
||||||
- sed
|
|
||||||
- awk
|
|
||||||
- find
|
|
||||||
- xargs
|
|
||||||
- `Editor`
|
|
||||||
- `Visual Studio Code`
|
|
||||||
- `Vim`
|
|
||||||
- `Emacs`
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 欢迎讨论!{.center}
|
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
|
||||||
|
|
||||||
@@ -945,5 +945,5 @@ semi_join(df1, df2, by = "id")
|
|||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||||
|
|
||||||
BIN
SD/3.9_课后作业8/.RData
Normal file
BIN
SD/3.9_课后作业8/airqualitydf.RDS
Normal file
@@ -43,7 +43,7 @@ require(learnr)
|
|||||||
1. 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。
|
1. 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。
|
||||||
2. 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。
|
2. 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。
|
||||||
|
|
||||||
作业模板:[第8次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)
|
作业模板:[第8次课后作业_模板.qmd](https://git.drwater.net/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)
|
||||||
|
|
||||||
## 示例代码
|
## 示例代码
|
||||||
|
|
||||||
@@ -62,5 +62,5 @@ require(learnr)
|
|||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||||
|
|
||||||
BIN
SD/3.9_课后作业8/metadf.RDS
Normal file
BIN
SD/3.9_课后作业8/testdf.RDS
Normal file
@@ -14,8 +14,10 @@ format: html
|
|||||||
# 下载至临时文件
|
# 下载至临时文件
|
||||||
if (FALSE) {
|
if (FALSE) {
|
||||||
tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
|
tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
|
||||||
download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
|
download.file(
|
||||||
destfile = tmpxlsxpath)
|
"https://git.drwater.net/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
|
||||||
|
destfile = tmpxlsxpath
|
||||||
|
)
|
||||||
airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
|
airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
|
||||||
metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
|
metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
|
||||||
saveRDS(airqualitydf, "./airqualitydf.RDS")
|
saveRDS(airqualitydf, "./airqualitydf.RDS")
|
||||||
@@ -41,7 +43,12 @@ airqualitydf |>
|
|||||||
left_join(metadf |> select(site, city = Area)) |>
|
left_join(metadf |> select(site, city = Area)) |>
|
||||||
group_by(city) |>
|
group_by(city) |>
|
||||||
filter(n() > 5) |>
|
filter(n() > 5) |>
|
||||||
summarize(p = sum(AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)) / n()) |>
|
summarize(
|
||||||
|
p = sum(
|
||||||
|
AQI.median < quantile(airqualitydf$AQI, probs = 0.5, na.rm = TRUE)
|
||||||
|
) /
|
||||||
|
n()
|
||||||
|
) |>
|
||||||
top_n(10, p)
|
top_n(10, p)
|
||||||
|
|
||||||
|
|
||||||
@@ -57,12 +64,11 @@ airqualitydf |>
|
|||||||
left_join(metadf |> select(site, city = Area)) |>
|
left_join(metadf |> select(site, city = Area)) |>
|
||||||
group_by(city) |>
|
group_by(city) |>
|
||||||
filter(length(unique(site)) >= 5) |>
|
filter(length(unique(site)) >= 5) |>
|
||||||
summarize(p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2,
|
summarize(
|
||||||
na.rm = TRUE)) / n()) |>
|
p = sum(AQI < quantile(airqualitydf$AQI, probs = 0.2, na.rm = TRUE)) / n()
|
||||||
|
) |>
|
||||||
slice_max(p, n = 10) |>
|
slice_max(p, n = 10) |>
|
||||||
knitr::kable()
|
knitr::kable()
|
||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@@ -83,66 +89,86 @@ if (FALSE) {
|
|||||||
left_join(metadf |> select(site, city = Area)) |>
|
left_join(metadf |> select(site, city = Area)) |>
|
||||||
group_by(city) |>
|
group_by(city) |>
|
||||||
filter(length(unique(site)) >= 5) |>
|
filter(length(unique(site)) >= 5) |>
|
||||||
mutate(dayornight = factor(ifelse(between(hour(datetime), 8, 20), "day", "night"),
|
mutate(
|
||||||
levels = c("day", "night"))
|
dayornight = factor(
|
||||||
|
ifelse(between(hour(datetime), 8, 20), "day", "night"),
|
||||||
|
levels = c("day", "night")
|
||||||
|
)
|
||||||
) |>
|
) |>
|
||||||
group_by(city) |>
|
group_by(city) |>
|
||||||
nest(citydf = -city) |>
|
nest(citydf = -city) |>
|
||||||
mutate(median_diff = purrr::map_dbl(citydf, ~
|
mutate(
|
||||||
.x |>
|
median_diff = purrr::map_dbl(
|
||||||
specify(AQI ~ dayornight) |>
|
citydf,
|
||||||
calculate(stat = "diff in medians", order = c("day", "night")) |>
|
~ .x |>
|
||||||
pull(stat)
|
specify(AQI ~ dayornight) |>
|
||||||
)) |>
|
calculate(stat = "diff in medians", order = c("day", "night")) |>
|
||||||
|
pull(stat)
|
||||||
|
)
|
||||||
|
) |>
|
||||||
ungroup() |>
|
ungroup() |>
|
||||||
# slice_sample(n = 12) |>
|
# slice_sample(n = 12) |>
|
||||||
mutate(null_dist = purrr::map(citydf, ~
|
mutate(
|
||||||
.x |>
|
null_dist = purrr::map(
|
||||||
specify(AQI ~ dayornight) |>
|
citydf,
|
||||||
hypothesize(null = "independence") |>
|
~ .x |>
|
||||||
generate(reps = 1000, type = "permute") |>
|
specify(AQI ~ dayornight) |>
|
||||||
calculate(stat = "diff in medians", order = c("day", "night"))
|
hypothesize(null = "independence") |>
|
||||||
)) |>
|
generate(reps = 1000, type = "permute") |>
|
||||||
mutate(p_value = purrr::map2_dbl(null_dist, median_diff,
|
calculate(stat = "diff in medians", order = c("day", "night"))
|
||||||
~ get_p_value(.x, obs_stat = .y, direction = "both") |>
|
)
|
||||||
pull(p_value)
|
) |>
|
||||||
)) |>
|
mutate(
|
||||||
|
p_value = purrr::map2_dbl(
|
||||||
|
null_dist,
|
||||||
|
median_diff,
|
||||||
|
~ get_p_value(.x, obs_stat = .y, direction = "both") |>
|
||||||
|
pull(p_value)
|
||||||
|
)
|
||||||
|
) |>
|
||||||
mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) |>
|
mutate(sigdiff = ifelse(p_value < 0.01, "显著差异", "无显著差异")) |>
|
||||||
mutate(fig = purrr::pmap(list(null_dist, median_diff, city, sigdiff),
|
mutate(
|
||||||
~ visualize(..1) +
|
fig = purrr::pmap(
|
||||||
shade_p_value(obs_stat = ..2, direction = "both") +
|
list(null_dist, median_diff, city, sigdiff),
|
||||||
ggtitle(paste0(..3, ":", ..4)) +
|
~ visualize(..1) +
|
||||||
theme_sci(2, 2)
|
shade_p_value(obs_stat = ..2, direction = "both") +
|
||||||
)) |>
|
ggtitle(paste0(..3, ":", ..4)) +
|
||||||
|
theme_sci(2, 2)
|
||||||
|
)
|
||||||
|
) |>
|
||||||
arrange(p_value)
|
arrange(p_value)
|
||||||
saveRDS(testdf, "./testdf.RDS")
|
saveRDS(testdf, "./testdf.RDS")
|
||||||
}
|
}
|
||||||
|
|
||||||
lang <- "cn"
|
if (FALSE) {
|
||||||
require(dwfun)
|
lang <- "cn"
|
||||||
require(rmdify)
|
require(dwfun)
|
||||||
require(drwateR)
|
require(rmdify)
|
||||||
dwfun::init()
|
require(drwateR)
|
||||||
rmdify::rmd_init()
|
dwfun::init()
|
||||||
|
rmdify::rmd_init()
|
||||||
testdf <- readRDS("./testdf.RDS")
|
|
||||||
require(tidyverse)
|
|
||||||
testdf |>
|
|
||||||
select(city, median_diff, p_value, sigdiff) |>
|
|
||||||
knitr::kable()
|
|
||||||
testdf |>
|
|
||||||
mutate(grp = (row_number() - 1)%/% 12) |>
|
|
||||||
group_by(grp) |>
|
|
||||||
nest(grpdf = -grp) |>
|
|
||||||
ungroup() |>
|
|
||||||
# slice(1) |>
|
|
||||||
mutate(gp = purrr::map(grpdf,
|
|
||||||
~(.x |>
|
|
||||||
pull(fig)) |>
|
|
||||||
patchwork::wrap_plots(ncol = 3) +
|
|
||||||
dwfun::theme_sci(5, 7))) |>
|
|
||||||
pull(gp)
|
|
||||||
|
|
||||||
|
|
||||||
|
testdf <- readRDS("./testdf.RDS")
|
||||||
|
require(tidyverse)
|
||||||
|
testdf |>
|
||||||
|
select(city, median_diff, p_value, sigdiff) |>
|
||||||
|
knitr::kable()
|
||||||
|
testdf |>
|
||||||
|
mutate(grp = (row_number() - 1) %/% 12) |>
|
||||||
|
group_by(grp) |>
|
||||||
|
nest(grpdf = -grp) |>
|
||||||
|
ungroup() |>
|
||||||
|
# slice(1) |>
|
||||||
|
mutate(
|
||||||
|
gp = purrr::map(
|
||||||
|
grpdf,
|
||||||
|
~ (.x |>
|
||||||
|
pull(fig)) |>
|
||||||
|
patchwork::wrap_plots(ncol = 3) +
|
||||||
|
dwfun::theme_sci(5, 7)
|
||||||
|
)
|
||||||
|
) |>
|
||||||
|
pull(gp)
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -3934,5 +3934,5 @@ p
|
|||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||||
|
|
||||||
|
Before Width: | Height: | Size: 61 KiB After Width: | Height: | Size: 61 KiB |
@@ -100,5 +100,4 @@ geom_bar(position = "fill")
|
|||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||||
|
|
||||||
254
SD/5.1_model/_demo.qmd
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
---
|
||||||
|
title: "Lesson 9"
|
||||||
|
format: html
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
# install.packages("tidymodels")
|
||||||
|
|
||||||
|
require(tidymodels)
|
||||||
|
taxi
|
||||||
|
|
||||||
|
taxisplit <- initial_split(taxi, prop = 0.8)
|
||||||
|
taxi_train <- training(taxisplit)
|
||||||
|
taxi_test <- testing(taxisplit)
|
||||||
|
|
||||||
|
tree_spec <-
|
||||||
|
decision_tree(cost_complexity = 0.002) %>%
|
||||||
|
set_mode("classification")
|
||||||
|
|
||||||
|
taxi_fit <- workflow() %>%
|
||||||
|
add_formula(tip ~ .) %>%
|
||||||
|
add_model(tree_spec) %>%
|
||||||
|
fit(data = taxi_train)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
augment(taxi_fit, new_data = taxi_train) %>%
|
||||||
|
relocate(tip, .pred_class, .pred_yes, .pred_no)
|
||||||
|
|
||||||
|
augment(taxi_fit, new_data = taxi_train) %>%
|
||||||
|
conf_mat(truth = tip, estimate = .pred_class)
|
||||||
|
|
||||||
|
augment(taxi_fit, new_data = taxi_train) %>%
|
||||||
|
accuracy(truth = tip, estimate = .pred_class)
|
||||||
|
|
||||||
|
augment(taxi_fit, new_data = taxi_train) %>%
|
||||||
|
sensitivity(truth = tip, estimate = .pred_class)
|
||||||
|
|
||||||
|
augment(taxi_fit, new_data = taxi_train) %>%
|
||||||
|
specificity(truth = tip, estimate = .pred_class)
|
||||||
|
|
||||||
|
taxi_metrics <- metric_set(accuracy, specificity, sensitivity)
|
||||||
|
|
||||||
|
augment(taxi_fit, new_data = taxi_train) %>%
|
||||||
|
taxi_metrics(truth = tip, estimate = .pred_class)
|
||||||
|
|
||||||
|
taxi_metrics <- metric_set(accuracy, specificity, sensitivity)
|
||||||
|
|
||||||
|
augment(taxi_fit, new_data = taxi_train) %>%
|
||||||
|
group_by(local) %>%
|
||||||
|
taxi_metrics(truth = tip, estimate = .pred_class)
|
||||||
|
|
||||||
|
augment(taxi_fit, new_data = taxi_train) %>%
|
||||||
|
roc_curve(truth = tip, .pred_yes) %>%
|
||||||
|
autoplot()
|
||||||
|
|
||||||
|
|
||||||
|
augment(taxi_fit, new_data = taxi_train)
|
||||||
|
|
||||||
|
|
||||||
|
# Manual ROC curve: x = false positive rate (1 - specificity),
|
||||||
|
# y = true positive rate (sensitivity); the diagonal marks a random classifier.
|
||||||
|
augment(taxi_fit, new_data = taxi_train) %>%
|
||||||
|
roc_curve(truth = tip, .pred_yes) |>
|
||||||
|
ggplot(aes(1 - specificity, sensitivity)) +
|
||||||
|
geom_point() +
|
||||||
|
geom_line() +
|
||||||
|
geom_abline(slope = 1)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# Cross Validation
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
vfold_cv(taxi_train, v = 10) |>
|
||||||
|
pull(splits) |>
|
||||||
|
nth(1)
|
||||||
|
taxi_folds <- vfold_cv(taxi_train)
|
||||||
|
taxi_folds$splits[1:3]
|
||||||
|
|
||||||
|
vfold_cv(taxi_train, strata = tip)
|
||||||
|
|
||||||
|
set.seed(123)
|
||||||
|
taxi_folds <- vfold_cv(taxi_train, v = 10, strata = tip)
|
||||||
|
taxi_folds
|
||||||
|
|
||||||
|
taxi_wflow <- workflow() %>%
|
||||||
|
add_formula(tip ~ .) %>%
|
||||||
|
add_model(tree_spec)
|
||||||
|
|
||||||
|
|
||||||
|
taxi_res <- fit_resamples(taxi_wflow, taxi_folds)
|
||||||
|
taxi_res
|
||||||
|
|
||||||
|
|
||||||
|
taxi_res$.metrics[[1]]
|
||||||
|
|
||||||
|
|
||||||
|
taxi_res$splits[[1]]
|
||||||
|
|
||||||
|
analysis(taxi_res$splits[[1]])
|
||||||
|
|
||||||
|
analysis(taxi_res$splits[[1]])
|
||||||
|
assessment(taxi_res$splits[[1]])
|
||||||
|
|
||||||
|
|
||||||
|
taxi_res %>%
|
||||||
|
collect_metrics()
|
||||||
|
|
||||||
|
taxi_res %>%
|
||||||
|
collect_metrics() %>%
|
||||||
|
select(.metric, mean, n)
|
||||||
|
|
||||||
|
# Save the assessment set results
|
||||||
|
ctrl_taxi <- control_resamples(save_pred = TRUE)
|
||||||
|
taxi_res <- fit_resamples(taxi_wflow, taxi_folds, control = ctrl_taxi)
|
||||||
|
|
||||||
|
taxi_res
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# NLA2007 cyanophyta model
|
||||||
|
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
require(tidyverse)
|
||||||
|
sitedf <- readr::read_csv(
|
||||||
|
"https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv"
|
||||||
|
) |>
|
||||||
|
select(
|
||||||
|
SITE_ID,
|
||||||
|
lon = LON_DD,
|
||||||
|
lat = LAT_DD,
|
||||||
|
name = LAKENAME,
|
||||||
|
area = LAKEAREA,
|
||||||
|
zmax = DEPTHMAX
|
||||||
|
) |>
|
||||||
|
group_by(SITE_ID) |>
|
||||||
|
summarize(
|
||||||
|
lon = mean(lon, na.rm = TRUE),
|
||||||
|
lat = mean(lat, na.rm = TRUE),
|
||||||
|
name = unique(name),
|
||||||
|
area = mean(area, na.rm = TRUE),
|
||||||
|
zmax = mean(zmax, na.rm = TRUE)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
visitdf <- readr::read_csv(
|
||||||
|
"https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
|
||||||
|
) |>
|
||||||
|
select(SITE_ID, date = DATE_PROFILE, year = YEAR, visit = VISIT_NO) |>
|
||||||
|
distinct()
|
||||||
|
|
||||||
|
|
||||||
|
waterchemdf <- readr::read_csv(
|
||||||
|
"https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
|
||||||
|
) |>
|
||||||
|
select(
|
||||||
|
SITE_ID,
|
||||||
|
date = DATE_PROFILE,
|
||||||
|
depth = DEPTH,
|
||||||
|
temp = TEMP_FIELD,
|
||||||
|
do = DO_FIELD,
|
||||||
|
ph = PH_FIELD,
|
||||||
|
cond = COND_FIELD,
|
||||||
|
)
|
||||||
|
|
||||||
|
sddf <- readr::read_csv(
|
||||||
|
"https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv"
|
||||||
|
) |>
|
||||||
|
select(
|
||||||
|
SITE_ID,
|
||||||
|
date = DATE_SECCHI,
|
||||||
|
sd = SECMEAN,
|
||||||
|
clear_to_bottom = CLEAR_TO_BOTTOM
|
||||||
|
)
|
||||||
|
|
||||||
|
trophicdf <- readr::read_csv(
|
||||||
|
"https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv"
|
||||||
|
) |>
|
||||||
|
select(SITE_ID, visit = VISIT_NO, tp = PTL, tn = NTL, chla = CHLA) |>
|
||||||
|
left_join(visitdf, by = c("SITE_ID", "visit")) |>
|
||||||
|
select(-year, -visit) |>
|
||||||
|
group_by(SITE_ID, date) |>
|
||||||
|
summarize(
|
||||||
|
tp = mean(tp, na.rm = TRUE),
|
||||||
|
tn = mean(tn, na.rm = TRUE),
|
||||||
|
chla = mean(chla, na.rm = TRUE)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
phytodf <- readr::read_csv(
|
||||||
|
"https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv"
|
||||||
|
) |>
|
||||||
|
select(
|
||||||
|
SITE_ID,
|
||||||
|
date = DATEPHYT,
|
||||||
|
depth = SAMPLE_DEPTH,
|
||||||
|
phyta = DIVISION,
|
||||||
|
genus = GENUS,
|
||||||
|
species = SPECIES,
|
||||||
|
tax = TAXANAME,
|
||||||
|
abund = ABUND
|
||||||
|
) |>
|
||||||
|
mutate(phyta = gsub(" .*$", "", phyta)) |>
|
||||||
|
filter(!is.na(genus)) |>
|
||||||
|
group_by(SITE_ID, date, depth, phyta, genus) |>
|
||||||
|
summarize(abund = sum(abund, na.rm = TRUE)) |>
|
||||||
|
nest(phytodf = -c(SITE_ID, date))
|
||||||
|
|
||||||
|
phytodf$phytodf[[1]]
|
||||||
|
|
||||||
|
|
||||||
|
envdf <- waterchemdf |>
|
||||||
|
filter(depth < 2) |>
|
||||||
|
select(-depth) |>
|
||||||
|
group_by(SITE_ID, date) |>
|
||||||
|
summarise_all(~ mean(., na.rm = TRUE)) |>
|
||||||
|
ungroup() |>
|
||||||
|
left_join(sddf, by = c("SITE_ID", "date")) |>
|
||||||
|
left_join(trophicdf, by = c("SITE_ID", "date"))
|
||||||
|
|
||||||
|
nla <- envdf |>
|
||||||
|
left_join(phytodf, by = c("SITE_ID", "date")) |>
|
||||||
|
left_join(sitedf, by = "SITE_ID") |>
|
||||||
|
filter(!purrr::map_lgl(phytodf, is.null)) |>
|
||||||
|
mutate(
|
||||||
|
cyanophyta = purrr::map(
|
||||||
|
phytodf,
|
||||||
|
\(x) {
|
||||||
|
x |>
|
||||||
|
dplyr::filter(phyta == "Cyanophyta") |>
|
||||||
|
summarize(cyanophyta = sum(abund, na.rm = TRUE))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
) |>
|
||||||
|
unnest(cyanophyta) |>
|
||||||
|
select(-phyta) |>
|
||||||
|
mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE))
|
||||||
|
|
||||||
|
# library(rmdify)
|
||||||
|
# library(dwfun)
|
||||||
|
# dwfun::init()
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 272 KiB After Width: | Height: | Size: 272 KiB |
|
Before Width: | Height: | Size: 345 KiB After Width: | Height: | Size: 345 KiB |
|
Before Width: | Height: | Size: 210 KiB After Width: | Height: | Size: 210 KiB |
|
Before Width: | Height: | Size: 470 KiB After Width: | Height: | Size: 470 KiB |
|
Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB |
|
Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB |
|
Before Width: | Height: | Size: 5.5 KiB After Width: | Height: | Size: 5.5 KiB |
|
Before Width: | Height: | Size: 209 KiB After Width: | Height: | Size: 209 KiB |
|
Before Width: | Height: | Size: 7.8 KiB After Width: | Height: | Size: 7.8 KiB |
|
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB |
|
Before Width: | Height: | Size: 177 KiB After Width: | Height: | Size: 177 KiB |
|
Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB |
|
Before Width: | Height: | Size: 569 KiB After Width: | Height: | Size: 569 KiB |
|
Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.5 KiB |
|
Before Width: | Height: | Size: 100 KiB After Width: | Height: | Size: 100 KiB |
|
Before Width: | Height: | Size: 7.5 KiB After Width: | Height: | Size: 7.5 KiB |
|
Before Width: | Height: | Size: 131 KiB After Width: | Height: | Size: 131 KiB |
|
Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB |
|
Before Width: | Height: | Size: 60 KiB After Width: | Height: | Size: 60 KiB |
|
Before Width: | Height: | Size: 83 KiB After Width: | Height: | Size: 83 KiB |
|
Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB |
|
Before Width: | Height: | Size: 87 KiB After Width: | Height: | Size: 87 KiB |
|
Before Width: | Height: | Size: 91 KiB After Width: | Height: | Size: 91 KiB |
|
Before Width: | Height: | Size: 869 KiB After Width: | Height: | Size: 869 KiB |
|
Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 3.8 KiB |
|
Before Width: | Height: | Size: 638 KiB After Width: | Height: | Size: 638 KiB |
|
Before Width: | Height: | Size: 408 KiB After Width: | Height: | Size: 408 KiB |
|
Before Width: | Height: | Size: 410 KiB After Width: | Height: | Size: 410 KiB |
|
Before Width: | Height: | Size: 154 KiB After Width: | Height: | Size: 154 KiB |
|
Before Width: | Height: | Size: 8.7 KiB After Width: | Height: | Size: 8.7 KiB |
|
Before Width: | Height: | Size: 159 KiB After Width: | Height: | Size: 159 KiB |
|
Before Width: | Height: | Size: 184 KiB After Width: | Height: | Size: 184 KiB |
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB |
|
Before Width: | Height: | Size: 69 KiB After Width: | Height: | Size: 69 KiB |
|
Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB |
|
Before Width: | Height: | Size: 117 KiB After Width: | Height: | Size: 117 KiB |
|
Before Width: | Height: | Size: 137 KiB After Width: | Height: | Size: 137 KiB |
|
Before Width: | Height: | Size: 10 KiB After Width: | Height: | Size: 10 KiB |
|
Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 7.1 KiB |
|
Before Width: | Height: | Size: 54 KiB After Width: | Height: | Size: 54 KiB |
|
Before Width: | Height: | Size: 7.6 KiB After Width: | Height: | Size: 7.6 KiB |
|
Before Width: | Height: | Size: 134 KiB After Width: | Height: | Size: 134 KiB |
|
Before Width: | Height: | Size: 150 KiB After Width: | Height: | Size: 150 KiB |