Compare commits
1 Commits
6412cc5560
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 43c660af96 |
1
.gitignore
vendored
@@ -7,4 +7,3 @@ _variables.yml
|
||||
_freeze/
|
||||
*_cache/
|
||||
*_files/
|
||||
SD/_*/
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
454c267b732cda3ca58332ab3883bec6
|
||||
382
Makefile
@@ -1,382 +0,0 @@
|
||||
# Makefile for Quarto Project Automation
|
||||
# Detect OS
|
||||
HOSTNAME := $(shell hostname)
|
||||
OS := $(shell uname | tr A-Z a-z)
|
||||
ifeq ($(OS), darwin)
|
||||
SEDI := sed -i ''
|
||||
OS := OSX
|
||||
else ifeq ($(OS), linux)
|
||||
SEDI := sed -i
|
||||
OS := linux
|
||||
else
|
||||
$(error Unknown operating system)
|
||||
endif
|
||||
|
||||
# Fetch Git branch and project details
|
||||
branchname := $(shell git branch --show-current)
|
||||
reponame := $(shell basename $(shell git rev-parse --show-toplevel))
|
||||
projtype := $(shell basename $(shell dirname $(shell git rev-parse --show-toplevel)))
|
||||
pubtype := $(if $(findstring PUB,$(branchname)),public,protected)
|
||||
remotedir := dwuser@drwater.net:/home/www/drc/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
|
||||
outputdir := $(shell awk -F': *' '/^ *output-dir:/ {print $$2 "/" }' ./_quarto.yml)
|
||||
siteurl := https://drc.drwater.net/$(projtype)/$(pubtype)/$(reponame)/$(branchname)
|
||||
|
||||
branchnames := "TX\|FJ\|YF\|ZY\|WW\|JB\|YY\|YJ\|DYF"
|
||||
|
||||
# Variables for colors and port
|
||||
bcolor := grey
|
||||
port := 4199
|
||||
|
||||
# Set background color based on branch name
|
||||
ifeq ($(findstring R1,$(branchname)),R1)
|
||||
bcolor := orange
|
||||
else ifeq ($(findstring R2,$(branchname)),R2)
|
||||
bcolor := lightblue
|
||||
else ifeq ($(findstring R3,$(branchname)),R3)
|
||||
bcolor := lightgreen
|
||||
else ifeq ($(findstring PUB,$(branchname)),PUB)
|
||||
bcolor := light
|
||||
endif
|
||||
|
||||
# Set port based on branch name
|
||||
branch_ports := main:4200 SM:4201 TX:4202 FJ:4203 YF:4204 ZY:4205 WW:4206 JB:4207 YY:4208 YJ:4209 DYF:4210
|
||||
port := $(shell echo $(branch_ports) | tr " " "\n" | grep -E "^$(branchname):" | sed -E 's/^$(branchname):([0-9]+)/\1/')
|
||||
port := $(if $(port),$(port),4199)
|
||||
|
||||
# Define the state file
|
||||
STATE_FILE := .source_state
|
||||
|
||||
# checkfile
|
||||
GREPSTR := " \|(\|)\|^submit\|^analysis\|_cache\|_freeze\|^site_libs\|^www"
|
||||
|
||||
# Default target
|
||||
.PHONY: all preview readme rsync local clean upload fix_links force check_git_status
|
||||
|
||||
all: local upload clean commit push
|
||||
|
||||
force: updrefbib check_git_status updvariable render
|
||||
|
||||
local: updrefbib check_git_status updvariable lazyrender
|
||||
|
||||
updmakefile:
|
||||
@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \
|
||||
echo "基于 $$HOME/bin/publish2dw.Makefile 更新本项目 Makefile..."; \
|
||||
cp "$$HOME/bin/publish2dw.Makefile" "./Makefile"; \
|
||||
git add "Makefile" && git commit -m "Update Makefile"; \
|
||||
echo "本项目Makefile更新完成."; \
|
||||
else \
|
||||
echo "Makefile 无需在本系统上更新."; \
|
||||
fi
|
||||
|
||||
|
||||
updrefbib:
|
||||
@if [ "$(OS)" = "OSX" ] && [ "$(HOSTNAME)" = "max" ]; then \
|
||||
echo "更新本项目参考文献..."; \
|
||||
cp "$$HOME/literature/Ref.bib" "./BB/"; \
|
||||
echo "推送本地参考文献到远程服务器..."; \
|
||||
rsync -azvu --progress "$$HOME/literature/Ref.bib" "drwater.net:/home/www/drc/datapool/public/BB/Ref.bib"; \
|
||||
echo "本项目参考文献更新完成."; \
|
||||
else \
|
||||
echo "检查网络连通性..."; \
|
||||
if ping -c 1 -W 1 drc.drwater.net > /dev/null 2>&1; then \
|
||||
echo "网络正常,更新本项目参考文献..."; \
|
||||
wget -O BB/Ref.bib "https://drc.drwater.net/datapool/public/BB/Ref.bib"; \
|
||||
echo "本项目参考文献更新完成."; \
|
||||
else \
|
||||
echo "网络不可用,跳过参考文献更新."; \
|
||||
fi; \
|
||||
fi; \
|
||||
git add BB/Ref.bib; \
|
||||
if [ "$$(git diff --cached)" ]; then \
|
||||
git commit -m "Update Ref.bib"; \
|
||||
fi
|
||||
|
||||
check_git_status:
|
||||
@uncommitted=$$(git status --porcelain); \
|
||||
if [ -n "$$uncommitted" ]; then \
|
||||
git status; \
|
||||
read -p "当前存在未提交的修改(如上),是否要提交?(Y/N, default is N): " answer; \
|
||||
answer=$${answer:-N}; \
|
||||
if [ "$$answer" = "Y" ] || [ "$$answer" = "y" ]; then \
|
||||
read -p "请输入修改说明: " message; \
|
||||
git add . && git commit -m "$$message"; \
|
||||
else \
|
||||
echo "未提交,如后续操作为pull,则无法继续"; \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# Lazy render with hash checking
|
||||
lazyrender:
|
||||
@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
|
||||
if [ ! -f $(STATE_FILE) ] || [ "$$current_hash" != "$$(cat $(STATE_FILE))" ]; then \
|
||||
echo "源文件发生变化, 重新编译..."; \
|
||||
echo "$$current_hash" > $(STATE_FILE); \
|
||||
$(MAKE) render; \
|
||||
else \
|
||||
echo "源文件无变化, 跳过编译..."; \
|
||||
exit 0; \
|
||||
fi
|
||||
|
||||
# Render target
|
||||
render:
|
||||
@quarto render
|
||||
|
||||
commit:
|
||||
@echo "提交修改(commit)..."; \
|
||||
git add .; \
|
||||
if [ -n "$$(git diff --cached)" ]; then \
|
||||
git commit -m "render compile"; \
|
||||
else \
|
||||
echo "没有修改记录,跳过."; \
|
||||
fi; \
|
||||
|
||||
# Pull changes from the specified branch based on the current branch
|
||||
pull:
|
||||
@echo "从远程拉取项目更新..."; \
|
||||
$(MAKE) check_git_status; \
|
||||
git pull; \
|
||||
current_branch=$$(git rev-parse --abbrev-ref HEAD); \
|
||||
if [ "$$current_branch" = main ]; then \
|
||||
echo "当前分枝为$$current_branch."; \
|
||||
remote_branch=$$(git branch --remote | grep -v 'main' | grep $(branchnames) | awk '{print $$1}' | sed 's/origin\///' | head -n 1); \
|
||||
if [ -n "$$remote_branch" ]; then \
|
||||
echo "尝试从远程分枝$$remote_branch 拉取更新..."; \
|
||||
git pull --rebase origin $$remote_branch; \
|
||||
else \
|
||||
echo "远程无可用分支$$remote_branch."; \
|
||||
fi; \
|
||||
else \
|
||||
echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
|
||||
git pull --rebase origin main; \
|
||||
fi
|
||||
|
||||
# Pull changes from the main branch
|
||||
pullmain:
|
||||
$(MAKE) check_git_status; \
|
||||
@current_branch=$$(git rev-parse --abbrev-ref HEAD); \
|
||||
@echo "尝试将远程main分枝合并至本地$$current_branch 分枝."; \
|
||||
git pull --rebase origin main; \
|
||||
|
||||
push:
|
||||
@echo "推送到远程..."; \
|
||||
git push
|
||||
|
||||
filehash:
|
||||
@current_hash=$$(find $(shell git ls-files "*.qmd" "_*.yml" "*.pdf" "*.svg" "*.png" | grep -v $(GREPSTR)) -exec cat {} + | md5sum | awk '{print $$1}'); \
|
||||
echo "$$current_hash" > $(STATE_FILE)
|
||||
|
||||
# Preview the site on a specific port
|
||||
preview:
|
||||
@quarto preview --port $(port)
|
||||
|
||||
# Generate README.md
|
||||
readme:
|
||||
@quarto render index.qmd -t markdown -o README.md
|
||||
@sed -e '/^---/,/^---/d' "$(outputdir)/README.md" > README.md
|
||||
@rm "$(outputdir)/README.md"
|
||||
|
||||
# Sync files with remote server
|
||||
rsync:
|
||||
@rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"
|
||||
|
||||
# Open local site
|
||||
open:
|
||||
@if [ "$(OS)" = "OSX" ]; then open "$(outputdir)/index.html"; fi
|
||||
|
||||
# Clean unnecessary files
|
||||
clean:
|
||||
@rm -f ./*.spl ./*.bbl ./*.blg ./*.log ./*.tex ./*.bcf ./*.tex.sedbak ./*.fdb_latexmk
|
||||
|
||||
# Upload files to server and fix links
|
||||
upload: backupdocx
|
||||
@mkdir -p "$(outputdir)" && chmod -R 2775 "$(outputdir)"
|
||||
@$(MAKE) fix_links
|
||||
@if rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; then \
|
||||
if [ "$(OS)" = "OSX" ]; then \
|
||||
open "$(siteurl)" 2>/dev/null; \
|
||||
fi; \
|
||||
else \
|
||||
echo "Rsync failed. Attempting alternative upload method..."; \
|
||||
mkdir -p "$(reponame)"; \
|
||||
rsync -azvu --progress --delete -r "$(reponame)" "$(dir $(remotedir))"; \
|
||||
rm -rf "$(reponame)"; \
|
||||
rsync -azvu --progress --delete -r "$(outputdir)" "$(remotedir)"; \
|
||||
if [ "$(OS)" = "OSX" ]; then \
|
||||
open "$(siteurl)" 2>/dev/null; \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
backupdocx:
|
||||
@echo "备份MS.docx文件..."; \
|
||||
currentcommithash=$$(git rev-parse --short HEAD); \
|
||||
datetime=$$(git show -s --format=%ci $$currentcommithash | sed 's/[-: ]//g' | cut -c3-12); \
|
||||
mkdir -p TC/MS/; \
|
||||
existing_file=$$(find TC/MS -name "MS*.docx" -exec cmp -s www/MS/MS.docx {} \; -print -quit); \
|
||||
if [ -n "$$existing_file" ]; then \
|
||||
echo "与www/MS/MS.docx 内容相同的备份文件已存在: $$existing_file"; \
|
||||
echo "无需备份."; \
|
||||
else \
|
||||
if [ ! -e TC/MS/MS$${datetime}_$${currentcommithash}.docx ]; then \
|
||||
cp www/MS/MS.docx TC/MS/MS$${datetime}_$${currentcommithash}.docx; \
|
||||
echo "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx完成."; \
|
||||
git add TC/MS/MS$${datetime}_$${currentcommithash}.docx; \
|
||||
if [ "$$(git diff --cached)" ]; then \
|
||||
git commit -m "备份TC/MS/MS$${datetime}_$${currentcommithash}.docx"; \
|
||||
fi; \
|
||||
else \
|
||||
echo "TC/MS/MS$${datetime}_$${currentcommithash}.docx已存在,无需备份."; \
|
||||
fi; \
|
||||
fi;
|
||||
|
||||
|
||||
trackchange:
|
||||
@if [ "$(projtype)" != "manuscript" ]; then \
|
||||
exit 0; \
|
||||
fi; \
|
||||
echo "选择两个提交以比较文档..."; \
|
||||
hashes=$$(git log --pretty=format:'%h: %s BY %an (%ar)' \
|
||||
| grep -E "$$(ls TC/MS/*.docx | xargs -n1 basename | sed -E 's/MS.*_([0-9a-f]+)\.docx/\1/' | tr '\n' '|')SMT_】" \
|
||||
| fzf --multi --reverse --preview="echo {}" ); \
|
||||
echo $$hashes; \
|
||||
hash1=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | tail -n 1 | awk '{print $$1}' | tr -d ':'); \
|
||||
hash1=$$(git rev-parse --short $${hash1}^); \
|
||||
datetime1=$$(git show -s --format=%ci $$hash1 | sed 's/[-: ]//g' | cut -c3-12); \
|
||||
hash2=$$(echo $$hashes | sed -e 's/) \([a-z0-9]\{7\}:\)/)\n\1/g' | head -n 1 | awk '{print $$1}' | tr -d ':'); \
|
||||
hash2=$$(git rev-parse --short $${hash2}^); \
|
||||
datetime2=$$(git show -s --format=%ci $$hash2 | sed 's/[-: ]//g' | cut -c3-12); \
|
||||
if [ -z "$$hash1" ] || [ -z "$$hash2" ]; then \
|
||||
echo "必须选择两个提交."; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
doc1="TC/MS/MS$${datetime1}_$$hash1.docx"; \
|
||||
echo "$$doc1"; \
|
||||
doc2="TC/MS/MS$${datetime2}_$$hash2.docx"; \
|
||||
echo "$$doc2"; \
|
||||
if [ -f "$$doc1" ] && [ -f "$$doc2" ] && [ "$$doc1" != "$$doc2" ]; then \
|
||||
echo "打开文件: $$doc1 和 $$doc2"; \
|
||||
open "$$doc1" "$$doc2"; \
|
||||
printf "MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}" | pbcopy; \
|
||||
echo "请在word中对比两个版本形成带修改痕迹的版本,并保存至TC/MS$${datetime1}-$${datetime2}_$${hash1}-$${hash2}.docx!"; \
|
||||
else \
|
||||
echo "一个或两个文件不存在: $$doc1, $$doc2"; \
|
||||
exit 1; \
|
||||
fi
|
||||
|
||||
# Fix links in www directory
|
||||
fix_links:
|
||||
@find ./www -type f -name "*.html" -exec sed -i.bak \
|
||||
-e "s/{{< var branch >}}/$(branchname)/g" \
|
||||
-e "s/{{< var pubtype >}}/$(pubtype)/g" \
|
||||
-e "s/{{< var projtype >}}/$(projtype)/g" \
|
||||
-e "s/{{< var reponame >}}/$(reponame)/g" \
|
||||
-e "s/$(reponame)\/blob/$(reponame)\/raw\/branch/g" \
|
||||
-e "s/$(reponame)\/edit/$(reponame)\/_edit/g" {} +
|
||||
@find ./www -type f -name "*.bak" -exec rm {} +
|
||||
|
||||
updvariable:
|
||||
@touch _variables.yml # 如果文件不存在则创建
|
||||
@grep -q '^reponame:' _variables.yml || echo "reponame: $(reponame)" >> _variables.yml
|
||||
@if grep -q '^reponame:' _variables.yml; then \
|
||||
$(SEDI) 's/^reponame:.*/reponame: $(reponame)/' _variables.yml; \
|
||||
else \
|
||||
echo "reponame: $(reponame)" >> _variables.yml; \
|
||||
fi
|
||||
@grep -q '^projtype:' _variables.yml || echo "projtype: $(projtype)" >> _variables.yml
|
||||
@if grep -q '^projtype:' _variables.yml; then \
|
||||
$(SEDI) 's/^projtype:.*/projtype: $(projtype)/' _variables.yml; \
|
||||
else \
|
||||
echo "projtype: $(projtype)" >> _variables.yml; \
|
||||
fi
|
||||
@grep -q '^branch:' _variables.yml || echo "branch: $(branchname)" >> _variables.yml
|
||||
@if grep -q '^branch:' _variables.yml; then \
|
||||
$(SEDI) 's/^branch:.*/branch: $(branchname)/' _variables.yml; \
|
||||
else \
|
||||
echo "branch: $(branchname)" >> _variables.yml; \
|
||||
fi
|
||||
@grep -q '^pubtype:' _variables.yml || echo "pubtype: $(pubtype)" >> _variables.yml
|
||||
@if grep -q '^pubtype:' _variables.yml; then \
|
||||
$(SEDI) 's/^pubtype:.*/pubtype: $(pubtype)/' _variables.yml; \
|
||||
else \
|
||||
echo "pubtype: $(pubtype)" >> _variables.yml; \
|
||||
fi
|
||||
@grep -q '^nwAB:' _variables.yml || echo "nwAB: $(nwAB)" >> _variables.yml
|
||||
@if grep -q '^nwAB:' _variables.yml; then \
|
||||
$(SEDI) 's/^nwAB:.*/nwAB: $(nwAB)/' _variables.yml; \
|
||||
else \
|
||||
echo "nwAB: $(nwAB)" >> _variables.yml; \
|
||||
fi
|
||||
@grep -q '^nwMS:' _variables.yml || echo "nwMS: $(nwMS)" >> _variables.yml
|
||||
@if grep -q '^nwMS:' _variables.yml; then \
|
||||
$(SEDI) 's/^nwMS:.*/nwMS: $(nwMS)/' _variables.yml; \
|
||||
else \
|
||||
echo "nwMS: $(nwMS)" >> _variables.yml; \
|
||||
fi
|
||||
@grep -q '^figtblMS:' _variables.yml || echo "figtblMS: $(figtblMS)" >> _variables.yml
|
||||
@if grep -q '^figtblMS:' _variables.yml; then \
|
||||
$(SEDI) 's/^figtblMS:.*/figtblMS: $(figtblMS)/' _variables.yml; \
|
||||
else \
|
||||
echo "figtblMS: $(figtblMS)" >> _variables.yml; \
|
||||
fi
|
||||
@grep -q '^figtblSM:' _variables.yml || echo "figtblSM: $(figtblSM)" >> _variables.yml
|
||||
@if grep -q '^figtblSM:' _variables.yml; then \
|
||||
$(SEDI) 's/^figtblSM:.*/figtblSM: $(figtblSM)/' _variables.yml; \
|
||||
else \
|
||||
echo "figtblSM: $(figtblSM)" >> _variables.yml; \
|
||||
fi
|
||||
@mkpapervar
|
||||
|
||||
|
||||
# Help: list all available commands with descriptions (English and Chinese)
|
||||
help:
|
||||
@echo "Makefile for Quarto Project Automation"
|
||||
@echo "======================================="
|
||||
@echo "Available targets (English):"
|
||||
@echo ""
|
||||
@echo " make all - Execute local build, upload, clean, and commit"
|
||||
@echo " make force - Force render, hash update, upload, clean, and commit"
|
||||
@echo " make local - Check git status and perform a lazy render if changes detected"
|
||||
@echo " make check_git_status - Check for uncommitted changes and ask to commit them"
|
||||
@echo " make lazyrender - Render if source files have changed based on hash comparison"
|
||||
@echo " make render - Force Quarto to render the project"
|
||||
@echo " make commit - Commit changes if no previous uncommitted changes"
|
||||
@echo " make filehash - Generate and store the file hash of source files"
|
||||
@echo " make preview - Preview the site locally on the specific port (default: 4199)"
|
||||
@echo " make readme - Render README.md from Quarto index.qmd"
|
||||
@echo " make rsync - Sync output files with the remote server"
|
||||
@echo " make open - Open the generated site locally in the browser"
|
||||
@echo " make clean - Clean up unnecessary files"
|
||||
@echo " make upload - Upload files to the server and fix links"
|
||||
@echo " make fix_links - Fix HTML links in the 'www' directory for the remote server"
|
||||
@echo " make updmakefile - Update the Makefile"
|
||||
@echo " make help - Display this help message"
|
||||
@echo ""
|
||||
@echo "Available targets (中文):"
|
||||
@echo ""
|
||||
@echo " make all - 执行本地构建、上传、清理和提交"
|
||||
@echo " make force - 强制渲染、更新哈希、上传、清理并提交"
|
||||
@echo " make local - 检查Git状态,若检测到更改则进行懒惰渲染"
|
||||
@echo " make check_git_status - 检查未提交的更改,并询问是否提交"
|
||||
@echo " make lazyrender - 如果源文件发生更改,则根据哈希比较进行渲染"
|
||||
@echo " make render - 强制 Quarto 渲染项目"
|
||||
@echo " make commit - 如果没有未提交的更改则提交"
|
||||
@echo " make filehash - 生成并存储源文件的哈希值"
|
||||
@echo " make preview - 本地在特定端口预览网站 (默认: 4199)"
|
||||
@echo " make readme - 从 Quarto 的 index.qmd 生成 README.md"
|
||||
@echo " make rsync - 将输出文件同步到远程服务器"
|
||||
@echo " make open - 在浏览器中打开生成的网站"
|
||||
@echo " make clean - 清理不必要的文件"
|
||||
@echo " make upload - 上传文件到服务器并修复链接"
|
||||
@echo " make fix_links - 修复 'www' 目录中的 HTML 链接"
|
||||
@echo " make updmakefile - 更新本项目 Makefile"
|
||||
@echo " make help - 显示此帮助信息"
|
||||
@echo ""
|
||||
@echo "Environment variables (English and 中文):"
|
||||
@echo " bcolor - Background color based on branch name (基于分支名的背景颜色)"
|
||||
@echo " port - Port number based on branch name (基于分支名的端口号)"
|
||||
@echo " STATE_FILE - File for storing hash state of source files (用于存储源文件哈希状态的文件)"
|
||||
@echo " siteurl - The URL where the site will be hosted (网站托管的 URL)"
|
||||
@echo ""
|
||||
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
---
|
||||
title: "Lesson 6"
|
||||
format: html
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
```{r}
|
||||
|
||||
https://rs1.drwater.net
|
||||
|
||||
username:
|
||||
- ruser01
|
||||
- ruser02
|
||||
- ruser03
|
||||
- ruser04
|
||||
- ruser05
|
||||
- ruser06
|
||||
|
||||
RWEP2025
|
||||
|
||||
```
|
||||
|
||||
|
||||
# 安装包
|
||||
|
||||
|
||||
```{r}
|
||||
install.packages("tidyverse")
|
||||
|
||||
x <- c(1:10, NA)
|
||||
|
||||
hist(x)
|
||||
|
||||
mean(x, na.rm = TRUE)
|
||||
|
||||
median(x, na.rm = TRUE)
|
||||
|
||||
sd(x, na.rm = TRUE)
|
||||
|
||||
|
||||
for(i in 1:10){
|
||||
print(i)
|
||||
}
|
||||
|
||||
|
||||
x + y + x * y
|
||||
|
||||
myfunc <- function(x, y = 3) {
|
||||
x + y + x * y
|
||||
}
|
||||
|
||||
|
||||
myfunc(1, 2)
|
||||
|
||||
|
||||
myfunc(10)
|
||||
|
||||
|
||||
c(FALSE, 2, 1:3, 3)
|
||||
|
||||
c(FALSE, 2, 1:3, 3) > 1
|
||||
|
||||
all(c(FALSE, 2, 1:3, 3) > 1)
|
||||
|
||||
|
||||
c(1L,2L,3L)
|
||||
|
||||
any(c(FALSE, 2, 1:3, 3) > 1)
|
||||
|
||||
|
||||
x <- 10
|
||||
|
||||
sin(x) = ?
|
||||
|
||||
paste("sin(x) = ", sin(x), sep = " ")
|
||||
|
||||
paste0("sin(x) = ", sin(x))
|
||||
|
||||
|
||||
substr("Monday", 1, 3)
|
||||
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
# tidy
|
||||
|
||||
|
||||
```{r}
|
||||
require(readxl)
|
||||
|
||||
aqdf <-readxl::read_xlsx("../../data/airquality.xlsx", sheet = "metadf")
|
||||
|
||||
# install.packages("skimr")
|
||||
|
||||
aqdf |>
|
||||
skimr::skim()
|
||||
|
||||
# base
|
||||
|
||||
# tidyverse
|
||||
|
||||
aqdf |>
|
||||
dplyr::group_by(Area) |>
|
||||
dplyr::summarize(
|
||||
n = n(),
|
||||
lon.mean = mean(lon, na.rm = TRUE),
|
||||
lon.sd = sd(lat, na.rm = TRUE)
|
||||
) |>
|
||||
dplyr::filter(Area %in% c("北京市", "天津市", "上海市", "重庆市")) |>
|
||||
ggplot(aes(x = n, y = lon.mean)) +
|
||||
geom_point() +
|
||||
geom_line() +
|
||||
geom_errorbar(
|
||||
aes(ymin = lon.mean - lon.sd,
|
||||
ymax = lon.mean + lon.sd)
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
readxl::read_xlsx("./airquality.xlsx")
|
||||
|
||||
flights|>
|
||||
filter(dest=="IAH")|>
|
||||
group_by(year,month,day)|>summarize(n=n(),
|
||||
delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10)
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
name,age,score
|
||||
Alice,25,85
|
||||
Bob,30,92
|
||||
Charlie,28,89
|
||||
David,22,95
|
||||
Eva,35,87
|
||||
Frank,27,91
|
||||
Grace,29,88
|
||||
Helen,26,93
|
||||
Ivan,31,86
|
||||
Jack,24,94
|
||||
Kelly,32,89
|
||||
Lily,28,90
|
||||
Mike,33,85
|
||||
Nancy,27,92
|
||||
Olivia,34,88
|
||||
Peter,29,93
|
||||
Queen,25,89
|
||||
Ryan,30,94
|
||||
Samantha,26,91
|
||||
Tom,31,87
|
||||
|
@@ -47,8 +47,9 @@ knitr::opts_chunk$set(echo = TRUE)
|
||||
### 课件
|
||||
|
||||
- 采用`R语言`+`quarto`完成
|
||||
- 网页公开:[https://drc.drwater.net/course/public/RWEP/PUB/index.html](https://drc.drwater.net/course/public/RWEP/PUB/index.html)
|
||||
- 课件代码:[https://git.drwater.net/course/RWEP.git](https://git.drwater.net/course/RWEP.git)
|
||||
- 网页公开:[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html)
|
||||
- 课件代码:[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git)
|
||||
- 代码web界面:[https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/](https://on.tty-share.com/s/ny3JVrMuvUNOmnuioS3I7YEeVCi5Hk3Qc9vgz2QdX0FE2cYAQZFW2MUOkQyG0P5ZUR8/)
|
||||
|
||||
## 如何学习接下来的内容?
|
||||
|
||||
@@ -66,7 +67,7 @@ knitr::opts_chunk$set(echo = TRUE)
|
||||
|
||||
## Rstudio Server使用
|
||||
|
||||
- 服务网址:[https://rs1.drwater.net/](https://rs1.drwater.net/)
|
||||
- 服务网址:[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/)
|
||||
- 每位同学使用1个账号,随机生成
|
||||
- 密码:****
|
||||
- 后面的实践课程可在该服务器上完成
|
||||
@@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz")
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
||||
@@ -264,9 +264,6 @@ t.test(x, y)
|
||||
wilcox.test(x, y)
|
||||
```
|
||||
|
||||
|
||||
### [什么是 Wilcoxon-Mann-Whitney检验?](https://zhuanlan.zhihu.com/p/613524533)
|
||||
|
||||
## 统计函数
|
||||
|
||||
### 创建向量的直方图
|
||||
@@ -795,4 +792,4 @@ names(Y) <- c("colA", "colB", "colC")
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
||||
@@ -37,7 +37,7 @@ require(learnr)
|
||||
|
||||
## 下载excel文件
|
||||
|
||||
[https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx](https://git.drwater.net/course/RWEP/raw/branch/main/data/airquality.xlsx)
|
||||
[https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/data/airquality.xlsx)
|
||||
|
||||
|
||||
## Tidy data
|
||||
@@ -79,5 +79,5 @@ Tom,31,87
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||
|
||||
@@ -321,4 +321,5 @@ if (FALSE) {
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
||||
|
||||
@@ -323,5 +323,5 @@ flights |>
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||
|
||||
@@ -945,5 +945,5 @@ semi_join(df1, df2, by = "id")
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||
|
||||
@@ -43,7 +43,7 @@ require(learnr)
|
||||
1. 根据`airqualitydf.xlsx`,按采样点统计白天(8:00-20:00)与夜晚(20:00-8:00)中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。
|
||||
2. 按照不同城市分组,统计白天与夜晚AQI中位数是否具有显著差异。
|
||||
|
||||
作业模板:[第8次课后作业_模板.qmd](https://git.drwater.net/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)
|
||||
作业模板:[第8次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240328_9_课后作业/第8次课后作业_模板.qmd)
|
||||
|
||||
## 示例代码
|
||||
|
||||
@@ -62,5 +62,5 @@ require(learnr)
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||
|
||||
@@ -14,7 +14,7 @@ format: html
|
||||
# 下载至临时文件
|
||||
if (FALSE) {
|
||||
tmpxlsxpath <- file.path(tempdir(), "airquality.xlsx")
|
||||
download.file("https://git.drwater.net/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
|
||||
download.file("https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/PUB/data/airquality.xlsx",
|
||||
destfile = tmpxlsxpath)
|
||||
airqualitydf <- readxl::read_xlsx(tmpxlsxpath, sheet = 2)
|
||||
metadf <- readxl::read_xlsx(tmpxlsxpath, sheet = 1)
|
||||
@@ -118,8 +118,6 @@ if (FALSE) {
|
||||
saveRDS(testdf, "./testdf.RDS")
|
||||
}
|
||||
|
||||
if (FALSE) {
|
||||
|
||||
lang <- "cn"
|
||||
require(dwfun)
|
||||
require(rmdify)
|
||||
@@ -146,7 +144,5 @@ testdf |>
|
||||
pull(gp)
|
||||
|
||||
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
@@ -3934,5 +3934,5 @@ p
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||
|
||||
|
Before Width: | Height: | Size: 61 KiB After Width: | Height: | Size: 61 KiB |
@@ -100,4 +100,5 @@ geom_bar(position = "fill")
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||
|
||||
49
SD/20240402_9_课后作业/index.qmd
Normal file
@@ -0,0 +1,49 @@
|
||||
---
|
||||
title: "课后作业9"
|
||||
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
|
||||
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
|
||||
date: today
|
||||
lang: zh
|
||||
format:
|
||||
revealjs:
|
||||
theme: dark
|
||||
slide-number: true
|
||||
chalkboard:
|
||||
buttons: true
|
||||
preview-links: auto
|
||||
lang: zh
|
||||
toc: true
|
||||
toc-depth: 1
|
||||
toc-title: 大纲
|
||||
logo: ./_extensions/inst/img/ucaslogo.png
|
||||
css: ./_extensions/inst/css/revealjs.css
|
||||
pointer:
|
||||
key: "p"
|
||||
color: "#32cd32"
|
||||
pointerSize: 18
|
||||
revealjs-plugins:
|
||||
- pointer
|
||||
filters:
|
||||
- d2
|
||||
---
|
||||
|
||||
```{r}
|
||||
#| include: false
|
||||
#| cache: false
|
||||
lang <- "cn"
|
||||
require(tidyverse)
|
||||
require(learnr)
|
||||
```
|
||||
|
||||
## 第9次课后作业
|
||||
|
||||
自选数据集,使用R语言开展不同因子(如年份、季节、处理方式等)间某指标的差异分析,采用图表方式形成简要报告。
|
||||
|
||||
作业模板:[第9次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240402_9_课后作业/第9次课后作业_模板.qmd)
|
||||
|
||||
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||
|
||||
8
SD/20240402_9_课后作业/第9次课后作业_模板.qmd
Normal file
@@ -0,0 +1,8 @@
|
||||
---
|
||||
title: 课后作业9
|
||||
author: 姓名
|
||||
format: html
|
||||
---
|
||||
|
||||
要求:自选数据集,使用R语言开展不同因子间(如年份、季节、处理方式等)某指标的差异分析,采用图表+文字说明等方式形成简要报告。
|
||||
|
||||
|
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 272 KiB After Width: | Height: | Size: 272 KiB |
|
Before Width: | Height: | Size: 345 KiB After Width: | Height: | Size: 345 KiB |
|
Before Width: | Height: | Size: 210 KiB After Width: | Height: | Size: 210 KiB |
|
Before Width: | Height: | Size: 470 KiB After Width: | Height: | Size: 470 KiB |
|
Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB |
|
Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB |
|
Before Width: | Height: | Size: 5.5 KiB After Width: | Height: | Size: 5.5 KiB |
|
Before Width: | Height: | Size: 209 KiB After Width: | Height: | Size: 209 KiB |
|
Before Width: | Height: | Size: 7.8 KiB After Width: | Height: | Size: 7.8 KiB |
|
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB |
|
Before Width: | Height: | Size: 177 KiB After Width: | Height: | Size: 177 KiB |
|
Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB |
|
Before Width: | Height: | Size: 569 KiB After Width: | Height: | Size: 569 KiB |
|
Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.5 KiB |
|
Before Width: | Height: | Size: 100 KiB After Width: | Height: | Size: 100 KiB |
|
Before Width: | Height: | Size: 7.5 KiB After Width: | Height: | Size: 7.5 KiB |
|
Before Width: | Height: | Size: 131 KiB After Width: | Height: | Size: 131 KiB |
|
Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB |
|
Before Width: | Height: | Size: 60 KiB After Width: | Height: | Size: 60 KiB |
|
Before Width: | Height: | Size: 83 KiB After Width: | Height: | Size: 83 KiB |
|
Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB |
|
Before Width: | Height: | Size: 87 KiB After Width: | Height: | Size: 87 KiB |
|
Before Width: | Height: | Size: 91 KiB After Width: | Height: | Size: 91 KiB |
|
Before Width: | Height: | Size: 869 KiB After Width: | Height: | Size: 869 KiB |
|
Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 3.8 KiB |
|
Before Width: | Height: | Size: 638 KiB After Width: | Height: | Size: 638 KiB |
|
Before Width: | Height: | Size: 408 KiB After Width: | Height: | Size: 408 KiB |
|
Before Width: | Height: | Size: 410 KiB After Width: | Height: | Size: 410 KiB |
|
Before Width: | Height: | Size: 154 KiB After Width: | Height: | Size: 154 KiB |
|
Before Width: | Height: | Size: 8.7 KiB After Width: | Height: | Size: 8.7 KiB |
|
Before Width: | Height: | Size: 159 KiB After Width: | Height: | Size: 159 KiB |
|
Before Width: | Height: | Size: 184 KiB After Width: | Height: | Size: 184 KiB |
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB |
|
Before Width: | Height: | Size: 69 KiB After Width: | Height: | Size: 69 KiB |
|
Before Width: | Height: | Size: 96 KiB After Width: | Height: | Size: 96 KiB |
|
Before Width: | Height: | Size: 117 KiB After Width: | Height: | Size: 117 KiB |
|
Before Width: | Height: | Size: 137 KiB After Width: | Height: | Size: 137 KiB |
|
Before Width: | Height: | Size: 10 KiB After Width: | Height: | Size: 10 KiB |
|
Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 7.1 KiB |
|
Before Width: | Height: | Size: 54 KiB After Width: | Height: | Size: 54 KiB |
|
Before Width: | Height: | Size: 7.6 KiB After Width: | Height: | Size: 7.6 KiB |
|
Before Width: | Height: | Size: 134 KiB After Width: | Height: | Size: 134 KiB |
|
Before Width: | Height: | Size: 150 KiB After Width: | Height: | Size: 150 KiB |
|
Before Width: | Height: | Size: 160 KiB After Width: | Height: | Size: 160 KiB |
|
Before Width: | Height: | Size: 120 KiB After Width: | Height: | Size: 120 KiB |
@@ -47,8 +47,7 @@ hexes <- function(..., size = 64) {
|
||||
right <- (seq_along(x) - 1) * size
|
||||
res <- glue::glue(
|
||||
'{.absolute top=-20 right=<right> width="<size>" height="<size * 1.16>"}',
|
||||
.open = "<",
|
||||
.close = ">"
|
||||
.open = "<", .close = ">"
|
||||
)
|
||||
paste0(res, collapse = " ")
|
||||
}
|
||||
@@ -66,10 +65,11 @@ theme_set(theme_bw())
|
||||
options(cli.width = 70, ggplot2.discrete.fill = c("#7e96d5", "#de6c4e"))
|
||||
|
||||
train_color <- "#1a162d"
|
||||
test_color <- "#cd4173"
|
||||
data_color <- "#767381"
|
||||
test_color <- "#cd4173"
|
||||
data_color <- "#767381"
|
||||
assess_color <- "#84cae1"
|
||||
splits_pal <- c(data_color, train_color, test_color)
|
||||
|
||||
```
|
||||
|
||||
|
||||
@@ -152,27 +152,10 @@ knitr::include_graphics("images/whole-game-final-performance.jpg")
|
||||
|
||||
# Install the packages for the workshop
|
||||
pkgs <-
|
||||
c(
|
||||
"bonsai",
|
||||
"doParallel",
|
||||
"embed",
|
||||
"finetune",
|
||||
"lightgbm",
|
||||
"lme4",
|
||||
"plumber",
|
||||
"probably",
|
||||
"ranger",
|
||||
"rpart",
|
||||
"rpart.plot",
|
||||
"rules",
|
||||
"splines2",
|
||||
"stacks",
|
||||
"text2vec",
|
||||
"textrecipes",
|
||||
"tidymodels",
|
||||
"vetiver",
|
||||
"remotes"
|
||||
)
|
||||
c("bonsai", "doParallel", "embed", "finetune", "lightgbm", "lme4",
|
||||
"plumber", "probably", "ranger", "rpart", "rpart.plot", "rules",
|
||||
"splines2", "stacks", "text2vec", "textrecipes", "tidymodels",
|
||||
"vetiver", "remotes")
|
||||
|
||||
install.packages(pkgs)
|
||||
```
|
||||
@@ -214,34 +197,26 @@ taxi
|
||||
#|
|
||||
set.seed(123)
|
||||
library(forcats)
|
||||
require(tidymodels)
|
||||
require(tidyverse)
|
||||
one_split <- taxi |>
|
||||
dplyr::slice(1:30) |>
|
||||
rsample::initial_split() |>
|
||||
generics::tidy() |>
|
||||
tibble::add_row(Row = 1:30, Data = "Original") |>
|
||||
dplyr::mutate(
|
||||
Data = case_when(
|
||||
Data == "Analysis" ~ "Training",
|
||||
Data == "Assessment" ~ "Testing",
|
||||
TRUE ~ Data
|
||||
)
|
||||
) |>
|
||||
dplyr::mutate(
|
||||
Data = factor(Data, levels = c("Original", "Training", "Testing"))
|
||||
)
|
||||
one_split <- slice(taxi, 1:30) %>%
|
||||
initial_split() %>%
|
||||
tidy() %>%
|
||||
add_row(Row = 1:30, Data = "Original") %>%
|
||||
mutate(Data = case_when(
|
||||
Data == "Analysis" ~ "Training",
|
||||
Data == "Assessment" ~ "Testing",
|
||||
TRUE ~ Data
|
||||
)) %>%
|
||||
mutate(Data = factor(Data, levels = c("Original", "Training", "Testing")))
|
||||
all_split <-
|
||||
ggplot(one_split, aes(x = Row, y = fct_rev(Data), fill = Data)) +
|
||||
geom_tile(color = "white", linewidth = 1) +
|
||||
geom_tile(color = "white",
|
||||
linewidth = 1) +
|
||||
scale_fill_manual(values = splits_pal, guide = "none") +
|
||||
theme_minimal() +
|
||||
theme(
|
||||
axis.text.y = element_text(size = rel(2)),
|
||||
axis.text.x = element_blank(),
|
||||
legend.position = "top",
|
||||
panel.grid = element_blank()
|
||||
) +
|
||||
theme(axis.text.y = element_text(size = rel(2)),
|
||||
axis.text.x = element_blank(),
|
||||
legend.position = "top",
|
||||
panel.grid = element_blank()) +
|
||||
coord_equal(ratio = 1) +
|
||||
labs(x = NULL, y = NULL)
|
||||
all_split
|
||||
@@ -670,18 +645,9 @@ augment(taxi_fit, new_data = taxi_train) %>%
|
||||
augment(taxi_fit, new_data = taxi_train) %>%
|
||||
roc_curve(truth = tip, .pred_yes) %>%
|
||||
filter(is.finite(.threshold)) %>%
|
||||
pivot_longer(
|
||||
c(specificity, sensitivity),
|
||||
names_to = "statistic",
|
||||
values_to = "value"
|
||||
) %>%
|
||||
pivot_longer(c(specificity, sensitivity), names_to = "statistic", values_to = "value") %>%
|
||||
rename(`event threshold` = .threshold) %>%
|
||||
ggplot(aes(
|
||||
x = `event threshold`,
|
||||
y = value,
|
||||
col = statistic,
|
||||
group = statistic
|
||||
)) +
|
||||
ggplot(aes(x = `event threshold`, y = value, col = statistic, group = statistic)) +
|
||||
geom_line() +
|
||||
scale_color_brewer(palette = "Dark2") +
|
||||
labs(y = NULL) +
|
||||
@@ -1044,39 +1010,34 @@ collect_metrics(final_fit)
|
||||
|
||||
```{r}
|
||||
require(tidyverse)
|
||||
sitedf <- readr::read_csv(
|
||||
"https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv"
|
||||
) |>
|
||||
select(
|
||||
SITE_ID,
|
||||
sitedf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv") |>
|
||||
select(SITE_ID,
|
||||
lon = LON_DD,
|
||||
lat = LAT_DD,
|
||||
name = LAKENAME,
|
||||
area = LAKEAREA,
|
||||
zmax = DEPTHMAX
|
||||
) |>
|
||||
) |>
|
||||
group_by(SITE_ID) |>
|
||||
summarize(
|
||||
lon = mean(lon, na.rm = TRUE),
|
||||
summarize(lon = mean(lon, na.rm = TRUE),
|
||||
lat = mean(lat, na.rm = TRUE),
|
||||
name = unique(name),
|
||||
area = mean(area, na.rm = TRUE),
|
||||
zmax = mean(zmax, na.rm = TRUE)
|
||||
)
|
||||
zmax = mean(zmax, na.rm = TRUE))
|
||||
|
||||
|
||||
visitdf <- readr::read_csv(
|
||||
"https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
|
||||
) |>
|
||||
select(SITE_ID, date = DATE_PROFILE, year = YEAR, visit = VISIT_NO) |>
|
||||
visitdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv") |>
|
||||
select(SITE_ID,
|
||||
date = DATE_PROFILE,
|
||||
year = YEAR,
|
||||
visit = VISIT_NO
|
||||
) |>
|
||||
distinct()
|
||||
|
||||
|
||||
waterchemdf <- readr::read_csv(
|
||||
"https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
|
||||
) |>
|
||||
select(
|
||||
SITE_ID,
|
||||
|
||||
waterchemdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv") |>
|
||||
select(SITE_ID,
|
||||
date = DATE_PROFILE,
|
||||
depth = DEPTH,
|
||||
temp = TEMP_FIELD,
|
||||
@@ -1085,43 +1046,38 @@ waterchemdf <- readr::read_csv(
|
||||
cond = COND_FIELD,
|
||||
)
|
||||
|
||||
sddf <- readr::read_csv(
|
||||
"https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv"
|
||||
) |>
|
||||
select(
|
||||
SITE_ID,
|
||||
sddf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv") |>
|
||||
select(SITE_ID,
|
||||
date = DATE_SECCHI,
|
||||
sd = SECMEAN,
|
||||
clear_to_bottom = CLEAR_TO_BOTTOM
|
||||
)
|
||||
|
||||
trophicdf <- readr::read_csv(
|
||||
"https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv"
|
||||
) |>
|
||||
select(SITE_ID, visit = VISIT_NO, tp = PTL, tn = NTL, chla = CHLA) |>
|
||||
trophicdf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv") |>
|
||||
select(SITE_ID,
|
||||
visit = VISIT_NO,
|
||||
tp = PTL,
|
||||
tn = NTL,
|
||||
chla = CHLA) |>
|
||||
left_join(visitdf, by = c("SITE_ID", "visit")) |>
|
||||
select(-year, -visit) |>
|
||||
group_by(SITE_ID, date) |>
|
||||
summarize(
|
||||
tp = mean(tp, na.rm = TRUE),
|
||||
summarize(tp = mean(tp, na.rm = TRUE),
|
||||
tn = mean(tn, na.rm = TRUE),
|
||||
chla = mean(chla, na.rm = TRUE)
|
||||
)
|
||||
|
||||
|
||||
phytodf <- readr::read_csv(
|
||||
"https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv"
|
||||
) |>
|
||||
select(
|
||||
SITE_ID,
|
||||
|
||||
phytodf <- readr::read_csv("https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv") |>
|
||||
select(SITE_ID,
|
||||
date = DATEPHYT,
|
||||
depth = SAMPLE_DEPTH,
|
||||
phyta = DIVISION,
|
||||
genus = GENUS,
|
||||
species = SPECIES,
|
||||
tax = TAXANAME,
|
||||
abund = ABUND
|
||||
) |>
|
||||
abund = ABUND) |>
|
||||
mutate(phyta = gsub(" .*$", "", phyta)) |>
|
||||
filter(!is.na(genus)) |>
|
||||
group_by(SITE_ID, date, depth, phyta, genus) |>
|
||||
@@ -1132,7 +1088,7 @@ envdf <- waterchemdf |>
|
||||
filter(depth < 2) |>
|
||||
select(-depth) |>
|
||||
group_by(SITE_ID, date) |>
|
||||
summarise_all(~ mean(., na.rm = TRUE)) |>
|
||||
summarise_all(~mean(., na.rm = TRUE)) |>
|
||||
ungroup() |>
|
||||
left_join(sddf, by = c("SITE_ID", "date")) |>
|
||||
left_join(trophicdf, by = c("SITE_ID", "date"))
|
||||
@@ -1141,21 +1097,19 @@ nla <- envdf |>
|
||||
left_join(phytodf) |>
|
||||
left_join(sitedf, by = "SITE_ID") |>
|
||||
filter(!purrr::map_lgl(phytodf, is.null)) |>
|
||||
mutate(
|
||||
cyanophyta = purrr::map(
|
||||
phytodf,
|
||||
~ .x |>
|
||||
dplyr::filter(phyta == "Cyanophyta") |>
|
||||
summarize(cyanophyta = sum(abund, na.rm = TRUE))
|
||||
)
|
||||
) |>
|
||||
mutate(cyanophyta = purrr::map(phytodf, ~ .x |>
|
||||
dplyr::filter(phyta == "Cyanophyta") |>
|
||||
summarize(cyanophyta = sum(abund, na.rm = TRUE))
|
||||
)) |>
|
||||
unnest(cyanophyta) |>
|
||||
select(-phyta) |>
|
||||
mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE))
|
||||
|
||||
|
||||
# library(rmdify)
|
||||
# library(dwfun)
|
||||
# dwfun::init()
|
||||
|
||||
```
|
||||
|
||||
|
||||
@@ -1173,20 +1127,18 @@ skimr::skim(nla)
|
||||
nla |>
|
||||
filter(tp > 1) |>
|
||||
ggplot(aes(tn, tp)) +
|
||||
geom_point() +
|
||||
geom_smooth(method = "lm") +
|
||||
scale_x_log10(
|
||||
breaks = scales::trans_breaks("log10", function(x) 10^x),
|
||||
labels = scales::trans_format("log10", scales::math_format(10^.x))
|
||||
) +
|
||||
scale_y_log10(
|
||||
breaks = scales::trans_breaks("log10", function(x) 10^x),
|
||||
labels = scales::trans_format("log10", scales::math_format(10^.x))
|
||||
)
|
||||
geom_point() +
|
||||
geom_smooth(method = "lm") +
|
||||
scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
|
||||
labels = scales::trans_format("log10", scales::math_format(10^.x))) +
|
||||
scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
|
||||
labels = scales::trans_format("log10", scales::math_format(10^.x)))
|
||||
|
||||
m1 <- lm(log10(tp) ~ log10(tn), data = nla)
|
||||
|
||||
summary(m1)
|
||||
|
||||
|
||||
```
|
||||
|
||||
## 复杂指标
|
||||
@@ -1195,20 +1147,18 @@ summary(m1)
|
||||
nla |>
|
||||
filter(tp > 1) |>
|
||||
ggplot(aes(tp, cyanophyta)) +
|
||||
geom_point() +
|
||||
geom_smooth(method = "lm") +
|
||||
scale_x_log10(
|
||||
breaks = scales::trans_breaks("log10", function(x) 10^x),
|
||||
labels = scales::trans_format("log10", scales::math_format(10^.x))
|
||||
) +
|
||||
scale_y_log10(
|
||||
breaks = scales::trans_breaks("log10", function(x) 10^x),
|
||||
labels = scales::trans_format("log10", scales::math_format(10^.x))
|
||||
)
|
||||
geom_point() +
|
||||
geom_smooth(method = "lm") +
|
||||
scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
|
||||
labels = scales::trans_format("log10", scales::math_format(10^.x))) +
|
||||
scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
|
||||
labels = scales::trans_format("log10", scales::math_format(10^.x)))
|
||||
|
||||
m2 <- lm(log10(cyanophyta) ~ log10(tp), data = nla)
|
||||
|
||||
summary(m2)
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
@@ -1220,14 +1170,13 @@ summary(m2)
|
||||
(nla_split <- rsample::initial_split(nla, prop = 0.7, strata = zmax))
|
||||
(nla_train <- training(nla_split))
|
||||
(nla_test <- testing(nla_split))
|
||||
|
||||
```
|
||||
|
||||
## tidymodels - recipe
|
||||
|
||||
```{r}
|
||||
nla_formula <- as.formula(
|
||||
"cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom"
|
||||
)
|
||||
nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom")
|
||||
# nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn")
|
||||
nla_recipe <- recipes::recipe(nla_formula, data = nla_train) |>
|
||||
recipes::step_string2factor(all_nominal()) |>
|
||||
@@ -1242,9 +1191,9 @@ nla_recipe
|
||||
|
||||
```{r}
|
||||
nla_cv <- recipes::bake(
|
||||
nla_recipe,
|
||||
new_data = training(nla_split)
|
||||
) |>
|
||||
nla_recipe,
|
||||
new_data = training(nla_split)
|
||||
) |>
|
||||
rsample::vfold_cv(v = 10)
|
||||
nla_cv
|
||||
```
|
||||
@@ -1311,7 +1260,7 @@ if (FALSE) {
|
||||
metrics = yardstick::metric_set(rmse, rsq, mae),
|
||||
control = tune::control_grid(verbose = TRUE)
|
||||
)
|
||||
saveRDS(xgboost_tuned, "./xgboost_tuned.RDS")
|
||||
saveRDS(xgboost_tuned, "./xgboost_tuned.RDS")
|
||||
}
|
||||
xgboost_tuned <- readRDS("./xgboost_tuned.RDS")
|
||||
```
|
||||
@@ -1349,7 +1298,7 @@ xgboost_tuned |>
|
||||
|
||||
```{r}
|
||||
xgboost_best_params <- xgboost_tuned |>
|
||||
tune::select_best(metric = "rmse")
|
||||
tune::select_best("rmse")
|
||||
|
||||
knitr::kable(xgboost_best_params)
|
||||
```
|
||||
@@ -1378,19 +1327,18 @@ train_prediction <- xgboost_model_final |>
|
||||
# fit the model on all the training data
|
||||
fit(
|
||||
formula = nla_formula,
|
||||
data = train_processed
|
||||
data = train_processed
|
||||
) |>
|
||||
# predict the sale prices for the training data
|
||||
predict(new_data = train_processed) |>
|
||||
bind_cols(
|
||||
nla_train |>
|
||||
mutate(.obs = log10(cyanophyta))
|
||||
)
|
||||
bind_cols(nla_train |>
|
||||
mutate(.obs = log10(cyanophyta)))
|
||||
xgboost_score_train <-
|
||||
train_prediction |>
|
||||
yardstick::metrics(.obs, .pred) |>
|
||||
mutate(.estimate = format(round(.estimate, 2), big.mark = ","))
|
||||
knitr::kable(xgboost_score_train)
|
||||
|
||||
```
|
||||
|
||||
## tidymodels - train evaluation
|
||||
@@ -1401,8 +1349,10 @@ knitr::kable(xgboost_score_train)
|
||||
#| out-width: "80%"
|
||||
train_prediction |>
|
||||
ggplot(aes(.pred, .obs)) +
|
||||
geom_point() +
|
||||
geom_smooth(method = "lm")
|
||||
geom_point() +
|
||||
geom_smooth(method = "lm")
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
@@ -1410,20 +1360,18 @@ train_prediction |>
|
||||
|
||||
|
||||
```{r}
|
||||
test_processed <- bake(nla_recipe, new_data = nla_test)
|
||||
test_processed <- bake(nla_recipe, new_data = nla_test)
|
||||
|
||||
test_prediction <- xgboost_model_final |>
|
||||
# fit the model on all the training data
|
||||
fit(
|
||||
formula = nla_formula,
|
||||
data = train_processed
|
||||
data = train_processed
|
||||
) |>
|
||||
# use the training model fit to predict the test data
|
||||
predict(new_data = test_processed) |>
|
||||
bind_cols(
|
||||
nla_test |>
|
||||
mutate(.obs = log10(cyanophyta))
|
||||
)
|
||||
bind_cols(nla_test |>
|
||||
mutate(.obs = log10(cyanophyta)))
|
||||
|
||||
# measure the accuracy of our model using `yardstick`
|
||||
xgboost_score <- test_prediction |>
|
||||
@@ -1446,7 +1394,7 @@ cyanophyta_prediction_residual <- test_prediction |>
|
||||
select(.pred, residual_pct)
|
||||
|
||||
cyanophyta_prediction_residual |>
|
||||
ggplot(aes(x = .pred, y = residual_pct)) +
|
||||
ggplot(aes(x = .pred, y = residual_pct)) +
|
||||
geom_point() +
|
||||
xlab("Predicted Cyanophyta") +
|
||||
ylab("Residual (%)")
|
||||
@@ -1463,8 +1411,9 @@ cyanophyta_prediction_residual |>
|
||||
#| out-width: "80%"
|
||||
test_prediction |>
|
||||
ggplot(aes(.pred, .obs)) +
|
||||
geom_point() +
|
||||
geom_smooth(method = "lm", colour = "black")
|
||||
geom_point() +
|
||||
geom_smooth(method = "lm", colour = "black")
|
||||
|
||||
```
|
||||
|
||||
|
||||
@@ -1472,4 +1421,5 @@ test_prediction |>
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||
|
||||
|
Before Width: | Height: | Size: 61 KiB After Width: | Height: | Size: 61 KiB |
1
SD/20240409_2_大数据分析工具/_extensions
Symbolic link
@@ -0,0 +1 @@
|
||||
../../_extensions
|
||||
@@ -163,4 +163,5 @@ babynames |>
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
|
||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||
|
||||