Compare commits
No commits in common. "04cc6935d6f3e03377668a7b2f068e5a8aa2f6c6" and "68395942405e9669a873e460b192ebcf7fbe6e5c" have entirely different histories.
04cc6935d6
...
6839594240
|
@ -49,7 +49,6 @@ knitr::opts_chunk$set(echo = TRUE)
|
||||||
- 采用`R语言`+`quarto`完成
|
- 采用`R语言`+`quarto`完成
|
||||||
- 网页公开:[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html)
|
- 网页公开:[https://drwater.rcees.ac.cn/course/public/RWEP/\@PUB/index.html](https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/index.html)
|
||||||
- 课件代码:[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git)
|
- 课件代码:[https://drwater.rcees.ac.cn/git/course/RWEP.git](https://drwater.rcees.ac.cn/git/course/RWEP.git)
|
||||||
- 代码web界面: [https://on.tty-share.com/s/hWkn5_eF6rfZuAyJu8sheMgrcRzx6AZ2m7V32IphFHs7gn-vP9WYOeVEYtH8a-bGTuM/](https://on.tty-share.com/s/hWkn5_eF6rfZuAyJu8sheMgrcRzx6AZ2m7V32IphFHs7gn-vP9WYOeVEYtH8a-bGTuM/)
|
|
||||||
|
|
||||||
## 如何学习接下来的内容?
|
## 如何学习接下来的内容?
|
||||||
|
|
||||||
|
@ -69,7 +68,7 @@ knitr::opts_chunk$set(echo = TRUE)
|
||||||
|
|
||||||
- 服务网址:[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/)
|
- 服务网址:[https://drwater.rcees.ac.cn/rs1/](https://drwater.rcees.ac.cn/rs1/)
|
||||||
- 每位同学使用1个账号,随机生成
|
- 每位同学使用1个账号,随机生成
|
||||||
- 密码:****
|
- 密码:\_RWEP_2024\_
|
||||||
- 后面的实践课程可在该服务器上完成
|
- 后面的实践课程可在该服务器上完成
|
||||||
|
|
||||||
|
|
||||||
|
@ -87,7 +86,6 @@ set.seed <- 23
|
||||||
uiddf <- coursememberdf |>
|
uiddf <- coursememberdf |>
|
||||||
dplyr::mutate(uid = paste0("ruser", sprintf("%02d", sample(1:nmem, nmem)))) |>
|
dplyr::mutate(uid = paste0("ruser", sprintf("%02d", sample(1:nmem, nmem)))) |>
|
||||||
dplyr::select(uid, 姓名) |>
|
dplyr::select(uid, 姓名) |>
|
||||||
dplyr::mutate(姓名 = "XX") |>
|
|
||||||
dplyr::arrange(uid) |>
|
dplyr::arrange(uid) |>
|
||||||
dplyr::mutate(group = (dplyr::row_number() - 1) %/% 10 + 1) |>
|
dplyr::mutate(group = (dplyr::row_number() - 1) %/% 10 + 1) |>
|
||||||
tidyr::nest(gdf = -group) |>
|
tidyr::nest(gdf = -group) |>
|
||||||
|
@ -106,6 +104,7 @@ uiddf$tab[[2]]
|
||||||
```{r}
|
```{r}
|
||||||
#| echo: false
|
#| echo: false
|
||||||
#| output: asis
|
#| output: asis
|
||||||
|
|
||||||
uiddf$tab[[3]]
|
uiddf$tab[[3]]
|
||||||
uiddf$tab[[4]]
|
uiddf$tab[[4]]
|
||||||
```
|
```
|
||||||
|
@ -143,6 +142,8 @@ author: 姓名
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
````
|
````
|
||||||
|
|
||||||
|
|
||||||
|
@ -150,4 +151,5 @@ author: 姓名
|
||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||||
|
|
||||||
|
|
|
@ -146,8 +146,9 @@ knitr::opts_chunk$set(echo = TRUE)
|
||||||
## 扩展包:R package
|
## 扩展包:R package
|
||||||
|
|
||||||
- R有一万多个扩展软件包,提供了各种各样的功能
|
- R有一万多个扩展软件包,提供了各种各样的功能
|
||||||
- 已安装的基本R包,如base, stats, graphics等,启动R时默认载入
|
- 已安装的基本R软件伴随一些必要的扩展包,如base, stats, graphics等,这些包在启动R时会默认载入
|
||||||
- 其它扩展包需要用`library(.)`函数载入运行;或者采用`dplyr::filter(.)`方式
|
- 其它扩展包需要用`library(.)`函数载入运行
|
||||||
|
- 或者采用`dplyr::filter(.)`方式
|
||||||
|
|
||||||
::: panel-tabset
|
::: panel-tabset
|
||||||
### Code
|
### Code
|
||||||
|
@ -218,4 +219,4 @@ devtools::install_github("kjhealy/socviz")
|
||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||||
|
|
|
@ -792,4 +792,4 @@ names(Y) <- c("colA", "colB", "colC")
|
||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||||
|
|
|
@ -130,4 +130,4 @@ require(learnr)
|
||||||
## 欢迎讨论!{.center}
|
## 欢迎讨论!{.center}
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
../../_extensions
|
|
|
@ -1,157 +0,0 @@
|
||||||
---
|
|
||||||
title: "代码编写规则"
|
|
||||||
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
|
|
||||||
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
|
|
||||||
date: today
|
|
||||||
lang: zh
|
|
||||||
format:
|
|
||||||
revealjs:
|
|
||||||
theme: dark
|
|
||||||
slide-number: true
|
|
||||||
chalkboard:
|
|
||||||
buttons: true
|
|
||||||
preview-links: auto
|
|
||||||
lang: zh
|
|
||||||
toc: true
|
|
||||||
toc-depth: 1
|
|
||||||
toc-title: 大纲
|
|
||||||
logo: ./_extensions/inst/img/ucaslogo.png
|
|
||||||
css: ./_extensions/inst/css/revealjs.css
|
|
||||||
pointer:
|
|
||||||
key: "p"
|
|
||||||
color: "#32cd32"
|
|
||||||
pointerSize: 18
|
|
||||||
revealjs-plugins:
|
|
||||||
- pointer
|
|
||||||
filters:
|
|
||||||
- d2
|
|
||||||
---
|
|
||||||
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| echo: false
|
|
||||||
knitr::opts_chunk$set(echo = TRUE)
|
|
||||||
# source("../../coding/_common.R")
|
|
||||||
library(tidyverse)
|
|
||||||
library(nycflights13)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## tidy data
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
knitr::include_graphics("../../image/tidy-1.png", dpi = 270)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## pipe(管道) |>
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| eval: false
|
|
||||||
require(patchwork)
|
|
||||||
plot(1:10)
|
|
||||||
1:10 |> plot()
|
|
||||||
plot(x = 1:10, y = sin(1:10))
|
|
||||||
1:10 |> plot(y = sin(1:10))
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| echo: false
|
|
||||||
#| layout-nrow: 1
|
|
||||||
#| fig-width: 4
|
|
||||||
#| fig-height: 3
|
|
||||||
#| out-height: 90%
|
|
||||||
require(patchwork)
|
|
||||||
plot(1:10)
|
|
||||||
1:10 |> plot()
|
|
||||||
plot(x = 1:10, y = sin(1:10))
|
|
||||||
1:10 |> plot(y = sin(1:10))
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## pipe(管道):%>%
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| eval: false
|
|
||||||
#| layout-nrow: 1
|
|
||||||
#| fig-width: 3
|
|
||||||
#| fig-height: 4
|
|
||||||
#| out-height: 125%
|
|
||||||
require(magrittr)
|
|
||||||
1:10 %>% plot()
|
|
||||||
1:10 %>% plot(y = sin(1:10))
|
|
||||||
sin(1:10) %>% plot(1:10, .)
|
|
||||||
sin(1:10) |> plot(x = 1:10, y = _)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| echo: false
|
|
||||||
#| layout-nrow: 1
|
|
||||||
#| fig-width: 3
|
|
||||||
#| fig-height: 4
|
|
||||||
#| out-height: 125%
|
|
||||||
require(magrittr)
|
|
||||||
1:10 %>% plot()
|
|
||||||
1:10 %>% plot(y = sin(1:10))
|
|
||||||
sin(1:10) %>% plot(1:10, .)
|
|
||||||
sin(1:10) |> plot(x = 1:10, y = _)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 代码编写规则
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| eval: false
|
|
||||||
|
|
||||||
# Strive for:
|
|
||||||
short_flights <- flights |> filter(air_time < 60)
|
|
||||||
# Avoid:
|
|
||||||
SHORTFLIGHTS <- flights |> filter(air_time < 60)
|
|
||||||
|
|
||||||
# Strive for
|
|
||||||
z <- (a + b)^2 / d
|
|
||||||
# Avoid
|
|
||||||
z<-( a + b ) ^ 2/d
|
|
||||||
|
|
||||||
# Strive for
|
|
||||||
mean(x, na.rm = TRUE)
|
|
||||||
# Avoid
|
|
||||||
mean (x ,na.rm=TRUE)
|
|
||||||
```
|
|
||||||
|
|
||||||
## 练习
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| eval: false
|
|
||||||
flights|>filter(dest=="IAH")|>
|
|
||||||
group_by(year,month,day)|>summarize(n=n(),
|
|
||||||
delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10)
|
|
||||||
|
|
||||||
flights|>filter(carrier=="UA",
|
|
||||||
dest%in%c("IAH","HOU"),sched_dep_time>
|
|
||||||
0900,sched_arr_time<2000)|>group_by(flight)|>
|
|
||||||
summarize(delay=mean(
|
|
||||||
arr_delay,na.rm=TRUE),cancelled=sum(is.na(arr_delay)),
|
|
||||||
n=n())|>filter(n>10)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## quarto
|
|
||||||
|
|
||||||
![](../../image/quarto-flow.png)
|
|
||||||
|
|
||||||
|
|
||||||
## 欢迎讨论!{.center}
|
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
../../_extensions
|
|
|
@ -1,310 +0,0 @@
|
||||||
---
|
|
||||||
title: "Data import"
|
|
||||||
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
|
|
||||||
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
|
|
||||||
date: today
|
|
||||||
lang: zh
|
|
||||||
format:
|
|
||||||
revealjs:
|
|
||||||
theme: dark
|
|
||||||
slide-number: true
|
|
||||||
chalkboard:
|
|
||||||
buttons: true
|
|
||||||
preview-links: auto
|
|
||||||
lang: zh
|
|
||||||
toc: true
|
|
||||||
toc-depth: 1
|
|
||||||
toc-title: 大纲
|
|
||||||
logo: ./_extensions/inst/img/ucaslogo.png
|
|
||||||
css: ./_extensions/inst/css/revealjs.css
|
|
||||||
pointer:
|
|
||||||
key: "p"
|
|
||||||
color: "#32cd32"
|
|
||||||
pointerSize: 18
|
|
||||||
revealjs-plugins:
|
|
||||||
- pointer
|
|
||||||
filters:
|
|
||||||
- d2
|
|
||||||
---
|
|
||||||
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| echo: false
|
|
||||||
knitr::opts_chunk$set(echo = TRUE)
|
|
||||||
source("../../coding/_common.R")
|
|
||||||
library(tidyverse)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## tidyverse风格数据分析总体流程
|
|
||||||
|
|
||||||
|
|
||||||
![](../../image/data-science/import.png)
|
|
||||||
|
|
||||||
|
|
||||||
## 导入csv数据
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
read_lines("../../data/students.csv") |> cat(sep = "\n")
|
|
||||||
```
|
|
||||||
|
|
||||||
## 导入csv数据
|
|
||||||
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
read_csv("../../data/students.csv") |>
|
|
||||||
knitr::kable()
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## 读取数据
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
(students <- read_csv("../../data/students.csv"))
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## 读取数据
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| message: false
|
|
||||||
(students <- read_csv("../../data/students.csv", na = c("N/A", "")))
|
|
||||||
```
|
|
||||||
|
|
||||||
## 列名不要有空格
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
students |>
|
|
||||||
rename(
|
|
||||||
student_id = `Student ID`,
|
|
||||||
full_name = `Full Name`
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## `janitor`处理空格
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| message: false
|
|
||||||
|
|
||||||
students |> janitor::clean_names()
|
|
||||||
```
|
|
||||||
|
|
||||||
## `janitor`处理空格
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
students |>
|
|
||||||
janitor::clean_names() |>
|
|
||||||
mutate(meal_plan = factor(meal_plan))
|
|
||||||
```
|
|
||||||
|
|
||||||
## `janitor`处理空格
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
students <- students |>
|
|
||||||
janitor::clean_names() |>
|
|
||||||
mutate(
|
|
||||||
meal_plan = factor(meal_plan),
|
|
||||||
age = parse_number(if_else(age == "five", "5", age))
|
|
||||||
)
|
|
||||||
students
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## 直接录入
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| message: false
|
|
||||||
|
|
||||||
read_csv(
|
|
||||||
"The first line of metadata
|
|
||||||
The second line of metadata
|
|
||||||
x,y,z
|
|
||||||
1,2,3",
|
|
||||||
skip = 2
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## 直接录入
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| message: false
|
|
||||||
read_csv(
|
|
||||||
"# A comment I want to skip
|
|
||||||
x,y,z
|
|
||||||
1,2,3",
|
|
||||||
comment = "#"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## 指定列名
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| message: false
|
|
||||||
|
|
||||||
read_csv(
|
|
||||||
"1,2,3
|
|
||||||
4,5,6",
|
|
||||||
col_names = c("x", "y", "z")
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## 指定列的类型
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
another_csv <- "
|
|
||||||
x,y,z
|
|
||||||
1,2,3"
|
|
||||||
|
|
||||||
read_csv(
|
|
||||||
another_csv,
|
|
||||||
col_types = cols(.default = col_character())
|
|
||||||
)
|
|
||||||
read_csv(
|
|
||||||
another_csv,
|
|
||||||
col_types = cols_only(x = col_character())
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## 练习
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| eval: false
|
|
||||||
|
|
||||||
read_csv("a,b\n1,2,3\n4,5,6")
|
|
||||||
read_csv("a,b,c\n1,2\n1,2,3,4")
|
|
||||||
read_csv("a,b\n\"1")
|
|
||||||
read_csv("a,b\n1,2\na,b")
|
|
||||||
read_csv("a;b\n1;3")
|
|
||||||
```
|
|
||||||
|
|
||||||
## 练习
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| eval: false
|
|
||||||
annoying <- tibble(
|
|
||||||
`1` = 1:10,
|
|
||||||
`2` = `1` * 2 + rnorm(length(`1`))
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 批量读取
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| message: false
|
|
||||||
|
|
||||||
sales_files <- c("../../data/01-sales.csv",
|
|
||||||
"../../data/02-sales.csv",
|
|
||||||
"../../data/03-sales.csv")
|
|
||||||
read_csv(sales_files, id = "file")
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 读取Excel,建议用`readxl`包
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
(surveydf <- readxl::read_xlsx("../../data/survey.xlsx"))
|
|
||||||
```
|
|
||||||
|
|
||||||
## 读取Excel
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
(airqualitydf <- readxl::read_xlsx("../../data/airquality.xlsx", sheet = 2))
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 批量读取
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
sales_files <- list.files("../../data",
|
|
||||||
pattern = "sales\\.csv$", full.names = TRUE)
|
|
||||||
sales_files
|
|
||||||
```
|
|
||||||
|
|
||||||
## 写入csv
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| warning: false
|
|
||||||
#| message: false
|
|
||||||
students
|
|
||||||
write_csv(students, "students-2.csv")
|
|
||||||
read_csv("students-2.csv")
|
|
||||||
```
|
|
||||||
|
|
||||||
## 写入Excel
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
writexl::write_xlsx(students, "../../data/writexldemo.xlsx")
|
|
||||||
```
|
|
||||||
|
|
||||||
## 读取数据库,以MySQL为例
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
if (FALSE) {
|
|
||||||
conn <- cctdb::get_dbconn("nationalairquality")
|
|
||||||
DBI::dbListTables(conn)
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 读取数据库,以MySQL为例
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
if (FALSE) {
|
|
||||||
conn <- cctdb::get_dbconn("nationalairquality")
|
|
||||||
metadf <- tbl(conn, "metadf") |>
|
|
||||||
head(100) |>
|
|
||||||
collect()
|
|
||||||
DBI::dbDisconnect(conn)
|
|
||||||
saveRDS(metadf, file = "../../data/metadfdemo.RDS")
|
|
||||||
}
|
|
||||||
metadf <- readRDS(file = "../../data/metadfdemo.RDS")
|
|
||||||
lang <- "cn"
|
|
||||||
metadf |>
|
|
||||||
ggplot(aes(lon, lat)) +
|
|
||||||
geom_point(aes(fill = Area)) +
|
|
||||||
dwfun::theme_sci()
|
|
||||||
```
|
|
||||||
|
|
||||||
## 练习
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| include: false
|
|
||||||
#| eval: false
|
|
||||||
if (FALSE) {
|
|
||||||
conn <- cctdb::get_dbconn("nationalairquality")
|
|
||||||
metadf <- tbl(conn, "metadf") |>
|
|
||||||
collect()
|
|
||||||
DBI::dbDisconnect(conn)
|
|
||||||
metanestdf <- metadf |>
|
|
||||||
nest(citydf = -Area)
|
|
||||||
names(metanestdf$citydf) <- metanestdf$Area
|
|
||||||
writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx")
|
|
||||||
dir.create("../../data/metacity/")
|
|
||||||
metanestdf |>
|
|
||||||
mutate(flag = purrr::map2(Area, citydf,
|
|
||||||
~ writexl::write_xlsx(.y,
|
|
||||||
path = paste0("../../data/metacity/", .x, ".xlsx")
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
1. 从“../../data/sales.xlsx”读取第9到13行的数据
|
|
||||||
2. 从“../../data/meta_city.xlsx”读取所有的数据,并保存至“../../data/meta_city_onetable1.xlsx”
|
|
||||||
3. 从“../../data/metacity/”读取所有的数据,并保存至“../../data/meta_city_onetable2.xlsx”
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 欢迎讨论!{.center}
|
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
|
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
student_id,full_name,favourite_food,meal_plan,age
|
|
||||||
1,Sunil Huffmann,Strawberry yoghurt,Lunch only,4
|
|
||||||
2,Barclay Lynn,French fries,Lunch only,5
|
|
||||||
3,Jayendra Lyne,NA,Breakfast and lunch,7
|
|
||||||
4,Leon Rossini,Anchovies,Lunch only,NA
|
|
||||||
5,Chidiegwu Dunkel,Pizza,Breakfast and lunch,5
|
|
||||||
6,Güvenç Attila,Ice cream,Lunch only,6
|
|
|
|
@ -1 +0,0 @@
|
||||||
../../_extensions
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1 +0,0 @@
|
||||||
../../_extensions
|
|
|
@ -1,153 +0,0 @@
|
||||||
---
|
|
||||||
title: "正则表达式"
|
|
||||||
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
|
|
||||||
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
|
|
||||||
date: today
|
|
||||||
lang: zh
|
|
||||||
format:
|
|
||||||
revealjs:
|
|
||||||
theme: dark
|
|
||||||
slide-number: true
|
|
||||||
chalkboard:
|
|
||||||
buttons: true
|
|
||||||
preview-links: auto
|
|
||||||
lang: zh
|
|
||||||
toc: true
|
|
||||||
toc-depth: 1
|
|
||||||
toc-title: 大纲
|
|
||||||
logo: ./_extensions/inst/img/ucaslogo.png
|
|
||||||
css: ./_extensions/inst/css/revealjs.css
|
|
||||||
pointer:
|
|
||||||
key: "p"
|
|
||||||
color: "#32cd32"
|
|
||||||
pointerSize: 18
|
|
||||||
revealjs-plugins:
|
|
||||||
- pointer
|
|
||||||
filters:
|
|
||||||
- d2
|
|
||||||
---
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| echo: false
|
|
||||||
knitr::opts_chunk$set(echo = TRUE)
|
|
||||||
source("../../coding/_common.R")
|
|
||||||
library(nycflights13)
|
|
||||||
library(tidyverse)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
## 匹配数字
|
|
||||||
|
|
||||||
### 匹配数字:
|
|
||||||
|
|
||||||
- \d:匹配任意数字字符。
|
|
||||||
- \d+:匹配一个或多个数字字符。
|
|
||||||
- [0-9]: 匹配数字
|
|
||||||
|
|
||||||
### 匹配字母:
|
|
||||||
|
|
||||||
- \w:匹配任意字母、数字或下划线字符。
|
|
||||||
- \w+:匹配一个或多个字母、数字或下划线字符。
|
|
||||||
|
|
||||||
## 匹配数字
|
|
||||||
|
|
||||||
### 匹配空白字符:
|
|
||||||
|
|
||||||
- \s:匹配任意空白字符,包括空格、制表符、换行符等。
|
|
||||||
- \s+:匹配一个或多个空白字符。
|
|
||||||
|
|
||||||
### 匹配特定字符:
|
|
||||||
|
|
||||||
- [abc]:匹配字符 a、b 或 c 中的任意一个。
|
|
||||||
- [a-z]:匹配任意小写字母。
|
|
||||||
- [A-Z]:匹配任意大写字母。
|
|
||||||
- [0-9]:匹配任意数字。
|
|
||||||
|
|
||||||
## 匹配数字
|
|
||||||
|
|
||||||
### 匹配重复次数:
|
|
||||||
|
|
||||||
- {n}:匹配前一个字符恰好 n 次。
|
|
||||||
- {n,}:匹配前一个字符至少 n 次。
|
|
||||||
- {n,m}:匹配前一个字符至少 n 次,但不超过 m 次。
|
|
||||||
|
|
||||||
### 匹配边界:
|
|
||||||
|
|
||||||
- ^:匹配字符串的开头。
|
|
||||||
- $:匹配字符串的结尾。
|
|
||||||
|
|
||||||
## 匹配数字
|
|
||||||
|
|
||||||
### 匹配特殊字符:
|
|
||||||
|
|
||||||
- \:转义特殊字符,使其按字面意义匹配。
|
|
||||||
- .:匹配任意单个字符。
|
|
||||||
- |:表示“或”关系,匹配两个或多个表达式之一。
|
|
||||||
|
|
||||||
#### 匹配次数:
|
|
||||||
|
|
||||||
- *:匹配前一个字符零次或多次。
|
|
||||||
- +:匹配前一个字符一次或多次。
|
|
||||||
- ?:匹配前一个字符零次或一次。
|
|
||||||
|
|
||||||
## 匹配数字
|
|
||||||
|
|
||||||
### 分组和捕获:
|
|
||||||
|
|
||||||
- ():将一系列模式组合成一个单元,可与特殊字符一起使用。
|
|
||||||
|
|
||||||
### 预定义字符集:
|
|
||||||
|
|
||||||
- \d:任意数字,相当于 [0-9]。
|
|
||||||
- \w:任意字母、数字或下划线字符,相当于 [a-zA-Z0-9_]。
|
|
||||||
- \s:任意空白字符,相当于 [ \t\n\r\f\v]。
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 实例
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
library(babynames)
|
|
||||||
(x <- c("apple", "apppple", "abc123def"))
|
|
||||||
x[str_detect(x, "[0-9]")]
|
|
||||||
x[str_detect(x, "abc[0-9]+")]
|
|
||||||
x[str_detect(x, "pp")]
|
|
||||||
x[str_detect(x, "p{4}")]
|
|
||||||
x[str_detect(x, "p{4}")]
|
|
||||||
x[str_detect("apple", "ap*")]
|
|
||||||
x[str_detect("apple", "app*")]
|
|
||||||
x[str_detect("apple", "a..le")]
|
|
||||||
```
|
|
||||||
|
|
||||||
## 练习
|
|
||||||
|
|
||||||
|
|
||||||
找出`babynames`中名字含有ar的行
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| echo: false
|
|
||||||
babynames |>
|
|
||||||
filter(str_detect(name, "ar"))
|
|
||||||
```
|
|
||||||
|
|
||||||
## 练习
|
|
||||||
|
|
||||||
|
|
||||||
找出`babynames`中名字含有ar或者以ry结尾的行。
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| echo: false
|
|
||||||
babynames |>
|
|
||||||
filter(str_detect(name, "ar"))
|
|
||||||
```
|
|
||||||
|
|
||||||
![](../../image/data-science/transform.png)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 欢迎讨论!{.center}
|
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
../../_extensions
|
|
|
@ -1,50 +0,0 @@
|
||||||
---
|
|
||||||
title: "课后作业7"
|
|
||||||
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
|
|
||||||
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
|
|
||||||
date: today
|
|
||||||
lang: zh
|
|
||||||
format:
|
|
||||||
revealjs:
|
|
||||||
theme: dark
|
|
||||||
slide-number: true
|
|
||||||
chalkboard:
|
|
||||||
buttons: true
|
|
||||||
preview-links: auto
|
|
||||||
lang: zh
|
|
||||||
toc: true
|
|
||||||
toc-depth: 1
|
|
||||||
toc-title: 大纲
|
|
||||||
logo: ./_extensions/inst/img/ucaslogo.png
|
|
||||||
css: ./_extensions/inst/css/revealjs.css
|
|
||||||
pointer:
|
|
||||||
key: "p"
|
|
||||||
color: "#32cd32"
|
|
||||||
pointerSize: 18
|
|
||||||
revealjs-plugins:
|
|
||||||
- pointer
|
|
||||||
filters:
|
|
||||||
- d2
|
|
||||||
---
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
#| include: false
|
|
||||||
#| cache: false
|
|
||||||
lang <- "cn"
|
|
||||||
require(tidyverse)
|
|
||||||
require(learnr)
|
|
||||||
```
|
|
||||||
|
|
||||||
## 第7次课后作业
|
|
||||||
|
|
||||||
1. 根据`airqualitydf.xlsx`,按采样点统计周末2天与工作日5天中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。
|
|
||||||
2. 按照不同城市分组,统计周末2天与工作日5天AQI中位数是否具有显著差异。
|
|
||||||
|
|
||||||
作业模板:[第7次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240326_9_课后作业/第7次课后作业_模板.qmd)
|
|
||||||
|
|
||||||
|
|
||||||
## 欢迎讨论!{.center}
|
|
||||||
|
|
||||||
|
|
||||||
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`
|
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
---
|
|
||||||
title: 课后作业7
|
|
||||||
author: 姓名
|
|
||||||
format: html
|
|
||||||
---
|
|
||||||
|
|
||||||
|
|
||||||
# 示例问题,在R中输出`1+2+3+...+100`的结果
|
|
||||||
|
|
||||||
采用`for`循环完成,代码如下:
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
total <- 0
|
|
||||||
for (i in 1:100) {
|
|
||||||
total <- total + i
|
|
||||||
}
|
|
||||||
total
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
# 根据`airqualitydf.xlsx`,按采样点统计周末2天与工作日5天中空气质量指数(AQI)中位数,按城市统计低于所有采样点AQI30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# 按照不同城市分组,统计周末2天与工作日5天AQI中位数是否具有显著差异。
|
|
|
@ -1,78 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# 2024-03-26
|
|
||||||
|
|
||||||
# - part: whole-game.qmd
|
|
||||||
# chapters:
|
|
||||||
# - data-visualize.qmd
|
|
||||||
# - workflow-basics.qmd
|
|
||||||
# - data-transform.qmd
|
|
||||||
# - workflow-style.qmd
|
|
||||||
# - data-tidy.qmd
|
|
||||||
# - workflow-scripts.qmd
|
|
||||||
# - data-import.qmd
|
|
||||||
# - workflow-help.qmd
|
|
||||||
|
|
||||||
# - part: visualize.qmd
|
|
||||||
# chapters:
|
|
||||||
# - layers.qmd
|
|
||||||
# - EDA.qmd
|
|
||||||
# - communication.qmd
|
|
||||||
|
|
||||||
# 2024-03-28
|
|
||||||
|
|
||||||
# - part: transform.qmd
|
|
||||||
# chapters:
|
|
||||||
# - logicals.qmd
|
|
||||||
# - numbers.qmd
|
|
||||||
# - strings.qmd
|
|
||||||
# - regexps.qmd
|
|
||||||
# - factors.qmd
|
|
||||||
# - datetimes.qmd
|
|
||||||
# - missing-values.qmd
|
|
||||||
# - joins.qmd
|
|
||||||
#
|
|
||||||
# - part: import.qmd
|
|
||||||
# chapters:
|
|
||||||
# - spreadsheets.qmd
|
|
||||||
# - databases.qmd
|
|
||||||
# - arrow.qmd
|
|
||||||
# - rectangling.qmd
|
|
||||||
# - webscraping.qmd
|
|
||||||
|
|
||||||
# 2024-04-02
|
|
||||||
|
|
||||||
# - part: program.qmd
|
|
||||||
# chapters:
|
|
||||||
# - functions.qmd
|
|
||||||
# - iteration.qmd
|
|
||||||
# - base-R.qmd
|
|
||||||
#
|
|
||||||
# - part: communicate.qmd
|
|
||||||
# chapters:
|
|
||||||
# - quarto.qmd
|
|
||||||
# - quarto-formats.qmd
|
|
||||||
|
|
||||||
r4dsdir="$HOME/research/r4ds"
|
|
||||||
|
|
||||||
datestr="20240326"
|
|
||||||
|
|
||||||
mkdir 20240326_1_data-visualize
|
|
||||||
mkdir 20240326_2_workflow-basics
|
|
||||||
mkdir 20240326_3_data-transform
|
|
||||||
mkdir 20240326_4_workflow-style
|
|
||||||
mkdir 20240326_5_data-tidy
|
|
||||||
mkdir 20240326_6_workflow-scripts
|
|
||||||
mkdir 20240326_7_data-import
|
|
||||||
mkdir 20240326_8_workflow-help
|
|
||||||
|
|
||||||
awk '/```{r}/{print "\n##\n\n" $0; flag=1; next} /```/{flag=0} flag || /```/' "${r4dsdir}"/data-visualize.qmd >20240326_1_data-visualize/index.qmd
|
|
||||||
awk '/```{r}/{print "\n##\n\n" $0; flag=1; next} /```/{flag=0} flag || /```/' "${r4dsdir}"/workflow-basics.qmd >20240326_2_workflow-basics/index.qmd
|
|
||||||
awk '/```{r}/{print "\n##\n\n" $0; flag=1; next} /```/{flag=0} flag || /```/' "${r4dsdir}"/data-transform.qmd >20240326_3_data-transform/index.qmd
|
|
||||||
awk '/```{r}/{print "\n##\n\n" $0; flag=1; next} /```/{flag=0} flag || /```/' "${r4dsdir}"/workflow-style.qmd >20240326_4_workflow-style/index.qmd
|
|
||||||
awk '/```{r}/{print "\n##\n\n" $0; flag=1; next} /```/{flag=0} flag || /```/' "${r4dsdir}"/data-tidy.qmd >20240326_5_data-tidy/index.qmd
|
|
||||||
awk '/```{r}/{print "\n##\n\n" $0; flag=1; next} /```/{flag=0} flag || /```/' "${r4dsdir}"/workflow-scripts.qmd >20240326_6_workflow-scripts/index.qmd
|
|
||||||
awk '/```{r}/{print "\n##\n\n" $0; flag=1; next} /```/{flag=0} flag || /```/' "${r4dsdir}"/data-import.qmd >20240326_7_data-import/index.qmd
|
|
||||||
awk '/```{r}/{print "\n##\n\n" $0; flag=1; next} /```/{flag=0} flag || /```/' "${r4dsdir}"/workflow-help.qmd >20240326_8_workflow-help/index.qmd
|
|
||||||
|
|
||||||
find * -type d | grep 0326 | xargs -I{} cp -r ./20240321_0_R实践课程/_extensions {}/
|
|
|
@ -325,7 +325,7 @@ figure > figcaption {
|
||||||
|
|
||||||
.reveal pre code:hover{
|
.reveal pre code:hover{
|
||||||
font-size: xx-large;
|
font-size: xx-large;
|
||||||
line-height: 120%;
|
line-height: 250%;
|
||||||
}
|
}
|
||||||
|
|
||||||
section#title-slide p.subtitle {
|
section#title-slide p.subtitle {
|
||||||
|
|
|
@ -7,7 +7,6 @@ project:
|
||||||
- "!analysis/"
|
- "!analysis/"
|
||||||
- "!*ignored.qmd"
|
- "!*ignored.qmd"
|
||||||
- "!*ignored/"
|
- "!*ignored/"
|
||||||
- "!coding/"
|
|
||||||
|
|
||||||
|
|
||||||
title: "区域水环境污染数据分析实践"
|
title: "区域水环境污染数据分析实践"
|
||||||
|
|
|
@ -1,55 +0,0 @@
|
||||||
set.seed(1014)
|
|
||||||
|
|
||||||
# knitr::opts_chunk$set(
|
|
||||||
# comment = "#>",
|
|
||||||
# collapse = TRUE,
|
|
||||||
# cache = TRUE,
|
|
||||||
# fig.retina = 2,
|
|
||||||
# fig.width = 6,
|
|
||||||
# fig.asp = 2 / 3,
|
|
||||||
# fig.show = "hold"
|
|
||||||
# )
|
|
||||||
|
|
||||||
# options(
|
|
||||||
# dplyr.print_min = 6,
|
|
||||||
# dplyr.print_max = 6,
|
|
||||||
# pillar.max_footer_lines = 2,
|
|
||||||
# pillar.min_chars = 15,
|
|
||||||
# stringr.view_n = 6,
|
|
||||||
# # Temporarily deactivate cli output for quarto
|
|
||||||
# cli.num_colors = 0,
|
|
||||||
# cli.hyperlink = FALSE,
|
|
||||||
# pillar.bold = TRUE,
|
|
||||||
# width = 77 # 80 - 3 for #> comment
|
|
||||||
# )
|
|
||||||
|
|
||||||
ggplot2::theme_set(ggplot2::theme_gray(12))
|
|
||||||
|
|
||||||
# use results: "asis" when setting a status for a chapter
|
|
||||||
status <- function(type) {
|
|
||||||
status <- switch(type,
|
|
||||||
polishing = "should be readable but is currently undergoing final polishing",
|
|
||||||
restructuring = "is undergoing heavy restructuring and may be confusing or incomplete",
|
|
||||||
drafting = "is currently a dumping ground for ideas, and we don't recommend reading it",
|
|
||||||
complete = "is largely complete and just needs final proof reading",
|
|
||||||
stop("Invalid `type`", call. = FALSE)
|
|
||||||
)
|
|
||||||
|
|
||||||
class <- switch(type,
|
|
||||||
polishing = "note",
|
|
||||||
restructuring = "important",
|
|
||||||
drafting = "important",
|
|
||||||
complete = "note"
|
|
||||||
)
|
|
||||||
|
|
||||||
cat(paste0(
|
|
||||||
"\n",
|
|
||||||
":::: status\n",
|
|
||||||
"::: callout-", class, " \n",
|
|
||||||
"You are reading the work-in-progress second edition of R for Data Science. ",
|
|
||||||
"This chapter ", status, ". ",
|
|
||||||
"You can find the complete first edition at <https://r4ds.had.co.nz>.\n",
|
|
||||||
":::\n",
|
|
||||||
"::::\n"
|
|
||||||
))
|
|
||||||
}
|
|
|
@ -1,9 +0,0 @@
|
||||||
---
|
|
||||||
title: 实时代码
|
|
||||||
date: last-modified
|
|
||||||
---
|
|
||||||
|
|
||||||
```{r}
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
|
@ -1,8 +0,0 @@
|
||||||
month,year,brand,item,n
|
|
||||||
January,2019,1,1234,3
|
|
||||||
January,2019,1,8721,9
|
|
||||||
January,2019,1,1822,2
|
|
||||||
January,2019,2,3333,1
|
|
||||||
January,2019,2,2156,9
|
|
||||||
January,2019,2,3987,6
|
|
||||||
January,2019,2,3827,6
|
|
|
|
@ -1,7 +0,0 @@
|
||||||
month,year,brand,item,n
|
|
||||||
February,2019,1,1234,8
|
|
||||||
February,2019,1,8721,2
|
|
||||||
February,2019,1,1822,3
|
|
||||||
February,2019,2,3333,1
|
|
||||||
February,2019,2,2156,3
|
|
||||||
February,2019,2,3987,6
|
|
|
|
@ -1,7 +0,0 @@
|
||||||
month,year,brand,item,n
|
|
||||||
March,2019,1,1234,3
|
|
||||||
March,2019,1,3627,1
|
|
||||||
March,2019,1,8820,3
|
|
||||||
March,2019,2,7253,1
|
|
||||||
March,2019,2,8766,3
|
|
||||||
March,2019,2,8288,6
|
|
|
Binary file not shown.
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
|
|
||||||
repurrrsive::gap_simple |>
|
|
||||||
count(year)
|
|
||||||
|
|
||||||
by_year <- repurrrsive::gap_simple |>
|
|
||||||
group_by(year)
|
|
||||||
paths <- by_year |>
|
|
||||||
group_keys() |>
|
|
||||||
mutate(path = str_glue("data/gapminder/{year}.xlsx")) |>
|
|
||||||
pull()
|
|
||||||
paths
|
|
||||||
|
|
||||||
years <- by_year |>
|
|
||||||
group_split() |>
|
|
||||||
map(\(df) select(df, -year))
|
|
||||||
|
|
||||||
dir.create("data/gapminder")
|
|
||||||
|
|
||||||
walk2(years, paths, writexl::write_xlsx)
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1193
data/heights.csv
1193
data/heights.csv
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue