diff --git a/SD/1.1_R语言介绍/_demo.qmd b/SD/1.1_R语言介绍/_demo.qmd
new file mode 100644
index 0000000..14b83f8
--- /dev/null
+++ b/SD/1.1_R语言介绍/_demo.qmd
@@ -0,0 +1,70 @@
+---
+title: "Lesson 6"
+format: html
+---
+
+<!-- Classroom RStudio Server accounts: plain text, so the fence below is not an executable R chunk. -->
+
+```text
+https://rs1.drwater.net
+
+username:
+  - ruser01
+  - ruser02
+  - ruser03
+  - ruser04
+  - ruser05
+  - ruser06
+
+password: <announced in class; do not commit credentials>
+```
+
+# 安装包
+
+```{r}
+# Install only when the package is missing, so re-running the chunk is cheap.
+if (!requireNamespace("tidyverse", quietly = TRUE)) {
+  install.packages("tidyverse")
+}
+
+x <- c(1:10, NA)
+
+hist(x)
+
+# Aggregates return NA when the input has NA unless na.rm = TRUE is given.
+mean(x, na.rm = TRUE)
+median(x, na.rm = TRUE)
+sd(x, na.rm = TRUE)
+
+for (i in 1:10) {
+  print(i)
+}
+
+# `y` has to exist before it can be used in an expression; define it first.
+y <- 3
+x + y + x * y
+
+# Compute x + y + x * y; `y` defaults to 3 when the caller omits it.
+myfunc <- function(x, y = 3) {
+  x + y + x * y
+}
+
+myfunc(1, 2)
+myfunc(10)
+
+c(FALSE, 2, 1:3, 3)
+c(FALSE, 2, 1:3, 3) > 1
+all(c(FALSE, 2, 1:3, 3) > 1)
+
+c(1L, 2L, 3L)
+
+any(c(FALSE, 2, 1:3, 3) > 1)
+
+x <- 10
+
+# Question for the class: sin(x) = ?  (kept as a comment; it is not valid R)
+paste("sin(x) = ", sin(x), sep = " ")
+paste0("sin(x) = ", sin(x))
+
+substr("Monday", 1, 3)
+```
diff --git a/SD/1.2_R语言语法基础/index.qmd b/SD/1.2_R语言语法基础/index.qmd
index 31c1e89..18bd519 100644
--- a/SD/1.2_R语言语法基础/index.qmd
+++ b/SD/1.2_R语言语法基础/index.qmd
@@ -264,6 +264,9 @@ t.test(x, y)
 wilcox.test(x, y)
 ```
 
+
+### [什么是 Wilcoxon-Mann-Whitney检验?](https://zhuanlan.zhihu.com/p/613524533)
+
 ## 统计函数
 
 ### 创建向量的直方图
diff --git a/SD/2.1_codestyle/_extensions b/SD/2.1_codestyle/_extensions
new file mode 120000
index 0000000..74119e3
--- /dev/null
+++ b/SD/2.1_codestyle/_extensions
@@ -0,0 +1 @@
+../../_extensions
\ No newline at end of file
diff --git a/SD/2.1_codestyle/index.qmd b/SD/2.1_codestyle/index.qmd
new file mode 100644
index 0000000..b6b2bd5
--- /dev/null
+++ b/SD/2.1_codestyle/index.qmd
@@ -0,0 +1,162 @@
+---
+title: "代码编写规则"
+subtitle: 《区域水环境污染数据分析实践》
Data analysis practice of regional water environment pollution +author: 苏命、王为东
中国科学院大学资源与环境学院
中国科学院生态环境研究中心
+date: today
+lang: zh
+format:
+  revealjs:
+    theme: dark
+    slide-number: true
+    chalkboard:
+      buttons: true
+    preview-links: auto
+    lang: zh
+    toc: true
+    toc-depth: 1
+    toc-title: 大纲
+    logo: ./_extensions/inst/img/ucaslogo.png
+    css: ./_extensions/inst/css/revealjs.css
+    pointer:
+      key: "p"
+      color: "#32cd32"
+      pointerSize: 18
+revealjs-plugins:
+  - pointer
+filters:
+  - d2
+---
+
+
+```{r}
+#| echo: false
+knitr::opts_chunk$set(echo = TRUE)
+# source("../../coding/_common.R")
+library(tidyverse)
+library(nycflights13)
+```
+
+## tidy data
+
+```{r}
+knitr::include_graphics("../../image/tidy-1.png", dpi = 270)
+```
+
+
+## pipe(管道) |>
+
+```{r}
+#| eval: false
+# base graphics only: no package has to be attached for these plots
+plot(1:10)
+1:10 |> plot()
+plot(x = 1:10, y = sin(1:10))
+1:10 |> plot(y = sin(1:10))
+
+```
+
+```{r}
+#| echo: false
+#| layout-nrow: 1
+#| fig-width: 4
+#| fig-height: 3
+#| out-height: 90%
+# base graphics only: no package has to be attached for these plots
+plot(1:10)
+1:10 |> plot()
+plot(x = 1:10, y = sin(1:10))
+1:10 |> plot(y = sin(1:10))
+
+```
+
+
+
+## pipe(管道):%>%
+
+```{r}
+#| eval: false
+#| layout-nrow: 1
+#| fig-width: 3
+#| fig-height: 4
+#| out-height: 125%
+library(magrittr)
+1:10 %>% plot()
+1:10 %>% plot(y = sin(1:10))
+sin(1:10) %>% plot(1:10, .) # "." marks where the piped value goes
+sin(1:10) |> plot(x = 1:10, y = _) # "_" is the native-pipe placeholder (R >= 4.2)
+
+```
+
+```{r}
+#| echo: false
+#| layout-nrow: 1
+#| fig-width: 3
+#| fig-height: 4
+#| out-height: 125%
+library(magrittr)
+1:10 %>% plot()
+1:10 %>% plot(y = sin(1:10))
+sin(1:10) %>% plot(1:10, .)
+sin(1:10) |> plot(x = 1:10, y = _)
+
+```
+
+
+
+
+
+
+## 代码编写规则
+
+```{r}
+#| eval: false
+
+# Strive for:
+short_flights <- flights |> filter(air_time < 60)
+# Avoid:
+SHORTFLIGHTS <- flights |> filter(air_time < 60)
+
+# Strive for
+z <- (a + b)^2 / d
+# Avoid
+z<-( a + b ) ^ 2/d
+
+# Strive for
+mean(x, na.rm = TRUE)
+# Avoid
+mean (x ,na.rm=TRUE)
+```
+
+## 练习
+
+```{r}
+#| eval: false
+flights|>filter(dest=="IAH")|>
+  group_by(year,month,day)|>summarize(n=n(),
+delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10)
+
+```
+
+## 练习
+
+```{r}
+#| eval: false
+flights |>
+  filter(dest == "IAH") |>
+  group_by(year, month, day) |>
+  summarize(n = n(),
+            delay = mean(arr_delay, na.rm = TRUE)) |>
+  filter(n > 10)
+
+```
+
+## quarto
+
+![](../../image/quarto-flow.png)
+
+
+## 欢迎讨论!{.center}
+
+
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`
+
diff --git a/SD/2.2_dataimport/_extensions b/SD/2.2_dataimport/_extensions
new file mode 120000
index 0000000..74119e3
--- /dev/null
+++ b/SD/2.2_dataimport/_extensions
@@ -0,0 +1 @@
+../../_extensions
\ No newline at end of file
diff --git a/SD/2.2_dataimport/index.qmd b/SD/2.2_dataimport/index.qmd
new file mode 100644
index 0000000..0882b47
--- /dev/null
+++ b/SD/2.2_dataimport/index.qmd
@@ -0,0 +1,324 @@
+---
+title: "Data import"
+subtitle: 《区域水环境污染数据分析实践》
Data analysis practice of regional water environment pollution +author: 苏命、王为东
中国科学院大学资源与环境学院
中国科学院生态环境研究中心
+date: today
+lang: zh
+format:
+  revealjs:
+    theme: dark
+    slide-number: true
+    chalkboard:
+      buttons: true
+    preview-links: auto
+    lang: zh
+    toc: true
+    toc-depth: 1
+    toc-title: 大纲
+    logo: ./_extensions/inst/img/ucaslogo.png
+    css: ./_extensions/inst/css/revealjs.css
+    pointer:
+      key: "p"
+      color: "#32cd32"
+      pointerSize: 18
+revealjs-plugins:
+  - pointer
+filters:
+  - d2
+---
+
+
+```{r}
+#| echo: false
+knitr::opts_chunk$set(echo = TRUE)
+source("../../coding/_common.R")
+library(tidyverse)
+```
+
+
+## tidyverse风格数据分析总体流程
+
+
+![](../../image/data-science/import.png)
+
+
+## 导入csv数据
+
+```{r}
+read_lines("../../data/students.csv") |> cat(sep = "\n")
+```
+
+## 导入csv数据
+
+
+```{r}
+read_csv("../../data/students.csv") |>
+  knitr::kable()
+```
+
+
+## 读取数据
+
+```{r}
+(students <- read_csv("../../data/students.csv"))
+```
+
+
+## 读取数据
+
+```{r}
+#| message: false
+(students <- read_csv("../../data/students.csv", na = c("N/A", "")))
+```
+
+## 列名不要有空格
+
+```{r}
+students |>
+  rename(
+    student_id = `Student ID`,
+    full_name = `Full Name`
+  )
+```
+
+## `janitor`处理空格
+
+```{r}
+#| message: false
+
+students |> janitor::clean_names()
+```
+
+## `janitor`处理空格
+
+```{r}
+students |>
+  janitor::clean_names() |>
+  mutate(meal_plan = factor(meal_plan))
+```
+
+## `janitor`处理空格
+
+```{r}
+students <- students |>
+  janitor::clean_names() |>
+  mutate(
+    meal_plan = factor(meal_plan),
+    age = parse_number(if_else(age == "five", "5", age))
+  )
+students
+```
+
+
+## 直接录入
+
+```{r}
+#| message: false
+
+read_csv(
+  "The first line of metadata
+  The second line of metadata
+  x,y,z
+  1,2,3",
+  skip = 2
+)
+```
+
+
+## 直接录入
+
+```{r}
+#| message: false
+read_csv(
+  "# A comment I want to skip
+  x,y,z
+  1,2,3",
+  comment = "#"
+)
+```
+
+
+## 指定列名
+
+```{r}
+#| message: false
+
+read_csv(
+  "1,2,3
+  4,5,6",
+  col_names = c("x", "y", "z")
+)
+```
+
+## 指定列的类型
+
+```{r}
+another_csv <- "
+x,y,z
+1,2,3"
+
+read_csv(
+  another_csv,
+  col_types = cols(.default = col_character())
+)
+read_csv(
+  another_csv,
+  col_types = cols_only(x = col_character())
+)
+```
+
+
+## 练习
+
+```{r}
+#| eval: false
+
+read_csv("a,b\n1,2,3\n4,5,6")
+read_csv("a,b,c\n1,2\n1,2,3,4")
+read_csv("a,b\n\"1")
+read_csv("a,b\n1,2\na,b")
+read_csv("a;b\n1;3")
+```
+
+## 练习
+
+```{r}
+#| eval: false
+annoying <- tibble(
+  `1` = 1:10,
+  `2` = `1` * 2 + rnorm(length(`1`))
+)
+```
+
+
+
+## 批量读取
+
+```{r}
+#| message: false
+
+sales_files <- c("../../data/01-sales.csv",
+                 "../../data/02-sales.csv",
+                 "../../data/03-sales.csv")
+read_csv(sales_files, id = "file")
+```
+
+
+
+
+## 读取Excel,建议用`readxl`包
+
+```{r}
+(surveydf <- readxl::read_xlsx("../../data/survey.xlsx"))
+```
+
+## 读取Excel
+
+```{r}
+(airqualitydf <- readxl::read_xlsx("../../data/airquality.xlsx", sheet = 2))
+```
+
+
+
+## 批量读取
+
+```{r}
+sales_files <- list.files("../../data",
+  pattern = "sales\\.csv$", full.names = TRUE)
+sales_files
+```
+
+## 写入csv
+
+```{r}
+#| warning: false
+#| message: false
+students
+write_csv(students, "students-2.csv")
+read_csv("students-2.csv")
+```
+
+## 写入Excel
+
+```{r}
+writexl::write_xlsx(students, "../../data/writexldemo.xlsx")
+```
+
+## 读取数据库,以MySQL为例
+
+```{r}
+if (FALSE) {
+  conn <- cctdb::get_dbconn("nationalairquality")
+  DBI::dbListTables(conn)
+}
+```
+
+
+
+## 读取数据库,以MySQL为例
+
+```{r}
+if (FALSE) {
+  conn <- cctdb::get_dbconn("nationalairquality")
+  metadf <- tbl(conn, "metadf") |>
+    head(100) |>
+    collect()
+  DBI::dbDisconnect(conn)
+  saveRDS(metadf, file = "../../data/metadfdemo.RDS")
+}
+metadf <- readRDS(file = "../../data/metadfdemo.RDS")
+lang <- "cn"
+metadf |>
+  ggplot(aes(lon, lat)) +
+  geom_point(aes(fill = Area), shape = 21) + # fill is only drawn for point shapes 21-25
+  dwfun::theme_sci()
+```
+
+
+## 练习
+
+```{r}
+#| eval: false
+metadf <- readxl::read_xlsx("../../data/airquality.xlsx")
+dir.create("../../data/metacity2/", showWarnings = FALSE) # no warning when the folder already exists
+metadf |>
+  nest(sitedf = -site) |>
+  mutate(flag = purrr::map2(site, sitedf,
+    \(site_name, site_rows) writexl::write_xlsx(site_rows, paste0("../../data/metacity2/", site_name, ".xlsx"))))
+```
+
+
+## 
练习
+
+```{r}
+#| include: false
+#| eval: false
+if (FALSE) {
+  library(tidyverse)
+  conn <- cctdb::get_dbconn("nationalairquality")
+  metadf <- tbl(conn, "metadf") |>
+    collect()
+  DBI::dbDisconnect(conn)
+  metanestdf <- metadf |>
+    nest(citydf = -Area)
+  names(metanestdf$citydf) <- metanestdf$Area
+  writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx")
+  dir.create("../../data/metacity/", showWarnings = FALSE) # no warning when the folder already exists
+  metanestdf |>
+    mutate(flag = purrr::map2(Area, citydf,
+      \(area, city_rows) writexl::write_xlsx(city_rows,
+        path = paste0("../../data/metacity/", area, ".xlsx")
+      )))
+}
+```
+
+1. 从“../../data/sales.xlsx”读取第9到13行的数据
+2. 从“../../data/meta_city.xlsx”读取所有的数据,并保存至“../../data/meta_city_onetable1.xlsx”
+3. 从“../../data/metacity/”读取所有的数据,并保存至“../../data/meta_city_onetable2.xlsx”
+
+
+
+## 欢迎讨论!{.center}
+
+
+`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`
diff --git a/SD/2.2_dataimport/students-2.csv b/SD/2.2_dataimport/students-2.csv
new file mode 100644
index 0000000..3775770
--- /dev/null
+++ b/SD/2.2_dataimport/students-2.csv
@@ -0,0 +1,7 @@
+student_id,full_name,favourite_food,meal_plan,age
+1,Sunil Huffmann,Strawberry yoghurt,Lunch only,4
+2,Barclay Lynn,French fries,Lunch only,5
+3,Jayendra Lyne,NA,Breakfast and lunch,7
+4,Leon Rossini,Anchovies,Lunch only,NA
+5,Chidiegwu Dunkel,Pizza,Breakfast and lunch,5
+6,Güvenç Attila,Ice cream,Lunch only,6
diff --git a/_quarto.yml b/_quarto.yml
index d9b729a..1f67215 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -9,6 +9,7 @@ project:
     - "!*ignored/"
     - "!coding/"
     - "!SD/_*/"
+    - "!SD/*/_*.qmd"
 
 title: "区域水环境污染数据分析实践"
 lang: zh