Compare commits

...

5 Commits

Author SHA1 Message Date
Ming Su 437ad3553d merge main 2024-03-25 22:11:41 +08:00
Ming Su 07e0b5a49c merge main 2024-03-25 22:09:43 +08:00
Ming Su 04cc6935d6 第7次课 2024-03-25 21:48:30 +08:00
Ming Su 00c44b652e add data from r4ds 2024-03-21 22:32:02 +08:00
Ming Su efb50900ef 准备第7次课 2024-03-21 22:30:54 +08:00
479 changed files with 121486 additions and 9 deletions

View File

@ -87,7 +87,7 @@ set.seed <- 23
uiddf <- coursememberdf |> uiddf <- coursememberdf |>
dplyr::mutate(uid = paste0("ruser", sprintf("%02d", sample(1:nmem, nmem)))) |> dplyr::mutate(uid = paste0("ruser", sprintf("%02d", sample(1:nmem, nmem)))) |>
dplyr::select(uid, 姓名) |> dplyr::select(uid, 姓名) |>
dplyr::mutate(姓名 = "**") |> dplyr::mutate(姓名 = "XX") |>
dplyr::arrange(uid) |> dplyr::arrange(uid) |>
dplyr::mutate(group = (dplyr::row_number() - 1) %/% 10 + 1) |> dplyr::mutate(group = (dplyr::row_number() - 1) %/% 10 + 1) |>
tidyr::nest(gdf = -group) |> tidyr::nest(gdf = -group) |>
@ -106,7 +106,6 @@ uiddf$tab[[2]]
```{r} ```{r}
#| echo: false #| echo: false
#| output: asis #| output: asis
uiddf$tab[[3]] uiddf$tab[[3]]
uiddf$tab[[4]] uiddf$tab[[4]]
``` ```
@ -143,11 +142,12 @@ author: 姓名
``` ```
```` ````
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`

View File

@ -218,4 +218,4 @@ devtools::install_github("kjhealy/socviz")
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`

View File

@ -792,4 +792,4 @@ names(Y) <- c("colA", "colB", "colC")
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`

View File

@ -130,4 +130,4 @@ require(learnr)
## 欢迎讨论!{.center} ## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")` `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`

View File

@ -0,0 +1 @@
../../_extensions

View File

@ -0,0 +1,157 @@
---
title: "代码编写规则"
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
date: today
lang: zh
format:
revealjs:
theme: dark
slide-number: true
chalkboard:
buttons: true
preview-links: auto
lang: zh
toc: true
toc-depth: 1
toc-title: 大纲
logo: ./_extensions/inst/img/ucaslogo.png
css: ./_extensions/inst/css/revealjs.css
pointer:
key: "p"
color: "#32cd32"
pointerSize: 18
revealjs-plugins:
- pointer
filters:
- d2
---
```{r}
#| echo: false
knitr::opts_chunk$set(echo = TRUE)
# source("../../coding/_common.R")
library(tidyverse)
library(nycflights13)
```
## tidy data
```{r}
knitr::include_graphics("../../image/tidy-1.png", dpi = 270)
```
## pipe管道 |>
```{r}
#| eval: false
require(patchwork)
plot(1:10)
1:10 |> plot()
plot(x = 1:10, y = sin(1:10))
1:10 |> plot(y = sin(1:10))
```
```{r}
#| echo: false
#| layout-nrow: 1
#| fig-width: 4
#| fig-height: 3
#| out-height: 90%
require(patchwork)
plot(1:10)
1:10 |> plot()
plot(x = 1:10, y = sin(1:10))
1:10 |> plot(y = sin(1:10))
```
## pipe管道%>%
```{r}
#| eval: false
#| layout-nrow: 1
#| fig-width: 3
#| fig-height: 4
#| out-height: 125%
require(magrittr)
1:10 %>% plot()
1:10 %>% plot(y = sin(1:10))
sin(1:10) %>% plot(1:10, .)
sin(1:10) |> plot(x = 1:10, y = _)
```
```{r}
#| echo: false
#| layout-nrow: 1
#| fig-width: 3
#| fig-height: 4
#| out-height: 125%
require(magrittr)
1:10 %>% plot()
1:10 %>% plot(y = sin(1:10))
sin(1:10) %>% plot(1:10, .)
sin(1:10) |> plot(x = 1:10, y = _)
```
## 代码编写规则
```{r}
#| eval: false
# Strive for:
short_flights <- flights |> filter(air_time < 60)
# Avoid:
SHORTFLIGHTS <- flights |> filter(air_time < 60)
# Strive for
z <- (a + b)^2 / d
# Avoid
z<-( a + b ) ^ 2/d
# Strive for
mean(x, na.rm = TRUE)
# Avoid
mean (x ,na.rm=TRUE)
```
## 练习
```{r}
#| eval: false
flights|>filter(dest=="IAH")|>
group_by(year,month,day)|>summarize(n=n(),
delay=mean(arr_delay,na.rm=TRUE))|>filter(n>10)
flights|>filter(carrier=="UA",
dest%in%c("IAH","HOU"),sched_dep_time>
0900,sched_arr_time<2000)|>group_by(flight)|>
summarize(delay=mean(
arr_delay,na.rm=TRUE),cancelled=sum(is.na(arr_delay)),
n=n())|>filter(n>10)
```
## quarto
![](../../image/quarto-flow.png)
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`

View File

@ -0,0 +1 @@
../../_extensions

View File

@ -0,0 +1,310 @@
---
title: "Data import"
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
date: today
lang: zh
format:
revealjs:
theme: dark
slide-number: true
chalkboard:
buttons: true
preview-links: auto
lang: zh
toc: true
toc-depth: 1
toc-title: 大纲
logo: ./_extensions/inst/img/ucaslogo.png
css: ./_extensions/inst/css/revealjs.css
pointer:
key: "p"
color: "#32cd32"
pointerSize: 18
revealjs-plugins:
- pointer
filters:
- d2
---
```{r}
#| echo: false
knitr::opts_chunk$set(echo = TRUE)
source("../../coding/_common.R")
library(tidyverse)
```
## tidyverse风格数据分析总体流程
![](../../image/data-science/import.png)
## 导入csv数据
```{r}
read_lines("../../data/students.csv") |> cat(sep = "\n")
```
## 导入csv数据
```{r}
read_csv("../../data/students.csv") |>
knitr::kable()
```
## 读取数据
```{r}
(students <- read_csv("../../data/students.csv"))
```
## 读取数据
```{r}
#| message: false
(students <- read_csv("../../data/students.csv", na = c("N/A", "")))
```
## 列名不要有空格
```{r}
students |>
rename(
student_id = `Student ID`,
full_name = `Full Name`
)
```
## `janitor`处理空格
```{r}
#| message: false
students |> janitor::clean_names()
```
## `janitor`处理空格
```{r}
students |>
janitor::clean_names() |>
mutate(meal_plan = factor(meal_plan))
```
## `janitor`处理空格
```{r}
students <- students |>
janitor::clean_names() |>
mutate(
meal_plan = factor(meal_plan),
age = parse_number(if_else(age == "five", "5", age))
)
students
```
## 直接录入
```{r}
#| message: false
read_csv(
"The first line of metadata
The second line of metadata
x,y,z
1,2,3",
skip = 2
)
```
## 直接录入
```{r}
#| message: false
read_csv(
"# A comment I want to skip
x,y,z
1,2,3",
comment = "#"
)
```
## 指定列名
```{r}
#| message: false
read_csv(
"1,2,3
4,5,6",
col_names = c("x", "y", "z")
)
```
## 指定列的类型
```{r}
another_csv <- "
x,y,z
1,2,3"
read_csv(
another_csv,
col_types = cols(.default = col_character())
)
read_csv(
another_csv,
col_types = cols_only(x = col_character())
)
```
## 练习
```{r}
#| eval: false
read_csv("a,b\n1,2,3\n4,5,6")
read_csv("a,b,c\n1,2\n1,2,3,4")
read_csv("a,b\n\"1")
read_csv("a,b\n1,2\na,b")
read_csv("a;b\n1;3")
```
## 练习
```{r}
#| eval: false
annoying <- tibble(
`1` = 1:10,
`2` = `1` * 2 + rnorm(length(`1`))
)
```
## 批量读取
```{r}
#| message: false
sales_files <- c("../../data/01-sales.csv",
"../../data/02-sales.csv",
"../../data/03-sales.csv")
read_csv(sales_files, id = "file")
```
## 读取Excel建议用`readxl`包
```{r}
(surveydf <- readxl::read_xlsx("../../data/survey.xlsx"))
```
## 读取Excel
```{r}
(airqualitydf <- readxl::read_xlsx("../../data/airquality.xlsx", sheet = 2))
```
## 批量读取
```{r}
sales_files <- list.files("../../data",
pattern = "sales\\.csv$", full.names = TRUE)
sales_files
```
## 写入csv
```{r}
#| warning: false
#| message: false
students
write_csv(students, "students-2.csv")
read_csv("students-2.csv")
```
## 写入Excel
```{r}
writexl::write_xlsx(students, "../../data/writexldemo.xlsx")
```
## 读取数据库以MySQL为例
```{r}
if (FALSE) {
conn <- cctdb::get_dbconn("nationalairquality")
DBI::dbListTables(conn)
}
```
## 读取数据库以MySQL为例
```{r}
# Disabled by default: query the first 100 rows of the `metadf` table and
# cache them as an RDS file so the slides can render without database access.
if (FALSE) {
  conn <- cctdb::get_dbconn("nationalairquality")
  metadf <- tbl(conn, "metadf") |>
    head(100) |>
    collect()
  DBI::dbDisconnect(conn)
  saveRDS(metadf, file = "../../data/metadfdemo.RDS")
}
# Render from the cached copy.
metadf <- readRDS(file = "../../data/metadfdemo.RDS")
# NOTE(review): `lang` is not used in this chunk; presumably read by
# dwfun::theme_sci() -- confirm.
lang <- "cn"
# Map Area to colour: the default point shape has no fill, so `fill = Area`
# would draw every point identically.
metadf |>
  ggplot(aes(lon, lat)) +
  geom_point(aes(color = Area)) +
  dwfun::theme_sci()
```
## 练习
```{r}
#| include: false
#| eval: false
# Disabled by default: builds the Excel files used in the exercises below.
if (FALSE) {
  conn <- cctdb::get_dbconn("nationalairquality")
  metadf <- tbl(conn, "metadf") |>
    collect()
  DBI::dbDisconnect(conn)
  # One nested data frame per Area; naming the list makes write_xlsx() create
  # one sheet per Area in the combined workbook.
  metanestdf <- metadf |>
    nest(citydf = -Area)
  names(metanestdf$citydf) <- metanestdf$Area
  writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx")
  # Do not warn when the directory already exists from an earlier run.
  dir.create("../../data/metacity/", showWarnings = FALSE)
  # Writing files is a pure side effect, so iterate with walk2() instead of
  # storing write_xlsx() return values in a throwaway column.
  purrr::walk2(
    metanestdf$Area, metanestdf$citydf,
    \(area, citydf) writexl::write_xlsx(
      citydf,
      path = paste0("../../data/metacity/", area, ".xlsx")
    )
  )
}
```
1. 从“../../data/sales.xlsx”读取第9到13行的数据
2. 从“../../data/meta_city.xlsx”读取所有的数据并保存至“../../data/meta_city_onetable1.xlsx”
3. 从“../../data/metacity/”读取所有的数据,并保存至“../../data/meta_city_onetable2.xlsx”
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "../")`

View File

@ -0,0 +1,7 @@
student_id,full_name,favourite_food,meal_plan,age
1,Sunil Huffmann,Strawberry yoghurt,Lunch only,4
2,Barclay Lynn,French fries,Lunch only,5
3,Jayendra Lyne,NA,Breakfast and lunch,7
4,Leon Rossini,Anchovies,Lunch only,NA
5,Chidiegwu Dunkel,Pizza,Breakfast and lunch,5
6,Güvenç Attila,Ice cream,Lunch only,6
1 student_id full_name favourite_food meal_plan age
2 1 Sunil Huffmann Strawberry yoghurt Lunch only 4
3 2 Barclay Lynn French fries Lunch only 5
4 3 Jayendra Lyne NA Breakfast and lunch 7
5 4 Leon Rossini Anchovies Lunch only NA
6 5 Chidiegwu Dunkel Pizza Breakfast and lunch 5
7 6 Güvenç Attila Ice cream Lunch only 6

View File

@ -0,0 +1 @@
../../_extensions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
../../_extensions

View File

@ -0,0 +1,153 @@
---
title: "正则表达式"
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
date: today
lang: zh
format:
revealjs:
theme: dark
slide-number: true
chalkboard:
buttons: true
preview-links: auto
lang: zh
toc: true
toc-depth: 1
toc-title: 大纲
logo: ./_extensions/inst/img/ucaslogo.png
css: ./_extensions/inst/css/revealjs.css
pointer:
key: "p"
color: "#32cd32"
pointerSize: 18
revealjs-plugins:
- pointer
filters:
- d2
---
```{r}
#| echo: false
knitr::opts_chunk$set(echo = TRUE)
source("../../coding/_common.R")
library(nycflights13)
library(tidyverse)
```
## 匹配数字与字母
### 匹配数字:
- \d:匹配任意数字字符。
- \d+:匹配一个或多个数字字符。
- [0-9]:匹配任意数字。
### 匹配字母:
- \w:匹配任意字母、数字或下划线字符。
- \w+:匹配一个或多个字母、数字或下划线字符。
## 匹配空白字符与特定字符
### 匹配空白字符:
- \s:匹配任意空白字符,包括空格、制表符、换行符等。
- \s+:匹配一个或多个空白字符。
### 匹配特定字符:
- [abc]:匹配字符 a、b 或 c 中的任意一个。
- [a-z]:匹配任意小写字母。
- [A-Z]:匹配任意大写字母。
- [0-9]:匹配任意数字。
## 匹配重复次数与边界
### 匹配重复次数:
- {n}:匹配前一个字符恰好 n 次。
- {n,}:匹配前一个字符至少 n 次。
- {n,m}:匹配前一个字符至少 n 次,但不超过 m 次。
### 匹配边界:
- ^:匹配字符串的开头。
- $:匹配字符串的结尾。
## 匹配特殊字符与次数
### 匹配特殊字符:
- \:转义特殊字符,使其按字面意义匹配。
- .:匹配任意单个字符。
- |:表示“或”关系,匹配两个或多个表达式之一。
### 匹配次数:
- *:匹配前一个字符零次或多次。
- +:匹配前一个字符一次或多次。
- ?:匹配前一个字符零次或一次。
## 分组与预定义字符集
### 分组和捕获:
- ():将一系列模式组合成一个单元,可与特殊字符一起使用。
### 预定义字符集:
- \d:任意数字,相当于 [0-9]。
- \w:任意字母、数字或下划线字符,相当于 [a-zA-Z0-9_]。
- \s:任意空白字符,相当于 [ \t\n\r\f\v]。
## 实例
```{r}
library(babynames)
# Three sample strings; each line below keeps the elements matching a pattern.
(x <- c("apple", "apppple", "abc123def"))
# Contains at least one digit.
x[str_detect(x, "[0-9]")]
# "abc" followed by one or more digits.
x[str_detect(x, "abc[0-9]+")]
# Two consecutive "p".
x[str_detect(x, "pp")]
# Four consecutive "p".
x[str_detect(x, "p{4}")]
# Always detect against `x` itself: a length-one logical would be recycled by
# `[` and return every element whatever the pattern is.
# "a" followed by zero or more "p".
x[str_detect(x, "ap*")]
# "ap" followed by zero or more "p".
x[str_detect(x, "app*")]
# "a", any two characters, then "le".
x[str_detect(x, "a..le")]
```
## 练习
找出`babynames`中名字含有ar的行
```{r}
#| echo: false
babynames |>
filter(str_detect(name, "ar"))
```
## 练习
找出`babynames`中名字含有ar或者以ry结尾的行。
```{r}
#| echo: false
# Names containing "ar" anywhere, or ending in "ry" (`$` anchors the end).
babynames |>
  filter(str_detect(name, "ar|ry$"))
```
![](../../image/data-science/transform.png)
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

@ -0,0 +1 @@
../../_extensions

View File

@ -0,0 +1,50 @@
---
title: "课后作业7"
subtitle: 《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution
author: 苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心
date: today
lang: zh
format:
revealjs:
theme: dark
slide-number: true
chalkboard:
buttons: true
preview-links: auto
lang: zh
toc: true
toc-depth: 1
toc-title: 大纲
logo: ./_extensions/inst/img/ucaslogo.png
css: ./_extensions/inst/css/revealjs.css
pointer:
key: "p"
color: "#32cd32"
pointerSize: 18
revealjs-plugins:
- pointer
filters:
- d2
---
```{r}
#| include: false
#| cache: false
lang <- "cn"
require(tidyverse)
require(learnr)
```
## 第7次课后作业
1. 根据`airqualitydf.xlsx`,按采样点统计周末(2天)与工作日(5天)中空气质量指数(AQI)的中位数;按城市统计低于所有采样点AQI 30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。
2. 按照不同城市分组统计周末2天与工作日5天AQI中位数是否具有显著差异。
作业模板:[第7次课后作业_模板.qmd](https://drwater.rcees.ac.cn/git/course/RWEP/raw/branch/main/SD/20240326_9_课后作业/第7次课后作业_模板.qmd)
## 欢迎讨论!{.center}
`r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drwater.rcees.ac.cn/course/public/RWEP/@PUB/SD/")`

View File

@ -0,0 +1,25 @@
---
title: 课后作业7
author: 姓名
format: html
---
# 示例问题:在R中输出`1+2+3+...+100`的结果
采用`for`循环完成,代码如下:
```{r}
# Running total of 1 + 2 + ... + 100, built up one term per iteration.
total <- 0
for (i in seq_len(100)) {
  total <- i + total
}
# Print the result.
total
```
# 根据`airqualitydf.xlsx`,按采样点统计周末(2天)与工作日(5天)中空气质量指数(AQI)的中位数;按城市统计低于所有采样点AQI 30%分位值的采样点占比,列出上述占比最高的10个城市(不考虑采样点数低于5个的城市)。
# 按照不同城市分组统计周末2天与工作日5天AQI中位数是否具有显著差异。

78
SD/makeslides.sh Executable file
View File

@ -0,0 +1,78 @@
#!/bin/bash
# 2024-03-26
# - part: whole-game.qmd
# chapters:
# - data-visualize.qmd
# - workflow-basics.qmd
# - data-transform.qmd
# - workflow-style.qmd
# - data-tidy.qmd
# - workflow-scripts.qmd
# - data-import.qmd
# - workflow-help.qmd
# - part: visualize.qmd
# chapters:
# - layers.qmd
# - EDA.qmd
# - communication.qmd
# 2024-03-28
# - part: transform.qmd
# chapters:
# - logicals.qmd
# - numbers.qmd
# - strings.qmd
# - regexps.qmd
# - factors.qmd
# - datetimes.qmd
# - missing-values.qmd
# - joins.qmd
#
# - part: import.qmd
# chapters:
# - spreadsheets.qmd
# - databases.qmd
# - arrow.qmd
# - rectangling.qmd
# - webscraping.qmd
# 2024-04-02
# - part: program.qmd
# chapters:
# - functions.qmd
# - iteration.qmd
# - base-R.qmd
#
# - part: communicate.qmd
# chapters:
# - quarto.qmd
# - quarto-formats.qmd
# Location of the local r4ds checkout whose chapters are converted to slides.
r4dsdir="$HOME/research/r4ds"
# Date prefix shared by every slide directory generated for this session.
datestr="20240326"
# Quarto extensions copied into each slide directory.
extdir="./20240321_0_R实践课程/_extensions"

# Chapters for this session, in presentation order. The 1-based position
# becomes the number in the directory name: <datestr>_<n>_<chapter>.
chapters=(
  data-visualize
  workflow-basics
  data-transform
  workflow-style
  data-tidy
  workflow-scripts
  data-import
  workflow-help
)

# awk program that keeps only the R chunks of a chapter: every opening
# ```{r} fence is preceded by an empty "##" heading (a new slide), lines are
# printed while inside a chunk, and closing fences are printed as well.
extract_chunks='/```{r}/{print "\n##\n\n" $0; flag=1; next} /```/{flag=0} flag || /```/'

idx=0
for chapter in "${chapters[@]}"; do
  idx=$((idx + 1))
  slidedir="${datestr}_${idx}_${chapter}"
  src="${r4dsdir}/${chapter}.qmd"
  # -p: re-running the script must not fail on existing directories.
  mkdir -p "${slidedir}"
  # Skip missing chapters instead of truncating an existing index.qmd.
  if [ ! -f "${src}" ]; then
    echo "makeslides: missing source ${src}, skipping" >&2
    continue
  fi
  awk "${extract_chunks}" "${src}" >"${slidedir}/index.qmd"
done

# Copy the shared extensions into every top-level directory of this session.
# Only top-level directories are visited, so _extensions folders copied on a
# previous run are not descended into.
if [ -d "${extdir}" ]; then
  for slidedir in "${datestr}"_*/; do
    [ -d "${slidedir}" ] || continue
    cp -r "${extdir}" "${slidedir}"
  done
else
  echo "makeslides: extensions directory ${extdir} not found" >&2
fi

View File

@ -325,7 +325,7 @@ figure > figcaption {
.reveal pre code:hover{ .reveal pre code:hover{
font-size: xx-large; font-size: xx-large;
line-height: 150%; line-height: 120%;
} }
section#title-slide p.subtitle { section#title-slide p.subtitle {

View File

@ -7,6 +7,7 @@ project:
- "!analysis/" - "!analysis/"
- "!*ignored.qmd" - "!*ignored.qmd"
- "!*ignored/" - "!*ignored/"
- "!coding/"
title: "区域水环境污染数据分析实践" title: "区域水环境污染数据分析实践"
@ -23,7 +24,7 @@ website:
page-navigation: true page-navigation: true
page-footer: "Copyright 2024, [Ming Su](https://drwater.rcees.ac.cn)" page-footer: "Copyright 2024, [Ming Su](https://drwater.rcees.ac.cn)"
navbar: navbar:
background: "light" background: "grey"
search: true search: true
right: right:
- icon: house - icon: house

55
coding/_common.R Normal file
View File

@ -0,0 +1,55 @@
# Fix the random seed so that results are reproducible across renders.
set.seed(seed = 1014)

# Chunk and printing options, currently disabled.
# knitr::opts_chunk$set(
# comment = "#>",
# collapse = TRUE,
# cache = TRUE,
# fig.retina = 2,
# fig.width = 6,
# fig.asp = 2 / 3,
# fig.show = "hold"
# )
# options(
# dplyr.print_min = 6,
# dplyr.print_max = 6,
# pillar.max_footer_lines = 2,
# pillar.min_chars = 15,
# stringr.view_n = 6,
# # Temporarily deactivate cli output for quarto
# cli.num_colors = 0,
# cli.hyperlink = FALSE,
# pillar.bold = TRUE,
# width = 77 # 80 - 3 for #> comment
# )

# Default ggplot2 theme: gray theme with a 12 pt base font size.
ggplot2::theme_set(ggplot2::theme_gray(base_size = 12))
# Print a Quarto callout describing how finished a chapter is.
# Call it from a chunk with results: "asis" so the markup is rendered.
#
# type: one of "polishing", "restructuring", "drafting" or "complete";
#   any other string stops with "Invalid `type`".
# Returns NULL invisibly (the value of cat()); the callout is written to
# standard output.
status <- function(type) {
  # Callout flavour for this status.
  callout_kind <- switch(type,
    polishing = "note",
    restructuring = "important",
    drafting = "important",
    complete = "note"
  )
  # Sentence fragment describing the chapter's state; the unnamed last
  # argument is the fallback that rejects unknown types.
  chapter_state <- switch(type,
    polishing = "should be readable but is currently undergoing final polishing",
    restructuring = "is undergoing heavy restructuring and may be confusing or incomplete",
    drafting = "is currently a dumping ground for ideas, and we don't recommend reading it",
    complete = "is largely complete and just needs final proof reading",
    stop("Invalid `type`", call. = FALSE)
  )
  pieces <- c(
    "\n",
    ":::: status\n",
    "::: callout-", callout_kind, " \n",
    "You are reading the work-in-progress second edition of R for Data Science. ",
    "This chapter ", chapter_state, ". ",
    "You can find the complete first edition at <https://r4ds.had.co.nz>.\n",
    ":::\n",
    "::::\n"
  )
  cat(pieces, sep = "")
}

8
data/01-sales.csv Normal file
View File

@ -0,0 +1,8 @@
month,year,brand,item,n
January,2019,1,1234,3
January,2019,1,8721,9
January,2019,1,1822,2
January,2019,2,3333,1
January,2019,2,2156,9
January,2019,2,3987,6
January,2019,2,3827,6
1 month year brand item n
2 January 2019 1 1234 3
3 January 2019 1 8721 9
4 January 2019 1 1822 2
5 January 2019 2 3333 1
6 January 2019 2 2156 9
7 January 2019 2 3987 6
8 January 2019 2 3827 6

7
data/02-sales.csv Normal file
View File

@ -0,0 +1,7 @@
month,year,brand,item,n
February,2019,1,1234,8
February,2019,1,8721,2
February,2019,1,1822,3
February,2019,2,3333,1
February,2019,2,2156,3
February,2019,2,3987,6
1 month year brand item n
2 February 2019 1 1234 8
3 February 2019 1 8721 2
4 February 2019 1 1822 3
5 February 2019 2 3333 1
6 February 2019 2 2156 3
7 February 2019 2 3987 6

7
data/03-sales.csv Normal file
View File

@ -0,0 +1,7 @@
month,year,brand,item,n
March,2019,1,1234,3
March,2019,1,3627,1
March,2019,1,8820,3
March,2019,2,7253,1
March,2019,2,8766,3
March,2019,2,8288,6
1 month year brand item n
2 March 2019 1 1234 3
3 March 2019 1 3627 1
4 March 2019 1 8820 3
5 March 2019 2 7253 1
6 March 2019 2 8766 3
7 March 2019 2 8288 6

BIN
data/bake-sale.xlsx Normal file

Binary file not shown.

20
data/gapminder.R Normal file
View File

@ -0,0 +1,20 @@
# Split repurrrsive::gap_simple into one Excel workbook per year under
# data/gapminder/. Paths are relative, so run this from the project root.

# The script uses dplyr, stringr and purrr verbs unqualified; attach them so
# it also runs in a fresh session.
library(tidyverse)

# Quick look at how many rows each year contributes.
repurrrsive::gap_simple |>
  count(year)

by_year <- repurrrsive::gap_simple |>
  group_by(year)

# One output path per year. Pull the column by name rather than relying on
# pull()'s default of "last column".
paths <- by_year |>
  group_keys() |>
  mutate(path = str_glue("data/gapminder/{year}.xlsx")) |>
  pull(path)
paths

# One data frame per year; the year is encoded in the file name, so drop the
# column from the data itself.
years <- by_year |>
  group_split() |>
  map(\(df) select(df, -year))

# Create the output directory if needed; stay quiet when it already exists.
dir.create("data/gapminder", showWarnings = FALSE, recursive = TRUE)
walk2(years, paths, writexl::write_xlsx)

BIN
data/gapminder/1952.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/1957.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/1962.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/1967.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/1972.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/1977.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/1982.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/1987.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/1992.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/1997.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/2002.xlsx Normal file

Binary file not shown.

BIN
data/gapminder/2007.xlsx Normal file

Binary file not shown.

1193
data/heights.csv Normal file

File diff suppressed because it is too large Load Diff

BIN
data/meta_city.xlsx Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More