render compile
This commit is contained in:
@@ -0,0 +1,154 @@
|
||||
---
|
||||
title: "Lesson 7"
|
||||
format: html
|
||||
---
|
||||
|
||||
|
||||
```{r}
|
||||
library(tidyverse)

# Discover the sales files instead of listing their paths by hand.
# The dot is escaped and the pattern is anchored at the end, so only file
# names that really end in "sales.csv" are picked up (an unescaped "." would
# match any character).
files <- dir("../../data", pattern = "sales\\.csv$", full.names = TRUE)

# Read every file into one tibble. `id = "file"` adds a column holding the
# source path of each row, which is then shortened to the bare file name.
readr::read_csv(files, id = "file") |>
  mutate(file = basename(file))
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
library(tidyverse)

# Workbooks whose names look like "<four-digit year>.xlsx". The dot is escaped
# so that only a literal ".xlsx" extension matches.
files <- dir(
  "../../data/gapminder",
  pattern = "^[12][09][0-9][0-9]\\.xlsx$",
  full.names = TRUE
)

# Read each workbook, add a `year` column parsed from its file name, and stack
# everything with a single bind_rows() call instead of growing the tibble
# inside a loop.
alldf <- files |>
  purrr::map(\(path) {
    readxl::read_xlsx(path) |>
      mutate(year = parse_number(basename(path)))
  }) |>
  bind_rows()

# gdpPercap (log10 axis) against lifeExp, one panel per year, each with a
# straight-line fit drawn by geom_smooth().
alldf |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # Spell out `scales`; `scale` only worked through partial argument matching.
  facet_wrap(~year, ncol = 4, scales = "fixed")
|
||||
```
|
||||
|
||||
# slope
|
||||
|
||||
```{r}
|
||||
# Fit the model for one file first to see what a single result looks like.
file <- files[1]

# General shape of a linear-model call. Kept as a comment because `y` and `x`
# are placeholders, so running the line would stop the render with an error:
# lm(y ~ x, data)

m <- lm(log10(gdpPercap) ~ lifeExp, data = readxl::read_xlsx(file))

summary(m)

# The second coefficient is the slope on lifeExp.
coef(m)[2]

# Repeat the fit for every file. The years come from the file names in one
# vectorised call, and map_dbl() returns the slopes as a plain numeric vector
# without growing it element by element.
years <- parse_number(basename(files))
slopes <- purrr::map_dbl(files, \(path) {
  fit <- lm(log10(gdpPercap) ~ lifeExp, data = readxl::read_xlsx(path))
  coef(fit)[[2]]
})
years
slopes

plot(years, slopes, type = "b")
|
||||
```
|
||||
|
||||
# purrr
|
||||
|
||||
|
||||
|
||||
```{r}
|
||||
library(tidyverse)

# One row per workbook: its path, its contents as a list-column, the year
# parsed from the file name, the fitted model and that model's slope.
# `data` stays nested so `df` remains one row per year; it is unnested only
# where the individual observations are needed. (Unnesting here and again
# before plotting fails, because the `data` column is gone after the first
# unnest.)
df <- tibble(
  filename = dir(
    "../../data/gapminder",
    pattern = "^[12][09][0-9][0-9]\\.xlsx$",
    full.names = TRUE
  )
) |>
  mutate(
    data = purrr::map(filename, \(path) readxl::read_xlsx(path)),
    year = parse_number(basename(filename)),
    m = purrr::map(data, \(d) lm(log10(gdpPercap) ~ lifeExp, data = d)),
    slope = purrr::map_dbl(m, \(fit) coef(fit)[[2]])
  )

# Row 2, column 4 of the coefficient table and the R-squared of the first
# model.
pvalue <- summary(df$m[[1]])$coefficients[2, 4]
rsq <- summary(df$m[[1]])$r.squared

# library() fails immediately if the package is missing, whereas require()
# would only return FALSE and let a later call fail instead.
library(tidymodels)
generics::tidy(df$m[[1]])

# gdpPercap (log10 axis) against lifeExp, one panel per year. Built once and
# printed at both places where the lesson shows it.
gdp_by_year_plot <- df |>
  unnest(data) |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # Spell out `scales`; `scale` only worked through partial argument matching.
  facet_wrap(~year, ncol = 4, scales = "fixed")
gdp_by_year_plot

# Slope of each year's fit; with `df` nested this is one point per year.
df |>
  ggplot(aes(x = year, y = slope)) +
  geom_line() +
  geom_point()

df
df$slope[[1]]

coef(df$m[[1]])[2]

# General shape of a linear-model call. Kept as a comment because `y` and `x`
# are placeholders, so running the line would stop the render with an error:
# lm(y ~ x, data)

gdp_by_year_plot
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
Symlink
+1
@@ -0,0 +1 @@
|
||||
../../_extensions
|
||||
@@ -0,0 +1,326 @@
|
||||
---
|
||||
title: "Data import"
|
||||
format:
|
||||
dwsd-revealjs:
|
||||
logo: _extensions/drwater/dwsd/inst/ucaslogo.png
|
||||
---
|
||||
|
||||
|
||||
```{r}
|
||||
#| echo: false
|
||||
source("../../coding/_common.R")
|
||||
library(tidyverse)
|
||||
```
|
||||
|
||||
|
||||
## tidyverse风格数据分析总体流程
|
||||
|
||||
|
||||

|
||||
|
||||
|
||||
## 导入csv数据
|
||||
|
||||
```{r}
|
||||
# Show the raw file, one line of text per output line.
cat(read_lines("../../data/students.csv"), sep = "\n")
|
||||
```
|
||||
|
||||
## 导入csv数据
|
||||
|
||||
|
||||
```{r}
|
||||
# Parse the file and render the result as a table.
knitr::kable(read_csv("../../data/students.csv"))
|
||||
```
|
||||
|
||||
|
||||
## 读取数据
|
||||
|
||||
```{r}
|
||||
# Store the parsed file, then print it.
students <- read_csv("../../data/students.csv")
students
|
||||
```
|
||||
|
||||
|
||||
## 读取数据
|
||||
|
||||
```{r}
|
||||
#| message: false
|
||||
# Treat both "N/A" and empty fields as missing values, then print the result.
students <- read_csv("../../data/students.csv", na = c("N/A", ""))
students
|
||||
```
|
||||
|
||||
## 列名不要有空格
|
||||
|
||||
```{r}
|
||||
# Give the two columns whose names contain a space syntactic names.
rename(students, student_id = `Student ID`, full_name = `Full Name`)
|
||||
```
|
||||
|
||||
## `janitor`处理空格
|
||||
|
||||
```{r}
|
||||
#| message: false
|
||||
|
||||
# Let janitor rewrite all column names in one call.
janitor::clean_names(students)
|
||||
```
|
||||
|
||||
## `janitor`处理空格
|
||||
|
||||
```{r}
|
||||
# Clean the column names, then convert meal_plan to a factor.
mutate(
  janitor::clean_names(students),
  meal_plan = factor(meal_plan)
)
|
||||
```
|
||||
|
||||
## `janitor`处理空格
|
||||
|
||||
```{r}
|
||||
# Step 1: clean the column names.
students <- janitor::clean_names(students)
# Step 2: make meal_plan a factor, and replace the spelled-out age "five"
# with "5" before parsing the column as a number.
students <- mutate(
  students,
  meal_plan = factor(meal_plan),
  age = parse_number(if_else(age == "five", "5", age))
)
students
|
||||
```
|
||||
|
||||
|
||||
## 直接录入
|
||||
|
||||
```{r}
|
||||
#| message: false
|
||||
|
||||
read_csv(
|
||||
"The first line of metadata
|
||||
The second line of metadata
|
||||
x,y,z
|
||||
1,2,3",
|
||||
skip = 2
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
## 直接录入
|
||||
|
||||
```{r}
|
||||
#| message: false
|
||||
read_csv(
|
||||
"# A comment I want to skip
|
||||
x,y,z
|
||||
1,2,3",
|
||||
comment = "#"
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
## 指定列名
|
||||
|
||||
```{r}
|
||||
#| message: false
|
||||
|
||||
read_csv(
|
||||
"1,2,3
|
||||
4,5,6",
|
||||
col_names = c("x", "y", "z")
|
||||
)
|
||||
```
|
||||
|
||||
## 指定列的类型
|
||||
|
||||
```{r}
|
||||
another_csv <- "
|
||||
x,y,z
|
||||
1,2,3"
|
||||
|
||||
read_csv(
|
||||
another_csv,
|
||||
col_types = cols(.default = col_character())
|
||||
)
|
||||
read_csv(
|
||||
another_csv,
|
||||
col_types = cols_only(x = col_character())
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
## 练习
|
||||
|
||||
```{r}
|
||||
#| eval: false
|
||||
|
||||
read_csv("a,b\n1,2,3\n4,5,6")
|
||||
read_csv("a,b,c\n1,2\n1,2,3,4")
|
||||
read_csv("a,b\n\"1")
|
||||
read_csv("a,b\n1,2\na,b")
|
||||
read_csv("a;b\n1;3")
|
||||
```
|
||||
|
||||
## 练习
|
||||
|
||||
```{r}
|
||||
#| eval: false
|
||||
annoying <- tibble(
|
||||
`1` = 1:10,
|
||||
`2` = `1` * 2 + rnorm(length(`1`))
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 批量读取
|
||||
|
||||
```{r}
|
||||
#| message: false
|
||||
|
||||
# The three sales files, listed explicitly.
sales_files <- c(
  "../../data/01-sales.csv", "../../data/02-sales.csv",
  "../../data/03-sales.csv"
)
# One call reads them all; the `file` column records where each row came from.
read_csv(file = sales_files, id = "file")
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
## 读取Excel,建议用`readxl`包
|
||||
|
||||
```{r}
|
||||
# Read the workbook, keep it, then print it.
surveydf <- readxl::read_xlsx("../../data/survey.xlsx")
surveydf
|
||||
```
|
||||
|
||||
## 读取Excel
|
||||
|
||||
```{r}
|
||||
# Read the second sheet of the workbook, keep it, then print it.
airqualitydf <- readxl::read_xlsx("../../data/airquality.xlsx", sheet = 2)
airqualitydf
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 批量读取
|
||||
|
||||
```{r}
|
||||
# Full paths of every file in the data folder whose name ends in "sales.csv".
sales_files <- list.files(
  path = "../../data", pattern = "sales\\.csv$", full.names = TRUE
)
sales_files
|
||||
```
|
||||
|
||||
## 写入csv
|
||||
|
||||
```{r}
|
||||
#| warning: false
|
||||
#| message: false
|
||||
students
# Write the tibble to a CSV file and read that file straight back.
roundtrip_path <- "students-2.csv"
write_csv(students, file = roundtrip_path)
read_csv(file = roundtrip_path)
|
||||
```
|
||||
|
||||
## 写入Excel
|
||||
|
||||
```{r}
|
||||
writexl::write_xlsx(students, "../../data/writexldemo.xlsx")
|
||||
```
|
||||
|
||||
## 读取数据库,以MySQL为例
|
||||
|
||||
```{r}
|
||||
if (FALSE) {
|
||||
conn <- cctdb::get_dbconn("nationalairquality")
|
||||
DBI::dbListTables(conn)
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 读取数据库,以MySQL为例
|
||||
|
||||
```{r}
|
||||
# The database query is kept for reference but never runs while rendering:
# it fetched the first 100 rows of the "metadf" table and saved them to an
# RDS file, which is what the slide actually loads below.
if (FALSE) {
  conn <- cctdb::get_dbconn("nationalairquality")
  metadf <- tbl(conn, "metadf") |>
    head(100) |>
    collect()
  DBI::dbDisconnect(conn)
  saveRDS(metadf, file = "../../data/metadfdemo.RDS")
}
metadf <- readRDS(file = "../../data/metadfdemo.RDS")
# NOTE(review): `lang` is not used in this chunk; presumably it is read by
# dwfun::theme_sci() - confirm before removing.
lang <- "cn"
# Area is mapped to `color`, not `fill`: geom_point()'s default shape has no
# fill, so a `fill` mapping would leave every point the same colour.
metadf |>
  ggplot(aes(lon, lat)) +
  geom_point(aes(color = Area)) +
  dwfun::theme_sci()
|
||||
```
|
||||
|
||||
|
||||
## 练习
|
||||
|
||||
```{r}
|
||||
#| eval: false
|
||||
metadf <- readxl::read_xlsx("../../data/airquality.xlsx")

# Create the output folder; re-running must not warn if it already exists.
out_dir <- "../../data/metacity2"
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

# One nested table per site.
site_tables <- metadf |>
  nest(sitedf = -site)

# Write one workbook per site, named after the site. walk2() is used because
# only the written files matter, so no result column is kept.
purrr::walk2(
  site_tables$site,
  site_tables$sitedf,
  \(site, sitedf) {
    writexl::write_xlsx(sitedf, file.path(out_dir, paste0(site, ".xlsx")))
  }
)
|
||||
```
|
||||
|
||||
|
||||
## 练习
|
||||
|
||||
```{r}
|
||||
#| include: false
|
||||
#| eval: false
|
||||
# Kept for reference only and never run: rebuilds the exercise files from the
# database.
if (FALSE) {
  library(tidyverse)
  conn <- cctdb::get_dbconn("nationalairquality")
  metadf <- tbl(conn, "metadf") |>
    collect()
  DBI::dbDisconnect(conn)

  # One nested table per Area. The list is named by Area before being handed
  # to write_xlsx(), which writes a named list as one sheet per element.
  metanestdf <- metadf |>
    nest(citydf = -Area)
  names(metanestdf$citydf) <- metanestdf$Area
  writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx")

  # Also write one workbook per Area into its own folder. Creating the folder
  # must not warn on a re-run, and walk2() is used because only the written
  # files matter.
  out_dir <- "../../data/metacity"
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
  purrr::walk2(
    metanestdf$Area,
    metanestdf$citydf,
    \(area, citydf) {
      writexl::write_xlsx(
        citydf,
        path = file.path(out_dir, paste0(area, ".xlsx"))
      )
    }
  )
}
|
||||
```
|
||||
|
||||
1. 从“../../data/sales.xlsx”读取第9到13行的数据
|
||||
2. 从“../../data/meta_city.xlsx”读取所有的数据,并保存至“../../data/meta_city_onetable1.xlsx”
|
||||
3. 从“../../data/metacity/”读取所有的数据,并保存至“../../data/meta_city_onetable2.xlsx”
|
||||
|
||||
|
||||
|
||||
## 欢迎讨论!{.center}
|
||||
|
||||
|
||||
```{r}
|
||||
#| results: 'asis'
|
||||
rmdify::slideend(
|
||||
wechat = FALSE,
|
||||
type = "public",
|
||||
tel = FALSE,
|
||||
thislink = "../"
|
||||
)
|
||||
```
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
student_id,full_name,favourite_food,meal_plan,age
|
||||
1,Sunil Huffmann,Strawberry yoghurt,Lunch only,4
|
||||
2,Barclay Lynn,French fries,Lunch only,5
|
||||
3,Jayendra Lyne,NA,Breakfast and lunch,7
|
||||
4,Leon Rossini,Anchovies,Lunch only,NA
|
||||
5,Chidiegwu Dunkel,Pizza,Breakfast and lunch,5
|
||||
6,Güvenç Attila,Ice cream,Lunch only,6
|
||||
|
Reference in New Issue
Block a user