update
This commit is contained in:
154
SD/2.2_dataimport/_demo.qmd
Normal file
154
SD/2.2_dataimport/_demo.qmd
Normal file
@@ -0,0 +1,154 @@
|
||||
---
|
||||
title: "Lesson 7"
|
||||
format: html
|
||||
---
|
||||
|
||||
|
||||
```{r}
|
||||
# Attach the tidyverse. library() stops with an error when the package is
# missing, whereas require() only returns FALSE and lets later calls fail.
library(tidyverse)

# Option 1: list every sales file by hand.
files <- c(
  "../../data/01-sales.csv",
  "../../data/02-sales.csv",
  "../../data/03-sales.csv"
)

# Option 2 (replaces the vector above): discover the files by name.
# The dot is escaped and the pattern anchored, so only names that end in
# "sales.csv" are matched.
files <- dir("../../data", pattern = "sales\\.csv$", full.names = TRUE)

# Read all files into one tibble. `id = "file"` adds a column holding the
# path each row came from; it is then shortened to the bare file name.
readr::read_csv(files, id = "file") |>
  mutate(file = basename(file))
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
# Attach the tidyverse once; library() errors if it is not installed.
library(tidyverse)

# Workbooks whose file name is a four-digit number followed by ".xlsx".
# The dot is escaped so it matches only a literal ".".
files <- dir(
  "../../data/gapminder",
  pattern = "^[12][09][0-9][0-9]\\.xlsx$",
  full.names = TRUE
)

# Read every workbook, tag its rows with the number parsed from the file
# name, and stack the pieces once instead of growing a tibble in a loop.
alldf <- files |>
  purrr::map(
    \(path) {
      readxl::read_xlsx(path) |>
        mutate(year = parse_number(basename(path)))
    }
  ) |>
  bind_rows()

# gdpPercap (log10 axis) against lifeExp, one panel per year, each with a
# linear fit. `scales` is spelled out rather than relying on partial
# matching of `scale`.
alldf |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  facet_wrap(~year, ncol = 4, scales = "fixed")
|
||||
```
|
||||
|
||||
# slope
|
||||
|
||||
```{r}
|
||||
# Work through a single file first.
file <- files[1]

# General form of a linear model call: lm(y ~ x, data)
# (kept as a comment; run literally it fails because `y` and `x` do not
# exist and `data` is not a data frame)

m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file))

summary(m)

# Second coefficient = slope of log10(gdpPercap) on lifeExp.
coef(m)[2]

# Repeat for every file. parse_number() is vectorised, and map_dbl()
# returns one slope per file without growing vectors inside a loop.
years <- parse_number(basename(files))
slopes <- purrr::map_dbl(
  files,
  \(path) {
    fit <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(path))
    coef(fit)[[2]]
  }
)
years
slopes

plot(years, as.numeric(slopes), type = "b")
|
||||
```
|
||||
|
||||
# purrr
|
||||
|
||||
|
||||
|
||||
```{r}
|
||||
# Attach the tidyverse; library() errors if it is not installed.
library(tidyverse)

# One row per workbook: the file path, its contents (list-column `data`),
# the number parsed from the file name, the fitted model (list-column `m`)
# and that model's slope.
# `df` stays nested (not unnested here) so that the per-file columns are
# not repeated for every observation and `unnest(data)` below still has a
# `data` column to expand.
df <- tibble(
  filename = dir(
    "../../data/gapminder",
    pattern = "^[12][09][0-9][0-9]\\.xlsx$",
    full.names = TRUE
  )
) |>
  dplyr::mutate(
    data = purrr::map(filename, \(path) readxl::read_xlsx(path)),
    year = parse_number(basename(filename)),
    m = purrr::map(
      data,
      \(sheet) lm(log10(gdpPercap) ~ lifeExp, data = sheet)
    ),
    slope = purrr::map_dbl(m, \(fit) coef(fit)[[2]])
  )

# Statistics of the first model, pulled from summary() by hand:
# row 2, column 4 of the coefficient table, and R squared.
pvalue <- summary(df$m[[1]])$coefficients[2, 4]
rsq <- summary(df$m[[1]])$r.squared

# The same information as a tidy data frame.
library(tidymodels)
generics::tidy(df$m[[1]])

# Expand the nested data to one row per observation for plotting.
df |>
  unnest(data) |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  facet_wrap(~year, ncol = 4, scales = "fixed")

# Slope per file: one point per row of the nested `df`.
df |>
  ggplot(aes(x = year, y = slope)) +
  geom_line() +
  geom_point()

df
df$slope[[1]]

coef(df$m[[1]])[2]

# General form of a linear model call: lm(y ~ x, data)
# (kept as a comment; run literally it fails)

df |>
  unnest(data) |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  facet_wrap(~year, ncol = 4, scales = "fixed")
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -74,7 +74,7 @@ read_csv("../../data/students.csv") |>
|
||||
## 列名不要有空格
|
||||
|
||||
```{r}
|
||||
students |>
|
||||
students |>
|
||||
rename(
|
||||
student_id = `Student ID`,
|
||||
full_name = `Full Name`
|
||||
@@ -158,7 +158,7 @@ x,y,z
|
||||
1,2,3"
|
||||
|
||||
read_csv(
|
||||
another_csv,
|
||||
another_csv,
|
||||
col_types = cols(.default = col_character())
|
||||
)
|
||||
read_csv(
|
||||
@@ -197,9 +197,11 @@ annoying <- tibble(
|
||||
```{r}
|
||||
#| message: false
|
||||
|
||||
sales_files <- c("../../data/01-sales.csv",
|
||||
sales_files <- c(
|
||||
"../../data/01-sales.csv",
|
||||
"../../data/02-sales.csv",
|
||||
"../../data/03-sales.csv")
|
||||
"../../data/03-sales.csv"
|
||||
)
|
||||
read_csv(sales_files, id = "file")
|
||||
```
|
||||
|
||||
@@ -223,8 +225,11 @@ read_csv(sales_files, id = "file")
|
||||
## 批量读取
|
||||
|
||||
```{r}
|
||||
sales_files <- list.files("../../data",
|
||||
pattern = "sales\\.csv$", full.names = TRUE)
|
||||
sales_files <- list.files(
|
||||
"../../data",
|
||||
pattern = "sales\\.csv$",
|
||||
full.names = TRUE
|
||||
)
|
||||
sales_files
|
||||
```
|
||||
|
||||
@@ -260,7 +265,7 @@ if (FALSE) {
|
||||
```{r}
|
||||
if (FALSE) {
|
||||
conn <- cctdb::get_dbconn("nationalairquality")
|
||||
metadf <- tbl(conn, "metadf") |>
|
||||
metadf <- tbl(conn, "metadf") |>
|
||||
head(100) |>
|
||||
collect()
|
||||
DBI::dbDisconnect(conn)
|
||||
@@ -270,8 +275,8 @@ metadf <- readRDS(file = "../../data/metadfdemo.RDS")
|
||||
lang <- "cn"
|
||||
metadf |>
|
||||
ggplot(aes(lon, lat)) +
|
||||
geom_point(aes(fill = Area)) +
|
||||
dwfun::theme_sci()
|
||||
geom_point(aes(fill = Area)) +
|
||||
dwfun::theme_sci()
|
||||
```
|
||||
|
||||
|
||||
@@ -283,8 +288,13 @@ metadf <- readxl::read_xlsx("../../data/airquality.xlsx")
|
||||
dir.create("../../data/metacity2/")
|
||||
metadf |>
|
||||
nest(sitedf = -site) |>
|
||||
mutate(flag = purrr::map2(site, sitedf,
|
||||
~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx"))))
|
||||
mutate(
|
||||
flag = purrr::map2(
|
||||
site,
|
||||
sitedf,
|
||||
~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx"))
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
@@ -296,7 +306,7 @@ metadf |>
|
||||
if (FALSE) {
|
||||
require(tidyverse)
|
||||
conn <- cctdb::get_dbconn("nationalairquality")
|
||||
metadf <- tbl(conn, "metadf") |>
|
||||
metadf <- tbl(conn, "metadf") |>
|
||||
collect()
|
||||
DBI::dbDisconnect(conn)
|
||||
metanestdf <- metadf |>
|
||||
@@ -305,10 +315,16 @@ if (FALSE) {
|
||||
writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx")
|
||||
dir.create("../../data/metacity/")
|
||||
metanestdf |>
|
||||
mutate(flag = purrr::map2(Area, citydf,
|
||||
~ writexl::write_xlsx(.y,
|
||||
path = paste0("../../data/metacity/", .x, ".xlsx")
|
||||
)))
|
||||
mutate(
|
||||
flag = purrr::map2(
|
||||
Area,
|
||||
citydf,
|
||||
~ writexl::write_xlsx(
|
||||
.y,
|
||||
path = paste0("../../data/metacity/", .x, ".xlsx")
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Reference in New Issue
Block a user