This commit is contained in:
2025-04-09 22:28:17 +08:00
parent 6412cc5560
commit 9ecb11b788
28 changed files with 5215 additions and 243 deletions

154
SD/2.2_dataimport/_demo.qmd Normal file
View File

@@ -0,0 +1,154 @@
---
title: "Lesson 7"
format: html
---
```{r}
# library() stops with an error when the package is missing; require() would
# only return FALSE and let the chunk fail later with a confusing message.
library(tidyverse)

# Option 1: spell out every file path by hand.
files <- c(
  "../../data/01-sales.csv",
  "../../data/02-sales.csv",
  "../../data/03-sales.csv"
)

# Option 2: discover the files instead (this overwrites the vector above).
# The dot is escaped and the pattern is anchored with `$`, so only file names
# that end in "sales.csv" are matched.
files <- list.files(
  "../../data",
  pattern = "sales\\.csv$",
  full.names = TRUE
)

# Read all files in one call. `id = "file"` adds a column with the source path
# of each row; basename() then reduces that path to the bare file name.
readr::read_csv(files, id = "file") |>
  mutate(file = basename(file))
```
```{r}
library(tidyverse)

# Workbooks whose name is a four-digit number followed by ".xlsx"
# (the dot is escaped so it only matches a literal ".").
files <- list.files(
  "../../data/gapminder",
  pattern = "^[12][09][0-9][0-9]\\.xlsx$",
  full.names = TRUE
)

# Read every workbook, tag its rows with the number parsed from the file name,
# and stack the pieces once at the end instead of growing a data frame inside
# a loop.
alldf <- files |>
  purrr::map(
    \(path) {
      readxl::read_xlsx(path) |>
        mutate(year = parse_number(basename(path)))
    }
  ) |>
  dplyr::bind_rows()

# gdpPercap against lifeExp, one panel per year, with a linear fit drawn on
# the log10 y axis.
alldf |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # The argument is `scales`; `scale` only worked through partial matching.
  facet_wrap(~year, ncol = 4, scales = "fixed")
```
# slope
```{r}
file <- files[1]

# General shape of the call used below:
#   lm(<response> ~ <predictor>, data = <data frame>)
# It is kept as a comment because `y`, `x` and a data frame called `data` do
# not exist in this document; evaluating the placeholder raises an error.

# Fit for a single file first.
m <- lm(log10(gdpPercap) ~ lifeExp, data = readxl::read_xlsx(file))
summary(m)
# Pick the slope by name rather than by position.
coef(m)["lifeExp"]

# Same fit for every file. Both vectors are built in one step instead of being
# grown with c() inside a loop.
years <- parse_number(basename(files))
slopes <- purrr::map_dbl(
  files,
  \(path) {
    fit <- lm(log10(gdpPercap) ~ lifeExp, data = readxl::read_xlsx(path))
    coef(fit)[["lifeExp"]]
  }
)
years
slopes
plot(years, slopes, type = "b")
```
# purrr
```{r}
library(tidyverse)
library(tidymodels)

# One row per workbook. `data` (the sheet contents) and `m` (the fitted model)
# are list-columns; `slope` is the lifeExp coefficient of each model.
# The table is deliberately kept nested: it is unnested only where the
# individual observations are needed, so per-file values such as `slope`
# stay at one row per file.
df <- tibble(
  filename = list.files(
    "../../data/gapminder",
    pattern = "^[12][09][0-9][0-9]\\.xlsx$",
    full.names = TRUE
  )
) |>
  mutate(
    data = purrr::map(filename, \(path) readxl::read_xlsx(path)),
    year = parse_number(basename(filename)),
    m = purrr::map(data, \(d) lm(log10(gdpPercap) ~ lifeExp, data = d)),
    slope = purrr::map_dbl(m, \(fit) coef(fit)[["lifeExp"]])
  )

# Inspect the first model: p-value of the slope, R squared, and a tidy
# coefficient table.
first_fit <- df$m[[1]]
pvalue <- summary(first_fit)$coefficients["lifeExp", "Pr(>|t|)"]
rsq <- summary(first_fit)$r.squared
generics::tidy(first_fit)

# Raw observations: unnest the list-column just for this plot.
df |>
  unnest(data) |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # The argument is `scales`; `scale` only worked through partial matching.
  facet_wrap(~year, ncol = 4, scales = "fixed")

# Slope against year: exactly one point per file because `df` is still nested.
df |>
  ggplot(aes(x = year, y = slope)) +
  geom_line() +
  geom_point()

df
df$slope[[1]]
coef(df$m[[1]])["lifeExp"]

# General shape of the model call used above:
#   lm(<response> ~ <predictor>, data = <data frame>)
# It is kept as a comment because `y`, `x` and a data frame called `data` do
# not exist in this document; evaluating the placeholder raises an error.
```

View File

@@ -74,7 +74,7 @@ read_csv("../../data/students.csv") |>
## 列名不要有空格
```{r}
students |>
students |>
rename(
student_id = `Student ID`,
full_name = `Full Name`
@@ -158,7 +158,7 @@ x,y,z
1,2,3"
read_csv(
another_csv,
another_csv,
col_types = cols(.default = col_character())
)
read_csv(
@@ -197,9 +197,11 @@ annoying <- tibble(
```{r}
#| message: false
sales_files <- c("../../data/01-sales.csv",
sales_files <- c(
"../../data/01-sales.csv",
"../../data/02-sales.csv",
"../../data/03-sales.csv")
"../../data/03-sales.csv"
)
read_csv(sales_files, id = "file")
```
@@ -223,8 +225,11 @@ read_csv(sales_files, id = "file")
## 批量读取
```{r}
sales_files <- list.files("../../data",
pattern = "sales\\.csv$", full.names = TRUE)
sales_files <- list.files(
"../../data",
pattern = "sales\\.csv$",
full.names = TRUE
)
sales_files
```
@@ -260,7 +265,7 @@ if (FALSE) {
```{r}
if (FALSE) {
conn <- cctdb::get_dbconn("nationalairquality")
metadf <- tbl(conn, "metadf") |>
metadf <- tbl(conn, "metadf") |>
head(100) |>
collect()
DBI::dbDisconnect(conn)
@@ -270,8 +275,8 @@ metadf <- readRDS(file = "../../data/metadfdemo.RDS")
lang <- "cn"
metadf |>
ggplot(aes(lon, lat)) +
geom_point(aes(fill = Area)) +
dwfun::theme_sci()
geom_point(aes(fill = Area)) +
dwfun::theme_sci()
```
@@ -283,8 +288,13 @@ metadf <- readxl::read_xlsx("../../data/airquality.xlsx")
dir.create("../../data/metacity2/")
metadf |>
nest(sitedf = -site) |>
mutate(flag = purrr::map2(site, sitedf,
~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx"))))
mutate(
flag = purrr::map2(
site,
sitedf,
~ writexl::write_xlsx(.y, paste0("../../data/metacity2/", .x, ".xlsx"))
)
)
```
@@ -296,7 +306,7 @@ metadf |>
if (FALSE) {
require(tidyverse)
conn <- cctdb::get_dbconn("nationalairquality")
metadf <- tbl(conn, "metadf") |>
metadf <- tbl(conn, "metadf") |>
collect()
DBI::dbDisconnect(conn)
metanestdf <- metadf |>
@@ -305,10 +315,16 @@ if (FALSE) {
writexl::write_xlsx(metanestdf$citydf, path = "../../data/meta_city.xlsx")
dir.create("../../data/metacity/")
metanestdf |>
mutate(flag = purrr::map2(Area, citydf,
~ writexl::write_xlsx(.y,
path = paste0("../../data/metacity/", .x, ".xlsx")
)))
mutate(
flag = purrr::map2(
Area,
citydf,
~ writexl::write_xlsx(
.y,
path = paste0("../../data/metacity/", .x, ".xlsx")
)
)
)
}
```