155 lines
2.8 KiB
Plaintext
155 lines
2.8 KiB
Plaintext
---
|
|
title: "Lesson 7"
|
|
format: html
|
|
---
|
|
|
|
|
|
```{r}
|
|
# Attach the tidyverse. library() stops with an error when the package is
# missing, whereas require() only returns FALSE and lets the chunk fail later.
library(tidyverse)

# First approach: list every input file by hand.
# (This vector is overwritten by the dir() call below.)
files <- c(
  "../../data/01-sales.csv",
  "../../data/02-sales.csv",
  "../../data/03-sales.csv"
)

# Second approach: discover the files on disk instead. `pattern` is a
# regular expression, so the dot is escaped and the match is anchored at the
# end of the name; the unescaped "sales.csv" would also match names such as
# "salesXcsv" or "sales.csv.bak".
files <- dir("../../data", pattern = "sales\\.csv$", full.names = TRUE)

# Read all files in a single call; `id = "file"` adds a column holding the
# path each row came from, which is then shortened to the bare file name.
readr::read_csv(files, id = "file") |>
  mutate(file = basename(file))
|
|
```
|
|
|
|
|
|
```{r}
|
|
# A single library() call is enough; the extra require() was redundant and
# would not have stopped the chunk if the package were missing.
library(tidyverse)

# Yearly gapminder workbooks, named "<year>.xlsx". The dot is escaped so that
# it only matches the literal "." in front of the extension.
files <- dir(
  "../../data/gapminder",
  pattern = "^[12][09][0-9][0-9]\\.xlsx$",
  full.names = TRUE
)

# Read every workbook, tag its rows with the year parsed from the file name,
# and bind all pieces in one step instead of growing `alldf` inside a loop.
alldf <- files |>
  purrr::map(
    \(path) {
      readxl::read_xlsx(path) |>
        mutate(year = parse_number(basename(path)))
    }
  ) |>
  bind_rows()

# GDP per capita (log10 axis) against life expectancy, one panel per year,
# with a linear fit drawn in each panel.
alldf |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # The argument is `scales`; `scale` only worked through partial matching.
  facet_wrap(~year, ncol = 4, scales = "fixed")
|
|
```
|
|
|
|
# Slope of log10(GDP per capita) vs. life expectancy, per year
|
|
|
|
```{r}
|
|
# Uses `files`, the vector of workbook paths built in the previous chunk.
file <- files[1]

# General shape of a linear-model call. Kept as a comment: run as code it
# stops with an error, because no `y`, `x` or data frame called `data`
# exists in this document.
# lm(y ~ x, data)

# Fit log10(GDP per capita) against life expectancy for the first workbook.
m <- lm(log10(gdpPercap) ~ lifeExp, data = readxl::read_xlsx(file))

summary(m)

# Second coefficient = slope of the `lifeExp` term.
coef(m)[2]

# Repeat the fit for every workbook. The years come straight from the file
# names (vectorised), and map_dbl() returns one slope per file, so nothing
# is grown element by element inside a loop.
years <- parse_number(basename(files))
slopes <- purrr::map_dbl(
  files,
  \(path) {
    fit <- lm(log10(gdpPercap) ~ lifeExp, data = readxl::read_xlsx(path))
    # Select the slope by name rather than by position.
    coef(fit)[["lifeExp"]]
  }
)
years
slopes

# `slopes` is a plain unnamed double vector, so no as.numeric() is needed.
plot(years, slopes, type = "b")
|
|
```
|
|
|
|
# The same analysis with purrr and list-columns
|
|
|
|
|
|
|
|
```{r}
|
|
# library() stops with an error if the package is missing; require() would
# only return FALSE.
library(tidyverse)

# One row per yearly workbook: the path, its contents (list-column `data`),
# the year parsed from the file name, the fitted model (list-column `m`) and
# the slope of the `lifeExp` term. The dot in the pattern is escaped so it
# only matches the literal "." before the extension.
models <- tibble(
  filename = dir(
    "../../data/gapminder",
    pattern = "^[12][09][0-9][0-9]\\.xlsx$",
    full.names = TRUE
  )
) |>
  mutate(
    data = purrr::map(filename, \(path) readxl::read_xlsx(path)),
    year = parse_number(basename(filename)),
    m = purrr::map(data, \(d) lm(log10(gdpPercap) ~ lifeExp, data = d)),
    slope = purrr::map_dbl(m, \(fit) coef(fit)[["lifeExp"]])
  )

# Long form: one row per observation, with year, model and slope repeated on
# every row. `df` has the same content as before; the nested `models` table
# is kept as well so per-year results can be used without duplicates.
df <- models |>
  unnest(data)

# p-value of the slope and R^2 of the first model, from a single summary().
first_summary <- summary(models$m[[1]])
pvalue <- first_summary$coefficients["lifeExp", "Pr(>|t|)"]
rsq <- first_summary$r.squared

# Attached at this point, as in the original, so that the order in which
# packages mask each other's functions does not change.
library(tidymodels)
generics::tidy(models$m[[1]])

# GDP per capita (log10 axis) against life expectancy, one panel per year.
# Built once and stored, because the same figure is shown again below.
gdp_vs_life <- df |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # The argument is `scales`; `scale` only worked through partial matching.
  facet_wrap(~year, ncol = 4, scales = "fixed")

gdp_vs_life

# Slope over time. Plotted from `models` (one row per year); plotting from
# the long `df` would draw the same point once per observation.
models |>
  ggplot(aes(x = year, y = slope)) +
  geom_line() +
  geom_point()

df
df$slope[[1]]

coef(df$m[[1]])[2]

# General shape of a linear-model call. Kept as a comment: run as code it
# stops with an error, because no `y`, `x` or data frame called `data`
# exists in this document.
# lm(y ~ x, data)

# Same figure as above. The original piped `df` through unnest(data) here,
# which fails because `df` is already unnested and has no `data` column.
gdp_vs_life
|
|
```
|
|
|
|
|
|
|
|
|