---
title: "Lesson 7"
format: html
---
```{r}
# Attach the tidyverse. library() stops with an error when the package is
# missing, whereas require() only returns FALSE and lets later code fail.
library(tidyverse)

# The input files could be listed by hand, e.g.
#   files <- c(
#     "../../data/01-sales.csv",
#     "../../data/02-sales.csv",
#     "../../data/03-sales.csv"
#   )
# but discovering them keeps the chunk working when files are added.
# The dot is escaped and the pattern is anchored at the end so that only
# file names ending in "sales.csv" are picked up.
files <- dir("../../data", pattern = "sales\\.csv$", full.names = TRUE)

# Read all files into one tibble. `id = "file"` adds a column holding the
# path each row came from; basename() trims it to the bare file name.
readr::read_csv(files, id = "file") |>
  mutate(file = basename(file))
```
```{r}
# Attach the tidyverse once; library() errors if it is not installed.
library(tidyverse)

# One workbook per year, named "<year>.xlsx". The dot is escaped so the
# pattern matches a literal ".xlsx" extension only.
files <- dir(
  "../../data/gapminder",
  pattern = "^[12][09][0-9][0-9]\\.xlsx$",
  full.names = TRUE
)

# Read every workbook, tag its rows with the year parsed from the file name,
# and stack the pieces in a single bind_rows() call. Binding once avoids
# re-copying the accumulated data frame on every loop iteration.
alldf <- files |>
  purrr::map(
    \(path) {
      readxl::read_xlsx(path) |>
        mutate(year = parse_number(basename(path)))
    }
  ) |>
  dplyr::bind_rows()

# GDP per capita (log10 axis) against life expectancy, one panel per year,
# each with its own linear fit.
alldf |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # The argument is `scales`; the original `scale` relied on partial matching.
  facet_wrap(~year, ncol = 4, scales = "fixed")
```
# slope
```{r}
# General form of a linear model call (template only, not runnable here):
#   lm(y ~ x, data)

# Fit the model for a single year first to inspect what lm() returns.
# `files` is the vector of gapminder workbooks built in the previous chunk.
file <- files[1]
m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file))
summary(m)
coef(m)[2]

# Now repeat for every year. The year is parsed from each file name in one
# vectorised call, and vapply() returns exactly one numeric slope per file
# instead of growing vectors with c() inside a loop.
years <- parse_number(basename(files))
slopes <- vapply(
  files,
  \(path) {
    fit <- lm(log10(gdpPercap) ~ lifeExp, data = readxl::read_xlsx(path))
    # Select the slope by name rather than by position.
    coef(fit)[["lifeExp"]]
  },
  numeric(1),
  USE.NAMES = FALSE
)
years
slopes

# Slope of log10(gdpPercap) on lifeExp over time.
plot(years, slopes, type = "b")
```
# purrr
```{r}
# Attach the tidyverse; library() errors if it is not installed.
library(tidyverse)

# Nested workflow: one row per yearly workbook, with the raw data, the fitted
# model and its slope stored alongside the file name. `df` is deliberately
# kept nested (not unnested here) so that per-year summaries stay one row per
# year and `unnest(data)` can still be applied later.
df <- tibble(
  filename = dir(
    "../../data/gapminder",
    # Escaped dot: match a literal ".xlsx" extension only.
    pattern = "^[12][09][0-9][0-9]\\.xlsx$",
    full.names = TRUE
  )
) |>
  mutate(
    data = purrr::map(filename, \(path) readxl::read_xlsx(path)),
    year = parse_number(basename(filename)),
    m = purrr::map(data, \(d) lm(log10(gdpPercap) ~ lifeExp, data = d)),
    # Select the slope by name rather than by position.
    slope = purrr::map_dbl(m, \(fit) coef(fit)[["lifeExp"]])
  )

# Pull individual statistics out of the first model; summary() is computed
# once and reused.
first_summary <- summary(df$m[[1]])
pvalue <- first_summary$coefficients["lifeExp", "Pr(>|t|)"]
rsq <- first_summary$r.squared

# tidymodels attaches broom, which supplies the tidy() method for lm objects.
library(tidymodels)
generics::tidy(df$m[[1]])

# Slope over time: `df` has one row per year, so each year is drawn once.
df |>
  ggplot(aes(x = year, y = slope)) +
  geom_line() +
  geom_point()

# Inspect the nested table and check the stored slope against the model.
df
df$slope[[1]]
coef(df$m[[1]])[2]

# General form of a linear model call (template only, not runnable here):
#   lm(y ~ x, data)

# Expand the list-column back to one row per observation for the scatter
# plot: GDP per capita (log10 axis) against life expectancy, one panel per
# year with its own linear fit. This plot appeared twice in the original
# chunk; the second copy failed because `data` had already been unnested.
df |>
  unnest(data) |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # The argument is `scales`; the original `scale` relied on partial matching.
  facet_wrap(~year, ncol = 4, scales = "fixed")
```