---
title: "Lesson 7"
format: html
---

```{r}
#| label: setup
library(tidyverse)

# Scatter plot of GDP per capita (log10 y-axis) against life expectancy,
# coloured by year, with one linear fit and one facet per year.
#
# data: a data frame with the columns `lifeExp`, `gdpPercap` and `year`.
# Returns the ggplot object, so it is printed when called at top level.
plot_gdp_vs_life_exp <- function(data) {
  data |>
    ggplot(aes(x = lifeExp, y = gdpPercap)) +
    geom_point(aes(color = factor(year))) +
    geom_smooth(method = "lm", se = FALSE) +
    scale_y_log10(
      breaks = scales::trans_breaks("log10", function(x) 10^x),
      labels = scales::trans_format("log10", scales::math_format(10^.x))
    ) +
    # `scales` spelled out in full; `scale =` only worked through partial
    # argument matching.
    facet_wrap(~year, ncol = 4, scales = "fixed")
}
```

```{r}
library(tidyverse)

# Option 1: list the input files by hand ...
files <- c(
  "../../data/01-sales.csv",
  "../../data/02-sales.csv",
  "../../data/03-sales.csv"
)

# Option 2: ... or discover them from the directory (this overwrites the
# hand-written vector above). The dot is escaped so it only matches a
# literal ".".
files <- dir("../../data", pattern = "sales\\.csv", full.names = TRUE)

# read_csv() accepts a vector of paths; `id = "file"` records the source path
# of every row, which is then shortened to the bare file name.
readr::read_csv(files, id = "file") |>
  mutate(file = basename(file))
```

```{r}
library(tidyverse)

# One workbook per year, named like "1952.xlsx".
files <- dir(
  "../../data/gapminder",
  pattern = "^[12][09][0-9][0-9]\\.xlsx$",
  full.names = TRUE
)

# Read every workbook and tag its rows with the year taken from the file name.
# The pieces are collected in a preallocated list and bound once, instead of
# growing a tibble inside the loop.
pieces <- vector("list", length(files))
for (i in seq_along(files)) {
  pieces[[i]] <- readxl::read_xlsx(files[[i]]) |>
    mutate(year = parse_number(basename(files[[i]])))
}
alldf <- bind_rows(pieces)

plot_gdp_vs_life_exp(alldf)
```

# slope

```{r}
file <- files[1]

# General form of a linear model call (template only, not runnable as is):
#   lm(y ~ x, data)
m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file))
summary(m)
coef(m)[2]

# Fit the same model for every year and keep the slope of `lifeExp`.
# Result vectors are preallocated rather than grown with c().
years <- numeric(length(files))
slopes <- numeric(length(files))
for (i in seq_along(files)) {
  file <- files[[i]]
  m <- lm(log10(gdpPercap) ~ lifeExp, readxl::read_xlsx(file))
  years[i] <- parse_number(basename(file))
  slopes[i] <- coef(m)[2]
}

years
slopes

plot(years, slopes, type = "b")
```

# purrr

```{r}
library(tidyverse)
# Attached so that generics::tidy() below finds a method for `lm` objects.
library(tidymodels)

# One row per year: the file, its data (list-column), the fitted model
# (list-column) and the slope of `lifeExp`. The tibble is kept nested so that
# per-year quantities are not duplicated across observations; unnest `data`
# only where the individual observations are needed.
df <- tibble(
  filename = dir(
    "../../data/gapminder",
    pattern = "^[12][09][0-9][0-9]\\.xlsx$",
    full.names = TRUE
  )
) |>
  mutate(
    data = purrr::map(filename, readxl::read_xlsx),
    year = parse_number(basename(filename)),
    m = purrr::map(data, \(d) lm(log10(gdpPercap) ~ lifeExp, data = d)),
    slope = purrr::map_dbl(m, \(fit) coef(fit)[2])
  )

# Fit statistics of the first year's model.
first_fit <- summary(df$m[[1]])
pvalue <- first_fit$coefficients[2, 4]
rsq <- first_fit$r.squared

generics::tidy(df$m[[1]])

df |>
  unnest(data) |>
  plot_gdp_vs_life_exp()

# Slope over time: one point per year.
df |>
  ggplot(aes(x = year, y = slope)) +
  geom_line() +
  geom_point()

df

df$slope[[1]]
coef(df$m[[1]])[2]

# General form of a linear model call (template only, not runnable as is):
#   lm(y ~ x, data)

df |>
  unnest(data) |>
  plot_gdp_vs_life_exp()
```