155 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			155 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
---
title: "Lesson 7"
format: html
---
 | |
| 
 | |
| 
 | |
| ```{r}
 | |
# Attach the tidyverse (readr, dplyr, ...). library() stops with an error when
# the package is missing, whereas require() would only return FALSE and let
# the chunk fail later with a less helpful message.
library(tidyverse)

# Option 1: list every input file by hand.
files <- c(
  "../../data/01-sales.csv",
  "../../data/02-sales.csv",
  "../../data/03-sales.csv"
)

# Option 2 (replaces the vector above): discover the files in the directory.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the name; only files ending in "sales.csv" qualify.
files <- dir("../../data", pattern = "sales\\.csv$", full.names = TRUE)

# Read all files into a single tibble. `id = "file"` adds a column holding the
# path each row came from; basename() trims it down to the bare file name.
readr::read_csv(files, id = "file") |>
  mutate(file = basename(file))
 | |
| ```
 | |
| 
 | |
| 
 | |
| ```{r}
 | |
# A single library() call is enough; it errors if the tidyverse is missing.
library(tidyverse)

# One Excel workbook per year, named "<year>.xlsx". The dot is escaped so the
# regular expression only accepts a literal ".xlsx" extension.
files <- dir(
  "../../data/gapminder",
  pattern = "^[12][09][0-9][0-9]\\.xlsx$",
  full.names = TRUE
)

# Read each workbook into a preallocated list and stack the pieces once at the
# end. Calling bind_rows() inside the loop would copy the growing tibble on
# every iteration.
sheets <- vector("list", length(files))
for (i in seq_along(files)) {
  sheets[[i]] <- readxl::read_xlsx(files[[i]]) |>
    # The year is not stored in the sheet; take it from the file name.
    mutate(year = parse_number(basename(files[[i]])))
}
alldf <- bind_rows(sheets)

# GDP per capita (log10 axis) against life expectancy, one panel per year.
# ggplot2 applies the scale transformation before computing statistics, so the
# geom_smooth() line is a linear fit of log10(gdpPercap) on lifeExp.
alldf |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # `scales` spelled out in full instead of relying on partial matching.
  facet_wrap(~year, ncol = 4, scales = "fixed")
 | |
| ```
 | |
| 
 | |
# Slope of log10(GDP per capita) against life expectancy
 | |
| 
 | |
| ```{r}
 | |
# This chunk reuses `files` (the yearly gapminder workbooks) and the attached
# tidyverse from the previous chunk.
file <- files[1]

# General shape of a linear model call (template only, not runnable as is):
#   lm(y ~ x, data)

# Fit the model for the first year to inspect what lm() returns.
m <- lm(log10(gdpPercap) ~ lifeExp, data = readxl::read_xlsx(file))

summary(m)

# The second coefficient is the slope for lifeExp (the first is the intercept).
coef(m)[2]

# Repeat the fit for every year. The years come straight from the file names;
# the slopes go into a preallocated vector instead of growing it with c().
years <- parse_number(basename(files))
slopes <- numeric(length(files))
for (i in seq_along(files)) {
  m <- lm(log10(gdpPercap) ~ lifeExp, data = readxl::read_xlsx(files[[i]]))
  slopes[i] <- coef(m)[["lifeExp"]]
}
years
slopes

# How the slope changes over the years.
plot(years, slopes, type = "b")
 | |
| ```
 | |
| 
 | |
# The same analysis with purrr
 | |
| 
 | |
| 
 | |
| 
 | |
| ```{r}
 | |
library(tidyverse)

# One row per yearly workbook. The sheet contents (`data`) and the fitted
# model (`m`) are kept as list-columns, so `df` stays at one row per year.
# It is deliberately NOT unnested here: unnesting would repeat year, model and
# slope for every observation and remove the `data` column that later steps
# rely on.
df <- tibble(
  filename = dir(
    "../../data/gapminder",
    pattern = "^[12][09][0-9][0-9]\\.xlsx$",
    full.names = TRUE
  )
) |>
  mutate(
    data = purrr::map(filename, \(path) readxl::read_xlsx(path)),
    year = parse_number(basename(filename)),
    m = purrr::map(
      data,
      \(sheet) lm(log10(gdpPercap) ~ lifeExp, data = sheet)
    ),
    slope = purrr::map_dbl(m, \(fit) coef(fit)[["lifeExp"]])
  )

# Inspect the table and the first model.
df
df$slope[[1]]
coef(df$m[[1]])[2]

# Pull individual statistics out of the first model's summary (computed once).
first_summary <- summary(df$m[[1]])
pvalue <- first_summary$coefficients[2, 4]
rsq <- first_summary$r.squared

# tidymodels attaches broom, which supplies the tidy() method for lm objects.
library(tidymodels)
generics::tidy(df$m[[1]])

# Slope of the fit over time: one point per year because `df` is still nested.
df |>
  ggplot(aes(x = year, y = slope)) +
  geom_line() +
  geom_point()

# Unnest only for the plot that needs the individual observations.
df |>
  unnest(data) |>
  ggplot(aes(x = lifeExp, y = gdpPercap)) +
  geom_point(aes(color = factor(year))) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # `scales` spelled out in full instead of relying on partial matching.
  facet_wrap(~year, ncol = 4, scales = "fixed")
 | |
| ```
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 |