Label x and y arguments in aes(), closes #1159
This commit is contained in:
		
							
								
								
									
										2
									
								
								EDA.qmd
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								EDA.qmd
									
									
									
									
									
								
							| @@ -471,7 +471,7 @@ You can do that by exchanging the x and y aesthetic mappings. | |||||||
| #|   on the y-axis and ordered by increasing median highway mileage. | #|   on the y-axis and ordered by increasing median highway mileage. | ||||||
|  |  | ||||||
| ggplot(mpg, | ggplot(mpg, | ||||||
|        aes(y = fct_reorder(class, hwy, median), x = hwy)) + |        aes(x = hwy, y = fct_reorder(class, hwy, median))) + | ||||||
|   geom_boxplot() |   geom_boxplot() | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
|   | |||||||
| @@ -256,7 +256,8 @@ It takes a factor, `f`, and then any number of levels that you want to move to t | |||||||
| #|   bottom of the y-axis. Generally there is a positive association | #|   bottom of the y-axis. Generally there is a positive association | ||||||
| #|   between income and age, and the income band with the highest average | #|   between income and age, and the income band with the highest average | ||||||
| #|   age is "Not applicable". | #|   age is "Not applicable". | ||||||
| ggplot(rincome_summary, aes(age, fct_relevel(rincome, "Not applicable"))) + |  | ||||||
|  | ggplot(rincome_summary, aes(x = age, y = fct_relevel(rincome, "Not applicable"))) + | ||||||
|   geom_point() |   geom_point() | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| @@ -291,7 +292,7 @@ by_age <- gss_cat |> | |||||||
|     prop = n / sum(n) |     prop = n / sum(n) | ||||||
|   ) |   ) | ||||||
|  |  | ||||||
| ggplot(by_age, aes(age, prop, color = marital)) + | ggplot(by_age, aes(x = age, y = prop, color = marital)) + | ||||||
|   geom_line(na.rm = TRUE) |   geom_line(na.rm = TRUE) | ||||||
|  |  | ||||||
| ggplot(by_age, aes(x = age, y = prop, color = fct_reorder2(marital, age, prop))) + | ggplot(by_age, aes(x = age, y = prop, color = fct_reorder2(marital, age, prop))) + | ||||||
|   | |||||||
| @@ -666,11 +666,11 @@ For example, imagine that you're making a lot of histograms: | |||||||
| ```{r} | ```{r} | ||||||
| #| fig-show: hide | #| fig-show: hide | ||||||
| diamonds |>  | diamonds |>  | ||||||
|   ggplot(aes(carat)) + |   ggplot(aes(x = carat)) + | ||||||
|   geom_histogram(binwidth = 0.1) |   geom_histogram(binwidth = 0.1) | ||||||
|  |  | ||||||
| diamonds |>  | diamonds |>  | ||||||
|   ggplot(aes(carat)) + |   ggplot(aes(x = carat)) + | ||||||
|   geom_histogram(binwidth = 0.05) |   geom_histogram(binwidth = 0.05) | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| @@ -680,7 +680,7 @@ This is easy as pie once you know that `aes()` is a data-masking function and yo | |||||||
| ```{r} | ```{r} | ||||||
| histogram <- function(df, var, binwidth = NULL) { | histogram <- function(df, var, binwidth = NULL) { | ||||||
|   df |>  |   df |>  | ||||||
|     ggplot(aes({{ var }})) +  |     ggplot(aes(x = {{ var }})) +  | ||||||
|     geom_histogram(binwidth = binwidth) |     geom_histogram(binwidth = binwidth) | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -705,7 +705,7 @@ For example, maybe you want an easy way to eyeball whether or not a data set is | |||||||
| # https://twitter.com/tyler_js_smith/status/1574377116988104704 | # https://twitter.com/tyler_js_smith/status/1574377116988104704 | ||||||
| linearity_check <- function(df, x, y) { | linearity_check <- function(df, x, y) { | ||||||
|   df |> |   df |> | ||||||
|     ggplot(aes({{ x }}, {{ y }})) + |     ggplot(aes(x = {{ x }}, y = {{ y }})) + | ||||||
|     geom_point() + |     geom_point() + | ||||||
|     geom_smooth(method = "loess", color = "red", se = FALSE) + |     geom_smooth(method = "loess", color = "red", se = FALSE) + | ||||||
|     geom_smooth(method = "lm", color = "blue", se = FALSE)  |     geom_smooth(method = "lm", color = "blue", se = FALSE)  | ||||||
| @@ -722,7 +722,7 @@ Or maybe you want an alternative to colored scatterplots for very large datasets | |||||||
| # https://twitter.com/ppaxisa/status/1574398423175921665 | # https://twitter.com/ppaxisa/status/1574398423175921665 | ||||||
| hex_plot <- function(df, x, y, z, bins = 20, fun = "mean") { | hex_plot <- function(df, x, y, z, bins = 20, fun = "mean") { | ||||||
|   df |>  |   df |>  | ||||||
|     ggplot(aes({{ x }}, {{ y }}, z = {{ z }})) +  |     ggplot(aes(x = {{ x }}, y = {{ y }}, z = {{ z }})) +  | ||||||
|     stat_summary_hex( |     stat_summary_hex( | ||||||
|       aes(color = after_scale(fill)), # make border same color as fill |       aes(color = after_scale(fill)), # make border same color as fill | ||||||
|       bins = bins,  |       bins = bins,  | ||||||
| @@ -760,7 +760,7 @@ Or maybe you want to make it easy to draw a bar plot just for a subset of the da | |||||||
| conditional_bars <- function(df, condition, var) { | conditional_bars <- function(df, condition, var) { | ||||||
|   df |>  |   df |>  | ||||||
|     filter({{ condition }}) |>  |     filter({{ condition }}) |>  | ||||||
|     ggplot(aes({{ var }})) +  |     ggplot(aes(x = {{ var }})) +  | ||||||
|     geom_bar() |     geom_bar() | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -779,7 +779,7 @@ fancy_ts <- function(df, val, group) { | |||||||
|     summarize(breaks = max({{ val }})) |     summarize(breaks = max({{ val }})) | ||||||
|    |    | ||||||
|   df |>  |   df |>  | ||||||
|     ggplot(aes(date, {{ val }}, group = {{ group }}, color = {{ group }})) + |     ggplot(aes(x = date, y = {{ val }}, group = {{ group }}, color = {{ group }})) + | ||||||
|     geom_path() + |     geom_path() + | ||||||
|     scale_y_continuous( |     scale_y_continuous( | ||||||
|       breaks = labs$breaks,  |       breaks = labs$breaks,  | ||||||
| @@ -813,7 +813,7 @@ The only advantage of this syntax is that `vars()` uses tidy evaluation so you c | |||||||
| ```{r} | ```{r} | ||||||
| # https://twitter.com/sharoz/status/1574376332821204999 | # https://twitter.com/sharoz/status/1574376332821204999 | ||||||
| foo <- function(x) { | foo <- function(x) { | ||||||
|   ggplot(mtcars, aes(mpg, disp)) + |   ggplot(mtcars, aes(x = mpg, y = disp)) + | ||||||
|     geom_point() + |     geom_point() + | ||||||
|     facet_wrap(vars({{ x }})) |     facet_wrap(vars({{ x }})) | ||||||
| } | } | ||||||
| @@ -828,7 +828,7 @@ For example, the following function makes it particularly easy to interactively | |||||||
| # https://twitter.com/yutannihilat_en/status/1574387230025875457 | # https://twitter.com/yutannihilat_en/status/1574387230025875457 | ||||||
| density <- function(color, facets, binwidth = 0.1) { | density <- function(color, facets, binwidth = 0.1) { | ||||||
|   diamonds |>  |   diamonds |>  | ||||||
|     ggplot(aes(carat, after_stat(density), color = {{ color }})) + |     ggplot(aes(x = carat, y = after_stat(density), color = {{ color }})) + | ||||||
|     geom_freqpoly(binwidth = binwidth) + |     geom_freqpoly(binwidth = binwidth) + | ||||||
|     facet_wrap(vars({{ facets }})) |     facet_wrap(vars({{ facets }})) | ||||||
| } | } | ||||||
| @@ -845,7 +845,7 @@ Remember the histogram function we showed you earlier? | |||||||
| ```{r} | ```{r} | ||||||
| histogram <- function(df, var, binwidth = NULL) { | histogram <- function(df, var, binwidth = NULL) { | ||||||
|   df |>  |   df |>  | ||||||
|     ggplot(aes({{ var }})) +  |     ggplot(aes(x = {{ var }})) +  | ||||||
|     geom_histogram(binwidth = binwidth) |     geom_histogram(binwidth = binwidth) | ||||||
| } | } | ||||||
| ``` | ``` | ||||||
| @@ -863,7 +863,7 @@ histogram <- function(df, var, binwidth) { | |||||||
|   label <- rlang::englue("A histogram of {{var}} with binwidth {binwidth}") |   label <- rlang::englue("A histogram of {{var}} with binwidth {binwidth}") | ||||||
|    |    | ||||||
|   df |>  |   df |>  | ||||||
|     ggplot(aes({{ var }})) +  |     ggplot(aes(x = {{ var }})) +  | ||||||
|     geom_histogram(binwidth = binwidth) +  |     geom_histogram(binwidth = binwidth) +  | ||||||
|     labs(title = label) |     labs(title = label) | ||||||
| } | } | ||||||
| @@ -917,7 +917,7 @@ This makes it easier to see the hierarchy in your code by skimming the left-hand | |||||||
| # missing extra two spaces | # missing extra two spaces | ||||||
| density <- function(color, facets, binwidth = 0.1) { | density <- function(color, facets, binwidth = 0.1) { | ||||||
| diamonds |>  | diamonds |>  | ||||||
|   ggplot(aes(carat, after_stat(density), color = {{ color }})) + |   ggplot(aes(x = carat, y = after_stat(density), color = {{ color }})) + | ||||||
|   geom_freqpoly(binwidth = binwidth) + |   geom_freqpoly(binwidth = binwidth) + | ||||||
|   facet_wrap(vars({{ facets }})) |   facet_wrap(vars({{ facets }})) | ||||||
| } | } | ||||||
| @@ -925,7 +925,7 @@ diamonds |> | |||||||
| # Pipe indented incorrectly | # Pipe indented incorrectly | ||||||
| density <- function(color, facets, binwidth = 0.1) { | density <- function(color, facets, binwidth = 0.1) { | ||||||
|   diamonds |>  |   diamonds |>  | ||||||
|   ggplot(aes(carat, after_stat(density), color = {{ color }})) + |   ggplot(aes(x = carat, y = after_stat(density), color = {{ color }})) + | ||||||
|   geom_freqpoly(binwidth = binwidth) + |   geom_freqpoly(binwidth = binwidth) + | ||||||
|   facet_wrap(vars({{ facets }})) |   facet_wrap(vars({{ facets }})) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -949,10 +949,10 @@ There are two other coordinate systems that are occasionally helpful. | |||||||
|  |  | ||||||
|     nz <- map_data("nz") |     nz <- map_data("nz") | ||||||
|  |  | ||||||
|     ggplot(nz, aes(long, lat, group = group)) + |     ggplot(nz, aes(x = long, y = lat, group = group)) + | ||||||
|       geom_polygon(fill = "white", color = "black") |       geom_polygon(fill = "white", color = "black") | ||||||
|  |  | ||||||
|     ggplot(nz, aes(long, lat, group = group)) + |     ggplot(nz, aes(x = long, y = lat, group = group)) + | ||||||
|       geom_polygon(fill = "white", color = "black") + |       geom_polygon(fill = "white", color = "black") + | ||||||
|       coord_quickmap() |       coord_quickmap() | ||||||
|     ``` |     ``` | ||||||
|   | |||||||
| @@ -365,7 +365,7 @@ flights |> | |||||||
|     prop_delayed = mean(arr_delay > 0, na.rm = TRUE), |     prop_delayed = mean(arr_delay > 0, na.rm = TRUE), | ||||||
|     .groups = "drop" |     .groups = "drop" | ||||||
|   ) |>  |   ) |>  | ||||||
|   ggplot(aes(prop_delayed)) +  |   ggplot(aes(x = prop_delayed)) +  | ||||||
|   geom_histogram(binwidth = 0.05) |   geom_histogram(binwidth = 0.05) | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										10
									
								
								numbers.qmd
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								numbers.qmd
									
									
									
									
									
								
							| @@ -436,7 +436,7 @@ slide_vec(x, sum, .before = 2, .after = 2, .complete = TRUE) | |||||||
|     ```{r} |     ```{r} | ||||||
|     flights |>  |     flights |>  | ||||||
|       filter(month == 1, day == 1) |>  |       filter(month == 1, day == 1) |>  | ||||||
|       ggplot(aes(sched_dep_time, dep_delay)) + |       ggplot(aes(x = sched_dep_time, y = dep_delay)) + | ||||||
|       geom_point() |       geom_point() | ||||||
|     ``` |     ``` | ||||||
|  |  | ||||||
| @@ -649,7 +649,7 @@ flights |> | |||||||
|     n = n(), |     n = n(), | ||||||
|     .groups = "drop" |     .groups = "drop" | ||||||
|   ) |>  |   ) |>  | ||||||
|   ggplot(aes(mean, median)) +  |   ggplot(aes(x = mean, y = median)) +  | ||||||
|   geom_abline(slope = 1, intercept = 0, color = "white", size = 2) + |   geom_abline(slope = 1, intercept = 0, color = "white", size = 2) + | ||||||
|   geom_point() |   geom_point() | ||||||
| ``` | ``` | ||||||
| @@ -731,12 +731,12 @@ This suggests that the mean is unlikely to be a good summary and we might prefer | |||||||
| #| fig-height: 2 | #| fig-height: 2 | ||||||
|  |  | ||||||
| flights |> | flights |> | ||||||
|   ggplot(aes(dep_delay)) +  |   ggplot(aes(x = dep_delay)) +  | ||||||
|   geom_histogram(binwidth = 15) |   geom_histogram(binwidth = 15) | ||||||
|  |  | ||||||
| flights |> | flights |> | ||||||
|   filter(dep_delay < 120) |>  |   filter(dep_delay < 120) |>  | ||||||
|   ggplot(aes(dep_delay)) +  |   ggplot(aes(x = dep_delay)) +  | ||||||
|   geom_histogram(binwidth = 5) |   geom_histogram(binwidth = 5) | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| @@ -756,7 +756,7 @@ The distributions seem to follow a common pattern, suggesting it's fine to use t | |||||||
| #|   overlapping forming a thick black band. | #|   overlapping forming a thick black band. | ||||||
| flights |> | flights |> | ||||||
|   filter(dep_delay < 120) |>  |   filter(dep_delay < 120) |>  | ||||||
|   ggplot(aes(dep_delay, group = interaction(day, month))) +  |   ggplot(aes(x = dep_delay, group = interaction(day, month))) +  | ||||||
|   geom_freqpoly(binwidth = 5, alpha = 1/5) |   geom_freqpoly(binwidth = 5, alpha = 1/5) | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
|   | |||||||
| @@ -23,6 +23,6 @@ The distribution of the remainder is shown below: | |||||||
| #| echo: false | #| echo: false | ||||||
|  |  | ||||||
| smaller |>  | smaller |>  | ||||||
|   ggplot(aes(carat)) +  |   ggplot(aes(x = carat)) +  | ||||||
|   geom_freqpoly(binwidth = 0.01) |   geom_freqpoly(binwidth = 0.01) | ||||||
| ``` | ``` | ||||||
|   | |||||||
| @@ -166,7 +166,7 @@ It looks like they've radically increased in popularity lately! | |||||||
| babynames |>  | babynames |>  | ||||||
|   group_by(year) |>  |   group_by(year) |>  | ||||||
|   summarize(prop_x = mean(str_detect(name, "x"))) |>  |   summarize(prop_x = mean(str_detect(name, "x"))) |>  | ||||||
|   ggplot(aes(year, prop_x)) +  |   ggplot(aes(x = year, y = prop_x)) +  | ||||||
|   geom_line() |   geom_line() | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
|   | |||||||
| @@ -140,7 +140,7 @@ We wish this transition wasn't necessary but unfortunately ggplot2 was created b | |||||||
|  |  | ||||||
| diamonds |>  | diamonds |>  | ||||||
|   count(cut, clarity) |>  |   count(cut, clarity) |>  | ||||||
|   ggplot(aes(clarity, cut, fill = n)) +  |   ggplot(aes(x = clarity, y = cut, fill = n)) +  | ||||||
|   geom_tile() |   geom_tile() | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
|   | |||||||
| @@ -298,7 +298,7 @@ Don't worry about the details, you'll learn them later in the book. | |||||||
|  |  | ||||||
| library(tidyverse) | library(tidyverse) | ||||||
|  |  | ||||||
| ggplot(diamonds, aes(carat, price)) +  | ggplot(diamonds, aes(x = carat, y = price)) +  | ||||||
|   geom_hex() |   geom_hex() | ||||||
| ggsave("diamonds.pdf") | ggsave("diamonds.pdf") | ||||||
|  |  | ||||||
|   | |||||||
| @@ -212,7 +212,7 @@ flights |> | |||||||
|   summarize( |   summarize( | ||||||
|     delay = mean(arr_delay, na.rm = TRUE) |     delay = mean(arr_delay, na.rm = TRUE) | ||||||
|   ) |>  |   ) |>  | ||||||
|   ggplot(aes(month, delay)) + |   ggplot(aes(x = month, y = delay)) + | ||||||
|   geom_point() +  |   geom_point() +  | ||||||
|   geom_line() |   geom_line() | ||||||
| ``` | ``` | ||||||
| @@ -228,7 +228,7 @@ flights |> | |||||||
|     distance = mean(distance), |     distance = mean(distance), | ||||||
|     speed = mean(air_time / distance, na.rm = TRUE) |     speed = mean(air_time / distance, na.rm = TRUE) | ||||||
|   ) |>  |   ) |>  | ||||||
|   ggplot(aes(distance, speed)) + |   ggplot(aes(x = distance, y = speed)) + | ||||||
|   geom_smooth( |   geom_smooth( | ||||||
|     method = "loess", |     method = "loess", | ||||||
|     span = 0.5, |     span = 0.5, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user