Convert from %>% to |>
This commit is contained in:
@@ -88,7 +88,7 @@ One easy place to start is to find the vertical distance between each point and
|
||||
(Note that I've shifted the x values slightly so you can see the individual distances.)
|
||||
|
||||
```{r, echo = FALSE}
|
||||
dist1 <- sim1 %>%
|
||||
dist1 <- sim1 |>
|
||||
mutate(
|
||||
dodge = rep(c(-1, 0, 1) / 20, 10),
|
||||
x1 = x + dodge,
|
||||
@@ -137,7 +137,7 @@ sim1_dist <- function(a1, a2) {
|
||||
measure_distance(c(a1, a2), sim1)
|
||||
}
|
||||
|
||||
models <- models %>%
|
||||
models <- models |>
|
||||
mutate(dist = purrr::map2_dbl(a1, a2, sim1_dist))
|
||||
models
|
||||
```
|
||||
@@ -171,10 +171,10 @@ I picked the parameters of the grid roughly by looking at where the best models
|
||||
grid <- expand.grid(
|
||||
a1 = seq(-5, 20, length = 25),
|
||||
a2 = seq(1, 3, length = 25)
|
||||
) %>%
|
||||
) |>
|
||||
mutate(dist = purrr::map2_dbl(a1, a2, sim1_dist))
|
||||
|
||||
grid %>%
|
||||
grid |>
|
||||
ggplot(aes(a1, a2)) +
|
||||
geom_point(data = filter(grid, rank(dist) <= 10), size = 4, colour = "red") +
|
||||
geom_point(aes(colour = -dist))
|
||||
@@ -282,7 +282,7 @@ The easiest way to do that is to use `modelr::data_grid()`.
|
||||
Its first argument is a data frame, and for each subsequent argument it finds the unique values and then generates all combinations:
|
||||
|
||||
```{r}
|
||||
grid <- sim1 %>%
|
||||
grid <- sim1 |>
|
||||
data_grid(x)
|
||||
grid
|
||||
```
|
||||
@@ -294,7 +294,7 @@ We'll use `modelr::add_predictions()` which takes a data frame and a model.
|
||||
It adds the predictions from the model to a new column in the data frame:
|
||||
|
||||
```{r}
|
||||
grid <- grid %>%
|
||||
grid <- grid |>
|
||||
add_predictions(sim1_mod)
|
||||
grid
|
||||
```
|
||||
@@ -324,7 +324,7 @@ Note, however, that we use the original dataset, not a manufactured grid.
|
||||
This is because to compute residuals we need actual y values.
|
||||
|
||||
```{r}
|
||||
sim1 <- sim1 %>%
|
||||
sim1 <- sim1 |>
|
||||
add_residuals(sim1_mod)
|
||||
sim1
|
||||
```
|
||||
@@ -444,8 +444,8 @@ We can fit a model to it, and generate predictions:
|
||||
```{r}
|
||||
mod2 <- lm(y ~ x, data = sim2)
|
||||
|
||||
grid <- sim2 %>%
|
||||
data_grid(x) %>%
|
||||
grid <- sim2 |>
|
||||
data_grid(x) |>
|
||||
add_predictions(mod2)
|
||||
grid
|
||||
```
|
||||
@@ -463,7 +463,7 @@ You can't make predictions about levels that you didn't observe.
|
||||
Sometimes you'll do this by accident so it's good to recognise this error message:
|
||||
|
||||
```{r, error = TRUE}
|
||||
tibble(x = "e") %>%
|
||||
tibble(x = "e") |>
|
||||
add_predictions(mod2)
|
||||
```
|
||||
|
||||
@@ -501,8 +501,8 @@ To visualise these models we need two new tricks:
|
||||
Together this gives us:
|
||||
|
||||
```{r}
|
||||
grid <- sim3 %>%
|
||||
data_grid(x1, x2) %>%
|
||||
grid <- sim3 |>
|
||||
data_grid(x1, x2) |>
|
||||
gather_predictions(mod1, mod2)
|
||||
grid
|
||||
```
|
||||
@@ -524,7 +524,7 @@ We can take a look at the residuals.
|
||||
Here I've facetted by both model and `x2` because it makes it easier to see the pattern within each group.
|
||||
|
||||
```{r}
|
||||
sim3 <- sim3 %>%
|
||||
sim3 <- sim3 |>
|
||||
gather_residuals(mod1, mod2)
|
||||
|
||||
ggplot(sim3, aes(x1, resid, colour = x2)) +
|
||||
@@ -547,11 +547,11 @@ Initially things proceed almost identically to the previous example:
|
||||
mod1 <- lm(y ~ x1 + x2, data = sim4)
|
||||
mod2 <- lm(y ~ x1 * x2, data = sim4)
|
||||
|
||||
grid <- sim4 %>%
|
||||
grid <- sim4 |>
|
||||
data_grid(
|
||||
x1 = seq_range(x1, 5),
|
||||
x2 = seq_range(x2, 5)
|
||||
) %>%
|
||||
) |>
|
||||
gather_predictions(mod1, mod2)
|
||||
grid
|
||||
```
|
||||
@@ -687,8 +687,8 @@ mod3 <- lm(y ~ ns(x, 3), data = sim5)
|
||||
mod4 <- lm(y ~ ns(x, 4), data = sim5)
|
||||
mod5 <- lm(y ~ ns(x, 5), data = sim5)
|
||||
|
||||
grid <- sim5 %>%
|
||||
data_grid(x = seq_range(x, n = 50, expand = 0.1)) %>%
|
||||
grid <- sim5 |>
|
||||
data_grid(x = seq_range(x, n = 50, expand = 0.1)) |>
|
||||
gather_predictions(mod1, mod2, mod3, mod4, mod5, .pred = "y")
|
||||
|
||||
ggplot(sim5, aes(x, y)) +
|
||||
|
||||
@@ -71,8 +71,8 @@ But first, let's make a couple of tweaks to the diamonds dataset to make it easie
|
||||
2. Log-transform the carat and price variables.
|
||||
|
||||
```{r}
|
||||
diamonds2 <- diamonds %>%
|
||||
filter(carat <= 2.5) %>%
|
||||
diamonds2 <- diamonds |>
|
||||
filter(carat <= 2.5) |>
|
||||
mutate(lprice = log2(price), lcarat = log2(carat))
|
||||
```
|
||||
|
||||
@@ -95,10 +95,10 @@ Then we look at what the model tells us about the data.
|
||||
Note that I back transform the predictions, undoing the log transformation, so I can overlay the predictions on the raw data:
|
||||
|
||||
```{r}
|
||||
grid <- diamonds2 %>%
|
||||
data_grid(carat = seq_range(carat, 20)) %>%
|
||||
mutate(lcarat = log2(carat)) %>%
|
||||
add_predictions(mod_diamond, "lprice") %>%
|
||||
grid <- diamonds2 |>
|
||||
data_grid(carat = seq_range(carat, 20)) |>
|
||||
mutate(lcarat = log2(carat)) |>
|
||||
add_predictions(mod_diamond, "lprice") |>
|
||||
mutate(price = 2 ^ lprice)
|
||||
|
||||
ggplot(diamonds2, aes(carat, price)) +
|
||||
@@ -113,7 +113,7 @@ This is probably because no diamond in this dataset costs more than \$19,000.
|
||||
Now we can look at the residuals, which verifies that we've successfully removed the strong linear pattern:
|
||||
|
||||
```{r}
|
||||
diamonds2 <- diamonds2 %>%
|
||||
diamonds2 <- diamonds2 |>
|
||||
add_residuals(mod_diamond, "lresid")
|
||||
|
||||
ggplot(diamonds2, aes(lcarat, lresid)) +
|
||||
@@ -147,8 +147,8 @@ Fortunately, they're currently all independent which means that we can plot them
|
||||
To make the process a little easier, we're going to use the `.model` argument to `data_grid`:
|
||||
|
||||
```{r}
|
||||
grid <- diamonds2 %>%
|
||||
data_grid(cut, .model = mod_diamond2) %>%
|
||||
grid <- diamonds2 |>
|
||||
data_grid(cut, .model = mod_diamond2) |>
|
||||
add_predictions(mod_diamond2)
|
||||
grid
|
||||
|
||||
@@ -160,7 +160,7 @@ If the model needs variables that you haven't explicitly supplied, `data_grid()`
|
||||
For continuous variables, it uses the median, and for categorical variables it uses the most common value (or values, if there's a tie).
|
||||
|
||||
```{r}
|
||||
diamonds2 <- diamonds2 %>%
|
||||
diamonds2 <- diamonds2 |>
|
||||
add_residuals(mod_diamond2, "lresid2")
|
||||
|
||||
ggplot(diamonds2, aes(lcarat, lresid2)) +
|
||||
@@ -171,11 +171,11 @@ This plot indicates that there are some diamonds with quite large residuals - re
|
||||
It's often useful to look at unusual values individually:
|
||||
|
||||
```{r}
|
||||
diamonds2 %>%
|
||||
filter(abs(lresid2) > 1) %>%
|
||||
add_predictions(mod_diamond2) %>%
|
||||
mutate(pred = round(2 ^ pred)) %>%
|
||||
select(price, pred, carat:table, x:z) %>%
|
||||
diamonds2 |>
|
||||
filter(abs(lresid2) > 1) |>
|
||||
add_predictions(mod_diamond2) |>
|
||||
mutate(pred = round(2 ^ pred)) |>
|
||||
select(price, pred, carat:table, x:z) |>
|
||||
arrange(price)
|
||||
```
|
||||
|
||||
@@ -203,9 +203,9 @@ This is a really small dataset --- only 365 rows and 2 columns --- and we're not
|
||||
Let's get started by counting the number of flights per day and visualising it with ggplot2.
|
||||
|
||||
```{r}
|
||||
daily <- flights %>%
|
||||
mutate(date = make_date(year, month, day)) %>%
|
||||
group_by(date) %>%
|
||||
daily <- flights |>
|
||||
mutate(date = make_date(year, month, day)) |>
|
||||
group_by(date) |>
|
||||
summarise(n = n())
|
||||
daily
|
||||
|
||||
@@ -219,7 +219,7 @@ Understanding the long-term trend is challenging because there's a very strong d
|
||||
Let's start by looking at the distribution of flight numbers by day-of-week:
|
||||
|
||||
```{r}
|
||||
daily <- daily %>%
|
||||
daily <- daily |>
|
||||
mutate(wday = wday(date, label = TRUE))
|
||||
ggplot(daily, aes(wday, n)) +
|
||||
geom_boxplot()
|
||||
@@ -234,8 +234,8 @@ First, we fit the model, and display its predictions overlaid on the original da
|
||||
```{r}
|
||||
mod <- lm(n ~ wday, data = daily)
|
||||
|
||||
grid <- daily %>%
|
||||
data_grid(wday) %>%
|
||||
grid <- daily |>
|
||||
data_grid(wday) |>
|
||||
add_predictions(mod, "n")
|
||||
|
||||
ggplot(daily, aes(wday, n)) +
|
||||
@@ -246,9 +246,9 @@ ggplot(daily, aes(wday, n)) +
|
||||
Next we compute and visualise the residuals:
|
||||
|
||||
```{r}
|
||||
daily <- daily %>%
|
||||
daily <- daily |>
|
||||
add_residuals(mod)
|
||||
daily %>%
|
||||
daily |>
|
||||
ggplot(aes(date, resid)) +
|
||||
geom_ref_line(h = 0) +
|
||||
geom_line()
|
||||
@@ -272,7 +272,7 @@ This plot is useful because now that we've removed much of the large day-of-week
|
||||
2. There are some days with far fewer flights than expected:
|
||||
|
||||
```{r}
|
||||
daily %>%
|
||||
daily |>
|
||||
filter(resid < -100)
|
||||
```
|
||||
|
||||
@@ -284,7 +284,7 @@ This plot is useful because now that we've removed much of the large day-of-week
|
||||
We can highlight that trend with `geom_smooth()`:
|
||||
|
||||
```{r}
|
||||
daily %>%
|
||||
daily |>
|
||||
ggplot(aes(date, resid)) +
|
||||
geom_ref_line(h = 0) +
|
||||
geom_line(colour = "grey50") +
|
||||
@@ -301,8 +301,8 @@ Let's first tackle our failure to accurately predict the number of flights on Sa
|
||||
A good place to start is to go back to the raw numbers, focussing on Saturdays:
|
||||
|
||||
```{r}
|
||||
daily %>%
|
||||
filter(wday == "Sat") %>%
|
||||
daily |>
|
||||
filter(wday == "Sat") |>
|
||||
ggplot(aes(date, n)) +
|
||||
geom_point() +
|
||||
geom_line() +
|
||||
@@ -329,11 +329,11 @@ term <- function(date) {
|
||||
)
|
||||
}
|
||||
|
||||
daily <- daily %>%
|
||||
daily <- daily |>
|
||||
mutate(term = term(date))
|
||||
|
||||
daily %>%
|
||||
filter(wday == "Sat") %>%
|
||||
daily |>
|
||||
filter(wday == "Sat") |>
|
||||
ggplot(aes(date, n, colour = term)) +
|
||||
geom_point(alpha = 1/3) +
|
||||
geom_line() +
|
||||
@@ -345,7 +345,7 @@ daily %>%
|
||||
It's useful to see how this new variable affects the other days of the week:
|
||||
|
||||
```{r}
|
||||
daily %>%
|
||||
daily |>
|
||||
ggplot(aes(wday, n, colour = term)) +
|
||||
geom_boxplot()
|
||||
```
|
||||
@@ -357,8 +357,8 @@ This improves our model, but not as much as we might hope:
|
||||
mod1 <- lm(n ~ wday, data = daily)
|
||||
mod2 <- lm(n ~ wday * term, data = daily)
|
||||
|
||||
daily %>%
|
||||
gather_residuals(without_term = mod1, with_term = mod2) %>%
|
||||
daily |>
|
||||
gather_residuals(without_term = mod1, with_term = mod2) |>
|
||||
ggplot(aes(date, resid, colour = model)) +
|
||||
geom_line(alpha = 0.75)
|
||||
```
|
||||
@@ -366,8 +366,8 @@ daily %>%
|
||||
We can see the problem by overlaying the predictions from the model on to the raw data:
|
||||
|
||||
```{r}
|
||||
grid <- daily %>%
|
||||
data_grid(wday, term) %>%
|
||||
grid <- daily |>
|
||||
data_grid(wday, term) |>
|
||||
add_predictions(mod2, "n")
|
||||
|
||||
ggplot(daily, aes(wday, n)) +
|
||||
@@ -383,8 +383,8 @@ This greatly reduces the impact of the outliers on our estimates, and gives a mo
|
||||
```{r, warn = FALSE}
|
||||
mod3 <- MASS::rlm(n ~ wday * term, data = daily)
|
||||
|
||||
daily %>%
|
||||
add_residuals(mod3, "resid") %>%
|
||||
daily |>
|
||||
add_residuals(mod3, "resid") |>
|
||||
ggplot(aes(date, resid)) +
|
||||
geom_hline(yintercept = 0, size = 2, colour = "white") +
|
||||
geom_line()
|
||||
@@ -399,7 +399,7 @@ For example, we could write:
|
||||
|
||||
```{r}
|
||||
compute_vars <- function(data) {
|
||||
data %>%
|
||||
data |>
|
||||
mutate(
|
||||
term = term(date),
|
||||
wday = wday(date, label = TRUE)
|
||||
@@ -430,9 +430,9 @@ A simple linear trend isn't adequate, so we could try using a natural spline to
|
||||
library(splines)
|
||||
mod <- MASS::rlm(n ~ wday * ns(date, 5), data = daily)
|
||||
|
||||
daily %>%
|
||||
data_grid(wday, date = seq_range(date, n = 13)) %>%
|
||||
add_predictions(mod) %>%
|
||||
daily |>
|
||||
data_grid(wday, date = seq_range(date, n = 13)) |>
|
||||
add_predictions(mod) |>
|
||||
ggplot(aes(date, pred, colour = wday)) +
|
||||
geom_line() +
|
||||
geom_point()
|
||||
@@ -451,7 +451,7 @@ It's a good sign when you get the same signal from different approaches.
|
||||
How would these days generalise to another year?
|
||||
|
||||
```{r}
|
||||
daily %>%
|
||||
daily |>
|
||||
slice_max(n = 3, resid)
|
||||
```
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ In this case study, we're going to focus on just three variables to answer the q
|
||||
A good place to start is with a plot:
|
||||
|
||||
```{r}
|
||||
gapminder %>%
|
||||
gapminder |>
|
||||
ggplot(aes(year, lifeExp, group = country)) +
|
||||
geom_line(alpha = 1/3)
|
||||
```
|
||||
@@ -79,20 +79,20 @@ You already know how to do that if we had a single country:
|
||||
|
||||
```{r, out.width = "33%", fig.asp = 1, fig.width = 3, fig.align='default'}
|
||||
nz <- filter(gapminder, country == "New Zealand")
|
||||
nz %>%
|
||||
nz |>
|
||||
ggplot(aes(year, lifeExp)) +
|
||||
geom_line() +
|
||||
ggtitle("Full data = ")
|
||||
|
||||
nz_mod <- lm(lifeExp ~ year, data = nz)
|
||||
nz %>%
|
||||
add_predictions(nz_mod) %>%
|
||||
nz |>
|
||||
add_predictions(nz_mod) |>
|
||||
ggplot(aes(year, pred)) +
|
||||
geom_line() +
|
||||
ggtitle("Linear trend + ")
|
||||
|
||||
nz %>%
|
||||
add_residuals(nz_mod) %>%
|
||||
nz |>
|
||||
add_residuals(nz_mod) |>
|
||||
ggplot(aes(year, resid)) +
|
||||
geom_hline(yintercept = 0, colour = "white", size = 3) +
|
||||
geom_line() +
|
||||
@@ -111,8 +111,8 @@ To do that, we need a new data structure: the **nested data frame**.
|
||||
To create a nested data frame we start with a grouped data frame, and "nest" it:
|
||||
|
||||
```{r}
|
||||
by_country <- gapminder %>%
|
||||
group_by(country, continent) %>%
|
||||
by_country <- gapminder |>
|
||||
group_by(country, continent) |>
|
||||
nest()
|
||||
|
||||
by_country
|
||||
@@ -163,7 +163,7 @@ In other words, instead of creating a new object in the global environment, we'r
|
||||
That's a job for `dplyr::mutate()`:
|
||||
|
||||
```{r}
|
||||
by_country <- by_country %>%
|
||||
by_country <- by_country |>
|
||||
mutate(model = map(data, country_model))
|
||||
by_country
|
||||
```
|
||||
@@ -172,9 +172,9 @@ This has a big advantage: because all the related objects are stored together, y
|
||||
The semantics of the data frame takes care of that for you:
|
||||
|
||||
```{r}
|
||||
by_country %>%
|
||||
by_country |>
|
||||
filter(continent == "Europe")
|
||||
by_country %>%
|
||||
by_country |>
|
||||
arrange(continent, country)
|
||||
```
|
||||
|
||||
@@ -188,7 +188,7 @@ Now we have 142 data frames and 142 models.
|
||||
To compute the residuals, we need to call `add_residuals()` with each model-data pair:
|
||||
|
||||
```{r}
|
||||
by_country <- by_country %>%
|
||||
by_country <- by_country |>
|
||||
mutate(
|
||||
resids = map2(data, model, add_residuals)
|
||||
)
|
||||
@@ -209,7 +209,7 @@ Note that each regular column is repeated once for each row of the nested tibble
|
||||
Now that we have a regular data frame, we can plot the residuals:
|
||||
|
||||
```{r}
|
||||
resids %>%
|
||||
resids |>
|
||||
ggplot(aes(year, resid)) +
|
||||
geom_line(aes(group = country), alpha = 1 / 3) +
|
||||
geom_smooth(se = FALSE)
|
||||
@@ -219,7 +219,7 @@ resids %>%
|
||||
Facetting by continent is particularly revealing:
|
||||
|
||||
```{r}
|
||||
resids %>%
|
||||
resids |>
|
||||
ggplot(aes(year, resid, group = country)) +
|
||||
geom_line(alpha = 1 / 3) +
|
||||
facet_wrap(~continent)
|
||||
@@ -245,9 +245,9 @@ broom::glance(nz_mod)
|
||||
We can use `mutate()` and `unnest()` to create a data frame with a row for each country:
|
||||
|
||||
```{r}
|
||||
glance <- by_country %>%
|
||||
mutate(glance = map(model, broom::glance)) %>%
|
||||
select(country, continent, glance) %>%
|
||||
glance <- by_country |>
|
||||
mutate(glance = map(model, broom::glance)) |>
|
||||
select(country, continent, glance) |>
|
||||
unnest(glance)
|
||||
glance
|
||||
```
|
||||
@@ -257,7 +257,7 @@ glance
|
||||
With this data frame in hand, we can start to look for models that don't fit well:
|
||||
|
||||
```{r}
|
||||
glance %>%
|
||||
glance |>
|
||||
arrange(r.squared)
|
||||
```
|
||||
|
||||
@@ -266,7 +266,7 @@ Let's double check that with a plot.
|
||||
Here we have a relatively small number of observations and a discrete variable, so `geom_jitter()` is effective:
|
||||
|
||||
```{r}
|
||||
glance %>%
|
||||
glance |>
|
||||
ggplot(aes(continent, r.squared)) +
|
||||
geom_jitter(width = 0.5)
|
||||
```
|
||||
@@ -276,8 +276,8 @@ We could pull out the countries with particularly bad $R^2$ and plot the data:
|
||||
```{r}
|
||||
bad_fit <- filter(glance, r.squared < 0.25)
|
||||
|
||||
gapminder %>%
|
||||
semi_join(bad_fit, by = "country") %>%
|
||||
gapminder |>
|
||||
semi_join(bad_fit, by = "country") |>
|
||||
ggplot(aes(year, lifeExp, colour = country)) +
|
||||
geom_line()
|
||||
```
|
||||
@@ -377,15 +377,15 @@ So far you've seen how to use it with a grouped data frame.
|
||||
When applied to a grouped data frame, `nest()` keeps the grouping columns as is, and bundles everything else into the list-column:
|
||||
|
||||
```{r}
|
||||
gapminder %>%
|
||||
group_by(country, continent) %>%
|
||||
gapminder |>
|
||||
group_by(country, continent) |>
|
||||
nest()
|
||||
```
|
||||
|
||||
You can also use it on an ungrouped data frame, specifying which columns you want to nest:
|
||||
|
||||
```{r}
|
||||
gapminder %>%
|
||||
gapminder |>
|
||||
nest(data = c(year:gdpPercap))
|
||||
```
|
||||
|
||||
@@ -402,15 +402,15 @@ df <- tribble(
|
||||
"d,e,f,g"
|
||||
)
|
||||
|
||||
df %>%
|
||||
df |>
|
||||
mutate(x2 = stringr::str_split(x1, ","))
|
||||
```
|
||||
|
||||
`unnest()` knows how to handle these lists of vectors:
|
||||
|
||||
```{r}
|
||||
df %>%
|
||||
mutate(x2 = stringr::str_split(x1, ",")) %>%
|
||||
df |>
|
||||
mutate(x2 = stringr::str_split(x1, ",")) |>
|
||||
unnest(x2)
|
||||
```
|
||||
|
||||
@@ -427,7 +427,7 @@ sim <- tribble(
|
||||
"rpois", list(lambda = 10)
|
||||
)
|
||||
|
||||
sim %>%
|
||||
sim |>
|
||||
mutate(sims = invoke_map(f, params, n = 10))
|
||||
```
|
||||
|
||||
@@ -440,8 +440,8 @@ One restriction of `summarise()` is that it only works with summary functions th
|
||||
That means that you can't use it with functions like `quantile()` that return a vector of arbitrary length:
|
||||
|
||||
```{r, error = TRUE}
|
||||
mtcars %>%
|
||||
group_by(cyl) %>%
|
||||
mtcars |>
|
||||
group_by(cyl) |>
|
||||
summarise(q = quantile(mpg))
|
||||
```
|
||||
|
||||
@@ -449,8 +449,8 @@ You can, however, wrap the result in a list!
|
||||
This obeys the contract of `summarise()`, because each summary is now a list (a vector) of length 1.
|
||||
|
||||
```{r}
|
||||
mtcars %>%
|
||||
group_by(cyl) %>%
|
||||
mtcars |>
|
||||
group_by(cyl) |>
|
||||
summarise(q = list(quantile(mpg)))
|
||||
```
|
||||
|
||||
@@ -458,9 +458,9 @@ To make useful results with unnest, you'll also need to capture the probabilitie
|
||||
|
||||
```{r}
|
||||
probs <- c(0.01, 0.25, 0.5, 0.75, 0.99)
|
||||
mtcars %>%
|
||||
group_by(cyl) %>%
|
||||
summarise(p = list(probs), q = list(quantile(mpg, probs))) %>%
|
||||
mtcars |>
|
||||
group_by(cyl) |>
|
||||
summarise(p = list(probs), q = list(quantile(mpg, probs))) |>
|
||||
unnest(c(p, q))
|
||||
```
|
||||
|
||||
@@ -486,7 +486,7 @@ The advantage of this structure is that it generalises in a straightforward way
|
||||
Now if you want to iterate over names and values in parallel, you can use `map2()`:
|
||||
|
||||
```{r}
|
||||
df %>%
|
||||
df |>
|
||||
mutate(
|
||||
smry = map2_chr(name, value, ~ stringr::str_c(.x, ": ", .y[1]))
|
||||
)
|
||||
@@ -503,9 +503,9 @@ df %>%
|
||||
Why isn't that helpful here?
|
||||
|
||||
```{r}
|
||||
mtcars %>%
|
||||
group_by(cyl) %>%
|
||||
summarise(q = list(quantile(mpg))) %>%
|
||||
mtcars |>
|
||||
group_by(cyl) |>
|
||||
summarise(q = list(quantile(mpg))) |>
|
||||
unnest(q)
|
||||
```
|
||||
|
||||
@@ -513,8 +513,8 @@ df %>%
|
||||
Why might it be useful?
|
||||
|
||||
```{r, eval = FALSE}
|
||||
mtcars %>%
|
||||
group_by(cyl) %>%
|
||||
mtcars |>
|
||||
group_by(cyl) |>
|
||||
summarise_all(list(list))
|
||||
```
|
||||
|
||||
@@ -542,7 +542,7 @@ df <- tribble(
|
||||
runif(5)
|
||||
)
|
||||
|
||||
df %>% mutate(
|
||||
df |> mutate(
|
||||
type = map_chr(x, typeof),
|
||||
length = map_int(x, length)
|
||||
)
|
||||
@@ -561,7 +561,7 @@ df <- tribble(
|
||||
list(a = 1, b = 2),
|
||||
list(a = 2, c = 4)
|
||||
)
|
||||
df %>% mutate(
|
||||
df |> mutate(
|
||||
a = map_dbl(x, "a"),
|
||||
b = map_dbl(x, "b", .null = NA_real_)
|
||||
)
|
||||
@@ -573,7 +573,7 @@ df %>% mutate(
|
||||
For example, in the following very simple example we repeat the first row 4 times (because the first element of `y` has length four), and the second row once:
|
||||
|
||||
```{r}
|
||||
tibble(x = 1:2, y = list(1:4, 1)) %>% unnest(y)
|
||||
tibble(x = 1:2, y = list(1:4, 1)) |> unnest(y)
|
||||
```
|
||||
|
||||
This means that you can't simultaneously unnest two columns that contain different numbers of elements:
|
||||
@@ -587,7 +587,7 @@ df1 <- tribble(
|
||||
2, "c", 3
|
||||
)
|
||||
df1
|
||||
df1 %>% unnest(c(y, z))
|
||||
df1 |> unnest(c(y, z))
|
||||
|
||||
# Doesn't work because y and z have different number of elements
|
||||
df2 <- tribble(
|
||||
@@ -596,7 +596,7 @@ df2 <- tribble(
|
||||
2, c("b", "c"), 3
|
||||
)
|
||||
df2
|
||||
df2 %>% unnest(c(y, z))
|
||||
df2 |> unnest(c(y, z))
|
||||
```
|
||||
|
||||
The same principle applies when unnesting list-columns of data frames.
|
||||
|
||||
Reference in New Issue
Block a user