171 lines
3.1 KiB
Plaintext
171 lines
3.1 KiB
Plaintext
---
|
|
title: "RWEP L7- Data Transform"
|
|
---
|
|
|
|
# Data Source
|
|
|
|
|
|
```{r}
|
|
dailylanddf <- readr::read_csv("../data/chinawq/daily_land.csv")
|
|
metadatadf <- readxl::read_xls("../data/chinawq/metadata_and_statistics.xls")
|
|
monthlylanddf <- readr::read_csv("../data/chinawq/monthly_ocean.csv")
|
|
weeklylanddf <- readr::read_csv("../data/chinawq/weekly_land.csv")
|
|
```
|
|
|
|
|
|
```{r}
|
|
require(tidyverse)
|
|
dailylanddf
|
|
|
|
|
|
dailylanddf |>
|
|
select(.data$MonitoringLocationIdentifier, .data$LongitudeMeasure_WGS84)
|
|
|
|
dailylanddf |>
|
|
select("MonitoringLocationIdentifier", "LongitudeMeasure_WGS84")
|
|
|
|
|
|
dailylanddf |>
|
|
select(1:3)
|
|
|
|
|
|
dailylanddf |>
|
|
rename(x = 1) |>
|
|
rename(y = 2)
|
|
```
|
|
|
|
|
|
|
|
# 内置function
|
|
|
|
|
|
```{r}
|
|
|
|
dailylanddf |>
|
|
select(1:3) |>
|
|
rename_all(\(x) c("a", "b", "c")) |>
|
|
rename_all(\(x) paste0(x, "_new"))
|
|
|
|
dailylanddf |>
|
|
select(1:3) |>
|
|
rename_all(~ c("a", "b", "c")) |>
|
|
rename_all(~ paste0(., "_new"))
|
|
|
|
|
|
dailylanddf |>
|
|
select(1:3) |>
|
|
rename_all(~ c("x", "y", "z"))
|
|
```
|
|
|
|
```{r}
|
|
fulldatadf <- readr::read_csv("../data/chinawq/full_dataset.csv")
|
|
|
|
datadf <- fulldatadf |>
|
|
select(lon = 2, lat = 3, date = 4, varname = 5, value = 6) |>
|
|
mutate(value = as.numeric(value)) |>
|
|
tidyr::pivot_wider(names_from = varname, values_from = value)
|
|
|
|
saveRDS(datadf, "../data/chinawq/datadf.rds")
|
|
```
|
|
|
|
|
|
# 统计
|
|
|
|
## 统计每月的DO的平均值
|
|
|
|
```{r}
|
|
datadf <- readRDS("../data/chinawq/datadf.rds")
|
|
|
|
datadf |>
|
|
select(1:7) |>
|
|
mutate(month = month(date)) |>
|
|
select(lon, lat, month, date, everything()) |>
|
|
group_by(month) |>
|
|
summarize(DO_mean = mean(DO, na.rm = TRUE), DO_sd = sd(DO, na.rm = TRUE)) |>
|
|
ggplot(aes(x = month, y = DO_mean)) +
|
|
geom_point() +
|
|
geom_smooth(method = "loess") +
|
|
geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd))
|
|
```
|
|
|
|
|
|
```{r}
|
|
datadf |>
|
|
select(1:7) |>
|
|
mutate(month = month(date)) |>
|
|
mutate(year = year(date)) |>
|
|
select(lon, lat, year, month, date, everything()) |>
|
|
summarize(
|
|
DO_mean = mean(DO, na.rm = TRUE),
|
|
DO_sd = sd(DO, na.rm = TRUE),
|
|
.by = c(year, month)
|
|
) |>
|
|
ggplot(aes(x = month, y = DO_mean)) +
|
|
geom_point() +
|
|
geom_smooth(method = "loess") +
|
|
geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd)) +
|
|
facet_wrap(~year)
|
|
```
|
|
|
|
|
|
```{r}
|
|
datadf |>
|
|
select(1:7) |>
|
|
mutate(month = month(date)) |>
|
|
mutate(year = year(date)) |>
|
|
select(-lon, -lat, -date) |>
|
|
group_by(year, month) |>
|
|
summarize_all(\(x) mean(x, na.rm = TRUE))
|
|
```
|
|
|
|
## nest
|
|
|
|
```{r}
|
|
|
|
for (isite in 1:10) {
|
|
m <- datadf |>
|
|
select(1:7) |>
|
|
tidyr::nest(wqdf = -c(lon, lat)) |>
|
|
pull(wqdf) |>
|
|
nth(isite) |>
|
|
lm(formula = DO ~ pH, data = _)
|
|
slope <- coef(m)[2]
|
|
print(slope)
|
|
}
|
|
```
|
|
|
|
## purrr::map
|
|
|
|
```{r}
|
|
|
|
datadf |>
|
|
select(1:7) |>
|
|
tidyr::nest(wqdf = -c(lon, lat)) |>
|
|
slice(1:10) |>
|
|
mutate(
|
|
m = purrr::map(
|
|
wqdf,
|
|
\(x) {
|
|
lm(formula = DO ~ pH, data = x)
|
|
}
|
|
)
|
|
) |>
|
|
mutate(slope = purrr::map_dbl(m, \(x) coef(x)[2])) |>
|
|
mutate(
|
|
p = purrr::map(wqdf, \(x) {
|
|
x |>
|
|
ggplot(aes(x = pH, y = DO)) +
|
|
geom_smooth(method = "lm") +
|
|
geom_point()
|
|
})
|
|
) |>
|
|
pull(p) |>
|
|
patchwork::wrap_plots() +
|
|
patchwork::plot_layout(ncol = 2)
|
|
```
|
|
|
|
|
|
|
|
|
|
|