Files
su2026rwep/coding/L7.qmd
T
2026-05-26 13:43:04 +08:00

171 lines
3.1 KiB
Plaintext

---
title: "RWEP L7- Data Transform"
---
# Data Source
```{r}
dailylanddf <- readr::read_csv("../data/chinawq/daily_land.csv")
metadatadf <- readxl::read_xls("../data/chinawq/metadata_and_statistics.xls")
monthlylanddf <- readr::read_csv("../data/chinawq/monthly_ocean.csv")
weeklylanddf <- readr::read_csv("../data/chinawq/weekly_land.csv")
```
```{r}
require(tidyverse)
dailylanddf
dailylanddf |>
select(.data$MonitoringLocationIdentifier, .data$LongitudeMeasure_WGS84)
dailylanddf |>
select("MonitoringLocationIdentifier", "LongitudeMeasure_WGS84")
dailylanddf |>
select(1:3)
dailylanddf |>
rename(x = 1) |>
rename(y = 2)
```
# 内置function
```{r}
dailylanddf |>
select(1:3) |>
rename_all(\(x) c("a", "b", "c")) |>
rename_all(\(x) paste0(x, "_new"))
dailylanddf |>
select(1:3) |>
rename_all(~ c("a", "b", "c")) |>
rename_all(~ paste0(., "_new"))
dailylanddf |>
select(1:3) |>
rename_all(~ c("x", "y", "z"))
```
```{r}
fulldatadf <- readr::read_csv("../data/chinawq/full_dataset.csv")
datadf <- fulldatadf |>
select(lon = 2, lat = 3, date = 4, varname = 5, value = 6) |>
mutate(value = as.numeric(value)) |>
tidyr::pivot_wider(names_from = varname, values_from = value)
saveRDS(datadf, "../data/chinawq/datadf.rds")
```
# 统计
## 统计每月的DO的平均值
```{r}
datadf <- readRDS("../data/chinawq/datadf.rds")
datadf |>
select(1:7) |>
mutate(month = month(date)) |>
select(lon, lat, month, date, everything()) |>
group_by(month) |>
summarize(DO_mean = mean(DO, na.rm = TRUE), DO_sd = sd(DO, na.rm = TRUE)) |>
ggplot(aes(x = month, y = DO_mean)) +
geom_point() +
geom_smooth(method = "loess") +
geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd))
```
```{r}
datadf |>
select(1:7) |>
mutate(month = month(date)) |>
mutate(year = year(date)) |>
select(lon, lat, year, month, date, everything()) |>
summarize(
DO_mean = mean(DO, na.rm = TRUE),
DO_sd = sd(DO, na.rm = TRUE),
.by = c(year, month)
) |>
ggplot(aes(x = month, y = DO_mean)) +
geom_point() +
geom_smooth(method = "loess") +
geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd)) +
facet_wrap(~year)
```
```{r}
datadf |>
select(1:7) |>
mutate(month = month(date)) |>
mutate(year = year(date)) |>
select(-lon, -lat, -date) |>
group_by(year, month) |>
summarize_all(\(x) mean(x, na.rm = TRUE))
```
## nest
```{r}
for (isite in 1:10) {
m <- datadf |>
select(1:7) |>
tidyr::nest(wqdf = -c(lon, lat)) |>
pull(wqdf) |>
nth(isite) |>
lm(formula = DO ~ pH, data = _)
slope <- coef(m)[2]
print(slope)
}
```
## purrr::map
```{r}
datadf |>
select(1:7) |>
tidyr::nest(wqdf = -c(lon, lat)) |>
slice(1:10) |>
mutate(
m = purrr::map(
wqdf,
\(x) {
lm(formula = DO ~ pH, data = x)
}
)
) |>
mutate(slope = purrr::map_dbl(m, \(x) coef(x)[2])) |>
mutate(
p = purrr::map(wqdf, \(x) {
x |>
ggplot(aes(x = pH, y = DO)) +
geom_smooth(method = "lm") +
geom_point()
})
) |>
pull(p) |>
patchwork::wrap_plots() +
patchwork::plot_layout(ncol = 2)
```