Files
su2026rwep/coding/L7.qmd
T
2026-05-28 13:08:20 +08:00

287 lines
6.2 KiB
Plaintext

---
title: "RWEP L7- Data Transform"
---
# Data Source
```{r}
dailylanddf <- readr::read_csv("../data/chinawq/daily_land.csv")
metadatadf <- readxl::read_xls("../data/chinawq/metadata_and_statistics.xls")
monthlylanddf <- readr::read_csv("../data/chinawq/monthly_ocean.csv")
weeklylanddf <- readr::read_csv("../data/chinawq/weekly_land.csv")
```
```{r}
require(tidyverse)
dailylanddf
dailylanddf |>
select(.data$MonitoringLocationIdentifier, .data$LongitudeMeasure_WGS84)
dailylanddf |>
select("MonitoringLocationIdentifier", "LongitudeMeasure_WGS84")
dailylanddf |>
select(1:3)
dailylanddf |>
rename(x = 1) |>
rename(y = 2)
```
# 内置function
```{r}
dailylanddf |>
select(1:3) |>
rename_all(\(x) c("a", "b", "c")) |>
rename_all(\(x) paste0(x, "_new"))
dailylanddf |>
select(1:3) |>
rename_all(~ c("a", "b", "c")) |>
rename_all(~ paste0(., "_new"))
dailylanddf |>
select(1:3) |>
rename_all(~ c("x", "y", "z"))
```
```{r}
fulldatadf <- readr::read_csv("../data/chinawq/full_dataset.csv")
datadf <- fulldatadf |>
select(lon = 2, lat = 3, date = 4, varname = 5, value = 6) |>
mutate(value = as.numeric(value)) |>
tidyr::pivot_wider(names_from = varname, values_from = value)
saveRDS(datadf, "../data/chinawq/datadf.rds")
```
# 统计
## 统计每月的DO的平均值
```{r}
datadf <- readRDS("../data/chinawq/datadf.rds")
datadf |>
select(1:7) |>
mutate(month = month(date)) |>
select(lon, lat, month, date, everything()) |>
group_by(month) |>
summarize(DO_mean = mean(DO, na.rm = TRUE), DO_sd = sd(DO, na.rm = TRUE)) |>
ggplot(aes(x = month, y = DO_mean)) +
geom_point() +
geom_smooth(method = "loess") +
geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd))
```
```{r}
datadf |>
select(1:7) |>
mutate(month = month(date)) |>
mutate(year = year(date)) |>
select(lon, lat, year, month, date, everything()) |>
summarize(
DO_mean = mean(DO, na.rm = TRUE),
DO_sd = sd(DO, na.rm = TRUE),
.by = c(year, month)
) |>
ggplot(aes(x = month, y = DO_mean)) +
geom_point() +
geom_smooth(method = "loess") +
geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd)) +
facet_wrap(~year)
```
```{r}
datadf |>
select(1:7) |>
mutate(month = month(date)) |>
mutate(year = year(date)) |>
select(-lon, -lat, -date) |>
group_by(year, month) |>
summarize_all(\(x) mean(x, na.rm = TRUE))
```
## nest
```{r}
for (isite in 1:10) {
m <- datadf |>
select(1:7) |>
tidyr::nest(wqdf = -c(lon, lat)) |>
pull(wqdf) |>
nth(isite) |>
lm(formula = DO ~ pH, data = _)
slope <- coef(m)[2]
print(slope)
}
```
## purrr::map
```{r}
require(tidyverse)
datadf <- readRDS("../data/chinawq/datadf.rds")
datadf |>
select(1:7) |>
tidyr::nest(wqdf = -c(lon, lat)) |>
slice(1:10) |>
mutate(
m = purrr::map(
wqdf,
\(x) {
lm(formula = DO ~ pH, data = x)
}
)
) |>
mutate(slope = purrr::map_dbl(m, \(x) coef(x)[2])) |>
mutate(
p = purrr::map2(wqdf, slope, \(x, y) {
x |>
ggplot(aes(x = pH, y = DO)) +
geom_smooth(method = "lm") +
geom_point() +
ggtitle(paste0("Slope: ", round(y, 2)))
})
) |>
pull(p) |>
patchwork::wrap_plots() +
patchwork::plot_layout(ncol = 2)
```
```{r}
longriverdf <- tibble(
fn = dir(
"../data/LongRiver/data/",
pattern = "csv",
full.names = TRUE,
recursive = TRUE
)
) |>
mutate(data = purrr::map(fn, readr::read_csv))
lang <- "cn"
longriverdf |>
mutate(year = gsub("^.*([0-9)]{4})-.*$", "\\1", fn)) |>
mutate(month = gsub("^.*[0-9)]{4}-([0-9]{2}).*$", "\\1", fn)) |>
mutate(site = gsub("^.*_(.*).csv$", "\\1", fn)) |>
select(-fn) |>
slice(1:10) |>
tidyr::unnest(data) |>
ggplot(aes(x = site, y = z)) +
geom_boxplot(colour = "black", size = 0.8) +
facet_wrap(~month) +
dwfun::theme_sci()
```
```{r}
longriverdf |>
mutate(year = gsub("^.*([0-9)]{4})-.*$", "\\1", fn)) |>
mutate(month = gsub("^.*[0-9)]{4}-([0-9]{2}).*$", "\\1", fn)) |>
mutate(site = gsub("^.*_(.*).csv$", "\\1", fn)) |>
select(-fn) |>
slice(1:12) |>
mutate(
p = purrr::map(data, \(x) {
x |>
ggplot(aes(tm, z)) +
geom_point()
})
) |>
pull(p) |>
patchwork::wrap_plots() |>
patchwork::plot_layout(ncol = 3)
```
```{r}
# datadf
# 不同年份,DO 的平均值,
# x lon
# y lat
# colour/ fill Do mean
# facet: year
datadf |>
dplyr::mutate(year = year(date)) |>
summarize(DO_mean = mean(DO, na.rm = TRUE), .by = c(lon, lat, year)) |>
dplyr::filter(year > 2006) |>
ggplot(aes(x = lon, y = lat)) +
geom_point(aes(fill = DO_mean), shape = 21) +
scale_fill_viridis_c() +
facet_wrap(~year) +
labs(x = "Longitude", y = "Latitude", fill = "DO (mg/L)") +
theme_bw()
```
```{r}
# x 月份
# y NH4: pH
p1 <- datadf |>
mutate(month = month(date)) |>
dplyr::filter(NH4N < 100) |>
ggplot(aes(x = month, y = NH4N)) +
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format(
"log10",
scales::math_format(10^.x, format = "%.1f")
)
) +
scale_x_continuous(breaks = 1:12, labels = month.abb) +
theme(legend.position = "none")
p2 <- datadf |>
mutate(month = month(date)) |>
ggplot(aes(x = month, y = CODMn)) +
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format(
"log10",
scales::math_format(10^.x, format = "%.1f")
)
) +
scale_x_continuous(breaks = 1:12, labels = month.abb) +
theme(legend.position = "none")
p3 <- datadf |>
mutate(month = month(date)) |>
dplyr::filter(DO < 20) |>
ggplot(aes(x = month, y = DO)) +
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
scale_x_continuous(breaks = 1:12, labels = month.abb) +
theme(legend.position = "none")
p4 <- datadf |>
mutate(month = month(date)) |>
dplyr::filter(pH > 5) |>
ggplot(aes(x = month, y = pH)) +
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
scale_x_continuous(breaks = 1:12, labels = month.abb) +
theme(legend.position = "none")
(p1 | p2) / (p3 | p4) + patchwork::plot_annotation(tag_levels = "A")
```