287 lines
6.2 KiB
Plaintext
287 lines
6.2 KiB
Plaintext
---
|
|
title: "RWEP L7- Data Transform"
|
|
---
|
|
|
|
# Data Source
|
|
|
|
|
|
```{r}
|
|
dailylanddf <- readr::read_csv("../data/chinawq/daily_land.csv")
|
|
metadatadf <- readxl::read_xls("../data/chinawq/metadata_and_statistics.xls")
|
|
monthlylanddf <- readr::read_csv("../data/chinawq/monthly_ocean.csv")
|
|
weeklylanddf <- readr::read_csv("../data/chinawq/weekly_land.csv")
|
|
```
|
|
|
|
|
|
```{r}
|
|
require(tidyverse)
|
|
dailylanddf
|
|
|
|
|
|
dailylanddf |>
|
|
select(.data$MonitoringLocationIdentifier, .data$LongitudeMeasure_WGS84)
|
|
|
|
dailylanddf |>
|
|
select("MonitoringLocationIdentifier", "LongitudeMeasure_WGS84")
|
|
|
|
|
|
dailylanddf |>
|
|
select(1:3)
|
|
|
|
|
|
dailylanddf |>
|
|
rename(x = 1) |>
|
|
rename(y = 2)
|
|
```
|
|
|
|
|
|
|
|
# 内置function
|
|
|
|
|
|
```{r}
|
|
|
|
dailylanddf |>
|
|
select(1:3) |>
|
|
rename_all(\(x) c("a", "b", "c")) |>
|
|
rename_all(\(x) paste0(x, "_new"))
|
|
|
|
dailylanddf |>
|
|
select(1:3) |>
|
|
rename_all(~ c("a", "b", "c")) |>
|
|
rename_all(~ paste0(., "_new"))
|
|
|
|
|
|
dailylanddf |>
|
|
select(1:3) |>
|
|
rename_all(~ c("x", "y", "z"))
|
|
```
|
|
|
|
```{r}
|
|
fulldatadf <- readr::read_csv("../data/chinawq/full_dataset.csv")
|
|
|
|
datadf <- fulldatadf |>
|
|
select(lon = 2, lat = 3, date = 4, varname = 5, value = 6) |>
|
|
mutate(value = as.numeric(value)) |>
|
|
tidyr::pivot_wider(names_from = varname, values_from = value)
|
|
|
|
saveRDS(datadf, "../data/chinawq/datadf.rds")
|
|
```
|
|
|
|
|
|
# 统计
|
|
|
|
## 统计每月的DO的平均值
|
|
|
|
```{r}
|
|
datadf <- readRDS("../data/chinawq/datadf.rds")
|
|
|
|
datadf |>
|
|
select(1:7) |>
|
|
mutate(month = month(date)) |>
|
|
select(lon, lat, month, date, everything()) |>
|
|
group_by(month) |>
|
|
summarize(DO_mean = mean(DO, na.rm = TRUE), DO_sd = sd(DO, na.rm = TRUE)) |>
|
|
ggplot(aes(x = month, y = DO_mean)) +
|
|
geom_point() +
|
|
geom_smooth(method = "loess") +
|
|
geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd))
|
|
```
|
|
|
|
|
|
```{r}
|
|
datadf |>
|
|
select(1:7) |>
|
|
mutate(month = month(date)) |>
|
|
mutate(year = year(date)) |>
|
|
select(lon, lat, year, month, date, everything()) |>
|
|
summarize(
|
|
DO_mean = mean(DO, na.rm = TRUE),
|
|
DO_sd = sd(DO, na.rm = TRUE),
|
|
.by = c(year, month)
|
|
) |>
|
|
ggplot(aes(x = month, y = DO_mean)) +
|
|
geom_point() +
|
|
geom_smooth(method = "loess") +
|
|
geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd)) +
|
|
facet_wrap(~year)
|
|
```
|
|
|
|
|
|
```{r}
|
|
datadf |>
|
|
select(1:7) |>
|
|
mutate(month = month(date)) |>
|
|
mutate(year = year(date)) |>
|
|
select(-lon, -lat, -date) |>
|
|
group_by(year, month) |>
|
|
summarize_all(\(x) mean(x, na.rm = TRUE))
|
|
```
|
|
|
|
## nest
|
|
|
|
```{r}
|
|
|
|
for (isite in 1:10) {
|
|
m <- datadf |>
|
|
select(1:7) |>
|
|
tidyr::nest(wqdf = -c(lon, lat)) |>
|
|
pull(wqdf) |>
|
|
nth(isite) |>
|
|
lm(formula = DO ~ pH, data = _)
|
|
slope <- coef(m)[2]
|
|
print(slope)
|
|
}
|
|
```
|
|
|
|
## purrr::map
|
|
|
|
```{r}
|
|
require(tidyverse)
|
|
datadf <- readRDS("../data/chinawq/datadf.rds")
|
|
datadf |>
|
|
select(1:7) |>
|
|
tidyr::nest(wqdf = -c(lon, lat)) |>
|
|
slice(1:10) |>
|
|
mutate(
|
|
m = purrr::map(
|
|
wqdf,
|
|
\(x) {
|
|
lm(formula = DO ~ pH, data = x)
|
|
}
|
|
)
|
|
) |>
|
|
mutate(slope = purrr::map_dbl(m, \(x) coef(x)[2])) |>
|
|
mutate(
|
|
p = purrr::map2(wqdf, slope, \(x, y) {
|
|
x |>
|
|
ggplot(aes(x = pH, y = DO)) +
|
|
geom_smooth(method = "lm") +
|
|
geom_point() +
|
|
ggtitle(paste0("Slope: ", round(y, 2)))
|
|
})
|
|
) |>
|
|
pull(p) |>
|
|
patchwork::wrap_plots() +
|
|
patchwork::plot_layout(ncol = 2)
|
|
```
|
|
|
|
|
|
```{r}
|
|
longriverdf <- tibble(
|
|
fn = dir(
|
|
"../data/LongRiver/data/",
|
|
pattern = "csv",
|
|
full.names = TRUE,
|
|
recursive = TRUE
|
|
)
|
|
) |>
|
|
mutate(data = purrr::map(fn, readr::read_csv))
|
|
|
|
lang <- "cn"
|
|
longriverdf |>
|
|
mutate(year = gsub("^.*([0-9)]{4})-.*$", "\\1", fn)) |>
|
|
mutate(month = gsub("^.*[0-9)]{4}-([0-9]{2}).*$", "\\1", fn)) |>
|
|
mutate(site = gsub("^.*_(.*).csv$", "\\1", fn)) |>
|
|
select(-fn) |>
|
|
slice(1:10) |>
|
|
tidyr::unnest(data) |>
|
|
ggplot(aes(x = site, y = z)) +
|
|
geom_boxplot(colour = "black", size = 0.8) +
|
|
facet_wrap(~month) +
|
|
dwfun::theme_sci()
|
|
```
|
|
|
|
```{r}
|
|
longriverdf |>
|
|
mutate(year = gsub("^.*([0-9)]{4})-.*$", "\\1", fn)) |>
|
|
mutate(month = gsub("^.*[0-9)]{4}-([0-9]{2}).*$", "\\1", fn)) |>
|
|
mutate(site = gsub("^.*_(.*).csv$", "\\1", fn)) |>
|
|
select(-fn) |>
|
|
slice(1:12) |>
|
|
mutate(
|
|
p = purrr::map(data, \(x) {
|
|
x |>
|
|
ggplot(aes(tm, z)) +
|
|
geom_point()
|
|
})
|
|
) |>
|
|
pull(p) |>
|
|
patchwork::wrap_plots() |>
|
|
patchwork::plot_layout(ncol = 3)
|
|
```
|
|
|
|
```{r}
|
|
# datadf
|
|
# 不同年份,DO 的平均值,
|
|
# x lon
|
|
# y lat
|
|
# colour/ fill Do mean
|
|
# facet: year
|
|
datadf |>
|
|
dplyr::mutate(year = year(date)) |>
|
|
summarize(DO_mean = mean(DO, na.rm = TRUE), .by = c(lon, lat, year)) |>
|
|
dplyr::filter(year > 2006) |>
|
|
ggplot(aes(x = lon, y = lat)) +
|
|
geom_point(aes(fill = DO_mean), shape = 21) +
|
|
scale_fill_viridis_c() +
|
|
facet_wrap(~year) +
|
|
labs(x = "Longitude", y = "Latitude", fill = "DO (mg/L)") +
|
|
theme_bw()
|
|
```
|
|
|
|
|
|
```{r}
|
|
# x 月份
|
|
# y NH4: pH
|
|
|
|
p1 <- datadf |>
|
|
mutate(month = month(date)) |>
|
|
dplyr::filter(NH4N < 100) |>
|
|
ggplot(aes(x = month, y = NH4N)) +
|
|
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
|
|
scale_y_log10(
|
|
breaks = scales::trans_breaks("log10", function(x) 10^x),
|
|
labels = scales::trans_format(
|
|
"log10",
|
|
scales::math_format(10^.x, format = "%.1f")
|
|
)
|
|
) +
|
|
scale_x_continuous(breaks = 1:12, labels = month.abb) +
|
|
theme(legend.position = "none")
|
|
p2 <- datadf |>
|
|
mutate(month = month(date)) |>
|
|
ggplot(aes(x = month, y = CODMn)) +
|
|
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
|
|
scale_y_log10(
|
|
breaks = scales::trans_breaks("log10", function(x) 10^x),
|
|
labels = scales::trans_format(
|
|
"log10",
|
|
scales::math_format(10^.x, format = "%.1f")
|
|
)
|
|
) +
|
|
scale_x_continuous(breaks = 1:12, labels = month.abb) +
|
|
theme(legend.position = "none")
|
|
p3 <- datadf |>
|
|
mutate(month = month(date)) |>
|
|
dplyr::filter(DO < 20) |>
|
|
ggplot(aes(x = month, y = DO)) +
|
|
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
|
|
scale_x_continuous(breaks = 1:12, labels = month.abb) +
|
|
theme(legend.position = "none")
|
|
p4 <- datadf |>
|
|
mutate(month = month(date)) |>
|
|
dplyr::filter(pH > 5) |>
|
|
ggplot(aes(x = month, y = pH)) +
|
|
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
|
|
scale_x_continuous(breaks = 1:12, labels = month.abb) +
|
|
theme(legend.position = "none")
|
|
|
|
(p1 | p2) / (p3 | p4) + patchwork::plot_annotation(tag_levels = "A")
|
|
```
|
|
|
|
|
|
|
|
|
|
|