Compare commits

...

4 Commits

Author SHA1 Message Date
ming 028ef1c471 L10 2026-06-04 15:55:13 +08:00
ming 7aeb071fba L8 2026-05-28 13:08:20 +08:00
ming 29daf1b240 add L7.qmd 2026-05-26 13:43:04 +08:00
ming 982f26736f Update Ref.bib 2026-05-26 13:42:53 +08:00
23 changed files with 674809 additions and 2 deletions
+40
View File
@@ -45809,3 +45809,43 @@ barcante2020cyanobacteria
year = {2008}
}
@Article{tatters2025benthic,
title = {Benthic cyanobacterial accumulations and associated cyanotoxins in coastal urban stormwater pond networks},
author = {Tatters, Avery O. and Clevenger, Courtney and Strangman, Wendy K. and Oehrle, Stuart and Kudela, Raphael M. and Aukamp, Jessica and Wan, Yongshan},
year = 2025,
journal = {Harmful Algae},
publisher = {Elsevier BV},
volume = 144,
pages = 102833,
url = {http://dx.doi.org/10.1016/j.hal.2025.102833},
issn = {1568-9883},
doi = {10.1016/j.hal.2025.102833}
}
@Article{maurer2024temporal,
title = {Temporal Dynamics of Cyanobacterial Bloom Community Composition and Toxin Production from Urban Lakes},
author = {Maurer, Julie A. and Xia, Runjie and Kim, Andrew M. and Oblie, Nana and Hefferan, Sierra and Xie, Hannuo and Slitt, Angela and Jenkins, Bethany D. and Bertin, Matthew J.},
year = 2024,
journal = {ACS ES&T Water},
publisher = {American Chemical Society (ACS)},
volume = 4,
pages = {34233432},
url = {http://dx.doi.org/10.1021/acsestwater.4c00266},
issn = {2690-0637},
doi = {10.1021/acsestwater.4c00266},
number = 8}
@article{paerl2016duelling,
author = {Paerl, Hans W. and Otten, Timothy G.},
title = {Duelling CyanoHABs: unravelling the environmental drivers controlling dominance and succession among diazotrophic and non-N2-fixing harmful cyanobacteria},
journal = {Environmental Microbiology},
volume = {18},
number = {2},
pages = {316-324},
doi = {10.1111/1462-2920.13035},
url = {https://enviromicro-journals.onlinelibrary.wiley.com/doi/abs/10.1111/1462-2920.13035},
eprint = {https://enviromicro-journals.onlinelibrary.wiley.com/doi/pdf/10.1111/1462-2920.13035},
year = {2016}
}
+1 -1
View File
@@ -34,4 +34,4 @@ website:
- text: Codes
url: https://git.drwater.net/{{< var projtype >}}/{{< var reponame >}}/src/branch/{{< var branch >}}
- text: Issue
url: https://git.drwater.net/manuscript/{{< var reponame >}}/issues
url: https://git.drwater.net/{{< var projtype >}}/{{< var reponame >}}/issues
+1 -1
View File
@@ -33,4 +33,4 @@ website:
- text: Codes
url: https://git.drwater.net/{{< var projtype >}}/{{< var reponame >}}/src/branch/{{< var branch >}}
- text: Issue
url: https://git.drwater.net/manuscript/{{< var reponame >}}/issues
url: https://git.drwater.net/{{< var projtype >}}/{{< var reponame >}}/issues
+617
View File
@@ -0,0 +1,617 @@
# 实践部分
## Kriging
### Code
```{r}
#| echo: true
#| eval: false
#| out-width: 50%
require(sf)
require(sp)
require(gstat)
require(raster)
require(ggplot2)
# 读取太湖边界
lake_boundary <- st_read("../../data/taihu.shp") |>
sf::st_make_valid()
main_water <- st_difference(
lake_boundary[lake_boundary$Name == "boundary", ],
st_union(lake_boundary[lake_boundary$Name != "boundary", ])
)
get_kriging <- function(indf, Yvarname, grid = NULL, boundsf = main_water) {
insf <- indf |>
sfext::df_to_sf(crs = 4326, coords = c("long", "lat")) |>
dplyr::select(-c("long", "lat")) |>
dplyr::rename(Y = tidyselect::all_of(Yvarname))
if (is.null(grid)) {
# 1. 将sf边界转为terra格式
v <- terra::vect(boundsf)
# 2. 创建基础网格
base_grid <- terra::rast(v, resolution = 0.002) # 基础低分辨率
# 3. 创建高分辨率区域(例如边界附近)
buffer_zone <- buffer(v, width = 0.005) # 边界附近创建缓冲区
hi_res_grid <- terra::rast(buffer_zone, resolution = 0.001)
# 4. 合并网格
final_grid <- merge(base_grid, hi_res_grid)
# 5. 转为点并裁剪
grid <- terra::as.points(final_grid) |>
sf::st_as_sf() |>
sf::st_filter(main_water)
}
insp <- sf::as_Spatial(insf)
fit <- automap::autofitVariogram(Y ~ 1, insp)
# 克里金插值
m <- gstat::gstat(
formula = Y ~ 1,
data = insp,
model = fit$var_model
)
predsf <- predict(m, grid) |>
sf::st_as_sf()
outsf <- predsf |>
st_coordinates() |>
as.data.frame() |>
cbind(pred = predsf$var1.pred)
return(outsf)
}
wqdf <- readxl::read_xlsx("../data/wqdata.xlsx")
if (FALSE) {
wqsf <- wqdf |>
nest(datedf = -date) |>
dplyr::mutate(
krigingsf = purrr::map(datedf, \(x) {
get_kriging(x, "conc", boundsf = main_water)
})
)
saveRDS(wqsf, "./wqsf.rds")
}
wqsf <- readRDS("./wqsf.rds")
wqsf |>
unnest(krigingsf) |>
ggplot(aes(X, Y)) +
geom_contour_filled(aes(z = pred), bins = 20) +
geom_sf(
data = main_water,
aes(x = NULL, y = NULL),
fill = NA,
colour = "black",
linewidth = 1
) +
# scale_fill_gradientn(colors = hcl.colors(100, "RdYlBu")) + # 设置颜色渐变
# ggsci::scale_fill_aaas() +
coord_sf() +
theme_void()
```
### Output
```{r}
#| echo: false
#| out-width: 80%
#| message: false
#| fig-width: 6
#| fig-height: 3
require(sf)
require(sp)
require(gstat)
require(raster)
require(ggplot2)
# 读取太湖边界
lake_boundary <- st_read("../../data/taihu.shp", quiet = TRUE) |>
sf::st_make_valid()
main_water <- st_difference(
lake_boundary[lake_boundary$Name == "boundary", ],
st_union(lake_boundary[lake_boundary$Name != "boundary", ])
)
get_kriging <- function(indf, Yvarname, grid = NULL, boundsf = main_water) {
insf <- indf |>
sfext::df_to_sf(crs = 4326, coords = c("long", "lat")) |>
dplyr::select(-c("long", "lat")) |>
dplyr::rename(Y = tidyselect::all_of(Yvarname))
if (is.null(grid)) {
# 1. 将sf边界转为terra格式
v <- terra::vect(boundsf)
# 2. 创建基础网格
base_grid <- terra::rast(v, resolution = 0.002) # 基础低分辨率
# 3. 创建高分辨率区域(例如边界附近)
buffer_zone <- buffer(v, width = 0.005) # 边界附近创建缓冲区
hi_res_grid <- terra::rast(buffer_zone, resolution = 0.001)
# 4. 合并网格
final_grid <- merge(base_grid, hi_res_grid)
# 5. 转为点并裁剪
grid <- terra::as.points(final_grid) |>
sf::st_as_sf() |>
sf::st_filter(main_water)
}
insp <- sf::as_Spatial(insf)
fit <- automap::autofitVariogram(Y ~ 1, insp)
# 克里金插值
m <- gstat::gstat(
formula = Y ~ 1,
data = insp,
model = fit$var_model
)
predsf <- predict(m, grid) |>
sf::st_as_sf()
outsf <- predsf |>
st_coordinates() |>
as.data.frame() |>
cbind(pred = predsf$var1.pred)
return(outsf)
}
wqdf <- readxl::read_xlsx("../../data/wqdata.xlsx")
if (FALSE) {
wqsf <- wqdf |>
nest(datedf = -date) |>
dplyr::mutate(
krigingsf = purrr::map(datedf, \(x) {
get_kriging(x, "conc", boundsf = main_water)
})
)
saveRDS(wqsf, "./wqsf.rds")
}
wqsf <- readRDS("./wqsf.rds")
wqsf |>
unnest(krigingsf) |>
ggplot(aes(X, Y)) +
geom_contour_filled(aes(z = pred), bins = 20) +
geom_sf(
data = main_water,
aes(x = NULL, y = NULL),
fill = NA,
colour = "black",
linewidth = 1
) +
# scale_fill_gradientn(colors = hcl.colors(100, "RdYlBu")) + # 设置颜色渐变
# ggsci::scale_fill_aaas() +
coord_sf() +
theme_void()
```
:::
## 数据
```{r}
require(tidyverse)
sitedf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2014-01/nla2007_sampledlakeinformation_20091113.csv"
) |>
select(
SITE_ID,
lon = LON_DD,
lat = LAT_DD,
name = LAKENAME,
area = LAKEAREA,
zmax = DEPTHMAX
) |>
group_by(SITE_ID) |>
summarize(
lon = mean(lon, na.rm = TRUE),
lat = mean(lat, na.rm = TRUE),
name = unique(name),
area = mean(area, na.rm = TRUE),
zmax = mean(zmax, na.rm = TRUE)
)
visitdf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
) |>
select(SITE_ID, date = DATE_PROFILE, year = YEAR, visit = VISIT_NO) |>
distinct()
waterchemdf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2013-09/nla2007_profile_20091008.csv"
) |>
select(
SITE_ID,
date = DATE_PROFILE,
depth = DEPTH,
temp = TEMP_FIELD,
do = DO_FIELD,
ph = PH_FIELD,
cond = COND_FIELD,
)
sddf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2014-10/nla2007_secchi_20091008.csv"
) |>
select(
SITE_ID,
date = DATE_SECCHI,
sd = SECMEAN,
clear_to_bottom = CLEAR_TO_BOTTOM
)
trophicdf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2014-10/nla2007_trophic_conditionestimate_20091123.csv"
) |>
select(SITE_ID, visit = VISIT_NO, tp = PTL, tn = NTL, chla = CHLA) |>
left_join(visitdf, by = c("SITE_ID", "visit")) |>
select(-year, -visit) |>
group_by(SITE_ID, date) |>
summarize(
tp = mean(tp, na.rm = TRUE),
tn = mean(tn, na.rm = TRUE),
chla = mean(chla, na.rm = TRUE)
)
phytodf <- readr::read_csv(
"https://www.epa.gov/sites/default/files/2014-10/nla2007_phytoplankton_softalgaecount_20091023.csv"
) |>
select(
SITE_ID,
date = DATEPHYT,
depth = SAMPLE_DEPTH,
phyta = DIVISION,
genus = GENUS,
species = SPECIES,
tax = TAXANAME,
abund = ABUND
) |>
mutate(phyta = gsub(" .*$", "", phyta)) |>
filter(!is.na(genus)) |>
group_by(SITE_ID, date, depth, phyta, genus) |>
summarize(abund = sum(abund, na.rm = TRUE)) |>
nest(phytodf = -c(SITE_ID, date))
envdf <- waterchemdf |>
filter(depth < 2) |>
select(-depth) |>
group_by(SITE_ID, date) |>
summarise_all(~ mean(., na.rm = TRUE)) |>
ungroup() |>
left_join(sddf, by = c("SITE_ID", "date")) |>
left_join(trophicdf, by = c("SITE_ID", "date"))
nla <- envdf |>
left_join(phytodf) |>
left_join(sitedf, by = "SITE_ID") |>
filter(!purrr::map_lgl(phytodf, is.null)) |>
mutate(
cyanophyta = purrr::map(
phytodf,
~ .x |>
dplyr::filter(phyta == "Cyanophyta") |>
summarize(cyanophyta = sum(abund, na.rm = TRUE))
)
) |>
unnest(cyanophyta) |>
select(-phyta) |>
mutate(clear_to_bottom = ifelse(is.na(clear_to_bottom), TRUE, FALSE))
# library(rmdify)
# library(dwfun)
# dwfun::init()
```
## 数据
```{r}
skimr::skim(nla)
```
## 简单模型
```{r}
nla |>
filter(tp > 1) |>
ggplot(aes(tn, tp)) +
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)
m1 <- lm(log10(tp) ~ log10(tn), data = nla)
summary(m1)
```
## 复杂指标
```{r}
nla |>
filter(tp > 1) |>
ggplot(aes(tp, cyanophyta)) +
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)
m2 <- lm(log10(cyanophyta) ~ log10(tp), data = nla)
summary(m2)
```
## tidymodels - Data split
```{r}
require(tidymodels)
(nla_split <- rsample::initial_split(nla, prop = 0.7, strata = zmax))
(nla_train <- training(nla_split))
(nla_test <- testing(nla_split))
```
## tidymodels - recipe
```{r}
nla_formula <- as.formula(
"cyanophyta ~ temp + do + ph + cond + sd + tp + tn + chla + clear_to_bottom"
)
# nla_formula <- as.formula("cyanophyta ~ temp + do + ph + cond + sd + tp + tn")
nla_recipe <- recipes::recipe(nla_formula, data = nla_train) |>
recipes::step_string2factor(all_nominal()) |>
recipes::step_nzv(all_nominal()) |>
recipes::step_log(chla, cyanophyta, base = 10) |>
recipes::step_normalize(all_numeric_predictors()) |>
prep()
nla_recipe
```
## tidymodels - cross validation
```{r}
nla_cv <- recipes::bake(
nla_recipe,
new_data = training(nla_split)
) |>
rsample::vfold_cv(v = 10)
nla_cv
```
## tidymodels - Model specification
```{r}
xgboost_model <- parsnip::boost_tree(
mode = "regression",
trees = 1000,
min_n = tune(),
tree_depth = tune(),
learn_rate = tune(),
loss_reduction = tune()
) |>
set_engine("xgboost", objective = "reg:squarederror")
xgboost_model
```
## tidymodels - Grid specification
```{r}
# grid specification
xgboost_params <- dials::parameters(
min_n(),
tree_depth(),
learn_rate(),
loss_reduction()
)
xgboost_params
```
## tidymodels - Grid specification
```{r}
xgboost_grid <- dials::grid_max_entropy(
xgboost_params,
size = 60
)
knitr::kable(head(xgboost_grid))
```
## tidymodels - Workflow
```{r}
xgboost_wf <- workflows::workflow() |>
add_model(xgboost_model) |>
add_formula(nla_formula)
xgboost_wf
```
## tidymodels - Tune
```{r}
#| cache: true
# hyperparameter tuning
if (FALSE) {
xgboost_tuned <- tune::tune_grid(
object = xgboost_wf,
resamples = nla_cv,
grid = xgboost_grid,
metrics = yardstick::metric_set(rmse, rsq, mae),
control = tune::control_grid(verbose = TRUE)
)
saveRDS(xgboost_tuned, "./xgboost_tuned.RDS")
}
xgboost_tuned <- readRDS("./xgboost_tuned.RDS")
```
## tidymodels - Best model
```{r}
xgboost_tuned |>
tune::show_best(metric = "rmse") |>
knitr::kable()
```
## tidymodels - Best model
```{r}
xgboost_tuned |>
collect_metrics()
```
## tidymodels - Best model
```{r}
#| fig-width: 9
#| fig-height: 5
#| out-width: "100%"
xgboost_tuned |>
autoplot()
```
## tidymodels - Best model
```{r}
xgboost_best_params <- xgboost_tuned |>
tune::select_best(metric = "rmse")
knitr::kable(xgboost_best_params)
```
## tidymodels - Final model
```{r}
xgboost_model_final <- xgboost_model |>
finalize_model(xgboost_best_params)
xgboost_model_final
```
## tidymodels - Train evaluation
```{r}
(train_processed <- bake(nla_recipe, new_data = nla_train))
```
## tidymodels - Train data
```{r}
train_prediction <- xgboost_model_final |>
# fit the model on all the training data
fit(
formula = nla_formula,
data = train_processed
) |>
# predict the sale prices for the training data
predict(new_data = train_processed) |>
bind_cols(
nla_train |>
mutate(.obs = log10(cyanophyta))
)
xgboost_score_train <-
train_prediction |>
yardstick::metrics(.obs, .pred) |>
mutate(.estimate = format(round(.estimate, 2), big.mark = ","))
knitr::kable(xgboost_score_train)
```
## tidymodels - train evaluation
```{r}
#| fig-width: 5
#| fig-height: 3
#| out-width: "80%"
train_prediction |>
ggplot(aes(.pred, .obs)) +
geom_point() +
geom_smooth(method = "lm")
```
## tidymodels - test data
```{r}
test_processed <- bake(nla_recipe, new_data = nla_test)
test_prediction <- xgboost_model_final |>
# fit the model on all the training data
fit(
formula = nla_formula,
data = train_processed
) |>
# use the training model fit to predict the test data
predict(new_data = test_processed) |>
bind_cols(
nla_test |>
mutate(.obs = log10(cyanophyta))
)
# measure the accuracy of our model using `yardstick`
xgboost_score <- test_prediction |>
yardstick::metrics(.obs, .pred) |>
mutate(.estimate = format(round(.estimate, 2), big.mark = ","))
knitr::kable(xgboost_score)
```
## tidymodels - evaluation
```{r}
#| fig-width: 5
#| fig-height: 3
#| out-width: "80%"
cyanophyta_prediction_residual <- test_prediction |>
arrange(.pred) %>%
mutate(residual_pct = (.obs - .pred) / .pred) |>
select(.pred, residual_pct)
cyanophyta_prediction_residual |>
ggplot(aes(x = .pred, y = residual_pct)) +
geom_point() +
xlab("Predicted Cyanophyta") +
ylab("Residual (%)")
```
## tidymodels - test evaluation
```{r}
#| fig-width: 5
#| fig-height: 3
#| out-width: "80%"
test_prediction |>
ggplot(aes(.pred, .obs)) +
geom_point() +
geom_smooth(method = "lm", colour = "black")
```
+285
View File
@@ -0,0 +1,285 @@
---
title: "RWEP L7- Data Transform"
---
# Data Source
```{r}
dailylanddf <- readr::read_csv("../data/chinawq/daily_land.csv")
metadatadf <- readxl::read_xls("../data/chinawq/metadata_and_statistics.xls")
monthlylanddf <- readr::read_csv("../data/chinawq/monthly_ocean.csv")
weeklylanddf <- readr::read_csv("../data/chinawq/weekly_land.csv")
```
```{r}
require(tidyverse)
dailylanddf
dailylanddf |>
select(.data$MonitoringLocationIdentifier, .data$LongitudeMeasure_WGS84)
dailylanddf |>
select("MonitoringLocationIdentifier", "LongitudeMeasure_WGS84")
dailylanddf |>
select(1:3)
dailylanddf |>
rename(x = 1) |>
rename(y = 2)
```
# 内置function
```{r}
dailylanddf |>
select(1:3) |>
rename_all(\(x) c("a", "b", "c")) |>
rename_all(\(x) paste0(x, "_new"))
dailylanddf |>
select(1:3) |>
rename_all(~ c("a", "b", "c")) |>
rename_all(~ paste0(., "_new"))
dailylanddf |>
select(1:3) |>
rename_all(~ c("x", "y", "z"))
```
```{r}
fulldatadf <- readr::read_csv("../data/chinawq/full_dataset.csv")
datadf <- fulldatadf |>
select(lon = 2, lat = 3, date = 4, varname = 5, value = 6) |>
mutate(value = as.numeric(value)) |>
tidyr::pivot_wider(names_from = varname, values_from = value)
saveRDS(datadf, "../data/chinawq/datadf.rds")
```
# 统计
## 统计每月的DO的平均值
```{r}
datadf <- readRDS("../data/chinawq/datadf.rds")
datadf |>
select(1:7) |>
mutate(month = month(date)) |>
select(lon, lat, month, date, everything()) |>
group_by(month) |>
summarize(DO_mean = mean(DO, na.rm = TRUE), DO_sd = sd(DO, na.rm = TRUE)) |>
ggplot(aes(x = month, y = DO_mean)) +
geom_point() +
geom_smooth(method = "loess") +
geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd))
```
```{r}
datadf |>
select(1:7) |>
mutate(month = month(date)) |>
mutate(year = year(date)) |>
select(lon, lat, year, month, date, everything()) |>
summarize(
DO_mean = mean(DO, na.rm = TRUE),
DO_sd = sd(DO, na.rm = TRUE),
.by = c(year, month)
) |>
ggplot(aes(x = month, y = DO_mean)) +
geom_point() +
geom_smooth(method = "loess") +
geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd)) +
facet_wrap(~year)
```
```{r}
datadf |>
select(1:7) |>
mutate(month = month(date)) |>
mutate(year = year(date)) |>
select(-lon, -lat, -date) |>
group_by(year, month) |>
summarize_all(\(x) mean(x, na.rm = TRUE))
```
## nest
```{r}
for (isite in 1:10) {
m <- datadf |>
select(1:7) |>
tidyr::nest(wqdf = -c(lon, lat)) |>
pull(wqdf) |>
nth(isite) |>
lm(formula = DO ~ pH, data = _)
slope <- coef(m)[2]
print(slope)
}
```
## purrr::map
```{r}
require(tidyverse)
datadf <- readRDS("../data/chinawq/datadf.rds")
datadf |>
select(1:7) |>
tidyr::nest(wqdf = -c(lon, lat)) |>
slice(1:10) |>
mutate(
m = purrr::map(
wqdf,
\(x) {
lm(formula = DO ~ pH, data = x)
}
)
) |>
mutate(slope = purrr::map_dbl(m, \(x) coef(x)[2])) |>
mutate(
p = purrr::map2(wqdf, slope, \(x, y) {
x |>
ggplot(aes(x = pH, y = DO)) +
geom_smooth(method = "lm") +
geom_point() +
ggtitle(paste0("Slope: ", round(y, 2)))
})
) |>
pull(p) |>
patchwork::wrap_plots() +
patchwork::plot_layout(ncol = 2)
```
```{r}
longriverdf <- tibble(
fn = dir(
"../data/LongRiver/data/",
pattern = "csv",
full.names = TRUE,
recursive = TRUE
)
) |>
mutate(data = purrr::map(fn, readr::read_csv))
lang <- "cn"
longriverdf |>
mutate(year = gsub("^.*([0-9)]{4})-.*$", "\\1", fn)) |>
mutate(month = gsub("^.*[0-9)]{4}-([0-9]{2}).*$", "\\1", fn)) |>
mutate(site = gsub("^.*_(.*).csv$", "\\1", fn)) |>
select(-fn) |>
slice(1:10) |>
tidyr::unnest(data) |>
ggplot(aes(x = site, y = z)) +
geom_boxplot(colour = "black", size = 0.8) +
facet_wrap(~month) +
dwfun::theme_sci()
```
```{r}
longriverdf |>
mutate(year = gsub("^.*([0-9)]{4})-.*$", "\\1", fn)) |>
mutate(month = gsub("^.*[0-9)]{4}-([0-9]{2}).*$", "\\1", fn)) |>
mutate(site = gsub("^.*_(.*).csv$", "\\1", fn)) |>
select(-fn) |>
slice(1:12) |>
mutate(
p = purrr::map(data, \(x) {
x |>
ggplot(aes(tm, z)) +
geom_point()
})
) |>
pull(p) |>
patchwork::wrap_plots() |>
patchwork::plot_layout(ncol = 3)
```
```{r}
# datadf
# 不同年份,DO 的平均值,
# x lon
# y lat
# colour/ fill Do mean
# facet: year
datadf |>
dplyr::mutate(year = year(date)) |>
summarize(DO_mean = mean(DO, na.rm = TRUE), .by = c(lon, lat, year)) |>
dplyr::filter(year > 2006) |>
ggplot(aes(x = lon, y = lat)) +
geom_point(aes(fill = DO_mean), shape = 21) +
scale_fill_viridis_c() +
facet_wrap(~year) +
labs(x = "Longitude", y = "Latitude", fill = "DO (mg/L)") +
theme_bw()
```
```{r}
# x 月份
# y NH4: pH
p1 <- datadf |>
mutate(month = month(date)) |>
dplyr::filter(NH4N < 100) |>
ggplot(aes(x = month, y = NH4N)) +
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format(
"log10",
scales::math_format(10^.x, format = "%.1f")
)
) +
scale_x_continuous(breaks = 1:12, labels = month.abb) +
theme(legend.position = "none")
p2 <- datadf |>
mutate(month = month(date)) |>
ggplot(aes(x = month, y = CODMn)) +
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format(
"log10",
scales::math_format(10^.x, format = "%.1f")
)
) +
scale_x_continuous(breaks = 1:12, labels = month.abb) +
theme(legend.position = "none")
p3 <- datadf |>
mutate(month = month(date)) |>
dplyr::filter(DO < 20) |>
ggplot(aes(x = month, y = DO)) +
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
scale_x_continuous(breaks = 1:12, labels = month.abb) +
theme(legend.position = "none")
p4 <- datadf |>
mutate(month = month(date)) |>
dplyr::filter(pH > 5) |>
ggplot(aes(x = month, y = pH)) +
geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) +
theme(legend.position = "none")
(p1 | p2) / (p3 | p4) + patchwork::plot_annotation(tag_levels = "A")
```
BIN
View File
Binary file not shown.
+1070
View File
File diff suppressed because one or more lines are too long
+202
View File
@@ -0,0 +1,202 @@
---
title: "L9 - Data Virtualization"
format: html
editor: visual
---
# Load Data
```{r}
# getwd()
# setwd("coding")
require(tidyverse)
datadf <- readRDS("../data/chinawq/datadf.rds")
```
# Data view
```{r}
skimr::skim(datadf)
head(datadf)
tail(datadf)
names(datadf)
summary(datadf)
str(datadf)
```
# Plot
```{r}
#| warning: false
#| message: false
# 每月NH4N与pH的相关性散点图
p <- datadf |>
dplyr::filter(NH4N < 100) |>
dplyr::filter(between(year(date), 2016, 2019)) |>
mutate(month = month(date)) |>
ggplot(aes(CODMn, NH4N)) +
geom_point(shape = 21, size = 0.8, fill = "orange") +
geom_smooth(method = "gam", color = "red") +
scale_x_log10() +
scale_y_log10() +
labs(
x = "COD<sub>Mn</sub> (mg L<sup>-1</sup>)",
y = "Ammonia (mg L<sup>-1</sup>)"
) +
facet_wrap(~month, scale = "free", ncol = 4) +
theme(
axis.title.x = ggtext::element_markdown(),
axis.title.y = ggtext::element_markdown()
)
print(p)
plotly::ggplotly(p)
```
```{r}
#| warning: false
#| message: false
# 每月NH4N与pH的相关性散点图
p <- datadf |>
dplyr::filter(NH4N < 100) |>
dplyr::filter(between(year(date), 2016, 2019)) |>
mutate(month = month(date)) |>
ggplot(aes(factor(month), NH4N)) +
geom_jitter(size = 0.1, colour = "gray95") +
geom_violin(fill = "orange", alpha = 0.3) +
scale_y_log10() +
labs(x = "Month", y = "Ammonia (mg L<sup>-1</sup>)") +
theme_classic() +
theme(axis.title.y = ggtext::element_markdown())
print(p)
plotly::ggplotly(p)
```
# Plotly
```{r}
p <- datadf |>
dplyr::filter(year(date) == 2018) |>
ggplot(aes(date, NH4N)) +
geom_point()
ggsave("L9-1.pdf", width = 4, height = 3)
# install.packages("plotly")
plotly::ggplotly(p)
```
# Map - sf
```{r}
# install.packages("sf")
# install.packages("sfext")
require(sf)
require(sfext)
chinawqsf <- sf::read_sf(
"../data/chinawq/Monitoring_sites/Monitoring_sites.shp"
)
chinawqsf |>
ggplot() +
geom_sf(shape = 21, size = 1, fill = "orange")
chinamapsf <- sf::read_sf("../data/中国省级地图GS20191719号.geojson")
ninelinesf <- sf::read_sf("../data/九段线GS20191719号.geojson")
chinamapsf <- sf::read_sf(
"https://git.drwater.net/course/su2026rwep/raw/branch/PUB/data/中国省级地图GS20191719号.geojson"
)
ninelinesf <- sf::read_sf(
"https://git.drwater.net/course/su2026rwep/raw/branch/PUB/data/九段线GS20191719号.geojson"
)
chinacrs <- "+proj=laea +lat_0=40 +lon_0=104"
mapeR::get_chinacrs()
chinawqsf |>
ggplot() +
geom_sf(data = chinamapsf) +
geom_sf(data = ninelinesf) +
geom_sf(shape = 21, size = 1, fill = "orange") +
coord_sf(datum = chinacrs)
```
```{r}
require(mapeR)
map_chinese()
```
```{r}
chinamapsf <- sf::read_sf("../data/中国省级地图GS20191719号.geojson")
ninelinesf <- sf::read_sf("../data/九段线GS20191719号.geojson")
chinamapsf <- sf::read_sf(
"https://git.drwater.net/course/su2026rwep/raw/branch/PUB/data/中国省级地图GS20191719号.geojson"
)
ninelinesf <- sf::read_sf(
"https://git.drwater.net/course/su2026rwep/raw/branch/PUB/data/九段线GS20191719号.geojson"
)
chinacrs <- "+proj=laea +lat_0=40 +lon_0=104"
# 安装
install.packages("ggspatial")
datadf |>
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) |>
sf::st_transform(crs = chinacrs) |>
dplyr::filter(!is.na(CODMn)) |>
ggplot() +
geom_sf(data = chinamapsf, aes(fill = CNAME), alpha = 0.2) +
geom_sf(data = ninelinesf) +
geom_sf(aes(colour = log1p(CODMn))) +
labs(x = NULL, y = NULL, fill = NULL, colour = "CODMn") +
scale_colour_viridis_c() +
ggspatial::annotation_north_arrow(location = "tl") +
ggspatial::annotation_scale(location = "bl") +
theme(legend.position = "none")
mapeR::map_chinese
```
# 具体某个湖泊
```{r}
sf::read_sf("../data/taihu.shp") |>
ggplot() +
geom_sf()
sitesf <- sf::read_sf("~/Desktop/qingcaosha.kml")
st_layers("~/Desktop/qingcaosha.kml")
sitesf <- sf::read_sf("~/Desktop/qingcaosha.kml")
boundsf <- sf::read_sf("~/Desktop/qingcaosha.kml", layer = "qingcaosha")
ggplot() +
geom_sf(data = boundsf) +
geom_sf(data = sitesf) +
geom_sf_text(data = sitesf, aes(label = Name), vjust = 1, size = 3)
```
Binary file not shown.
+1
Submodule data/LongRiver added at d316bf4656
@@ -0,0 +1 @@
UTF-8
Binary file not shown.
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff