--- title: "RWEP L7- Data Transform" --- # Data Source ```{r} dailylanddf <- readr::read_csv("../data/chinawq/daily_land.csv") metadatadf <- readxl::read_xls("../data/chinawq/metadata_and_statistics.xls") monthlylanddf <- readr::read_csv("../data/chinawq/monthly_ocean.csv") weeklylanddf <- readr::read_csv("../data/chinawq/weekly_land.csv") ``` ```{r} require(tidyverse) dailylanddf dailylanddf |> select(.data$MonitoringLocationIdentifier, .data$LongitudeMeasure_WGS84) dailylanddf |> select("MonitoringLocationIdentifier", "LongitudeMeasure_WGS84") dailylanddf |> select(1:3) dailylanddf |> rename(x = 1) |> rename(y = 2) ``` # 内置function ```{r} dailylanddf |> select(1:3) |> rename_all(\(x) c("a", "b", "c")) |> rename_all(\(x) paste0(x, "_new")) dailylanddf |> select(1:3) |> rename_all(~ c("a", "b", "c")) |> rename_all(~ paste0(., "_new")) dailylanddf |> select(1:3) |> rename_all(~ c("x", "y", "z")) ``` ```{r} fulldatadf <- readr::read_csv("../data/chinawq/full_dataset.csv") datadf <- fulldatadf |> select(lon = 2, lat = 3, date = 4, varname = 5, value = 6) |> mutate(value = as.numeric(value)) |> tidyr::pivot_wider(names_from = varname, values_from = value) saveRDS(datadf, "../data/chinawq/datadf.rds") ``` # 统计 ## 统计每月的DO的平均值 ```{r} datadf <- readRDS("../data/chinawq/datadf.rds") datadf |> select(1:7) |> mutate(month = month(date)) |> select(lon, lat, month, date, everything()) |> group_by(month) |> summarize(DO_mean = mean(DO, na.rm = TRUE), DO_sd = sd(DO, na.rm = TRUE)) |> ggplot(aes(x = month, y = DO_mean)) + geom_point() + geom_smooth(method = "loess") + geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd)) ``` ```{r} datadf |> select(1:7) |> mutate(month = month(date)) |> mutate(year = year(date)) |> select(lon, lat, year, month, date, everything()) |> summarize( DO_mean = mean(DO, na.rm = TRUE), DO_sd = sd(DO, na.rm = TRUE), .by = c(year, month) ) |> ggplot(aes(x = month, y = DO_mean)) + geom_point() + geom_smooth(method = "loess") + geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd)) + facet_wrap(~year) ``` ```{r} datadf |> select(1:7) |> mutate(month = month(date)) |> mutate(year = year(date)) |> select(-lon, -lat, -date) |> group_by(year, month) |> summarize_all(\(x) mean(x, na.rm = TRUE)) ``` ## nest ```{r} for (isite in 1:10) { m <- datadf |> select(1:7) |> tidyr::nest(wqdf = -c(lon, lat)) |> pull(wqdf) |> nth(isite) |> lm(formula = DO ~ pH, data = _) slope <- coef(m)[2] print(slope) } ``` ## purrr::map ```{r} require(tidyverse) datadf <- readRDS("../data/chinawq/datadf.rds") datadf |> select(1:7) |> tidyr::nest(wqdf = -c(lon, lat)) |> slice(1:10) |> mutate( m = purrr::map( wqdf, \(x) { lm(formula = DO ~ pH, data = x) } ) ) |> mutate(slope = purrr::map_dbl(m, \(x) coef(x)[2])) |> mutate( p = purrr::map2(wqdf, slope, \(x, y) { x |> ggplot(aes(x = pH, y = DO)) + geom_smooth(method = "lm") + geom_point() + ggtitle(paste0("Slope: ", round(y, 2))) }) ) |> pull(p) |> patchwork::wrap_plots() + patchwork::plot_layout(ncol = 2) ``` ```{r} longriverdf <- tibble( fn = dir( "../data/LongRiver/data/", pattern = "csv", full.names = TRUE, recursive = TRUE ) ) |> mutate(data = purrr::map(fn, readr::read_csv)) lang <- "cn" longriverdf |> mutate(year = gsub("^.*([0-9)]{4})-.*$", "\\1", fn)) |> mutate(month = gsub("^.*[0-9)]{4}-([0-9]{2}).*$", "\\1", fn)) |> mutate(site = gsub("^.*_(.*).csv$", "\\1", fn)) |> select(-fn) |> slice(1:10) |> tidyr::unnest(data) |> ggplot(aes(x = site, y = z)) + geom_boxplot(colour = "black", size = 0.8) + facet_wrap(~month) + dwfun::theme_sci() ``` ```{r} longriverdf |> mutate(year = gsub("^.*([0-9)]{4})-.*$", "\\1", fn)) |> mutate(month = gsub("^.*[0-9)]{4}-([0-9]{2}).*$", "\\1", fn)) |> mutate(site = gsub("^.*_(.*).csv$", "\\1", fn)) |> select(-fn) |> slice(1:12) |> mutate( p = purrr::map(data, \(x) { x |> ggplot(aes(tm, z)) + geom_point() }) ) |> pull(p) |> patchwork::wrap_plots() |> patchwork::plot_layout(ncol = 3) ``` ```{r} # datadf # 不同年份,DO 的平均值, # x lon # y lat # colour/ fill Do mean # facet: year datadf |> dplyr::mutate(year = year(date)) |> summarize(DO_mean = mean(DO, na.rm = TRUE), .by = c(lon, lat, year)) |> dplyr::filter(year > 2006) |> ggplot(aes(x = lon, y = lat)) + geom_point(aes(fill = DO_mean), shape = 21) + scale_fill_viridis_c() + facet_wrap(~year) + labs(x = "Longitude", y = "Latitude", fill = "DO (mg/L)") + theme_bw() ``` ```{r} # x 月份 # y NH4: pH p1 <- datadf |> mutate(month = month(date)) |> dplyr::filter(NH4N < 100) |> ggplot(aes(x = month, y = NH4N)) + geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) + scale_y_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format( "log10", scales::math_format(10^.x, format = "%.1f") ) ) + scale_x_continuous(breaks = 1:12, labels = month.abb) + theme(legend.position = "none") p2 <- datadf |> mutate(month = month(date)) |> ggplot(aes(x = month, y = CODMn)) + geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) + scale_y_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format( "log10", scales::math_format(10^.x, format = "%.1f") ) ) + scale_x_continuous(breaks = 1:12, labels = month.abb) + theme(legend.position = "none") p3 <- datadf |> mutate(month = month(date)) |> dplyr::filter(DO < 20) |> ggplot(aes(x = month, y = DO)) + geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) + scale_x_continuous(breaks = 1:12, labels = month.abb) + theme(legend.position = "none") p4 <- datadf |> mutate(month = month(date)) |> dplyr::filter(pH > 5) |> ggplot(aes(x = month, y = pH)) + geom_boxplot(aes(fill = as.factor(month)), colour = "black", size = 0.8) + scale_x_continuous(breaks = 1:12, labels = month.abb) + theme(legend.position = "none") (p1 | p2) / (p3 | p4) + patchwork::plot_annotation(tag_levels = "A") ```