--- title: "RWEP L7- Data Transform" --- # Data Source ```{r} dailylanddf <- readr::read_csv("../data/chinawq/daily_land.csv") metadatadf <- readxl::read_xls("../data/chinawq/metadata_and_statistics.xls") monthlylanddf <- readr::read_csv("../data/chinawq/monthly_ocean.csv") weeklylanddf <- readr::read_csv("../data/chinawq/weekly_land.csv") ``` ```{r} require(tidyverse) dailylanddf dailylanddf |> select(.data$MonitoringLocationIdentifier, .data$LongitudeMeasure_WGS84) dailylanddf |> select("MonitoringLocationIdentifier", "LongitudeMeasure_WGS84") dailylanddf |> select(1:3) dailylanddf |> rename(x = 1) |> rename(y = 2) ``` # 内置function ```{r} dailylanddf |> select(1:3) |> rename_all(\(x) c("a", "b", "c")) |> rename_all(\(x) paste0(x, "_new")) dailylanddf |> select(1:3) |> rename_all(~ c("a", "b", "c")) |> rename_all(~ paste0(., "_new")) dailylanddf |> select(1:3) |> rename_all(~ c("x", "y", "z")) ``` ```{r} fulldatadf <- readr::read_csv("../data/chinawq/full_dataset.csv") datadf <- fulldatadf |> select(lon = 2, lat = 3, date = 4, varname = 5, value = 6) |> mutate(value = as.numeric(value)) |> tidyr::pivot_wider(names_from = varname, values_from = value) saveRDS(datadf, "../data/chinawq/datadf.rds") ``` # 统计 ## 统计每月的DO的平均值 ```{r} datadf <- readRDS("../data/chinawq/datadf.rds") datadf |> select(1:7) |> mutate(month = month(date)) |> select(lon, lat, month, date, everything()) |> group_by(month) |> summarize(DO_mean = mean(DO, na.rm = TRUE), DO_sd = sd(DO, na.rm = TRUE)) |> ggplot(aes(x = month, y = DO_mean)) + geom_point() + geom_smooth(method = "loess") + geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd)) ``` ```{r} datadf |> select(1:7) |> mutate(month = month(date)) |> mutate(year = year(date)) |> select(lon, lat, year, month, date, everything()) |> summarize( DO_mean = mean(DO, na.rm = TRUE), DO_sd = sd(DO, na.rm = TRUE), .by = c(year, month) ) |> ggplot(aes(x = month, y = DO_mean)) + geom_point() + geom_smooth(method = "loess") + geom_errorbar(aes(ymin = DO_mean - DO_sd, ymax = DO_mean + DO_sd)) + facet_wrap(~year) ``` ```{r} datadf |> select(1:7) |> mutate(month = month(date)) |> mutate(year = year(date)) |> select(-lon, -lat, -date) |> group_by(year, month) |> summarize_all(\(x) mean(x, na.rm = TRUE)) ``` ## nest ```{r} for (isite in 1:10) { m <- datadf |> select(1:7) |> tidyr::nest(wqdf = -c(lon, lat)) |> pull(wqdf) |> nth(isite) |> lm(formula = DO ~ pH, data = _) slope <- coef(m)[2] print(slope) } ``` ## purrr::map ```{r} datadf |> select(1:7) |> tidyr::nest(wqdf = -c(lon, lat)) |> slice(1:10) |> mutate( m = purrr::map( wqdf, \(x) { lm(formula = DO ~ pH, data = x) } ) ) |> mutate(slope = purrr::map_dbl(m, \(x) coef(x)[2])) |> mutate( p = purrr::map(wqdf, \(x) { x |> ggplot(aes(x = pH, y = DO)) + geom_smooth(method = "lm") + geom_point() }) ) |> pull(p) |> patchwork::wrap_plots() + patchwork::plot_layout(ncol = 2) ```