--- title: "空间分析" subtitle: 《区域水环境污染数据分析实践》
Data analysis practice of regional water environment pollution author: 苏命、王为东
中国科学院大学资源与环境学院
中国科学院生态环境研究中心 date: today lang: zh format: revealjs: theme: dark slide-number: true chalkboard: buttons: true preview-links: auto lang: zh toc: true toc-depth: 1 toc-title: 大纲 logo: ./_extensions/inst/img/ucaslogo.png css: ./_extensions/inst/css/revealjs.css pointer: key: "p" color: "#32cd32" pointerSize: 18 revealjs-plugins: - pointer filters: - d2 knitr: opts_chunk: dev: "svg" retina: 3 execute: freeze: auto cache: true echo: true fig-width: 5 fig-height: 6 --- ## 目录 1. 空间数据基础概念 2. Simple Features (SF) 核心规范 3. SFEXT扩展功能实战 4. 空间关系与几何操作 5. 空间统计分析方法 6. 综合实战案例 ## 1. 空间数据基础概念 ### 空间数据类型体系 ```r library(sf) # 创建示例几何类型 st_point(c(0, 0)) # 点 st_linestring(rbind(c(0, 0), c(1, 1))) # 线 st_polygon(list(rbind(c(0, 0), c(1, 0), c(1, 1), c(0, 0)))) # 面 ``` ### 坐标参考系(CRS) ```r # 常用坐标系定义 wgs84 <- st_crs(4326) # 经纬度坐标 utm50n <- st_crs(32650) # UTM 50N投影 cat(st_crs(wgs84)$proj4string) ``` ## 2. Simple Features (SF) 核心规范 ### 创建SF对象 ```r # 从数据框转换 df <- data.frame( city = c("Beijing", "Shanghai"), pop = c(2171, 2424), geometry = st_sfc( st_point(c(116.4, 39.9)), st_point(c(121.5, 31.2)) ) ) sf_df <- st_as_sf(df, crs = 4326) ``` ### 数据IO操作 ```r # 写入/读取空间文件 st_write(sf_df, "cities.shp") new_sf <- st_read("cities.shp") # 从内置数据集加载 nc <- st_read(system.file("shape/nc.shp", package = "sf")) ``` ## 3. SFEXT扩展功能实战 ### 高级几何生成 ```r remotes::install_github("r-spatial/sfext") library(sfext) # 生成渔网网格 grid <- st_regular_grid( xmin = 0, ymin = 0, xmax = 100, ymax = 100, cell_size = 10 ) plot(grid) ``` ### 空间索引加速 ```r # 创建空间索引 system.time( without_idx <- st_intersects(nc, nc) ) system.time( with_idx <- st_intersects(st_make_valid(nc), sparse = TRUE) ) ``` ## 4. 空间关系与操作 ### 拓扑关系判断 ```r p1 <- st_point(c(0, 0)) p2 <- st_point(c(1, 1)) st_distance(p1, p2) # 距离计算 poly <- st_polygon(list(rbind(c(0, 0), c(1, 0), c(1, 1), c(0, 0)))) st_contains(poly, p1) # 包含关系 ``` ### 几何变换操作 ```r # 缓冲区分析 buf <- st_buffer(nc[1, ], dist = 0.1) plot(buf) plot(nc[1, ]$geometry, add = TRUE, col = 'red') # 凸包计算 convex <- st_convex_hull(nc) plot(convex) ``` ## 5. 空间统计分析方法 ### 点模式分析 ```r library(spatstat) # 创建点模式对象 ppp <- ppp(x = runif(100), y = runif(100), window = owin(c(0, 1), c(0, 1))) # K函数分析 K <- Kest(ppp) plot(K, main = "Ripley's K函数") ``` ### 空间自相关检验 ```r library(spdep) # Moran's I检验 nb <- poly2nb(nc) lw <- nb2listw(nb) moran.test(nc$BIR74, lw) ``` ## 6. 综合实战案例 ### 城市公园服务区分析 ```r library(osmdata) library(sfnetworks) # 获取OSM数据 parks <- opq("New York") %>% add_osm_feature("leisure", "park") %>% osmdata_sf() roads <- opq("New York") %>% add_osm_feature("highway") %>% osmdata_sf() # 构建网络分析 net <- as_sfnetwork(roads$osm_lines) %>% st_transform(32618) # 计算服务范围(示例) service_area <- st_network_blend(net, parks$osm_polygons) %>% st_buffer(500) # 500米服务区 ``` ## 可视化增强技巧 ### 分层绘图 ```r library(ggplot2) ggplot() + geom_sf(data = nc, aes(fill = BIR74)) + geom_sf(data = service_area, alpha = 0.3) + scale_fill_viridis_c() + theme_minimal() ``` ### 交互可视化 ```r library(mapview) mapview(nc, zcol = "BIR74", burst = TRUE) + mapview(service_area, alpha.regions = 0.2) ``` # 实践案例 ## 中国地图 ::: panel-tabset ### Code ```{r} #| echo: true #| eval: false #| out-width: 50% library(tidyverse) library(sf) # 更稳定的预处理数据源(含港澳台) # china <- st_read("https://geo.datav.aliyun.com/areas_v3/bound/100000.json") china <- st_read("./中华人民共和国.json", quiet = TRUE) library(ggplot2) ggplot(china) + geom_sf(fill = "#F6F6F6", color = "#666666", size = 0.3) + theme_void() + labs(title = "China") # 添加模拟经济数据(实际应用可连接统计年鉴) set.seed(123) china$gdp <- runif(nrow(china), 100, 1000) # 模拟GDP数据(亿元) china |> ggplot() + geom_sf(aes(fill = gdp), color = "gray90", size = 0.2) + scale_fill_gradientn( colours = c("#2c7bb6", "#abd9e9", "#ffffbf", "#fdae61", "#d7191c"), name = NULL ) + theme_void() + theme(legend.position = c(0.85, 0.3)) ``` ### Output ```{r} #| echo: false #| out-width: 80% #| fig-width: 6 #| fig-height: 3 library(tidyverse) library(sf) # 更稳定的预处理数据源(含港澳台) # china <- st_read("https://geo.datav.aliyun.com/areas_v3/bound/100000.json") china <- st_read("./中华人民共和国.json", quiet = TRUE) library(ggplot2) ggplot(china) + geom_sf(fill = "#F6F6F6", color = "#666666", size = 0.3) + theme_void() + labs(title = "China") # 添加模拟经济数据(实际应用可连接统计年鉴) set.seed(123) china$gdp <- runif(nrow(china), 100, 1000) # 模拟GDP数据(亿元) china |> ggplot() + geom_sf(aes(fill = gdp), color = "gray90", size = 0.2) + scale_fill_gradientn( colours = c("#2c7bb6", "#abd9e9", "#ffffbf", "#fdae61", "#d7191c"), name = NULL ) + theme_void() + theme(legend.position = c(0.85, 0.3)) ``` ::: ## Kriging ::: panel-tabset ### Code ```{r} #| echo: true #| eval: false #| out-width: 50% require(sf) require(sp) require(gstat) require(raster) require(ggplot2) # 读取太湖边界 lake_boundary <- st_read("../../data/taihu.shp") |> sf::st_make_valid() main_water <- st_difference( lake_boundary[lake_boundary$Name == "boundary", ], st_union(lake_boundary[lake_boundary$Name != "boundary", ]) ) get_kriging <- function(indf, Yvarname, grid = NULL, boundsf = main_water) { insf <- indf |> sfext::df_to_sf(crs = 4326, coords = c("long", "lat")) |> dplyr::select(-c("long", "lat")) |> dplyr::rename(Y = tidyselect::all_of(Yvarname)) if (is.null(grid)) { # 1. 将sf边界转为terra格式 v <- terra::vect(boundsf) # 2. 创建基础网格 base_grid <- terra::rast(v, resolution = 0.002) # 基础低分辨率 # 3. 创建高分辨率区域(例如边界附近) buffer_zone <- buffer(v, width = 0.005) # 边界附近创建缓冲区 hi_res_grid <- terra::rast(buffer_zone, resolution = 0.001) # 4. 合并网格 final_grid <- merge(base_grid, hi_res_grid) # 5. 转为点并裁剪 grid <- terra::as.points(final_grid) |> sf::st_as_sf() |> sf::st_filter(main_water) } insp <- sf::as_Spatial(insf) fit <- automap::autofitVariogram(Y ~ 1, insp) # 克里金插值 m <- gstat::gstat( formula = Y ~ 1, data = insp, model = fit$var_model ) predsf <- predict(m, grid) |> sf::st_as_sf() outsf <- predsf |> st_coordinates() |> as.data.frame() |> cbind(pred = predsf$var1.pred) return(outsf) } wqdf <- readxl::read_xlsx("../../data/wqdata.xlsx") if (FALSE) { wqsf <- wqdf |> nest(datedf = -date) |> dplyr::mutate( krigingsf = purrr::map(datedf, \(x) { get_kriging(x, "conc", boundsf = main_water) }) ) saveRDS(wqsf, "./wqsf.rds") } wqsf <- readRDS("./wqsf.rds") wqsf |> unnest(krigingsf) |> ggplot(aes(X, Y)) + geom_contour_filled(aes(z = pred), bins = 20) + geom_sf( data = main_water, aes(x = NULL, y = NULL), fill = NA, colour = "black", linewidth = 1 ) + # scale_fill_gradientn(colors = hcl.colors(100, "RdYlBu")) + # 设置颜色渐变 # ggsci::scale_fill_aaas() + coord_sf() + theme_void() ``` ### Output ```{r} #| echo: false #| out-width: 80% #| message: false #| fig-width: 6 #| fig-height: 3 require(sf) require(sp) require(gstat) require(raster) require(ggplot2) # 读取太湖边界 lake_boundary <- st_read("../../data/taihu.shp", quiet = TRUE) |> sf::st_make_valid() main_water <- st_difference( lake_boundary[lake_boundary$Name == "boundary", ], st_union(lake_boundary[lake_boundary$Name != "boundary", ]) ) get_kriging <- function(indf, Yvarname, grid = NULL, boundsf = main_water) { insf <- indf |> sfext::df_to_sf(crs = 4326, coords = c("long", "lat")) |> dplyr::select(-c("long", "lat")) |> dplyr::rename(Y = tidyselect::all_of(Yvarname)) if (is.null(grid)) { # 1. 将sf边界转为terra格式 v <- terra::vect(boundsf) # 2. 创建基础网格 base_grid <- terra::rast(v, resolution = 0.002) # 基础低分辨率 # 3. 创建高分辨率区域(例如边界附近) buffer_zone <- buffer(v, width = 0.005) # 边界附近创建缓冲区 hi_res_grid <- terra::rast(buffer_zone, resolution = 0.001) # 4. 合并网格 final_grid <- merge(base_grid, hi_res_grid) # 5. 转为点并裁剪 grid <- terra::as.points(final_grid) |> sf::st_as_sf() |> sf::st_filter(main_water) } insp <- sf::as_Spatial(insf) fit <- automap::autofitVariogram(Y ~ 1, insp) # 克里金插值 m <- gstat::gstat( formula = Y ~ 1, data = insp, model = fit$var_model ) predsf <- predict(m, grid) |> sf::st_as_sf() outsf <- predsf |> st_coordinates() |> as.data.frame() |> cbind(pred = predsf$var1.pred) return(outsf) } wqdf <- readxl::read_xlsx("../../data/wqdata.xlsx") if (FALSE) { wqsf <- wqdf |> nest(datedf = -date) |> dplyr::mutate( krigingsf = purrr::map(datedf, \(x) { get_kriging(x, "conc", boundsf = main_water) }) ) saveRDS(wqsf, "./wqsf.rds") } wqsf <- readRDS("./wqsf.rds") wqsf |> unnest(krigingsf) |> ggplot(aes(X, Y)) + geom_contour_filled(aes(z = pred), bins = 20) + geom_sf( data = main_water, aes(x = NULL, y = NULL), fill = NA, colour = "black", linewidth = 1 ) + # scale_fill_gradientn(colors = hcl.colors(100, "RdYlBu")) + # 设置颜色渐变 # ggsci::scale_fill_aaas() + coord_sf() + theme_void() ``` ::: ## 学习资源 1. SF官方文档:https://r-spatial.github.io/sf/ 2. 《Geocomputation with R》在线书 3. R-spatial教程:https://github.com/r-spatial/workshops ``` ### 使用说明: 1. 需要R 4.0+环境 2. 推荐安装依赖: ```r install.packages(c("sf", "spatstat", "spdep", "osmdata", "sfnetworks", "ggplot2", "mapview")) remotes::install_github("r-spatial/sfext") ``` 3. 实际案例数据可替换为本地shapefile 4. 交互可视化部分需要浏览器支持 ## 欢迎讨论!{.center} `r rmdify::slideend(wechat = FALSE, type = "public", tel = FALSE, thislink = "https://drc.drwater.net/course/public/RWEP/PUB/SD/")`