---
title: "Data import"
# NOTE(review): line breaks inside subtitle/author are written as explicit
# <br> tags so each field stays a single YAML scalar -- confirm against the
# intended title-slide layout.
subtitle: "《区域水环境污染数据分析实践》<br>Data analysis practice of regional water environment pollution"
author: "苏命、王为东<br>中国科学院大学资源与环境学院<br>中国科学院生态环境研究中心"
date: today
lang: zh
format:
  revealjs:
    theme: dark
    slide-number: true
    chalkboard:
      buttons: true
    preview-links: auto
    lang: zh
    toc: true
    toc-depth: 1
    toc-title: 大纲
    logo: ./_extensions/inst/img/ucaslogo.png
    css: ./_extensions/inst/css/revealjs.css
    pointer:
      key: "p"
      color: "#32cd32"
      pointerSize: 18
revealjs-plugins:
  - pointer
filters:
  - d2
---

<!--
Slide deck: importing rectangular data with readr (read_csv / write_csv),
plus round-tripping through RDS and Parquet and manual data entry.
All data paths are relative to this file ("../../data/...").
-->

```{r}
#| echo: false
# Hidden setup: echo code on every later slide, load the shared project
# setup script (contents not visible from this file) and the tidyverse.
knitr::opts_chunk$set(echo = TRUE)
source("../../coding/_common.R")
library(tidyverse)
```

## 导入csv数据

```{r}
#| echo: false
#| message: false
#| comment: ""
read_lines("../../data/students.csv") |>
  cat(sep = "\n")
```

## 导入csv数据

```{r}
#| label: tbl-students-table
#| echo: false
#| message: false
#| tbl-cap: Data from the students.csv file as a table.
read_csv("../../data/students.csv") |>
  knitr::kable()
```

## 读取数据

```{r}
#| message: true
(students <- read_csv("../../data/students.csv"))
(students <- read_csv("https://pos.it/r4ds-students-csv"))
```

## 读取数据

```{r}
#| message: false
(students <- read_csv("../../data/students.csv", na = c("N/A", "")))
```

## 列名不要有空格

```{r}
students |>
  rename(
    student_id = `Student ID`,
    full_name = `Full Name`
  )
```

## `janitor`处理空格

```{r}
#| message: false
students |>
  janitor::clean_names()
```

## `janitor`处理空格

```{r}
students |>
  janitor::clean_names() |>
  mutate(meal_plan = factor(meal_plan))
```

## `janitor`处理空格

```{r}
students <- students |>
  janitor::clean_names() |>
  mutate(
    meal_plan = factor(meal_plan),
    age = parse_number(if_else(age == "five", "5", age))
  )

students
```

<!--
readr only treats a string as inline data when it contains a newline;
a single-line string is interpreted as a file path. The literals on the
following slides must therefore stay multi-line.
-->

## 直接录入

```{r}
read_csv(
  "a,b,c
  1,2,3
  4,5,6"
)
```

## 直接录入

```{r}
#| message: false
read_csv(
  "The first line of metadata
  The second line of metadata
  x,y,z
  1,2,3",
  skip = 2
)

read_csv(
  "# A comment I want to skip
  x,y,z
  1,2,3",
  comment = "#"
)
```

##

```{r}
#| message: false
read_csv(
  "1,2,3
  4,5,6",
  col_names = FALSE
)
```

##

```{r}
#| message: false
read_csv(
  "1,2,3
  4,5,6",
  col_names = c("x", "y", "z")
)
```

##

```{r}
#| eval: false
"x,y\n1,'a,b'"
```

##

```{r}
#| eval: false
read_csv("a,b\n1,2,3\n4,5,6")
read_csv("a,b,c\n1,2\n1,2,3,4")
read_csv("a,b\n\"1")
read_csv("a,b\n1,2\na,b")
read_csv("a;b\n1;3")
```

##

```{r}
annoying <- tibble(
  `1` = 1:10,
  `2` = `1` * 2 + rnorm(length(`1`))
)
```

##

```{r}
#| message: false
read_csv("
  logical,numeric,date,string
  TRUE,1,2021-01-15,abc
  false,4.5,2021-02-15,def
  T,Inf,2021-02-16,ghi
")
```

##

```{r}
simple_csv <- "
  x
  10
  .
  20
  30"
```

##

```{r}
#| message: false
read_csv(simple_csv)
```

##

```{r}
df <- read_csv(
  simple_csv,
  col_types = list(x = col_double())
)
```

##

```{r}
problems(df)
```

##

```{r}
#| message: false
read_csv(simple_csv, na = ".")
```

##

```{r}
another_csv <- "
x,y,z
1,2,3"

read_csv(
  another_csv,
  col_types = cols(.default = col_character())
)
```

##

```{r}
read_csv(
  another_csv,
  col_types = cols_only(x = col_character())
)
```

##

```{r}
#| message: false
sales_files <- c(
  "../../data/01-sales.csv",
  "../../data/02-sales.csv",
  "../../data/03-sales.csv"
)
read_csv(sales_files, id = "file")
```

##

```{r}
#| eval: false
sales_files <- c(
  "https://pos.it/r4ds-01-sales",
  "https://pos.it/r4ds-02-sales",
  "https://pos.it/r4ds-03-sales"
)
read_csv(sales_files, id = "file")
```

##

```{r}
sales_files <- list.files(
  "../../data",
  pattern = "sales\\.csv$",
  full.names = TRUE
)
sales_files
```

##

```{r}
#| eval: false
write_csv(students, "students.csv")
```

##

```{r}
#| warning: false
#| message: false
students
write_csv(students, "students-2.csv")
read_csv("students-2.csv")
```

##

```{r}
write_rds(students, "students.rds")
read_rds("students.rds")
```

##

```{r}
#| eval: false
library(arrow)
write_parquet(students, "students.parquet")
read_parquet("students.parquet")
#> # A tibble: 6 × 5
#>   student_id full_name        favourite_food     meal_plan             age
#>        <dbl> <chr>            <chr>              <fct>               <dbl>
#> 1          1 Sunil Huffmann   Strawberry yoghurt Lunch only              4
#> 2          2 Barclay Lynn     French fries       Lunch only              5
#> 3          3 Jayendra Lyne    NA                 Breakfast and lunch     7
#> 4          4 Leon Rossini     Anchovies          Lunch only             NA
#> 5          5 Chidiegwu Dunkel Pizza              Breakfast and lunch     5
#> 6          6 Güvenç Attila    Ice cream          Lunch only              6
```

##

```{r}
#| include: false
# Hidden cleanup: delete the files written by the write_csv()/write_rds()
# slides above so rendering leaves nothing behind in the working directory.
file.remove("students-2.csv")
file.remove("students.rds")
```

##

```{r}
tibble(
  x = c(1, 2, 5),
  y = c("h", "m", "g"),
  z = c(0.08, 0.83, 0.60)
)
```

##

```{r}
tribble(
  ~x, ~y, ~z,
  1, "h", 0.08,
  2, "m", 0.83,
  5, "g", 0.60
)
```