/* Source file: RWEP/SD/20240328_9_课后作业/第8次课后作业_模板.sas */
/* Repository viewer header (not part of the program): 290 lines, 9.9 KiB, SAS */

/* Session-wide options: line size 256, page size 32767, no date in output,
   extended member names and arbitrary variable names (needed for name
   literals such as 'CO_mg/m3'n that appear in the imported data). */
options
    ls=256
    ps=32767
    nodate
    validmemname=extend
    validvarname=any
;
/* Title line used for subsequent procedure output. */
title 'The SAS System';
/* print: run PROC PRINT on the whole data set.
   d - name of the data set to print. */
%macro print(d);
    proc print data=&d.;
    run;
%mend print;
/* printobs: run PROC PRINT with the OBS= data set option.
   d   - name of the data set to print.
   obs - value passed to OBS= (number of the last observation to print). */
%macro printobs(d,obs);
    proc print data=&d. (obs=&obs.);
    run;
%mend printobs;
/* printfirstobsobs: run PROC PRINT on a range of observations.
   d        - name of the data set to print.
   firstobs - value passed to FIRSTOBS= (first observation to print).
   obs      - value passed to OBS= (last observation to print). */
%macro printfirstobsobs(d,firstobs,obs);
    proc print data=&d. (firstobs=&firstobs. obs=&obs.);
    run;
%mend printfirstobsobs;
/* contents: run PROC CONTENTS with VARNUM (variables listed in data set order).
   d - name of the data set to describe. */
%macro contents(d);
    proc contents data=&d. varnum;
    run;
%mend contents;
/* contentsshort: run PROC CONTENTS with VARNUM and SHORT, which yields a
   compact list of variable names.
   d - name of the data set to describe. */
%macro contentsshort(d);
    proc contents data=&d. varnum short;
    run;
%mend contentsshort;
/* save_dataset: copy data set &d to the data set addressed by the quoted
   physical name "d:&d" (i.e. a file on drive d:).
   d - name of the data set to copy; also used as the target file name. */
%macro save_dataset(d);
    data "d:&d";
        set &d.;
    run;
%mend save_dataset;
/* load_dataset: counterpart of save_dataset; reads the data set addressed by
   the quoted physical name "d:&d" and writes it to data set &d.
   d - name of the data set to create; also used as the source file name. */
%macro load_dataset(d);
    data &d.;
        set "d:&d";
    run;
%mend load_dataset;
/* kill: delete all members of the WORK library via PROC DATASETS ... KILL. */
%macro kill;
    proc datasets lib=work kill;
    run;
    quit;
%mend kill;
/* List the ODS style templates available in this session. */
proc template;
    list styles;
run;
/* Empty the WORK library before the import steps. */
%kill;
/* Import worksheet "metadf" of d:airquality.xlsx into WORK.raw_metadf,
   taking variable names from the first row. */
proc import
        out=work.raw_metadf
        datafile="d:airquality.xlsx"
        dbms=excel
        replace;
    range="metadf$";
    getnames=yes;
    mixed=yes;
    scantext=yes;
    usedate=no;
    scantime=no;
run;
%print(raw_metadf);
%save_dataset(raw_metadf); /* persist the raw data set to drive d: */
/* Import worksheet "airqualitydf" of d:airquality.xlsx into
   WORK.raw_airqualitydf, taking variable names from the first row. */
proc import
        out=work.raw_airqualitydf
        datafile="d:airquality.xlsx"
        dbms=excel
        replace;
    range="airqualitydf$";
    getnames=yes;
    mixed=yes;
    scantext=yes;
    /* NOTE(review): the original (garbled) remarks on the next two options
       appear to warn against setting them to YES - confirm before changing. */
    usedate=no;
    scantime=no;
run;
%print(raw_airqualitydf);
%save_dataset(raw_airqualitydf); /* persist the raw data set to drive d: */
/* Empty WORK again; the raw data sets are reloaded from disk afterwards. */
%kill;
/* Reload the raw data sets that were saved to drive d:. */
%load_dataset(raw_metadf);
%load_dataset(raw_airqualitydf);

/* Inspect the variables of each data set. */
%contents(raw_metadf);
%contentsshort(raw_metadf);
/* Variable list recorded by the original author:
   site name Area lon lat */
%contents(raw_airqualitydf);
%contentsshort(raw_airqualitydf);
/* Variable list recorded by the original author (the unit prefix was garbled
   in the source text and is reconstructed here as "μ" - TODO confirm):
   datetime site 'CO_mg/m3'n 'CO_24h_mg/m3'n 'NO2_μg/m3'n 'NO2_24h_μg/m3'n
   'O3_μg/m3'n 'O3_24h_μg/m3'n 'O3_8h_μg/m3'n 'O3_8h_24h_μg/m3'n
   'PM10_μg/m3'n 'PM10_24h_μg/m3'n 'PM2#5_μg/m3'n 'PM2#5_24h_μg/m3'n
   'SO2_μg/m3'n 'SO2_24h_μg/m3'n AQI PrimaryPollutant Quality Unheathful */

/* Show the first 10 observations of each data set. */
%printobs(raw_metadf,10);
%printobs(raw_airqualitydf,10);

/* Sort both data sets by site so they can be merged BY site. */
proc sort data=raw_metadf out=metadfsorted;
    by site;
run;
proc sort data=raw_airqualitydf out=airqualitydfsorted;
    by site;
run;
/* Merge site metadata with the measurements (BY site) and pre-process:
   split datetime into date and time parts and flag each row as day or night. */
data airquality;
    /* RETAIN is the first statement that names these variables, which fixes
       their order in the output data set. */
    retain datetime date time DayNight site name Area AQI lon lat;
    length DayNight $ 5;
    merge metadfsorted airqualitydfsorted;
    by site;
    date = datepart(datetime);
    time = timepart(datetime);
    /* A row without a datetime (for example a site present only in the
       metadata) must not fall through to 'night'. DayNight is listed in
       RETAIN, so it is blanked explicitly instead of being left unassigned,
       otherwise the previous row's value would carry over. */
    if missing(time) then DayNight = ' ';
    else if '8:00't <= time < '20:00't then DayNight = 'day';
    else DayNight = 'night';
    format datetime e8601dt25. date yymmdd10. time time5.;
    keep site name Area lon lat datetime date time DayNight AQI;
run;
%printobs(airquality,100);
%save_dataset(airquality); /* persist the merged data set to drive d: */
%kill;
/*#################### DATA SET airquality ####################*/
/* Reload the merged data set that was saved to drive d:. */
%load_dataset(airquality);
%printobs(airquality,100);

/* Compare the number of distinct site codes with the number of distinct
   names. The recorded counts below differ, so site and name are not in
   one-to-one correspondence; the analyses that follow group by site. */
proc sql;
    select count(distinct site) as count_site
        from airquality;
    select count(distinct name) as count_name
        from airquality;
quit;
/* Result recorded by the original author:
count_site
1714
count_name
1522
*/
/*#########################################################################*/
/* Task: median AQI per site for day (8:00-20:00) and night (20:00-8:00).   */
/*#########################################################################*/
/*@@@@@@@@@@@@@@@@@@@@@ PDF @@@@@@@@@@@@@@@@@@@@@@*/
ods pdf file='d:means.pdf' style=sapphire dpi=1200;
/* Median of AQI for every site x DayNight combination; rows with a missing
   AQI are filtered out by the WHERE statement. */
proc means data=airquality median maxdec=1;
    where AQI is not missing;
    class site DayNight;
    var AQI;
run;
ods pdf close;
/*@@@@@@@@@@@@@@@@@@@@@ PDF @@@@@@@@@@@@@@@@@@@@@@*/
/*####################################################################################################*/
/* Task (as far as the garbled original comment can be read): per city, the share of sites whose AQI  */
/* is below the 30th percentile of AQI over all data; list the 10 cities with the highest share,      */
/* restricted to cities whose site count is at least 5.                                               */
/*####################################################################################################*/
/* 30th percentile of AQI over the whole data set, written to data set
   airqualitystats as variable P30 (PCTLPRE=P + PCTLPTS=30). */
proc univariate data=airquality noprint;
    var AQI;
    output out=airqualitystats pctlpts=30 pctlpre=P;
run;
%print(airqualitystats); /* value recorded by the original author: 42 */

/* Store the percentile in macro variable P30.
   The value is read from the PROC UNIVARIATE output instead of concatenating
   every AQI value into a macro variable and calling PCTL() on that list:
   the list can exceed the maximum macro variable length on larger data, and
   the PCTL() call was re-evaluated for every row. PROC UNIVARIATE's default
   percentile definition (PCTLDEF=5) is the same definition PCTL() uses, so
   the result is unchanged. TRIMMED removes the padding INTO would otherwise
   leave in the macro variable. */
proc sql noprint;
    select P30 format=best32.
        into :P30 trimmed
        from airqualitystats;
quit;

/* Write the macro variable to the log for inspection. */
%put P30=&P30.;
/* Classify every observation of the merged data set against the 30th
   percentile held in macro variable P30: 'good' when AQI is below it,
   'fair' otherwise. NOTE(review): the original comment appears to mark this
   step as an optional check that can be removed - confirm. */
data airquality1;
    set airquality;
    length quality $ 4;
    /* In SAS a missing value compares lower than any number, so without this
       guard rows with a missing AQI would be labelled 'good'. */
    if missing(AQI) then quality = ' ';
    else if AQI < &P30. then quality = 'good';
    else quality = 'fair';
run;
%printobs(airquality1,100);
/* Median AQI for each Area/site class combination; the medians are written
   to data set airqualitymedian (one row per _TYPE_ level and class
   combination). Rows with a missing AQI are filtered out first. */
proc means data=airquality median maxdec=1;
    where AQI is not missing;
    class Area site;
    var AQI;
    output out=airqualitymedian median=;
run;
%print(airqualitymedian);
/* Classify each site's median AQI against the 30th percentile in macro
   variable P30. Only _TYPE_=3 rows (the Area*site combinations) are read. */
data airqualitymedian1;
    set airqualitymedian (where=(_TYPE_=3));
    select;
        when (AQI<&P30.) quality='good';
        otherwise quality='fair';
    end;
run;
%print(airqualitymedian1);
/* Cross-tabulate Area by quality to view, per city, the share of sites in
   each quality class; column and overall percentages are suppressed. */
/*@@@@@@@@@@@@@@@@@@@@@ PDF @@@@@@@@@@@@@@@@@@@@@*/
ods pdf file='d:freq.pdf' style=sapphire dpi=1200;
proc freq data=airqualitymedian1;
    tables Area*quality / nocol nopercent;
run;
ods pdf close;
/*@@@@@@@@@@@@@@@@@@@@@ PDF @@@@@@@@@@@@@@@@@@@@@*/
/* Same cross-tabulation, this time saved to data set airqualitymedianoutrow.
   OUTPCT adds the percentage variables; PERCENT and PCT_COL are dropped, so
   the output keeps the frequency count and the row percentage. */
proc freq data=airqualitymedian1;
    tables Area*quality
        / outpct
          out=airqualitymedianoutrow (drop=percent pct_col);
run;
%printobs(airqualitymedianoutrow,100);
/* Sum COUNT within each Area, i.e. the number of sites per city.
   NOTE(review): the original comment appears to mark this step as an
   optional check that can be removed - confirm. */
proc means data=airqualitymedianoutrow sum maxdec=0;
    var COUNT;
    class Area;
run;
/* Keep the cities whose total COUNT (summed over the quality classes) is at
   least 5. The per-Area total is remerged onto every row as total_COUNT.
   Rows are ordered by quality, row percentage and count, all descending.
   The query is first displayed, then stored as airqualitymedianoutrow5. */
proc sql;
    /* Display only. */
    select *,
           sum(COUNT) as total_COUNT
        from airqualitymedianoutrow
        group by Area
        having calculated total_COUNT>=5
        order by quality desc, PCT_ROW desc, COUNT desc;

    /* Same query, saved to a data set. */
    create table airqualitymedianoutrow5 as
        select *,
               sum(COUNT) as total_COUNT
            from airqualitymedianoutrow
            group by Area
            having calculated total_COUNT>=5
            order by quality desc, PCT_ROW desc, COUNT desc;
quit;
/* Write the ranked table to a PDF: the first 10, 20 and 30 rows, followed by
   the full table. */
/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ PDF @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
ods pdf file='d:airqualitymedianoutrow5.pdf' style=sapphire dpi=1200;
proc print data=airqualitymedianoutrow5 (obs=10);
run;
proc print data=airqualitymedianoutrow5 (obs=20);
run;
proc print data=airqualitymedianoutrow5 (obs=30);
run;
proc print data=airqualitymedianoutrow5;
run;
ods pdf close;
/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ PDF @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
/*#####################################################*/
/* Task: grouped by city (Area), test whether the day   */
/* and night median AQI differ significantly.           */
/*#####################################################*/
/* Some sites have no matching Area; inspect them first. */
proc sql;
    select distinct Area,
           count(distinct Area) as count_Area
        from airquality;
quit;

/* Print the rows whose Area is missing. */
proc print data=airquality;
    where Area is missing;
run;

/* Distinct sites among those rows, with the number of such sites remerged
   onto every output row. */
proc sql;
    select distinct site,
           count(distinct site) as count_site
        from airquality
        where Area is missing;
quit;
/* Result recorded by the original author: 4 sites have no matching Area.
site count_site
2628A 4
3128A 4
4034A 4
4036A 4
*/

/* Sort by Area so the BY-group procedures below can be used. */
proc sort data=airquality out=airqualitysorted;
    by Area;
run;
/* Median AQI for day and night within each Area.
   NOTE(review): the original comment appears to mark this step as an
   optional check that can be removed - confirm. */
proc means data=airqualitysorted median maxdec=1;
    where AQI is not missing;
    by Area;
    class DayNight;
    var AQI;
run;
/* Overall (all cities pooled): median-score test of AQI between the
   DayNight groups. */
proc npar1way data=airquality median;
    var AQI;
    class DayNight;
run;
/* Per city: median-score test of AQI between the DayNight groups, run as a
   BY-group analysis on the Area-sorted data; rows without an Area are
   excluded. The output is written to a PDF. */
/*@@@@@@@@@@@@@@@@@@@@@@@@@@ PDF @@@@@@@@@@@@@@@@@@@@@@@@@*/
ods pdf file='d:npar1waymedian.pdf' style=sapphire dpi=1200;
proc npar1way data=airqualitysorted median;
    where Area is not missing;
    by Area;
    class DayNight;
    var AQI;
run;
ods pdf close;
/*@@@@@@@@@@@@@@@@@@@@@@@@@@ PDF @@@@@@@@@@@@@@@@@@@@@@@@@*/
/*
Using Wilcoxon scores in the linear rank statistic for two-sample data produces the rank sum statistic of the Mann-Whitney-Wilcoxon test.
Using Wilcoxon scores in the one-way ANOVA statistic produces the Kruskal-Wallis test.
Wilcoxon scores are locally most powerful for location shifts of a logistic distribution.
*//*
Using median scores in the linear rank statistic for two-sample data produces the two-sample median test.
The one-way ANOVA statistic with median scores is equivalent to the Brown-Mood test.
Median scores are particularly powerful for distributions that are symmetric and heavy-tailed.*/
/*
Scores for Linear Rank and One-Way ANOVA Tests
For each score type that you specify, PROC NPAR1WAY computes a one-way ANOVA statistic and also a linear rank statistic for two-sample data. The following score types are used primarily to test for differences in location: Wilcoxon, median, Van der Waerden (normal), and Savage. The following scores types are used to test for scale differences: Siegel-Tukey, Ansari-Bradley, Klotz, and Mood. Conover scores can be used to test for differences in both location and scale. This section gives formulas for the score types available in PROC NPAR1WAY. For further information about the formulas and the applicability of each score, see Randles and Wolfe (1979), Gibbons and Chakraborti (2010), Conover (1999), and Hollander and Wolfe (1999).
In addition to the score types described in this section, you can specify the SCORES=DATA option to use the input data observations as scores. This enables you to produce a wide variety of tests. You can construct any scores by using the DATA step, and then you can use PROC NPAR1WAY to compute the corresponding linear rank and one-way ANOVA tests for these scores. You can also analyze raw (unscored) data by using the SCORES=DATA option; for two-sample data, the corresponding exact test is a permutation test that is known as Pitman's test.
*/
/*@@@@@@@@@@@@@@@@@@@@@@@@@@@ PDF @@@@@@@@@@@@@@@@@@@@@@@@@*/
/* Per city: Conover-score test of AQI between the DayNight groups, run as a
   BY-group analysis on the Area-sorted data; rows without an Area are
   excluded. The output is written to a PDF. */
ods pdf file='d:npar1wayConover.pdf' style=sapphire dpi=1200;
proc npar1way data=airqualitysorted Conover;
    class DayNight;
    var AQI;
    by Area;
    where Area is not missing;
run;
/* Close the PDF destination so the file is finalized here, as every other
   PDF section of this program does; without it the destination stays open
   and later output would keep being appended to this file. */
ods pdf close;
/*@@@@@@@@@@@@@@@@@@@@@@@@@@@ PDF @@@@@@@@@@@@@@@@@@@@@@@@@*/
/*Conover scores can be used to test for differences in both location and scale.*/