SASUnit Examples  Version 1.2
boxplot.sas
Go to the documentation of this file.
1 
36 /* History
37  05.10.2010 AM Changed output format to pdf in order to be able to run on linux
38 */
39 
40 %MACRO boxplot(
41  data =
42  ,x =
43  ,y =
44  ,group =
45  ,report =
46 );
47 
48 /*
49  Draws box plots of &y over &x, side by side for the two values of &group,
50  and writes the chart to the PDF file &report.
51 
52  data   - input data set
53  x      - numeric x axis variable; needs at least two distinct, equidistant
54           values and no missing values
55  y      - numeric y axis variable
56  group  - grouping variable with exactly two distinct values and no missing
57           values
58  report - path of the PDF file to be written
59 
60  If a check fails, an ERROR message is written to the log and the macro
61  returns before any report is produced. The data set handle and the
62  temporary table &d_1 are released on every exit path.
63 */
64 
65 %local dsid grouptype xvalues xvalues2;
66 %local count lower groupmiss;
67 %local d_1 d_plot;
68 %local xdiff1 xdiff2 xmin xmax misscount;
69 
70 /*-- check input data set ----------------------------------------------------*/
71 %let dsid=%sysfunc(open(&data));
72 %if &dsid=0 %then %do;
73    %put ERROR: boxplot: Data set &data does not exist;
74    %return;
75 %end;
76 /*-- check whether x variable has been specified -----------------------------*/
77 %if "&x"="" %then %do;
78    %put ERROR: boxplot: X variable not specified;
79    %let dsid=%sysfunc(close(&dsid));
80    %return;
81 %end;
82 /*-- check for existence of x variable ---------------------------------------*/
83 %if %sysfunc(varnum(&dsid,&x))=0 %then %do;
84    %put ERROR: boxplot: Variable &x does not exist in data set &data ;
85    %let dsid=%sysfunc(close(&dsid));
86    %return;
87 %end;
88 
89 /*-- check whether x variable is numeric -------------------------------------*/
90 %if %sysfunc(vartype(&dsid,%sysfunc(varnum(&dsid,&x)))) NE N %then %do;
91    %put ERROR: boxplot: Variable &x in data set &data must be numeric;
92    %let dsid=%sysfunc(close(&dsid));
93    %return;
94 %end;
95 /*-- determine values of x variable for axis ---------------------------------*/
96 proc sql noprint;
97    select distinct &x into :xvalues separated by '" "' from &data;
98    select distinct &x into :xvalues2 separated by ' ' from &data;
99 quit;
100 /*-- check whether y variable has been specified -----------------------------*/
101 %if "&y"="" %then %do;
102    %put ERROR: boxplot: Y variable not specified;
103    %let dsid=%sysfunc(close(&dsid));
104    %return;
105 %end;
106 /*-- check for existence of y variable ---------------------------------------*/
107 %if %sysfunc(varnum(&dsid,&y))=0 %then %do;
108    %put ERROR: boxplot: Variable &y does not exist in data set &data ;
109    %let dsid=%sysfunc(close(&dsid));
110    %return;
111 %end;
112 /*-- check whether y variable is numeric -------------------------------------*/
113 %if %sysfunc(vartype(&dsid,%sysfunc(varnum(&dsid,&y)))) NE N %then %do;
114    %put ERROR: boxplot: Variable &y in data set &data must be numeric;
115    %let dsid=%sysfunc(close(&dsid));
116    %return;
117 %end;
118 /*-- check whether group variable has been specified -------------------------*/
119 %if "&group"="" %then %do;
120    %put ERROR: boxplot: Group variable must be specified;
121    %let dsid=%sysfunc(close(&dsid));
122    %return;
123 %end;
124 /*-- check for existence of group variable -----------------------------------*/
125 %if %sysfunc(varnum(&dsid,&group))=0 %then %do;
126    %put ERROR: boxplot: Variable &group does not exist in data set &data ;
127    %let dsid=%sysfunc(close(&dsid));
128    %return;
129 %end;
130 /*-- check for number of groups and determine variable type and group sequence -*/
131 %let grouptype=%sysfunc(vartype(&dsid,%sysfunc(varnum(&dsid,&group))));
132 /* The handle is only needed for the metadata checks above; closing it here
133    keeps every later error exit from leaving the data set open. */
134 %let dsid=%sysfunc(close(&dsid));
135 
136 %let groupmiss=0;
137 proc sql noprint;
138    select count(distinct &group) into :count from &data;
139    select min(&group) into :lower from &data;
140    /* summary functions skip missing values, so count them explicitly */
141    select count(*) into :groupmiss from &data where &group is missing;
142 quit;
143 %if (&groupmiss>0) or (&lower=.) %then %do;
144    %put ERROR: boxplot: Missing values in group variable are not allowed;
145    %return;
146 %end;
147 %if &count NE 2 %then %do;
148    %put ERROR: boxplot: Variable &group must have exactly two values;
149    %return;
150 %end;
151 
152 /*-- calculate distance between the x values ---------------------------------*/
153 /* DATA; RUN; lets SAS generate a data set name, which &syslast then returns */
154 DATA; RUN;
155 %let d_1=&syslast;
156 
157 proc sql noprint;
158    create table &d_1 as select distinct &x from &data;
159 quit;
160 
161 data &d_1;
162    set &d_1;
163    &x = &x - lag(&x);
164    if _n_>1 then output;
165 run;
166 
167 %let misscount=0;
168 proc sql noprint;
169    select mean(&x), min(&x) into :xdiff1, :xdiff2 from &d_1;
170    select min(&x), max(&x) into :xmin, :xmax from &data;
171    select count(*) into :misscount from &data where &x is missing;
172 quit;
173 %if &xdiff1=. %then %do;
174    %put ERROR: boxplot: x variable must have at least two values;
175    proc sql noprint; drop table &d_1; quit;
176    %return;
177 %end;
178 %if &misscount>0 %then %do;
179    %put ERROR: boxplot: Missing values in x variable are not allowed;
180    proc sql noprint; drop table &d_1; quit;
181    %return;
182 %end;
183 
184 %let xmin=%sysevalf(&xmin-&xdiff1);
185 %let xmax=%sysevalf(&xmax+&xdiff1);
186 
187 %if &xdiff1 ne &xdiff2 %then %do;
188    %put ERROR: boxplot: Values of x variable are not equidistant;
189    proc sql noprint; drop table &d_1; quit;
190    %return;
191 %end;
192 
193 /*-- calculate offset between the plots of the two groups --------------------*/
194 data;
195    SET &data (KEEP=&x &y &group);
196    IF &group = %if &grouptype=N %then &lower; %else "&lower"; THEN DO;
197       &x = &x - 0.11*&xdiff1;
198    END;
199    ELSE DO;
200       &x = &x + 0.11*&xdiff1;
201    END;
202 RUN;
203 %let d_plot=&syslast;
204 
205 /*-- create chart ------------------------------------------------------------*/
206 GOPTIONS FTEXT="Helvetica" HTEXT=12pt hsize=16cm vsize=16cm;
207 SYMBOL1 WIDTH = 3 BWIDTH = 3 COLOR = gray LINE = 2 VALUE = none INTERPOL = BOXJT00 MODE = include;
208 SYMBOL2 WIDTH = 3 BWIDTH = 3 COLOR = black LINE = 1 VALUE = none INTERPOL = BOXJT00 MODE = include;
209 AXIS1 LABEL=(ANGLE=90) MINOR=none;
210 AXIS2 ORDER=(&xmin &xvalues2 &xmax) VALUE=(" " "&xvalues" " ") MINOR=none;
211 LEGEND1 FRAME;
212 
213 ODS PDF FILE="&report";
214 ODS LISTING CLOSE;
215 PROC GPLOT DATA=&d_plot;
216    PLOT &y * &x = &group / VAXIS=Axis1 HAXIS=Axis2 LEGEND=Legend1 NOFRAME;
217 RUN;
218 QUIT;
219 ODS PDF CLOSE;
220 ODS LISTING;
221 
222 proc sql noprint;
223    drop table &d_plot;
224    drop table &d_1;
225 quit;
226 
227 %MEND boxplot;
207 %MEND boxplot;