SASUnit Examples  Version 1.2.1
boxplot.sas
Go to the documentation of this file.
1 
36 /* History
37  05.10.2010 AM Changed output format to pdf in order to be able to run on linux
38 */
39 
/*------------------------------------------------------------------------------
   boxplot: draw a box plot of &y over &x for the two values of &group and
   write the chart to a PDF file.

   Parameters (all keyword parameters, all required):
     data   - input data set
     x      - numeric variable for the horizontal axis; its distinct values
              must be non-missing, equidistant and at least two in number
     y      - numeric variable for the vertical axis
     group  - grouping variable (numeric or character) with exactly two
              distinct values and no missing values
     report - path of the PDF file passed to ODS PDF FILE=

   On any validation failure an ERROR message is written to the log and the
   macro returns without creating a chart.
------------------------------------------------------------------------------*/
%MACRO boxplot(
   data   =
  ,x      =
  ,y      =
  ,group  =
  ,report =
);

%local dsid grouptype xvalues xvalues2;
%local count lower;
%local d_1 d_plot;
%local xdiff1 xdiff2 xmin xmax misscount;

/*-- check input data set ----------------------------------------------------*/
%let dsid=%sysfunc(open(&data));
%if &dsid=0 %then %do;
   %put ERROR: boxplot: Data set &data does not exist;
   %return;
%end;

/*-- check whether x variable has been specified -----------------------------*/
%if "&x"="" %then %do;
   %put ERROR: boxplot: X variable not specified;
   %let dsid=%sysfunc(close(&dsid));
   %return;
%end;

/*-- check for existence of x variable ---------------------------------------*/
%if %sysfunc(varnum(&dsid,&x))=0 %then %do;
   %put ERROR: boxplot: Variable &x does not exist in data set &data ;
   %let dsid=%sysfunc(close(&dsid));
   %return;
%end;

/*-- check whether x variable is numeric -------------------------------------*/
%if %sysfunc(vartype(&dsid,%sysfunc(varnum(&dsid,&x)))) NE N %then %do;
   %put ERROR: boxplot: Variable &x in data set &data must be numeric;
   %let dsid=%sysfunc(close(&dsid));
   %return;
%end;

/*-- determine values of x variable for axis ---------------------------------*/
/* xvalues  : distinct x values joined by '" "' so that wrapping the list in  */
/*            double quotes later yields one quoted label per value           */
/* xvalues2 : the same values as a blank-separated list for the axis ORDER=   */
proc sql noprint;
   select distinct &x into :xvalues separated by '" "' from &data;
   select distinct &x into :xvalues2 separated by ' ' from &data;
quit;

/*-- check whether y variable has been specified -----------------------------*/
%if "&y"="" %then %do;
   %put ERROR: boxplot: Y variable not specified;
   %let dsid=%sysfunc(close(&dsid));
   %return;
%end;

/*-- check for existence of y variable ---------------------------------------*/
%if %sysfunc(varnum(&dsid,&y))=0 %then %do;
   %put ERROR: boxplot: Variable &y does not exist in data set &data ;
   %let dsid=%sysfunc(close(&dsid));
   %return;
%end;

/*-- check whether y variable is numeric -------------------------------------*/
%if %sysfunc(vartype(&dsid,%sysfunc(varnum(&dsid,&y)))) NE N %then %do;
   %put ERROR: boxplot: Variable &y in data set &data must be numeric;
   %let dsid=%sysfunc(close(&dsid));
   %return;
%end;

/*-- check whether group variable has been specified -------------------------*/
%if "&group"="" %then %do;
   %put ERROR: boxplot: Group variable must be specified;
   %let dsid=%sysfunc(close(&dsid));
   %return;
%end;

/*-- check for existence of group variable -----------------------------------*/
%if %sysfunc(varnum(&dsid,&group))=0 %then %do;
   %put ERROR: boxplot: Variable &group does not exist in data set &data ;
   %let dsid=%sysfunc(close(&dsid));
   %return;
%end;

/*-- determine type of group variable and release the data set handle --------*/
%let grouptype=%sysfunc(vartype(&dsid,%sysfunc(varnum(&dsid,&group))));
/* The handle is not used after this point. Closing it here guarantees that   */
/* none of the error exits below leaves the input data set open.              */
%let dsid=%sysfunc(close(&dsid));

/*-- check for number of groups and determine group sequence -----------------*/
/* count : number of distinct group values                                    */
/* lower : smallest group value, used further down to tell the groups apart   */
proc sql noprint;
   select count(distinct &group) into :count from &data;
   select min(&group) into :lower from &data;
quit;
%if &lower=. %then %do;
   %put ERROR: boxplot: Missing values in group variable are not allowed;
   %return;
   %*** This is code not covered by any testcase and left intentionally here to ***;
   %*** demonstrate the functionality of the test coverage                      ***;
   proc sql noprint; drop table &d_1; quit;
%end;
%if &count NE 2 %then %do;
   %put ERROR: boxplot: Variable &group must have exactly two values;
   %return;
%end;

/*-- calculate distance between the x values ---------------------------------*/
/* An empty DATA step creates an automatically named data set; its name is    */
/* picked up from SYSLAST and reused as the name of the work table.           */
DATA; RUN;
%let d_1=&syslast;

proc sql noprint;
   create table &d_1 as select distinct &x from &data;
quit;

/* Replace each x value by its difference to the previous one; the first      */
/* observation has no predecessor and is dropped.                             */
data &d_1;
   set &d_1;
   &x = &x - lag(&x);
   if _n_>1 then output;
run;

/* xdiff1 / xdiff2 : mean and minimum of the differences                      */
/* xmin / xmax     : smallest and largest x value in the input data           */
/* misscount       : number of observations with missing x                    */
%let misscount=0;
proc sql noprint;
   select mean(&x), min(&x) into :xdiff1, :xdiff2 from &d_1;
   select min(&x), max(&x) into :xmin, :xmax from &data;
   select count(*) into :misscount from &data where &x is missing;
quit;
%if &xdiff1=. %then %do;
   %put ERROR: boxplot: x variable must have at least two values;
   proc sql noprint; drop table &d_1; quit;
   %return;
%end;
%if &misscount>0 %then %do;
   %put ERROR: boxplot: Missing values in x variable are not allowed;
   proc sql noprint; drop table &d_1; quit;
   %return;
%end;

/* widen the axis range by one step on either side */
%let xmin=%sysevalf(&xmin-&xdiff1);
%let xmax=%sysevalf(&xmax+&xdiff1);

/* mean and minimum of the differences differ => steps are not all equal */
%if &xdiff1 ne &xdiff2 %then %do;
   %put ERROR: boxplot: Values of x variable are not equidistant;
   proc sql noprint; drop table &d_1; quit;
   %return;
%end;

/*-- calculate offset between the plots of the two groups --------------------*/
/* Observations of the group with the smaller value are shifted left, all     */
/* others right, by 0.11 times the x step. The comparison literal is quoted   */
/* only for a character group variable.                                       */
data;
   SET &data (KEEP=&x &y &group);
   IF &group = %if &grouptype=N %then &lower; %else "&lower"; THEN DO;
      &x = &x - 0.11*&xdiff1;
   END;
   ELSE DO;
      &x = &x + 0.11*&xdiff1;
   END;
RUN;
%let d_plot=&syslast;

/*-- create chart ------------------------------------------------------------*/
GOPTIONS FTEXT="Helvetica" HTEXT=12pt hsize=16cm vsize=16cm;
SYMBOL1 WIDTH = 3 BWIDTH = 3 COLOR = gray LINE = 2 VALUE = none INTERPOL = BOXJT00 MODE = include;
SYMBOL2 WIDTH = 3 BWIDTH = 3 COLOR = black LINE = 1 VALUE = none INTERPOL = BOXJT00 MODE = include;
AXIS1 LABEL=(ANGLE=90) MINOR=none;
/* blank labels for the two extra tick marks added at &xmin and &xmax */
AXIS2 ORDER=(&xmin &xvalues2 &xmax) VALUE=(" " "&xvalues" " ") MINOR=none;
LEGEND1 FRAME;

/* route output to the PDF file only; the listing destination is reopened     */
/* once the chart has been written                                            */
ODS PDF FILE="&report";
ODS LISTING CLOSE;
PROC GPLOT DATA=&d_plot;
   PLOT &y * &x = &group / VAXIS=Axis1 HAXIS=Axis2 LEGEND=Legend1 NOFRAME;
RUN;
QUIT;
ODS PDF CLOSE;
ODS LISTING;

/*-- remove temporary data sets ----------------------------------------------*/
proc sql noprint;
   drop table &d_plot;
   drop table &d_1;
quit;

%MEND boxplot;