/* cholesterol-simplified.sas */

/* ========================================================== */
/*  Guatemalan cholesterol example                            */
/*                                                            */
/*  Builds the work data set cholest1 with one row per        */
/*  subject:                                                  */
/*     level    -- cholesterol level (numeric)                */
/*     location -- rural or urban (character)                 */
/*  and then summarizes and plots level for each location.    */
/* ========================================================== */

/* The double trailing @ holds the input line so that several
   level/location pairs can be read from each data line.       */
data cholest1;
    input level location $ @@;
    datalines;
95 rural 108 rural 108 rural 114 rural 115 rural
124 rural 129 rural 129 rural 131 rural 131 rural
135 rural 136 rural 136 rural 139 rural 140 rural
142 rural 142 rural 143 rural 143 rural 144 rural
144 rural 145 rural 146 rural 148 rural 152 rural
152 rural 155 rural 157 rural 158 rural 158 rural
162 rural 165 rural 166 rural 171 rural 172 rural
173 rural 174 rural 175 rural 180 rural 181 rural
189 rural 192 rural 194 rural 197 rural 204 rural
220 rural 223 rural 226 rural 231 rural
133 urban 134 urban 155 urban 170 urban 175 urban
179 urban 181 urban 184 urban 188 urban 189 urban
190 urban 196 urban 197 urban 199 urban 200 urban
200 urban 201 urban 201 urban 204 urban 205 urban
205 urban 205 urban 206 urban 214 urban 217 urban
222 urban 222 urban 227 urban 227 urban 228 urban
234 urban 234 urban 236 urban 239 urban 241 urban
242 urban 244 urban 249 urban 252 urban 273 urban
279 urban 284 urban 284 urban 284 urban 330 urban
;

/* ---------------------------------------------------------- */
/*  Numerical summaries of level, all observations            */
/*                                                            */
/*  The class statement splits the analysis by the levels of  */
/*  the classification variable (location here). Unlike a by  */
/*  statement, class does not need the data to be sorted.     */
/*                                                            */
/*  The title stays in effect for the three summary steps.    */
/* ---------------------------------------------------------- */
title 'Guatemalan cholesterol example summary';

/* Keep only the extreme values, basic measures and quantiles tables. */
proc univariate data=cholest1 nextrval=5;
    ods select ExtremeValues BasicMeasures Quantiles;
    var level;
    class location;
run;

/* Five-number summary plus range, interquartile range, mean and std. */
proc means data=cholest1 maxdec=2
           n min q1 median q3 max range qrange mean std;
    var level;
    class location;
run;

/* Selected percentiles from the 1st through the 99th. */
proc means data=cholest1 maxdec=2
           p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    var level;
    class location;
run;

/* ---------------------------------------------------------- */
/*  Histogram and smoothed histogram, one panel per location  */
/*                                                            */
/*  panelby location  -- one graph for each location          */
/*  histogram options                                         */
/*     binstart=90       midpoint of the first bin            */
/*     binwidth=20       bin length, so the bins are          */
/*                       80-100, 100-120, and so on           */
/*     datalabel=percent percentage shown at the end of each  */
/*                       bin (datalabel=count shows the       */
/*                       frequency instead)                   */
/*  density options                                           */
/*     type=kernel       fitted smooth (kernel) density       */
/* ---------------------------------------------------------- */
title 'Guatemalan cholesterol example';

proc sgpanel data=cholest1 pctlevel=group;
    panelby location;
    histogram level / binstart=90 binwidth=20 datalabel=percent;
    density level / type=kernel;
run;

title;

/* ---------------------------------------------------------- */
/*  Same panels with a fitted normal density added to the     */
/*  histogram and kernel density                              */
/*                                                            */
/*  panelby location  -- one graph for each location          */
/*  datalabel=percent -- percentage at the end of each bin    */
/*                       (datalabel=count gives frequency)    */
/*  type=normal       -- fitted normal density                */
/*  type=kernel       -- fitted smooth (kernel) density       */
/* ---------------------------------------------------------- */
title 'Guatemalan cholesterol example';

proc sgpanel data=cholest1 pctlevel=group;
    panelby location;
    histogram level / binstart=90 binwidth=20 datalabel=percent;
    density level / type=normal;
    density level / type=kernel;
run;

title;

/* ---------------------------------------------------------- */
/*  Horizontal box plots of level, one box per location       */
/* ---------------------------------------------------------- */
title 'cholesterol level distribution box plots';

proc sgplot data=cholest1;
    hbox level / category=location;
run;
/* Clear the title left in effect by the preceding step. */
title;

/* ---------------------------------------------------------- */
/*  Histograms with normal and kernel density curves for each */
/*  location, overlaid on a single graph                      */
/*                                                            */
/*  group=location    -- one histogram / curve per location   */
/*  binstart=90       -- midpoint of the first bin            */
/*  binwidth=20       -- bin length (80-100, 100-120, ...)    */
/*  transparency=0.4  -- lets overlapping bars show through   */
/* ---------------------------------------------------------- */
title 'Guatemalan cholesterol level distributions (overlaid plots)';

proc sgplot data=cholest1;
    histogram level / binstart=90 binwidth=20 transparency=0.4
                      datalabel=percent group=location;
    density level / type=normal group=location;
    density level / type=kernel group=location;
run;

title;

/* ---------------------------------------------------------- */
/*  Relative frequency table for each location                */
/*                                                            */
/*  outhistogram= saves the histogram bins to cholesthist.    */
/*  The endpoint list has to cover the whole range of the     */
/*  data (95 to 330), so it is written out in full:           */
/*  80, 100, ..., 340. These are the same 20-unit bins that   */
/*  binstart=90 binwidth=20 produce in the sgplot / sgpanel   */
/*  histograms.                                               */
/* ---------------------------------------------------------- */
proc univariate data=cholest1 noprint;
    class location;
    var level;
    histogram level / endpoints = 80 to 340 by 20
                      outhistogram=cholesthist;
run;

/* Copy the automatic outhistogram variables to readable names:
   _count_  -> frequency   (observations in the bin)
   _obspct_ -> percentage  (percent of observations in the bin)
   _minpt_  -> binstart    (left endpoint of the bin)            */
data cholestfreq;
    set cholesthist;
    frequency  = _count_;
    percentage = _obspct_;
    binstart   = _minpt_;
run;

title  'cholesterol level relative frequency distributions';
title2 'binstart is the left endpoint of the interval -- left endpoint is included in frequency';
title3 'these intervals are the same as those of the histograms';

/* by location relies on cholestfreq already being grouped by location. */
proc print data=cholestfreq;
    by location;
    var binstart frequency percentage;
run;

title;
title2;
title3;

/* ---------------------------------------------------------- */
/*  Recompute summary values omitting the urban outlier (330) */
/* ---------------------------------------------------------- */
data cholest2;
    set cholest1;
    /* 330 occurs only once, as the largest urban value. */
    if level = 330 then delete;
run;

/* The title stays in effect for the three summary steps below. */
title 'Guatemalan cholesterol example summary without the outlier';

/* Keep only the extreme values, basic measures and quantiles tables. */
proc univariate data=cholest2 nextrval=5;
    class location;
    var level;
    ods select ExtremeValues BasicMeasures Quantiles;
run;

/* Five-number summary plus range, interquartile range, mean and std. */
proc means data=cholest2 maxdec=2
           n min q1 median q3 max range qrange mean std;
    class location;
    var level;
run;

/* Selected percentiles from the 1st through the 99th. */
proc means data=cholest2 maxdec=2
           p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
    class location;
    var level;
run;

title;