/* nhanes2013-2014-simplified.sas                           */
/* NHANES 2013-2014 Body Measurement Data                   */
/*   RIAGENDR  -- gender 1:male 2:female                    */
/*   RIDAGEYR  -- age at screening years 80 = 80 or more    */
/*   BMXWT     -- weight kg                                 */
/*   BMXHT     -- standing height cm                        */
/*   BMXBMI    -- bmi kg/m^2                                */
/*   BMXWAIST  -- waist circumference cm                    */
/*   RIAGENDR2 -- extra variable for csv processing         */
/*                                                          */
/* Every DATA and PROC step below ends with an explicit     */
/* run; statement.  TITLE is a global statement that takes  */
/* effect as soon as it is read, so a title; placed before  */
/* the step boundary would blank the titles of the step     */
/* that is still waiting to execute.                        */

/* -------------------------------------------------- */
/* nhanes1: read the raw file and derive the analysis  */
/* variables                                           */
/*   infile      location of the data file             */
/*   firstobs=2  skip the first line of the data file, */
/*               which holds the variable names        */
/*   input       names the variables (using their      */
/*               initial names) in file order          */
/* The assignments create versions of the variables    */
/* with more convenient names and character coding.    */
/* weight is BMXWT*2.2 and height/waist are the cm     */
/* values divided by 2.54.                             */
/* agegroup splits the data into adults (age 20 and    */
/* over) and children.                                 */
/* Unrecognised or missing RIAGENDR and missing age    */
/* are left blank rather than being silently labelled  */
/* female / child (a missing numeric compares as less  */
/* than 20 in SAS).                                    */
/* -------------------------------------------------- */
data nhanes1;
  infile '/folders/myfolders/CBSASfiles/NHANES20132014_SelectedBodyMeasurements.dat' firstobs=2;
  input RIAGENDR RIDAGEYR BMXWT BMXHT BMXBMI BMXWAIST RIAGENDR2;

  /* length is declared here, at first use, so the variable order */
  /* in the data set stays: sex, age, weight, ..., agegroup       */
  length sex $ 6;
  select (RIAGENDR);
    when (1)  sex = 'male';
    when (2)  sex = 'female';
    otherwise sex = ' ';
  end;

  age    = RIDAGEYR;
  weight = BMXWT * 2.2;
  height = BMXHT / 2.54;
  bmi    = BMXBMI;
  waist  = BMXWAIST / 2.54;

  length agegroup $ 5;
  if missing(age) then agegroup = ' ';
  else if age < 20 then agegroup = 'child';
  else agegroup = 'adult';
run;

/* -------------------------------------------------- */
/* nhanes2: adults only                                */
/* "set nhanes1;" followed by the subsetting           */
/* "if agegroup='adult';" keeps the subset of nhanes1  */
/* corresponding to adults                             */
/* -------------------------------------------------- */
data nhanes2;
  set nhanes1;
  if agegroup = 'adult';
run;

/* --------------------------------------------------- */
/* examine weight distributions                        */
/* --------------------------------------------------- */
/* title    assigns a title for the output             */
/* panelby sex;  requests a graph for each level of sex */
/* --histogram options--                               */
/*   binwidth=20       width of each bin               */
/*   boundary=lower    include lower endpt, default is */
/*                     upper                           */
/*   datalabel=percent display percentage at end of    */
/*                     bin; use datalabel=count to get */
/*                     frequency                       */
/*   (binstart=, the first bin midpoint, is not set)   */
/* --density options--                                 */
/*   type=kernel requests a fitted smooth density      */
proc sgpanel data=nhanes2;
  title 'NHANES: weight distribution by sex';
  panelby sex;
  histogram weight / binwidth=20 boundary=lower datalabel=percent;
  density weight / type=kernel;
run;
title;

/* -------------------------------------------------------- */
/* weight distribution by sex -- histogram, smoothed        */
/* histogram, and fitted normal distribution                */
/*   type=normal requests a fitted normal density           */
/*   type=kernel requests a fitted smooth density           */
/* -------------------------------------------------------- */
proc sgpanel data=nhanes2;
  title 'NHANES: weight distribution by sex';
  panelby sex;
  histogram weight / binwidth=20;
  density weight / type=normal;
  density weight / type=kernel;
run;
title;

/* -------------------------------------------------------- */
/* weight distribution by sex -- box plots                  */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2;
  hbox weight / category=sex;
  title 'NHANES: weight dist box plots by sex';
run;
title;

/* -------------------------------------------------------- */
/* weight distribution by sex -- summary                    */
/* proc univariate output simplified,                       */
/* summary statistics and percentiles from proc means       */
/* proc univariate histogram/outhistogram used to form      */
/* frequency and relative frequency distributions           */
/* -------------------------------------------------------- */
proc univariate data=nhanes2 nextrval=5;
  var weight;
  class sex;
  ods select ExtremeValues BasicMeasures Quantiles;
  title 'NHANES: weight distributions by sex';
run;

proc means data=nhanes2 maxdec=4
           n min q1 median q3 max range qrange mean std;
  class sex;
  var weight;
run;

proc means data=nhanes2 maxdec=4
           p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
  class sex;
  var weight;
run;

/* the histogram bin counts are written to weighthist */
proc univariate data=nhanes2 noprint;
  var weight;
  class sex;
  histogram weight / endpoints = 70 90 outhistogram=weighthist;
  title2 'histograms and relative frequency distributions';
run;

/* copy the outhistogram variables to friendlier names */
data weightfreq;
  set weighthist;
  frequency  = _count_;
  percentage = _obspct_;
  binstart   = _minpt_;
run;

proc print data=weightfreq;
  var binstart frequency percentage;
  by sex;
run;

/* -------------------------------------------------------- */
/* overlaid weight distributions                            */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2;
  title2 'weight distribution histograms (overlaid plots)';
  histogram weight / binwidth=20 transparency=0.4 group=sex;
  density weight / type=normal group=sex;
  density weight / type=kernel group=sex;
run;
title2;
title;

/* --------------------------------------------------- */
/* examine height distributions                        */
/* --------------------------------------------------- */
proc sgpanel data=nhanes2;
  title 'NHANES: height distribution by sex';
  panelby sex;
  histogram height / binwidth=1;
  density height / type=kernel;
run;

/*   type=normal requests a fitted normal density */
/*   type=kernel requests a fitted smooth density */
proc sgpanel data=nhanes2;
  title 'NHANES: height distribution by sex';
  panelby sex;
  histogram height / binwidth=1;
  density height / type=normal;
  density height / type=kernel;
run;
title;

/* -------------------------------------------------------- */
/* height distribution by sex -- box plots                  */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2;
  hbox height / category=sex;
  title 'NHANES: height dist box plots by sex';
run;
title;

/* -------------------------------------------------------- */
/* height distribution by sex -- summary                    */
/* proc univariate output simplified,                       */
/* summary statistics and percentiles from proc means       */
/* proc univariate histogram/outhistogram used to form      */
/* frequency and relative frequency distributions           */
/* -------------------------------------------------------- */
proc univariate data=nhanes2 nextrval=5;
  var height;
  class sex;
  ods select ExtremeValues BasicMeasures Quantiles;
  title 'NHANES: height distributions by sex';
run;

proc means data=nhanes2 maxdec=4
           n min q1 median q3 max range qrange mean std;
  class sex;
  var height;
run;

proc means data=nhanes2 maxdec=4
           p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
  class sex;
  var height;
run;

/* the histogram bin counts are written to heighthist */
proc univariate data=nhanes2 noprint;
  var height;
  class sex;
  histogram height / endpoints = 50 51 outhistogram=heighthist;
  title2 'histograms and relative frequency distributions';
run;

/* copy the outhistogram variables to friendlier names */
data heightfreq;
  set heighthist;
  frequency  = _count_;
  percentage = _obspct_;
  binstart   = _minpt_;
run;

proc print data=heightfreq;
  var binstart frequency percentage;
  by sex;
run;

/* -------------------------------------------------------- */
/* overlaid height distributions                            */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2;
  title2 'height distribution histograms (overlaid plots)';
  histogram height / binwidth=1 transparency=0.4 group=sex;
  density height / type=normal group=sex;
  density height / type=kernel group=sex;
run;
title2;
title;

/* --------------------------------------------------- */
/* examine body mass index distributions               */
/*   type=normal requests a fitted normal density      */
/*   type=kernel requests a fitted smooth density      */
/* --------------------------------------------------- */
proc sgpanel data=nhanes2;
  title 'NHANES: bmi distribution by sex';
  panelby sex;
  histogram bmi / binwidth=1;
  density bmi / type=normal;
  density bmi / type=kernel;
run;
title;

/* -------------------------------------------------------- */
/* bmi distribution by sex -- box plots                     */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2;
  hbox bmi / category=sex;
  title 'NHANES: bmi dist box plots by sex';
run;
title;

/* -------------------------------------------------------- */
/* bmi distribution by sex -- summary                       */
/* -------------------------------------------------------- */
proc univariate data=nhanes2 nextrval=5;
  var bmi;
  class sex;
  ods select ExtremeValues BasicMeasures Quantiles;
  title 'NHANES: bmi distributions by sex';
run;

proc means data=nhanes2 maxdec=4
           n min q1 median q3 max range qrange mean std;
  class sex;
  var bmi;
run;

proc means data=nhanes2 maxdec=4
           p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
  class sex;
  var bmi;
run;

/* --------------------------------------------------- */
/* examine waist circumference distributions           */
/*   type=normal requests a fitted normal density      */
/*   type=kernel requests a fitted smooth density      */
/* --------------------------------------------------- */
proc sgpanel data=nhanes2;
  title 'NHANES: waist distribution by sex';
  panelby sex;
  histogram waist / binwidth=1;
  density waist / type=normal;
  density waist / type=kernel;
run;
title;

/* -------------------------------------------------------- */
/* waist distribution by sex -- box plots                   */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2;
  hbox waist / category=sex;
  title 'NHANES: waist dist box plots by sex';
run;
title;

/* -------------------------------------------------------- */
/* waist distribution by sex -- summary                     */
/* -------------------------------------------------------- */
proc univariate data=nhanes2 nextrval=5;
  var waist;
  class sex;
  ods select ExtremeValues BasicMeasures Quantiles;
  ods show;
  title 'NHANES: waist distributions by sex';
run;

proc means data=nhanes2 maxdec=4
           n min q1 median q3 max range qrange mean std;
  class sex;
  var waist;
run;

proc means data=nhanes2 maxdec=4
           p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
  class sex;
  var waist;
run;
/* the title is cleared only after the percentile step has run, */
/* so that step keeps its title                                 */
title;

/* --------------------------------------------------- */
/* examine age distributions                           */
/*   type=normal requests a fitted normal density      */
/*   type=kernel requests a fitted smooth density      */
/* --------------------------------------------------- */
proc sgpanel data=nhanes2;
  title 'NHANES: age distribution by sex -- 5 year binwidth';
  title2 'age censored at 80 i.e. 80 or more recorded as 80';
  panelby sex / columns=1;
  histogram age / binwidth=5 boundary=lower datalabel=percent;
  density age / type=normal;
  density age / type=kernel;
run;
title;

/* -------------------------------------------------------- */
/* age distribution by sex -- box plots                     */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2;
  hbox age / category=sex;
  title 'NHANES: age distribution box plots';
  title2 'age censored at 80 i.e. 80 or more recorded as 80';
run;
title;

/* -------------------------------------------------------- */
/* age distribution by sex -- summary                       */
/* -------------------------------------------------------- */
proc univariate data=nhanes2 nextrval=5;
  var age;
  class sex;
  ods select ExtremeValues BasicMeasures Quantiles;
  title 'NHANES: age distribution by sex';
  title2 'age censored at 80 i.e. 80 or more recorded as 80';
run;

proc means data=nhanes2 maxdec=4
           n min q1 median q3 max range qrange mean std;
  class sex;
  var age;
run;

proc means data=nhanes2 maxdec=4
           p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
  class sex;
  var age;
run;
/* the titles are cleared only after the percentile step has run, */
/* so that step keeps its title and the censoring note            */
title;

/* --------------------------------------------------- */
/* examine the sex distribution                        */
/* --------------------------------------------------- */
proc freq data=nhanes2;
  tables sex;
  title 'NHANES: sex distribution';
run;

proc sgplot data=nhanes2;
  hbar sex / stat=percent datalabel;
run;
title;

/* -------------------------------------------------------- */
/* explore the relationship between height and weight       */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2;
  title 'NHANES: weight vs height scatterplot';
  scatter x=height y=weight / group=sex;
  reg x=height y=weight / group=sex;
run;
title;

/* BY-group processing below needs the data sorted by sex */
proc sort data=nhanes2 out=nhanes3;
  by sex;
run;

proc sgplot data=nhanes3;
  title 'NHANES: weight vs height scatterplot';
  scatter x=height y=weight;
  reg x=height y=weight;
  ellipse x=height y=weight;
  by sex;
run;
title;