/* nhanes2013-2014_SRS.sas */
/* selection of a simple random sample */
/* NHANES 2013-2014 Body Measurement Data */
/*
   Raw input columns
     RIAGENDR  -- gender 1:male 2:female
     RIDAGEYR  -- age at screening years 80 = 80 or more
     BMXWT     -- weight kg
     BMXHT     -- standing height cm
     BMXBMI    -- bmi kg/m^2
     BMXWAIST  -- waist circumference cm
     RIAGENDR2 -- extra variable for csv processing
*/

/* --------------------------------------------------- */
/* Read the raw file and derive the analysis variables.
   firstobs=2 starts reading at the second record
   (presumably the first record is a header row).
   weight = kg * 2.2, height and waist = cm / 2.54.
   Anyone with age < 20 is labelled 'child'; note that a
   missing age also compares as < 20 in SAS.            */
/* --------------------------------------------------- */
data nhanes1;
   infile '/folders/myfolders/CBSASfiles/NHANES20132014_SelectedBodyMeasurements.dat' firstobs=2;
   input RIAGENDR RIDAGEYR BMXWT BMXHT BMXBMI BMXWAIST RIAGENDR2;

   /* explicit length so 'female' is not truncated by the if/else order */
   length sex $ 6;
   if RIAGENDR = 1 then sex = 'male';
   else sex = 'female';

   age    = RIDAGEYR;
   weight = BMXWT * 2.2;
   height = BMXHT / 2.54;
   bmi    = BMXBMI;
   waist  = BMXWAIST / 2.54;

   length agegroup $ 5;
   if age < 20 then agegroup = 'child';
   else agegroup = 'adult';
run;

/* proc print data=nhanes1; */

/* keep only the adults */
data nhanes2;
   set nhanes1;
   where agegroup = 'adult';
run;

/* --------------------------------------------------- */
/* select a moderate sized random sample of adults      */
/*
   proc surveyselect draws a simple random sample of
   50 observations from "nhanes2" and stores it in the
   output data set nhanes2SRS1.
     method=srs   -- simple random sample, selected
                     without replacement
     seed=123456  -- starting seed for the random
                     sampling; change it to obtain a
                     different random sample
*/
/* --------------------------------------------------- */
title 'SRS of n=50 adults from nhanes 2013/2014';
proc surveyselect data=nhanes2 out=nhanes2SRS1
                  method=srs sampsize=50 seed=123456;
run;

/* --------------------------------------------------- */
/* print the sample values                              */
/* --------------------------------------------------- */
proc print data=nhanes2SRS1;
   var sex age weight height bmi waist agegroup;
run;

/* --------------------------------------------------- */
/* get means for the entire adult group and the sample  */
/* proc means reports the mean and some other summary
   values for each variable named on the "var" line     */
/* --------------------------------------------------- */
title 'entire adult group from nhanes2013/2014';
proc means data=nhanes2;
   var age weight height bmi waist;
run;

title 'SRS of n=50 adults from nhanes2013/2014';
proc means data=nhanes2SRS1;
   var age weight height bmi waist;
run;

/* --------------------------------------------------- */
/* examine weight distributions                         */
/*
   title          -- title for the output
   panelby sex    -- one panel (graph) per level of sex
   histogram options
     binwidth=20       -- width of each bin
     boundary=lower    -- include lower endpt in the bin,
                          default is upper
     datalabel=percent -- display percentage at end of bin
                          (use datalabel=count to get
                          frequency)
   density options
     type=kernel       -- fitted smooth density
*/
/* --------------------------------------------------- */
title 'NHANES: weight distribution by sex';
proc sgpanel data=nhanes2SRS1;
   panelby sex;
   histogram weight / binwidth=20 boundary=lower datalabel=percent;
   density weight / type=kernel;
run;
title;

/* -------------------------------------------------------- */
/* weight distribution by sex -- histogram, smoothed
   histogram, and fitted normal distribution
     type=normal -- fitted normal density
     type=kernel -- fitted smooth density                    */
/* -------------------------------------------------------- */
title 'NHANES SRS of n=50 adults: weight distribution by sex';
proc sgpanel data=nhanes2SRS1;
   panelby sex;
   histogram weight / binwidth=20;
   density weight / type=normal;
   density weight / type=kernel;
run;
title;

/* -------------------------------------------------------- */
/* weight distribution by sex -- box plots                   */
/* -------------------------------------------------------- */
title 'NHANES SRS of n=50 adults: weight dist box plots by sex';
proc sgplot data=nhanes2SRS1;
   hbox weight / category=sex;
run;
title;

/* -------------------------------------------------------- */
/* weight distribution by sex -- summary */
/* proc univariate output simplified, */
/* summary statistics and percentiles from proc means */
/* proc univariate histogram/outhistogram used to form */
/* frequency and relative frequency distributions */
/* -------------------------------------------------------- */

/* ods select limits the univariate output to three tables;
   nextrval=5 requests the 5 lowest / highest distinct values */
proc univariate data=nhanes2SRS1 nextrval=5;
   var weight;
   class sex;
   ods select ExtremeValues BasicMeasures Quantiles;
   title 'NHANES SRS of n=50 adults: weight distributions by sex';
run;

/* Summary statistics for the sample.  This step previously read the
   full adult data set (nhanes2) while printing under the
   "SRS of n=50 adults" title; it now reads the sample so that the
   numbers match the title and the surrounding steps. */
proc means data=nhanes2SRS1 maxdec=4 n min q1 median q3 max range qrange mean std;
   class sex;
   var weight;
run;

/* selected percentiles for the sample */
proc means data=nhanes2SRS1 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
   class sex;
   var weight;
run;

/* noprint suppresses the descriptive tables; the histogram bin
   information is written to weighthist via outhistogram=.
   NOTE(review): only two endpoints (70 90) are listed -- this appears
   to rely on the procedure extending the list at the same spacing to
   cover the data; confirm against the SAS log. */
proc univariate data=nhanes2SRS1 noprint;
   var weight;
   class sex;
   histogram weight / endpoints = 70 90 outhistogram=weighthist;
   title2 'NHANES SRS of n=50 adults: histograms and relative frequency distributions';
run;

/* rename the outhistogram columns to friendlier names */
data weightfreq;
   set weighthist;
   frequency  = _count_;
   percentage = _obspct_;
   binstart   = _minpt_;
run;

proc print data=weightfreq;
   var binstart frequency percentage;
   by sex;
run;

/* -------------------------------------------------------- */
/* overlaid weight distributions */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2SRS1;
   title2 'NHANES SRS of n=50 adults: weight distribution histograms (overlaid plots)';
   histogram weight / binwidth=20 transparency=0.4 group=sex;
   density weight / type=normal group=sex;
   density weight / type=kernel group=sex;
run;
title2;
title;

/* --------------------------------------------------- */
/* examine height distributions */
/* --------------------------------------------------- */
proc sgpanel data=nhanes2SRS1;
   title 'NHANES: height distribution by sex';
   panelby sex;
   histogram height / binwidth=2 boundary=lower datalabel=percent;
   density height / type=kernel;
run;
title;

/* -------------------------------------------------------- */
/* height distribution by sex -- histogram, smoothed histogram,
   and fitted normal distribution */
/* -------------------------------------------------------- */
proc sgpanel data=nhanes2SRS1;
   title 'NHANES SRS of n=50 adults: height distribution by sex';
   panelby sex;
   histogram height / binwidth=2;
   density height / type=normal;
   density height / type=kernel;
run;
title;

/* -------------------------------------------------------- */
/* height distribution by sex -- box plots */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2SRS1;
   hbox height / category=sex;
   title 'NHANES SRS of n=50 adults: height dist box plots by sex';
run;
title;

/* -------------------------------------------------------- */
/* height distribution by sex -- summary */
/* proc univariate output simplified, */
/* summary statistics and percentiles from proc means */
/* proc univariate histogram/outhistogram used to form */
/* frequency and relative frequency distributions */
/* -------------------------------------------------------- */
proc univariate data=nhanes2SRS1 nextrval=5;
   var height;
   class sex;
   ods select ExtremeValues BasicMeasures Quantiles;
   title 'NHANES SRS of n=50 adults: height distributions by sex';
run;

/* Summary statistics for the sample (previously read nhanes2, the
   full adult group, under the SRS title -- see the weight section). */
proc means data=nhanes2SRS1 maxdec=4 n min q1 median q3 max range qrange mean std;
   class sex;
   var height;
run;

/* selected percentiles for the sample */
proc means data=nhanes2SRS1 maxdec=4 p1 p5 p10 p20 p30 p40 p50 p60 p70 p80 p90 p95 p99;
   class sex;
   var height;
run;

/* NOTE(review): as with weight, only two endpoints (50 52) are
   listed; confirm the bins produced in the SAS log. */
proc univariate data=nhanes2SRS1 noprint;
   var height;
   class sex;
   histogram height / endpoints = 50 52 outhistogram=heighthist;
   title2 'NHANES SRS of n=50 adults: histograms and relative frequency distributions';
run;

data heightfreq;
   set heighthist;
   frequency  = _count_;
   percentage = _obspct_;
   binstart   = _minpt_;
run;

proc print data=heightfreq;
   var binstart frequency percentage;
   by sex;
run;

/* -------------------------------------------------------- */
/* overlaid height distributions */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2SRS1;
   title2 'NHANES SRS of n=50 adults: height distribution histograms (overlaid plots)';
   histogram height / binwidth=2 transparency=0.4 group=sex;
   density height / type=normal group=sex;
   density height / type=kernel group=sex;
run;
title2;
title;

/* --------------------------------------------------- */
/* examine body mass index distributions */
/* --------------------------------------------------- */
proc sgpanel data=nhanes2SRS1;
   title 'NHANES SRS of n=50 adults: bmi distribution by sex';
   panelby sex;
   histogram bmi / binwidth=1;
   density bmi / type=normal;
   density bmi / type=kernel;
   /* type=normal requests a fitted normal density
      type=kernel requests a fitted smooth density */
run;
title;

/* -------------------------------------------------------- */
/* bmi distribution by sex -- box plots */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2SRS1;
   hbox bmi / category=sex;
   title 'NHANES SRS of n=50 adults: bmi dist box plots by sex';
run;
title;

/* -------------------------------------------------------- */
/* bmi distribution by sex -- summary */
/* -------------------------------------------------------- */
proc univariate data=nhanes2SRS1 nextrval=5;
   var bmi;
   class sex;
   ods select ExtremeValues BasicMeasures Quantiles;
   title 'NHANES SRS of n=50 adults: bmi distributions by sex';
run;

/* --------------------------------------------------- */
/* examine waist circumference distributions */
/* --------------------------------------------------- */
proc sgpanel data=nhanes2SRS1;
   title 'NHANES SRS of n=50 adults: waist distribution by sex';
   panelby sex;
   histogram waist / binwidth=1;
   density waist / type=normal;
   density waist / type=kernel;
   /* type=normal requests a fitted normal density
      type=kernel requests a fitted smooth density */
run;
title;

/* -------------------------------------------------------- */
/* waist distribution by sex -- box plots */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2SRS1;
   hbox waist / category=sex;
   title 'NHANES SRS of n=50 adults: waist dist box plots by sex';
run;
title;

/* -------------------------------------------------------- */
/* waist distribution by sex -- summary */
/* -------------------------------------------------------- */
proc univariate data=nhanes2SRS1 nextrval=5;
   var waist;
   class sex;
   ods select ExtremeValues BasicMeasures Quantiles;
   title 'NHANES SRS of n=50 adults: waist distributions by sex';
run;
title;

/* --------------------------------------------------- */
/* examine age distributions */
/* --------------------------------------------------- */
proc sgpanel data=nhanes2SRS1;
   title 'NHANES SRS of n=50 adults: age distribution by sex -- 5 year binwidth';
   title2 'age censored at 80 i.e. 80 or more recorded as 80';
   panelby sex / columns=1;
   histogram age / binwidth=5 boundary=lower datalabel=percent;
   density age / type=normal;
   density age / type=kernel;
   /* type=normal requests a fitted normal density
      type=kernel requests a fitted smooth density */
run;
title;

/* -------------------------------------------------------- */
/* age distribution by sex -- box plots */
/* -------------------------------------------------------- */
proc sgplot data=nhanes2SRS1;
   hbox age / category=sex;
   title 'NHANES SRS of n=50 adults: age distribution box plots';
   title2 'age censored at 80 i.e. 80 or more recorded as 80';
run;
title;

/* -------------------------------------------------------- */
/* age distribution by sex -- summary */
/* -------------------------------------------------------- */
proc univariate data=nhanes2SRS1 nextrval=5;
   var age;
   class sex;
   ods select ExtremeValues BasicMeasures Quantiles;
   title 'NHANES SRS of n=50 adults: age distribution by sex';
   title2 'age censored at 80 i.e. 80 or more recorded as 80';
run;
title;

/* --------------------------------------------------- */
/* examine the sex distribution */
/* --------------------------------------------------- */
proc freq data=nhanes2SRS1;
   tables sex;
   title 'NHANES SRS of n=50 adults: sex distribution';
run;

proc sgplot data=nhanes2SRS1;
   hbar sex / stat=percent datalabel;
run;
title;

/* -------------------------------------------------------- */
/* explore the relationship between height and weight */
/* -------------------------------------------------------- */
/*
   The remainder of this command file requests several
   scatterplots to explore the relationship between height
   and weight.
     "scatter" requests the plot
     "reg"     adds a line of best fit (simple linear
               regression line)
     "ellipse" adds an ellipse; with no options this is a
               95% prediction ellipse (TYPE=PREDICTED,
               ALPHA=0.05 are the defaults), not a confidence
               ellipse for the mean
*/
/* -------------------------------------------------------- */
proc sgplot data=nhanes2SRS1;
   title 'NHANES: weight vs height scatterplot';
   scatter x=height y=weight / group=sex;
   reg x=height y=weight / group=sex;
run;
title;

/* BY-group processing needs the data sorted by sex.
   NOTE(review): this plot uses the full adult group (nhanes2),
   not the n=50 sample -- confirm that is intended. */
proc sort data=nhanes2 out=nhanes3;
   by sex;
run;

proc sgplot data=nhanes3;
   title 'NHANES: weight vs height scatterplot';
   scatter x=height y=weight;
   reg x=height y=weight;
   ellipse x=height y=weight;
   by sex;
run;
title;