/*
Programmer:  Timothy McNeel

This program creates a SAS transport dataset of diet variables plus
HHX, FMX, and FPX identifier variables from the 2005 NHIS data.

Processing outline:
  1. Import the 2005 NHIS CANCERXX transport file into WORK.NHIS2005.
  2. Derive an age group (AgeA), times-per-day variables for each food
     (the *PD variables), and a split of cereal frequency across four
     cereal categories.
  3. Read four sex-by-age-group lookup tables (gram size, fruit/vegetable
     serving, dairy portion, added sugar adjustments) from inline data.
  4. Merge the lookups onto the person records and evaluate sex-specific
     scoring equations on a transformed scale (cube root, fourth root, or
     square root), then back-transform.
  5. Compute weighted means of the transformed scores by sex and use them
     to produce variance-adjusted versions of each estimate.
  6. Round, sort by HHX FMX FPX, and write the result with PROC CPORT.
*/

***MODIFY THE FOLLOWING LINES AS APPROPRIATE FOR YOUR LOCAL ENVIRONMENT.;

***Output dataset;
filename outfile1 pipe 'gzip -c > nhis2005dietvars.03-29-2007.v8x.gz';

***The 2005 NHIS CANCERXX dataset;
filename can2005 pipe 'gunzip -c /prj/arb/nhis/data/2005/sasdata/cancerxx.v8x.gz';

***Formats for the 2005 NHIS CANCERXX dataset;
%include '/prj/arb/nhis/data/2005/progs/formats.cancerxx.sas';

proc cimport infile=can2005 data=nhis2005;

***MODIFY THE PRECEDING LINES AS APPROPRIATE FOR YOUR LOCAL ENVIRONMENT.;


data nhis2005;
  set nhis2005;

  ***Age group used as a key into the adjustment lookup tables below.;
  ***The label lists all seven bands assigned by the IF/ELSE chain.;
  label AgeA = 'Age (18-27, 28-37, 38-47, 48-57, 58-67, 68-77, 78+)';
  if 18<=age_p<=27 then agea=1;
  else if 28<=age_p<=37 then agea=2;
  else if 38<=age_p<=47 then agea=3;
  else if 48<=age_p<=57 then agea=4;
  else if 58<=age_p<=67 then agea=5;
  else if 68<=age_p<=77 then agea=6;
  else if age_p>=78 then agea=7;

  ***Create new variables for the number of times eating each food per day;
  ***The two arrays are parallel - element i of a_foodpd is derived from;
  ***element i of a_food, so the variable order must match.;
  array a_food[*] cereal milk soda frtjuice frtdrink fruit salad fries potato
                  beans oveg tomsauce salsa bread donut cookies cheese;
  array a_foodpd[*] CerealPD MilkPD SodaPD FrtJuicePD FrtDrinkPD FruitPD SaladPD
                    FriesPD PotatoPD BeansPD OVegPD TomSaucePD SalsaPD BreadPD
                    DonutPD CookiesPD CheesePD;
  do i = 1 to dim(a_food);
    ***Frequency codes 0-9 map to times per day and any other value becomes missing;
    if a_food[i]=0 then a_foodpd[i]=0;
    else if a_food[i]=1 then a_foodpd[i]=0.067;
    else if a_food[i]=2 then a_foodpd[i]=0.214;
    else if a_food[i]=3 then a_foodpd[i]=0.5;
    else if a_food[i]=4 then a_foodpd[i]=0.786;
    else if a_food[i]=5 then a_foodpd[i]=1;
    else if a_food[i]=6 then a_foodpd[i]=2;
    else if a_food[i]=7 then a_foodpd[i]=3;
    else if a_food[i]=8 then a_foodpd[i]=4;
    else if a_food[i]=9 then a_foodpd[i]=5;
    else a_foodpd[i]=.;
  end;

  ***Set number of cereal types mentioned (missing, 0, 1, or 2);
  ***Any cerkind value of 7, 8, or 9 makes the count missing.  Once missing it;
  ***stays missing because adding 1 to a missing value yields missing.;
  ***NOTE(review): 7/8/9 are presumably nonresponse codes - confirm in codebook.;
  cerealcnt=0;
  array a_cerkind[*] cerkind1-cerkind5;
  do i = 1 to dim(a_cerkind);
    if a_cerkind[i] in (7 8 9) then cerealcnt=.;
    else if a_cerkind[i]=1 then cerealcnt=cerealcnt+1;
  end;

  ***Distribute number of times eating cereal per day (cerealpd) among the  ;
  ***cereal types, as directed by Fran Thompson 11/22/2006                  ;
  array a_cerpd[*] hotcerpd hifibcerpd gdfibcerpd othcerpd;
  ***Start each category at missing when inputs are unknown and at zero otherwise;
  if cerealpd=. or cerealcnt=. then do i=1 to dim(a_cerpd);
    a_cerpd[i]=.;
  end;
  else do i=1 to dim(a_cerpd);
    a_cerpd[i]=0;
  end;
  ***Each mentioned cereal kind receives an equal share (cerealpd/cerealcnt).;
  if cerkind1=1 then hotcerpd = hotcerpd + (cerealpd/cerealcnt);
  if cerkind2=1 then hifibcerpd = hifibcerpd + (cerealpd/cerealcnt);
  if cerkind3=1 then gdfibcerpd = gdfibcerpd + (cerealpd/cerealcnt);
  if cerkind4=1 then othcerpd = othcerpd + (cerealpd/cerealcnt);
  ***The share for cerkind5 is split across all four categories using fixed;
  ***proportions (0.23, 0.02, 0.44, 0.31) that sum to 1.;
  if cerkind5=1 then do;
    hotcerpd = hotcerpd + ((0.23*cerealpd)/cerealcnt);
    hifibcerpd = hifibcerpd + ((0.02*cerealpd)/cerealcnt);
    gdfibcerpd = gdfibcerpd + ((0.44*cerealpd)/cerealcnt);
    othcerpd = othcerpd + ((0.31*cerealpd)/cerealcnt);
  end;
run;


***Input gram size adjustments, from fran.predict.nhis.gradj.txt provided by Lisa Kahle 02/24/2007;
***One record per Sex (1-2) by AgeA (1-7) combination, already in Sex AgeA order.;
data gramsizeadj;
  input Sex AgeA HotCerGSA HiFibCerGSA GdFibCerGSA OthCerGSA SalsaGSA CheeseGSA
        MilkGSA FrtJuiceGSA FruitGSA SaladGSA FriesGSA PotatoGSA BeansGSA OVegGSA
        TomSauceGSA BreadGSA CookiesGSA DonutGSA SodaGSA FrtDrinkGSA;
  datalines;
1 1 354.000000 33.000000 84.000000 64.000000 62.250000 35.440000 325.333333 372.000000 131.750000 30.000000 112.500000 210.000000 222.500000 61.250000 63.000000 56.000000 64.000000 71.000000 453.866667 480.000000
1 2 219.630000 33.000000 66.000000 54.000000 62.250000 28.350000 268.400000 311.250000 128.000000 47.833333 114.000000 193.000000 188.095000 74.166667 125.000000 54.000000 66.000000 77.500000 372.000000 376.500000
1 3 247.000000 33.000000 58.000000 54.000000 49.275000 30.470000 274.500000 249.000000 123.200000 42.500000 100.000000 193.000000 178.000000 75.625000 125.000000 52.000000 66.000000 72.800000 372.000000 378.265000
1 4 247.000000 33.000000 64.125000 40.625000 43.875000 29.390000 244.000000 249.000000 127.500000 41.250000 100.000000 161.000000 189.750000 81.666667 156.250000 52.000000 73.733333 65.000000 372.000000 372.000000
1 5 234.000000 22.000000 50.000000 40.000000 16.000000 28.350000 233.833333 248.000000 122.000000 42.500000 85.500000 150.000000 226.800000 76.000000 122.500000 51.000000 67.500000 63.000000 370.200000 306.000000
1 6 239.000000 22.000000 47.000000 30.375000 31.130000 28.350000 206.000000 186.750000 118.000000 41.250000 85.500000 127.000000 199.333333 73.332500 125.000000 48.250000 64.000000 57.000000 368.400000 248.000000
1 7 234.000000 22.000000 39.000000 29.000000 31.130000 28.350000 183.000000 186.750000 114.250000 44.666667 97.000000 113.250000 214.000000 70.000000 125.000000 48.000000 61.000000 57.000000 368.000000 248.000000
2 1 234.000000 42.750000 60.000000 46.500000 32.000000 28.250000 244.000000 280.125000 118.000000 33.750000 79.500000 122.000000 132.750000 56.000000 125.000000 50.000000 56.700000 67.333333 372.000000 360.000000
2 2 234.000000 42.750000 57.000000 37.500000 31.130000 24.000000 244.000000 249.000000 118.000000 32.083333 70.000000 127.000000 126.500000 62.043333 113.400000 48.000000 50.000000 58.000000 372.000000 341.000000
2 3 234.000000 42.750000 53.000000 36.250000 36.565000 24.000000 244.000000 248.800000 118.000000 47.000000 70.000000 119.000000 126.500000 64.415000 62.500000 47.500000 48.800000 57.000000 370.200000 250.000000
2 4 234.000000 42.750000 49.500000 33.000000 27.847500 26.250000 214.250000 233.250000 118.000000 55.000000 70.000000 113.000000 141.750000 64.920000 125.000000 45.000000 55.200000 58.500000 368.400000 250.000000
2 5 226.800000 27.970000 42.000000 27.000000 31.130000 28.350000 183.750000 189.755000 118.000000 43.750000 66.000000 105.000000 130.550000 65.000000 62.500000 45.000000 57.000000 57.000000 330.666667 248.000000
2 6 234.000000 27.970000 39.083333 26.000000 16.000000 26.250000 183.000000 186.600000 112.427143 34.333333 70.000000 105.000000 172.000000 67.375000 62.500000 42.400000 48.675000 59.000000 366.000000 240.000000
2 7 227.475000 27.970000 40.000000 25.000000 16.000000 28.350000 183.000000 186.700000 109.000000 41.250000 64.000000 105.000000 178.000000 71.333333 62.500000 34.000000 52.666667 47.000000 368.200000 221.200000
;
run;


***Input F/V serving adjustments, from fran.predict.nhis.fvadj.txt provided by Lisa Kahle 02/24/2007;
***One record per Sex (1-2) by AgeA (1-7) combination, already in Sex AgeA order.;
data fvadj;
  input Sex AgeA FruitFVA FrtJuiceFVA FriesFVA PotatoFVA SaladFVA OVegFVA
        BeansFVA SalsaFVA TomSauceFVA;
  datalines;
1 1 1.301000 2.000000 2.000000 2.000000 0.545000 0.750000 1.374000 0.533000 0.500000
1 2 1.301000 1.667500 2.000000 2.000000 0.708000 0.906000 1.047000 0.533000 0.541000
1 3 1.229571 1.335000 1.773000 1.999000 0.754500 0.974500 1.065000 0.421500 0.541000
1 4 1.227333 1.335000 1.710000 1.999000 0.750000 1.000000 1.227000 0.386500 0.812000
1 5 1.168000 1.334000 1.400000 1.914000 0.833500 1.000000 1.000000 0.137000 0.541000
1 6 1.168000 1.001000 1.250000 1.544000 0.750000 0.880000 1.000000 0.266000 0.541000
1 7 1.052333 1.001000 1.250000 1.508000 0.822500 0.833333 1.114000 0.266000 0.541000
2 1 1.168000 1.500500 1.481000 1.544000 0.613500 0.702200 0.964000 0.274000 0.541000
2 2 1.168000 1.334000 1.365500 1.544000 0.572500 0.779333 0.684000 0.266000 0.541000
2 3 1.168000 1.334000 1.272000 1.528000 0.833333 0.792500 0.800000 0.322500 0.273000
2 4 1.168000 1.251250 1.400000 1.544000 1.000000 0.788500 0.687000 0.238250 0.541000
2 5 1.150500 1.019500 1.000000 1.499000 0.795500 0.774000 0.822000 0.266000 0.500000
2 6 1.083833 1.000500 1.026000 1.516000 0.625000 0.833000 0.807000 0.137000 0.500000
2 7 1.000000 1.000500 1.000000 1.272000 0.750000 0.856750 1.000000 0.137000 0.500000
;
run;


***Input dairy portion size adjustments, from fran.predict.nhis.tdadj.txt provided by Lisa Kahle 02/24/2007;
***One record per Sex (1-2) by AgeA (1-7) combination, already in Sex AgeA order.;
data dairyadj;
  input Sex AgeA CheeseDA MilkDA;
  datalines;
1 1 0.741000 1.250500
1 2 0.641333 1.083000
1 3 0.667000 1.100400
1 4 0.600000 1.000000
1 5 0.575000 0.916667
1 6 0.499000 0.833333
1 7 0.370000 0.750000
2 1 0.517000 1.000000
2 2 0.470000 1.000000
2 3 0.494000 0.999000
2 4 0.494000 0.874000
2 5 0.470000 0.750000
2 6 0.379000 0.718750
2 7 0.494000 0.750000
;
run;


***Input added sugar size adjustments, from fran.predict.nhis.sugadj.txt provided by Lisa Kahle 02/24/2007;
***One record per Sex (1-2) by AgeA (1-7) combination, already in Sex AgeA order.;
data sugaradj;
  input Sex AgeA SodaSA FrtDrinkSA CookiesSA DonutSA;
  datalines;
1 1 11.835000 9.627000 5.189000 4.308000
1 2 9.990000 8.561000 5.027000 4.196000
1 3 9.947000 8.985000 4.845000 3.707000
1 4 9.683000 8.194000 4.716000 3.095000
1 5 9.683000 6.815000 4.730000 2.897000
1 6 9.631000 5.463000 4.428000 2.837000
1 7 9.605000 5.307000 3.968000 2.781500
2 1 9.815000 7.997000 4.133000 2.966000
2 2 9.683000 7.876000 3.650000 2.966000
2 3 9.683000 6.418000 3.842000 2.797000
2 4 9.644000 6.002000 3.719000 2.966000
2 5 8.443500 6.418000 4.027500 2.498000
2 6 8.370000 5.116000 3.571000 2.627000
2 7 9.683000 5.116000 3.166500 2.627000
;
run;


***The person file must be in Sex AgeA order for the match-merge that follows.;
proc sort data=nhis2005;
  by sex agea;
run;


data nhis2005;
  ***Attach the four adjustment tables by Sex and AgeA.  The subsetting IF keeps;
  ***only records that came from the person file.;
  merge nhis2005(in=a) gramsizeadj fvadj dairyadj sugaradj;
  by sex agea;
  if a;

  ***Each score is a sex-specific linear equation on a transformed scale.  Any;
  ***missing input makes the transformed score, and so the estimate, missing.;

  ***Fiber is modeled on the cube-root scale and cubed afterwards.;
  ***NOTE(review): the female fiber equation has no soda or salad terms while;
  ***the male equation has both - confirm against the source coefficients.;
  label Fiber = 'Fiber (gm) per day';
  if sex=1 then cubertfiber =
      2.015301
    + (0.000558 * hotcergsa * hotcerpd)
    + (0.011463 * hifibcergsa * hifibcerpd)
    + (0.003515 * gdfibcergsa * gdfibcerpd)
    - (0.000425 * othcergsa * othcerpd)
    + (0.003632 * salsagsa * salsapd)
    + (0.001206 * cheesegsa * cheesepd)
    + (0.000180 * milkgsa * milkpd)
    + (0.000043 * sodagsa * sodapd)
    + (0.000141 * frtdrinkgsa * frtdrinkpd)
    + (0.000166 * frtjuicegsa * frtjuicepd)
    + (0.000985 * fruitgsa * fruitpd)
    - (0.000447 * saladgsa * saladpd)
    + (0.001517 * friesgsa * friespd)
    + (0.000720 * potatogsa * potatopd)
    + (0.002156 * beansgsa * beanspd)
    + (0.000899 * oveggsa * ovegpd)
    + (0.001315 * tomsaucegsa * tomsaucepd)
    + (0.002927 * breadgsa * breadpd)
    + (0.001006 * cookiesgsa * cookiespd)
    + (0.000979 * donutgsa * donutpd);
  else if sex=2 then cubertfiber =
      1.838259
    + (0.000671 * hotcergsa * hotcerpd)
    + (0.019873 * hifibcergsa * hifibcerpd)
    + (0.004688 * gdfibcergsa * gdfibcerpd)
    + (0.001493 * othcergsa * othcerpd)
    + (0.003239 * salsagsa * salsapd)
    + (0.000513 * cheesegsa * cheesepd)
    + (0.000169 * milkgsa * milkpd)
    + (0.000115 * frtdrinkgsa * frtdrinkpd)
    + (0.000229 * frtjuicegsa * frtjuicepd)
    + (0.001009 * fruitgsa * fruitpd)
    + (0.001381 * friesgsa * friespd)
    + (0.000693 * potatogsa * potatopd)
    + (0.003217 * beansgsa * beanspd)
    + (0.000925 * oveggsa * ovegpd)
    + (0.001204 * tomsaucegsa * tomsaucepd)
    + (0.003401 * breadgsa * breadpd)
    + (0.001377 * cookiesgsa * cookiespd)
    + (0.001683 * donutgsa * donutpd);
  fiber = cubertfiber**3;

  ***Calcium is modeled on the fourth-root scale and raised to the 4th power.;
  label Calcium = 'Calcium (mg) per day';
  if sex=1 then quarterrtcalcium =
      4.482732
    + (0.000318 * hotcergsa * hotcerpd)
    + (0.006716 * hifibcergsa * hifibcerpd)
    - (0.000355 * gdfibcergsa * gdfibcerpd)
    - (0.002023 * othcergsa * othcerpd)
    + (0.002179 * salsagsa * salsapd)
    + (0.014186 * cheesegsa * cheesepd)
    + (0.002204 * milkgsa * milkpd)
    + (0.000089 * sodagsa * sodapd)
    + (0.000105 * frtdrinkgsa * frtdrinkpd)
    + (0.000123 * frtjuicegsa * frtjuicepd)
    + (0.000170 * fruitgsa * fruitpd)
    - (0.000938 * saladgsa * saladpd)
    + (0.001159 * friesgsa * friespd)
    + (0.000349 * potatogsa * potatopd)
    + (0.000511 * beansgsa * beanspd)
    + (0.000400 * oveggsa * ovegpd)
    + (0.000948 * tomsaucegsa * tomsaucepd)
    + (0.000406 * breadgsa * breadpd)
    + (0.001771 * cookiesgsa * cookiespd)
    + (0.001198 * donutgsa * donutpd);
  else if sex=2 then quarterrtcalcium =
      4.155762
    + (0.000484 * hotcergsa * hotcerpd)
    + (0.006744 * hifibcergsa * hifibcerpd)
    + (0.000074 * gdfibcergsa * gdfibcerpd)
    - (0.001305 * othcergsa * othcerpd)
    + (0.002679 * salsagsa * salsapd)
    + (0.015442 * cheesegsa * cheesepd)
    + (0.002580 * milkgsa * milkpd)
    + (0.000095 * sodagsa * sodapd)
    + (0.000326 * frtdrinkgsa * frtdrinkpd)
    + (0.000195 * frtjuicegsa * frtjuicepd)
    + (0.000264 * fruitgsa * fruitpd)
    - (0.000723 * saladgsa * saladpd)
    + (0.000414 * friesgsa * friespd)
    + (0.000489 * potatogsa * potatopd)
    + (0.001035 * beansgsa * beanspd)
    + (0.000396 * oveggsa * ovegpd)
    + (0.000287 * tomsaucegsa * tomsaucepd)
    + (0.000680 * breadgsa * breadpd)
    + (0.002451 * cookiesgsa * cookiespd)
    + (0.001873 * donutgsa * donutpd);
  calcium = quarterrtcalcium**4;

  ***Fruit and vegetable servings are modeled on the square-root scale.;
  label FV = 'Pyramid servings (1992 definition) of fruits and vegetables per day';
  if sex=1 then sqrtfv = 0.704319 + 0.835532*sqrt(
      (fruitfva * fruitpd)
    + (frtjuicefva * frtjuicepd)
    + (friesfva * friespd)
    + (potatofva * potatopd)
    + (saladfva * saladpd)
    + (ovegfva * ovegpd)
    + (beansfva * beanspd)
    + (salsafva * salsapd)
    + (tomsaucefva * tomsaucepd)
    );
  else if sex=2 then sqrtfv = 0.658819 + 0.796243*sqrt(
      (fruitfva * fruitpd)
    + (frtjuicefva * frtjuicepd)
    + (friesfva * friespd)
    + (potatofva * potatopd)
    + (saladfva * saladpd)
    + (ovegfva * ovegpd)
    + (beansfva * beanspd)
    + (salsafva * salsapd)
    + (tomsaucefva * tomsaucepd)
    );
  fv = sqrtfv**2;

  ***Same as FV but without the French fries term.;
  label FVNoFF = 'Pyramid servings (1992 definition) of fruits and vegetables excluding French fries per day';
  if sex=1 then sqrtfvnoff = 0.729653 + 0.822694*sqrt(
      (fruitfva * fruitpd)
    + (frtjuicefva * frtjuicepd)
    + (potatofva * potatopd)
    + (saladfva * saladpd)
    + (ovegfva * ovegpd)
    + (beansfva * beanspd)
    + (salsafva * salsapd)
    + (tomsaucefva * tomsaucepd)
    );
  else if sex=2 then sqrtfvnoff = 0.639540 + 0.804796*sqrt(
      (fruitfva * fruitpd)
    + (frtjuicefva * frtjuicepd)
    + (potatofva * potatopd)
    + (saladfva * saladpd)
    + (ovegfva * ovegpd)
    + (beansfva * beanspd)
    + (salsafva * salsapd)
    + (tomsaucefva * tomsaucepd)
    );
  fvnoff = sqrtfvnoff**2;

  ***Dairy servings are modeled on the square-root scale.;
  label Dairy = 'Pyramid servings (1992 definition) of dairy per day';
  if sex=1 then sqrtdairy = 0.417414 + 0.831739*sqrt(cheeseda*cheesepd + milkda*milkpd);
  else if sex=2 then sqrtdairy = 0.385301 + 0.782852*sqrt(cheeseda*cheesepd + milkda*milkpd);
  dairy = sqrtdairy**2;

  ***Added sugar is modeled on the cube-root scale.;
  label Sugar = 'Added sugar (tsp) (1992 definition) per day';
  if sex=1 then cubertsugar = 1.672746 + 0.534485*(
    (sodasa*sodapd + frtdrinksa*frtdrinkpd + cookiessa*cookiespd + donutsa*donutpd)**(1/3)
    );
  else if sex=2 then cubertsugar = 1.591494 + 0.491231*(
    (sodasa*sodapd + frtdrinksa*frtdrinkpd + cookiessa*cookiespd + donutsa*donutpd)**(1/3)
    );
  sugar = cubertsugar**3;
run;


***Weighted (WtFA_SA) mean of each transformed score by Sex.  NWAY keeps only;
***the per-Sex rows, so each output dataset has one record per Sex level.;
proc summary nway data=nhis2005;
  weight WtFA_SA;
  class Sex;
  var CubeRtFiber;
  output out=FiberMeans mean=CubeRtFiberMeanBySex;
run;

proc summary nway data=nhis2005;
  weight WtFA_SA;
  class Sex;
  var QuarterRtCalcium;
  output out=CalciumMeans mean=QuarterRtCalciumMeanBySex;
run;

proc summary nway data=nhis2005;
  weight WtFA_SA;
  class Sex;
  var SqRtFV;
  output out=FVMeans mean=SqRtFVMeanBySex;
run;

proc summary nway data=nhis2005;
  weight WtFA_SA;
  class Sex;
  var SqRtFVNoFF;
  output out=FVNoFFMeans mean=SqRtFVNoFFMeanBySex;
run;

proc summary nway data=nhis2005;
  weight WtFA_SA;
  class Sex;
  var SqRtDairy;
  output out=DairyMeans mean=SqRtDairyMeanBySex;
run;

proc summary nway data=nhis2005;
  weight WtFA_SA;
  class Sex;
  var CubeRtSugar;
  output out=SugarMeans mean=CubeRtSugarMeanBySex;
run;


***Final dataset - identifiers plus the raw and adjusted estimates only.;
data nhis2005(keep=HHX FMX FPX Fiber FiberAdj Calcium CalciumAdj FV FVAdj
                   FVNoFF FVNoFFAdj Dairy DairyAdj Sugar SugarAdj);
  ***NHIS2005 is still in Sex AgeA order from the earlier sort, which satisfies;
  ***BY Sex here.  The KEEP= options drop the _TYPE_ and _FREQ_ variables that;
  ***PROC SUMMARY adds to each means dataset.;
  merge nhis2005
        fibermeans(keep=sex cubertfibermeanbysex)
        calciummeans(keep=sex quarterrtcalciummeanbysex)
        fvmeans(keep=sex sqrtfvmeanbysex)
        fvnoffmeans(keep=sex sqrtfvnoffmeanbysex)
        dairymeans(keep=sex sqrtdairymeanbysex)
        sugarmeans(keep=sex cubertsugarmeanbysex)
        ;
  by sex;

  /* Apply variance-adjustment factors.  These are the weighted averages of
     factors from the Observing Protein and Energy Nutrition Study (OPEN) and
     factors from the Eating at America's Table Study (EATS).

     Each adjusted value is factor*(score - mean) + mean on the transformed
     scale, followed by the same back-transformation used for the raw value. */

  label FiberAdj = 'Adjusted fiber (gm) per day';
  if Sex = 1 then FiberAdj = (1.4*(CubeRtFiber - CubeRtFiberMeanBySex) + CubeRtFiberMeanBySex)**3;
  else if Sex = 2 then FiberAdj = (1.2*(CubeRtFiber - CubeRtFiberMeanBySex) + CubeRtFiberMeanBySex)**3;

  label CalciumAdj = 'Adjusted calcium (mg) per day';
  if Sex = 1 then CalciumAdj = (1.0*(QuarterRtCalcium - QuarterRtCalciumMeanBySex) + QuarterRtCalciumMeanBySex)**4;
  else if Sex = 2 then CalciumAdj = (0.9*(QuarterRtCalcium - QuarterRtCalciumMeanBySex) + QuarterRtCalciumMeanBySex)**4;

  label FVAdj = 'Adjusted Pyramid servings (1992 definition) of fruits and vegetables per day';
  if Sex = 1 then FVAdj = (1.2*(SqRtFV - SqRtFVMeanBySex) + SqRtFVMeanBySex)**2;
  else if Sex = 2 then FVAdj = (1.0*(SqRtFV - SqRtFVMeanBySex) + SqRtFVMeanBySex)**2;

  label FVNoFFAdj = 'Adjusted Pyramid servings (1992 definition) of fruits and vegetables excluding French fries per day';
  if Sex = 1 then FVNoFFAdj = (1.2*(SqRtFVNoFF - SqRtFVNoFFMeanBySex) + SqRtFVNoFFMeanBySex)**2;
  else if Sex = 2 then FVNoFFAdj = (1.0*(SqRtFVNoFF - SqRtFVNoFFMeanBySex) + SqRtFVNoFFMeanBySex)**2;

  label DairyAdj = 'Adjusted Pyramid servings (1992 definition) of dairy per day';
  if Sex = 1 then DairyAdj = (1.1*(SqRtDairy - SqRtDairyMeanBySex) + SqRtDairyMeanBySex)**2;
  else if Sex = 2 then DairyAdj = (1.1*(SqRtDairy - SqRtDairyMeanBySex) + SqRtDairyMeanBySex)**2;

  label SugarAdj = 'Adjusted added sugar (tsp) (1992 definition) per day';
  if Sex = 1 then SugarAdj = (1.5*(CubeRtSugar - CubeRtSugarMeanBySex) + CubeRtSugarMeanBySex)**3;
  else if Sex = 2 then SugarAdj = (1.3*(CubeRtSugar - CubeRtSugarMeanBySex) + CubeRtSugarMeanBySex)**3;

  ***Round every output estimate to six decimal places.;
  array a_vars[*] Fiber FiberAdj Calcium CalciumAdj FV FVAdj
                  FVNoFF FVNoFFAdj Dairy DairyAdj Sugar SugarAdj;
  do i = 1 to dim(a_vars);
    a_vars[i] = round(a_vars[i],.000001);
  end;

  ***Remove unnecessary format;
  format hhx;
run;


***Put the output in identifier order before export.;
proc sort data=nhis2005;
  by hhx fmx fpx;
run;

proc contents data=nhis2005;
run;

***Write the transport file through the gzip pipe defined by OUTFILE1.;
proc cport data=nhis2005 file=outfile1;
run;