import os
os.chdir('/Applications/stata/utilities/')
from pystata import config
config.init('mp')
___ ____ ____ ____ ____ ® /__ / ____/ / ____/ 17.0 ___/ / /___/ / /___/ MP—Parallel Edition Statistics and Data Science Copyright 1985-2021 StataCorp LLC StataCorp 4905 Lakeway Drive College Station, Texas 77845 USA 800-STATA-PC https://www.stata.com 979-696-4600 stata@stata.com Stata license: Single-user 4-core perpetual Serial number: 501706307451 Licensed to: MALF IBS.GRANADA Notes: 1. Unicode is supported; see help unicode_advice. 2. More than 2 billion observations are allowed; see help obs_advice. 3. Maximum number of variables is set to 5,000; see help set_maxvar.
%%stata
cd /Users/MALF/Dropbox/CANSURV-slides-course/Granada/
use melanoma
stset surv_mm, failure(status=1 2) scale(12) exit(time 120.5) id(id)
gen _age = min(int(age + _t),99)
gen _year = int(yydx + _t)
sort _year sex _age
merge m:1 _year sex _age using popmort, keep(match master)
tab agegrp, gen(agegrp)
gen female = sex == 2
/* Fit initial model */
stpm2, df(3) scale(hazard) bhazard(rate)
predict h1, hazard per (1000) ci
predict s1, survival ci
. cd /Users/MALF/Dropbox/CANSURV-slides-course/Granada/ /Users/MALF/Dropbox/CANSURV-slides-course/Granada . use melanoma (Skin melanoma, diagnosed 1975-94, follow-up to 1995) . stset surv_mm, failure(status=1 2) scale(12) exit(time 120.5) id(id) Survival-time data settings ID variable: id Failure event: status==1 2 Observed time interval: (surv_mm[_n-1], surv_mm] Exit on or before: time 120.5 Time for analysis: time/12 -------------------------------------------------------------------------- 7,775 total observations 0 exclusions -------------------------------------------------------------------------- 7,775 observations remaining, representing 7,775 subjects 2,777 failures in single-failure-per-subject data 43,384.625 total analysis time at risk and under observation At risk from t = 0 Earliest observed entry t = 0 Last observed exit t = 10.04167 . gen _age = min(int(age + _t),99) . gen _year = int(yydx + _t) . sort _year sex _age . merge m:1 _year sex _age using popmort, keep(match master) Result Number of obs ----------------------------------------- Not matched 0 Matched 7,775 (_merge==3) ----------------------------------------- . tab agegrp, gen(agegrp) Age in 4 | categories | Freq. Percent Cum. ------------+----------------------------------- 0-44 | 2,046 26.32 26.32 45-59 | 2,238 28.78 55.10 60-74 | 2,280 29.32 84.42 75+ | 1,211 15.58 100.00 ------------+----------------------------------- Total | 7,775 100.00 . gen female = sex == 2 . . /* Fit initial model */ . . stpm2, df(3) scale(hazard) bhazard(rate) Iteration 0: log likelihood = -8731.8053 Iteration 1: log likelihood = -8590.4493 Iteration 2: log likelihood = -8590.0259 Iteration 3: log likelihood = -8590.0249 Iteration 4: log likelihood = -8590.0249 Log likelihood = -8590.0249 Number of obs = 7,775 ------------------------------------------------------------------------------ | Coefficient Std. err. z P>|z| [95% conf. interval] -------------+---------------------------------------------------------------- xb | _rcs1 | .8252308 .0249859 33.03 0.000 .7762595 .8742022 _rcs2 | .2110309 .0235833 8.95 0.000 .1648085 .2572534 _rcs3 | .0631928 .0109672 5.76 0.000 .0416974 .0846881 _cons | -1.813097 .0314253 -57.70 0.000 -1.87469 -1.751505 ------------------------------------------------------------------------------ . predict h1, hazard per (1000) ci . predict s1, survival ci .
%%stata
twoway (rarea h1_lci h1_uci _t, sort pstyle(ci)) ///
(line h1 _t, sort), name(h1,replace) legend(off) ///
xtitle("time since diagnosis") ///
ytitle("excess mortality rate (per 1000 py's)")
twoway (rarea s1_lci s1_uci _t, sort pstyle(ci)) ///
(line s1 _t, sort), name(s1,replace) legend(off) ///
xtitle("time since diagnosis") ///
ytitle("relative survival")
. . twoway (rarea h1_lci h1_uci _t, sort pstyle(ci)) /// > (line h1 _t, sort), name(h1,replace) legend(off) /// > xtitle("time since diagnosis") /// > ytitle("excess mortality rate (per 1000 py's)") . . twoway (rarea s1_lci s1_uci _t, sort pstyle(ci)) /// > (line s1 _t, sort), name(s1,replace) legend(off) /// > xtitle("time since diagnosis") /// > ytitle("relative survival") .
%%stata
foreach df in 2 4 6 {
stpm2, df(`df') scale(hazard) bhazard(rate)
predict h_df`df', hazard
replace h_df`df' = h_df`df' * 1000
predict s_df`df', survival
estimates store df`df'
}
/*Plot the excess hazard functions*/
twoway (line h_df2 h_df4 h_df6 _t, sort lcolor(red blue black))
/*Plot the survival functions*/
twoway (line s_df2 s_df4 s_df6 _t, sort lcolor(red blue black))
estimates stats df2 df4 df6, n(2773)
. . foreach df in 2 4 6 { 2. stpm2, df(`df') scale(hazard) bhazard(rate) 3. predict h_df`df', hazard 4. replace h_df`df' = h_df`df' * 1000 5. predict s_df`df', survival 6. estimates store df`df' 7. } Iteration 0: log likelihood = -8857.8555 Iteration 1: log likelihood = -8602.0397 Iteration 2: log likelihood = -8598.8872 Iteration 3: log likelihood = -8598.8825 Iteration 4: log likelihood = -8598.8825 Log likelihood = -8598.8825 Number of obs = 7,775 ------------------------------------------------------------------------------ | Coefficient Std. err. z P>|z| [95% conf. interval] -------------+---------------------------------------------------------------- xb | _rcs1 | .8587922 .027247 31.52 0.000 .805389 .9121954 _rcs2 | .2786853 .0239657 11.63 0.000 .2317133 .3256572 _cons | -1.820875 .0317465 -57.36 0.000 -1.883097 -1.758653 ------------------------------------------------------------------------------ (7,775 real changes made) Iteration 0: log likelihood = -8743.3857 Iteration 1: log likelihood = -8588.5984 Iteration 2: log likelihood = -8588.1174 Iteration 3: log likelihood = -8588.1166 Log likelihood = -8588.1166 Number of obs = 7,775 ------------------------------------------------------------------------------ | Coefficient Std. err. z P>|z| [95% conf. interval] -------------+---------------------------------------------------------------- xb | _rcs1 | .8217794 .0245014 33.54 0.000 .7737575 .8698014 _rcs2 | .2007402 .0225589 8.90 0.000 .1565255 .2449548 _rcs3 | .0786269 .012199 6.45 0.000 .0547173 .1025365 _rcs4 | .0018648 .0067393 0.28 0.782 -.0113439 .0150736 _cons | -1.812389 .0313923 -57.73 0.000 -1.873916 -1.750861 ------------------------------------------------------------------------------ (7,775 real changes made) Iteration 0: log likelihood = -8734.395 Iteration 1: log likelihood = -8587.549 Iteration 2: log likelihood = -8587.1424 Iteration 3: log likelihood = -8587.1412 Iteration 4: log likelihood = -8587.1412 Log likelihood = -8587.1412 Number of obs = 7,775 ------------------------------------------------------------------------------ | Coefficient Std. err. z P>|z| [95% conf. interval] -------------+---------------------------------------------------------------- xb | _rcs1 | .8216408 .0245061 33.53 0.000 .7736098 .8696719 _rcs2 | .1979202 .02278 8.69 0.000 .1532723 .2425681 _rcs3 | .0841377 .0133232 6.32 0.000 .0580248 .1102506 _rcs4 | .019898 .0084671 2.35 0.019 .0033027 .0364933 _rcs5 | .0010064 .0053114 0.19 0.850 -.0094038 .0114166 _rcs6 | .0059215 .0037762 1.57 0.117 -.0014797 .0133227 _cons | -1.812651 .0314002 -57.73 0.000 -1.874194 -1.751107 ------------------------------------------------------------------------------ (7,775 real changes made) . . /*Plot the excess hazard functions*/ . twoway (line h_df2 h_df4 h_df6 _t, sort lcolor(red blue black)) . . /*Plot the survival functions*/ . twoway (line s_df2 s_df4 s_df6 _t, sort lcolor(red blue black)) . . . estimates stats df2 df4 df6, n(2773) Akaike's information criterion and Bayesian information criterion ----------------------------------------------------------------------------- Model | N ll(null) ll(model) df AIC BIC -------------+--------------------------------------------------------------- df2 | 2,773 . -8598.882 3 17203.76 17221.55 df4 | 2,773 . -8588.117 5 17186.23 17215.87 df6 | 2,773 . -8587.141 7 17188.28 17229.78 ----------------------------------------------------------------------------- .
%%stata
stpm2 agegrp2 agegrp3 agegrp4 female year8594, bhazard(rate) ///
df(4) scale(hazard) eform
predict h2, hazard per(1000) ci
predict s2, survival ci
. stpm2 agegrp2 agegrp3 agegrp4 female year8594, bhazard(rate) /// > df(4) scale(hazard) eform Iteration 0: log likelihood = -8592.7514 Iteration 1: log likelihood = -8485.9814 Iteration 2: log likelihood = -8483.7774 Iteration 3: log likelihood = -8483.7688 Iteration 4: log likelihood = -8483.7688 Log likelihood = -8483.7688 Number of obs = 7,775 ------------------------------------------------------------------------------ | exp(b) Std. err. z P>|z| [95% conf. interval] -------------+---------------------------------------------------------------- xb | agegrp2 | 1.285544 .0963537 3.35 0.001 1.109911 1.48897 agegrp3 | 1.729918 .1311554 7.23 0.000 1.491045 2.007059 agegrp4 | 2.613592 .262209 9.58 0.000 2.147044 3.18152 female | .5819093 .0335882 -9.38 0.000 .519665 .6516092 year8594 | .6785354 .0390329 -6.74 0.000 .6061874 .759518 _rcs1 | 2.308665 .0542546 35.60 0.000 2.204739 2.417489 _rcs2 | 1.217353 .0260828 9.18 0.000 1.16729 1.269562 _rcs3 | 1.084414 .0126098 6.97 0.000 1.059979 1.109413 _rcs4 | 1.004209 .0065616 0.64 0.520 .9914306 1.017152 _cons | .1948568 .0131564 -24.22 0.000 .1707041 .2224268 ------------------------------------------------------------------------------ Note: Estimates are transformed only in the first equation. . predict h2, hazard per(1000) ci . predict s2, survival ci .
%%stata
twoway (line h2 _t if agegrp1 == 1 & female == 0 & year8594 == 1, sort) ///
(line h2 _t if agegrp4 == 1 & female == 0 & year8594 == 1, sort) ///
,legend(label (1 "Youngest") label (2 "Oldest")) scheme(sj) ///
ytitle("Excess hazard rate") xtitle("Time since diagnosis")
. twoway (line h2 _t if agegrp1 == 1 & female == 0 & year8594 == 1, sort) /// > (line h2 _t if agegrp4 == 1 & female == 0 & year8594 > == 1, sort) /// > ,legend(label (1 "Youngest") label (2 "Oldest")) sche > me(sj) /// > ytitle("Excess hazard rate") xtitle("Time since diagn > osis") . .
%%stata
stpm2 agegrp2 agegrp3 agegrp4 female year8594, bhazard(rate) df(3) scale(hazard) ///
tvc(agegrp2 agegrp3 agegrp4) dftvc(2)
predict h3, hazard per(1000) ci
predict s3, survival ci
twoway (line h3 _t if agegrp1 == 1 & female == 0 & year8594 == 1, sort) ///
(line h3 _t if agegrp4 == 1 & female == 0 & year8594 == 1, sort) ///
, legend(label (1 "Youngest") label (2 "Oldest")) scheme(sj) ///
ytitle("Excess hazard rate") xtitle("Time since diagnosis")
. stpm2 agegrp2 agegrp3 agegrp4 female year8594, bhazard(rate) df(3) scale(haza > rd) /// > tvc(agegrp2 agegrp3 agegrp4) dftvc(2) Iteration 0: log likelihood = -8586.4362 Iteration 1: log likelihood = -8482.059 Iteration 2: log likelihood = -8479.6676 Iteration 3: log likelihood = -8479.6437 Iteration 4: log likelihood = -8479.6437 Log likelihood = -8479.6437 Number of obs = 7,775 ------------------------------------------------------------------------------ | Coefficient Std. err. z P>|z| [95% conf. interval] -------------+---------------------------------------------------------------- xb | agegrp2 | .2741607 .0798607 3.43 0.001 .1176365 .4306849 agegrp3 | .555553 .0812003 6.84 0.000 .3964034 .7147026 agegrp4 | .934842 .110683 8.45 0.000 .7179073 1.151777 female | -.5457334 .0579363 -9.42 0.000 -.6592864 -.4321804 year8594 | -.3873942 .0576354 -6.72 0.000 -.5003576 -.2744309 _rcs1 | .851634 .0459294 18.54 0.000 .761614 .941654 _rcs2 | .1365924 .0357271 3.82 0.000 .0665685 .2066162 _rcs3 | .0697446 .0112343 6.21 0.000 .0477257 .0917635 _rcs_ageg~21 | -.0210178 .0626366 -0.34 0.737 -.1437832 .1017477 _rcs_ageg~22 | .0706612 .0480804 1.47 0.142 -.0235747 .164897 _rcs_ageg~31 | -.0256869 .0665868 -0.39 0.700 -.1561946 .1048208 _rcs_ageg~32 | .1174402 .0534022 2.20 0.028 .0127739 .2221065 _rcs_ageg~41 | -.037214 .0856722 -0.43 0.664 -.2051285 .1307005 _rcs_ageg~42 | .1407585 .0726802 1.94 0.053 -.0016921 .2832092 _cons | -1.655974 .0703345 -23.54 0.000 -1.793827 -1.518121 ------------------------------------------------------------------------------ . predict h3, hazard per(1000) ci . predict s3, survival ci . . twoway (line h3 _t if agegrp1 == 1 & female == 0 & year8594 == 1, sort) /// > (line h3 _t if agegrp4 == 1 & female == 0 & year8594 > == 1, sort) /// > , legend(label (1 "Youngest") label (2 "Oldest")) sch > eme(sj) /// > ytitle("Excess hazard rate") xtitle("Time since diagn > osis") .
%%stata
predict hr2, hrnum(agegrp2 1) ci
predict hr3, hrnum(agegrp3 1) ci
predict hr4, hrnum(agegrp4 1) ci
twoway (line hr2 _t if agegrp2 == 1 & female == 0 & year8594 == 1, sort) ///
(line hr3 _t if agegrp3 == 1 & female == 0 & year8594 == 1, sort) ///
(line hr4 _t if agegrp4 == 1 & female == 0 & year8594 == 1, sort) ///
, legend(label (1 "Agegrp2") label (2 "Agegrp3") label(3 "Agegrp4")) ///
xtitle("Time since diagnosis") ytitle("Excess mortality rate") scheme(sj)
. . predict hr2, hrnum(agegrp2 1) ci . predict hr3, hrnum(agegrp3 1) ci . predict hr4, hrnum(agegrp4 1) ci . . twoway (line hr2 _t if agegrp2 == 1 & female == 0 & year8594 == 1, sort) /// > (line hr3 _t if agegrp3 == 1 & female == 0 & year8594 == 1, s > ort) /// > (line hr4 _t if agegrp4 == 1 & female == 0 & year8594 == 1, s > ort) /// > , legend(label (1 "Agegrp2") label (2 "Agegrp3") label(3 "Age > grp4")) /// > xtitle("Time since diagnosis") ytitle("Excess mortality rate" > ) scheme(sj) .
%%stata
twoway (rarea hr4_lci hr4_uci _t, sort pstyle(ci)) ///
(line hr4 _t, sort), yline(1) legend(off) ///
xtitle("Years from Diagnosis") ///
ytitle("Excess Mortality Rate Ratio")
. twoway (rarea hr4_lci hr4_uci _t, sort pstyle(ci)) /// > (line hr4 _t, sort), yline(1) legend(off) /// > xtitle("Years from Diagnosis") /// > ytitle("Excess Mortality Rate Ratio") .
%%stata
predict sdiff4, sdiff1(agegrp4 1 female 0 year8594 1) ///
sdiff2(agegrp4 0 female 0 year8594 1) ci
twoway (rarea sdiff4_lci sdiff4_uci _t, sort pstyle(ci)) ///
(line sdiff4 _t, sort), yline(0) legend(off) ///
xtitle("Years from Diagnosis") ///
ytitle("Difference in Relative Survival")
. predict sdiff4, sdiff1(agegrp4 1 female 0 year8594 1) /// > sdiff2(agegrp4 0 female 0 year8594 1) ci . twoway (rarea sdiff4_lci sdiff4_uci _t, sort pstyle(ci)) /// > (line sdiff4 _t, sort), yline(0) legend(off) /// > xtitle("Years from Diagnosis") /// > ytitle("Difference in Relative Survival") .
%%stata
predict hdiff4, hdiff1(agegrp4 1 female 0 year8594 1) ///
hdiff2(agegrp4 0 female 0 year8594 1) ci
replace hdiff4 = hdiff4*1000
replace hdiff4_lci = hdiff4_lci*1000
replace hdiff4_uci = hdiff4_uci*1000
twoway (rarea hdiff4_lci hdiff4_uci _t, sort pstyle(ci)) ///
(line hdiff4 _t, sort), yline(0) legend(off) ///
xtitle("Years from Diagnosis") ///
ytitle("Difference in Excess Mortality Rate")
. predict hdiff4, hdiff1(agegrp4 1 female 0 year8594 1) /// > hdiff2(agegrp4 0 female 0 year8594 1) ci . replace hdiff4 = hdiff4*1000 (7,775 real changes made) . replace hdiff4_lci = hdiff4_lci*1000 (7,775 real changes made) . replace hdiff4_uci = hdiff4_uci*1000 (7,775 real changes made) . . twoway (rarea hdiff4_lci hdiff4_uci _t, sort pstyle(ci)) /// > (line hdiff4 _t, sort), yline(0) legend(off) /// > xtitle("Years from Diagnosis") /// > ytitle("Difference in Excess Mortality Rate") .