import os
os.chdir('/Applications/stata/utilities/')
from pystata import config
config.init('mp')
___ ____ ____ ____ ____ ®
/__ / ____/ / ____/ 17.0
___/ / /___/ / /___/ MP—Parallel Edition
Statistics and Data Science Copyright 1985-2021 StataCorp LLC
StataCorp
4905 Lakeway Drive
College Station, Texas 77845 USA
800-STATA-PC https://www.stata.com
979-696-4600 stata@stata.com
Stata license: Single-user 4-core perpetual
Serial number: 501706307451
Licensed to: MALF
IBS.GRANADA
Notes:
1. Unicode is supported; see help unicode_advice.
2. More than 2 billion observations are allowed; see help obs_advice.
3. Maximum number of variables is set to 5,000; see help set_maxvar.
%%stata
cd /Users/MALF/Dropbox/CANSURV-slides-course/Granada/
use melanoma
stset surv_mm, failure(status=1 2) scale(12) exit(time 120.5) id(id)
gen _age = min(int(age + _t),99)
gen _year = int(yydx + _t)
sort _year sex _age
merge m:1 _year sex _age using popmort, keep(match master)
tab agegrp, gen(agegrp)
gen female = sex == 2
/* Fit initial model */
stpm2, df(3) scale(hazard) bhazard(rate)
predict h1, hazard per (1000) ci
predict s1, survival ci
. cd /Users/MALF/Dropbox/CANSURV-slides-course/Granada/
/Users/MALF/Dropbox/CANSURV-slides-course/Granada
. use melanoma
(Skin melanoma, diagnosed 1975-94, follow-up to 1995)
. stset surv_mm, failure(status=1 2) scale(12) exit(time 120.5) id(id)
Survival-time data settings
ID variable: id
Failure event: status==1 2
Observed time interval: (surv_mm[_n-1], surv_mm]
Exit on or before: time 120.5
Time for analysis: time/12
--------------------------------------------------------------------------
7,775 total observations
0 exclusions
--------------------------------------------------------------------------
7,775 observations remaining, representing
7,775 subjects
2,777 failures in single-failure-per-subject data
43,384.625 total analysis time at risk and under observation
At risk from t = 0
Earliest observed entry t = 0
Last observed exit t = 10.04167
. gen _age = min(int(age + _t),99)
. gen _year = int(yydx + _t)
. sort _year sex _age
. merge m:1 _year sex _age using popmort, keep(match master)
Result Number of obs
-----------------------------------------
Not matched 0
Matched 7,775 (_merge==3)
-----------------------------------------
. tab agegrp, gen(agegrp)
Age in 4 |
categories | Freq. Percent Cum.
------------+-----------------------------------
0-44 | 2,046 26.32 26.32
45-59 | 2,238 28.78 55.10
60-74 | 2,280 29.32 84.42
75+ | 1,211 15.58 100.00
------------+-----------------------------------
Total | 7,775 100.00
. gen female = sex == 2
.
. /* Fit initial model */
.
. stpm2, df(3) scale(hazard) bhazard(rate)
Iteration 0: log likelihood = -8731.8053
Iteration 1: log likelihood = -8590.4493
Iteration 2: log likelihood = -8590.0259
Iteration 3: log likelihood = -8590.0249
Iteration 4: log likelihood = -8590.0249
Log likelihood = -8590.0249 Number of obs = 7,775
------------------------------------------------------------------------------
| Coefficient Std. err. z P>|z| [95% conf. interval]
-------------+----------------------------------------------------------------
xb |
_rcs1 | .8252308 .0249859 33.03 0.000 .7762595 .8742022
_rcs2 | .2110309 .0235833 8.95 0.000 .1648085 .2572534
_rcs3 | .0631928 .0109672 5.76 0.000 .0416974 .0846881
_cons | -1.813097 .0314253 -57.70 0.000 -1.87469 -1.751505
------------------------------------------------------------------------------
. predict h1, hazard per (1000) ci
. predict s1, survival ci
.
%%stata
twoway (rarea h1_lci h1_uci _t, sort pstyle(ci)) ///
(line h1 _t, sort), name(h1,replace) legend(off) ///
xtitle("time since diagnosis") ///
ytitle("excess mortality rate (per 1000 py's)")
twoway (rarea s1_lci s1_uci _t, sort pstyle(ci)) ///
(line s1 _t, sort), name(s1,replace) legend(off) ///
xtitle("time since diagnosis") ///
ytitle("relative survival")
.
. twoway (rarea h1_lci h1_uci _t, sort pstyle(ci)) ///
> (line h1 _t, sort), name(h1,replace) legend(off) ///
> xtitle("time since diagnosis") ///
> ytitle("excess mortality rate (per 1000 py's)")
.
. twoway (rarea s1_lci s1_uci _t, sort pstyle(ci)) ///
> (line s1 _t, sort), name(s1,replace) legend(off) ///
> xtitle("time since diagnosis") ///
> ytitle("relative survival")
.
%%stata
foreach df in 2 4 6 {
stpm2, df(`df') scale(hazard) bhazard(rate)
predict h_df`df', hazard
replace h_df`df' = h_df`df' * 1000
predict s_df`df', survival
estimates store df`df'
}
/*Plot the excess hazard functions*/
twoway (line h_df2 h_df4 h_df6 _t, sort lcolor(red blue black))
/*Plot the survival functions*/
twoway (line s_df2 s_df4 s_df6 _t, sort lcolor(red blue black))
estimates stats df2 df4 df6, n(2773)
.
. foreach df in 2 4 6 {
2. stpm2, df(`df') scale(hazard) bhazard(rate)
3. predict h_df`df', hazard
4. replace h_df`df' = h_df`df' * 1000
5. predict s_df`df', survival
6. estimates store df`df'
7. }
Iteration 0: log likelihood = -8857.8555
Iteration 1: log likelihood = -8602.0397
Iteration 2: log likelihood = -8598.8872
Iteration 3: log likelihood = -8598.8825
Iteration 4: log likelihood = -8598.8825
Log likelihood = -8598.8825 Number of obs = 7,775
------------------------------------------------------------------------------
| Coefficient Std. err. z P>|z| [95% conf. interval]
-------------+----------------------------------------------------------------
xb |
_rcs1 | .8587922 .027247 31.52 0.000 .805389 .9121954
_rcs2 | .2786853 .0239657 11.63 0.000 .2317133 .3256572
_cons | -1.820875 .0317465 -57.36 0.000 -1.883097 -1.758653
------------------------------------------------------------------------------
(7,775 real changes made)
Iteration 0: log likelihood = -8743.3857
Iteration 1: log likelihood = -8588.5984
Iteration 2: log likelihood = -8588.1174
Iteration 3: log likelihood = -8588.1166
Log likelihood = -8588.1166 Number of obs = 7,775
------------------------------------------------------------------------------
| Coefficient Std. err. z P>|z| [95% conf. interval]
-------------+----------------------------------------------------------------
xb |
_rcs1 | .8217794 .0245014 33.54 0.000 .7737575 .8698014
_rcs2 | .2007402 .0225589 8.90 0.000 .1565255 .2449548
_rcs3 | .0786269 .012199 6.45 0.000 .0547173 .1025365
_rcs4 | .0018648 .0067393 0.28 0.782 -.0113439 .0150736
_cons | -1.812389 .0313923 -57.73 0.000 -1.873916 -1.750861
------------------------------------------------------------------------------
(7,775 real changes made)
Iteration 0: log likelihood = -8734.395
Iteration 1: log likelihood = -8587.549
Iteration 2: log likelihood = -8587.1424
Iteration 3: log likelihood = -8587.1412
Iteration 4: log likelihood = -8587.1412
Log likelihood = -8587.1412 Number of obs = 7,775
------------------------------------------------------------------------------
| Coefficient Std. err. z P>|z| [95% conf. interval]
-------------+----------------------------------------------------------------
xb |
_rcs1 | .8216408 .0245061 33.53 0.000 .7736098 .8696719
_rcs2 | .1979202 .02278 8.69 0.000 .1532723 .2425681
_rcs3 | .0841377 .0133232 6.32 0.000 .0580248 .1102506
_rcs4 | .019898 .0084671 2.35 0.019 .0033027 .0364933
_rcs5 | .0010064 .0053114 0.19 0.850 -.0094038 .0114166
_rcs6 | .0059215 .0037762 1.57 0.117 -.0014797 .0133227
_cons | -1.812651 .0314002 -57.73 0.000 -1.874194 -1.751107
------------------------------------------------------------------------------
(7,775 real changes made)
.
. /*Plot the excess hazard functions*/
. twoway (line h_df2 h_df4 h_df6 _t, sort lcolor(red blue black))
.
. /*Plot the survival functions*/
. twoway (line s_df2 s_df4 s_df6 _t, sort lcolor(red blue black))
.
.
. estimates stats df2 df4 df6, n(2773)
Akaike's information criterion and Bayesian information criterion
-----------------------------------------------------------------------------
Model | N ll(null) ll(model) df AIC BIC
-------------+---------------------------------------------------------------
df2 | 2,773 . -8598.882 3 17203.76 17221.55
df4 | 2,773 . -8588.117 5 17186.23 17215.87
df6 | 2,773 . -8587.141 7 17188.28 17229.78
-----------------------------------------------------------------------------
.
%%stata
stpm2 agegrp2 agegrp3 agegrp4 female year8594, bhazard(rate) ///
df(4) scale(hazard) eform
predict h2, hazard per(1000) ci
predict s2, survival ci
. stpm2 agegrp2 agegrp3 agegrp4 female year8594, bhazard(rate) ///
> df(4) scale(hazard) eform
Iteration 0: log likelihood = -8592.7514
Iteration 1: log likelihood = -8485.9814
Iteration 2: log likelihood = -8483.7774
Iteration 3: log likelihood = -8483.7688
Iteration 4: log likelihood = -8483.7688
Log likelihood = -8483.7688 Number of obs = 7,775
------------------------------------------------------------------------------
| exp(b) Std. err. z P>|z| [95% conf. interval]
-------------+----------------------------------------------------------------
xb |
agegrp2 | 1.285544 .0963537 3.35 0.001 1.109911 1.48897
agegrp3 | 1.729918 .1311554 7.23 0.000 1.491045 2.007059
agegrp4 | 2.613592 .262209 9.58 0.000 2.147044 3.18152
female | .5819093 .0335882 -9.38 0.000 .519665 .6516092
year8594 | .6785354 .0390329 -6.74 0.000 .6061874 .759518
_rcs1 | 2.308665 .0542546 35.60 0.000 2.204739 2.417489
_rcs2 | 1.217353 .0260828 9.18 0.000 1.16729 1.269562
_rcs3 | 1.084414 .0126098 6.97 0.000 1.059979 1.109413
_rcs4 | 1.004209 .0065616 0.64 0.520 .9914306 1.017152
_cons | .1948568 .0131564 -24.22 0.000 .1707041 .2224268
------------------------------------------------------------------------------
Note: Estimates are transformed only in the first equation.
. predict h2, hazard per(1000) ci
. predict s2, survival ci
.
%%stata
twoway (line h2 _t if agegrp1 == 1 & female == 0 & year8594 == 1, sort) ///
(line h2 _t if agegrp4 == 1 & female == 0 & year8594 == 1, sort) ///
,legend(label (1 "Youngest") label (2 "Oldest")) scheme(sj) ///
ytitle("Excess hazard rate") xtitle("Time since diagnosis")
. twoway (line h2 _t if agegrp1 == 1 & female == 0 & year8594 == 1, sort) ///
> (line h2 _t if agegrp4 == 1 & female == 0 & year8594
> == 1, sort) ///
> ,legend(label (1 "Youngest") label (2 "Oldest")) sche
> me(sj) ///
> ytitle("Excess hazard rate") xtitle("Time since diagn
> osis")
.
.
%%stata
stpm2 agegrp2 agegrp3 agegrp4 female year8594, bhazard(rate) df(3) scale(hazard) ///
tvc(agegrp2 agegrp3 agegrp4) dftvc(2)
predict h3, hazard per(1000) ci
predict s3, survival ci
twoway (line h3 _t if agegrp1 == 1 & female == 0 & year8594 == 1, sort) ///
(line h3 _t if agegrp4 == 1 & female == 0 & year8594 == 1, sort) ///
, legend(label (1 "Youngest") label (2 "Oldest")) scheme(sj) ///
ytitle("Excess hazard rate") xtitle("Time since diagnosis")
. stpm2 agegrp2 agegrp3 agegrp4 female year8594, bhazard(rate) df(3) scale(haza
> rd) ///
> tvc(agegrp2 agegrp3 agegrp4) dftvc(2)
Iteration 0: log likelihood = -8586.4362
Iteration 1: log likelihood = -8482.059
Iteration 2: log likelihood = -8479.6676
Iteration 3: log likelihood = -8479.6437
Iteration 4: log likelihood = -8479.6437
Log likelihood = -8479.6437 Number of obs = 7,775
------------------------------------------------------------------------------
| Coefficient Std. err. z P>|z| [95% conf. interval]
-------------+----------------------------------------------------------------
xb |
agegrp2 | .2741607 .0798607 3.43 0.001 .1176365 .4306849
agegrp3 | .555553 .0812003 6.84 0.000 .3964034 .7147026
agegrp4 | .934842 .110683 8.45 0.000 .7179073 1.151777
female | -.5457334 .0579363 -9.42 0.000 -.6592864 -.4321804
year8594 | -.3873942 .0576354 -6.72 0.000 -.5003576 -.2744309
_rcs1 | .851634 .0459294 18.54 0.000 .761614 .941654
_rcs2 | .1365924 .0357271 3.82 0.000 .0665685 .2066162
_rcs3 | .0697446 .0112343 6.21 0.000 .0477257 .0917635
_rcs_ageg~21 | -.0210178 .0626366 -0.34 0.737 -.1437832 .1017477
_rcs_ageg~22 | .0706612 .0480804 1.47 0.142 -.0235747 .164897
_rcs_ageg~31 | -.0256869 .0665868 -0.39 0.700 -.1561946 .1048208
_rcs_ageg~32 | .1174402 .0534022 2.20 0.028 .0127739 .2221065
_rcs_ageg~41 | -.037214 .0856722 -0.43 0.664 -.2051285 .1307005
_rcs_ageg~42 | .1407585 .0726802 1.94 0.053 -.0016921 .2832092
_cons | -1.655974 .0703345 -23.54 0.000 -1.793827 -1.518121
------------------------------------------------------------------------------
. predict h3, hazard per(1000) ci
. predict s3, survival ci
.
. twoway (line h3 _t if agegrp1 == 1 & female == 0 & year8594 == 1, sort) ///
> (line h3 _t if agegrp4 == 1 & female == 0 & year8594
> == 1, sort) ///
> , legend(label (1 "Youngest") label (2 "Oldest")) sch
> eme(sj) ///
> ytitle("Excess hazard rate") xtitle("Time since diagn
> osis")
.
%%stata
predict hr2, hrnum(agegrp2 1) ci
predict hr3, hrnum(agegrp3 1) ci
predict hr4, hrnum(agegrp4 1) ci
twoway (line hr2 _t if agegrp2 == 1 & female == 0 & year8594 == 1, sort) ///
(line hr3 _t if agegrp3 == 1 & female == 0 & year8594 == 1, sort) ///
(line hr4 _t if agegrp4 == 1 & female == 0 & year8594 == 1, sort) ///
, legend(label (1 "Agegrp2") label (2 "Agegrp3") label(3 "Agegrp4")) ///
xtitle("Time since diagnosis") ytitle("Excess mortality rate") scheme(sj)
.
. predict hr2, hrnum(agegrp2 1) ci
. predict hr3, hrnum(agegrp3 1) ci
. predict hr4, hrnum(agegrp4 1) ci
.
. twoway (line hr2 _t if agegrp2 == 1 & female == 0 & year8594 == 1, sort) ///
> (line hr3 _t if agegrp3 == 1 & female == 0 & year8594 == 1, s
> ort) ///
> (line hr4 _t if agegrp4 == 1 & female == 0 & year8594 == 1, s
> ort) ///
> , legend(label (1 "Agegrp2") label (2 "Agegrp3") label(3 "Age
> grp4")) ///
> xtitle("Time since diagnosis") ytitle("Excess mortality rate"
> ) scheme(sj)
.
%%stata
twoway (rarea hr4_lci hr4_uci _t, sort pstyle(ci)) ///
(line hr4 _t, sort), yline(1) legend(off) ///
xtitle("Years from Diagnosis") ///
ytitle("Excess Mortality Rate Ratio")
. twoway (rarea hr4_lci hr4_uci _t, sort pstyle(ci)) ///
> (line hr4 _t, sort), yline(1) legend(off) ///
> xtitle("Years from Diagnosis") ///
> ytitle("Excess Mortality Rate Ratio")
.
%%stata
predict sdiff4, sdiff1(agegrp4 1 female 0 year8594 1) ///
sdiff2(agegrp4 0 female 0 year8594 1) ci
twoway (rarea sdiff4_lci sdiff4_uci _t, sort pstyle(ci)) ///
(line sdiff4 _t, sort), yline(0) legend(off) ///
xtitle("Years from Diagnosis") ///
ytitle("Difference in Relative Survival")
. predict sdiff4, sdiff1(agegrp4 1 female 0 year8594 1) ///
> sdiff2(agegrp4 0 female 0 year8594 1) ci
. twoway (rarea sdiff4_lci sdiff4_uci _t, sort pstyle(ci)) ///
> (line sdiff4 _t, sort), yline(0) legend(off) ///
> xtitle("Years from Diagnosis") ///
> ytitle("Difference in Relative Survival")
.
%%stata
predict hdiff4, hdiff1(agegrp4 1 female 0 year8594 1) ///
hdiff2(agegrp4 0 female 0 year8594 1) ci
replace hdiff4 = hdiff4*1000
replace hdiff4_lci = hdiff4_lci*1000
replace hdiff4_uci = hdiff4_uci*1000
twoway (rarea hdiff4_lci hdiff4_uci _t, sort pstyle(ci)) ///
(line hdiff4 _t, sort), yline(0) legend(off) ///
xtitle("Years from Diagnosis") ///
ytitle("Difference in Excess Mortality Rate")
. predict hdiff4, hdiff1(agegrp4 1 female 0 year8594 1) ///
> hdiff2(agegrp4 0 female 0 year8594 1) ci
. replace hdiff4 = hdiff4*1000
(7,775 real changes made)
. replace hdiff4_lci = hdiff4_lci*1000
(7,775 real changes made)
. replace hdiff4_uci = hdiff4_uci*1000
(7,775 real changes made)
.
. twoway (rarea hdiff4_lci hdiff4_uci _t, sort pstyle(ci)) ///
> (line hdiff4 _t, sort), yline(0) legend(off) ///
> xtitle("Years from Diagnosis") ///
> ytitle("Difference in Excess Mortality Rate")
.