% \documentclass[serif]{beamer} % Serif for Computer Modern math font. \documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements \hypersetup{colorlinks,linkcolor=,urlcolor=red} \usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice! \setbeamertemplate{navigation symbols}{} % Supress navigation symbols \usetheme{AnnArbor} % CambridgeUS Blue and yellow, Shows current section title % \usetheme{Berlin} % Displays sections on top \usepackage[english]{babel} % \definecolor{links}{HTML}{2A1B81} % \definecolor{links}{red} \setbeamertemplate{footline}[frame number] \mode % \mode{\setbeamercolor{background canvas}{bg=black!5}} \title{Weibull Regression\footnote{See last slide for copyright information.}} \subtitle{STA312 Spring 2019} \date{} % To suppress date \begin{document} \begin{frame} \titlepage \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Background Reading} %\framesubtitle{} Section 10.6 in the text, but it refers to a lot of things we have not covered yet. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Overview} \tableofcontents \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Exponential} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{A multiplicative regression model} \framesubtitle{Exponential model, just one explanatory variable} \pause Independently for $i=1, \ldots n$, {\LARGE \begin{displaymath} t_i = e^{\beta_0+\beta_1x_i}\times \epsilon_i \end{displaymath} \pause } % End size where \pause \begin{itemize} \item[] $\beta_0$ and $\beta_1$ are unknown constants (parameters). \pause \item[] $x_1, \ldots, x_n$ are known, observed constants. \pause \item[] $\epsilon_1, \ldots, \epsilon_n$ are independent exponential(1) random variables. \pause \item[] $t_1, \ldots, t_n$ are observed failure times. \pause \item[] $\delta_1, \ldots, \delta_n$ are indicators for uncensored. \end{itemize} \pause \begin{itemize} \item These are sometimes called \emph{accelerated failure time} models. \pause \item Because the effect of $x \neq 0$ is to \emph{multiply} the failure time by a constant. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Distribution of $t_i = e^{\beta_0+\beta_1x_i}\times \epsilon_i$, with $\epsilon_i$ exponential(1)} \pause %\framesubtitle{Comments} \begin{itemize} \item If $\epsilon \sim \exp(1)$ and $a>0$, $x=a\epsilon$ is also exponential. \pause \item Expected value $a$ (or $\lambda=1/a$). \pause \item Thus, $E(t_i) = e^{\beta_0+\beta_1x_i} \pause \Leftrightarrow \log E(t_i) = \beta_0+\beta_1x_i$. \pause \item We are adopting a linear model for the log of the expected value. \pause \item Or, we can transform the failure times by taking logs. \pause \begin{eqnarray*} \log t_i & = & \beta_0+\beta_1x_i + \log\epsilon_i \\ \pause & = & \beta_0+\beta_1x_i + \epsilon^*_i \end{eqnarray*} \pause where $\epsilon^*_i = \log\epsilon_i \pause \sim G(0,1)$. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Meaning of $\beta_1$} \framesubtitle{With $E(t_i) = e^{\beta_0+\beta_1x_i}$} \pause \begin{itemize} \item Increase $x_i$ by one unit. \pause \item The effect is to multiply $E(t_i)$ by a constant. \pause \begin{eqnarray*} e^{\beta_0+\beta_1(x_i+1)} & = & c \, e^{\beta_0+\beta_1x_i} \\ \pause \Leftrightarrow c &=& \frac{e^{\beta_0+\beta_1(x_i+1)}}{e^{\beta_0+\beta_1x_i}} \\ \pause & = & \frac{e^{\beta_0+\beta_1x_i+\beta_1}}{e^{\beta_0+\beta_1x_i}} \\ \pause & = & e^{\beta_1} \end{eqnarray*} \pause \item So when $x_i$ is increased by one unit, $E(t_i)$ is multiplied by $e^{\beta_1}$. \pause \item If $\beta_1>0$, $E(t_i)$ goes up. \pause \item If $\beta_1<0$, $E(t_i)$ goes down. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Natural extensions} \pause %\framesubtitle{} \begin{itemize} \item More than one explanatory variable. \pause \item Centering the quantitative explanatory variables. \pause \begin{displaymath} t_i = \exp\{\beta_0+\beta_1(x_{i,1}-\bar{x}_1) + \ldots + \beta_{p-1}(x_{i,p-1} - \bar{x}_{p-1})\} \cdot \epsilon_i \end{displaymath} \pause \item In this case, $e^{\beta_0}$ is the expected failure time for average values of all the explanatory variables. \pause \item If there are dummy variables, center only the quantitative variables (covariates). \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Equivalent model on the log scale} \framesubtitle{Starting with $ t_i = \exp\{\beta_0+\beta_1(x_{i,1}-\bar{x}_1) + \ldots + \beta_{p-1}(x_{i,p-1} - \bar{x}_{p-1})\} \cdot \epsilon_i$} \pause \begin{eqnarray*} \log t_i & = & \beta_0 + \beta_1x_{i,1} + \ldots + \beta_{p-1}x_{i,p-1} + \log \epsilon_i \\ \pause & = & \beta_0 + \beta_1x_{i,1} + \ldots + \beta_{p-1}x_{i,p-1} + \epsilon^*_i \\ \pause & = & \mathbf{x}_i^\top \boldsymbol{\beta} + \epsilon^*_i, \pause \end{eqnarray*} where $\epsilon_i^* \sim G(0,1)$. \pause \begin{itemize} \item Recall, if $Z \sim G(0,1)$, then $\sigma Z + \mu \sim G(\mu,\sigma)$. \pause \item So the model says $\log t_i \sim G(\mathbf{x}_i^\top \boldsymbol{\beta},1)$ \pause \item Why should the variance of log survival time be $\frac{\pi^2}{6}$? \pause \item Much more reasonable is $ \log t_i = \beta_0 + \beta_1x_{i,1} + \ldots + \beta_{p-1}x_{i,p-1} + \sigma \epsilon^*_i$ \pause \item In this case, $\log t_i \sim G(\mathbf{x}_i^\top \boldsymbol{\beta},\sigma)$. % \pause %\item Implying that $t_i$ is Weibull, with $\lambda = e^{-\mathbf{x}_i^\top \boldsymbol{\beta}}$ and $\alpha = 1/\sigma$. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Weibull} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Switching back to the time scale} \framesubtitle{From the log time scale} \pause \begin{eqnarray*} && \log t_i = \beta_0 + \beta_1x_{i,1} + \ldots + \beta_{p-1}x_{i,p-1} + \sigma \epsilon^*_i \\ \pause &\Leftrightarrow& t_i = e^{\mathbf{x}_i^\top \boldsymbol{\beta}} \, e^{\sigma \epsilon^*_i} \pause = e^{\mathbf{x}_i^\top \boldsymbol{\beta}} \, e^{\sigma \log\epsilon_i} \pause = e^{\mathbf{x}_i^\top \boldsymbol{\beta}} \, e^{\log(\epsilon_i^\sigma )} \\ \pause &\Leftrightarrow& t_i = e^{\mathbf{x}_i^\top \boldsymbol{\beta}} \, \epsilon_i^\sigma \end{eqnarray*} \pause We have arrived at the multiplicative regression model: \pause {\LARGE \begin{displaymath} t_i = \exp\{\beta_0 + \beta_1x_{i,1} + \ldots + \beta_{p-1}x_{i,p-1} \} \cdot \epsilon_i^\sigma \end{displaymath} } % End size \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{$t_i = \exp\{\beta_0 + \beta_1x_{i,1} + \ldots + \beta_{p-1}x_{i,p-1} \} \cdot \epsilon_i^\sigma$} \pause %\framesubtitle{} \begin{itemize} \item It's an accelerated failure time model. \pause Changing one of the $x$ values multiplies $t_i$ by something. \pause \item In particular, increase $x_{i,k}$ by one unit while holding all other $x_{i,j}$ values constant. \pause \item Then $t_i$ is multiplied by $e^{\beta_k}$. \pause \item Holding $x_{i,j}$ values constant is the meaning of ``controlling" for explanatory variables in Weibull regression. \pause \item Note that if $\beta_k$ is negative, $e^{\beta_k}<1$ and $t_i$ goes down. \pause \item Call it a ``negative relationship" (controlling for the other variables). \pause \item If $\beta_k$ is positive, $e^{\beta_k}>1$ and $t_i$ goes up. \pause \item Call this a ``positive relationship" (controlling for the other variables). \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Distribution of $t_i$} \pause Recall \begin{itemize} \item We have established that $\log t_i \sim G(\mathbf{x}_i^\top \boldsymbol{\beta},\sigma)$. \pause %\item Log of Weibull$(\alpha,\lambda)$ is Gumbel$(\mu,\sigma)$ with $\mu=-\log\lambda$ and $\sigma = \frac{1}{\alpha}$. \pause \item Exponential function of Gumbel$(\mu,\sigma)$ is Weibull$(\alpha,\lambda)$ \pause with $\lambda = e^{-\mu}$ and $\alpha = 1/\sigma$. \pause \item Note that here, $\mu_i = \mathbf{x}_i^\top \boldsymbol{\beta}$. \pause \item So, $t_i$ is Weibull, with $\lambda_i = e^{-\mathbf{x}_i^\top \boldsymbol{\beta}}$ and $\alpha = 1/\sigma$. \pause \item This means \pause \end{itemize} \begin{eqnarray*} E(T_i) & = & \frac{\Gamma(1+\frac{1}{\alpha})}{\lambda} \pause = e^{\mathbf{x}_i^\top \boldsymbol{\beta}} \, \Gamma(1+\sigma) \\ \pause \mbox{Median}(T_i) & = & \frac{[\log(2)]^{1/\alpha}}{\lambda} \pause = e^{\mathbf{x}_i^\top \boldsymbol{\beta}} \, \log(2)^\sigma \\ \pause h(t) & = & \alpha\lambda^\alpha t^{\alpha-1} \pause = \frac{1}{\sigma} \exp\{-\frac{1}{\sigma}\mathbf{x}^\top \boldsymbol{\beta}\}t^{\frac{1}{\sigma}-1} \end{eqnarray*} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Conclusions} \framesubtitle{Following from $\log t_i \sim G(\mathbf{x}_i^\top \boldsymbol{\beta},\sigma)$} \pause \begin{eqnarray*} E(T_i) & = & e^{\mathbf{x}_i^\top \boldsymbol{\beta}} \, \Gamma(1+\sigma) \\ \mbox{Median}(T_i) & = & e^{\mathbf{x}_i^\top \boldsymbol{\beta}} \, \log(2)^\sigma \\ h(t) & = & \frac{1}{\sigma} \exp\{-\frac{1}{\sigma}\mathbf{x}^\top \boldsymbol{\beta}\}t^{\frac{1}{\sigma}-1} \end{eqnarray*} \pause \begin{itemize} \item Increasing value of $x_j$ by $c$ units multiplies the mean and median by $e^{c\beta_j}$. \pause \item Same effect on the hazard function. \pause \item Remarkable because the hazard function is a function of time $t$. \pause \item And the effect is the same for every value of $t$. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Proportional Hazards} \framesubtitle{$h(t) = \frac{1}{\sigma} \exp\{-\frac{1}{\sigma}\mathbf{x}^\top \boldsymbol{\beta}\}t^{\frac{1}{\sigma}-1}$} \pause \begin{itemize} \item Suppose two individuals have different $\mathbf{x}$ vectors of explanatory variable values. \pause \item They have different hazard functions because their $\lambda$ values are different. \pause \item Look at the \emph{ratio}: \pause \end{itemize} \begin{eqnarray*} \frac{h_1(t)}{h_2(t)} & = & \pause \frac{\frac{1}{\sigma} \exp\{-\frac{1}{\sigma}\mathbf{x}_1^\top \boldsymbol{\beta}\}t^{\frac{1}{\sigma}-1}} {\frac{1}{\sigma} \exp\{-\frac{1}{\sigma}\mathbf{x}_2^\top \boldsymbol{\beta}\}t^{\frac{1}{\sigma}-1}} \\ \pause & = & \frac{ \exp\{-\frac{1}{\sigma}\mathbf{x}_1^\top \boldsymbol{\beta}\}} { \exp\{-\frac{1}{\sigma}\mathbf{x}_2^\top \boldsymbol{\beta}\}} \\ \pause & = & \exp\{\frac{1}{\sigma}(\mathbf{x}_2-\mathbf{x}_1)^\top \boldsymbol{\beta}\} \end{eqnarray*} \pause The point is that $h_1(t)$ and $h_2(t)$ are always in the same proportion for every value of $t$. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Proportional Hazards} \framesubtitle{$h_1(t) = 2 \, h_2(t)$ with $\sigma=2$} \begin{center} \includegraphics[width=3.3in]{HazardPlot} \end{center} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Proportional Hazards} \framesubtitle{$h_1(t) = 2 \, h_2(t)$ with $\sigma=1/3$} \begin{center} \includegraphics[width=3.2in]{HazardPlot2} \end{center} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/312s19} {\footnotesize \texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/312s19}} \end{frame} \end{document} # R for proportional hazards plot rm(list=ls()) t = seq(from=0.1,to=16,length=100) h1 = 2*t^(1/2-1); h2 = t^(1/2-1) # sigma=2 cbind(h1,h2) plot(t,h1,type='l',ylab='Hazard',ylim=c(0,6.5)) lines(t,h2,lty=2) # Plot 2 h1 = 2*t^2; h2 = t^2 # sigma = 1/3 cbind(h1,h2) plot(t,h1,type='l',ylab='Hazard') lines(t,h2,lty=2)