% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}

\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Supress navigation symbols
\usetheme{AnnArbor}  % CambridgeUS Blue and yellow, Shows current section title
% \usetheme{Berlin} % Displays sections on top
\usepackage[english]{babel}
\usepackage{comment}
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}

\setbeamertemplate{footline}[frame number] 

\mode<presentation>
% \mode<handout>{\setbeamercolor{background canvas}{bg=black!5}}

\title{Proportional Hazards Regression: Part Two\footnote{See last slide for copyright information.}}
\subtitle{STA312 Fall 2023}
\date{} % To suppress date

\begin{document}

\begin{frame}
  \titlepage
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Proportional Hazards Regression Model}
% \framesubtitle{Also called Cox regression after Sir David Cox}  
Based on the hazard function
\begin{displaymath}
    h(t) =  h_0(t) \,  e^{\beta_0 + \mathbf{x}^\top \boldsymbol{\beta}}
\end{displaymath} \pause

Swallow $e^{\beta_0}$ into the baseline hazard function and get
\begin{displaymath}
    h(t) =  h_0(t) \,  e^{\mathbf{x}^\top \boldsymbol{\beta}}
\end{displaymath}

\begin{itemize}
    \item  The regression model has no intercept.  \pause
    \item  It's common practice to center the explanatory variables (but not the dummy variables) by subtracting off the overall sample mean of the variable. \pause
    \item  Then, the baseline hazard function is the hazard function of an individual in the reference category, who is ``average" on all the quantitive explanatory variables. 
    \item  It's quite meaningful. 
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Hazard Ratio} 
%\framesubtitle{} 

\begin{eqnarray*}
    \frac{h_1(t)}{h_2(t)} & = & 
    \frac{h_0(t)\,e^{\mathbf{x}_1^\top \boldsymbol{\beta}}}
         {h_0(t)\,e^{\mathbf{x}_2^\top \boldsymbol{\beta}}} \\
    & = & \frac{e^{\mathbf{x}_1^\top \boldsymbol{\beta}}}
               {e^{\mathbf{x}_2^\top \boldsymbol{\beta}}} 
\end{eqnarray*} 
\begin{itemize}
    \item Proportional hazards. \pause
    \item If $x_k$ is increased by one unit, the hazard function is multiplied by $e^{\beta_k}$. \pause
    \item This is true for every time $t$ (according to the model).
    \item So you can just say the ``hazard" or ``risk" or even ``chances" of the event are twice as much.
    \item It's a good way to talk and think about the results.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Need to estimate the hazard and survival functions} 
%\framesubtitle{} 
\begin{itemize}
    \item What we have so far is good for significance testing.
    \item Need to estimate the hazard and survival functions.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Estimating the baseline hazard}
\framesubtitle{$h_0(t)$ in $h(t) =  h_0(t) \,  e^{\mathbf{x}^\top \boldsymbol{\beta}}$}
\pause  
Remember how partial likelihood started.
\begin{eqnarray*}
    h_0(t) e^{\mathbf{x}_{(i)}^\top \boldsymbol{\beta}} & \approx & 
    \frac{h_0(t) e^{\mathbf{x}_{(i)}^\top \boldsymbol{\beta}}}
           {\displaystyle \sum_{j \in R_{(i)}} h_0(t) e^{\mathbf{x}_j^\top \boldsymbol{\beta}}} \\ \pause
   & = & \frac{e^{\mathbf{x}_{(i)}^\top \boldsymbol{\beta}}}
           {\displaystyle \sum_{j \in R_{(i)}} e^{\mathbf{x}_j^\top \boldsymbol{\beta}}} \\ \pause
   & = & \frac{1}{\displaystyle \sum_{j \in R_{(i)}} e^{\mathbf{x}_j^\top \boldsymbol{\beta}}} \times e^{\mathbf{x}_{(i)}^\top \boldsymbol{\beta}} \\
\end{eqnarray*} 
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{A leap of intuition}
%\framesubtitle{} 
Humm, 
\begin{displaymath}
    h_0(t_{(i)}) \times e^{\mathbf{x}_{(i)}^\top \boldsymbol{\beta}}  \approx   
    \frac{1}{\displaystyle \sum_{j \in R_{(i)}} e^{\mathbf{x}_j^\top \boldsymbol{\beta}}} \times e^{\mathbf{x}_{(i)}^\top \boldsymbol{\beta}}
\end{displaymath} \pause
So how about
\begin{displaymath}
    \widehat{h}_0(t_{(i)}) =   
    \frac{1}{\displaystyle \sum_{j \in R_{(i)}} e^{\mathbf{x}_j^\top \widehat{\boldsymbol{\beta}}}} 
\end{displaymath} \pause
Well, there could be ties in practice, so based on the Kaplan-Meier estimated hazard 
$\widehat{q}_{(i)} = \frac{d_{(i)}}{n_{(i)}}$, \pause
\begin{displaymath}
    \widehat{h}_0(t_{(i)}) =   
    \frac{d_{(i)}}{\displaystyle \sum_{j \in R_{(i)}} e^{\mathbf{x}_j^\top \widehat{\boldsymbol{\beta}}}} 
\end{displaymath}

Almost always, $d_{(i)}=1$ anyway. 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Estimated Hazard Function(s)}
\framesubtitle{Based on $h(t) =  h_0(t) \,  e^{\mathbf{x}^\top \boldsymbol{\beta}}$} 

{\LARGE
\begin{displaymath}
    \widehat{h}(t_{(i)}) =  \widehat{h}_0(t_{(i)}) \,  e^{\mathbf{x}^\top \widehat{\boldsymbol{\beta}}}
\end{displaymath} 
} % End size

\begin{itemize}
    \item Nice for display. Can plot $D$ points.
    \item Notice it depends on $\mathbf{x}$.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Estimating the Survival Function: Background}
\framesubtitle{Using $H(t) = \int_0^t h(y) \, dy$ and $S(t) = e^{-H(t)}$} \pause
\begin{itemize}
    \item $H_0(t) = \int_0^t h_0(y) \, dy$ is the baseline cumulative hazard function. 
    \item $S_0(t) = e^{-H_0(t)} = e^{-\int_0^t h_0(y) \, dy}$ is the baseline survival function. \pause
    \item With a little work we can show $S(t) =  S_0(t)^{\exp\{\mathbf{x}_i^\top \boldsymbol{\beta} \}}$. 
    \item This could be written $S(t|\mathbf{x}_i)$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Estimating the Survival Curve (Cox and Oakes, 1982)} 
\framesubtitle{Using $S_0(t) = e^{-H_0(t)}$ and $S(t) =  S_0(t)^{\exp\{ \mathbf{x}^\top \boldsymbol{\beta} \}}$} \pause
Want an estimate of $H_0(t) = \int_0^t h_0(y) \, dy$, but \pause
\begin{displaymath}
    \widehat{h}_0(t_{(i)}) =   
    \frac{d_{(i)}}{\displaystyle \sum_{j \in R_{(i)}} e^{\mathbf{x}_j^\top \widehat{\boldsymbol{\beta}}}}
\end{displaymath}
is only defined for $t_{(1)}, \ldots, t_{(D)}$, the times where uncensored observations occurred. \pause

Approximate the integral with a finite sum:

\begin{displaymath}
    \widehat{H}_0(t) = \sum_{t_{(i)}\leq t } \frac{d_{(i)}}
           {\displaystyle  \sum_{j \in R_{(i)}}  e^{\mathbf{x}_j^\top \widehat{\boldsymbol{\beta}}}}
\end{displaymath}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Cox and Oakes argument continued} 
\framesubtitle{Using $S_0(t) = e^{-H_0(t)}$ and $S(t|\mathbf{x}) =  S_0(t)^{\exp\{ \mathbf{x}^\top \boldsymbol{\beta} \}}$} 
Have
\begin{displaymath}
    \widehat{H}_0(t) = \sum_{t_{(i)} \leq t } \frac{d_{(i)}}
           {\displaystyle  \sum_{j \in R_{(i)}}  e^{\mathbf{x}_j^\top \widehat{\boldsymbol{\beta}}}}
\end{displaymath} \pause
Then
\begin{eqnarray*}
    \widehat{S}_0(t) & = & e^{-\widehat{H}_0(t)} \\ \pause
    \widehat{S}(t|\mathbf{x}) & = & \widehat{S}_0(t)^{ \exp\{ \mathbf{x}^\top \widehat{\boldsymbol{\beta}} \} }
\end{eqnarray*} 

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\begin{frame}
\frametitle{It works} 
%\framesubtitle{} 
\begin{itemize}
    \item As usual, later work clarified matters and eliminated most of the guesswork. 
    \item Cox's estimate of $S(t)$ is shown to arise from Breslow's method of approximating the partial likelihood when there are ties. 
    \item There are several other estimates, all yielding results that are pretty close. \pause
    \item To me, the biggest payoff is that $\widehat{S}(t|\mathbf{x})$ allows estimation of the median for any particular set of explanatory variable values.
\end{itemize}
\end{frame}




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%





%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by  \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistics, University of Toronto. It is licensed under a 
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
     {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/brunner/oldclass/312f23} {\footnotesize \texttt{http://www.utstat.toronto.edu/brunner/oldclass/312f23}}

\end{frame}

\end{document}

% 2019
\begin{frame}
\frametitle{Estimation (Cox and Oakes, 1982, p. 108)} 
\framesubtitle{Using $S_0(t) = e^{-H_0(t)}$ and $S(t|\mathbf{x}_i) =  S_0(t)^{\exp\{\beta_0 + \mathbf{x}_i^\top \boldsymbol{\beta} \}}$} \pause
Cox suggested $H_0(t)  \approx  \displaystyle \sum_{t_{(i)}< t } \frac{d_{(i)}}
           {\displaystyle  \sum_{j \in R_{(i)}}  e^{\beta_0 + \mathbf{x}_j^\top \boldsymbol{\beta}}}$. \pause Multiplying both sides by $e^{\beta_0}$\pause, which is invisible in Cox's argument\pause, arrive at \pause
\begin{displaymath}
    e^{\widehat{\beta}_0}\widehat{H}_0(t) = \sum_{t_{(i)}< t } \frac{d_{(i)}}
           {\displaystyle  \sum_{j \in R_{(i)}}  e^{\mathbf{x}_j^\top \widehat{\boldsymbol{\beta}}}}
\end{displaymath} \pause
Then, $e^{-\widehat{H}_0(t) e^{\widehat{\beta}_0}} = \widehat{S}_0(t)^{e^{\widehat{\beta}_0}}$. \pause
Raise that to the power $\mathbf{x}_i^\top \widehat{\boldsymbol{\beta}}$, and get \pause
{\LARGE
\begin{displaymath}
    \widehat{S}_0(t)^{e^{\widehat{\beta}_0 + \mathbf{x}_i^\top \widehat{\boldsymbol{\beta}}}} \pause = \widehat{S}(t|\mathbf{x}_i)
\end{displaymath} 
} % End size
\end{frame}