% Beamer slides: "Prediction Intervals" (STA 302 Fall 2020).
% Structure: title page, motivation for prediction, prediction versus
% confidence intervals, the theorem, its proof via t = z/sqrt(w/nu), the
% derivation of the interval, and a copyright slide.
% Each statement sits on its own line so that a trailing % comment cannot
% swallow the code that follows it.

% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
\usetheme{Berlin} % Displays sections on top
%\usetheme{Berkeley}
% \usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
% \usetheme{AnnArbor} % CambridgeUS: Displays one section at a time. Good if there are a lot of sections or if they have long titles.
\usepackage{comment}
\usepackage{alltt}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{amsfonts} % for \mathbb{R} The set of reals
\usepackage{mathtools} % For symbol under multiple integrals
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Prediction Intervals\footnote{See last slide for copyright information.}}
\subtitle{STA 302 Fall 2020}
\date{} % To suppress date

\begin{document}

% Title page
\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Setting: a new observation whose response is to be predicted.
\begin{frame}
  \frametitle{Prediction} \pause
  %\framesubtitle{}
  \begin{itemize}
    \item You have a data set, and you fit a regression model. \pause
    \item Now you obtain a \emph{new} observation, independently sampled from the same population. \pause
    \item You have $(1,x_{n+1,1}, \ldots, x_{n+1,k})^\prime$. \pause Or $(x_{n+1,1}, \ldots, x_{n+1,k})^\prime$. \pause
    \item Want to predict $y_{n+1}$. \pause
    \item For example, based on the \texttt{cars} data, you want to predict litres per kilometre for a Japanese car 4.52 metres long, weighing 1,295 kilograms. \pause
    \item I wish we could write $(1,x_{n+1,1}, \ldots, x_{n+1,k})^\prime = \mathbf{x}_{n+1}$. \pause
    \item But we will follow the book's notation and call it $\mathbf{x}_0$.
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The same point value serves as estimate of E(y_0) and as prediction of y_0.
\begin{frame}
  \frametitle{New observation: $y_0 = \mathbf{x}_0^\prime\boldsymbol{\beta} + \epsilon_0$} \pause
  %\framesubtitle{}
  \begin{itemize}
    \item $E(y_0) = \mathbf{x}_0^\prime\boldsymbol{\beta}$. \pause
    \item Estimate $E(y_0)$ with $\mathbf{x}_0^\prime\widehat{\boldsymbol{\beta}}$. \pause
    \item That's a reasonable \emph{prediction} of $y_0$, too. \pause
    \item But the intervals are different. \pause
    \item Prediction intervals are not the same as confidence intervals.
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Confidence interval (for a constant) versus prediction interval (for a random variable).
\begin{frame}
  \frametitle{Prediction intervals versus confidence intervals} \pause
  \framesubtitle{Based on $y_0 = \mathbf{x}_0^\prime\boldsymbol{\beta} + \epsilon_0$}
  \begin{itemize}
    \item A confidence interval tries to trap the unknown constant $\mathbf{x}_0^\prime\boldsymbol{\beta}$ with high probability, say $1-\alpha = 0.95$. \pause
    \item Have $\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} \pm t_{\alpha/2} \,
      \sqrt{\mbox{\small\emph{MSE}} \, \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{a}}$. \pause
      Let $\mathbf{a} = \mathbf{x}_0$. \pause
    \item A prediction interval seeks to trap $y_0$, a random variable. \pause
    \item It makes sense that the prediction interval should be wider. \pause
    \item We will have $\mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}} \pm t_{\alpha/2} \,
      \sqrt{\mbox{\small\emph{MSE}} \, (1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}$.
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Statement of the prediction interval theorem.
\begin{frame}
  \frametitle{Theorem}
  %\framesubtitle{}
  \begin{itemize}
    \item Assume the usual linear regression model with normal error terms. \pause
    \item Let $y_0 = \mathbf{x}_0^\prime\boldsymbol{\beta} + \epsilon_0$, where $\epsilon_0 \sim N(0,\sigma^2)$, independently of $\epsilon_1, \ldots, \epsilon_n$. \pause
  \end{itemize}
  \vspace{3mm}
  A $(1-\alpha)100\%$ prediction interval for $y_0$ is given by
  {\LARGE
  \begin{displaymath}
    \mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}} \pm t_{\alpha/2} \,
    \sqrt{\mbox{\emph{MSE}} \, (1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}
  \end{displaymath}
  } % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Proof, part 1: build the standard normal z and the chi-squared w.
\begin{frame}
  \frametitle{Proof will use $t = \frac{z}{\sqrt{w/\nu}}$} \pause
  %\framesubtitle{}
  \begin{itemize}
    \item Predict $y_0$ with $\mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}}$ \pause
      $\sim N(\mathbf{x}_0^\prime \boldsymbol{\beta},\pause
      \sigma^2 \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)$. \pause
    \item $y_0 \sim N(\mathbf{x}_0^\prime \boldsymbol{\beta},\sigma^2)$. \pause
    \item And $y_0$ is independent of $\mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}}$. Why? \pause
    \item Error of prediction: $y_0 - \mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}} \pause
      \sim N( \pause 0, \pause \sigma^2 +
      \sigma^2 \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)$. \pause
      % Okay, it's minus the error of prediction, but so what?
    \item Standardize this to get the $z$ in the numerator of $t$. \pause
      \begin{displaymath}
        z = \frac{y_0 - \mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}}}
                 {\sqrt{\sigma^2(1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}}
        \pause \sim N(0,1)
      \end{displaymath}
      % Size switch lives inside \mbox: \small is a text-mode declaration.
    \item And $w = \frac{\mbox{\small\emph{SSE}}}{\sigma^2}$ (what else?) \pause
    \item $z$ and $w$ are independent. Why?
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Proof, part 2: form the t statistic; sigma^2 cancels, leaving MSE.
\begin{frame}
  \frametitle{$t = \frac{z}{\sqrt{w/(n-k-1)}} \sim t(n-k-1)$}
  \framesubtitle{With $z = \frac{y_0 - \mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}}}
    {\sqrt{\sigma^2(1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}}$
    and $w = \frac{{\mbox{\footnotesize\emph{SSE}}}}{\sigma^2}$}
  \begin{eqnarray*}
    t & = & \frac{z}{\sqrt{w/\nu}} \\ \pause
      & = & \frac{y_0 - \mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}}}
                 {\sqrt{\sigma^2(1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}}
            \left/ \sqrt{\frac{{\color{blue} \mbox{\small\emph{SSE}}}}{\sigma^2}/({\color{blue}n-k-1})} \right. \\ \pause
      & = & \frac{y_0 - \mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}}}
                 {\sqrt{ {\color{blue} \mbox{\small\emph{MSE}}}(1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}}
            \pause \sim t(n-k-1)
  \end{eqnarray*}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Proof, part 3: isolate y_0 in the probability statement.
\begin{frame}
  \frametitle{Deriving the prediction interval}
  %\framesubtitle{}
  %\begin{center}
  %\includegraphics[width=3in]{CI-curve}
  % \end{center}
  {\small
  \begin{eqnarray*}
    1-\alpha & = & P\{ -t_{\alpha/2} < t < t_{\alpha/2} \} \\ \pause
      & = & P\left\{ -t_{\alpha/2} <
            \frac{y_0 - \mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}}}
                 {\sqrt{ \mbox{\footnotesize\emph{MSE}} \, (1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}}
            < t_{\alpha/2} \right\} \\ \pause
      && \vdots \\ \pause
      & = & P\left\{ \mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}} - t_{\alpha/2}
            \sqrt{ \mbox{\small\emph{MSE}} \, (1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}
            < y_0 \right. \\
      && \hspace{12mm} < \left.
            \mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}} + t_{\alpha/2}
            \sqrt{ \mbox{\small\emph{MSE}} \, (1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}
            \right\} \pause
  \end{eqnarray*}
  } % End size
  Or, $\mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}} \pm t_{\alpha/2} \,
  \sqrt{\mbox{\small\emph{MSE}} \, (1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}$.
  \pause \hspace{5mm} $\blacksquare$
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
  \frametitle{Copyright Information}
  This slide show was prepared by
  \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
  Department of Statistical Sciences, University of Toronto.
  It is licensed under a
  \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
  {Creative Commons Attribution - ShareAlike 3.0 Unported License}.
  Use any part of it as you like and share the result freely.
  The \LaTeX~source code is available from the course website:
  \href{http://www.utstat.toronto.edu/~brunner/oldclass/302f20}
  {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f20}}
\end{frame}

\end{document}

% Everything below \end{document} is ignored by LaTeX: blank templates for new slides.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
  \frametitle{} \pause
  %\framesubtitle{}
  \begin{itemize}
    \item \pause
    \item \pause
    \item
  \end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}

\end{displaymath}
} % End size

\begin{comment}

\end{comment}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%