% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout to ignore pause statements

\hypersetup{colorlinks,linkcolor=,urlcolor=red}

\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
% \usetheme{Frankfurt}  % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usetheme{AnnArbor}  % CambridgeUS


\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
\usepackage{comment}
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number] 

\mode<presentation>

\title{Least Squares Estimation\footnote{See last slide for copyright information.}}
\subtitle{STA 302 Fall 2020}
\date{} % To suppress date

\begin{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Reading in In Rencher and Schaalje's \emph{Linear Models In Statistics}} 
%\framesubtitle{} 
\begin{itemize}
    \item Glance at Ch.~6 first.
    \item Sections 7.1, 7.2, 7.3.1 (pp.~137-145).
    % Properties section 7.3.2 (unbiased, GM) later
    \item Section 7.3.3 (pp.~149-151) on estimation of $\sigma^2$.
    %  They use the hat matrix $\mathbf{H}$ on page 154 without naming it. X-prime epsilon-hat = 0 is part of the normal equation geometric argument on page 154, but they do not seem to call it a theorem or anything. 
    \item Section 7.7 on $R^2$, but they use material in a section we will cover later. %(centered model). Interesting properties of  on p.~162. Expected value is something I cannot do without assuming normality. 
\end{itemize}


\end{frame}




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{The Model}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Multiple regression in scalar form}
%\framesubtitle{} 
For $i=1, \ldots, n$, let $ y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik} + \epsilon_i$\pause, where
\begin{itemize}
     \item[] $x_{ij}$ are observed, known constants. \pause
     \item[] $\epsilon_1, \ldots, \epsilon_n$ are independent random variables with $E(\epsilon_i)=0$ and $Var(\epsilon_i)=\sigma^2$. \pause
     \item[] $\beta_0, \ldots \beta_k$ and $\sigma^2$ are unknown constants, with $\sigma^2>0$.
\end{itemize} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{For example}
\framesubtitle{$y_i = \beta_0 + \beta_1 x_{i1} + \beta_2 x_{i2} + \beta_3 x_{i3} + \epsilon_i$}  \pause
For customer $i=1, \ldots, n$, \pause
\begin{itemize}
    \item $y_i$ is purchases in \$. % \pause
    \item $x_{i1}$ is income. % \pause
    \item $x_{i2}$ is age. % \pause
    \item $x_{i3}$ is advertising recall. 
\end{itemize}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Multiple regression in matrix form}
\framesubtitle{Compare $y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik} + \epsilon_i$} 
{ \small
\begin{equation*}
\begin{array}{cccccccc} % 6 columns
\mathbf{y} & = &  \mathbf{X} & \boldsymbol{\beta} & + & \boldsymbol{\epsilon} \\
&&&&& \\ % Another space
\left( \begin{array}{c}
y_1 \\ y_2 \\ y_3 \\ \vdots \\ y_n
\end{array} \right)        
&=&
\left(\begin{array}{cccc}
1 & 14.2 & \cdots & 1 \\
1 & 11.9 & \cdots & 0 \\
1 & ~3.7 & \cdots & 0 \\
\vdots & \vdots & \vdots & \vdots \\
1 & ~6.2 & \cdots & 1 
\end{array}\right)
&
\left( \begin{array}{c}
\beta_0 \\ \beta_1 \\ \vdots \\  \beta_k
\end{array} \right)
&+&     
\left( \begin{array}{c}
\epsilon_1 \\ \epsilon_2 \\ \epsilon_3  \\ \vdots \\ \epsilon_n 
\end{array} \right)        
\end{array} % 24
\end{equation*}
where 
\begin{itemize}
     \item[] $\mathbf{X}$ is an $n \times (k+1)$ matrix of observed constants
     \item[] $\boldsymbol{\beta}$ is a $(k+1) \times 1$ matrix of unknown constants
     \item[] $E(\boldsymbol{\epsilon}) = \mathbf{0}$ and $cov(\boldsymbol{\epsilon}) = \sigma^2 \mathbf{I}_n$, where $\sigma^2$ is an unknown constant.
\end{itemize} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Scalar Least Squares}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Least Squares Estimation: The idea}
\framesubtitle{ $y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik} + \epsilon_i$}  \pause
\begin{itemize}
    \item Statistical model says the distribution of $y_i$ is determined by parameter $\theta$ (could be a vector). \pause
    \item Calculate $E(y_i)$. \pause 
    \item Expected value depends on $\theta$, so write $E_\theta(y_i)$. \pause
    \item How should we estimate $\theta$? \pause
    \item Choose the value of $\theta$ that gets the observed $y_i$ as close as possible to their expected values \pause,
    \item By minimizing 
        \begin{displaymath}
            Q(\theta) = \sum_{i=1}^n(y_i-E_\theta(y_i))^2
        \end{displaymath} 
over $\theta$.     
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Least Squares}
{\small
\framesubtitle{ $y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik} + \epsilon_i$ ~~~~~ 
$E_{\boldsymbol{\beta}}(y_i) = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik}$}  \pause

Estimate $\beta_j$ by minimizing 
\begin{displaymath}
    Q(\boldsymbol{\beta}) = \sum_{i=1}^n(y_i-E_{\boldsymbol{\beta}}(y_i))^2 \pause
    = \sum_{i=1}^n(y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})^2
\end{displaymath} \pause 

{ \small
\begin{eqnarray*}
    \frac{\partial Q}{\partial\beta_0} & \stackrel{\mbox{\scriptsize set}}{=} & 0 \\
    &&\\
    \frac{\partial Q}{\partial\beta_1} & \stackrel{\mbox{\scriptsize set}}{=} & 0 \\
     & \vdots &  \\
     &&\\
    \frac{\partial Q}{\partial\beta_k} & \stackrel{\mbox{\scriptsize set}}{=} & 0 \\
\end{eqnarray*} \pause
} % End size
Solve $k+1$ equations in $k+1$ unknowns.
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Differentiate with respect to $\beta_0$}\pause
%\framesubtitle{}  

{\small
\begin{eqnarray*}
    \frac{\partial Q}{\partial\beta_0} & = & \frac{\partial}{\partial\beta_0}
    \sum_{i=1}^n(y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})^2 \\ \pause
    & = & \sum_{i=1}^n  \frac{\partial}{\partial\beta_0}
    (y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})^2 \\ \pause
    & = & \sum_{i=1}^n 2(y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})(-1) \\ \pause
    & = & -2\left(\sum_{i=1}^n y_i - n\beta_0 - \beta_1 \sum_{i=1}^n x_{i1} - \cdots 
          - \beta_k \sum_{i=1}^n x_{ik} \right) \pause \stackrel{\mbox{\scriptsize set}}{=} 0
\end{eqnarray*} 
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Differentiate with respect to $\beta_1$}
%\framesubtitle{} 
{\small
\begin{eqnarray*}
    \frac{\partial Q}{\partial\beta_1} & = & \frac{\partial}{\partial\beta_1}
    \sum_{i=1}^n(y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})^2 \\ \pause
    & = & \sum_{i=1}^n  \frac{\partial}{\partial\beta_1}
    (y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})^2 \\ \pause
    & = & \sum_{i=1}^n 2(y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})(-x_{i1}) \\ \pause
    & = & -2 \sum_{i=1}^n (x_{i1}y_i - \beta_0x_{i1} - \beta_1 x_{i1}^2 - \cdots - \beta_k x_{i1} x_{ik}) \\ \pause
    & = & -2\left(\sum_{i=1}^n x_{i1}y_i - \beta_0\sum_{i=1}^n x_{i1} - \beta_1 \sum_{i=1}^n x_{i1}^2 - \cdots - \beta_k \sum_{i=1}^n x_{i1} x_{ik}
 \right) \\ \pause 
 & \stackrel{\mbox{\scriptsize set}}{=} & 0
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Differentiate with respect to $\beta_j$}
%\framesubtitle{} 
{\scriptsize
\begin{eqnarray*}
    \frac{\partial Q}{\partial\beta_j} & = & \frac{\partial}{\partial{\color{red}\beta_j}}
    \sum_{i=1}^n(y_i - \beta_0 - \beta_1 x_{i1} - \cdots - 
    {\color{red}\beta_j} x_{ij} - \cdots - \beta_k x_{ik})^2 \\ \pause
    & = & \sum_{i=1}^n \frac{\partial}{\partial{\color{red}\beta_j}}
    (y_i - \beta_0 - \beta_1 x_{i1} - \cdots - 
    {\color{red}\beta_j} x_{ij} - \cdots - \beta_k x_{ik})^2 \\ \pause
    & = & \sum_{i=1}^n 2(y_i - \beta_0 - \beta_1 x_{i1} - \cdots - 
    \beta_j x_{ij} - \cdots - \beta_k x_{ik})(-x_{ij}) \\ \pause
    & = & -2 \sum_{i=1}^n (x_{ij}y_i - \beta_0x_{ij} - \beta_1 x_{i1}x_{ij} - \cdots - 
    \beta_j x_{ij}^2 - \cdots - \beta_k x_{ij} x_{ik}) \\ \pause
    & = & -2 \left( \sum_{i=1}^n x_{ij}y_i - \beta_0 \sum_{i=1}^n x_{ij} 
    - \beta_1 \sum_{i=1}^n x_{i1}x_{ij} - \cdots - 
     \sum_{i=1}^n \beta_j x_{ij}^2 - \cdots - \beta_k \sum_{i=1}^n x_{ij} x_{ik}\right) \\ \pause
 & \stackrel{\mbox{\scriptsize set}}{=} & 0
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Differentiate with respect to $\beta_k$}
%\framesubtitle{} 
{\small
\begin{eqnarray*}
    \frac{\partial Q}{\partial\beta_k} & = & \frac{\partial}{\partial\beta_k}
    \sum_{i=1}^n(y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})^2 \\ \pause
    & = & \sum_{i=1}^n  \frac{\partial}{\partial\beta_1}
    (y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})^2 \\ \pause
    & = & \sum_{i=1}^n 2(y_i - \beta_0 - \beta_1 x_{i1} - \cdots 
    - \beta_k x_{ik})(-x_{ik}) \\ \pause
    & = & -2 \sum_{i=1}^n (x_{ik}y_i - \beta_0x_{ik} - \beta_1 x_{i1}x_{ik} 
    - \cdots - \beta_k x_{ik}^2) \\ \pause
    & = & -2\left(\sum_{i=1}^n x_{ik}y_i - \beta_0\sum_{i=1}^n x_{ik} - \beta_1 \sum_{i=1}^n x_{i1}x_{ik} - \cdots - \beta_k \sum_{i=1}^n x_{ik}^2 \right) \\ \pause 
 & \stackrel{\mbox{\scriptsize set}}{=} & 0
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

\begin{frame}                                           % & = & 0
\frametitle{Have $k+1$ equations in $k+1$ unknowns}
\framesubtitle{Solve for $\beta_0, \ldots, \beta_k$} 
{\small
\begin{eqnarray*}
    -2\left(\sum_{i=1}^n y_i - n\beta_0 - \beta_1 \sum_{i=1}^n x_{i1} - \cdots 
          - \beta_k \sum_{i=1}^n x_{ik} \right) & = & 0 \\
    -2\left(\sum_{i=1}^n x_{i1}y_i - \beta_0\sum_{i=1}^n x_{i1} - \beta_1 \sum_{i=1}^n x_{i1}^2 - \cdots - \beta_k \sum_{i=1}^n x_{i1} x_{ik} \right)  & = & 0 \\
    &&\\  &&\\
    \vdots \hspace{40mm} &  &  \\
    &&\\
    -2\left(\sum_{i=1}^n x_{ik}y_i - \beta_0\sum_{i=1}^n x_{ik} - \beta_1 \sum_{i=1}^n x_{i1}x_{ik} - \cdots - \beta_k \sum_{i=1}^n x_{ik}^2 \right) & = & 0 \\
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Divide by -2 and re-arrange, obtaining}
%\framesubtitle{} 
{\footnotesize
\begin{displaymath}
\begin{array}{lllllllll}
\beta_0 n &+& \beta_1 \sum_{i=1}^n x_{i1}  &+& \cdots &+& \beta_k \sum_{i=1}^n x_{ik}
& = & \sum_{i=1}^n y_i \\
\beta_0\sum_{i=1}^n x_{i1} &+& \beta_1 \sum_{i=1}^n x_{i1}^2  &+& \cdots &+& \beta_k \sum_{i=1}^n x_{i1} x_{ik}
& = & \sum_{i=1}^n x_{i1}y_i \\
\beta_0\sum_{i=1}^n x_{i2} &+& \beta_1 \sum_{i=1}^n x_{i1}x_{i2}  &+& \cdots &+& \beta_k \sum_{i=1}^n x_{i2} x_{ik}
& = & \sum_{i=1}^n x_{i2}y_i \\
~~~~~~~\vdots & ~\vdots & ~~~~~~~\vdots & ~\vdots & ~\vdots  & ~\vdots & ~~~~~~\vdots & ~\vdots & ~~~~~\vdots  \\
\beta_0\sum_{i=1}^n x_{ik} &+& \beta_1 \sum_{i=1}^n x_{i1}x_{ik}  &+& \cdots &+& \beta_k \sum_{i=1}^n x_{ik}^2
& = & \sum_{i=1}^n x_{ik}y_i \\  
\end{array}
\end{displaymath}  \pause

\begin{itemize}
    \item These are called the \emph{normal equations}. 
    \item It has nothing to do with the normal distribution. \pause
    \item Wikipedia says" ``In geometry, a normal is an object such as a line, ray, or vector that is perpendicular to a given object." \pause 
    \item The normal equations are a system of $k+1$ \emph{linear} equations in $k+1$ unknowns. All the $x_{ij}$ and $y_i$ are constants.
\end{itemize}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame} % It was really tricky to fit this material on the slide.
\frametitle{Solve the Normal Equations}
%\framesubtitle{} 

{\footnotesize
~The normal equations are
\begin{displaymath}
\begin{array}{lllllllll}
\beta_0 n &+& \beta_1 \sum_{i=1}^n x_{i1}  &+& \cdots &+& \beta_k \sum_{i=1}^n x_{ik}
& = & \sum_{i=1}^n y_i \\
\beta_0\sum_{i=1}^n x_{i1} &+& \beta_1 \sum_{i=1}^n x_{i1}^2  &+& \cdots &+& \beta_k \sum_{i=1}^n x_{i1} x_{ik}
& = & \sum_{i=1}^n x_{i1}y_i \\
\beta_0\sum_{i=1}^n x_{i2} &+& \beta_1 \sum_{i=1}^n x_{i1}x_{i2}  &+& \cdots &+& \beta_k \sum_{i=1}^n x_{i2} x_{ik}
& = & \sum_{i=1}^n x_{i2}y_i \\
~~~~~~~\vdots & ~\vdots & ~~~~~~~\vdots & ~\vdots & ~\vdots  & ~\vdots & ~~~~~~\vdots & ~\vdots & ~~~~~\vdots  \\
\beta_0\sum_{i=1}^n x_{ik} &+& \beta_1 \sum_{i=1}^n x_{i1}x_{ik}  &+& \cdots &+& \beta_k \sum_{i=1}^n x_{ik}^2
& = & \sum_{i=1}^n x_{ik}y_i \\  
\end{array}
\end{displaymath}  \pause

\vspace{2mm}
~In matrix form,  \pause
\vspace{2mm}
} % End size

{\scriptsize
\begin{displaymath} \hspace{-4mm}
    \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
    \left(\begin{array}{c}
    \beta_0 \\ \beta_1 \\ \beta_2 \\ \vdots \\ \beta_k \\ 
    \end{array}\right) = 
    \left(\begin{array}{c}
    \sum_{i=1}^n y_i \\ \sum_{i=1}^n x_{i1}y_i \\ \sum_{i=1}^n x_{i2}y_i \\ \vdots \\  \sum_{i=1}^n x_{ik}y_i \\
    \end{array}\right)
\end{displaymath} \pause
} % End size
% Ugly visual formatting

\hspace{40mm}$\mathbf{X}^\prime\mathbf{X}
\hspace{42mm}\boldsymbol{\beta} 
\hspace{5mm}= 
\hspace{7mm}\mathbf{X}^\prime\mathbf{y}$
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Matrix Version}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Multiple regression in matrix form}
\framesubtitle{Compare $y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik} + \epsilon_i$} 
{ \small
\begin{equation*}
\begin{array}{cccccccc} % 6 columns
\mathbf{Y} & = &  \mathbf{X} & \boldsymbol{\beta} & + & \boldsymbol{\epsilon} \\
&&&&& \\ % Another space
\left( \begin{array}{c}
y_1 \\ y_2 \\ y_3 \\ \vdots \\ y_n
\end{array} \right)        
&=&
\left(\begin{array}{cccc}
1 & 14.2 & \cdots & 1 \\
1 & 11.9 & \cdots & 0 \\
1 & ~3.7 & \cdots & 0 \\
\vdots & \vdots & \vdots & \vdots \\
1 & ~6.2 & \cdots & 1 
\end{array}\right)
&
\left( \begin{array}{c}
\beta_0 \\ \beta_1 \\ \vdots \\  \beta_k
\end{array} \right)
&+&     
\left( \begin{array}{c}
\epsilon_1 \\ \epsilon_2 \\ \epsilon_3  \\ \vdots \\ \epsilon_n 
\end{array} \right)        
\end{array} % 24
\end{equation*}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The $\mathbf{X}$ Matrix}
%\framesubtitle{} 
\begin{displaymath}
 \mathbf{X} = 
    \left(\begin{array}{ccccc}
    1 & x_{11} & x_{12} & \cdots & x_{1k} \\
    1 & x_{21} & x_{22} & \cdots & x_{2k} \\
    1 & x_{31} & x_{32} & \cdots & x_{3k} \\
    1 & x_{41} & x_{42} & \cdots & x_{4k} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
    1 & x_{n1} & x_{n2} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} 
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    1 & x_{11} & x_{12} & \cdots & x_{1k} \\
    1 & x_{21} & x_{22} & \cdots & x_{2k} \\
    1 & x_{31} & x_{32} & \cdots & x_{3k} \\
    1 & x_{41} & x_{42} & \cdots & x_{4k} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
    1 & x_{n1} & x_{n2} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Light it up! Tedious but worth it I hope.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame} % Colour (0,0)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc} 
    {\color{red}1} & {\color{red}1} & {\color{red}1} & 
    {\color{red}1} & {\color{red}\cdots} & {\color{red}1}  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    {\color{red}1} & x_{11} & x_{12} & \cdots & x_{1k} \\
    {\color{red}1} & x_{21} & x_{22} & \cdots & x_{2k} \\
    {\color{red}1} & x_{31} & x_{32} & \cdots & x_{3k} \\
    {\color{red}1} & x_{41} & x_{42} & \cdots & x_{4k} \\
    {\color{red}\vdots} & \vdots & \vdots & \vdots & \vdots \\
    {\color{red}1} & x_{n1} & x_{n2} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    {\color{red}n} & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame} % Colour (0,1)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc} 
    {\color{red}1} & {\color{red}1} & {\color{red}1} & 
    {\color{red}1} & {\color{red}\cdots} & {\color{red}1}  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    1 & {\color{red}x_{11}} & x_{12} & \cdots & x_{1k} \\
    1 & {\color{red}x_{21}} & x_{22} & \cdots & x_{2k} \\
    1 & {\color{red}x_{31}} & x_{32} & \cdots & x_{3k} \\
    1 & {\color{red}x_{41}} & x_{42} & \cdots & x_{4k} \\
    \vdots & {\color{red}\vdots} & \vdots & \vdots & \vdots \\
    1 &{\color{red}x_{n1}} & x_{n2} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & {\color{red}\sum_{i=1}^n x_{i1}} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame} % Colour (0,2)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc} 
    {\color{red}1} & {\color{red}1} & {\color{red}1} & 
    {\color{red}1} & {\color{red}\cdots} & {\color{red}1}  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    1 & x_{11} & {\color{red}x_{12}} & \cdots & x_{1k} \\
    1 & x_{21} & {\color{red}x_{22}} & \cdots & x_{2k} \\
    1 & x_{31} & {\color{red}x_{32}} & \cdots & x_{3k} \\
    1 & x_{41} & {\color{red}x_{42}} & \cdots & x_{4k} \\
    \vdots & \vdots & {\color{red}\vdots} & \vdots & \vdots \\
    1 & x_{n1} & {\color{red}x_{n2}} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & {\color{red}\sum_{i=1}^n x_{i2}} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame} % Colour (1,0)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    {\color{red}x_{11}} & {\color{red}x_{21}} & {\color{red}x_{31}} & 
    {\color{red}x_{41}} & {\color{red}\cdots} & {\color{red}x_{n1}} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    {\color{red}1} & x_{11} & x_{12} & \cdots & x_{1k} \\
    {\color{red}1} & x_{21} & x_{22} & \cdots & x_{2k} \\
    {\color{red}1} & x_{31} & x_{32} & \cdots & x_{3k} \\
    {\color{red}1} & x_{41} & x_{42} & \cdots & x_{4k} \\
    {\color{red}\vdots} & \vdots & \vdots & \vdots & \vdots \\
    {\color{red}1} & x_{n1} & x_{n2} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    {\color{red}\sum_{i=1}^n x_{i1}} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame} % Colour (1,1)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    {\color{red}x_{11}} & {\color{red}x_{21}} & {\color{red}x_{31}} & 
    {\color{red}x_{41}} & {\color{red}\cdots} & {\color{red}x_{n1}} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    1 & {\color{red}x_{11}} & x_{12} & \cdots & x_{1k} \\
    1 & {\color{red}x_{21}} & x_{22} & \cdots & x_{2k} \\
    1 & {\color{red}x_{31}} & x_{32} & \cdots & x_{3k} \\
    1 & {\color{red}x_{41}} & x_{42} & \cdots & x_{4k} \\
    \vdots & {\color{red}\vdots} & \vdots & \vdots & \vdots \\
    1 &{\color{red}x_{n1}} & x_{n2} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & {\color{red}\sum_{i=1}^n x_{i1}^2} & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame} % Colour (1,2)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    {\color{red}x_{11}} & {\color{red}x_{21}} & {\color{red}x_{31}} & 
    {\color{red}x_{41}} & {\color{red}\cdots} & {\color{red}x_{n1}} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    1 & x_{11} & {\color{red}x_{12}} & \cdots & x_{1k} \\
    1 & x_{21} & {\color{red}x_{22}} & \cdots & x_{2k} \\
    1 & x_{31} & {\color{red}x_{32}} & \cdots & x_{3k} \\
    1 & x_{41} & {\color{red}x_{42}} & \cdots & x_{4k} \\
    \vdots & \vdots & {\color{red}\vdots} & \vdots & \vdots \\
    1 & x_{n1} & {\color{red}x_{n2}} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & {\color{red}\sum_{i=1}^n x_{i1}x_{i2}} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame} % Colour (2,0)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    {\color{red}x_{12}} & {\color{red}x_{22}} & {\color{red}x_{32}} & 
    {\color{red}x_{42}} & {\color{red}\cdots} & {\color{red}x_{n2}} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    {\color{red}1} & x_{11} & x_{12} & \cdots & x_{1k} \\
    {\color{red}1} & x_{21} & x_{22} & \cdots & x_{2k} \\
    {\color{red}1} & x_{31} & x_{32} & \cdots & x_{3k} \\
    {\color{red}1} & x_{41} & x_{42} & \cdots & x_{4k} \\
    {\color{red}\vdots} & \vdots & \vdots & \vdots & \vdots \\
    {\color{red}1} & x_{n1} & x_{n2} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    {\color{red}\sum_{i=1}^n x_{i2}} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame} % Colour (2,1)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    {\color{red}x_{12}} & {\color{red}x_{22}} & {\color{red}x_{32}} & 
    {\color{red}x_{42}} & {\color{red}\cdots} & {\color{red}x_{n2}} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    1 & {\color{red}x_{11}} & x_{12} & \cdots & x_{1k} \\
    1 & {\color{red}x_{21}} & x_{22} & \cdots & x_{2k} \\
    1 & {\color{red}x_{31}} & x_{32} & \cdots & x_{3k} \\
    1 & {\color{red}x_{41}} & x_{42} & \cdots & x_{4k} \\
    \vdots & {\color{red}\vdots} & \vdots & \vdots & \vdots \\
    1 &{\color{red}x_{n1}} & x_{n2} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & {\color{red}\sum_{i=1}^n x_{i1}x_{i2}} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame} % Colour (2,k)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    {\color{red}x_{12}} & {\color{red}x_{22}} & {\color{red}x_{32}} & 
    {\color{red}x_{42}} & {\color{red}\cdots} & {\color{red}x_{n2}} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    1 & x_{11} & x_{12} & \cdots & {\color{red}x_{1k}} \\
    1 & x_{21} & x_{22} & \cdots & {\color{red}x_{2k}} \\
    1 & x_{31} & x_{32} & \cdots & {\color{red}x_{3k}} \\
    1 & x_{41} & x_{42} & \cdots & {\color{red}x_{4k}} \\
    \vdots & \vdots & \vdots & \vdots & {\color{red}\vdots} \\
    1 & x_{n1} & x_{n2} & \cdots & {\color{red}x_{nk}} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & {\color{red}\sum_{i=1}^n x_{i2} x_{ik}} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame} % Colour (k,0)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    {\color{red}x_{1k}} & {\color{red}x_{2k}} & {\color{red}x_{3k}} & 
    {\color{red}x_{4k}} & {\color{red}\cdots} & {\color{red}x_{nk}} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    {\color{red}1} & x_{11} & x_{12} & \cdots & x_{1k} \\
    {\color{red}1} & x_{21} & x_{22} & \cdots & x_{2k} \\
    {\color{red}1} & x_{31} & x_{32} & \cdots & x_{3k} \\
    {\color{red}1} & x_{41} & x_{42} & \cdots & x_{4k} \\
    {\color{red}\vdots} & \vdots & \vdots & \vdots & \vdots \\
    {\color{red}1} & x_{n1} & x_{n2} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     {\color{red}\sum_{i=1}^n x_{ik}} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame} % Colour (k,1)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    {\color{red}x_{1k}} & {\color{red}x_{2k}} & {\color{red}x_{3k}} & 
    {\color{red}x_{4k}} & {\color{red}\cdots} & {\color{red}x_{nk}} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    1 & {\color{red}x_{11}} & x_{12} & \cdots & x_{1k} \\
    1 & {\color{red}x_{21}} & x_{22} & \cdots & x_{2k} \\
    1 & {\color{red}x_{31}} & x_{32} & \cdots & x_{3k} \\
    1 & {\color{red}x_{41}} & x_{42} & \cdots & x_{4k} \\
    \vdots & {\color{red}\vdots} & \vdots & \vdots & \vdots \\
    1 &{\color{red}x_{n1}} & x_{n2} & \cdots & x_{nk} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & {\color{red}\sum_{i=1}^n x_{i1}x_{ik}} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame} % Colour (k,k)
\frametitle{The $\mathbf{X}^\prime \mathbf{X}$ Matrix}
%\framesubtitle{} 
$\mathbf{X}^\prime\mathbf{X} = $
\begin{displaymath}
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    {\color{red}x_{1k}} & {\color{red}x_{2k}} & {\color{red}x_{3k}} & 
    {\color{red}x_{4k}} & {\color{red}\cdots} & {\color{red}x_{nk}} \\
    \end{array}\right)
    \left(\begin{array}{ccccc}
    1 & x_{11} & x_{12} & \cdots & {\color{red}x_{1k}} \\
    1 & x_{21} & x_{22} & \cdots & {\color{red}x_{2k}} \\
    1 & x_{31} & x_{32} & \cdots & {\color{red}x_{3k}} \\
    1 & x_{41} & x_{42} & \cdots & {\color{red}x_{4k}} \\
    \vdots & \vdots & \vdots & \vdots & {\color{red}\vdots} \\
    1 & x_{n1} & x_{n2} & \cdots & {\color{red}x_{nk}} \\
    \end{array}\right)
\end{displaymath} \pause
\begin{displaymath}
    = \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & {\color{red}\sum_{i=1}^n x_{ik}^2} \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The $\mathbf{X}^\prime \mathbf{y}$ Matrix}
%\framesubtitle{} 
\begin{displaymath}
\mathbf{X}^\prime\mathbf{y} = 
    \left(\begin{array}{cccccc}
    1     & 1     & 1     & 1     & \cdots & 1  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{c}
    y_1 \\ y_2 \\ y_3 \\ \vdots \\ y_n 
    \end{array}\right) \pause
    = 
        \left(\begin{array}{c}
    \sum_{i=1}^n y_i \\ \sum_{i=1}^n x_{i1}y_i \\ \sum_{i=1}^n x_{i2}y_i \\ \vdots \\  \sum_{i=1}^n x_{ik}y_i \\
    \end{array}\right)
\end{displaymath} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The Normal Equations in Matrix Form} \pause
%\framesubtitle{} 

% \begin{columns}   % Use Beamer's columns to use more of the margins!
% \column{1.2\textwidth}
{\scriptsize
\begin{displaymath} \hspace{-4mm}
    \left(\begin{array}{ccccc}
    n & \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i2} & \cdots & \sum_{i=1}^n x_{ik} \\
    \sum_{i=1}^n x_{i1} & \sum_{i=1}^n x_{i1}^2 & \sum_{i=1}^n x_{i1}x_{i2} & \cdots & \sum_{i=1}^n x_{i1} x_{ik} \\
    \sum_{i=1}^n x_{i2} & \sum_{i=1}^n x_{i1}x_{i2} & \sum_{i=1}^n x_{i2}^2 & \cdots & \sum_{i=1}^n x_{i2} x_{ik} \\
    \vdots & \vdots & \vdots & \vdots & \vdots \\
     \sum_{i=1}^n x_{ik} & \sum_{i=1}^n x_{i1}x_{ik} & \sum_{i=1}^n x_{i2}x_{ik} & \cdots & \sum_{i=1}^n x_{ik}^2 \\
    \end{array}\right)
    \left(\begin{array}{c}
    \beta_0 \\ \beta_1 \\ \beta_2 \\ \vdots \\ \beta_k \\ 
    \end{array}\right) = 
    \left(\begin{array}{c}
    \sum_{i=1}^n y_i \\ \sum_{i=1}^n x_{i1}y_i \\ \sum_{i=1}^n x_{i2}y_i \\ \vdots \\  \sum_{i=1}^n x_{ik}y_i \\
    \end{array}\right)
\end{displaymath} % \pause
} % End size

% Ugly visual formatting
\hspace{40mm}$\mathbf{X}^\prime\mathbf{X}
\hspace{42mm}\boldsymbol{\beta} 
\hspace{5mm}= 
\hspace{7mm}\mathbf{X}^\prime\mathbf{y}$
%\end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Solve the Normal Equations} 
%\framesubtitle{} 
{\LARGE
\begin{eqnarray*}
& & \mathbf{X}^\prime\mathbf{X} \boldsymbol{\beta} = \mathbf{X}^\prime\mathbf{y} \\ \pause
& \Rightarrow & (\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime\mathbf{X} \boldsymbol{\beta} = (\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime\mathbf{y} \\ \pause
& \Rightarrow & \boldsymbol{\beta} = (\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime\mathbf{y} 
\end{eqnarray*} \pause

\vspace{3mm}
Provided $(\mathbf{X}^\prime\mathbf{X})^{-1}$ exists.
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{What is $\boldsymbol{\beta}$??} \pause
%\framesubtitle{} 
\begin{itemize}
    \item We have arrived at $\boldsymbol{\beta} = (\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime\mathbf{y}$, provided $(\mathbf{X}^\prime\mathbf{X})^{-1}$ exists. \pause
    \item But $\boldsymbol{\beta}$ is an unknown parameter, while $(\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime\mathbf{y}$ is a statistic that can be calculated exactly from the sample data. What's going on? \pause
    \item Almost always, $\boldsymbol{\beta}$ is a vector of unknown parameters in the model
    $\mathbf{y} =  \mathbf{X}\boldsymbol{\beta} + \boldsymbol{\epsilon}$.  \pause
    \item But just temporarily, for least squares estimation, $\boldsymbol{\beta}$ is a vector of variables over which we are minimizing the sum of squares $Q$.\pause
    \item The solution is an \emph{estimate}, so we write 
    $\widehat{\boldsymbol{\beta}} = 
    (\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime\mathbf{y}$. \pause
    \item Provided $(\mathbf{X}^\prime\mathbf{X})^{-1}$ exists.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{When Does $(\mathbf{X}^\prime\mathbf{X})^{-1}$ Exist?} \pause
%\framesubtitle{} 
\textbf{Theorem}~~ The following 3 conditions are equivalent: \pause
  \begin{itemize}
    \item[(a)] The columns of $\mathbf{X}$ are linearly independent.
    \item[(b)] $\mathbf{X}^\prime\mathbf{X}$ is positive definite.
    \item[(c)] $(\mathbf{X}^\prime\mathbf{X})^{-1}$ exists.
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Proof of equivalence}
\framesubtitle{  \begin{itemize}
    \item[](a) The columns of $\mathbf{X}$ are linearly independent.
    \item[](b) $\mathbf{X}^\prime\mathbf{X}$ is positive definite.
    \item[](c) $(\mathbf{X}^\prime\mathbf{X})^{-1}$ exists.
  \end{itemize}  
} % End subtitle 
\pause

\begin{itemize}
    \item Assume the columns of $\mathbf{X}$ are linearly independent. \pause
    \item Columns of $\mathbf{X}$ linearly independent means \\ $\mathbf{Xv} = \mathbf{0}$ implies $\mathbf{v} = \mathbf{0}$. \pause
    \item Seek to show $\mathbf{X}^\prime\mathbf{X}$ positive definite\pause, \\
          meaning $\mathbf{v}^\prime(\mathbf{X}^\prime\mathbf{X})\mathbf{v} > 0$ for $\mathbf{v} \neq \mathbf{0}$. \pause
    \item First, $\mathbf{X}^\prime\mathbf{X}$ is non-negative definite\pause, because \\
    $\mathbf{v}^\prime(\mathbf{X}^\prime\mathbf{X})\mathbf{v} \pause 
    = (\mathbf{Xv})^\prime(\mathbf{Xv}) \pause
    = \mathbf{z}^\prime\mathbf{z} \pause
    = \sum_{i=1}^n z_i^2 \pause \geq 0.$ \pause
    \item And if $\mathbf{v}^\prime(\mathbf{X}^\prime\mathbf{X})\mathbf{v} = 0$\pause, then $\mathbf{Xv} = \mathbf{z} = \mathbf{0} \pause \Rightarrow \mathbf{v} = \mathbf{0}$. \pause
    \item Thus $\mathbf{v}^\prime(\mathbf{X}^\prime\mathbf{X})\mathbf{v} > 0$ for $\mathbf{v} \neq \mathbf{0}$. \pause
    \item Proving $\mathbf{X}^\prime\mathbf{X}$ positive definite. \pause
    \item This establishes (a) $\Rightarrow$ (b). 
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Showing (b) $\Rightarrow$ (c)}
\framesubtitle{  \begin{itemize}
    \item[](a) The columns of $\mathbf{X}$ are linearly independent.
    \item[](b) $\mathbf{X}^\prime\mathbf{X}$ is positive definite.
    \item[](c) $(\mathbf{X}^\prime\mathbf{X})^{-1}$ exists.
  \end{itemize}  
} % End subtitle 
\pause

\begin{itemize}
    \item $\mathbf{X}^\prime\mathbf{X}$ is symmetric, for 
    $(\mathbf{X}^\prime\mathbf{X})^\prime = \mathbf{X}^\prime\mathbf{X}$. \pause
    \item Thus we have the spectral decomposition 
    $\mathbf{X}^\prime\mathbf{X} = \mathbf{CDC}^\prime$. \pause
    \item And because $\mathbf{X}^\prime\mathbf{X}$ is positive definite\pause, 
    its eigenvalues are all positive\pause, $\mathbf{D}^{-1}$ is defined\pause,
    and $(\mathbf{X}^\prime\mathbf{X})^{-1} = \mathbf{CD}^{-1}\mathbf{C}^\prime$.
    \item So $(\mathbf{X}^\prime\mathbf{X})^{-1}$ exists. \pause
    \item This establishes (b) $\Rightarrow$ (c).
\end{itemize}  
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Showing (c) $\Rightarrow$ (a)}
\framesubtitle{  \begin{itemize}
    \item[](a) The columns of $\mathbf{X}$ are linearly independent.
    \item[](b) $\mathbf{X}^\prime\mathbf{X}$ is positive definite.
    \item[](c) $(\mathbf{X}^\prime\mathbf{X})^{-1}$ exists.
  \end{itemize}  
} % End subtitle 
\pause

\begin{itemize}
    \item Let $\mathbf{Xv} = \mathbf{0}$. Seek to show $\mathbf{v} = \mathbf{0}$. \pause
    \item $\mathbf{Xv} = \mathbf{0} \Rightarrow \mathbf{X}^\prime\mathbf{Xv} = \mathbf{X}^\prime\mathbf{0} \pause = \mathbf{0}$
    \item $\Rightarrow (\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime\mathbf{Xv} 
    = (\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{0} \pause = \mathbf{0}$.   \pause
    \item $\Rightarrow \mathbf{v}  = \mathbf{0}$. \pause
    \item And the columns of $\mathbf{X}$ are linearly independent. \pause
    \item This establishes (c) $\Rightarrow$ (a).
\end{itemize}  
$\blacksquare$
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The Message} 
%\framesubtitle{} 
\begin{itemize}
    \item The least squares estimate $\widehat{\boldsymbol{\beta}} = 
    (\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime\mathbf{y}$ exists if and only if the columns of $\mathbf{X}$ are linearly independent. \pause
    %\item[]
    \item This just means the explanatory variables are not redundant. \pause
    %\item[]
    \item Example: Predicting final exam score. \pause
    %\item[]
    \item We will always assume that the columns of $\mathbf{X}$ are linearly independent. If not, fix it up.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{More Notation} % Should this even be a section?
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{``Predicted" $\mathbf{y}$}
%\framesubtitle{} 
{\LARGE
\begin{displaymath}
    \widehat{\mathbf{y}} = \mathbf{X}\widehat{\boldsymbol{\beta}}
\end{displaymath} \pause
} % End size
\begin{itemize}
    \item More like an estimated $E(\mathbf{y}) \pause 
          = E(\mathbf{X}\boldsymbol{\beta} + \boldsymbol{\epsilon}) 
          = \mathbf{X}\boldsymbol{\beta}$. \pause 
    \item Could be denoted $\widehat{E(\mathbf{y})}$\pause, but it's not. \pause
    \item It would be \emph{predicted} $\mathbf{y}$ only for a new sample with the same set of $\mathbf{X}$ values. \pause
    \item $\widehat{y}_i = \widehat{\beta}_0 + \widehat{\beta}_1 x_{i1} + \cdots + \widehat{\beta}_k x_{ik}$ is a point on the best-fitting hyper-plane.
\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Residuals}
\framesubtitle{Vertical distances of the points from the hyperplane} \pause 
{\LARGE
\begin{displaymath}
    \widehat{\boldsymbol{\epsilon}} = \mathbf{y} - \widehat{\mathbf{y}}
\end{displaymath} \pause
} % End size
Why the funny notation? \pause
{\LARGE
\begin{eqnarray*}
    \mathbf{y} = \mathbf{X}\boldsymbol{\beta} + \boldsymbol{\epsilon} 
    & \Leftrightarrow & 
    \boldsymbol{\epsilon} = \mathbf{y} - \mathbf{X}\boldsymbol{\beta} \\ \pause
    && 
    \widehat{\boldsymbol{\epsilon}} = \mathbf{y} 
    - \mathbf{X}\widehat{\boldsymbol{\beta}}
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Hospital-Acquired Infection}
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.2in]{Scatter0}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Residual for Hospital 48}
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.2in]{Scatter2}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The Hat Matrix: $\mathbf{H} = \mathbf{X}(\mathbf{X}^\prime 
                             \mathbf{X})^{-1} \mathbf{X}^\prime$}
\framesubtitle{The hat matrix puts a hat on $\mathbf{y}$} \pause
{\LARGE
\begin{eqnarray*}
    \widehat{\mathbf{y}} & = & \mathbf{X}\widehat{\boldsymbol{\beta}} \\ \pause 
    & = & \mathbf{X}(\mathbf{X}^\prime \mathbf{X})^{-1} 
                     \mathbf{X}^\prime\mathbf{y} \\ \pause
    & = &  \mathbf{Hy}
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The Hat Matrix is Symmetric}
\framesubtitle{Recall $\mathbf{H} = \mathbf{X}(\mathbf{X}^\prime 
                             \mathbf{X})^{-1} \mathbf{X}^\prime$} \pause
{\LARGE
\begin{eqnarray*}
    \mathbf{H}^\prime & = & 
    \left( \mathbf{X}(\mathbf{X}^\prime \mathbf{X})^{-1}
    \mathbf{X}^\prime\right)^\prime \\ \pause
    & = & \mathbf{X}^{\prime\prime}(\mathbf{X}^\prime \mathbf{X})^{-1^\prime}
    \mathbf{X}^\prime \\ \pause
    & = & \mathbf{X}(\mathbf{X}^\prime \mathbf{X})^{^\prime-1}
    \mathbf{X}^\prime \\ \pause
    & = & \mathbf{X}(\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime \\ \pause
    & = &  \mathbf{H}
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The Hat Matrix is Idempotent}
\framesubtitle{Meaning $\mathbf{H\mathbf{H}} =\mathbf{H}$} 
{\LARGE
\begin{eqnarray*}
\mathbf{HH} 
&=& \mathbf{X}(\mathbf{X}^\prime \mathbf{X})^{-1} {\color{blue}\mathbf{X}^\prime ~~
    \mathbf{X}(\mathbf{X}^\prime \mathbf{X})^{-1} } \mathbf{X}^\prime \\ \pause
&=& \mathbf{X}(\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime \\ \pause
&=& \mathbf{H}
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$\widehat{\boldsymbol{\epsilon}} = (\mathbf{I}-\mathbf{H})\mathbf{y}$} 
%\framesubtitle{} 
{\LARGE
\begin{eqnarray*}
    \widehat{\boldsymbol{\epsilon}} &=& \mathbf{y}-\widehat{\mathbf{y}} \\ \pause
    & = & \mathbf{y}-\mathbf{Hy} \\ \pause
    & = & \mathbf{Iy}-\mathbf{Hy} \\ \pause
    & = & (\mathbf{I}-\mathbf{H})\mathbf{y}
\end{eqnarray*} \pause
} % End size
\vspace{5mm}
$(\mathbf{I}-\mathbf{H})$ is also symmetric and idempotent.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Now X-prime e = 0 and it's really a minimum. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$\mathbf{X}^\prime \, \widehat{\boldsymbol{\epsilon}} = \mathbf{0}$}
\framesubtitle{An important result} \pause
{\LARGE
\begin{eqnarray*}
    \mathbf{X}^\prime \, \widehat{\boldsymbol{\epsilon}} 
    &=& \mathbf{X}^\prime(\mathbf{y} - \widehat{\mathbf{y}}) \\ \pause
    &=& \mathbf{X}^\prime(\mathbf{y} - \mathbf{X}\widehat{\boldsymbol{\beta}}) 
         \\ \pause
    &=&  \mathbf{X}^\prime\mathbf{y} 
         - \mathbf{X}^\prime\mathbf{X}\widehat{\boldsymbol{\beta}} \\ \pause
    &=&  \mathbf{X}^\prime\mathbf{y} 
         - {\color{blue}\mathbf{X}^\prime\mathbf{X}
         (\mathbf{X}^\prime \mathbf{X})^{-1} } 
         \mathbf{X}^\prime \mathbf{y}  \\ \pause
    &=&  \mathbf{X}^\prime\mathbf{y} - \mathbf{X}^\prime\mathbf{y} \\
    &=&  \mathbf{0}
\end{eqnarray*} 
} % End size
\end{frame}

% Another slide here about orthoganal

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$\mathbf{X}^\prime \, 
             \widehat{\boldsymbol{\epsilon}} = \mathbf{0}$}
\framesubtitle{The vector $\mathbf{0}$ is $(k+1) \times 1$}   \pause
\begin{itemize}
    \item The inner product of each row of $\mathbf{X}^\prime$ and the vector of residuals is \emph{zero}. \pause
    %\item If the first column of $\mathbf{X}$ is all ones (denote it by $\mathbf{j}$)\pause, then 
    % $\mathbf{j}^\prime \widehat{\boldsymbol{\epsilon}} \pause = \sum_{i=1}^n\widehat{\epsilon}_i \pause = 0$. \pause
    \item The vector of residuals $\widehat{\boldsymbol{\epsilon}}$ is at right angles (orthogonal) to each column of $\mathbf{X}$, as vectors in $\mathbb{R}^n$. \pause

    \item Also, this little formula makes certain calculations much easier.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Is it really a minimum?} \pause
%\framesubtitle{} 
\begin{itemize}
    \item We have found that all the derivatives of 
\begin{eqnarray*}
    Q(\boldsymbol{\beta}) 
    & = & \sum_{i=1}^n(y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})^2 \\
    & = & (\mathbf{y}-\mathbf{X}\boldsymbol{\beta})^\prime
          (\mathbf{y}-\mathbf{X}\boldsymbol{\beta})  
\end{eqnarray*} \pause
are zero at $\boldsymbol{\beta} = \widehat{\boldsymbol{\beta}}$. \pause
    \item Is the function really a minimum there, and not a maximum or saddle point? \pause
    \item Multivariable second derivative test is to check whether all the eigenvalues of the Hessian matrix $\left(\frac{\partial^2Q}{\partial\beta_i\partial\beta_j} \right)$ are positive. \pause
    \item No thank you!
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Minimize $Q(\boldsymbol{\beta})$ without calculus}
\framesubtitle{Using $\mathbf{X}^\prime \, \widehat{\boldsymbol{\epsilon}} = \mathbf{0}$} 
\begin{eqnarray*}
    Q(\boldsymbol{\beta}) 
    & = & (\mathbf{y}-\mathbf{X}\boldsymbol{\beta})^\prime
          (\mathbf{y}-\mathbf{X}\boldsymbol{\beta}) \\ \pause
    & = & (\mathbf{y}-\widehat{\mathbf{y}} + \widehat{\mathbf{y}}
          - \mathbf{X}\boldsymbol{\beta})^\prime
          (\mathbf{y}-\widehat{\mathbf{y}} + \widehat{\mathbf{y}}
          - \mathbf{X}\boldsymbol{\beta}) \\ \pause
    & = & (\widehat{\boldsymbol{\epsilon}} + 
          \mathbf{X}\widehat{\boldsymbol{\beta}} - \mathbf{X}\boldsymbol{\beta})^\prime 
          (\widehat{\boldsymbol{\epsilon}} + 
          \mathbf{X}\widehat{\boldsymbol{\beta}} - \mathbf{X}\boldsymbol{\beta})
          \\ \pause
    & = & \left(\widehat{\boldsymbol{\epsilon}} + 
          \mathbf{X}(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})\right)^\prime 
          \left(\widehat{\boldsymbol{\epsilon}} + 
          \mathbf{X}(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})\right)
          \\ \pause
    & = & \left(\widehat{\boldsymbol{\epsilon}}^{\,\prime} + 
          (\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})^\prime \mathbf{X}^\prime\right) 
          \left(\widehat{\boldsymbol{\epsilon}} + 
          \mathbf{X}(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})\right) \\ \pause
    & = & \widehat{\boldsymbol{\epsilon}}^{\,\prime} \widehat{\boldsymbol{\epsilon}}
        + \widehat{\boldsymbol{\epsilon}}^{\,\prime} 
          \mathbf{X}(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})
        + (\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})^\prime \mathbf{X}^{\prime\,}
          \widehat{\boldsymbol{\epsilon}}
        + (\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})^\prime \mathbf{X}^\prime
          \mathbf{X}(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})
          \\ \pause
    & = & \widehat{\boldsymbol{\epsilon}}^{\,\prime} \widehat{\boldsymbol{\epsilon}}
        + \hspace{8mm}0 \hspace{10mm}+ \hspace{8mm}0 \hspace{11mm}
        + (\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})^\prime \mathbf{X}^\prime
          \mathbf{X}(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})
          \\ \pause
    & = & \widehat{\boldsymbol{\epsilon}}^{\,\prime} \widehat{\boldsymbol{\epsilon}}
        + (\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})^\prime \mathbf{X}^\prime
          \mathbf{X}(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})
\end{eqnarray*} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$Q(\boldsymbol{\beta}) = \widehat{\boldsymbol{\epsilon}}^{\,\prime} 
            \widehat{\boldsymbol{\epsilon}}
        + (\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})^\prime \mathbf{X}^\prime
          \mathbf{X}(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})$} \pause
%\framesubtitle{} 
\begin{itemize}
    \item The first term, $\sum_{i=1}^n\hat{\epsilon}_i^2 \pause = \sum_{i=1}^n(y_i-\widehat{y}_i)^2$\pause, is called the residual sum of squares, or sum of squares error.  \pause
    \item It does not depend (functionally) on $\boldsymbol{\beta}$. \pause
    \item In the second term, \pause
        \begin{itemize}
            \item The columns of $\mathbf{X}$ are linearly independent, so $\mathbf{X}^\prime\mathbf{X}$ is positive definite. \pause
            \item This means the second term is strictly positive except when 
            $\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta} = \mathbf{0}$. \pause
            \item[$\Leftrightarrow$] $\boldsymbol{\beta} 
            = \widehat{\boldsymbol{\beta}}$. \pause Then, the second term equals zero. \pause
            \item So, $Q(\boldsymbol{\beta})$ has a unique minimum over $\boldsymbol{\beta}$ 
                  when $\boldsymbol{\beta}  = \widehat{\boldsymbol{\beta}}$. \pause
        \end{itemize}
    \item $\widehat{\boldsymbol{\beta}}$ really is the least squares estimate.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Regression through the origin} \pause
%\framesubtitle{} 
{\LARGE
\begin{displaymath}
    Q(\boldsymbol{\beta}) = \widehat{\boldsymbol{\epsilon}}^{\,\prime} 
            \widehat{\boldsymbol{\epsilon}}
        + (\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})^\prime \mathbf{X}^\prime
          \mathbf{X}(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})
\end{displaymath} 
} % End size

\begin{itemize}
    \item This requires only that the columns of $\mathbf{X}$ be linearly independent. \pause
    \item The first column does not have to be all ones. \pause
    \item We can have regression models without an intercept.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Regression through the origin: $\widehat{y}_i = \widehat{\beta}x_i$}
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.2in]{Rice}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{$R^2$}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{If there \emph{is} an Intercept}
%\framesubtitle{} 
First column of $\mathbf{X}$ is all ones and
\begin{displaymath}
    \mathbf{X}^\prime \, \widehat{\boldsymbol{\epsilon}} = 
    \left(\begin{array}{cccccc} 
    {\color{red}1} & {\color{red}1} & {\color{red}1} & 
    {\color{red}1} & {\color{red}\cdots} & {\color{red}1}  \\
    x_{11} & x_{21} & x_{31} & x_{41} & \cdots & x_{n1} \\
    x_{12} & x_{22} & x_{32} & x_{42} & \cdots & x_{n2} \\
    \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
    x_{1k} & x_{2k} & x_{3k} & x_{4k} & \cdots & x_{nk} \\
    \end{array}\right)
    \left(\begin{array}{c}
    {\color{red}\widehat{\epsilon}_1} \\ {\color{red}\widehat{\epsilon}_2} \\ 
    {\color{red}\widehat{\epsilon}_3} \\ {\color{red}\vdots} \\ 
    {\color{red}\widehat{\epsilon}_n} 
    \end{array}\right) \pause
    =  \left(\begin{array}{c}
       {\color{red}0} \\ 0 \\ 0 \\ \vdots \\ 0 
       \end{array}\right)
\end{displaymath} \pause
\begin{eqnarray*}
     &  & \sum_{i=1}^n \widehat{\epsilon}_i = 0 \\ \pause
    & \Leftrightarrow & \sum_{i=1}^n(y_i-\widehat{y}_i) = 0 \\ \pause 
    & \Leftrightarrow & \sum_{i=1}^n y_i = \sum_{i=1}^n \widehat{y}_i 
\end{eqnarray*} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Analysis of Variance} \pause
%\framesubtitle{} 
\begin{itemize}
    \item Variance just means variation. \pause
    \item Variation in a phenomenon means there is something to explain. \pause
    \item Some businesses make more money than others. Why? \pause
    \item Some students get higher grades. Why? \pause
    \item Some covid-19 patients get a lot sicker. Why? \pause
    \item We will measure variation to explain by variation around the sample mean:
\end{itemize}
{\LARGE
\begin{displaymath}
    SST = \sum_{i=1}^n(y_i-\overline{y})^2
\end{displaymath} 
} % End size
\end{frame} 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Theorem} 
%\framesubtitle{} 
If the regression model has an intercept, \pause
{\Large
\begin{displaymath}
\begin{array}{ccccc}
SST &=& SSR &+& SSE \\
%&&&&\\
\sum_{i=1}^n(y_i-\overline{y})^2 &=& 
\sum_{i=1}^n(\widehat{y}_i-\overline{y})^2 &+& 
\sum_{i=1}^n(y_i-\widehat{y}_i)^2
\end{array}
\end{displaymath} \pause
} % End size
Interpretation:  % Talk this through first. Then display the batch.
\begin{itemize}
    \item With no predictor variables, the best guess at $y_i$ is $\overline{y}$. 
    \item $SST$ is variation to be explained. 
    \item With predictor variables, best guess is $\widehat{y}_i$ 
    \item $SSE$ is variation still unexplained.
    \item So $SSR$ must be variation that was explained.
\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Proof of $SST = SSR+SSE$}
%\framesubtitle{} 
\begin{eqnarray*}
    SST & = & \sum_{i=1}^n(y_i-\overline{y})^2 \pause
          =   \sum_{i=1}^n(y_i-\widehat{y}_i+\widehat{y}_i-\overline{y})^2 \\ \pause
        & = &  \sum_{i=1}^n(\widehat{\epsilon}_i+\widehat{y}_i-\overline{y})^2 \\ \pause
        & = &  \sum_{i=1}^n\left(\hat{\epsilon}_i^2
               + 2 \, \widehat{\epsilon}_i(\widehat{y}_i-\overline{y})
               + (\widehat{y}_i-\overline{y})^2\right) \\ \pause
        & = &  \sum_{i=1}^n\hat{\epsilon}_i^2 
               + 2 {\color{red} \sum_{i=1}^n\widehat{\epsilon}_i(\widehat{y}_i-\overline{y}) }
               + \sum_{i=1}^n(\widehat{y}_i-\overline{y})^2 \\ \pause
         & = & \sum_{i=1}^n(y_i-\widehat{y}_i)^2 
               + {\color{red}0}
               + \sum_{i=1}^n(\widehat{y}_i-\overline{y})^2 \\ \pause
         & = & SSE + SSR 
\end{eqnarray*} \pause
because \ldots
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Showing middle term equals zero}
%\framesubtitle{} 
%{\LARGE
\begin{eqnarray*}
    \sum_{i=1}^n\widehat{\epsilon}_i(\widehat{y}_i-\overline{y})  \pause
    & = &   \sum_{i=1}^n\widehat{\epsilon}_i\widehat{y}_i 
          - \sum_{i=1}^n \widehat{\epsilon}_i \overline{y}\\ \pause
    & = &   \sum_{i=1}^n\widehat{y}_i\widehat{\epsilon}_i 
          - \overline{y}\sum_{i=1}^n \widehat{\epsilon}_i \\ \pause
    & = & \widehat{\mathbf{y}}^\prime \, \widehat{\boldsymbol{\epsilon}} + 0\\ \pause
    & = & \left(\mathbf{X}\widehat{\boldsymbol{\beta}}\right)^\prime  \widehat{\boldsymbol{\epsilon}} \\  \pause
    & = & \widehat{\boldsymbol{\beta}}^\prime\mathbf{X}^\prime \, \widehat{\boldsymbol{\epsilon}} \\ \pause
    & = & \widehat{\boldsymbol{\beta}}^\prime\mathbf{0} \\ \pause
    & = & 0
\end{eqnarray*} 
%} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Proportion of Variation Explained by Predictor Variables}
\framesubtitle{Using \emph{SST = SSR + SSE}} 
{\Huge
\begin{displaymath}
    R^2 = \frac{SSR}{SST}
\end{displaymath} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{In simple regression $R^2 = r^2$} \pause
%\framesubtitle{Proof:} 
\begin{center}
$r = \frac{\sum_{i=1}^n (x_i-\overline{x})(y_i-\overline{y})}
               {\sqrt{\sum_{i=1}^n (x_i-\overline{x})^2} \sqrt{\sum_{i=1}^n (y_i-\overline{y})^2}}$ and
$\widehat{\beta}_1 = \frac{\sum_{i=1}^n(x_i-\overline{x})(y_i-\overline{y})}
           {\sum_{i=1}^n(x_i-\overline{x})^2} $\pause, so 
$\widehat{\beta}_1 = r\frac{s_y}{s_x}$ \pause
\end{center}
\vspace{2mm} \hrule

{\small
\begin{eqnarray*}
r\frac{s_y}{s_x} \pause & = & 
\left(\frac{\sum_{i=1}^n (x_i-\overline{x})(y_i-\overline{y})}
{\sqrt{\sum_{i=1}^n (x_i-\overline{x})^2} \sqrt{\sum_{i=1}^n (y_i-\overline{y})^2}} \right)
\left( \frac{\sqrt{\sum_{i=1}^n (y_i-\overline{y})^2/(n-1)}}
            {\sqrt{\sum_{i=1}^n (x_i-\overline{x})^2/(n-1)}} \right)  \\ \pause
&&\\
    & = & \frac{\sum_{i=1}^n (x_i-\overline{x})(y_i-\overline{y})}
               {\sqrt{\sum_{i=1}^n (x_i-\overline{x})^2} \sqrt{\sum_{i=1}^n 
               (x_i-\overline{x})^2}} \\ \pause
&&\\
    & = & \widehat{\beta}_1
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Still showing $R^2 = r^2$, using $\widehat{\beta}_1 = r\frac{s_y}{s_x}$ and $\widehat{\beta}_0 = \overline{y} - \widehat{\beta}_1\overline{x}$}
%\framesubtitle{} 
{\small
\begin{eqnarray*}
    R^2 & = & \frac{SSR}{SST}  \\ \pause
        & = & \frac{1}{SST} \sum_{i=1}^n(\widehat{y}_i-\overline{y})^2 \\ \pause
        & = & \frac{1}{SST} \sum_{i=1}^n
        (\widehat{\beta}_0 + \widehat{\beta}_1x_i - \overline{y})^2 \\ \pause
        & = & \frac{1}{SST} \sum_{i=1}^n
        (\overline{y} - \widehat{\beta}_1\overline{x} + \widehat{\beta}_1x_i 
        - \overline{y})^2 \\ \pause
        & = & \frac{1}{SST} \sum_{i=1}^n
        \left( \widehat{\beta}_1(x_i-\overline{x}) \right)^2 \\ \pause
        & = & \frac{\widehat{\beta}_1^{\,2}}{SST} 
               \sum_{i=1}^n(x_i-\overline{x})^2 \\ 
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Continued \ldots}
\framesubtitle{Using $\widehat{\beta}_1 = r\frac{s_y}{s_x}$} 
{\large
\begin{eqnarray*}
    & = & \frac{\widehat{\beta}_1^{\,2}}{SST} 
          \sum_{i=1}^n(x_i-\overline{x})^2 \\  \pause
&&\\
    & = & \left(r\frac{s_y}{s_x} \right)^2 
           \frac{\sum_{i=1}^n(x_i-\overline{x})^2}{\sum_{i=1}^n(y_i-\overline{y})^2} \\ \pause
&&\\
    & = & r^2\frac{s^2_y}{s^2_x} 
           \frac{\sum_{i=1}^n(x_i-\overline{x})^2}{\sum_{i=1}^n(y_i-\overline{y})^2} \\ \pause
&&\\
    & = & r^2
\end{eqnarray*} 
} % End size
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$R^2 = r^2$ helps with interpretation of $R^2$} \pause
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.4in]{r0}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$R^2 = r^2$ helps with interpretation of $R^2$} 
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.4in]{rpoint1}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$R^2 = r^2$ helps with interpretation of $R^2$} 
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.4in]{rpoint2}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$R^2 = r^2$ helps with interpretation of $R^2$} 
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.4in]{rpoint3}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$R^2 = r^2$ helps with interpretation of $R^2$} 
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.4in]{rpoint4}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$R^2 = r^2$ helps with interpretation of $R^2$} 
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.4in]{rpoint5}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Lesson}
%\framesubtitle{} 
\begin{itemize}
    \item Since I start to see a relationship at around $r = 0.3$, I start to get interested in a multiple regression when $R^2>0.09$. \pause
    \item Also, the squared sample correlation between $y_i$ and $\widehat{y}_i$ is $R^2$.
\end{itemize}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Estimating $\sigma^2$}     
% This might be better in the next lecture set. It mars the continuity, and it might benefit from some of the projection stuff first. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Estimating $\sigma^2$}
\framesubtitle{} 
Why estimate $\sigma^2$? \pause \vspace{5mm}
\begin{itemize}
    \item The model says that $y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik} + \epsilon_i$. \pause
    \item The response is an expected value plus a piece of random noise, $\epsilon_i$.  \pause
    \item $Var(\epsilon_i) = \sigma^2$, so $\sigma^2$ is how loud the noise is. \pause
    \item The more noisy the data, the less precise the estimated $\beta_j$. \pause
% Less sensitive tests. Mention tradeoff between noisiness and sample size.
    \item Need to estimate how precise our estimates are. \pause
    \item Estimated $\sigma^2$ appears in all the tests and confidence intervals.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Base estimate of $\sigma^2$ on \emph{SSE}} \pause
%\framesubtitle{} 
\begin{itemize}
    \item Can't estimate $\sigma^2$ by least squares, because $E(y_i)$ is not a function of $\sigma^2$. \pause
    \item But think of $s^2 = \frac{\sum_{i=1}^n(y_i-\overline{y}_n)^2}{n-1}$ \pause
    \item Seek an estimator based on \emph{SSE} = $\sum_{i=1}^n(y_i-\widehat{y}_i)^2$. \pause
    \item But first, some preliminary results:
        \begin{itemize}
            \item $tr(\mathbf{H}) = k+1$
            \item $(\mathbf{I}-\mathbf{H})\mathbf{y} = 
                   (\mathbf{I}-\mathbf{H})\boldsymbol{\epsilon}$
        \end{itemize}
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Preliminaries}
%\framesubtitle{} 
{\LARGE
\begin{eqnarray*}
    tr(\mathbf{H}) & = & 
    tr\left( \mathbf{X}(\mathbf{X}^\prime\mathbf{X})^{-1} 
            {\color{blue}\mathbf{X}^\prime} \right) \\ \pause
    & = &   tr\left( {\color{blue}\mathbf{X}^\prime}\mathbf{X}
            (\mathbf{X}^\prime\mathbf{X})^{-1} \right) \\  \pause
    & = &   tr\left( \mathbf{I}_{_{k+1}} \right) \\  \pause
    & = &   k+1
\end{eqnarray*} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Preliminaries}
%\framesubtitle{} 
{\Large
\begin{eqnarray*}
    (\mathbf{I}-\mathbf{H})\mathbf{y} 
    & = & (\mathbf{I}-\mathbf{H})
    (\mathbf{X}\boldsymbol{\beta}+\boldsymbol{\epsilon}) \\ \pause
    & = & (\mathbf{I}-\mathbf{H})\mathbf{X}\boldsymbol{\beta} + 
          (\mathbf{I}-\mathbf{H})\boldsymbol{\epsilon} \\ \pause
    & = & \mathbf{X}\boldsymbol{\beta}
      -   {\color{blue}\mathbf{H}}\mathbf{X}\boldsymbol{\beta} 
      +   (\mathbf{I}-\mathbf{H})\boldsymbol{\epsilon} \\ \pause
    & = & \mathbf{X}\boldsymbol{\beta} 
      -   {\color{blue}\mathbf{X}(\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime} 
          \mathbf{X}\boldsymbol{\beta} 
      +   (\mathbf{I}-\mathbf{H})\boldsymbol{\epsilon} \\ \pause
    & = & \mathbf{X}\boldsymbol{\beta} - \mathbf{X}\boldsymbol{\beta}
      +   (\mathbf{I}-\mathbf{H})\boldsymbol{\epsilon} \\ \pause
    & = & (\mathbf{I}-\mathbf{H})\boldsymbol{\epsilon}
\end{eqnarray*} 
} % End size
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Seek an unbiased estimator of $\sigma^2$}
%\framesubtitle{} 
{\footnotesize
\begin{eqnarray*}
    E\left\{ \sum_{i=1}^n(y_i-\widehat{y}_i)^2 \right\} \pause
    & = & E\left\{ \widehat{\boldsymbol{\epsilon}}^{\,\prime} 
                  \widehat{\boldsymbol{\epsilon}} \right\} \\ \pause
    & = & E\left\{ tr\left(\widehat{\boldsymbol{\epsilon}}^{\,\prime} 
                  \widehat{\boldsymbol{\epsilon}} \right)\right\} \\  \pause
    & = & E\left\{ tr\left([(\mathbf{I}-\mathbf{H})\mathbf{y}]^\prime 
                  (\mathbf{I}-\mathbf{H})\mathbf{y} \right)\right\} \\  \pause
    & = & E\left\{ tr\left([(\mathbf{I}-\mathbf{H})\boldsymbol{\epsilon}]^\prime 
                  (\mathbf{I}-\mathbf{H})\boldsymbol{\epsilon} \right)\right\} \\  \pause
    & = & E\left\{ tr\left( 
                  \boldsymbol{\epsilon}^\prime
                  (\mathbf{I}-\mathbf{H})^\prime
                  (\mathbf{I}-\mathbf{H})\boldsymbol{\epsilon} 
                  \right)\right\} \\  \pause
    & = & E\left\{ tr\left( 
                  \boldsymbol{\epsilon}^\prime
                  (\mathbf{I}-\mathbf{H}){\color{blue}\boldsymbol{\epsilon}}
                  \right)\right\} \\  \pause
    & = & E\left\{ tr\left( 
                  {\color{blue}\boldsymbol{\epsilon}}\boldsymbol{\epsilon}^\prime
                  (\mathbf{I}-\mathbf{H})
                  \right)\right\} \\  \pause
    & = & tr\left( E\left\{\boldsymbol{\epsilon}\boldsymbol{\epsilon}^\prime \right\}
                  (\mathbf{I}-\mathbf{H})
                  \right) \\  \pause
    & = & tr\left( E\left\{(\boldsymbol{\epsilon}-\mathbf{0})
                           (\boldsymbol{\epsilon}-\mathbf{0})^\prime \right\}
                  (\mathbf{I}-\mathbf{H})
                  \right) \\  \pause
    & = & tr\left( cov(\boldsymbol{\epsilon}) (\mathbf{I}-\mathbf{H}) \right) \\  \pause
    & = & tr\left( \sigma^2\mathbf{I}_{_n} (\mathbf{I}-\mathbf{H}) \right) \\  \pause
    & = & \sigma^2 tr(\mathbf{I}-\mathbf{H}) \pause
      =   \sigma^2 \left( tr(\mathbf{I}) - tr(\mathbf{H}) \right)\\  \pause
    & = & \sigma^2 \left( n - (k+1) \right)
\end{eqnarray*} 
} % End size                    
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$E\left( \sum_{i=1}^n(y_i-\widehat{y}_i)^2 \right) = \sigma^2 (n-k-1)$} \pause
%\framesubtitle{} 
\begin{itemize}
    \item $E(SSE) = \sigma^2 (n-k-1)$, so $E\left(\frac{SSE}{n-k-1}\right) = \sigma^2$.  \pause
    \item $E(MSE) = \sigma^2$ ~~ ($MSE$ = Mean Squared Error) \pause
    \item $s^2$ is an unbiased estimator of $\sigma^2$, where
{\LARGE
\begin{displaymath}
    s^2 =  \frac{\sum_{i=1}^n(y_i-\widehat{y}_i)^2}{n-k-1} \pause = 
    \frac{ \widehat{\boldsymbol{\epsilon}}^{\,\prime} \widehat{\boldsymbol{\epsilon}}}
         {n-k-1} = \mbox{\emph{MSE}}
\end{displaymath} \pause
} % End size
    \item We are estimating $\sigma^2$ with average squared vertical distance from the points to the plane. \pause
    \item To avoid confusion, we will usually call it \emph{MSE}.
\end{itemize}



\end{frame}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Maximum likelihood HW
% $E\left( \sum_{i=1}^n(y_i-\widehat{y}_i)^2 \right)$
% \mathbf{y}=\mathbf{X}\boldsymbol{\beta}+\boldsymbol{\epsilon}
% \mathbf{X}(\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Curve Fitting}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Least Squares Estimation is Curve Fitting}
\framesubtitle{Minimizing $\sum_{i=1}^n(y_i - \widehat{\beta}_0 - \widehat{\beta}_1 x_i)^2$} 
\begin{center}
\includegraphics[width=3in]{Generic1}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Least Squares Estimation is Curve Fitting}
\framesubtitle{Minimizing $\sum_{i=1}^n(y_i - \widehat{\beta}_0 - \widehat{\beta}_1 x_i)^2$} 
\begin{center}
\includegraphics[width=3in]{Generic2}
\end{center}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Best Fitting Line or Plane}
%\framesubtitle{} 
\begin{columns}  
\column{0.5\textwidth}
\includegraphics[width=2in]{Generic1}
\column{0.5\textwidth}
  \begin{itemize}
    \item $\widehat{y} = \widehat{\beta}_0 + \widehat{\beta}_1 x$ is the equation of a line.  \pause
    \item $\widehat{y}_i$ is the point on the line corresponding to $x_i$. \pause
    \item  $\widehat{y} = \widehat{\beta}_0 + \widehat{\beta}_1 x_1 +  \widehat{\beta}_2 x_2$ is the equation of a plane. \pause
    \item $\widehat{y}_i$ is the point on the plane corresponding to $(x_{i,1},x_{i,2})$. \pause
  \end{itemize}
\end{columns}
\vspace{5mm}

$\widehat{y} = \widehat{\beta}_0 + \widehat{\beta}_1 x_1 + \cdots + \widehat{\beta}_k x_k$ is the equation of a hyper-plane.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Fitting a curve: $\widehat{y} = \widehat{\beta}_0 + \widehat{\beta}_1 \sqrt{x}$}
\framesubtitle{Transform the explanatory variable} 
\begin{center}
\includegraphics[width=3in]{rocks}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
\frametitle{R Code for the Record}
%\framesubtitle{} 
{\scriptsize
\begin{verbatim}
rm(list=ls())
rocks = 
read.table('http://www.utstat.toronto.edu/~brunner/data/legal/rock1.data.txt')
head(rocks); attach(rocks)
plot(support,bforce, xlab = 'Lateral Support', ylab='Breaking Strength',
main = 'Lateral Support and Breaking Strength of Rock Cores')
sqrtsup = sqrt(support)
# Fit the model bforce_i = beta_0 + beta_1 sqrtsup_i + epsilon_i
fit = lm(bforce ~ sqrtsup) 
betahat = coefficients(fit); betahat
xx = seq(from=0,to=10,by=0.1); yy = betahat[1] + betahat[2]*sqrt(xx)
lines(xx,yy)
\end{verbatim} 
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Our text emphasizes curve fitting}
\framesubtitle{In the presentation of least squares}  \pause
\begin{itemize}
    \item They minimize over $\widehat{\beta}_j$ rather than $\beta_j$ right from the beginning. They minimize $Q(\widehat{\boldsymbol{\beta}}) = 
    \widehat{\boldsymbol{\epsilon}}^{\,\prime} \widehat{\boldsymbol{\epsilon}}$. \pause
    \item We minimize over $\beta_j$ and put hats on the answer. \pause
    \item Their point is that the curve fitting can be useful (maybe for prediction) even if you don't believe the model at all.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{}
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.5in]{bend1}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{}
%\framesubtitle{} 
\begin{center}
\includegraphics[width=3.5in]{bend2}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Machine Learning} \pause
%\framesubtitle{} 
\begin{itemize}
    \item Machine learning algorithms are often based on statistical models, but the models are often not mentioned.  \pause
    \item Prediction is emphasized over tests and confidence intervals. \pause
    \item ``Learning" means parameter estimation. \pause
    \item The algorithm ``learns" by minimizing a ``loss function." \pause
    \item In our case, the loss function is 
    $\sum_{i=1}^n(y_i - \beta_0 - \beta_1 x_{i1} - \cdots - \beta_k x_{ik})^2$. \pause
    \item In machine learning, the loss function is usually minimized numerically, but here we can do it explicitly. \pause
    \item Sometimes, disregarding the model can lead to important new methods and insights. \pause
    \item But even hard core machine learning hackers should know the details of one good model-based method. 
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by  \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistical Sciences, University of Toronto. It is licensed under a 
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
     {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f20} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f20}}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}

% Possible homework

    % Show X'X-inverse positive definite.
    % Minimize sum of (y_i-mu)^2 without calculus.
    % Show that if H (or any idempotent matrix) is nonsingular, then H=I.
    % What if X is square and non-singular. Show epsilon-hat = 0.
    % Show (I-H) symmetric and idempotent.
    % Show (I-H)y = (I-H)epsilon 
    % Decomposition of sum(y_i)^2? = SST + n ybar^2  ?
    % Show H = I ..
    % Maximum likelihood HW: sigma^2 as part b.


\begin{frame}
\frametitle{}
%\framesubtitle{} 
  \begin{itemize}
    \item 
    \item 
    \item 
  \end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}
    
\end{displaymath} }

\begin{comment}
\begin{displaymath}
    \left(\begin{array}{cccc}
    \end{array}\right)
\end{displaymath}
\end{comment}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{}
\section{}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{comment}
# Picture of residual 48
senic = read.table('http://www.utstat.toronto.edu/~brunner/data/legal/openSENIC.data.txt')
attach(senic)
b = coefficients(lm(infpercent ~ census)); b
plot(census,infpercent,main = 'Number of Patients and Percent Infected')
x = c(0,800); y = b[1] + b[2]*x
lines(x,y)

# Just finding a good one for illustrating residual.
n = 100
id = 1:n; c1 = c2 = c3 = numeric(n)
c1[300 < census] = 1; c2[census < 400] = 1; c3[infpercent<3] = 1
id[c1*c2*c3 == 1] # 8 48
census[c(8,48)] # Pick census[48] = 320
infpercent[48] # 2.9
x48 = c(320,320) ; y48 = c(2.9,b[1]+b[2]*320); y48
# points(320,y48[2]) # Just checking ...

lines(x48,y48,lty=2)
\end{comment}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Regression through the origin: 

rm(list=ls())
n = 10; beta = 0.7; sigma = 0.05
set.seed(9999)
x = rnorm(n,0.35,0.10); x = round(x,2)
y = beta*x + rnorm(n,0,sigma); y = round(y,2)
rto = lm(y ~ 0 + x);  betahat = coefficients(rto); betahat
xx = c(0,0.5); yy = c(0,betahat*.5); lines(xx,yy)

plot(x,y,xlim=c(0,.5),ylim=c(0,.5), xlab='Before', ylab='After',
main = 'Arsenic mg/kg in Rice Before and After Washing')
rto = lm(y ~ 0 + x); summary(rto)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Generic scatterplot with beta-hats

rm(list=ls())
n = 100; beta0 = 0; beta1 = 1;  sigma = 10
set.seed(9999)
x = rnorm(n,100,15); x = round(x)
y = beta0 + beta1*x + rnorm(n,0,sigma); y = round(y)
mod = lm(y ~ x); b = coefficients(mod)
xx = c(min(x),max(x)); yy = b[1] + b[2]*xx
tstring = expression(paste(hat(y),' = ',hat(beta)[0],' + ',hat(beta)[1],'x'))
plot(x,y,main=tstring); lines(xx,yy)
# Try drawing the residuals
for(i in 1:n)
    {
    xx = c(x[i],x[i]); yhat = b[1] + b[2]*x[i]; yy = c(y[i],yhat)
    lines(xx,yy,lty=2)
    }



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

rm(list=ls())
rocks = read.table('http://www.utstat.toronto.edu/~brunner/data/legal/rock1.data.txt')
head(rocks); attach(rocks)
plot(support,bforce, xlab = 'Lateral Support', ylab='Breaking Strength',
main = 'Lateral Support and Breaking Strength of Rock Cores')
sqrtsup = sqrt(support)
# Fit the model bforce_i = beta_0 + beta_1 sqrtsup_i + epsilon_i
fit = lm(bforce ~ sqrtsup) 
betahat = coefficients(fit); betahat
xx = seq(from=0,to=10,by=0.1); yy = betahat[1] + betahat[2]*sqrt(xx)
lines(xx,yy)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Lifted from 2020 A2 Q 7e. There is a visible bend.
rm(list=ls())
x = c( 0.0,  1.3,  3.2, -2.5, -4.6, -1.6,  4.5,  3.8)
y = c(-0.8, -1.3,  7.4, -5.2, -6.5, -4.9,  9.9,  7.2)
plot(x,y, xlim = c(-5,5), ylim = c(-10,10))
fit = lm(y ~ x)
betahat = coefficients(fit) 
xx = c(-5,5); yy = betahat[1] + betahat[2]*xx
lines(xx,yy)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Scatterplots with various r values, no axis labels, writes on pdf file.
rm(list=ls())
# rmvn <- function(nn,mu,sigma)
source("http://www.utstat.toronto.edu/~brunner/Rfunctions/rmvn.txt")
n = 10000; Mu = c(0,0)
##########
rho = 0.0
##########
Sigma = rbind(c(1,rho),
              c(rho,1))
set.seed(9999)
dat = rmvn(n,Mu,Sigma); x=dat[,1]; y=dat[,2]
r = round(cor(x,y),2)
fname = paste("r",rho,".pdf",sep="")
pdf(file=fname,width=8,height=8) 
plot(x,y, ann=F, axes=F, pch='.')
axis(side=1, labels=F, at=c(-10,10)) # Below, no labels, tick marks off the map
axis(side=2, labels=F, at=c(-10,10)) # Left,  no labels, tick marks off the map
title(paste("r = ",r))
dev.off() # Turns off writing to pdf, closes (creates) file.
################################################################################