\documentclass[10pt]{article}
%\usepackage{amsbsy}   % for \boldsymbol and \pmb
%\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\oddsidemargin=0in    % Good for US Letter paper
\evensidemargin=0in
\textwidth=6.3in
\topmargin=-0.5in
\headheight=0.1in
\headsep=0.1in
\textheight=9.4in
\pagestyle{empty}     % No page numbers
\begin{document}
\enlargethispage*{1000 pt}
\begin{center} % Version 9
{\Large \textbf{STA 302 Formulas}\footnote{This formula sheet was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/302f20}{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f20}}}}\\
\vspace{1 mm}
\end{center}

%%%%%%%%%%%%%%%%%%%%%% Expected value, variance and covariance %%%%%%%%%%%%%%%%%%%%%%
\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{ll}
$E(X) \stackrel{def}{=} \sum_x \, x \, p_{_X}(x)$ &
$E(X) \stackrel{def}{=} \int_{-\infty}^\infty x f_{_X}(x) \, dx$ \\
$E(g(X)) = \sum_x g(x) \, p_{_X}(x)$ &
$E(g(\mathbf{X})) = \sum_{x_1} \cdots \sum_{x_p} g(x_1, \ldots, x_p) \, p_{_\mathbf{X}}(x_1, \ldots, x_p) $ \\
$E(g(X)) = \int_{-\infty}^\infty g(x) \, f_{_X}(x) \, dx$ &
$E(g(\mathbf{X})) = \int_{-\infty}^\infty \cdots \int_{-\infty}^\infty g(x_1, \ldots, x_p) \, f_{_\mathbf{X}}(x_1, \ldots, x_p) \, dx_1 \ldots dx_p $ \\
$E(\sum_{i=1}^na_iX_i) = \sum_{i=1}^na_iE(X_i)$ &
$Var(X) \stackrel{def}{=} E\left( \, (X-\mu_{_X})^2 \, \right) = E(X^2)-[E(X)]^2$ \\
$Cov(X,Y) \stackrel{def}{=} E\left( \, (X-\mu_{_X})(Y-\mu_{_Y}) \, \right)$ &
$Cov(X,Y) = E(XY)-E(X)E(Y)$ \\
$Corr(X,Y) \stackrel{def}{=} \frac{Cov(X,Y)}{\sqrt{Var(X)Var(Y)} } $ &
$Cov\left(\sum_{i=1}^na_iX_i~,\,\sum_{j=1}^m b_j Y_j \right) = \sum_{i=1}^n\sum_{j=1}^m a_i b_j Cov\left( X_i, Y_j \right)$\\
$M_{_X}(t) = E(e^{Xt})$ &
$M_{_{aX}}(t) = M_{_X}(at)$ \\
$M_{_{X+a}}(t) = e^{at}M_{_X}(t)$ &
$M_{_{\sum_{i=1}^n X_i}}(t) = \prod_{i=1}^n M_{X_i}(t)$ for $X_1, \ldots, X_n$ independent \\
$X \sim N(\mu,\sigma^2)$ means $M_{_X}(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$ &
$X \sim \chi^2(\nu)$ means $M_{_X}(t) = (1-2t)^{-\nu/2}$ \\
\multicolumn{2}{l}{If $W=W_1+W_2$ with $W_1$ and $W_2$ independent, $W\sim\chi^2(\nu_1+\nu_2)$ and $W_2\sim\chi^2(\nu_2)$, then $W_1\sim\chi^2(\nu_1)$.} \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}
\vspace{-8mm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Normal distribution basics %%%%%%%%%%%%%%%%%%%%%%%
\noindent ~~\,If $x_1, \ldots, x_n$ is a random sample from a Normal$(\mu,\sigma^2)$ distribution, then
$f(x) = \frac{1}{\sigma \sqrt{2\pi}} e^{-\frac{(x-\mu)^2}{2 \sigma^2}}$, and
\begin{itemize}
\item $\widehat{\mu} = \overline{x}_n$ and $\widehat{\sigma}^2 = \frac{1}{n} \sum_{i=1}^n(x_i-\overline{x}_n)^2 = \left(\frac{n-1}{n} \right)s^2$, where $s^2 = \frac{1}{n-1} \sum_{i=1}^n(x_i-\overline{x}_n)^2 $
\item $\overline{x}_n$ and $s^2$ are independent.
\item $\frac{(n-1)s^2}{\sigma^2} \sim \chi^2(n-1)$.
\item $t = \frac{\sqrt{n}(\overline{x}_n-\mu)}{s} \sim t(n-1)$
\end{itemize}
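\vspace{2mm}
\noindent These facts can be checked numerically. A minimal R sketch (the sample is simulated and all numbers are invented for illustration):
\begin{verbatim}
set.seed(302)                        # Invented seed, for reproducibility
x = rnorm(25, mean=100, sd=15)       # Simulated sample: n = 25, mu = 100, sigma = 15
n = length(x); xbar = mean(x)        # Sample size and sample mean
s2 = var(x)                          # var() uses the n-1 denominator, so this is s^2
sigma2hat = (n-1)/n * s2             # Maximum likelihood estimate of sigma^2
tstat = sqrt(n)*(xbar-100)/sqrt(s2)  # Compare with qt(0.975, df = n-1)
\end{verbatim}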
\vspace{3mm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Matrices %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\noindent
\begin{tabular}{lll}
$\left(\begin{array}{cc} a & b \\ c & d \end{array}\right)^{-1} = \frac{1}{ad-bc} \left(\begin{array}{rr} d & -b \\ -c & a \end{array}\right)$ & ~~~~~ &
\hspace{16mm} $\mathbf{AB}=\left[ \displaystyle \sum_{k}a_{i,k}b_{k,j}\right]$ \hspace{7mm} $tr(\mathbf{AB}) = tr(\mathbf{BA})$ \\
&&\\
\multicolumn{3}{l}{The square matrix $\mathbf{A}$ has an eigenvalue equal to $\lambda$ with corresponding eigenvector $\mathbf{x} \neq \mathbf{0}$ if $\mathbf{Ax} = \lambda\mathbf{x}$.}
\end{tabular}

\vspace{2mm}

\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
\parbox{7 cm}{Columns of $\mathbf{A}$ \emph{linearly dependent} means there is a vector $\mathbf{v} \neq \mathbf{0}$ with $\mathbf{Av} = \mathbf{0}$.} & ~~~~~ &
\parbox{7 cm}{Columns of $\mathbf{A}$ \emph{linearly independent} means that $\mathbf{Av} = \mathbf{0}$ implies $\mathbf{v} = \mathbf{0}$.} \\
\multicolumn{3}{l}{$\mathbf{A}$ \emph{positive definite} means $\mathbf{v}^\prime \mathbf{Av} > 0$ for all vectors $\mathbf{v} \neq \mathbf{0}$.} \\
\multicolumn{3}{l}{Below, $\boldsymbol{\Sigma}$ is symmetric, the columns of $\mathbf{C}$ are its orthonormal eigenvectors ($\mathbf{C}^\prime = \mathbf{C}^{-1}$), and $\mathbf{D}$ is the diagonal matrix of its eigenvalues.} \\
$\boldsymbol{\Sigma} = \mathbf{CD} \mathbf{C}^\prime$ & ~~~~~ & $\boldsymbol{\Sigma}^{-1} = \mathbf{C} \mathbf{D}^{-1} \mathbf{C}^\prime$ \\
$\boldsymbol{\Sigma}^{1/2} = \mathbf{CD}^{1/2} \mathbf{C}^\prime$ & ~~~~~ & $\boldsymbol{\Sigma}^{-1/2} = \mathbf{CD}^{-1/2} \mathbf{C}^\prime$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%% Random vectors %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$cov(\mathbf{y}) = E\left\{(\mathbf{y}-\boldsymbol{\mu}_y)(\mathbf{y}-\boldsymbol{\mu}_y)^\prime\right\}$ & ~~~~~ &
$cov(\mathbf{y,t}) = E\left\{ (\mathbf{y}-\boldsymbol{\mu}_y) (\mathbf{t}-\boldsymbol{\mu}_t)^\prime\right\}$ \\
$cov(\mathbf{y}) = E\{\mathbf{yy}^\prime\} - \boldsymbol{\mu}_y\boldsymbol{\mu}_y^\prime$ & ~~~~~ &
$cov(\mathbf{Ay}) = \mathbf{A}cov(\mathbf{y}) \mathbf{A}^\prime$ \\
$cov(\mathbf{Ay,By}) = \mathbf{A}cov(\mathbf{y}) \mathbf{B}^\prime$ & ~~~~~ & \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}

\newpage
%%%%%%%%%%%%%%%%%%%%%%% Multivariate MGF, MVN %%%%%%%%%%%%%%%%%%%%%%%%%
\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
$M_{\mathbf{v}}(\mathbf{t}) = E(e^{\mathbf{t}^\prime\mathbf{v}})$ & ~~~~~ &
$M_{\mathbf{Av}}(\mathbf{t}) = M_{\mathbf{v}}(\mathbf{A}^\prime\mathbf{t})$ \\
$M_{\mathbf{v}+\mathbf{c}}(\mathbf{t}) = e^{\mathbf{t}^\prime\mathbf{c}} M_{\mathbf{v}}(\mathbf{t})$ & ~~~~~ &
\parbox{7 cm}{$\mathbf{v}_1$ and $\mathbf{v}_2$ are independent if and only if $M_{(\mathbf{v}_1,\mathbf{v}_2)}\left(\mathbf{t}_1,\mathbf{t}_2\right) = M_{\mathbf{v}_1}(\mathbf{t}_1) M_{\mathbf{v}_2}(\mathbf{t}_2)$.} \vspace{2mm} \\
$\mathbf{v} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$ means $M_{\mathbf{v}}(\mathbf{t}) = e^{\mathbf{t}^\prime\boldsymbol{\mu} + \frac{1}{2} \mathbf{t}^\prime \boldsymbol{\Sigma} \mathbf{t}}$ & ~~~~~ &
\parbox{7 cm}{For the multivariate normal, zero covariance implies independence.} \\
If $\mathbf{v} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$, then $\mathbf{Av} + \mathbf{c} \sim N_q(\mathbf{A}\boldsymbol{\mu}+\mathbf{c}, \mathbf{A}\boldsymbol{\Sigma} \mathbf{A}^\prime)$, & ~~~~~ &
and $w = (\mathbf{v}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{v}-\boldsymbol{\mu}) \sim \chi^2(p)$ \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}
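\vspace{2mm}
\noindent A minimal R sketch of the spectral decomposition and multivariate normal facts above, using the MASS package for \texttt{mvrnorm} (the parameter values are invented):
\begin{verbatim}
library(MASS)                            # For mvrnorm
mu = c(0,0); Sigma = rbind(c(2,1),
                           c(1,2))       # Invented positive definite Sigma
eig = eigen(Sigma)                       # C = eig$vectors, eigenvalues in eig$values
C = eig$vectors; D = diag(eig$values)
C %*% D %*% t(C)                         # Reconstructs Sigma
SqrtSigma = C %*% sqrt(D) %*% t(C)       # Sigma^{1/2}
SqrtSigma %*% SqrtSigma                  # Equals Sigma again
v = mvrnorm(1, mu, Sigma)                # One N_2(mu, Sigma) draw
w = t(v-mu) %*% solve(Sigma) %*% (v-mu)  # Behaves like chi-squared, df = 2
\end{verbatim}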
%%%%%%%%%%%%%%%%%%%%%%%%%%% Simple regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
$y_i = \beta_0 + \beta_1 x_i + \epsilon_i$ & ~~~~~ &
$\widehat{\beta}_0 = \overline{y} - \widehat{\beta}_1\overline{x}$ \\
$\widehat{\beta}_1 = \frac{\sum_{i=1}^n(x_i-\overline{x})(y_i-\overline{y})}{\sum_{i=1}^n(x_i-\overline{x})^2} = \frac{\sum_{i=1}^n x_iy_i - n \, \overline{x} \, \overline{y}}{\sum_{i=1}^n x_i^2 - n\overline{x}^2}$ & ~~~~~ &
$r = \frac{\sum_{i=1}^n (x_i-\overline{x})(y_i-\overline{y})}{\sqrt{\sum_{i=1}^n (x_i-\overline{x})^2} \sqrt{\sum_{i=1}^n (y_i-\overline{y})^2}}$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%% Multiple regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik} + \epsilon_i$ & ~~~~~ &
with $E(\epsilon_i)=0$, $Var(\epsilon_i)=\sigma^2$, $Cov(\epsilon_i,\epsilon_j)=0$ for $i \neq j$ \\
$\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$ with $E(\boldsymbol{\epsilon})=\mathbf{0}$, $cov(\boldsymbol{\epsilon})=\sigma^2\mathbf{I}_n$ & ~~~~~ &
$\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime \mathbf{y}$ \\
$\widehat{\mathbf{y}} = \mathbf{X}\widehat{\boldsymbol{\beta}} = \mathbf{Hy}$, where $\mathbf{H} = (h_{ij}) = \mathbf{X}(\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime $ & ~~~~~ &
$\widehat{\boldsymbol{\epsilon}} = \mathbf{y} - \widehat{\mathbf{y}} = (\mathbf{I}-\mathbf{H})\mathbf{y}$ \hspace{8mm} $\mathbf{X}^\prime\widehat{\boldsymbol{\epsilon}} = \mathbf{0}$ \\
$\sum_{i=1}^n(y_i-\overline{y})^2 = \sum_{i=1}^n(y_i-\widehat{y}_i)^2 + \sum_{i=1}^n(\widehat{y}_i-\overline{y})^2$ & ~~~~~ &
$SST=SSE+SSR$ and $R^2 = \frac{SSR}{SST}$ \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}
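\vspace{2mm}
\noindent A minimal R sketch of the least squares formulas above (the data are simulated and all object names are illustrative):
\begin{verbatim}
set.seed(1)                              # Invented toy data
n = 50; x1 = rnorm(n); x2 = rnorm(n)
y = 1 + 2*x1 - x2 + rnorm(n)             # True beta = (1, 2, -1), sigma = 1
X = cbind(1, x1, x2)                     # Design matrix with intercept column
betahat = solve(t(X)%*%X) %*% t(X)%*%y   # (X'X)^{-1} X'y
H = X %*% solve(t(X)%*%X) %*% t(X)       # Hat matrix
yhat = H %*% y; ehat = y - yhat          # Fitted values and residuals
coef(lm(y ~ x1 + x2))                    # Agrees with betahat
\end{verbatim}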
%%%%%%%%%%%%%%%%%%%%%%%%%%%% Normal model %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\vspace{2mm}
\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
$\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$ with $\boldsymbol{\epsilon} \sim N(\mathbf{0},\sigma^2\mathbf{I}_n)$ & ~~~~~ &
\parbox{7 cm}{$y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik} + \epsilon_i$ \\ $\epsilon_1, \ldots, \epsilon_n$ independent $N(0,\sigma^2)$} \\
$\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime \mathbf{y} \sim N_{k+1}\left(\boldsymbol{\beta},\sigma^2(\mathbf{X}^\prime \mathbf{X})^{-1}\right)$ & ~~~~~ &
$\widehat{\boldsymbol{\beta}}$ and $\widehat{\boldsymbol{\epsilon}}$ are independent under normality. \\
$s^2 = \frac{\widehat{\boldsymbol{\epsilon}}^{\,\prime \,} \widehat{\boldsymbol{\epsilon}}}{n-k-1} = \frac{\mbox{\footnotesize\emph{SSE}}}{n-k-1} =$ \emph{MSE} & ~~~~~ &
$\frac{\mbox{\footnotesize\emph{SSE}}}{\sigma^2} = \frac{\widehat{\boldsymbol{\epsilon}}^{\,\prime \,}\widehat{\boldsymbol{\epsilon}}}{\sigma^2} \sim \chi^2(n-k-1)$ \\
$t = \frac{z}{\sqrt{w/\nu}} \sim t(\nu)$ & ~~~~~ & $F = \frac{w_1/\nu_1}{w_2/\nu_2} \sim F(\nu_1,\nu_2)$ \\
$t = \frac{\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} -\mathbf{a}^\prime \boldsymbol{\beta}}{\sqrt{\mbox{\footnotesize\emph{MSE}} \, \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{a}}} \sim t(n-k-1)$ & ~~~~~ &
$\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} \pm t_{\alpha/2} \, \sqrt{\mbox{\small\emph{MSE}} \, \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{a}}$ \\
$F^* = \frac{(\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime (\mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{C}^\prime)^{-1} (\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t})}{q \, \mbox{\footnotesize\emph{MSE}}} \stackrel{H_0}{\sim} F(q,n-k-1)$ & ~~~~~ &
$F^* = \frac{\mbox{\footnotesize\emph{SSR(full)$-$SSR(reduced)}}}{q \, \mbox{\footnotesize\emph{MSE(full)}}} = \left( \frac{n-k-1}{q} \right) \left( \frac{p}{1-p} \right)$ \\
$p = \frac{R^2(\mbox{\footnotesize\emph{full}}) - R^2(\mbox{\footnotesize\emph{reduced}})}{1-R^2(\mbox{\footnotesize\emph{reduced}})} $ & ~~~~~ &
$p = \frac{qF^*}{qF^*+n-k-1}$ \\
$t = \frac{y_0-\mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}}}{\sqrt{MSE \, (1+\mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}} \sim t(n-k-1)$ & ~~~~~ &
$\mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}} \pm t_{\alpha/2} \, \sqrt{\mbox{\small\emph{MSE}} \, (1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}$ \\
$t_i = \frac{y_i-\mathbf{x}_i^\prime \widehat{\boldsymbol{\beta}}_{(i)}}{\sqrt{MSE_{(i)}(1+\mathbf{x}_i^\prime (\mathbf{X}_{(i)}^\prime \mathbf{X}_{(i)})^{-1}\mathbf{x}_i)}} \sim t(n-k-2)$ & ~~~~~ & \\
$\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$ with $\boldsymbol{\epsilon} \sim N(\mathbf{0},\sigma^2\mathbf{V})$ & ~~~~~ &
$\widehat{\boldsymbol{\beta}}_{gls} = (\mathbf{X}^\prime \mathbf{V}^{-1} \mathbf{X})^{-1} \mathbf{X}^\prime \mathbf{V}^{-1} \mathbf{y}$ \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}
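\vspace{2mm}
\noindent The relationship between $F^*$, $R^2$ and $p$ above can be checked with invented numbers; a quick R sketch:
\begin{verbatim}
n = 100; k = 4; q = 2            # Invented sample size and test dimensions
R2full = 0.60; R2red = 0.50      # Made-up R-squared values
p = (R2full - R2red)/(1 - R2red) # Proportion of remaining variation explained
Fstar = ((n-k-1)/q) * (p/(1-p))  # F statistic for the full vs. reduced test
q*Fstar/(q*Fstar + n - k - 1)    # Recovers p
\end{verbatim}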
\newpage
\begin{verbatim}
qf(0.95,df1=6,df2=122)       # Critical value for F, not in any table
qt(0.975,df=122)             # Critical value for t
XpX = t(X)%*%X
XpXinv = solve(XpX)
install.packages("readxl")   # Only need to do this once. There are no dependencies.
library(readxl)              # Load the package
hungry = read_excel("Diet.xlsx")
math = read.table("http://www.utstat.toronto.edu/~brunner/data/legal/mathtest.txt")
head(math)
colnames(math) = c("ID","HScalcMark","PreCalcScore","CalcScore","UnivCalcMark")
summary(math)
cor(math)
attach(math)
mathmod = lm(UnivCalcMark ~ HScalcMark+PreCalcScore+CalcScore, data = math)
cellmeans = lm(lper100k ~ 0+Cntry+weight+length)   # Cell means coding, no intercept
summary(mathmod)
betahat = coefficients(mathmod)
epsilonhat = residuals(mathmod)
plot(HScalcMark,epsilonhat)
yhat = fitted.values(mathmod)
hii = hatvalues(mathmod)
cd = cooks.distance(mathmod); summary(cd)
MSE.XpXinv = vcov(mathmod)                   # MSE (X'X)^{-1}
a = as.matrix(c(0,0,-1,1))
se = sqrt( t(a)%*%MSE.XpXinv%*%a )           # Standard error of the difference
t.025 = qt(0.975, df=df.residual(mathmod))   # Critical value for the interval
me95 = as.numeric( t.025*se )                # 95% margin of error
estdiff = as.numeric( t(a) %*% betahat ); estdiff
Lower95 = estdiff - me95; Upper95 = estdiff + me95; c(Lower95, Upper95)
ti = rstudent(mathmod)           # Studentized deleted residuals
alpha = 0.05; a = alpha/200      # Bonferroni correction for n = 200 tests
bcrit = qt(1-a/2,dfe-1); bcrit   # dfe = n-k-1, so dfe-1 matches t(n-k-2)
source("http://www.utstat.utoronto.ca/~brunner/Rfunctions/ftest.txt")
ftest(model, C, t=0)             # H0: C beta = t
C1 = rbind( c(0,1,0,0),
            c(0,0,1,0),
            c(0,0,0,1) )
ftest(mod,C1)
anova(reducedmodel, fullmodel)
c1 = numeric(n); c1[Cntry=='Europ'] = 1; table(c1,Cntry)
c2 = numeric(n); c2[Cntry=='Japan'] = 1; table(c2,Cntry)
c3 = numeric(n); c3[Cntry=='US'] = 1; table(c3,Cntry)
wc1 = weight*c1; wc2 = weight*c2   # Interaction terms
uneqslope = lm(lper100k ~ weight+c1+c2+wc1+wc2)
Cntry = factor(Cntry)
contrasts(Cntry)
Country = Cntry
contrasts(Country) = contr.treatment(3,base=3)
\end{verbatim}
\end{document}