\documentclass[11pt]{article} 
%\usepackage{amsbsy} % for \boldsymbol and \pmb 
%\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links

\oddsidemargin=-.25in                  % Good for US Letter paper
\evensidemargin=0in
\textwidth=6.3in
\topmargin=-0.5in
\headheight=0.1in
\headsep=0.1in
\textheight=9.4in

%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt} 


\begin{center}   
{\Large \textbf{~~~~~STA 442/2101 Formulas}}\\   % Version 2
\vspace{1 mm}
\end{center}

% Spectral decomposition, linear independence.
% MGFs
% Random vectors
% Linear model
% Distribution facts, incl x2 addup?
% Test stats and CIs


\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Univariate MGF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$M_y(t) = E(e^{yt})$ & ~~~~~ & $M_{ay}(t) = M_y(at)$ \\
$M_{y+a}(t) = e^{at}M_y(t)$ & ~~~~~ & 
$M_{_{\sum_{i=1}^n y_i}}(t) = \prod_{i=1}^n M_{y_i}(t)$
\\
$y \sim N(\mu,\sigma^2)$ means $M_{_y}(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$
& ~~~~~ & 
$y \sim \chi^2(\nu)$ means $M_{_y}(t) = (1-2t)^{-\nu/2}$
\\
\multicolumn{3}{l}{If  $W=W_1+W_2$ with $W_1$ and $W_2$ independent, $W\sim\chi^2(\nu_1+\nu_2)$, $W_2\sim\chi^2(\nu_2)$ then $W_1\sim\chi^2(\nu_1)$} \\ 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Linear Algebra %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\parbox{7 cm}{Columns of  $\mathbf{A}$ \emph{linearly dependent} means there is a vector $\mathbf{v} \neq \mathbf{0}$ with $\mathbf{Av} = \mathbf{0}$.} & ~~~~~ &
\parbox{7 cm}{Columns of  $\mathbf{A}$ \emph{linearly independent} means that $\mathbf{Av} = \mathbf{0}$ implies $\mathbf{v} = \mathbf{0}$.}
\\
\multicolumn{3}{l}{$\mathbf{A}$ \emph{positive definite} means  $\mathbf{v}^\top \mathbf{Av} > 0$ for all vectors $\mathbf{v} \neq \mathbf{0}$.} \\
$\boldsymbol{\Sigma} = \mathbf{P} \boldsymbol{\Lambda}\mathbf{P}^\top$
& ~~~~~ & 
$\boldsymbol{\Sigma}^{-1} = \mathbf{P} \boldsymbol{\Lambda}^{-1} \mathbf{P}^\top$
\\
$\boldsymbol{\Sigma}^{1/2} = \mathbf{P} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\top$
& ~~~~~ &
$\boldsymbol{\Sigma}^{-1/2} = \mathbf{P} \boldsymbol{\Lambda}^{-1/2} \mathbf{P}^\top$
\\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Random vectors and MVN %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$cov(\mathbf{w}) = 
E\left\{(\mathbf{w}-\boldsymbol{\mu}_w)(\mathbf{w}-\boldsymbol{\mu}_w)^\top\right\}$ 
& ~~~~~ & 
$cov(\mathbf{w,t}) = E\left\{ (\mathbf{w}-\boldsymbol{\mu}_w)
                             (\mathbf{t}-\boldsymbol{\mu}_t)^\top\right\}$
\\
$cov(\mathbf{w}) = E\{\mathbf{ww}^\top\} - \boldsymbol{\mu}_w\boldsymbol{\mu}_w^\top$
& ~~~~~ &
$cov(\mathbf{Aw}) = \mathbf{A}cov(\mathbf{w}) \mathbf{A}^\top$
\\

%%%%%%%%%%%%%%%%%%%%%%%% MVN %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

If $\mathbf{w} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma} )$, then
$\mathbf{Aw}+\mathbf{c} \sim N_r(\mathbf{A}\boldsymbol{\mu} + \mathbf{c},
                    \mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top )$
& ~~~~~ & 
and $(\mathbf{w}-\boldsymbol{\mu})^\top
                 \boldsymbol{\Sigma}^{-1}(\mathbf{w}-\boldsymbol{\mu}) \sim \chi^2 (p)$
\\
\multicolumn{3}{l}{$L(\boldsymbol{\mu,\Sigma}) = |\boldsymbol{\Sigma}|^{-n/2} (2\pi)^{-np/2} 
    \exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) +
    (\overline{\mathbf{y}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} 
    (\overline{\mathbf{y}}-\boldsymbol{\mu}) \right\}$,  
where $\boldsymbol{\widehat{\Sigma}} = 
\frac{1}{n}\sum_{i=1}^n (\mathbf{y}_i-\overline{\mathbf{y}}) 
                        (\mathbf{y}_i-\overline{\mathbf{y}})^\top $} 
\\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Simple regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

$y_i = \beta_0 + \beta_1 x_i + \epsilon_i$
& ~~~~~ & 
$\widehat{\beta}_0 = \overline{y} - \widehat{\beta}_1\overline{x}$
\\
$\widehat{\beta}_1 = \frac{\sum_{i=1}^n(x_i-\overline{x})(y_i-\overline{y})}
           {\sum_{i=1}^n(x_i-\overline{x})^2} 
     = \frac{\sum_{i=1}^n x_iy_i - n \, \overline{x} \, \overline{y}}
            {\sum_{i=1}^n x_i^2 - n\overline{x}^2}$
& ~~~~~ & 
$r = \frac{\sum_{i=1}^n (x_i-\overline{x})(y_i-\overline{y})}
               {\sqrt{\sum_{i=1}^n (x_i-\overline{x})^2} \sqrt{\sum_{i=1}^n (y_i-\overline{y})^2}}$
\\
%%%%%%%%%%%%%%%%%%%%%%%%%% Multiple Regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

$y_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1} + \epsilon_i$
& ~~~~~ & 
$\epsilon_1, \ldots, \epsilon_n$ independent $N(0,\sigma^2)$
\\
$\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$ 
& ~~~~~ &
$\boldsymbol{\epsilon} \sim N_n(\mathbf{0},\sigma^2\mathbf{I}_n)$
\\
$\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\top \mathbf{X})^{-1} 
                   \mathbf{X}^\top \mathbf{y} \sim N_p\left(\boldsymbol{\beta}, 
                   \sigma^2 (\mathbf{X}^\top \mathbf{X})^{-1}\right)$
& ~~~~~ &
$\widehat{\mathbf{y}} = \mathbf{X}\widehat{\boldsymbol{\beta}} = \mathbf{Hy}$, where 
$\mathbf{H} = \mathbf{X}(\mathbf{X}^\top \mathbf{X})^{-1} 
                   \mathbf{X}^\top $
\\ 
$\mathbf{e} = \mathbf{y} - \widehat{\mathbf{y}} = (\mathbf{I}-\mathbf{H})\mathbf{y}$, ~~ $\mathbf{X}^\top\mathbf{e} = \mathbf{0}$ 
& ~~~~~ &
$\widehat{\boldsymbol{\beta}}$ and $\mathbf{e}$ are independent under normality.
\\
$\sum_{i=1}^n(y_i-\overline{y})^2 = \sum_{i=1}^n(y_i-\widehat{y}_i)^2 + \sum_{i=1}^n(\widehat{y}_i-\overline{y})^2$
& ~~~~~ &
$SST=SSE+SSR$ and $R^2 = \frac{SSR}{SST}$
\\
$\frac{SSE}{\sigma^2} = \frac{\mathbf{e}^\top \mathbf{e}}{\sigma^2}  \sim \chi^2(n-p)$
& ~~~~~ &
$MSE = \frac{SSE}{n-p}$
\\
$T = \frac{Z}{\sqrt{W/\nu}} \sim t(\nu)$ 
& ~~~~~ & 
$F = \frac{W_1/\nu_1}{W_2/\nu_2} \sim F(\nu_1,\nu_2)$ 
\\
\multicolumn{3}{l}{Under $H_0:\mathbf{L}\boldsymbol{\beta}=\mathbf{h}$, 
$F^* = \frac{(\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})^\top
            (\mathbf{L}(\mathbf{X}^\top \mathbf{X})^{-1}\mathbf{L}^\top)^{-1}
            (\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})} {r \, MSE} 
= \frac{SSR_F-SSR_R}{r \, MSE_F} \sim F(r,n-p)$} \\
\end{tabular}

\noindent
\begin{tabular}{lll}
%%%%%%%%%%%%%%%%%%%%%%%% Large sample %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\multicolumn{3}{l}{If $\lim_{n \rightarrow \infty} E(T_n) = \theta$  and $\lim_{n \rightarrow \infty} Var(T_n) = 0$, then $T_n \stackrel{p}{\rightarrow} \theta$} \\ 
\multicolumn{3}{l}{If $\sqrt{n}(T_n-\mu)  \stackrel{d}{\rightarrow} T \sim N(0,\sigma^2)$, then 
$\sqrt{n}\left(g(T_n)-g(\mu)\right)  \stackrel{d}{\rightarrow} g^\prime(\mu)T  \sim N(0,g^\prime(\mu)^2\sigma^2)$} \\ 
If $\mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ and
   $\mathbf{Y}_n \stackrel{p}{\rightarrow} \mathbf{c}$, then
   $\left( \begin{array}{c}  
                        \mathbf{T}_n \\ \mathbf{Y}_n
                        \end{array} \right)
                          \stackrel{d}{\rightarrow}
                        \left( \begin{array}{c}  
                        \mathbf{T} \\ \mathbf{c}
                        \end{array} \right)$
& ~~~~~ &
$\sqrt{n}(\overline{\mathbf{x}}_n-\boldsymbol{\mu}) \stackrel{d}{\rightarrow} \mathbf{x} \sim 
N(\mathbf{0},\boldsymbol{\Sigma})$ 
\\
\multicolumn{3}{l}{Let $g: \mathbb{R}^d \rightarrow \mathbb{R}^k$ etc. If $\sqrt{n}(\mathbf{T}_n-\boldsymbol{\theta}) \stackrel{d}{\rightarrow} \mathbf{T}$, then 
$\sqrt{n}(g(\mathbf{T}_n)-g(\boldsymbol{\theta})) \stackrel{d}{\rightarrow} 
\mbox{\.{g}} (\boldsymbol{\theta}) \mathbf{T}$, where \.{g}$(\boldsymbol{\theta}) = 
\left[ \frac{\partial g_i}{\partial \theta_j} \right]_{k \times d}$}
\\
$G^2 = -2 \log \left(   
           \frac{\max_{\theta \in \Theta_0} L(\theta)}
                {\max_{\theta \in \Theta} L(\theta)}
           \right) = -2 \log \left( 
                     \frac{L(\widehat{\theta}_0)}{L(\widehat{\theta})}
                     \right)$
& ~~~~~ &
$W_n = (\mathbf{L}\widehat{\boldsymbol{\theta}}_n-\mathbf{h})^\top 
\left(\mathbf{L} \widehat{\mathbf{V}}_n \mathbf{L}^\top\right)^{-1} 
(\mathbf{L}\widehat{\boldsymbol{\theta}}_n-\mathbf{h})$ 
\\
%%%%%%%%%%%%%%%%%%%%%%%%% Logistic regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$ \log\left(\frac{\pi_i}{1-\pi_i} \right) = 
    \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1}$
& ~~~~~ &
   $\pi_i  =  \frac{e^{\beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1}}}
                    {1+e^{\beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1}}}$
\\
%%%%%%%%%%%%%%%%%%%%%%%%% Poisson regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$ \log(\lambda_i) = 
    \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1}$
& ~~~~~ &
\\   
%%%%%%%%%%%%%%%%%%%%%%%%% Multinomial logit models %%%%%%%%%%%%%%%%%%%%%%%%
\parbox{7 cm}{ 
\begin{eqnarray*}
\log\left(\frac{\pi_1}{\pi_3} \right ) & = & 
     \beta_{0,1} + \beta_{1,1} x_1 + \cdots + \beta_{p-1,1} x_{p-1}  = L_1 \\ \\
\log\left(\frac{\pi_2}{\pi_3} \right ) & = & 
     \beta_{0,2} + \beta_{1,2} x_1 + \cdots + \beta_{p-1,2} x_{p-1} = L_2
\end{eqnarray*}
} % End parbox
& ~~~~~ &
\parbox{7 cm}{ 
\begin{eqnarray*}
\pi_1     & = & \frac{e^{L_1}}{1+e^{L_1}+e^{L_2}} \\ \\
\pi_2     & = & \frac{e^{L_2}}{1+e^{L_1}+e^{L_2}} \\ \\
\pi_3     & = & \frac{1}{1+e^{L_1}+e^{L_2}}
\end{eqnarray*}
} % End parbox
\\  
% Still okay?
\end{tabular}
\renewcommand{\arraystretch}{1.0}
  

\begin{verbatim}
> df = 1:12
> Critical_Value = qchisq(0.95,df)
> cbind(df,Critical_Value)
      df Critical_Value
 [1,]  1       3.841459
 [2,]  2       5.991465
 [3,]  3       7.814728
 [4,]  4       9.487729
 [5,]  5      11.070498
 [6,]  6      12.591587
 [7,]  7      14.067140
 [8,]  8      15.507313
 [9,]  9      16.918978
[10,] 10      18.307038
[11,] 11      19.675138
[12,] 12      21.026070

\end{verbatim} 

%\vspace{5mm}


\noindent
\begin{center}
\rule{\textwidth}{0.4pt}
\end{center}
This formula sheet was prepared by  \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistics, University of Toronto. It is licensed under a 
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
     {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f17} {\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f17}}



\end{document}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{verbatim}
> # Chi-squared critical values
> df = 1:6
> Critical_Value = qchisq(0.95,df)
> cbind(df,Critical_Value)
     df Critical_Value
[1,]  1       3.841459
[2,]  2       5.991465
[3,]  3       7.814728
[4,]  4       9.487729
[5,]  5      11.070498
[6,]  6      12.591587
\end{verbatim}