% Formula sheet for STA 442/2101 (title is set in the center environment below).
% Structure: preamble and page geometry, two tabular environments of formulas,
% an R transcript of chi-squared critical values (verbatim), and a licence footer.
% Each statement / table row sits on its own line so that every % comment ends
% at its line end and cannot swallow the code that follows it.
\documentclass[11pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
%\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue,
            citecolor=blue, urlcolor=blue]{hyperref} % For links
\oddsidemargin=-.25in % Good for US Letter paper
\evensidemargin=0in
\textwidth=6.3in
\topmargin=-0.5in
\headheight=0.1in
\headsep=0.1in
\textheight=9.4in
%\pagestyle{empty} % No page numbers
\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{~~~~~STA 442/2101 Formulas}}\\ % Version 2
\vspace{1 mm}
\end{center}
% Spectral decomposition, linear independence.
% MGFs
% Random vectors
% Linear model
% Distribution facts, incl x2 addup?
% Test stats and CIs
\noindent
\renewcommand{\arraystretch}{2.0}
% First table: three columns, the middle one (~~~~~) is only a horizontal spacer.
\begin{tabular}{lll}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Univariate MGF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$M_y(t) = E(e^{yt})$
  & ~~~~~ &
  $M_{ay}(t) = M_y(at)$ \\
$M_{y+a}(t) = e^{at}M_y(t)$
  & ~~~~~ &
  $M_{_{\sum_{i=1}^n y_i}}(t) = \prod_{i=1}^n M_{y_i}(t)$ \\
$y \sim N(\mu,\sigma^2)$ means $M_{_y}(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$
  & ~~~~~ &
  $y \sim \chi^2(\nu)$ means $M_{_y}(t) = (1-2t)^{-\nu/2}$ \\
\multicolumn{3}{l}{If $W=W_1+W_2$ with $W_1$ and $W_2$ independent,
  $W\sim\chi^2(\nu_1+\nu_2)$, $W_2\sim\chi^2(\nu_2)$
  then $W_1\sim\chi^2(\nu_1)$} \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Linear Algebra %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\parbox{7 cm}{Columns of $\mathbf{A}$ \emph{linearly dependent} means there is a
  vector $\mathbf{v} \neq \mathbf{0}$ with $\mathbf{Av} = \mathbf{0}$.}
  & ~~~~~ &
  \parbox{7 cm}{Columns of $\mathbf{A}$ \emph{linearly independent} means that
  $\mathbf{Av} = \mathbf{0}$ implies $\mathbf{v} = \mathbf{0}$.} \\
\multicolumn{3}{l}{$\mathbf{A}$ \emph{positive definite} means
  $\mathbf{v}^\top \mathbf{Av} > 0$ for all vectors $\mathbf{v} \neq \mathbf{0}$.} \\
$\boldsymbol{\Sigma} = \mathbf{P} \boldsymbol{\Lambda}\mathbf{P}^\top$
  & ~~~~~ &
  $\boldsymbol{\Sigma}^{-1} = \mathbf{P} \boldsymbol{\Lambda}^{-1} \mathbf{P}^\top$ \\
$\boldsymbol{\Sigma}^{1/2} = \mathbf{P} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\top$
  & ~~~~~ &
  $\boldsymbol{\Sigma}^{-1/2} = \mathbf{P} \boldsymbol{\Lambda}^{-1/2} \mathbf{P}^\top$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Random vectors and MVN %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$cov(\mathbf{w}) = E\left\{(\mathbf{w}-\boldsymbol{\mu}_w)(\mathbf{w}-\boldsymbol{\mu}_w)^\top\right\}$
  & ~~~~~ &
  $cov(\mathbf{w,t}) = E\left\{ (\mathbf{w}-\boldsymbol{\mu}_w) (\mathbf{t}-\boldsymbol{\mu}_t)^\top\right\}$ \\
$cov(\mathbf{w}) = E\{\mathbf{ww}^\top\} - \boldsymbol{\mu}_w\boldsymbol{\mu}_w^\top$
  & ~~~~~ &
  $cov(\mathbf{Aw}) = \mathbf{A}cov(\mathbf{w}) \mathbf{A}^\top$ \\
%%%%%%%%%%%%%%%%%%%%%%%% MVN %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
If $\mathbf{w} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma} )$, then
  $\mathbf{Aw}+\mathbf{c} \sim N_r(\mathbf{A}\boldsymbol{\mu} + \mathbf{c}, \mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top )$
  & ~~~~~ &
  and $(\mathbf{w}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{w}-\boldsymbol{\mu}) \sim \chi^2 (p)$ \\
\multicolumn{3}{l}{$L(\boldsymbol{\mu,\Sigma}) = |\boldsymbol{\Sigma}|^{-n/2} (2\pi)^{-np/2}
  \exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1})
  + (\overline{\mathbf{y}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} (\overline{\mathbf{y}}-\boldsymbol{\mu}) \right\}$,
  where $\boldsymbol{\widehat{\Sigma}} = \frac{1}{n}\sum_{i=1}^n (\mathbf{y}_i-\overline{\mathbf{y}}) (\mathbf{y}_i-\overline{\mathbf{y}})^\top $} \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Simple regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$y_i = \beta_0 + \beta_1 x_i + \epsilon_i$
  & ~~~~~ &
  $\widehat{\beta}_0 = \overline{y} - \widehat{\beta}_1\overline{x}$ \\
$\widehat{\beta}_1 = \frac{\sum_{i=1}^n(x_i-\overline{x})(y_i-\overline{y})}
                          {\sum_{i=1}^n(x_i-\overline{x})^2}
                   = \frac{\sum_{i=1}^n x_iy_i - n \, \overline{x} \, \overline{y}}
                          {\sum_{i=1}^n x_i^2 - n\overline{x}^2}$
  & ~~~~~ &
  $r = \frac{\sum_{i=1}^n (x_i-\overline{x})(y_i-\overline{y})}
            {\sqrt{\sum_{i=1}^n (x_i-\overline{x})^2} \sqrt{\sum_{i=1}^n (y_i-\overline{y})^2}}$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%% Multiple Regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$y_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1} + \epsilon_i$
  & ~~~~~ &
  $\epsilon_1, \ldots, \epsilon_n$ independent $N(0,\sigma^2)$ \\
$\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$
  & ~~~~~ &
  $\boldsymbol{\epsilon} \sim N_n(\mathbf{0},\sigma^2\mathbf{I}_n)$ \\
$\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top \mathbf{y}
  \sim N_p\left(\boldsymbol{\beta}, \sigma^2 (\mathbf{X}^\top \mathbf{X})^{-1}\right)$
  & ~~~~~ &
  $\widehat{\mathbf{y}} = \mathbf{X}\widehat{\boldsymbol{\beta}} = \mathbf{Hy}$,
  where $\mathbf{H} = \mathbf{X}(\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top $ \\
% The residual vector e is set with \mathbf, like every other vector in math mode.
$\mathbf{e} = \mathbf{y} - \widehat{\mathbf{y}} = (\mathbf{I}-\mathbf{H})\mathbf{y}$, ~~
  $\mathbf{X}^\top\mathbf{e} = \mathbf{0}$
  & ~~~~~ &
  $\widehat{\boldsymbol{\beta}}$ and $\mathbf{e}$ are independent under normality. \\
$\sum_{i=1}^n(y_i-\overline{y})^2 = \sum_{i=1}^n(y_i-\widehat{y}_i)^2 + \sum_{i=1}^n(\widehat{y}_i-\overline{y})^2$
  & ~~~~~ &
  $SST=SSE+SSR$ and $R^2 = \frac{SSR}{SST}$ \\
$\frac{SSE}{\sigma^2} = \frac{\mathbf{e}^\top \mathbf{e}}{\sigma^2} \sim \chi^2(n-p)$
  & ~~~~~ &
  $MSE = \frac{SSE}{n-p}$ \\
$T = \frac{Z}{\sqrt{W/\nu}} \sim t(\nu)$
  & ~~~~~ &
  $F = \frac{W_1/\nu_1}{W_2/\nu_2} \sim F(\nu_1,\nu_2)$ \\
\multicolumn{3}{l}{Under $H_0:\mathbf{L}\boldsymbol{\beta}=\mathbf{h}$,
  $F^* = \frac{(\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})^\top
               (\mathbf{L}(\mathbf{X}^\top \mathbf{X})^{-1}\mathbf{L}^\top)^{-1}
               (\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})}
              {r \, MSE}
       = \frac{SSR_F-SSR_R}{r \, MSE_F} \sim F(r,n-p)$} \\
\end{tabular}
% Second table: same three-column layout as the first.
\begin{tabular}{lll}
%%%%%%%%%%%%%%%%%%%%%%%% Large sample %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\multicolumn{3}{l}{If $\lim_{n \rightarrow \infty} E(T_n) = \theta$ and
  $\lim_{n \rightarrow \infty} Var(T_n) = 0$, then $T_n \stackrel{p}{\rightarrow} \theta$} \\
\multicolumn{3}{l}{If $\sqrt{n}(T_n-\mu) \stackrel{d}{\rightarrow} T \sim N(0,\sigma^2)$, then
  $\sqrt{n}\left(g(T_n)-g(\mu)\right) \stackrel{d}{\rightarrow} g^\prime(\mu)T \sim N(0,g^\prime(\mu)^2\sigma^2)$} \\
% Stacked vectors are one-column arrays, so the column spec is {c}.
If $\mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ and
  $\mathbf{Y}_n \stackrel{p}{\rightarrow} \mathbf{c}$, then
  $\left( \begin{array}{c} \mathbf{T}_n \\ \mathbf{Y}_n \end{array} \right)
   \stackrel{d}{\rightarrow}
   \left( \begin{array}{c} \mathbf{T} \\ \mathbf{c} \end{array} \right)$
  & ~~~~~ &
  $\sqrt{n}(\overline{\mathbf{x}}_n-\boldsymbol{\mu}) \stackrel{d}{\rightarrow} \mathbf{x} \sim N(\mathbf{0},\boldsymbol{\Sigma})$ \\
\multicolumn{3}{l}{Let $g: \mathbb{R}^d \rightarrow \mathbb{R}^k$ etc.
  If $\sqrt{n}(\mathbf{T}_n-\boldsymbol{\theta}) \stackrel{d}{\rightarrow} \mathbf{T}$, then
  $\sqrt{n}(g(\mathbf{T}_n)-g(\boldsymbol{\theta})) \stackrel{d}{\rightarrow} \mbox{\.{g}} (\boldsymbol{\theta}) \mathbf{T}$,
  where \.{g}$(\boldsymbol{\theta}) = \left[ \frac{\partial g_i}{\partial \theta_j} \right]_{k \times d}$} \\
$G^2 = -2 \log \left( \frac{\max_{\theta \in \Theta_0} L(\theta)}
                           {\max_{\theta \in \Theta} L(\theta)} \right)
     = -2 \log \left( \frac{L(\widehat{\theta}_0)}{L(\widehat{\theta})} \right)$
  & ~~~~~ &
  $W_n = (\mathbf{L}\widehat{\boldsymbol{\theta}}_n-\mathbf{h})^\top
         \left(\mathbf{L} \widehat{\mathbf{V}}_n \mathbf{L}^\top\right)^{-1}
         (\mathbf{L}\widehat{\boldsymbol{\theta}}_n-\mathbf{h})$ \\
%%%%%%%%%%%%%%%%%%%%%%%%% Logistic regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$ \log\left(\frac{\pi_i}{1-\pi_i} \right) = \beta_0 + \beta_1 x_{i,1} + \ldots + \beta_{p-1} x_{i,p-1}$
  & ~~~~~ &
  $\pi_i = \frac{e^{\beta_0 + \beta_1 x_{i,1} + \ldots + \beta_{p-1} x_{i,p-1}}}
                {1+e^{\beta_0 + \beta_1 x_{i,1} + \ldots + \beta_{p-1} x_{i,p-1}}}$ \\
%%%%%%%%%%%%%%%%%%%%%%%%% Poisson regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$ \log(\lambda_i) = \beta_0 + \beta_1 x_{i,1} + \ldots + \beta_{p-1} x_{i,p-1}$
  & ~~~~~ & \\
%%%%%%%%%%%%%%%%%%%%%%%%% Multinomial logit models %%%%%%%%%%%%%%%%%%%%%%%%
\parbox{7 cm}{
\begin{eqnarray*}
\log\left(\frac{\pi_1}{\pi_3} \right ) & = & \beta_{0,1} + \beta_{1,1} x_1 + \ldots + \beta_{p-1,1} x_{p-1} = L_1 \\ \\
\log\left(\frac{\pi_2}{\pi_3} \right ) & = & \beta_{0,2} + \beta_{1,2} x_1 + \ldots + \beta_{p-1,2} x_{p-1} = L_2
\end{eqnarray*}
} % End parbox
  & ~~~~~ &
\parbox{7 cm}{
\begin{eqnarray*}
\pi_1 & = & \frac{e^{L_1}}{1+e^{L_1}+e^{L_2}} \\ \\
\pi_2 & = & \frac{e^{L_2}}{1+e^{L_1}+e^{L_2}} \\ \\
\pi_3 & = & \frac{1}{1+e^{L_1}+e^{L_2}}
\end{eqnarray*}
} % End parbox
\\ % Still okay?
\end{tabular}
\renewcommand{\arraystretch}{1.0}
\begin{verbatim}
> df = 1:12
> Critical_Value = qchisq(0.95,df)
> cbind(df,Critical_Value)
      df Critical_Value
 [1,]  1       3.841459
 [2,]  2       5.991465
 [3,]  3       7.814728
 [4,]  4       9.487729
 [5,]  5      11.070498
 [6,]  6      12.591587
 [7,]  7      14.067140
 [8,]  8      15.507313
 [9,]  9      16.918978
[10,] 10      18.307038
[11,] 11      19.675138
[12,] 12      21.026070
\end{verbatim}
%\vspace{5mm}
\noindent
% A one-cell table with an \hline serves as a full-width horizontal rule above the footer.
\begin{center}\begin{tabular}{l} \hspace{6.5in} \\ \hline \end{tabular}\end{center}
This formula sheet was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistics, University of Toronto.
It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f17}
{\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f17}}
\end{document}
% Everything below \end{document} is ignored by LaTeX; kept as the author's scratch copy
% of a shorter critical-value table.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{verbatim}
> # Chi-squared critical values
> df = 1:6
> Critical_Value = qchisq(0.95,df)
> cbind(df,Critical_Value)
     df Critical_Value
[1,]  1       3.841459
[2,]  2       5.991465
[3,]  3       7.814728
[4,]  4       9.487729
[5,]  5      11.070498
[6,]  6      12.591587
\end{verbatim}