\documentclass[10pt]{article}
%\usepackage{amsbsy}   % for \boldsymbol and \pmb
%\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\oddsidemargin=0in    % Good for US Letter paper
\evensidemargin=0in
\textwidth=6.3in
\topmargin=-0.5in
\headheight=0.1in
\headsep=0.1in
\textheight=9.4in
\pagestyle{empty}     % No page numbers
\begin{document}
\enlargethispage*{1000 pt}
\begin{center} % Version 9
{\Large \textbf{STA 302 Formulas}\footnote{This formula sheet was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/302f20}{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f20}}}}\\
\vspace{1 mm}
\end{center}

%%%%%%%%%%%%%%%%%%%%%% Expected value, variance and covariance %%%%%%%%%%%%%%%%%%%%%%
\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{ll}
$E(X) \stackrel{def}{=} \sum_x \, x \, p_{_X}(x)$ &
$E(X) \stackrel{def}{=} \int_{-\infty}^\infty x f_{_X}(x) \, dx$ \\
$E(g(X)) = \sum_x g(x) \, p_{_X}(x)$ &
$E(g(\mathbf{X})) = \sum_{x_1} \cdots \sum_{x_p} g(x_1, \ldots, x_p) \, p_{_\mathbf{X}}(x_1, \ldots, x_p) $ \\
$E(g(X)) = \int_{-\infty}^\infty g(x) \, f_{_X}(x) \, dx$ &
$E(g(\mathbf{X})) = \int_{-\infty}^\infty \cdots \int_{-\infty}^\infty g(x_1, \ldots, x_p) \, f_{_\mathbf{X}}(x_1, \ldots, x_p) \, dx_1 \ldots dx_p $ \\
$E(\sum_{i=1}^na_iX_i) = \sum_{i=1}^na_iE(X_i)$ &
$Var(X) \stackrel{def}{=} E\left( \, (X-\mu_{_X})^2 \, \right) = E(X^2)-[E(X)]^2$ \\
$Cov(X,Y) \stackrel{def}{=} E\left( \, (X-\mu_{_X})(Y-\mu_{_Y}) \, \right)$ &
$Cov(X,Y) = E(XY)-E(X)E(Y)$ \\
$Corr(X,Y) \stackrel{def}{=} \frac{Cov(X,Y)}{\sqrt{Var(X)Var(Y)} } $ &
$Cov\left(\sum_{i=1}^na_iX_i~,\,\sum_{j=1}^m b_j Y_j \right) = \sum_{i=1}^n\sum_{j=1}^m a_i b_j Cov\left( X_i, Y_j \right)$\\
$M_{_X}(t) = E(e^{Xt})$ &
$M_{_{aX}}(t) = M_{_X}(at)$ \\
$M_{_{X+a}}(t) = e^{at}M_{_X}(t)$ &
$M_{_{\sum_{i=1}^n X_i}}(t) = \prod_{i=1}^n M_{X_i}(t)$ for $X_1, \ldots, X_n$ independent \\
$X \sim N(\mu,\sigma^2)$ means $M_{_X}(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$ &
$X \sim \chi^2(\nu)$ means $M_{_X}(t) = (1-2t)^{-\nu/2}$ \\
\multicolumn{2}{l}{If $W=W_1+W_2$ with $W_1$ and $W_2$ independent, $W\sim\chi^2(\nu_1+\nu_2)$ and $W_2\sim\chi^2(\nu_2)$, then $W_1\sim\chi^2(\nu_1)$.} \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}
\vspace{-8mm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Normal distribution basics %%%%%%%%%%%%%%%%%%%%%%%
\noindent ~~\,If $x_1, \ldots, x_n$ is a random sample from a Normal$(\mu,\sigma^2)$ distribution, then
$f(x) = \frac{1}{\sigma \sqrt{2\pi}} e^{-\frac{(x-\mu)^2}{2 \sigma^2}}$, and
\begin{itemize}
\item $\widehat{\mu} = \overline{x}_n$ and $\widehat{\sigma}^2 = \frac{1}{n} \sum_{i=1}^n(x_i-\overline{x}_n)^2 = \left(\frac{n-1}{n} \right)s^2$, where $s^2 = \frac{1}{n-1} \sum_{i=1}^n(x_i-\overline{x}_n)^2 $
\item $\overline{x}_n$ and $s^2$ are independent.
\item $\frac{(n-1)s^2}{\sigma^2} \sim \chi^2(n-1)$.
\item $t = \frac{\sqrt{n}(\overline{x}_n-\mu)}{s} \sim t(n-1)$
\end{itemize}
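\vspace{2mm}
\noindent These facts can be checked numerically. A minimal R sketch (the sample is simulated and all numbers are invented for illustration):
\begin{verbatim}
set.seed(302)                        # Invented seed, for reproducibility
x = rnorm(25, mean=100, sd=15)       # Simulated sample: n = 25, mu = 100, sigma = 15
n = length(x); xbar = mean(x)        # Sample size and sample mean
s2 = var(x)                          # var() uses the n-1 denominator, so this is s^2
sigma2hat = (n-1)/n * s2             # Maximum likelihood estimate of sigma^2
tstat = sqrt(n)*(xbar-100)/sqrt(s2)  # Compare with qt(0.975, df = n-1)
\end{verbatim}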
\vspace{3mm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Matrices %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\noindent
\begin{tabular}{lll}
$\left(\begin{array}{cc} a & b \\ c & d \end{array}\right)^{-1} = \frac{1}{ad-bc} \left(\begin{array}{rr} d & -b \\ -c & a \end{array}\right)$ & ~~~~~ &
\hspace{16mm} $\mathbf{AB}=\left[ \displaystyle \sum_{k}a_{i,k}b_{k,j}\right]$ \hspace{7mm} $tr(\mathbf{AB}) = tr(\mathbf{BA})$ \\
&&\\
\multicolumn{3}{l}{The square matrix $\mathbf{A}$ has an eigenvalue equal to $\lambda$ with corresponding eigenvector $\mathbf{x} \neq \mathbf{0}$ if $\mathbf{Ax} = \lambda\mathbf{x}$.}
\end{tabular}

\vspace{2mm}

\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
\parbox{7 cm}{Columns of $\mathbf{A}$ \emph{linearly dependent} means there is a vector $\mathbf{v} \neq \mathbf{0}$ with $\mathbf{Av} = \mathbf{0}$.} & ~~~~~ &
\parbox{7 cm}{Columns of $\mathbf{A}$ \emph{linearly independent} means that $\mathbf{Av} = \mathbf{0}$ implies $\mathbf{v} = \mathbf{0}$.} \\
\multicolumn{3}{l}{$\mathbf{A}$ \emph{positive definite} means $\mathbf{v}^\prime \mathbf{Av} > 0$ for all vectors $\mathbf{v} \neq \mathbf{0}$.} \\
\multicolumn{3}{l}{Below, $\boldsymbol{\Sigma}$ is symmetric, the columns of $\mathbf{C}$ are its orthonormal eigenvectors ($\mathbf{C}^\prime = \mathbf{C}^{-1}$), and $\mathbf{D}$ is the diagonal matrix of its eigenvalues.} \\
$\boldsymbol{\Sigma} = \mathbf{CD} \mathbf{C}^\prime$ & ~~~~~ & $\boldsymbol{\Sigma}^{-1} = \mathbf{C} \mathbf{D}^{-1} \mathbf{C}^\prime$ \\
$\boldsymbol{\Sigma}^{1/2} = \mathbf{CD}^{1/2} \mathbf{C}^\prime$ & ~~~~~ & $\boldsymbol{\Sigma}^{-1/2} = \mathbf{CD}^{-1/2} \mathbf{C}^\prime$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%% Random vectors %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$cov(\mathbf{y}) = E\left\{(\mathbf{y}-\boldsymbol{\mu}_y)(\mathbf{y}-\boldsymbol{\mu}_y)^\prime\right\}$ & ~~~~~ &
$cov(\mathbf{y,t}) = E\left\{ (\mathbf{y}-\boldsymbol{\mu}_y) (\mathbf{t}-\boldsymbol{\mu}_t)^\prime\right\}$ \\
$cov(\mathbf{y}) = E\{\mathbf{yy}^\prime\} - \boldsymbol{\mu}_y\boldsymbol{\mu}_y^\prime$ & ~~~~~ &
$cov(\mathbf{Ay}) = \mathbf{A}cov(\mathbf{y}) \mathbf{A}^\prime$ \\
$cov(\mathbf{Ay,By}) = \mathbf{A}cov(\mathbf{y}) \mathbf{B}^\prime$ & ~~~~~ & \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}

\newpage
%%%%%%%%%%%%%%%%%%%%%%% Multivariate MGF, MVN %%%%%%%%%%%%%%%%%%%%%%%%%
\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
$M_{\mathbf{v}}(\mathbf{t}) = E(e^{\mathbf{t}^\prime\mathbf{v}})$ & ~~~~~ &
$M_{\mathbf{Av}}(\mathbf{t}) = M_{\mathbf{v}}(\mathbf{A}^\prime\mathbf{t})$ \\
$M_{\mathbf{v}+\mathbf{c}}(\mathbf{t}) = e^{\mathbf{t}^\prime\mathbf{c}} M_{\mathbf{v}}(\mathbf{t})$ & ~~~~~ &
\parbox{7 cm}{$\mathbf{v}_1$ and $\mathbf{v}_2$ are independent if and only if $M_{(\mathbf{v}_1,\mathbf{v}_2)}\left(\mathbf{t}_1,\mathbf{t}_2\right) = M_{\mathbf{v}_1}(\mathbf{t}_1) M_{\mathbf{v}_2}(\mathbf{t}_2)$.} \vspace{2mm} \\
$\mathbf{v} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$ means $M_{\mathbf{v}}(\mathbf{t}) = e^{\mathbf{t}^\prime\boldsymbol{\mu} + \frac{1}{2} \mathbf{t}^\prime \boldsymbol{\Sigma} \mathbf{t}}$ & ~~~~~ &
\parbox{7 cm}{For the multivariate normal, zero covariance implies independence.} \\
If $\mathbf{v} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$, then $\mathbf{Av} + \mathbf{c} \sim N_q(\mathbf{A}\boldsymbol{\mu}+\mathbf{c}, \mathbf{A}\boldsymbol{\Sigma} \mathbf{A}^\prime)$, & ~~~~~ &
and $w = (\mathbf{v}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{v}-\boldsymbol{\mu}) \sim \chi^2(p)$ \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}
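\vspace{2mm}
\noindent A minimal R sketch of the spectral decomposition and multivariate normal facts above, using the MASS package for \texttt{mvrnorm} (the parameter values are invented):
\begin{verbatim}
library(MASS)                            # For mvrnorm
mu = c(0,0); Sigma = rbind(c(2,1),
                           c(1,2))       # Invented positive definite Sigma
eig = eigen(Sigma)                       # C = eig$vectors, eigenvalues in eig$values
C = eig$vectors; D = diag(eig$values)
C %*% D %*% t(C)                         # Reconstructs Sigma
SqrtSigma = C %*% sqrt(D) %*% t(C)       # Sigma^{1/2}
SqrtSigma %*% SqrtSigma                  # Equals Sigma again
v = mvrnorm(1, mu, Sigma)                # One N_2(mu, Sigma) draw
w = t(v-mu) %*% solve(Sigma) %*% (v-mu)  # Behaves like chi-squared, df = 2
\end{verbatim}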
%%%%%%%%%%%%%%%%%%%%%%%%%%% Simple regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
$y_i = \beta_0 + \beta_1 x_i + \epsilon_i$ & ~~~~~ &
$\widehat{\beta}_0 = \overline{y} - \widehat{\beta}_1\overline{x}$ \\
$\widehat{\beta}_1 = \frac{\sum_{i=1}^n(x_i-\overline{x})(y_i-\overline{y})}{\sum_{i=1}^n(x_i-\overline{x})^2} = \frac{\sum_{i=1}^n x_iy_i - n \, \overline{x} \, \overline{y}}{\sum_{i=1}^n x_i^2 - n\overline{x}^2}$ & ~~~~~ &
$r = \frac{\sum_{i=1}^n (x_i-\overline{x})(y_i-\overline{y})}{\sqrt{\sum_{i=1}^n (x_i-\overline{x})^2} \sqrt{\sum_{i=1}^n (y_i-\overline{y})^2}}$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%% Multiple regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik} + \epsilon_i$ & ~~~~~ &
with $E(\epsilon_i)=0$, $Var(\epsilon_i)=\sigma^2$, $Cov(\epsilon_i,\epsilon_j)=0$ for $i \neq j$ \\
$\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$ with $E(\boldsymbol{\epsilon})=\mathbf{0}$, $cov(\boldsymbol{\epsilon})=\sigma^2\mathbf{I}_n$ & ~~~~~ &
$\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime \mathbf{y}$ \\
$\widehat{\mathbf{y}} = \mathbf{X}\widehat{\boldsymbol{\beta}} = \mathbf{Hy}$, where $\mathbf{H} = (h_{ij}) = \mathbf{X}(\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime $ & ~~~~~ &
$\widehat{\boldsymbol{\epsilon}} = \mathbf{y} - \widehat{\mathbf{y}} = (\mathbf{I}-\mathbf{H})\mathbf{y}$ \hspace{8mm} $\mathbf{X}^\prime\widehat{\boldsymbol{\epsilon}} = \mathbf{0}$ \\
$\sum_{i=1}^n(y_i-\overline{y})^2 = \sum_{i=1}^n(y_i-\widehat{y}_i)^2 + \sum_{i=1}^n(\widehat{y}_i-\overline{y})^2$ & ~~~~~ &
$SST=SSE+SSR$ and $R^2 = \frac{SSR}{SST}$ \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}
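\vspace{2mm}
\noindent A minimal R sketch of the least squares formulas above (the data are simulated and all object names are illustrative):
\begin{verbatim}
set.seed(1)                              # Invented toy data
n = 50; x1 = rnorm(n); x2 = rnorm(n)
y = 1 + 2*x1 - x2 + rnorm(n)             # True beta = (1, 2, -1), sigma = 1
X = cbind(1, x1, x2)                     # Design matrix with intercept column
betahat = solve(t(X)%*%X) %*% t(X)%*%y   # (X'X)^{-1} X'y
H = X %*% solve(t(X)%*%X) %*% t(X)       # Hat matrix
yhat = H %*% y; ehat = y - yhat          # Fitted values and residuals
coef(lm(y ~ x1 + x2))                    # Agrees with betahat
\end{verbatim}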
%%%%%%%%%%%%%%%%%%%%%%%%%%%% Normal model %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\vspace{2mm}
\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
$\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$ with $\boldsymbol{\epsilon} \sim N(\mathbf{0},\sigma^2\mathbf{I}_n)$ & ~~~~~ &
\parbox{7 cm}{$y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_k x_{ik} + \epsilon_i$ \\ $\epsilon_1, \ldots, \epsilon_n$ independent $N(0,\sigma^2)$} \\
$\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime \mathbf{y} \sim N_{k+1}\left(\boldsymbol{\beta},\sigma^2(\mathbf{X}^\prime \mathbf{X})^{-1}\right)$ & ~~~~~ &
$\widehat{\boldsymbol{\beta}}$ and $\widehat{\boldsymbol{\epsilon}}$ are independent under normality. \\
$s^2 = \frac{\widehat{\boldsymbol{\epsilon}}^{\,\prime \,} \widehat{\boldsymbol{\epsilon}}}{n-k-1} = \frac{\mbox{\footnotesize\emph{SSE}}}{n-k-1} =$ \emph{MSE} & ~~~~~ &
$\frac{\mbox{\footnotesize\emph{SSE}}}{\sigma^2} = \frac{\widehat{\boldsymbol{\epsilon}}^{\,\prime \,}\widehat{\boldsymbol{\epsilon}}}{\sigma^2} \sim \chi^2(n-k-1)$ \\
$t = \frac{z}{\sqrt{w/\nu}} \sim t(\nu)$ & ~~~~~ & $F = \frac{w_1/\nu_1}{w_2/\nu_2} \sim F(\nu_1,\nu_2)$ \\
$t = \frac{\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} -\mathbf{a}^\prime \boldsymbol{\beta}}{\sqrt{\mbox{\footnotesize\emph{MSE}} \, \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{a}}} \sim t(n-k-1)$ & ~~~~~ &
$\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} \pm t_{\alpha/2} \, \sqrt{\mbox{\small\emph{MSE}} \, \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{a}}$ \\
$F^* = \frac{(\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime (\mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{C}^\prime)^{-1} (\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t})}{q \, \mbox{\footnotesize\emph{MSE}}} \stackrel{H_0}{\sim} F(q,n-k-1)$ & ~~~~~ &
$F^* = \frac{\mbox{\footnotesize\emph{SSR(full)$-$SSR(reduced)}}}{q \, \mbox{\footnotesize\emph{MSE(full)}}} = \left( \frac{n-k-1}{q} \right) \left( \frac{p}{1-p} \right)$ \\
$p = \frac{R^2(\mbox{\footnotesize\emph{full}}) - R^2(\mbox{\footnotesize\emph{reduced}})}{1-R^2(\mbox{\footnotesize\emph{reduced}})} $ & ~~~~~ &
$p = \frac{qF^*}{qF^*+n-k-1}$ \\
$t = \frac{y_0-\mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}}}{\sqrt{MSE \, (1+\mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}} \sim t(n-k-1)$ & ~~~~~ &
$\mathbf{x}_0^\prime \widehat{\boldsymbol{\beta}} \pm t_{\alpha/2} \, \sqrt{\mbox{\small\emph{MSE}} \, (1 + \mathbf{x}_0^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{x}_0)}$ \\
$t_i = \frac{y_i-\mathbf{x}_i^\prime \widehat{\boldsymbol{\beta}}_{(i)}}{\sqrt{MSE_{(i)}(1+\mathbf{x}_i^\prime (\mathbf{X}_{(i)}^\prime \mathbf{X}_{(i)})^{-1}\mathbf{x}_i)}} \sim t(n-k-2)$ & ~~~~~ & \\
$\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$ with $\boldsymbol{\epsilon} \sim N(\mathbf{0},\sigma^2\mathbf{V})$ & ~~~~~ &
$\widehat{\boldsymbol{\beta}}_{gls} = (\mathbf{X}^\prime \mathbf{V}^{-1} \mathbf{X})^{-1} \mathbf{X}^\prime \mathbf{V}^{-1} \mathbf{y}$ \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}
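\vspace{2mm}
\noindent The relationship between $F^*$, $R^2$ and $p$ above can be checked with invented numbers; a quick R sketch:
\begin{verbatim}
n = 100; k = 4; q = 2            # Invented sample size and test dimensions
R2full = 0.60; R2red = 0.50      # Made-up R-squared values
p = (R2full - R2red)/(1 - R2red) # Proportion of remaining variation explained
Fstar = ((n-k-1)/q) * (p/(1-p))  # F statistic for the full vs. reduced test
q*Fstar/(q*Fstar + n - k - 1)    # Recovers p
\end{verbatim}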
\newpage
\begin{verbatim}
qf(0.95,df1=6,df2=122)       # Critical value for F, not in any table
qt(0.975,df=122)             # Critical value for t
XpX = t(X)%*%X
XpXinv = solve(XpX)
install.packages("readxl")   # Only need to do this once. There are no dependencies.
library(readxl)              # Load the package
hungry = read_excel("Diet.xlsx")
math = read.table("http://www.utstat.toronto.edu/~brunner/data/legal/mathtest.txt")
head(math)
colnames(math) = c("ID","HScalcMark","PreCalcScore","CalcScore","UnivCalcMark")
summary(math)
cor(math)
attach(math)
mathmod = lm(UnivCalcMark ~ HScalcMark+PreCalcScore+CalcScore, data = math)
cellmeans = lm(lper100k ~ 0+Cntry+weight+length)   # Cell means coding, no intercept
summary(mathmod)
betahat = coefficients(mathmod)
epsilonhat = residuals(mathmod)
plot(HScalcMark,epsilonhat)
yhat = fitted.values(mathmod)
hii = hatvalues(mathmod)
cd = cooks.distance(mathmod); summary(cd)
MSE.XpXinv = vcov(mathmod)                   # MSE (X'X)^{-1}
a = as.matrix(c(0,0,-1,1))
se = sqrt( t(a)%*%MSE.XpXinv%*%a )           # Standard error of the difference
t.025 = qt(0.975, df=df.residual(mathmod))   # Critical value for the interval
me95 = as.numeric( t.025*se )                # 95% margin of error
estdiff = as.numeric( t(a) %*% betahat ); estdiff
Lower95 = estdiff - me95; Upper95 = estdiff + me95; c(Lower95, Upper95)
ti = rstudent(mathmod)           # Studentized deleted residuals
alpha = 0.05; a = alpha/200      # Bonferroni correction for n = 200 tests
bcrit = qt(1-a/2,dfe-1); bcrit   # dfe = n-k-1, so dfe-1 matches t(n-k-2)
source("http://www.utstat.utoronto.ca/~brunner/Rfunctions/ftest.txt")
ftest(model, C, t=0)             # H0: C beta = t
C1 = rbind( c(0,1,0,0),
            c(0,0,1,0),
            c(0,0,0,1) )
ftest(mod,C1)
anova(reducedmodel, fullmodel)
c1 = numeric(n); c1[Cntry=='Europ'] = 1; table(c1,Cntry)
c2 = numeric(n); c2[Cntry=='Japan'] = 1; table(c2,Cntry)
c3 = numeric(n); c3[Cntry=='US'] = 1; table(c3,Cntry)
wc1 = weight*c1; wc2 = weight*c2   # Interaction terms
uneqslope = lm(lper100k ~ weight+c1+c2+wc1+wc2)
Cntry = factor(Cntry)
contrasts(Cntry)
Country = Cntry
contrasts(Country) = contr.treatment(3,base=3)
\end{verbatim}
\end{document}