% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{amsfonts} % for \mathbb{R} The set of reals
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
\title{Interpretation of regression coefficients\footnote{See last slide for copyright information.}}
\subtitle{STA 302 Fall 2020}
\date{} % To suppress date
\begin{document}
\begin{frame}
\titlepage
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Average response}
%\framesubtitle{}
The model says
{\LARGE
\begin{displaymath}
E(y) = \beta_0 + \beta_1 x_{1} + \cdots + \beta_k x_{k}
\end{displaymath}
} \pause
\begin{itemize}
\item Can be viewed as a conditional expected value, given the values $x_1, \ldots, x_k$. \pause
\item Theoretically, there is a sub-population for each set of $x_1, \ldots, x_k$ values. \pause
\item $E(y|x_1, \ldots, x_k)$ is the sub-population mean (average response) for that sub-population.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$E(y|\mathbf{x}) = \beta_0 + \beta_1 x_{1} + \cdots + \beta_k x_{k}$}
%\framesubtitle{}
{\LARGE
\begin{displaymath}
g(x_1, \ldots, x_k) = \beta_0 + \beta_1 x_{1} + \cdots + \beta_k x_{k}
\end{displaymath}
} \pause
Examine $g(x_1, \ldots, x_k)$ as a mathematical function, to see what the regression coefficients mean.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Simple regression}
\framesubtitle{$y_i = \beta_0 + \beta_1 x_i + \epsilon_i$}
{\Large
\begin{displaymath}
g(x) = \beta_0 + \beta_1 x
\end{displaymath}
} \pause
{\small
\begin{itemize}
\item The equation of a straight line. \pause
\item Say $x$ is income and $y$ is credit card debt. \pause
\item $\beta_1>0$ would mean that higher income tends to go with higher debt, on average. \pause
\item Call it a ``positive (linear) relationship." \pause
\item $\beta_1<0$ would mean that higher income tends to go with lower debt, on average. \pause
\item Call it a ``negative (linear) relationship." \pause
\item If the model is correct, $\beta_1=0$ would mean that there is no connection at all between income and average credit card debt. \pause
\item This is why testing $H_0:\beta_1=0$ is so important.
\end{itemize}
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Testing $H_0:\beta_1=0$}
\framesubtitle{An example of $H_0:\mathbf{a}^\prime\boldsymbol{\beta}=t_0$}
{\LARGE
\begin{displaymath}
t = \frac{\mathbf{a}^\prime\widehat{\boldsymbol{\beta}}-t_0} {\sqrt{\mbox{\emph{MSE}~}\mathbf{a}^\prime(X^\prime X)^{-1}\mathbf{a}}} ~ \stackrel{H_0}{\sim} ~ t(n-k-1)
\end{displaymath}
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Estimated regression coefficients}
\framesubtitle{$\widehat{E(y|x)} = \widehat{\beta}_0 + \widehat{\beta}_1 x$}
\pause
\begin{itemize}
\item The same talk applies, with the addition of ``estimated" or ``predicted." \pause
\item \emph{Estimated} average credit card debt is higher for consumers with higher incomes (if $\widehat{\beta}_1>0$). \pause
\item \emph{Predicted} credit card debt is higher for consumers with higher incomes (if $\widehat{\beta}_1>0$). \pause
\item \emph{Estimated} average credit card debt is lower for consumers with higher incomes (if $\widehat{\beta}_1<0$). \pause
\item \emph{Predicted} credit card debt is lower for consumers with higher incomes (if $\widehat{\beta}_1<0$). \pause
\item Suppose annual income is in thousands of dollars. The question says: ``When annual income is \$1,000 higher, estimated average credit card debt is \underline{\hspace{15mm}} higher. The answer is a number from your printout." \pause Write the value of $\widehat{\beta}_1$.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Sometimes loose language is okay}
\pause
%\framesubtitle{}
\begin{itemize}
\item Technically, regression is about the connection between $x$ and \emph{expected}, or \emph{average} $y$.
\pause
\item But sometimes people (and my questions) speak just of the relationship between $x$ and $y$. \pause
\item Like the relationship between High School GPA and University GPA. \pause
\item Yes, technically $g(x) = \beta_0 + \beta_1 x$ gives the relationship between High School GPA and \emph{average} University GPA. \pause
\item But it's harmless -- actually it's helpful. If necessary you can clarify.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Plain language is important}
\pause
%\framesubtitle{}
{ \footnotesize
\begin{itemize}
\item If you can only be understood by mathematicians and statisticians, your knowledge is much less valuable. \pause
\item Often a question will say ``Give the answer in plain, non-statistical language." \pause
\item This means if $x$ is income and $y$ is credit card debt, you make a statement about income and average or predicted credit card debt, like the ones on the preceding slides. \pause
\item If you use mathematical notation or words like null hypothesis, unbiased estimator, p-value or statistically significant, you will lose a lot of marks even if the statement is correct. Even avoid ``positive relationship," and so on. \pause
\item If the study is about fish, talk about fish. \pause
\item If the study is about blood pressure, talk about blood pressure. \pause
\item If the study is about breaking strength of yarn, talk about breaking strength of yarn. \pause
\item Assume you are talking to your boss, who was a Commerce major and does not like to feel stupid.
\end{itemize}
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{We will be guided by hypothesis tests with $\alpha = 0.05$}
\framesubtitle{For plain-language conclusions}
\pause
\begin{itemize}
\item If we do not reject a null hypothesis like $H_0:\beta_1=0$, we will not draw a definite conclusion.
\pause
\item Instead, say things like: \pause
\begin{itemize}
\item There is no evidence of a connection between blood sugar level and mood. \pause
\item These results are not strong enough for us to conclude that attractiveness is related to mark in first-year Computer Science. \pause
\item These results are consistent with no effect of dosage level on bone density. \pause
\end{itemize}
\item If the null hypothesis is not rejected, please do \emph{not} claim that the drug has no effect, etc. \pause
\item In this we are taking Fisher's side in a historical fight between Fisher on one side and Neyman \& Pearson on the other. \pause
\item Though we are guided by $\alpha = 0.05$, we \emph{never} mention it when plain language is required.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{A technical issue}
%\framesubtitle{}
{ \small
\begin{itemize}
\item In this class we will avoid one-tailed tests. \pause
\item Why? Ask what would happen if the results were strong and in the opposite direction to what was predicted (dental example). \pause
\item But when $H_0$ is rejected, we still draw directional conclusions. \pause
\item For example, if $x$ is income and $y$ is credit card debt, we test $H_0: \beta_1=0$ with a two-sided $t$-test. \pause
\item Say $p = 0.0021$ and $\widehat{\beta}_1 = 1.27$. We say ``Consumers with higher incomes tend to have more credit card debt." \pause
\item Is this justified? We'd better hope so, or all we can say is ``There is a connection between income and average credit card debt." \pause
\item Then they ask: ``What's the connection? Do people with lower income have more debt?" \pause
\item And you have to say ``Sorry, I don't know." \pause
\item It's a good way to get fired, or at least look silly.
\end{itemize}
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{The technical resolution}
%\framesubtitle{}
{\small
\begin{itemize}
\item Decompose the two-sided test into a set of two one-sided tests with significance level $\alpha/2$, equivalent to the two-sided test.
\begin{center}
\includegraphics[width=3in]{Decompose}
\end{center} \pause
\item In practice, just look at the sign of the regression coefficient. \pause
\item Under the surface you are decomposing the two-sided test, but you never mention it. \pause
\item \emph{Marking rule}: If the question asks for plain language and you draw a non-directional conclusion when a directional conclusion is possible, you get half marks.
\end{itemize}
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Multiple regression}
%\framesubtitle{}
{\LARGE
\begin{displaymath}
g(x_1, \ldots, x_k) = \beta_0 + \beta_1 x_{1} + \cdots + \beta_k x_{k}
\end{displaymath}
} \pause
\begin{itemize}
\item It's the equation of a hyper-plane, a $k$-dimensional surface in $k+1$ dimensions. \pause
\item Again, think of a sub-population at each combination of $x$ values. \pause
\item $g(x_1, \ldots, x_k)$ is the average response at that set of values.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$g(x_1, \ldots, x_k) = \beta_0 + \beta_1 x_{1} + \cdots + \beta_k x_{k}$}
%\framesubtitle{}
\begin{itemize}
\item Hold all the $x$ values except $x_j$ fixed. \pause
\item That is, do it in your mind. We are studying the function $g(\mathbf{x})$.
\pause
\begin{eqnarray*}
g(\mathbf{x}) & = & \beta_0 + \beta_1 x_{1} + \cdots + \beta_k x_{k} \\ \pause
& = & (\beta_0 + \sum_{i \neq j}\beta_i x_i) + \beta_j x_j\\ \pause
& = & ~~~~~~~~~\alpha_0 \hspace{10mm} + \beta_j x_j \pause
\end{eqnarray*}
\item Another straight line. \pause
\item The slope is unaffected by where you hold those other variables constant. \pause
\item The intercept is affected, but usually nobody cares.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{How to talk about it}
\pause
%\framesubtitle{}
\begin{itemize}
\item With all other $x$ values held constant as $x_j$ varies, $E(y) = \alpha_0 + \beta_j x_j$. \pause
\item We talk about it as before, but say ``controlling for" or ``allowing for" or ``taking into account" or ``correcting for" the other variables. \pause
\item Controlling for parents' income, there is no evidence of a relationship between education and career success. \pause
\item Allowing for age, there is still a tendency for adults who exercise more to have lower blood pressure. \pause
\item These results are corrected for age, sex and severity of disease. \pause
\item Holding other variables constant, a student who studies one hour more per day is predicted to have a grade point average that is 0.47 higher.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Call it \emph{model-based control}}
\pause
%\framesubtitle{}
\begin{itemize}
\item This is a big selling point for multiple regression of all kinds. \pause
\item To see what happens when variables are held constant at certain values, you don't literally have to hold them constant. \pause
\item Like ``controlling for number of cigarettes smoked per day \ldots" \pause
\item[]
\item It's valid provided that the model is approximately correct. \pause
\item It's risky outside the range of the data.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Correlation-causation}
\pause
% \framesubtitle{}
\begin{itemize}
\item In the model, the $x$ values are literally producing $y$. \pause
\item For real data, this may be true, and it may not. \pause
\item A real (non-chance) connection between $x$ and $y$ does not establish \emph{why} the connection exists. \pause
\item People say ``Correlation does not imply causation." \pause
\item By \emph{correlation} they mean any kind of non-independence.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Examples}
\pause
%\framesubtitle{}
\begin{itemize}
\item Exercise and arthritis pain. \pause
\item The Mozart effect. \pause
\item Private music lessons, athletic training. \pause
\item Baldness and wearing a hat. \pause
\item Smoking and lung cancer. \pause
\item Vitamin B and spina bifida.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Solution?}
%\framesubtitle{}
\begin{itemize}
\item The best solution is random assignment, \pause
\item But this is not always possible. \pause
\item Be aware of the correlation-causation issue when making plain-language statements about the results of a statistical analysis. \pause
\item Watch out for going too far beyond what the data are actually telling you.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto.
It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f20} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f20}}
\end{frame}
\end{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{}
%\framesubtitle{}
\begin{itemize}
\item
\item
\item
\end{itemize}
\end{frame}
{\LARGE
\begin{displaymath}
\end{displaymath}
}
# Picture of curve for decomposing 2-sided test
rm(list=ls())
x = seq(from=-3.5,to=3.5,length=500); y = dnorm(x)
plot(x,y,type='l', ann=F, axes=F, ylim = c(-.1,.8))
# Draw an axis line below
x1 = c(-4,4); y1 = c(0,0); lines(x1,y1)
# Draw cutoffs
x2 = c(-1.3,-1.3); y2 = c(0,dnorm(-1.3)); lines(x2,y2)
x3 = c(1.3,1.3); y3 = c(0,dnorm(1.3)); lines(x3,y3)
lo = expression(paste(-t[alpha/2])); text(-1.3,-0.03,lo)
hi = expression(paste(t[alpha/2])); text(1.3,-0.03,hi)
alphaover2 = expression(alpha/2)
text(-1.7,.04,alphaover2); text(1.7,.04,alphaover2)