% Vent Damper (matched t) for Applied Stat I % Notes and comments at the end % \documentclass[serif]{beamer} % Serif for Computer Modern math font. \documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements \hypersetup{colorlinks,linkcolor=,urlcolor=red} % To create handout using article mode: Comment above and uncomment below (2 places) %\documentclass[12pt]{article} %\usepackage{beamerarticle} %\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=red]{hyperref} % For live Web links with href in article mode %\usepackage{fullpage} \usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice! \setbeamertemplate{navigation symbols}{} % Supress navigation symbols \usetheme{Berlin} % Displays sections on top \usepackage[english]{babel} % \definecolor{links}{HTML}{2A1B81} % \definecolor{links}{red} \setbeamertemplate{footline}[frame number] \mode % \mode{\setbeamercolor{background canvas}{bg=black!5}} \title{The vent damper example\footnote{See last slide for copyright information.}} \subtitle{STA442/2101 Fall 2014} \date{} % To suppress date \begin{document} \begin{frame} \titlepage \end{frame} \begin{frame} \frametitle{The Vent damper data} \framesubtitle{Based on a Minitab data set, but all the numbers are different} When a furnace is off, the chimney does not need to be open. Maybe, closing it can conserve energy. A vent damper is a kind of barrier that closes the chimney. Dampers on fireplaces are manual, but in modern heating systems they are automatic. Automatic vent dampers can be either electrical or thermal. Which kind saves more energy? \end{frame} \begin{frame} \frametitle{Vent damper continued} Forty houses were randomly assigned to have electrical vent dampers installed, and 50 were randomly assigned to have thermal vent dampers. Average daily energy consumption was measured during two consecutive weeks in the winter. For one week the vent damper was active (turned on), and the other week it was inactive (turned off). Within damper type, equal numbers of houses were randomly assigned to have the damper on during the first week and the second week. \vspace{3mm} The cases are houses. The three variables in the file are energy consumption with vent damper active, energy consumption with vent damper inactive, and type of damper. The first question is whether it's true that energy consumption is less when the damper is active. For this analysis, we will ignore whether the vent damper is electrical or thermal. \end{frame} \begin{frame} \frametitle{The main question (for now)} % \framesubtitle{} Does the vent damper affect energy consumption, and if so, by how much? \begin{itemize} \item Let's do a $t$-test. \pause \item It's natural to calculate a \emph{difference} in energy consumption for each house, and test whether the mean difference equals zero. \pause \item Or, we could do a two-sample $t$-test with $n_1=n_2$. \item Which one is better? \item A test implies a model; compare the models. \end{itemize} \end{frame} \begin{frame}{Model for the matched $t$-test} Independently for $i=1, \ldots, n$, observe $(X_i,Y_i)$. \begin{itemize} \item $X_i \sim N(\mu_1,\sigma^2_1)$, $Y_i \sim N(\mu_2,\sigma^2_2)$. \item $Cov(X_i,Y_i)=\sigma_{12}$. \item Calculate Differences $D_i=X_i-Y_i$ \item Matched $t$-test on $D_1, \ldots, D_n$ \item $H_0: \mu=0$, where $\mu = E(D_i) = \mu_1-\mu_2$ \end{itemize} \vspace{5mm} Test statistic is \begin{displaymath} T_1 = \frac{\sqrt{n}(\overline{D}-0)}{S} \end{displaymath} with $df=n-1$. \end{frame} \begin{frame}{Independent $t$-test}{Correct if $\sigma^2_1=\sigma^2_2$ and $\sigma_{12}=0$} \begin{displaymath} T_2 = \frac{\overline{X}-\overline{Y}}{S_p \sqrt{\frac{1}{n_1} + \frac{1}{n_2}}}, \end{displaymath} with $df=n_1+n_2-2$, where %\vspace{5mm} \begin{displaymath} S^2_p = \frac{\sum_{i=1}^{n_1}(X_i-\overline{X})^2 + \sum_{i=1}^{n_2}(Y_i-\overline{Y})^2} {n_1+n_2-2} \end{displaymath} \end{frame} \begin{frame}{Comparing the Tests} \begin{eqnarray*} T_1 & = & \frac{\sqrt{n}(\overline{D}-0)}{S},~~df=n-1 \\ \\ T_2 & = & \frac{\overline{X}-\overline{Y}}{S_p \sqrt{\frac{1}{n_1} + \frac{1}{n_2}}},~~df=2(n-1) \end{eqnarray*} \begin{itemize} \item The two-sample test pretends it has twice the degrees of freedom. \item Could cause worry about inflated Type I error rate \item But both critical values go to $z_{\alpha/2}$ as $n \rightarrow \infty$. \item For example, for $n=100$, $t_{0.975}(99) = 1.98$ while $t_{0.975}(198) = 1.97$. \item So if there is a problem with $df$, it will be for small samples. \end{itemize} \end{frame} \begin{frame}{Comparing the Test Statistics} \begin{eqnarray*} T_1 & = & \frac{(\overline{D}-0)}{S/\sqrt{n}},~~df=n-1 \\ \\ T_2 & = & \frac{\overline{X}-\overline{Y}}{S_p \sqrt{\frac{1}{n_1} + \frac{1}{n_2}}},~~df=2(n-1) \end{eqnarray*} \begin{itemize} \item $\overline{D} = \frac{1}{n} \sum_{i=1}^n (X_i-Y_i) = \overline{X}-\overline{Y}$ \item So the numerators are the same. \item Compare denominators \end{itemize} \end{frame} \begin{frame}{One-Sample (Matched) $t$-Test} {\small \begin{eqnarray*} S^2/n & = & \frac{1}{n(n-1)} \sum_{i=1}^n (D_i-\overline{D})^2 \\ & = & \frac{1}{n(n-1)} \sum_{i=1}^n \left(X_i-Y_i - (\overline{X}-\overline{Y})\right)^2 \\ & = & \frac{1}{n(n-1)} \sum_{i=1}^n \left((X_i-\overline{X}) - (Y_i-\overline{Y})\right)^2 \\ & = & \frac{1}{n} \left[ \frac{\sum_{i=1}^n(X_i-\overline{X})^2}{n-1} - 2 \frac{\sum_{i=1}^n(X_i-\overline{X}) (Y_i-\overline{Y})}{n-1} \right. \\ && ~~+ \left. \frac{\sum_{i=1}^n(Y_i-\overline{Y})^2}{n-1} \right]\\ & = & \frac{1}{n} \left[S^2_x -2 S_{xy} + S^2_y\right] \end{eqnarray*} where $S_{xy}$ is the sample covariance. } % End size \end{frame} \begin{frame}{Two-Sample (Independent) $t$-Test}{With $n_1=n_2=n$} \begin{eqnarray*} S^2_p \left(\frac{1}{n_1} + \frac{1}{n_2}\right) & = & \frac{(n_1-1)S^2_x + (n_2-1)S^2_y}{n_1+n_2-2} \left(\frac{1}{n_1} + \frac{1}{n_2}\right) \\ & = & \frac{(n-1)(S^2_x + S^2_y)}{n+n-2} \left(\frac{2}{n}\right) \\ & = & \frac{(n-1)(S^2_x + S^2_y)}{2(n-1)} \left(\frac{2}{n}\right) \\ & = & \frac{S^2_x + S^2_y}{n} \end{eqnarray*} \end{frame} \begin{frame}{Comparing (Squared) Denominators}% {Of the $t$ Statistics} \begin{eqnarray*} S^2_p \left(\frac{1}{n_1} + \frac{1}{n_2}\right) & = & \frac{1}{n} \left[ S^2_x + S^2_y \right] \\ \\ S^2/n & = & \frac{1}{n} \left[S^2_x -2 S_{xy} + S^2_y\right] \end{eqnarray*} {\footnotesize \begin{itemize} \item If covariance is zero, they are the same. \item If covariance is negative \begin{itemize} \item Denominator of two-sample $t$ is too small. \item Value of $t$ too large. \item Null hypothesis rejected too often. \end{itemize} \item If covariance is positive % (realistic) \begin{itemize} \item Denominator of two-sample $t$ is too large. \item Value of $t$ too small. \item Null hypothesis \emph{less} likely to be rejected. \item If $H_0$ is false, expect loss of power. \end{itemize} \end{itemize} } % End size \end{frame} \begin{frame}{Covariance should be positive: Why?}{A more detailed model} Independently for $i=1, \ldots, n$, observe $(X_i,Y_i)$ where \begin{eqnarray*} X_i & = & \delta + Z_i + \epsilon_{i1} \\ Y_i & = & ~~~~~ Z_i + \epsilon_{i2} \end{eqnarray*} \begin{itemize} \item $X_i$ is the measurement with vent damper active. \item $\delta$ is the effect of having the vent damper active. \item $Z_i$ reflects characteristics of the individual house (surface area, insulation, habits of the occupants, etc.). $Z_i \sim N(\mu_z,\sigma^2_z)$. \item $\epsilon_{ij}$ reflects other influences not specific to the house (measurement error, weather etc.). $\epsilon_{ij} \sim N(0,\sigma_j)$ for $j=1,2$. \item $Z_i$ and $\epsilon_{ij}$ all independent. \end{itemize} \end{frame} \begin{frame}{Covariance is positive}{And could be quite large} \begin{eqnarray*} X_i & = & \delta + Z_i + \epsilon_{i1} \\ Y_i & = & ~~~~~ Z_i + \epsilon_{i2} \end{eqnarray*} {\LARGE \begin{displaymath} Cov(X_i,Y_i) = Var(Z_i) \end{displaymath} } % End size \begin{itemize} \item Notice $Z_i$ cancels in $D_i=X_i-Y_i$. \item A lot of extraneous variance is removed. \item Each house serves as its own control. \item And $\delta = \mu_1-\mu_2$. \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Read the data} %\framesubtitle{} {\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > furnace = read.table("http://www.utstat.utoronto.ca/~brunner/ appliedf14/code_n_data/lecture/damper.data") # URL is all on one line > head(furnace) \end{verbatim} } % End color \begin{verbatim} damper active inactive 1 EVD 10.71 11.46 2 EVD 7.45 7.91 3 EVD 9.25 10.48 4 EVD 15.91 16.97 5 EVD 7.47 7.88 6 EVD 7.29 7.75 \end{verbatim} } % End size Type \texttt{furnace} to look at the whole data set. \end{frame} \begin{frame}[fragile] \frametitle{Summary of Difference score} %\framesubtitle{} {\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > attach(furnace) # Make variable names available > diff = active-inactive; summary(diff) \end{verbatim} } % End color \begin{verbatim} Min. 1st Qu. Median Mean 3rd Qu. Max. -3.9800 -1.0550 -0.7100 -0.7747 -0.4175 0.8700 \end{verbatim} } % End size \end{frame} \begin{frame}[fragile] \frametitle{Histogram of Difference score} %\framesubtitle{} {\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > hist(diff) \end{verbatim} } % End color } % End size \begin{center} \includegraphics[width=2.75in]{hist} \end{center} \end{frame} \begin{frame}[fragile] \frametitle{Check the number} %\framesubtitle{} {\scriptsize {\color{blue} \begin{verbatim} > sort(diff) # Looks like just one possible outlier \end{verbatim} } % End color \begin{verbatim} [1] -3.98 -2.29 -1.95 -1.94 -1.59 -1.57 -1.54 -1.53 -1.53 -1.52 -1.38 -1.34 -1.32 -1.23 -1.19 -1.18 -1.17 [18] -1.11 -1.11 -1.10 -1.09 -1.09 -1.06 -1.04 -1.03 -1.01 -0.99 -0.98 -0.97 -0.91 -0.90 -0.89 -0.89 -0.85 [35] -0.83 -0.81 -0.80 -0.80 -0.79 -0.75 -0.75 -0.74 -0.73 -0.72 -0.72 -0.70 -0.70 -0.68 -0.68 -0.67 -0.67 [52] -0.67 -0.66 -0.62 -0.61 -0.60 -0.59 -0.58 -0.57 -0.56 -0.49 -0.47 -0.47 -0.46 -0.46 -0.46 -0.44 -0.41 [69] -0.41 -0.38 -0.37 -0.37 -0.37 -0.32 -0.30 -0.30 -0.29 -0.27 -0.26 -0.23 -0.23 -0.21 -0.18 -0.15 -0.11 [86] 0.17 0.25 0.29 0.38 0.87 \end{verbatim} {\color{blue} \begin{verbatim} > (1:90)[diff==min(diff)] \end{verbatim} } % End color \begin{verbatim} [1] 90 \end{verbatim} It's House number 90. } % End size \end{frame} \begin{frame}[fragile] \frametitle{How unusual is that observation?} \framesubtitle{Standardize} If $D_i$ are normal, $Z_i = \frac{D_i-\overline{D}}{S_d}$ are approximately standard normal %{\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > Z = (diff-mean(diff))/sd(diff); sort(Z)[1:5] \end{verbatim} } % End color \begin{verbatim} [1] -5.177325 -2.447600 -1.898424 -1.882272 -1.316944 \end{verbatim} %} % End size \vspace{5mm} \pause $Z = -5.177$ is really unusual. Homework: What's the probability of getting one or more this big in absolute value for $n=90$ independent standard normal data? \end{frame} \begin{frame}[fragile] \frametitle{Does it look like an outlier in two dimensions?} %\framesubtitle{} {\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > plot(inactive,active) \end{verbatim} } % End color } % End size \begin{center} \includegraphics[width=2.75in]{plot} \end{center} \end{frame} \begin{frame} \frametitle{What is unusual about House Number 90?} %\framesubtitle{} \begin{itemize} \item Looking in the Minitab handbook, find \emph{nothing} remarkable about that house on any of about 10 variables. \item In a real data analysis job, look harder. \item I don't want to throw it out unless I know \emph{why} it's different. \pause \item Do the analyses with and without this house. If the conclusions are similar, we are happy. \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Matched $t$-test with the full data set} %\framesubtitle{} {\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > t.test(diff) # Matched t \end{verbatim} } % End color \begin{verbatim} One Sample t-test data: diff t = -11.8705, df = 89, p-value < 2.2e-16 alternative hypothesis: true mean is not equal to 0 95 percent confidence interval: -0.9043367 -0.6449966 sample estimates: mean of x -0.7746667 \end{verbatim} % Could have said: \texttt{t.test(active,inactive,paired=T)} Conclusion: \pause Use of the vent damper reduces average energy consumption. } % End size \end{frame} \begin{frame}[fragile] \frametitle{Two-sample $t$-test with the full data set} %\framesubtitle{} {\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > t.test(active,inactive,var.equal = T) # Two-sample (independent) t \end{verbatim} } % End color \begin{verbatim} Two Sample t-test data: active and inactive t = -1.7437, df = 178, p-value = 0.08294 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -1.6513757 0.1020424 sample estimates: mean of x mean of y 10.70844 11.48311 \end{verbatim} Conclusion: These results are consistent with no effect of vent damper. } % End size \end{frame} \begin{frame}[fragile] \frametitle{Matched $t$-test with outlier deleted} \framesubtitle{Including outlier, had $t = -11.8705$} {\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > activeDel = active[-90] # Could have said active[1:89] > inactiveDel = inactive[-90] # Can give it a list, like active[-c(1,2,3,90)] > # But watch out for missing values! > diffDel = activeDel-inactiveDel > t.test(diffDel) \end{verbatim} \pause } % End color \begin{verbatim} One Sample t-test data: diffDel t = -13.421, df = 88, p-value < 2.2e-16 alternative hypothesis: true mean is not equal to 0 95 percent confidence interval: -0.8480265 -0.6292769 sample estimates: mean of x -0.7386517 \end{verbatim} } % End size \end{frame} \begin{frame}[fragile] \frametitle{Two-sample $t$-test with outlier deleted} \framesubtitle{Including outlier, had $t = -1.7437$} {\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > t.test(activeDel,inactiveDel,var.equal = T) \end{verbatim} } % End color \begin{verbatim} Two Sample t-test data: activeDel and inactiveDel t = -1.6481, df = 176, p-value = 0.1011 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -1.6231323 0.1458289 sample estimates: mean of x mean of y 10.73933 11.47798 \end{verbatim} } % End size \end{frame} \begin{frame} \frametitle{Comments} %\framesubtitle{} \begin{itemize} \item As expected, the two-sample $t$-test was less sensitive. \item Because the covariance between energy consumption with vent damper active and vent damper inactive was large, the two-sample $t$-test was \emph{much} less sensitive. \item Deleting the outlier actually made the results more convincing, even though it made the largest contribution to the observed difference between means. \item If the confidence interval for the effect of vent damper could be converted to dollars, it would be very meaningful. % You can't because energy consumption was screwy: BTU divided by (house areaa x weather). \end{itemize} \end{frame} \begin{frame} \frametitle{What about normality?} \framesubtitle{The $t$-test assumes normality,} \begin{itemize} \item If the outlier is dropped, the data might be normal, maybe. \pause \item Try a sign test. \item Then discuss robustness. \end{itemize} \end{frame} \begin{frame} \frametitle{Sign test} %\framesubtitle{} \begin{itemize} \item Under the null hypothesis of no effect, energy consumption with vent damper active and vent damper inactive are identically distributed. \item Therefore if the distributions are continuous, $Pr\{X_i > Y_i\}= Pr\{X_i < Y_i\} = \frac{1}{2}$. \item (What do you have to assume about the \emph{joint} distribution to actually show this?) \pause \item Use a Bernoulli model and test $H_0: \theta=0.5$ \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Calculate the sign test} %\framesubtitle{} {\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > # Sign test > neg = length(diff[diff<0]); pos = length(diff[diff>0]) > neg; pos \end{verbatim} } % End color \begin{verbatim} [1] 85 [1] 5 \end{verbatim} \pause {\color{blue} \begin{verbatim} > p = neg/90 > Z = sqrt(90)*(p-1/2)/sqrt(p*(1-p)); Z \end{verbatim} } % End color \begin{verbatim} [1] 18.40716 \end{verbatim} There can be no doubt of the effect. } % End size \end{frame} \begin{frame} \frametitle{Also, the $t$-test is \emph{robust} with respect to normality} %\framesubtitle{} Compare $T = \frac{\sqrt{n}(\overline{D}-0)}{S}$ to $Z_n = \frac{\sqrt{n}(\overline{D}-0)}{S}$. \pause \begin{itemize} \item Central Limit Theorem says $Z_n \stackrel{d}{\rightarrow} Z \sim N(0,1)$. \item And as $df \rightarrow \infty$, the $t$ distribution becomes standard normal. \item So for large enough samples, the assumption of normality used in the derivation of the $t$-test is not actually necessary. \end{itemize} \pause \vspace{5mm} Moral of the story: It's always nice when the model is realistic, but when the model is unrealistic it may or may not matter. We have to study the consequences of model incorrectness. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf14} {\footnotesize \texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf14}} \end{frame} \end{document} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{} %\framesubtitle{} \begin{itemize} \item \item \item \end{itemize} \end{frame}