% Estimate effect of a treatment for STA305 (Experimental Design)
% Notes and comments at the end
% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
% To create handout using article mode: Comment above and uncomment below (2 places)
%\documentclass[12pt]{article}
%\usepackage{beamerarticle}
%\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=red]{hyperref} % For live Web links with href in article mode
%\usepackage{fullpage}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
\usetheme{Berlin} % Displays sections on top
\usepackage[english]{babel}
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
% \mode<presentation>{\setbeamercolor{background canvas}{bg=black!5}}
\title{Estimating the effect of an experimental treatment\footnote{See last slide for copyright information.}}
\subtitle{STA305 Winter 2014}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Sources}
\framesubtitle{You don't need to look at these.}
\begin{itemize}
\item \emph{Theory of the design of experiments} (Cox and Reid, 2000)
\item \emph{Sampling design and analysis} (Lohr, 2009)
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Assumption of unit-treatment additivity}
Experimental units are randomly assigned to either a treatment condition or a control condition.
\begin{itemize}
\item Say the treatment has an effect.
\item What effect?
\item Suppose the treatment adds the same constant $\Delta$ to the response of each unit receiving the treatment.
\item Cox and Reid call this the ``Assumption of unit-treatment additivity.''
\item Certainly it's not the only possibility.
\item But it's very standard.
\end{itemize}
\end{frame}

\section{Randomization model}

\begin{frame}
\frametitle{Random assignment is like sampling from a finite population}
\framesubtitle{Use sample survey notation (Lohr, 2009)}
\begin{itemize}
\item We have $N$ experimental units.
\item Sample $n$ without replacement for the experimental group.
\item For $i = 1, \ldots, N$ let
\begin{displaymath}
Z_i = \left\{ \begin{array}{ll} % ll means left left
1 & \mbox{if unit $i$ is chosen} \\
0 & \mbox{if unit $i$ is not chosen}
\end{array} \right. % Need that crazy invisible right period!
\end{displaymath}
\item $E(Z_i)= P(Z_i=1)= \frac{n}{N}$
\item $Var(Z_i) = \frac{n}{N}\left(1-\frac{n}{N}\right)$
\item $Cov(Z_i,Z_j) = - \frac{n}{N}\left(1-\frac{n}{N}\right)/(N-1)$ for $i \neq j$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{More definitions and properties}
\framesubtitle{$Z_i=1$ if unit $i$ is selected, zero otherwise.}
If all experimental units were in the control condition, their response variable values would have been $y_1, \ldots, y_N$.
\begin{displaymath}
\begin{array}{lcl}
\overline{y}_u = \frac{1}{N}\sum_{i=1}^N y_i, & \overline{y} = \frac{1}{n}\sum_{i=1}^N Z_i y_i, & S^2 = \frac{1}{N-1}\sum_{i=1}^N (y_i-\overline{y}_u)^2 \\
&& \\
E(\overline{y}) = \overline{y}_u & & Var(\overline{y}) = \frac{S^2}{n}\left(1-\frac{n}{N}\right) \\
&& \\ \hline
\end{array}
\end{displaymath}
%\vspace{3mm}
\begin{itemize}
\item $\overline{y}_1 = \frac{1}{n}\sum_{i=1}^N Z_i (y_i+\Delta)$
\item $\overline{y}_2 = \frac{1}{N-n}\sum_{i=1}^N (1-Z_i) y_i$
\end{itemize}
\vspace{3mm}
Have $E(\overline{y}_1-\overline{y}_2)=\Delta$ and $Var(\overline{y}_1-\overline{y}_2)= \frac{S^2}{n\left(1-\frac{n}{N}\right)}$.
\end{frame}

\begin{frame}
\frametitle{Under the randomization model}
% \framesubtitle{}
\begin{itemize}
\item $E(\overline{y}_1-\overline{y}_2)=\Delta$
\item So $\overline{y}_1-\overline{y}_2$ is an \emph{unbiased estimator} of the treatment effect $\Delta$.
\item More later on the precision of this estimate.
\end{itemize}
\end{frame}

\section{Random sampling model}

\begin{frame}
\frametitle{Random sampling model}
% \framesubtitle{}
\begin{itemize}
\item Suppose the $N$ experimental units actually are a simple random sample from some large population.
\item Approximately, the observed values of the response variable are independent and identically distributed random variables.
\item Now we'll call the total sample size $n$.
\item Random assignment of $n_1$ units to the treatment condition yields two independent random samples from the same distribution, with expected value $\mu$ and variance $\sigma^2$.
\item Often assumed normal.
\item $n_1+n_2=n$.
\item The assumption of unit-treatment additivity says the treatment adds the constant $\Delta$ to the $n_1$ observations in the treatment condition.
\item Want to estimate and test hypotheses about $\Delta$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Estimating the treatment effect}
% \framesubtitle{}
\begin{displaymath}
\overline{Y}_1 = \frac{1}{n_1}\sum_{i=1}^{n_1}(Y_{i,1}+\Delta)
~~~~~~~~~~
\overline{Y}_2 = \frac{1}{n_2}\sum_{i=1}^{n_2}Y_{i,2}
\end{displaymath}
\begin{itemize}
\item $\widehat{\Delta} = \overline{Y}_1-\overline{Y}_2$ is an unbiased estimator of $\Delta$.
\item $Var(\overline{Y}_1-\overline{Y}_2)= \sigma^2\left(\frac{1}{n_1}+\frac{1}{n_2} \right)$
\item Want the estimate to be as precise as possible.
\begin{itemize}
\item Make $\sigma^2$ small somehow.
\item Make $n_1$ and $n_2$ big.
\item For fixed $n_1+n_2=n$, choose $n_1$ to minimize $Var(\widehat{\Delta})$.
\end{itemize}
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Extension to more treatments}
% \framesubtitle{}
\begin{itemize}
\item Can have $p$ treatment conditions (including control).
\item Effects $\Delta_1, \ldots, \Delta_{p-1}$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Dummy variable regression}
\framesubtitle{A very good way to write the model}
{\LARGE
\begin{displaymath}
Y_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1} + \epsilon_i
\end{displaymath}
} % End size
\vspace{15mm}
Make a table.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistics, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/305s14}
{\footnotesize \texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/305s14}}
\end{frame}

\end{document}

\begin{frame}
\frametitle{}
% \framesubtitle{}
\begin{itemize}
\item
\item
\end{itemize}
\end{frame}

Replaced ...

\begin{frame}
\frametitle{More definitions and properties}
\framesubtitle{$Z_i=1$ if unit $i$ is selected, zero otherwise.}
If all experimental units were in the control condition, their response variable values would have been $y_1, \ldots, y_N$.
% \vspace{3mm}
\begin{itemize}
\item $\overline{y}_u = \frac{1}{N}\sum_{i=1}^N y_i$
\item $S^2 = \frac{1}{N-1}\sum_{i=1}^N (y_i-\overline{y}_u)^2$
\item $\overline{y} = \frac{1}{n}\sum_{i=1}^N Z_i y_i$
\item $E(\overline{y}) = \overline{y}_u $
\item $Var(\overline{y}) = \frac{S^2}{n}\left(1-\frac{n}{N}\right)$
\item[]
\item $\overline{y}_1 = \frac{1}{n}\sum_{i=1}^N Z_i (y_i+\Delta)$ and $\overline{y}_2 = \frac{1}{N-n}\sum_{i=1}^N (1-Z_i) y_i$
\end{itemize}
\vspace{3mm}
Have $E(\overline{y}_1-\overline{y}_2)=\Delta$ and $Var(\overline{y}_1-\overline{y}_2)= \frac{S^2}{n\left(1-\frac{n}{N}\right)}$.
\end{frame}