% Non-Central Chi-squared for Applied Stat I % Notes and comments at the end % \documentclass[serif]{beamer} % Serif for Computer Modern math font. \documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements \hypersetup{colorlinks,linkcolor=,urlcolor=red} \usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice! \setbeamertemplate{navigation symbols}{} % Supress navigation symbols \usetheme{Berlin} % Displays sections on top \usepackage[english]{babel} % \definecolor{links}{HTML}{2A1B81} % \definecolor{links}{red} \setbeamertemplate{footline}[frame number] \mode % \mode{\setbeamercolor{background canvas}{bg=black!5}} \title{Non-Central Chi-squared\footnote{See last slide for copyright information.}} \subtitle{STA442/2101 Fall 2016} \date{} % To suppress date \begin{document} \begin{frame} \titlepage \end{frame} \begin{frame} \frametitle{Definition} If $X \sim N(\mu,1)$ then $Y=X^2$ is said to have a \emph{non-central chi-squared} distribution with degrees of freedom one and \emph{non-centrality parameter} $\lambda = \mu^2$. \vspace{10mm} \pause Write $Y \sim \chi^2(1,\lambda)$. \end{frame} \begin{frame} \frametitle{Facts about the non-central chi-squared distribution} \framesubtitle{$Y=X^2$ where $X \sim N(\mu,1)$} {\huge \begin{displaymath} Y \sim \chi^2(1,\lambda), \mbox{ where } \lambda \geq 0 \end{displaymath} } \pause \begin{itemize} \item $Pr\{Y>0\}=1$, of course. \pause \item If $\lambda=0$, the non-central chi-squared reduces to the ordinary central chi-squared. \pause \item The distribution is ``stochastically increasing" in $\lambda$, meaning that if $Y_1 \sim \chi^2(1,\lambda_1)$ and $Y_2 \sim \chi^2(1,\lambda_2)$ with $\lambda_1>\lambda_2$, then \pause $Pr\{Y_1>y\}$ $>$ $Pr\{Y_2>y\}$ for any $y>0$. \pause \item $\lim_{\lambda \rightarrow \infty} Pr\{Y>y\} = 1$ \pause \item There are efficient algorithms for calculating non-central chi-squared probabilities. R's \texttt{pchisq} function does it. \end{itemize} \end{frame} \begin{frame} \frametitle{Why is it called ``non-central?"} \framesubtitle{Recall that if $X \sim N(\mu,1)$ then $Y=X^2 \sim \chi^2(1,\lambda=\mu^2)$} \pause If $X \sim N(\mu,\sigma^2)$, then if we center it and scale it, \pause \begin{itemize} \item $Z^2 = \left(\frac{X-\mu}{\sigma}\right)^2 \sim \chi^2(1)$, \pause the \emph{central} chi-squared. \pause \item What if we scale it correctly, but center it to the wrong place? \pause \item $Z = \frac{X-\mu_0}{\sigma} \sim N(\frac{\mu-\mu_0}{\sigma},1)$ \pause \item And $Z^2$ is chi-squared with $df=1$ and non-centrality parameter \pause \end{itemize} \begin{displaymath} \lambda = \left( \frac{\mu-\mu_0}{\sigma} \right)^2 \end{displaymath} \pause This applies whether the normality is exact or asymptotic. \end{frame} \begin{frame} \frametitle{An example} \framesubtitle{Back to the coffee taste test} \pause $Y_1, \ldots, Y_n \stackrel{i.i.d.}{\sim} B(1,\theta)$ \vspace{3mm} $H_0:\theta=\theta_0=\frac{1}{2}$ %\vspace{3mm} Reject $H_0$ if $|Z_2| = \left|\frac{\sqrt{n}(\overline{Y}-\theta_0)}{\sqrt{\overline{Y}(1-\overline{Y})}} \right| > z_{\alpha/2}$ \vspace{10mm} \pause Suppose that in the population, 60\% of consumers would prefer the new blend. If we test 100 consumers, what is the probability of obtaining results that are statistically significant? \pause \vspace{3mm} That is, if $\theta=0.60$, what is the power for $n=100$? \pause Earlier, got 0.53 with a direct standard normal calculation. \end{frame} \begin{frame} \frametitle{Non-central chi-squared} \framesubtitle{Recall that if $X \sim N(\mu,\sigma^2)$, then $\left(\frac{X-\mu_0}{\sigma}\right)^2 \sim \chi^2\left(1,\left(\frac{\mu-\mu_0}{\sigma}\right)^2\right)$.} \pause % \vspace{3mm} For large $n$, the sample proportion $\overline{Y}$ is approximately normal with mean $\mu=\theta$ and variance $\sigma^2 = \frac{\theta(1-\theta)}{n}$. So, \pause \begin{eqnarray*} Z_2^2 & = & \left(\frac{\sqrt{n}(\overline{Y}-\theta_0)}{\overline{Y}(1-\overline{Y})}\right)^2 \\ \pause & \approx & \frac{(\overline{Y}-\theta_0)^2}{\theta(1-\theta)/n} \\ \pause & = & \left(\frac{\overline{Y}-\theta_0}{\sigma} \right)^2 \\ \pause & \stackrel{approx}{\sim} & \chi^2\left(1, n\,\frac{(\theta-\theta_0)^2}{\theta(1-\theta)}\right) \end{eqnarray*} \end{frame} \begin{frame} \frametitle{We have found that} %\framesubtitle{} The Wald chi-squared test statistic of $H_0:\theta=\theta_0$ \begin{displaymath} Z_2^2 = \frac{n(\overline{Y}-\theta_0)^2}{\overline{Y}(1-\overline{Y})} \end{displaymath} \pause has an asymptotic non-central chi-squared distribution with $df=1$ and non-centrality parameter \pause \begin{displaymath} \lambda = \frac{n(\theta-\theta_0)^2}{\theta(1-\theta)} \end{displaymath} \pause Notice the similarity of test statistic and non-centrality parameter, \pause and also that \begin{itemize} \item If $\theta=\theta_0$, then $\lambda=0$ and $Z_2^2$ has a central chi-squared distribution. \pause \item The probability of exceeding any critical value (power) can be made as large as desired by making $\lambda$ bigger. \pause \item There are 2 ways to make $\lambda$ bigger. What are they? \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Power calculation with R} \framesubtitle{For $n=100$, $\theta_0=0.50$ and $\theta=0.60$} \pause \begin{verbatim} > # Power for Wald chisquare test of H0: theta=theta0 > n=100; theta0=0.50; theta=0.60 > lambda = n * (theta-theta0)^2 / (theta*(1-theta)) > critval = qchisq(0.95,1) > power = 1-pchisq(critval,1,lambda); power [1] 0.5324209 \end{verbatim} \pause Earlier, had \begin{verbatim} > Z2power(0.60,100) [1] 0.5324209 \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Check power calculations by simulation} \framesubtitle{First develop and illustrate the code} \begin{verbatim} # Try a simulation to test it. set.seed(9999) # Set seed for "random" number generation theta = 0.50; theta0 = 0.50; n = 100; m = 10 critval = qchisq(0.95,1); critval p = rbinom(m,n,theta)/n; p Z2 = sqrt(n)*(p-theta0)/sqrt(p*(1-p)) rbind(p,Z2) sig = (Z2^2>critval); sig sum(sig)/n \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Output from the last slide} {\tiny \begin{verbatim} > # Try a simulation to test it. > set.seed(9999) # Set seed for "random" number generation > theta = 0.50; theta0 = 0.50; n = 100; m = 10 > critval = qchisq(0.95,1); critval [1] 3.841459 > p = rbinom(m,n,theta)/n; p [1] 0.40 0.56 0.47 0.57 0.47 0.50 0.58 0.48 0.40 0.53 > Z2 = sqrt(n)*(p-theta0)/sqrt(p*(1-p)) > rbind(p,Z2) [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] p 0.400000 0.560000 0.4700000 0.570000 0.4700000 0.5 0.580000 0.4800000 0.400000 Z2 -2.041241 1.208734 -0.6010829 1.413925 -0.6010829 0.0 1.620882 -0.4003204 -2.041241 [,10] p 0.5300000 Z2 0.6010829 > sig = (Z2^2>critval); sig [1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE > sum(sig)/n [1] 0.02 \end{verbatim} } \end{frame} \begin{frame}[fragile] \frametitle{Now the real simulation} \framesubtitle{First estimated probability should equal about 0.05 because $\theta=\theta_0$} {\footnotesize \begin{verbatim} > # Check Type I error probability > set.seed(9999) > theta = 0.50; theta0 = 0.50; n = 100; m = 10000 > critval = qchisq(0.95,1) > p = rbinom(m,n,theta)/n; Z2 = sqrt(n)*(p-theta0)/sqrt(p*(1-p)) > sig = (Z2^2>critval) > sum(sig)/m [1] 0.0574 > # Exact power calculation for theta=0.60 gives power = 0.5324209 > set.seed(9998) > theta = 0.60; theta0 = 0.50; n = 100; m = 10000 > critval = qchisq(0.95,1) > p = rbinom(m,n,theta)/n; Z2 = sqrt(n)*(p-theta0)/sqrt(p*(1-p)) > sig = (Z2^2>critval) > sum(sig)/m [1] 0.5353 \end{verbatim} } % End size \end{frame} \begin{frame}[fragile] \frametitle{Conclusions from the power analysis} \pause %\framesubtitle{} \begin{itemize} \item Power for $n=100$ is pathetic. \pause \item As Fisher said, ``To call in the statistician after the experiment is done may be no more than asking him to perform a postmortem examination: he may be able to say what the experiment died of." \pause \item $n=200$ is better. \pause \begin{verbatim} > n=200; theta0=0.50; theta=0.60 > lambda = n * (theta-theta0)^2 / (theta*(1-theta)) > power = 1-pchisq(critval,1,lambda); power [1] 0.8229822 \end{verbatim} \pause \item What sample size is required for power of 90\%? \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{What sample size is required for power of 90\%?} \begin{verbatim} > # Find sample size needed for power = 0.90 > theta0=0.50; theta=0.60; critval = qchisq(0.95,1) > effectsize = (theta-theta0)^2 / (theta*(1-theta)) > n = 0 > power=0 > while(power < 0.90) + { + n = n+1 + lambda = n * effectsize + power = 1-pchisq(critval,1,lambda) + } > n; power [1] 253 [1] 0.9009232 \end{verbatim} \end{frame} \begin{frame} \frametitle{General non-central chi-squared} %\framesubtitle{Definition} Let $X_1, \ldots, X_n$ be independent $N(\mu_i,\sigma^2_i)$. \pause Then \begin{displaymath} Y = \sum_{i=1}^n \frac{X_i^2}{\sigma^2_i} \sim \chi^2(n,\lambda), \mbox{ where } \lambda = \sum_{i=1}^n \frac{\mu_i^2}{\sigma^2_i} \end{displaymath} \pause \begin{itemize} \item Density is a bit messy --- a Poisson mixture of central chi-squares. \pause \item Reduces to central chi-squared when $\lambda=0$. \pause \item Generalizes to $Y \sim \chi^2(\nu,\lambda)$, where $\nu>0$ as well as $\lambda>0$. \pause \item Stochastically increasing in $\lambda$, meaning $Pr\{Y>y\}$ can be increased by increasing $\lambda$. \pause \item $\lim_{\lambda \rightarrow \infty} Pr\{Y>y\} = 1$ \pause \item Probabilities are easy to calculate numerically. \end{itemize} \end{frame} \begin{frame} % \frametitle{Picture of the Density} %\framesubtitle{} \begin{center} \includegraphics[width=3.4in]{Non-centralChisq} \end{center} \end{frame} \begin{frame}[fragile] \frametitle{R code for the record} %\framesubtitle{} {\scriptsize \begin{verbatim} # Plotting non-central chi-squared in R with Greek letters lambda1 = 1; lambda2=2; top = 15; DF=3 titlestring = paste("Non-central Chi-squared with df =",DF) x = seq(from=0,to=top,by=0.01) Density = dchisq(x,df=DF) d1 = dchisq(x,df=DF,ncp=lambda1); d2 = dchisq(x,df=DF,ncp=lambda2) plot(x,Density,type="l",main=titlestring) # That's a lower case L lines(x,d1,lty=2); lines(x,d2,lty=3) # Make line labels x1 <- c(11,14) ; y1 <- c(0.20,0.20) ; lines(x1,y1,lty=1) x2 <- c(11,14) ; y2 <- c(0.19,0.19) ; lines(x2,y2,lty=2) x3 <- c(11,14) ; y3 <- c(0.18,0.18) ; lines(x3,y3,lty=3) caption0 = expression(paste(lambda," = 0")); text(10,0.20,caption0); # Ugly but flexible: * means concatenation in expressions, and we # need to substitute numerical values. First comes # an expression, and then a list of substitutions. caption1 = substitute( lambda * " = " * L1, list(L1=lambda1) ) text(10,0.19,caption1) caption2 = substitute( lambda * " = " * L2, list(L2=lambda2) ) text(10,0.18,caption2) \end{verbatim} } % End size % This example solves the difficult problem of combining the display of % Greek letters with numerical values that are not hard-coded. Note that % the zero in caption0 is hard-coded, but caption1 and caption2 use % the values of lambda1 and lambda2 assigned earlier. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf16} {\footnotesize \texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf16}} \end{frame} \end{document} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{} %\framesubtitle{} \begin{itemize} \item \item \item \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Notes and comments In 2013, Cut out non-central chisq part, added nice what's a model, parameter space Non-central chisq is in 2012 version. In 2014, based on a one-vs. 2-sample t-test rather than binomial. Version 2014a wandered back into the taste test example. In 2016, lifted material from 2012 to beef it up and prepare for power of the general linear test. # Plotting non-central chi-squared in R with Greek letters lambda1 = 1; lambda2=2; top = 15; DF=3 titlestring = paste("Non-central Chi-squared with df =",DF) x = seq(from=0,to=top,by=0.01) Density = dchisq(x,df=DF) d1 = dchisq(x,df=DF,ncp=lambda1); d2 = dchisq(x,df=DF,ncp=lambda2) plot(x,Density,type="l",main=titlestring) # That's a lower case L lines(x,d1,lty=2); lines(x,d2,lty=3) # Make line labels x1 <- c(11,14) ; y1 <- c(0.20,0.20) ; lines(x1,y1,lty=1) x2 <- c(11,14) ; y2 <- c(0.19,0.19) ; lines(x2,y2,lty=2) x3 <- c(11,14) ; y3 <- c(0.18,0.18) ; lines(x3,y3,lty=3) caption0 = expression(paste(lambda," = 0")); text(10,0.20,caption0); # Ugly but flexible: * means concatenation in expressions, and we # need to substitute numerical values. First comes # an expression, and then a list of substitutions. caption1 = substitute( lambda * " = " * L1, list(L1=lambda1) ) text(10,0.19,caption1) caption2 = substitute( lambda * " = " * L2, list(L2=lambda2) ) text(10,0.18,caption2) # This example solves the difficult problem of combining the display of # Greek letters with numerical values that are not hard-coded. Note that # the zero in caption0 is hard-coded, but caption1 and caption2 use # the values of lambda1 and lambda2 assigned earlier.