% Large sample tools for Applied Stat I
% Notes and comments are after the end of the document
% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
% \mode<presentation>{\setbeamercolor{background canvas}{bg=black!5}} % Comment this out for handout
\title{Large sample tools\footnote{See last slide for copyright information.}}
\subtitle{STA442/2101 Fall 2016}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Background Reading: Davison's \emph{Statistical models}}
%\framesubtitle{}
\begin{itemize}
\item See Section 2.2 (Pages 28-37) on convergence.
\item Section 3.3 (Pages 77-90) goes more deeply into simulation than we will. At least skim it.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

\section{Foundations}

\begin{frame}
\frametitle{Sample Space $\Omega$, $\omega \in \Omega$}
\pause
%\framesubtitle{}
\begin{itemize}
\item Observe whether a single individual is male or female: $\Omega = \{F,M\}$ \pause
\item Pair of individuals; observe their genders in order: \pause $\Omega =\{(F,F),(F,M),(M,F),(M,M)\}$ \pause
\item Select $n$ people and count the number of females: \pause $\Omega = \{0,\ldots , n\}$ \pause
\end{itemize}
\vspace{10mm}
For limit problems, the points in $\Omega$ are infinite sequences.
\end{frame}

\begin{frame}
\frametitle{Random variables are functions from $\Omega$ into the set of real numbers}
\pause
{\LARGE
\begin{displaymath}
Pr\{X \in B\} = Pr(\{\omega \in \Omega: X(\omega) \in B \})
\end{displaymath}
}
\end{frame}

\begin{frame}
\frametitle{Random Sample $X_1(\omega), \ldots, X_n(\omega)$}
\pause
%\framesubtitle{}
\begin{itemize}
\item $T = T(X_1, \ldots, X_n)$ \pause
\item $T = T_n(\omega)$ \pause
\item Let $n \rightarrow \infty$ to see what happens for large samples
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{{\LARGE Modes of Convergence}}
\pause
%\framesubtitle{}
{\LARGE
\begin{itemize}
\item Almost Sure Convergence
\item Convergence in Probability
\item Convergence in Distribution
\end{itemize}
}
\end{frame}

\begin{frame}
\frametitle{Almost Sure Convergence}
\pause
We say that $T_n$ converges \emph{almost surely} to $T$, and write $T_n \stackrel{a.s.}{\rightarrow} T$ if \pause
\begin{displaymath}
Pr\{\omega:\, \lim_{n \rightarrow \infty} T_n(\omega) = T(\omega)\}=1.
\end{displaymath}
\pause
\begin{itemize}
\item Acts like an ordinary limit, except possibly on a set of probability zero. \pause
\item All the usual rules of limits apply. \pause
\item Called convergence with probability one or sometimes strong convergence.
\end{itemize}
\end{frame}

\section{LLN}

\begin{frame}
\frametitle{Strong Law of Large Numbers}
\pause
%\framesubtitle{}
Let $X_1, \ldots, X_n$ be independent and identically distributed with expected value $\mu$.
\pause \vspace{10mm} {\huge \begin{displaymath} \overline{X}_n \stackrel{a.s.}{\rightarrow} E(X_i) = \mu \end{displaymath} } \pause The only condition required for this to hold is the existence of the expected value. \end{frame} \begin{frame} \frametitle{Probability is long run relative frequency} \pause \begin{itemize} \item Statistical experiment: Probability of ``success" is $\theta$. \pause \item Carry out the experiment many times independently. \pause \item Code the results $X_i=1$ if success, $X_i=0$ for failure, $i = 1, 2, \ldots, n$ \end{itemize} \end{frame} \begin{frame} \frametitle{Sample proportion of successes converges to the probability of success} \pause \framesubtitle{Recall $X_i=0$ or $1$.} {\Large \begin{eqnarray*} E(X_i) &=& \sum_{x=0}^1 x \, Pr\{X_i = x\} \\ \pause % &&\\ &=& 0\cdot (1-\theta) + 1\cdot \theta \\ \pause % &&\\ &=& \theta \end{eqnarray*} } \pause The relative frequency (sample proportion) is \pause {\Large \begin{displaymath} \frac{1}{n}\sum_{i=1}^n X_i = \overline{X}_n \stackrel{a.s.}{\rightarrow} \theta \end{displaymath} } \end{frame} \begin{frame}[fragile] \frametitle{Estimating power by simulation} \pause Recall the coffee taste test: $Z_2 = \frac{\sqrt{n}(\overline{Y}-\theta_0)}{\sqrt{\overline{Y}(1-\overline{Y})}}$ \pause \begin{itemize} \item We found that if true $\theta=0.6$, need $n=189$ for a power of 0.80. \pause \item Verify by simulation. \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Estimate the power} \pause %\framesubtitle{} {\footnotesize % or scriptsize {\color{blue} \begin{verbatim} > theta0 = 0.50; theta = 0.60 > n=190; M = 1000000 # M is Monte Carlo sample size > ybar = rbinom(M,size=n,prob=theta)/n > Z2 = sqrt(n)*(ybar-theta0)/sqrt(ybar*(1-ybar)) # There are M of these > # Estimated power is another sample proportion > estpow = length(subset(Z2,abs(Z2)>1.96))/M > cat("Estimated power is",estpow,"\n") \end{verbatim} \pause } % End color \begin{verbatim} Estimated power is 0.793081 \end{verbatim} \pause {\color{blue} \begin{verbatim} > # 99% confidence interval for the true power > marginerr99 = qnorm(0.995) * sqrt(estpow*(1-estpow)/M) > ci = c(estpow-marginerr99,estpow+marginerr99) > cat("99% confidence interval for the power is (",ci,") \n") \end{verbatim} \pause } % End color \begin{verbatim} 99% confidence interval for the power is ( 0.7920375 0.7941245 ) \end{verbatim} } % End size \end{frame} \begin{frame} \frametitle{Strategy for estimating power by simulation} \framesubtitle{Similar approach for probability of Type I error} \begin{itemize} \item Generate a large number of random data sets under the alternative hypothesis. \item For each data set, test $H_0$. \item Estimated power is the proportion of times $H_0$ is rejected. % \item How about a confidence interval? \end{itemize} \end{frame} \begin{frame} \frametitle{Recall the Change of Variables formula: Let $Y = g(X)$} \pause %\framesubtitle{} {\LARGE \begin{displaymath} E(Y) = \int_{-\infty}^\infty y \, f_{_Y}(y) \, dy = \int_{-\infty}^\infty g(x) \, f_{_X}(x) \, dx \end{displaymath} } \pause Or, for discrete random variables \pause {\LARGE \begin{displaymath} E(Y) = \sum_y y \, p_{_Y}(y) = \sum_x g(x) \, p_{_X}(x) \end{displaymath} } \pause This is actually a big theorem, not a definition. 
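\pause
\vspace{3mm}
% Added illustration (not in the original deck): a quick check of the formula.
For example, if $X \sim$ Uniform$(0,1)$ and $Y = g(X) = X^2$, then
$E(Y) = \int_0^1 x^2 \, dx = \frac{1}{3}$, with no need to work out the density of $Y$ first.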
\end{frame} \begin{frame} \frametitle{Applying the change of variables formula} \framesubtitle{To approximate $E[g(X)]$} \pause {\LARGE \begin{eqnarray*} \frac{1}{n}\sum_{i=1}^n g(X_i) &=& \frac{1}{n}\sum_{i=1}^n Y_i \pause \stackrel{a.s.}{\rightarrow} E(Y) \\ \\ \pause &=& E(g(X)) \end{eqnarray*} } \end{frame} \begin{frame} \frametitle{So for example} %\framesubtitle{} {\LARGE \begin{eqnarray*} \frac{1}{n}\sum_{i=1}^n X_i^k &\stackrel{a.s.}{\rightarrow}& E(X^k) \\ &&\\ \pause \frac{1}{n}\sum_{i=1}^n U_i^2 V_i W_i^3 &\stackrel{a.s.}{\rightarrow}& E(U^2VW^3) \end{eqnarray*} } \pause \vspace{5mm} That is, sample moments converge almost surely to population moments. \end{frame} \begin{frame} \frametitle{Approximate an integral: $\int_{-\infty}^{\infty} h(x) \, dx$} \framesubtitle{Where $h(x)$ is a nasty function.} \pause Let $f(x)$ be a density with $f(x)>0$ wherever $h(x)\neq 0$. \pause \begin{eqnarray*} \int_{-\infty}^{\infty} h(x) \, dx \pause & = & \int_{-\infty}^{\infty} \frac{h(x)}{f(x)} f(x) \, dx \\ \pause & = & E\left[ \frac{h(X)}{f(X)}\right] \\ \pause & = & E[g(X)], \end{eqnarray*} \pause So \begin{itemize} \item Sample $X_1, \ldots, X_n$ from the distribution with density $f(x)$ \pause \item Calculate $Y_i = g(X_i) = \frac{h(X_i)}{f(X_i)}$ for $i=1, \ldots, n$ \pause \item Calculate $\overline{Y}_n \stackrel{a.s.}{\rightarrow} E[Y]= E[g(X)]$ \pause \item Confidence interval for $\mu = E[g(X)]$ is routine. \end{itemize} \end{frame} \begin{frame} \frametitle{Convergence in Probability} We say that $T_n$ converges \emph{in probability} to $T$, and write $T_n \stackrel{P}{\rightarrow} T$ if \pause for all $\epsilon>0$, \pause {\LARGE \begin{displaymath} \lim_{n \rightarrow \infty} P\{|T_n-T|<\epsilon \}=1 \end{displaymath} } \pause Convergence in probability (say to a constant $\theta$) means \pause no matter how small the interval around $\theta$, \pause for large enough $n$ (that is, for all $n>N_1$) \pause the probability of getting that close to $\theta$ is as close to one as you like. \end{frame} \begin{frame} \frametitle{Weak Law of Large Numbers} {\huge \begin{displaymath} \overline{X}_n \stackrel{p}{\rightarrow} \mu \end{displaymath} } \pause \begin{itemize} \item Almost Sure Convergence implies Convergence in Probability. \pause \item Strong Law of Large Numbers implies Weak Law of Large Numbers \end{itemize} \end{frame} \section{Consistency} \begin{frame} \frametitle{Consistency} \framesubtitle{$T = T(X_1, \ldots, X_n)$ is a statistic estimating a parameter $\theta$} \pause The statistic $T_n$ is said to be \emph{consistent} for $\theta$ if $T_n \stackrel{P}{\rightarrow} \theta$. \pause {\LARGE \begin{displaymath} \lim_{n \rightarrow \infty} P\{|T_n-\theta|<\epsilon \}=1 \end{displaymath} } \pause \vspace{5mm} The statistic $T_n$ is said to be \emph{strongly consistent} for $\theta$ if $T_n \stackrel{a.s.}{\rightarrow} \theta$. \vspace{5mm} Strong consistency implies ordinary consistency. \end{frame} \begin{frame} \frametitle{Consistency is great but it's not enough.} \begin{itemize} \item It means that as the sample size becomes indefinitely large, you probably get as close as you like to the truth. \pause \item It's the least we can ask. Estimators that are \emph{not} consistent are completely unacceptable for most purposes. 
\end{itemize} \pause {\LARGE \begin{displaymath} T_n \stackrel{a.s.}{\rightarrow} \theta \Rightarrow \pause U_n = T_n + \frac{100,000,000}{n} \stackrel{a.s.}{\rightarrow} \theta \end{displaymath} } \end{frame} \begin{frame} \frametitle{Consistency of the Sample Variance } \pause %{\LARGE \begin{eqnarray*} \widehat{\sigma}^2_n &=& \frac{1}{n}\sum_{i=1}^n (X_i-\overline{X})^2 \\ \\ \pause &=& \frac{1}{n}\sum_{i=1}^n X_i^2 - \overline{X}^2 \end{eqnarray*} %} \pause \vspace{5mm} By SLLN, $\overline{X}_n \stackrel{a.s.}{\rightarrow}\mu$ and $\frac{1}{n}\sum_{i=1}^n X_i^2 \stackrel{a.s.}{\rightarrow} E(X^2) = \sigma^2+\mu^2$. \pause \vspace{5mm} Because the function $g(x,y)=x-y^2$ is continuous, \pause \vspace{5mm} \begin{displaymath} \widehat{\sigma}^2_n = g\left(\frac{1}{n}\sum_{i=1}^n X_i^2,\overline{X}_n\right) \stackrel{a.s.}{\rightarrow} g(\sigma^2+\mu^2,\mu) \pause = \sigma^2+\mu^2 - \mu^2 = \sigma^2 \end{displaymath} \end{frame} \section{CLT} \begin{frame} \frametitle{Convergence in Distribution} \framesubtitle{Sometimes called \emph{Weak Convergence}, or \emph{Convergence in Law}} \pause Denote the cumulative distribution functions of $T_1, T_2, \ldots$ by $F_1(t), F_2(t), \ldots$ respectively, and denote the cumulative distribution function of $T$ by $F(t)$. \pause \vspace{5mm} We say that $T_n$ converges \emph{in distribution} to $T$, and write $T_n \stackrel{d}{\rightarrow} T$ if for every point $t$ at which $F$ is continuous, \pause {\LARGE \begin{displaymath} \lim_{n \rightarrow \infty} F_n(t) = F(t) \end{displaymath} } \end{frame} \begin{frame} \frametitle{Univariate Central Limit Theorem} Let $X_1, \ldots, X_n$ be a random sample from a distribution with expected value $\mu$ and variance $\sigma^2$. Then \pause {\LARGE \begin{displaymath} Z_n = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma} \stackrel{d}{\rightarrow} Z \sim N(0,1) \end{displaymath} } \end{frame} \begin{frame} \frametitle{Connections among the Modes of Convergence} {\LARGE \begin{itemize} \item $ T_n \stackrel{a.s.}{\rightarrow} T \Rightarrow T_n \stackrel{p}{\rightarrow} T \Rightarrow T_n \stackrel{d}{\rightarrow} T $. \pause \vspace{5mm} \item If $a$ is a constant, $ T_n \stackrel{d}{\rightarrow} a \Rightarrow T_n \stackrel{p}{\rightarrow} a$. \end{itemize} } \end{frame} \begin{frame} \frametitle{Sometimes we say the distribution of the sample mean is approximately normal, or asymptotically normal.} \pause %\framesubtitle{} \begin{itemize} \item This is justified by the Central Limit Theorem. \pause \item But it does \emph{not} mean that $\overline{X}_n$ converges in distribution to a normal random variable. \pause \item The Law of Large Numbers says that $\overline{X}_n$ converges almost surely (and in probability) to a constant, $\mu$. \pause \item So $\overline{X}_n$ converges to $\mu$ in distribution as well. \end{itemize} \end{frame} \begin{frame} \frametitle{Why would we say that for large $n$, the sample mean is approximately $N(\mu,\frac{\sigma^2}{n})$?} \pause \vspace{5mm} Have $Z_n = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma} \stackrel{d}{\rightarrow} Z \sim N(0,1)$. 
\pause {\footnotesize \begin{eqnarray*} Pr\{\overline{X}_n \leq x\} & = & Pr\left\{ \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma} \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \\ \pause & = & Pr\left\{ Z_n \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \pause \approx \Phi\left( \frac{\sqrt{n}(x-\mu)}{\sigma} \right) \end{eqnarray*} } \pause Suppose $Y$ is \emph{exactly} $N(\mu,\frac{\sigma^2}{n})$: \pause {\footnotesize \begin{eqnarray*} Pr\{Y \leq x\} & = & Pr\left\{ \frac{\sqrt{n}(Y-\mu)}{\sigma} \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \\ \pause & = & Pr\left\{ Z \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \pause = \Phi\left( \frac{\sqrt{n}(x-\mu)}{\sigma} \right) \end{eqnarray*} } % End size \end{frame} \section{Convergence of random vectors} \begin{frame}[allowframebreaks] % Continue frame onto several slides \frametitle{Convergence of random vectors} {\footnotesize \begin{enumerate} \item Definitions (All quantities in boldface are vectors in $\mathbb{R}^m$ unless otherwise stated ) \begin{enumerate} \item[$\star$] $ \mathbf{T}_n \stackrel{a.s.}{\rightarrow} \mathbf{T}$ means $P\{\omega:\, \lim_{n \rightarrow \infty} \mathbf{T}_n(\omega) = \mathbf{T}(\omega)\}=1$. \item[$\star$] $ \mathbf{T}_n \stackrel{P}{\rightarrow} \mathbf{T}$ means $\forall \epsilon>0,\,\lim_{n \rightarrow \infty} P\{||\mathbf{T}_n-\mathbf{T}||<\epsilon \}=1$. \item[$\star$] $ \mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ means for every continuity point $\mathbf{t}$ of $F_\mathbf{T}$, $\lim_{n \rightarrow \infty}F_{\mathbf{T}_n}(\mathbf{t}) = F_\mathbf{T}(\mathbf{t})$. \end{enumerate} \item $ \mathbf{T}_n \stackrel{a.s.}{\rightarrow} \mathbf{T} \Rightarrow \mathbf{T}_n \stackrel{P}{\rightarrow} \mathbf{T} \Rightarrow \mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T} $. \item If $\mathbf{a}$ is a vector of constants, $ \mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{a} \Rightarrow \mathbf{T}_n \stackrel{P}{\rightarrow} \mathbf{a}$. \item Strong Law of Large Numbers (SLLN): Let $\mathbf{X}_1, \ldots \mathbf{X}_n$ be independent and identically distributed random vectors with finite first moment, and let $\mathbf{X}$ be a general random vector from the same distribution. Then $ \overline{\mathbf{X}}_n \stackrel{a.s.}{\rightarrow} E(\mathbf{X})$. \item Central Limit Theorem: Let $\mathbf{X}_1, \ldots, \mathbf{X}_n$ be i.i.d. random vectors with expected value vector $\boldsymbol{\mu}$ and covariance matrix $\boldsymbol{\Sigma}$. Then $\sqrt{n}(\overline{\mathbf{X}}_n-\boldsymbol{\mu})$ converges in distribution to a multivariate normal with mean \textbf{0} and covariance matrix $\boldsymbol{\Sigma}$. \framebreak \item \label{slutd} Slutsky Theorems for Convergence in Distribution: \begin{enumerate} \item \label{slutcond} If $\mathbf{T}_n \in \mathbb{R}^m$, $\mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ and if $f:\,\mathbb{R}^m \rightarrow \mathbb{R}^q$ (where $q \leq m$) is continuous except possibly on a set $C$ with $P(\mathbf{T} \in C)=0$, then $f(\mathbf{T}_n) \stackrel{d}{\rightarrow} f(\mathbf{T})$. \item \label{slutdiffd} If $\mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ and $(\mathbf{T}_n - \mathbf{Y}_n) \stackrel{P}{\rightarrow} 0$, then $\mathbf{Y}_n \stackrel{d}{\rightarrow} \mathbf{T}$. 
\item \label{slutstackd} If $\mathbf{T}_n \in \mathbb{R}^d$, $\mathbf{Y}_n \in \mathbb{R}^k$, $\mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ and $\mathbf{Y}_n \stackrel{P}{\rightarrow} \mathbf{c}$, then \begin{displaymath} \left( \begin{array}{cc} \mathbf{T}_n \\ \mathbf{Y}_n \end{array} \right) \stackrel{d}{\rightarrow} \left( \begin{array}{cc} \mathbf{T} \\ \mathbf{c} \end{array} \right) \end{displaymath} \end{enumerate} \end{enumerate} } \end{frame} \begin{frame} \frametitle{An application of the Slutsky Theorems} \begin{itemize} \item Let $X_1, \ldots, X_n \stackrel{i.i.d.}{\sim}\,?(\mu,\sigma^2)$ \pause \item By CLT, $Y_n = \sqrt{n}(\overline{X}_n-\mu) \stackrel{d}{\rightarrow} Y \sim N(0,\sigma^2)$ \pause \item Let $\widehat{\sigma}_n$ be \emph{any} consistent estimator of $\sigma$. \pause \item Then by \ref{slutd}.\ref{slutstackd}, $\mathbf{T}_n = \left( \begin{array}{cc} Y_n \\ \widehat{\sigma}_n \end{array} \right) \stackrel{d}{\rightarrow} \left( \begin{array}{cc} Y \\ \sigma \end{array} \right) = \mathbf{T} $ \pause \item The function $f(x,y)=x/y$ is continuous except if $y=0$ \\ so by \ref{slutd}.\ref{slutcond}, \pause \end{itemize} \begin{displaymath} f(\mathbf{T}_n) = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\widehat{\sigma}_n} \stackrel{d}{\rightarrow} f(\mathbf{T}) = \frac{Y}{\sigma} \sim N(0,1) \end{displaymath} \end{frame} \begin{frame} \frametitle{We need more tools} \pause Because \begin{itemize} \item The multivariate CLT establishes convergence to a multivariate normal, and \item Vectors of MLEs are approximately multivariate normal for large samples, and \item Most real-life models have multiple parameters, \end{itemize} \vspace{15mm} \pause We need to look at random vectors and the multivariate normal distribution. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf16} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf16}} \end{frame} \end{document} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{} %\framesubtitle{} \begin{itemize} \item \item \item \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% {\footnotesize % or scriptsize \begin{verbatim} theta0 = 0.50; theta = 0.60 n=190; M = 1000000 # M is Monte Carlo sample size ybar = rbinom(M,size=n,prob=theta)/n Z2 = sqrt(n)*(ybar-theta0)/sqrt(ybar*(1-ybar)) # There are M of these # Estimated power is another sample proportion estpow = length(subset(Z2,abs(Z2)>1.96))/M cat("Estimated power is",estpow,"\n") # 99% confidence interval for the true power marginerr99 = qnorm(0.995) * sqrt(estpow*(1-estpow)/M) ci = c(estpow-marginerr99,estpow+marginerr99) cat("99% confidence interval for the power is (",ci,") \n") \end{verbatim} } % End size %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Notes and Comments: The 2012 version has multinomial. 
I cut this out to save time. Replaces the "hard elementary problem" with power by simulation. In 2016, cut this down a lot to save time. I still need consistency.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
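
A rough R sketch (not shown in the deck) of the Monte Carlo integration idea from the
"Approximate an integral" slide. The integrand h and the choice of f below are just for
illustration.

{\footnotesize % or scriptsize
\begin{verbatim}
# Approximate the integral of h(x) = exp(-x^4) over the whole real line.
# Sample from a density f(x) that is positive wherever h(x) is nonzero;
# here f is the standard normal density.
set.seed(9999)
n = 1000000
h = function(x) exp(-x^4)
x = rnorm(n)                        # X_1, ..., X_n from f
y = h(x)/dnorm(x)                   # Y_i = g(X_i) = h(X_i)/f(X_i)
estimate = mean(y)                  # Ybar converges a.s. to E[g(X)], the integral
margin99 = qnorm(0.995) * sd(y)/sqrt(n)
cat("Estimated integral is",estimate,"\n")
cat("99% confidence interval is (",estimate-margin99,estimate+margin99,") \n")
# For comparison, the exact value is 2*gamma(5/4), about 1.8128.
\end{verbatim}
} % End size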