% Large sample tools for Applied Stat I
% Notes and comments are after the end of the document
% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
% \mode{\setbeamercolor{background canvas}{bg=black!5}} % Comment this out for handout
\title{Large sample tools\footnote{See last slide for copyright information.}}
\subtitle{STA442/2101 Fall 2013}
\date{} % To suppress date
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\frametitle{Background Reading: Davison's \emph{Statistical models}}
%\framesubtitle{}
\begin{itemize}
\item For completeness, look at Section 2.1, which presents some basic applied statistics in an advanced way.
\item Especially see Section 2.2 (Pages 28-37) on convergence.
\item Section 3.3 (Pages 77-90) goes more deeply into simulation than we will. At least skim it.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}
\section{Foundations}
\begin{frame}
\frametitle{Sample Space $\Omega$, $\omega \in \Omega$}
%\framesubtitle{}
\begin{itemize}
\item Observe whether a single individual is male or female: $\Omega = \{F,M\}$
\item Pair of individuals; observe their genders in order: $\Omega =\{(F,F),(F,M),(M,F),(M,M)\}$
\item Select $n$ people and count the number of females: $\Omega = \{0,\ldots , n\}$
\end{itemize}
\vspace{10mm}
For limit problems, the points in $\Omega$ are infinite sequences.
\end{frame}
\begin{frame}
\frametitle{Random variables are functions from $\Omega$ into the set of real numbers}
{\LARGE
\begin{displaymath}
Pr\{X \in B\} = Pr(\{\omega \in \Omega: X(\omega) \in B \})
\end{displaymath}
}
\end{frame}
\begin{frame}
\frametitle{Random Sample $X_1(\omega), \ldots, X_n(\omega)$}
%\framesubtitle{}
\begin{itemize}
\item $T = T(X_1, \ldots, X_n)$
\item $T = T_n(\omega)$
\item Let $n \rightarrow \infty$ to see what happens for large samples
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{{\LARGE Modes of Convergence}}
%\framesubtitle{}
{\LARGE
\begin{itemize}
\item Almost Sure Convergence
\item Convergence in Probability
\item Convergence in Distribution
\end{itemize}
}
\end{frame}
\begin{frame}
\frametitle{Almost Sure Convergence}
We say that $T_n$ converges \emph{almost surely} to $T$, and write $T_n \stackrel{a.s.}{\rightarrow} T$ if
\begin{displaymath}
Pr\{\omega:\, \lim_{n \rightarrow \infty} T_n(\omega) = T(\omega)\}=1.
\end{displaymath}
\pause
\begin{itemize}
\item Acts like an ordinary limit, except possibly on a set of probability zero.
\item All the usual rules apply.
\item Called convergence with probability one or sometimes strong convergence.
\end{itemize}
\end{frame}
\section{LLN}
\begin{frame}
\frametitle{Strong Law of Large Numbers}
%\framesubtitle{}
Let $X_1, \ldots, X_n$ be independent and identically distributed with expected value $\mu$.
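Write $\overline{X}_n = \frac{1}{n}\sum_{i=1}^n X_i$ for the sample mean. Then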
\vspace{10mm} {\huge \begin{displaymath} \overline{X}_n \stackrel{a.s.}{\rightarrow} E(X_i) = \mu \end{displaymath} } The only condition required for this to hold is the existence of the expected value. \end{frame} \begin{frame} \frametitle{Probability is long run relative frequency} \begin{itemize} \item Statistical experiment: Probability of ``success" is $\theta$ \item Carry out the experiment many times independently. \item Code the results $X_i=1$ if success, $X_i=0$ for failure, $i = 1, 2, \ldots$ \end{itemize} \end{frame} \begin{frame} \frametitle{Sample proportion of successes converges to the probability of success} \framesubtitle{Recall $X_i=0$ or $1$.} {\Large \begin{eqnarray*} E(X_i) &=& \sum_{x=0}^1 x \, Pr\{X_i = x\} \\ % &&\\ &=& 0\cdot (1-\theta) + 1\cdot \theta \\ % &&\\ &=& \theta \end{eqnarray*} } \pause Relative frequency is {\Large \begin{displaymath} \frac{1}{n}\sum_{i=1}^n X_i = \overline{X}_n \stackrel{a.s.}{\rightarrow} \theta \end{displaymath} } \end{frame} \begin{frame} \frametitle{Simulation} \framesubtitle{Using pseudo-random number generation by computer} \begin{itemize} \item Estimate almost any probability that's hard to figure out \pause \item Power \pause \item Weather model \pause \item Performance of statistical methods \pause \item Need confidence intervals for estimated probabilities. \end{itemize} \end{frame} \begin{frame} \frametitle{Estimating power by simulation} Recall the two test statistics for testing $H_0: \theta=\theta_0$: \begin{itemize} \item $Z_1 = \frac{\sqrt{n}(\overline{Y}-\theta_0)}{\sqrt{\theta_0(1-\theta_0)}}$ \item $Z_2 = \frac{\sqrt{n}(\overline{Y}-\theta_0)}{\sqrt{\overline{Y}(1-\overline{Y})}}$ \end{itemize} When $\theta \neq \theta_0$, calculating $P\{|Z_2|>z_{\alpha/2} \}$ can be challenging. \end{frame} \begin{frame} \frametitle{Strategy} \framesubtitle{For estimating power by simulation} \begin{itemize} \item Generate a large number of random data sets under the alternative hypothesis. \item For each data set, test $H_0$. \item Estimated power is the proportion of times $H_0$ is rejected. \item How accurate is the estimate? 
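\item The estimated power is a sample proportion over the simulated data sets, so a routine confidence interval for a proportion (next slides) gives its margin of error.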
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Testing $H_0: \theta = 0.50$ when true $\theta=0.60$ and $n=100$}
\framesubtitle{Power of $Z_1$ was about 0.52}
\pause
\begin{displaymath}
Z_2 = \frac{\sqrt{n}(\overline{Y}-\theta_0)}{\sqrt{\overline{Y}(1-\overline{Y})}}
\end{displaymath}
\pause
{\footnotesize % or scriptsize
\begin{verbatim}
> # Power by simulation
> set.seed(9999)
> m = 10000 # Monte Carlo sample size
> theta=0.60; theta0 = 1/2; n = 100
> Ybar = rbinom(m,size=n,prob=theta)/n # A vector of length m
> Z2 = sqrt(n)*(Ybar-theta0)/sqrt(Ybar*(1-Ybar)) # Another vector of length m
> power = length(Z2[abs(Z2)>1.96])/m; power
[1] 0.5394
\end{verbatim}
} % End size
\end{frame}
\begin{frame}[fragile]
\frametitle{Margin of error for estimated power}
% \framesubtitle{}
\pause
Confidence interval for an estimated probability was
\begin{displaymath}
\overline{Y} \pm z_{\alpha/2}\sqrt{\frac{\overline{Y}(1-\overline{Y})}{n}}
\end{displaymath}
\pause
{\footnotesize % or scriptsize
\begin{verbatim}
# How about a 99 percent margin of error
> a = 0.005; z = qnorm(1-a)
> merror = z * sqrt(power*(1-power)/m); merror
[1] 0.0128391
> Lower = power - merror; Lower
[1] 0.5265609
> Upper = power + merror; Upper
[1] 0.5522391
\end{verbatim}
} % End size
\end{frame}
\begin{frame}
\frametitle{Recall the Change of Variables formula: Let $Y = g(X)$}
%\framesubtitle{}
{\LARGE
\begin{displaymath}
E(Y) = \int_{-\infty}^\infty y \, f_{_Y}(y) \, dy = \int_{-\infty}^\infty g(x) \, f_{_X}(x) \, dx
\end{displaymath}
}
Or, for discrete random variables
{\LARGE
\begin{displaymath}
E(Y) = \sum_y y \, p_{_Y}(y) = \sum_x g(x) \, p_{_X}(x)
\end{displaymath}
}
This is actually a big theorem, not a definition.
\end{frame}
\begin{frame}
\frametitle{Applying the change of variables formula}
\framesubtitle{To approximate $E[g(X)]$}
{\LARGE
\begin{eqnarray*}
\frac{1}{n}\sum_{i=1}^n g(X_i) &=& \frac{1}{n}\sum_{i=1}^n Y_i \stackrel{a.s.}{\rightarrow} E(Y) \\ \\
&=& E(g(X))
\end{eqnarray*}
}
\end{frame}
\begin{frame}
\frametitle{So for example}
%\framesubtitle{}
{\LARGE
\begin{eqnarray*}
\frac{1}{n}\sum_{i=1}^n X_i^k &\stackrel{a.s.}{\rightarrow}& E(X^k) \\ &&\\ \pause
\frac{1}{n}\sum_{i=1}^n U_i^2 V_i W_i^3 &\stackrel{a.s.}{\rightarrow}& E(U^2VW^3)
\end{eqnarray*}
}
\pause
\vspace{5mm}
That is, sample moments converge almost surely to population moments.
\end{frame}
\begin{frame}
\frametitle{Approximate an integral: $\int_{-\infty}^{\infty} h(x) \, dx$}
\framesubtitle{Where $h(x)$ is a nasty function.}
\pause
Let $f(x)$ be a density with $f(x)>0$ wherever $h(x)\neq 0$.
\pause \begin{eqnarray*} \int_{-\infty}^{\infty} h(x) \, dx & = & \int_{-\infty}^{\infty} \frac{h(x)}{f(x)} f(x) \, dx \\ \pause & = & E\left[ \frac{h(X)}{f(X)}\right] \\ \pause & = & E[g(X)], \end{eqnarray*} \pause So \begin{itemize} \item Sample $X_1, \ldots, X_n$ from the distribution with density $f(x)$ \pause \item Calculate $Y_i = g(X_i) = \frac{h(X_i)}{f(X_i)}$ for $i=1, \ldots, n$ \pause \item Calculate $\overline{Y}_n \stackrel{a.s.}{\rightarrow} E[Y]= E[g(X)]$ \pause \item Confidence interval for $\mu = E[g(X)]$ is routine, \end{itemize} \end{frame} \begin{frame} \frametitle{Convergence in Probability} We say that $T_n$ converges \emph{in probability} to $T$, and write $T_n \stackrel{P}{\rightarrow} T$ if for all $\epsilon>0$, {\LARGE \begin{displaymath} \lim_{n \rightarrow \infty} P\{|T_n-T|<\epsilon \}=1 \end{displaymath} } Convergence in probability (say to a constant $\theta$) means no matter how small the interval around $\theta$, for large enough $n$ (that is, for all $n>N_1$) the probability of getting that close to $\theta$ is as close to one as you like. \end{frame} \begin{frame} \frametitle{Weak Law of Large Numbers} {\huge \begin{displaymath} \overline{X}_n \stackrel{p}{\rightarrow} \mu \end{displaymath} } \pause \begin{itemize} \item Almost Sure Convergence implies Convergence in Probability \item Strong Law of Large Numbers implies Weak Law of Large Numbers \end{itemize} \end{frame} \section{Consistency} \begin{frame} \frametitle{Consistency} \framesubtitle{$T = T(X_1, \ldots, X_n)$ is a statistic estimating a parameter $\theta$} The statistic $T_n$ is said to be \emph{consistent} for $\theta$ if $T_n \stackrel{P}{\rightarrow} \theta$. \pause {\LARGE \begin{displaymath} \lim_{n \rightarrow \infty} P\{|T_n-\theta|<\epsilon \}=1 \end{displaymath} } \pause \vspace{5mm} The statistic $T_n$ is said to be \emph{strongly consistent} for $\theta$ if $T_n \stackrel{a.s.}{\rightarrow} \theta$. \vspace{5mm} Strong consistency implies ordinary consistency. \end{frame} \begin{frame} \frametitle{Consistency is great but it's not enough.} \begin{itemize} \item It means that as the sample size becomes indefinitely large, you probably get as close as you like to the truth. \item It's the least we can ask. Estimators that are not consistent are completely unacceptable for most purposes. \end{itemize} \pause {\LARGE \begin{displaymath} T_n \stackrel{a.s.}{\rightarrow} \theta \Rightarrow U_n = T_n + \frac{100,000,000}{n} \stackrel{a.s.}{\rightarrow} \theta \end{displaymath} } \end{frame} \begin{frame} \frametitle{Consistency of the Sample Variance } \pause %{\LARGE \begin{eqnarray*} \widehat{\sigma}^2_n &=& \frac{1}{n}\sum_{i=1}^n (X_i-\overline{X})^2 \\ \\ \pause &=& \frac{1}{n}\sum_{i=1}^n X_i^2 - \overline{X}^2 \end{eqnarray*} %} \pause \vspace{5mm} By SLLN, $\overline{X}_n \stackrel{a.s.}{\rightarrow}\mu$ and $\frac{1}{n}\sum_{i=1}^n X_i^2 \stackrel{a.s.}{\rightarrow} E(X^2) = \sigma^2+\mu^2$. 
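(Recall that $E(X^2) = Var(X) + (E(X))^2 = \sigma^2 + \mu^2$.)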
\pause \vspace{5mm} Because the function $g(x,y)=x-y^2$ is continuous, \pause \vspace{5mm} \begin{displaymath} \widehat{\sigma}^2_n = g\left(\frac{1}{n}\sum_{i=1}^n X_i^2,\overline{X}_n\right) \stackrel{a.s.}{\rightarrow} g(\sigma^2+\mu^2,\mu) = \sigma^2+\mu^2 - \mu^2 = \sigma^2 \end{displaymath} \end{frame} \section{CLT} \begin{frame} \frametitle{Convergence in Distribution} \framesubtitle{Sometimes called \emph{Weak Convergence}, or \emph{Convergence in Law}} \pause Denote the cumulative distribution functions of $T_1, T_2, \ldots$ by $F_1(t), F_2(t), \ldots$ respectively, and denote the cumulative distribution function of $T$ by $F(t)$. \pause \vspace{5mm} We say that $T_n$ converges \emph{in distribution} to $T$, and write $T_n \stackrel{d}{\rightarrow} T$ if for every point $t$ at which $F$ is continuous, {\LARGE \begin{displaymath} \lim_{n \rightarrow \infty} F_n(t) = F(t) \end{displaymath} } \end{frame} \begin{frame} \frametitle{Univariate Central Limit Theorem} Let $X_1, \ldots, X_n$ be a random sample from a distribution with expected value $\mu$ and variance $\sigma^2$. Then {\LARGE \begin{displaymath} Z_n = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma} \stackrel{d}{\rightarrow} Z \sim N(0,1) \end{displaymath} } \end{frame} \begin{frame} \frametitle{Connections among the Modes of Convergence} {\LARGE \begin{itemize} \item $ T_n \stackrel{a.s.}{\rightarrow} T \Rightarrow T_n \stackrel{p}{\rightarrow} T \Rightarrow T_n \stackrel{d}{\rightarrow} T $. \pause \vspace{5mm} \item If $a$ is a constant, $ T_n \stackrel{d}{\rightarrow} a \Rightarrow T_n \stackrel{p}{\rightarrow} a$. \end{itemize} } \end{frame} \begin{frame} \frametitle{Sometimes we say the distribution of the sample mean is approximately normal, or asymptotically normal.} %\framesubtitle{} \begin{itemize} \item This is justified by the Central Limit Theorem. \item But it does \emph{not} mean that $\overline{X}_n$ converges in distribution to a normal random variable. \item The Law of Large Numbers says that $\overline{X}_n$ converges almost surely (and in probability) to a constant, $\mu$. \item So $\overline{X}_n$ converges to $\mu$ in distribution as well. \end{itemize} \end{frame} \begin{frame} \frametitle{Why would we say that for large $n$, the sample mean is approximately $N(\mu,\frac{\sigma^2}{n})$?} \pause \vspace{5mm} Have $Z_n = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma} \stackrel{d}{\rightarrow} Z \sim N(0,1)$. 
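\pause
So for large $n$, $Pr\{Z_n \leq z\} \approx \Phi(z)$, where $\Phi$ is the standard normal cumulative distribution function.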
\pause
{\footnotesize
\begin{eqnarray*}
Pr\{\overline{X}_n \leq x\} & = & Pr\left\{ \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma} \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \\ \pause
& = & Pr\left\{ Z_n \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \pause \approx \Phi\left( \frac{\sqrt{n}(x-\mu)}{\sigma} \right)
\end{eqnarray*}
}
\pause
Suppose $Y$ is \emph{exactly} $N(\mu,\frac{\sigma^2}{n})$:
\pause
{\footnotesize
\begin{eqnarray*}
Pr\{Y \leq x\} & = & Pr\left\{ \frac{\sqrt{n}(Y-\mu)}{\sigma} \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \\ \pause
& = & Pr\left\{ Z \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \pause = \Phi\left( \frac{\sqrt{n}(x-\mu)}{\sigma} \right)
\end{eqnarray*}
} % End size
\end{frame}
\section{Convergence of random vectors}
\begin{frame}[allowframebreaks] % Continue frame onto several slides
\frametitle{Convergence of random vectors}
{\footnotesize
\begin{enumerate}
\item Definitions (All quantities in boldface are vectors in $\mathbb{R}^m$ unless otherwise stated)
\begin{enumerate}
\item[$\star$] $ \mathbf{T}_n \stackrel{a.s.}{\rightarrow} \mathbf{T}$ means $P\{\omega:\, \lim_{n \rightarrow \infty} \mathbf{T}_n(\omega) = \mathbf{T}(\omega)\}=1$.
\item[$\star$] $ \mathbf{T}_n \stackrel{P}{\rightarrow} \mathbf{T}$ means $\forall \epsilon>0,\,\lim_{n \rightarrow \infty} P\{||\mathbf{T}_n-\mathbf{T}||<\epsilon \}=1$.
\item[$\star$] $ \mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ means for every continuity point $\mathbf{t}$ of $F_\mathbf{T}$, $\lim_{n \rightarrow \infty}F_{\mathbf{T}_n}(\mathbf{t}) = F_\mathbf{T}(\mathbf{t})$.
\end{enumerate}
\item $ \mathbf{T}_n \stackrel{a.s.}{\rightarrow} \mathbf{T} \Rightarrow \mathbf{T}_n \stackrel{P}{\rightarrow} \mathbf{T} \Rightarrow \mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T} $.
\item If $\mathbf{a}$ is a vector of constants, $ \mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{a} \Rightarrow \mathbf{T}_n \stackrel{P}{\rightarrow} \mathbf{a}$.
\item Strong Law of Large Numbers (SLLN): Let $\mathbf{X}_1, \ldots, \mathbf{X}_n$ be independent and identically distributed random vectors with finite first moment, and let $\mathbf{X}$ be a general random vector from the same distribution. Then $ \overline{\mathbf{X}}_n \stackrel{a.s.}{\rightarrow} E(\mathbf{X})$.
\item Central Limit Theorem: Let $\mathbf{X}_1, \ldots, \mathbf{X}_n$ be i.i.d. random vectors with expected value vector $\boldsymbol{\mu}$ and covariance matrix $\boldsymbol{\Sigma}$. Then $\sqrt{n}(\overline{\mathbf{X}}_n-\boldsymbol{\mu})$ converges in distribution to a multivariate normal with mean \textbf{0} and covariance matrix $\boldsymbol{\Sigma}$.
\framebreak
\item \label{slutd} Slutsky Theorems for Convergence in Distribution:
\begin{enumerate}
\item \label{slutcond} If $\mathbf{T}_n \in \mathbb{R}^m$, $\mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ and if $f:\,\mathbb{R}^m \rightarrow \mathbb{R}^q$ (where $q \leq m$) is continuous except possibly on a set $C$ with $P(\mathbf{T} \in C)=0$, then $f(\mathbf{T}_n) \stackrel{d}{\rightarrow} f(\mathbf{T})$.
\item \label{slutdiffd} If $\mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ and $(\mathbf{T}_n - \mathbf{Y}_n) \stackrel{P}{\rightarrow} 0$, then $\mathbf{Y}_n \stackrel{d}{\rightarrow} \mathbf{T}$.
\item \label{slutstackd} If $\mathbf{T}_n \in \mathbb{R}^d$, $\mathbf{Y}_n \in \mathbb{R}^k$, $\mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ and $\mathbf{Y}_n \stackrel{P}{\rightarrow} \mathbf{c}$, then \begin{displaymath} \left( \begin{array}{cc} \mathbf{T}_n \\ \mathbf{Y}_n \end{array} \right) \stackrel{d}{\rightarrow} \left( \begin{array}{cc} \mathbf{T} \\ \mathbf{c} \end{array} \right) \end{displaymath} \end{enumerate} \framebreak \item \label{slutp} Slutsky Theorems for Convergence in Probability: \begin{enumerate} \item \label{slutconp} If $\mathbf{T}_n \in \mathbb{R}^m$, $\mathbf{T}_n \stackrel{P}{\rightarrow} \mathbf{T}$ and if $f:\,\mathbb{R}^m \rightarrow \mathbb{R}^q$ (where $q \leq m$) is continuous except possibly on a set $C$ with $P(\mathbf{T} \in C)=0$, then $f(\mathbf{T}_n) \stackrel{P}{\rightarrow} f(\mathbf{T})$. \item \label{slutdiffp} If $\mathbf{T}_n \stackrel{P}{\rightarrow} \mathbf{T}$ and $(\mathbf{T}_n - \mathbf{Y}_n) \stackrel{P}{\rightarrow} 0$, then $\mathbf{Y}_n \stackrel{P}{\rightarrow} \mathbf{T}$. \item \label{slutstackp} If $\mathbf{T}_n \in \mathbb{R}^d$, $\mathbf{Y}_n \in \mathbb{R}^k$, $\mathbf{T}_n \stackrel{P}{\rightarrow} \mathbf{T}$ and $\mathbf{Y}_n \stackrel{P}{\rightarrow} \mathbf{Y}$, then \begin{displaymath} \left( \begin{array}{cc} \mathbf{T}_n \\ \mathbf{Y}_n \end{array} \right) \stackrel{P}{\rightarrow} \left( \begin{array}{cc} \mathbf{T} \\ \mathbf{Y} \end{array} \right) \end{displaymath} \end{enumerate} \framebreak \item \label{delta} Delta Method (Theorem of Cram\'{e}r, Ferguson p. 45): Let $g: \mathbb{R}^d \rightarrow \mathbb{R}^k$ be such that the elements of \.{g}$(\mathbf{x}) = \left[ \frac{\partial g_i}{\partial x_j} \right]_{k \times d}$ are continuous in a neighborhood of $\boldsymbol{\theta} \in \mathbb{R}^d$. If $\mathbf{T}_n$ is a sequence of $d$-dimensional random vectors such that $\sqrt{n}(\mathbf{T}_n-\boldsymbol{\theta}) \stackrel{d}{\rightarrow} \mathbf{T}$, then $\sqrt{n}(g(\mathbf{T}_n)-g(\boldsymbol{\theta})) \stackrel{d}{\rightarrow} \mbox{\.{g}} (\boldsymbol{\theta}) \mathbf{T}$. In particular, if $\sqrt{n}(\mathbf{T}_n-\boldsymbol{\theta}) \stackrel{d}{\rightarrow} \mathbf{T} \sim N(\mathbf{0},\mathbf{\Sigma})$, then $\sqrt{n}(g(\mathbf{T}_n)-g(\boldsymbol{\theta})) \stackrel{d}{\rightarrow} \mathbf{Y} \sim N(\mathbf{0}, \mbox{\.{g}}(\boldsymbol{\theta})\mathbf{\Sigma}\mbox{\.{g}}(\boldsymbol{\theta}) ^\prime)$. \end{enumerate} } \end{frame} \begin{frame} \frametitle{An application of the Slutsky Theorems} \begin{itemize} \item Let $X_1, \ldots, X_n \stackrel{i.i.d.}{\sim}\,?(\mu,\sigma^2)$ \pause \item By CLT, $Y_n = \sqrt{n}(\overline{X}_n-\mu) \stackrel{d}{\rightarrow} Y \sim N(0,\sigma^2)$ \pause \item Let $\widehat{\sigma}_n$ be \emph{any} consistent estimator of $\sigma$. \pause \item Then by \ref{slutd}.\ref{slutstackd}, $\mathbf{T}_n = \left( \begin{array}{cc} Y_n \\ \widehat{\sigma}_n \end{array} \right) \stackrel{d}{\rightarrow} \left( \begin{array}{cc} Y \\ \sigma \end{array} \right) = \mathbf{T} $ \pause \item The function $f(x,y)=x/y$ is continuous except if $y=0$ \\ so by \ref{slutd}.\ref{slutcond}, \pause \end{itemize} \begin{displaymath} f(\mathbf{T}_n) = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\widehat{\sigma}_n} \stackrel{d}{\rightarrow} f(\mathbf{T}) = \frac{Y}{\sigma} \sim N(0,1) \end{displaymath} \end{frame} \section{Delta Method} \begin{frame} \frametitle{Univariate delta method} In the multivariate Delta Method~\ref{delta}, the matrix $\mbox{\.{g}}(\boldsymbol{\theta})$ is a Jacobian. 
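For example, if $d=2$, $k=1$ and $g(x_1,x_2) = x_1/x_2$, then $\mbox{\.{g}}(\mathbf{x}) = \left( \frac{1}{x_2}, \; -\frac{x_1}{x_2^2} \right)$, a $1 \times 2$ matrix of partial derivatives.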
The univariate version of the delta method says that if $\sqrt{n}\left( T_n- \theta \right) \stackrel{d}{\rightarrow} T$ and $g^{\prime\prime}(x)$ is continuous at $\theta$, then
\begin{displaymath}
\sqrt{n}\left( g(T_n)- g(\theta) \right) \stackrel{d}{\rightarrow} g^\prime(\theta) \, T.
\end{displaymath}
\vspace{5mm}
\pause
When using the Central Limit Theorem, \emph{especially} if the model has a parameter $\theta$ that is different from $\mu$, it's safer to write
\begin{displaymath}
\sqrt{n}\left( g(\overline{X}_n)- g(\mu) \right) \stackrel{d}{\rightarrow} g^\prime(\mu) \, T
\end{displaymath}
and then substitute for $\mu$ in terms of $\theta$.
\end{frame}
\begin{frame}
\frametitle{Example: Geometric distribution}
%\framesubtitle{}
Let $X_1, \ldots, X_n$ be a random sample from a distribution with probability mass function $p(x|\theta) = \theta (1-\theta)^{x-1}$ for $x = 1, 2, \ldots$, where $0<\theta<1$.
\vspace{3mm}
\pause
So, $E(X_i)= \frac{1}{\theta}$ and $Var(X_i)= \frac{1-\theta}{\theta^2}$.
\vspace{3mm}
\pause
The maximum likelihood estimator of $\theta$ is $\widehat{\theta} = \frac{1}{\overline{X}_n}$. Using the Central Limit Theorem and the delta method, find the approximate large-sample distribution of $\widehat{\theta}$.
\end{frame}
\begin{frame}
\frametitle{Solution: Geometric distribution}
\framesubtitle{$\mu=\frac{1}{\theta}$ and $\sigma^2 = \frac{1-\theta}{\theta^2}$}
\begin{itemize}
\item[] Have $\sqrt{n}\left( \overline{X}_n- \mu \right) \stackrel{d}{\rightarrow} T \sim N(0,\frac{1-\theta}{\theta^2})$
\pause
\item[] And $\sqrt{n}\left( g(\overline{X}_n)- g(\mu) \right) \stackrel{d}{\rightarrow} g^\prime(\mu) \, T$.
\pause
\item[] $g(x) = \frac{1}{x} = x^{-1}$
\pause
\item[] $g^\prime (x) = -x^{-2}$
\pause
\item[] So,
\pause
\end{itemize}
\begin{eqnarray*}
\sqrt{n}\left( g(\overline{X}_n)- g(\mu)\right) & = & \pause \sqrt{n}\left( \frac{1}{\overline{X}_n} - \frac{1}{\mu}\right) \\ \pause
& = & \sqrt{n}\left( \widehat{\theta} - \theta\right) \\ \pause
& \stackrel{d}{\rightarrow} & g^\prime(\mu) \, T = -\frac{1}{\mu^2} \, T \\ \pause
& = & -\theta^2 \, T \sim N\left(0, \theta^4 \cdot\frac{1-\theta}{\theta^2} \right)
\end{eqnarray*}
\end{frame}
\begin{frame}
\frametitle{Asymptotic distribution of $\widehat{\theta} = \frac{1}{\overline{X}_n}$}
\framesubtitle{Approximate large-sample distribution}
\pause
Have $Y_n = \sqrt{n}\left( \widehat{\theta} - \theta\right) \stackrel{\cdot}{\sim} N(0,\theta^2(1-\theta))$.
\pause
\vspace{5mm}
\begin{itemize}
\item[] So $\frac{Y_n}{\sqrt{n}} = \left( \widehat{\theta} - \theta\right) \stackrel{\cdot}{\sim} N\left(0,\frac{\theta^2(1-\theta)}{n}\right)$
\pause
\item[] And $\frac{Y_n}{\sqrt{n}} + \theta = \widehat{\theta} \stackrel{\cdot}{\sim} N\left(\theta,\frac{\theta^2(1-\theta)}{n}\right)$
\pause
\end{itemize}
\vspace{5mm}
We'll say that $\widehat{\theta} = \frac{1}{\overline{X}_n}$ is approximately $N\left(\theta,\frac{\theta^2(1-\theta)}{n}\right)$.
\end{frame}
\begin{frame}
\frametitle{Another example of $\sqrt{n}\left( g(\overline{X}_n)- g(\mu) \right) \stackrel{d}{\rightarrow} g^\prime(\mu) \, T$}
\framesubtitle{Don't lose your head}
\pause
\begin{itemize}
\item[] Let $X_1, \ldots, X_n \stackrel{i.i.d.}{\sim}\,?(\mu,\sigma^2)$
\pause
\item[] CLT says $\sqrt{n}(\overline{X}_n-\mu) \stackrel{d}{\rightarrow} T \sim N(0,\sigma^2)$
\pause
\item[] Let $g(x)=x^2$
\pause
\item[] Delta method says $\sqrt{n}\left( g(\overline{X}_n)- g(\mu) \right) \stackrel{d}{\rightarrow} g^\prime(\mu) \, T$.
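\pause
\item[] Here $g^\prime(x) = 2x$, so $g^\prime(\mu) \, T = 2\mu \, T$.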
\pause
\item[] So $\sqrt{n}\left( \overline{X}_n^2- \mu^2 \right) \stackrel{d}{\rightarrow} 2\mu \, T \sim N(0,4\mu^2\sigma^2)$
\pause
\item[] Really? What if $\mu=0$?
\pause
\item[]
\item[] If $\mu=0$ then $\sqrt{n}\left( \overline{X}_n^2- \mu^2 \right) = \sqrt{n} \, \overline{X}_n^2$ \pause $\stackrel{d}{\rightarrow} 2 \mu T$ \pause $=0$ \pause $\Rightarrow \sqrt{n} \, \overline{X}_n^2 \stackrel{p}{\rightarrow} 0$.
\pause
\item[] Faster convergence.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{On the other hand \ldots}
% \framesubtitle{}
Have $\sqrt{n} \, \overline{X}_n^2 \stackrel{p}{\rightarrow} 0$, \pause but if (say) $\sigma^2=1$,
\vspace{7mm}
\pause
{\Large
\begin{displaymath}
n \overline{X}_n^2 = \left( \sqrt{n}(\overline{X}_n-\mu) \right)^2 \stackrel{d}{\rightarrow} Z^2 \sim \chi^2(1)
\end{displaymath}
} % End size
\vspace{7mm}
\pause
If $\sigma^2 \neq 1$, the target is $\sigma^2 Z^2$, which is Gamma($\alpha= \frac{1}{2}, \, \beta = 2\sigma^2$)
\end{frame}
\begin{frame}
\frametitle{A variance-stabilizing transformation}
\framesubtitle{An application of the delta method}
\begin{itemize}
\item Because the Poisson process is such a good model, count data often have approximate Poisson distributions.
\item Let $X_1, \ldots, X_n \stackrel{i.i.d.}{\sim}$ Poisson$(\lambda)$
\item $E(X_i)=Var(X_i)=\lambda$
\item $Z_n = \frac{\sqrt{n}(\overline{X}_n-\lambda)}{\sqrt{\overline{X}_n}} \stackrel{d}{\rightarrow} Z \sim N(0,1)$
\item An approximate large-sample confidence interval for $\lambda$ is
\begin{displaymath}
\overline{X}_n \pm z_{\alpha/2}\sqrt{\frac{\overline{X}_n}{n}}
\end{displaymath}
\item Can we do better?
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Variance-stabilizing transformation continued}
%\framesubtitle{}
\begin{itemize}
\item CLT says $\sqrt{n}(\overline{X}_n-\lambda) \stackrel{d}{\rightarrow} T \sim N(0,\lambda)$.
\pause
\vspace{10mm}
\item Delta method says $\sqrt{n}\left( g(\overline{X}_n)- g(\lambda) \right) \stackrel{d}{\rightarrow} g^\prime(\lambda) \, T = Y \sim N\left(0,g^\prime(\lambda)^2 \, \lambda\right)$
\pause
\vspace{10mm}
\item If $g^\prime(\lambda) = \frac{1}{\sqrt{\lambda}}$, then $Y \sim N(0,1)$.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{An elementary differential equation: $g^\prime(x) = \frac{1}{\sqrt{x}}$}
\framesubtitle{Solve by separation of variables}
\pause
{\LARGE
\begin{eqnarray*}
& & \frac{dg}{dx} = x^{-1/2} \\ \pause
& \Rightarrow & dg = x^{-1/2} \, dx\\ \pause
& \Rightarrow & \int dg = \int x^{-1/2} \, dx\\ \pause
& \Rightarrow & g(x) = \frac{x^{1/2}}{1/2} + c = 2 x^{1/2} + c
\end{eqnarray*}
}
\end{frame}
\begin{frame}
\frametitle{We have found}
\pause
\begin{eqnarray*}
\sqrt{n}\left( g(\overline{X}_n)- g(\lambda) \right) & = & \sqrt{n}\left( 2\overline{X}_n^{1/2}- 2\lambda^{1/2} \right) \\
& \stackrel{d}{\rightarrow} & Z \sim N(0,1)
\end{eqnarray*}
\pause
So,
\begin{itemize}
\item We could say that $\sqrt{\overline{X}_n}$ is asymptotically normal, with (asymptotic) mean $\sqrt{\lambda}$ and (asymptotic) variance $\frac{1}{4n}$.
\pause
\item This calculation could justify a square root transformation for count data.
\pause
\item How about a better confidence interval for $\lambda$?
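\item Idea: build a confidence interval for $\sqrt{\lambda}$ from this result, then square the endpoints (next slide).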
\end{itemize} \end{frame} \begin{frame} \frametitle{Seeking a better confidence interval for $\lambda$} %\framesubtitle{} \begin{eqnarray*} 1-\alpha &=& \Pr\{-z_{\alpha/2} < Z < z_{\alpha/2} \} \\ \pause & \approx & \Pr\{-z_{\alpha/2} < 2\sqrt{n}\left( \overline{X}_n^{1/2} - \lambda^{1/2} \right) < z_{\alpha/2} \} \\ \pause & = & \Pr\left\{\sqrt{\overline{X}_n}-\frac{z_{\alpha/2}}{2\sqrt{n}} < \sqrt{\lambda} < \sqrt{\overline{X}_n}+\frac{z_{\alpha/2}}{2\sqrt{n}} \right\} \\ \pause & = & \Pr\left\{\left(\sqrt{\overline{X}_n} - \frac{z_{\alpha/2}}{2\sqrt{n}}\right)^2 < \lambda < \left(\sqrt{\overline{X}_n}+\frac{z_{\alpha/2}}{2\sqrt{n}}\right)^2 \right\}, \end{eqnarray*} \pause where the last equality is valid provided $\sqrt{\overline{X}_n} - \frac{z_{\alpha/2}}{2\sqrt{n}} \ge 0$. \end{frame} \begin{frame} \frametitle{Compare the confidence intervals} %\framesubtitle{} \pause Variance-stabilized CI is \begin{eqnarray*} && \left(\left(\sqrt{\overline{X}_n} - \frac{z_{\alpha/2}}{2\sqrt{n}}\right)^2 ~~,~~ \left(\sqrt{\overline{X}_n} + \frac{z_{\alpha/2}}{2\sqrt{n}}\right)^2\right)\\ \pause &=& \left( \overline{X}_n - 2\sqrt{\overline{X}_n} \frac{z_{\alpha/2}}{2\sqrt{n}} + \frac{z^2_{\alpha/2}}{4n} ~~,~~ \overline{X}_n + 2\sqrt{\overline{X}_n} \frac{z_{\alpha/2}}{2\sqrt{n}} + \frac{z^2_{\alpha/2}}{4n} \right)\\ \pause &=& \left( \overline{X}_n - z_{\alpha/2}\sqrt{\frac{\overline{X}_n}{n}} + {\color{red} \frac{z^2_{\alpha/2}}{4n} } ~~,~~ \overline{X}_n + z_{\alpha/2}\sqrt{\frac{\overline{X}_n}{n}} + {\color{red} \frac{z^2_{\alpha/2}}{4n} } \right) \end{eqnarray*} \pause Compare to the ordinary (Wald) CI \begin{displaymath} \left( \overline{X}_n - z_{\alpha/2}\sqrt{\frac{\overline{X}_n}{n}} ~~,~~ \overline{X}_n + z_{\alpha/2}\sqrt{\frac{\overline{X}_n}{n}} \right) \end{displaymath} \end{frame} \begin{frame} \frametitle{Variance-stabilized CI is just like the ordinary CI} Except shifted to the right by {\color{red} $\frac{z^2_{\alpha/2}}{4n}$}. \begin{itemize} \item If there is a difference in performance, we will see it for small $n$. \item Try some simulations. \item Is the coverage probability closer? 
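\item That is, closer to $1-\alpha = 0.95$? The simulations on the next slides estimate the coverage of both intervals.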
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Try $n=10$, True $\lambda=1$}
\framesubtitle{Illustrate the code first}
{\footnotesize
\begin{verbatim}
> # Variance stabilized Poisson CI
> n = 10; lambda=1; m=10; alpha = 0.05; set.seed(9999)
> z = qnorm(1-alpha/2)
> cover1 = cover2 = NULL
> for(sim in 1:m)
+ {
+ x = rpois(n,lambda); xbar = mean(x); xbar
+ a1 = xbar - z*sqrt(xbar/n); b1 = xbar + z*sqrt(xbar/n)
+ shift = z^2/(4*n)
+ a2 = a1+shift; b2 = b1+shift
+ cover1 = c(cover1,(a1 < lambda && lambda < b1))
+ cover2 = c(cover2,(a2 < lambda && lambda < b2))
+ } # Next sim
> rbind(cover1,cover2)
       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
cover1 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
cover2 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
> mean(cover1)
[1] 0.9
\end{verbatim}
}
\end{frame}
\begin{frame}[fragile]
\frametitle{Code for Monte Carlo sample size~=~10,000 simulations}
{\footnotesize
\begin{verbatim}
# Now the real simulation
n = 10; lambda=1; m=10000; alpha = 0.05; set.seed(9999)
z = qnorm(1-alpha/2)
cover1 = cover2 = NULL
for(sim in 1:m)
{
x = rpois(n,lambda); xbar = mean(x); xbar
a1 = xbar - z*sqrt(xbar/n); b1 = xbar + z*sqrt(xbar/n)
shift = z^2/(4*n)
a2 = a1+shift; b2 = b1+shift
cover1 = c(cover1,(a1 < lambda && lambda < b1))
cover2 = c(cover2,(a2 < lambda && lambda < b2))
} # Next sim
p1 = mean(cover1); p2 = mean(cover2)
# 99 percent margins of error
me1 = qnorm(0.995)*sqrt(p1*(1-p1)/m); me1 = round(me1,3)
me2 = qnorm(0.995)*sqrt(p2*(1-p2)/m); me2 = round(me2,3)
cat("Coverage of ordinary CI = ",p1,"plus or minus ",me1,"\n")
cat("Coverage of variance-stabilized CI = ",p2, "plus or minus ",me2,"\n")
\end{verbatim}
}
\end{frame}
\begin{frame}[fragile]
\frametitle{Results for $n=10$, $\lambda=1$ and 10,000 simulations}
{\footnotesize
\begin{verbatim}
Coverage of ordinary CI = 0.9292 plus or minus 0.007
Coverage of variance-stabilized CI = 0.9556 plus or minus 0.007
> # Does CI include 0.95?
> # Look at estimate (too high) minus margin of error.
> p2-me2
[1] 0.9486
\end{verbatim}
}
\end{frame}
\begin{frame}[fragile]
\frametitle{Results for $n=100$}
\framesubtitle{$\lambda=1$ and 10,000 simulations}
{\footnotesize
\begin{verbatim}
Coverage of ordinary CI = 0.9448 plus or minus 0.006
Coverage of variance-stabilized CI = 0.9473 plus or minus 0.006
> p1+me1
[1] 0.9508
\end{verbatim}
}
\end{frame}
\begin{frame}
\frametitle{The arcsin-square root transformation}
\framesubtitle{For proportions}
Sometimes, variable values consist of proportions, one for each case.
\begin{itemize}
\item For example, cases could be hospitals.
\item The variable of interest is the proportion of patients who came down with something \emph{unrelated} to their reason for admission -- hospital-acquired infection.
\item This is an example of \emph{aggregated data}.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{The advice you often get}
When a proportion is the response variable in a regression, use the \emph{arcsin square root} transformation.
\vspace{5mm}
That is, if the proportions are $P_1, \ldots, P_n$, let
\begin{displaymath}
Y_i = \sin^{-1}(\sqrt{P_i})
\end{displaymath}
and use the $Y_i$ values in your regression.
\vspace{5mm}
\begin{center}
{\huge \textbf{Why?}}
\end{center}
\end{frame}
\begin{frame}
\frametitle{It's a variance-stabilizing transformation.}
\begin{itemize}
\item The proportions are little sample means: $P_i = \frac{1}{m}\sum_{j=1}^m X_{i,j}$
\item Drop the $i$ for now.
\item $X_1, \ldots, X_m$ may not be independent, but let's pretend.
\item $P = \overline{X}_m$ \item Approximately, $\overline{X}_m \sim N\left(\theta,\frac{\theta(1-\theta)}{m}\right)$ \item Normality is good. \item Variance that depends on the mean $\theta$ is not so good. % Partly because we prefer a nice linear model for the mean. \end{itemize} \end{frame} \begin{frame} \frametitle{Apply the delta method} Central Limit Theorem says {\Large \begin{displaymath} \sqrt{m}(\overline{X}_m-\theta) \stackrel{d}{\rightarrow} T \sim N\left(0,\theta(1-\theta)\right) \end{displaymath} } \pause Delta method says {\Large \begin{displaymath} \sqrt{m}\left( g(\overline{X}_m)- g(\theta) \right) \stackrel{d}{\rightarrow} Y \sim N\left(0,g^\prime(\theta)^2 \,\theta(1-\theta)\right). \end{displaymath} } \pause Want a function $g(x)$ with {\Large \begin{displaymath} g^\prime(x) = \frac{1}{\sqrt{x(1-x)}} \end{displaymath} } \pause Try $g(x) = \sin^{-1}\left(\sqrt{x}\right)$. \end{frame} \begin{frame} \frametitle{Chain rule to get $\frac{d}{dx}\sin^{-1}\left(\sqrt{x}\right)$} \pause ``Recall" that $\frac{d}{dx}\sin^{-1}(x) = \frac{1}{\sqrt{1-x^2}}$. Then, \pause \vspace{5mm} \begin{eqnarray*} \frac{d}{dx}\sin^{-1}\left(\sqrt{x}\right) & = & \frac{1}{\sqrt{1-\sqrt{x}^2}} \,\cdot\, \frac{1}{2} x^{-1/2} \\ \pause & = & \frac{1}{2\sqrt{x(1-x)}}. \end{eqnarray*} \vspace{5mm} \pause Conclusion: \begin{displaymath} \sqrt{m}\left( \sin^{-1}\left(\sqrt{\overline{X}_m}\right) - \sin^{-1}\left(\sqrt{\theta}\right) \right) \stackrel{d}{\rightarrow} Y \sim N\left(0,\frac{1}{4}\right) \end{displaymath} \end{frame} \begin{frame} \frametitle{So the arcsin-square root transformation stabilizes the variance} %\framesubtitle{} \begin{itemize} \item $Y \sim N\left(0,\frac{1}{4}\right)$ means the variance no longer depends on the probability that the proportion is estimating. \item Does not quite \emph{standardize} the proportion, but that's okay for regression. \item Potentially useful for non-aggregated data too. \item If we want to do a regression on aggregated data, the point we have reached is that approximately, \end{itemize} {\LARGE \begin{displaymath} Y_i \sim N\left( \sin^{-1}\left(\sqrt{\theta_i}\right), \frac{1}{4m_i} \right) \end{displaymath} } % There are so many good questions about this. % At least the transformation is an increasing function. Call it "chance" of the event. % Weighted least squares. I wonder how much it matters. % For predictions, unwrap the functions % In a regression, do you really need to estimate the variance with MSE? For a small number of cases but proportions based on a lot of instances, this could matter. % Wonder how harmful the within-case iid assumption really is. \end{frame} \begin{frame} \frametitle{That was fun, but it was all univariate.} \pause Because \begin{itemize} \item The multivariate CLT establishes convergence to a multivariate normal, and \item Vectors of MLEs are approximately multivariate normal for large samples, and \item The multivariate delta method can yield the asymptotic distribution of useful functions of the MLE vector, \end{itemize} \vspace{15mm} \pause We need to look at random vectors and the multivariate normal distribution. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. 
It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf13}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf13}}
\end{frame}
\end{document}

\sin^{-1}\left(\sqrt{\overline{X}_m}\right) \sin^{-1}\left(\sqrt{\theta}\right)

\begin{displaymath}
\sqrt{m}\left( g(\overline{X}_m)- g(\theta) \right) \stackrel{d}{\rightarrow} Y \sim N\left(0,(g^\prime(\theta))^2\theta(1-\theta)\right).
\end{displaymath} }

CLT says $\sqrt{n}(\overline{X}_n-\lambda) \stackrel{d}{\rightarrow} T \sim N(0,\lambda)$.

$\sqrt{n}\left( g(\overline{X}_n)- g(\lambda) \right) = \sqrt{n}\left( 2\sqrt{\overline{X}_n}- 2\sqrt{\lambda} \right) \stackrel{d}{\rightarrow} Z \sim N(0,1)$

\Pr\{-z < 2\sqrt{n}\left(\sqrt{\overline{X}_n}-\sqrt{\lambda}\right) < z \}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{}
%\framesubtitle{}
\begin{itemize}
\item
\item
\item
\end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}

\end{displaymath}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Power by simulation
set.seed(9999)
m = 10000 # Monte Carlo sample size
theta=0.60; theta0 = 1/2; n = 100
Ybar = rbinom(m,size=n,prob=theta)/n # A vector of length m
Z2 = sqrt(n)*(Ybar-theta0)/sqrt(Ybar*(1-Ybar)) # Another vector of length m
power = length(Z2[abs(Z2)>1.96])/m; power

# How about a 99 percent margin of error
a = 0.005; z = qnorm(1-a)
merror = z * sqrt(power*(1-power)/m); merror
Lower = power - merror; Lower
Upper = power + merror; Upper

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Notes and Comments:

The 2012 version has multinomial. I cut this out to save time. Replaces the
"hard elementary problem" with a power by simulation.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%