%\documentclass[serif]{beamer} % Get Computer Modern math font.
%\hypersetup{colorlinks,linkcolor=,urlcolor=red}
% To create handout using article mode: Comment above and uncomment below (2 places)
\documentclass[12pt]{article}
\usepackage{beamerarticle}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=red]{hyperref} % For live Web links with href in article mode
\usepackage{amsmath} % For \binom{n}{y}
\usepackage{graphicx} % To include pdf files!
\usepackage{fullpage}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
\usetheme{Berlin} % Displays sections on top
% \usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
% \usetheme{Warsaw}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
%\mode
\mode{\setbeamercolor{background canvas}{bg=black!5}} % Comment this out for handout

\title{Contingency Tables Part One\footnote{See last slide for copyright information.}}
%\subtitle{STA 312: Fall 2022}
%\date{} % To suppress date

\begin{document}

% More material required for handout in article mode. Also eliminate vspace
\title{Contingency Tables Part One\footnote{See last page for copyright information.}}
\subtitle{STA 312: Fall 2022}
\date{} % To suppress date
\maketitle

\begin{frame}
\titlepage
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Suggested Reading: Chapter 2}
%\framesubtitle{}
\begin{itemize}
\item Read Sections 2.1--2.4
\item You are not responsible for Section 2.5
% \item Read Section 2.6 about Fisher's exact test
% \item And ...
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% \section{Overview}
\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Definitions} %%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{We are interested in \textbf{relationships} between variables}
%\framesubtitle{}
A \emph{contingency table} is a joint frequency distribution.
\begin{center}
\begin{tabular}{|l|c|c|}
\hline
 & No Pneumonia & ~~~Pneumonia \\ \hline
No Vitamin C & & \\ \hline
500 mg. or more Daily & & \\ \hline
\end{tabular}
\end{center}
A contingency table
\begin{itemize}
\item Counts the number of cases in combinations of two (or more) categorical variables
\item In general, $X$ has $I$ categories and $Y$ has $J$ categories
\item Often, $X$ is the explanatory variable and $Y$ is the response variable (like regression).
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Cell probabilities $\pi_{ij}$}
\framesubtitle{$i = 1, \ldots, I$ and $j = 1, \ldots, J$}
\begin{center}
\begin{tabular}{|l|c|c|c|}
\multicolumn{1}{c}{} & \multicolumn{2}{c}{\textbf{Passed the Course}} & \multicolumn{1}{c}{} \\ \hline
\textbf{Course} & Did not pass & ~~~Passed~~~ & Total \\ \hline
Catch-up & $\pi_{11}$ & $\pi_{12}$ & {\color{red} $\pi_{1+}$ } \\ \hline
Mainstream & $\pi_{21}$ & $\pi_{22}$ & {\color{red} $\pi_{2+}$ } \\ \hline
Elite & $\pi_{31}$ & $\pi_{32}$ & {\color{red} $\pi_{3+}$ } \\ \hline
Total & {\color{red}$\pi_{+1}$}& {\color{red}$\pi_{+2}$}& {\color{red} 1 } \\ \hline
\end{tabular}
\end{center}
Marginal probabilities
\begin{itemize}
\item $Pr\{X=i\} = \displaystyle{\sum_{j=1}^J \pi_{ij} } = \pi_{i+}$
\item $Pr\{Y=j\} = \displaystyle{\sum_{i=1}^I \pi_{ij} } = \pi_{+j}$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Conditional probabilities}
\begin{displaymath}
Pr\{Y=j|X=i\} = \frac{Pr\{Y=j \cap X=i\}}{Pr\{X=i\}} = \frac{\pi_{ij}}{\pi_{i+}}
\end{displaymath}
\vspace{5mm}
\begin{center}
{\footnotesize
\begin{tabular}{|l|c|c|c|}
\multicolumn{1}{c}{} & \multicolumn{2}{c}{\textbf{Passed the Course}} & \multicolumn{1}{c}{} \\ \hline
\textbf{Course} & Did not pass & ~~~Passed~~~ & Total \\ \hline
Catch-up & $\pi_{11}$ & $\pi_{12}$ & {\color{red} $\pi_{1+}$ } \\ \hline
Mainstream & $\pi_{21}$ & $\pi_{22}$ & {\color{red} $\pi_{2+}$ } \\ \hline
Elite & $\pi_{31}$ & $\pi_{32}$ & {\color{red} $\pi_{3+}$ } \\ \hline
Total & {\color{red}$\pi_{+1}$}& {\color{red}$\pi_{+2}$}& {\color{red} 1 } \\ \hline
\end{tabular}
} % End footnotesize
\vspace{5mm}
\begin{itemize}
\item Usually, interest is in the conditional distribution of the response variable given the explanatory variable.
\item Sometimes, we make tables of conditional probabilities
\end{itemize}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Cell frequencies}
\begin{center}
\begin{tabular}{|l|c|c|c|}
\multicolumn{1}{c}{} & \multicolumn{2}{c}{\textbf{Passed the Course}} & \multicolumn{1}{c}{} \\ \hline
\textbf{Course} & Did not pass & Passed & Total \\ \hline
Catch-up & $n_{11}$ & $n_{12}$ & {\color{red} $n_{1+}$ } \\ \hline
Mainstream & $n_{21}$ & $n_{22}$ & {\color{red} $n_{2+}$ } \\ \hline
Elite & $n_{31}$ & $n_{32}$ & {\color{red} $n_{3+}$ } \\ \hline
Total & {\color{red}$n_{+1}$}& {\color{red}$n_{+2}$}& {\color{red} $n$ } \\ \hline
\end{tabular}
\end{center}
\end{frame}

\begin{frame}
\frametitle{For example}
\begin{center}
\begin{tabular}{|l|c|c|c|}
\multicolumn{1}{c}{} & \multicolumn{2}{c}{\textbf{Passed the Course}} & \multicolumn{1}{c}{} \\ \hline
\textbf{Course} & Did not pass & Passed & Total \\ \hline
Catch-up & \texttt{~27} &\texttt{~~8} & {\color{red} \texttt{~35} } \\ \hline
Mainstream &\texttt{124} & \texttt{204} & {\color{red} \texttt{328} } \\ \hline
Elite &\texttt{~~7} & \texttt{~24} & {\color{red} \texttt{~31} } \\ \hline
Total & {\color{red}\texttt{158}}& {\color{red}\texttt{236}} & {\color{red} \texttt{394} } \\ \hline
\end{tabular}
\end{center}
\end{frame}
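% Illustration: a minimal Python sketch (any statistical software would do) of how
% the overall proportions n_ij/n and the row-wise conditional proportions n_ij/n_i+
% could be computed for the table above. The variable names are arbitrary.
\begin{frame}[fragile]
\frametitle{Computing the observed proportions}
\framesubtitle{A sketch, using the example table}
{\footnotesize
\begin{verbatim}
import numpy as np

counts = np.array([[ 27,   8],    # Catch-up
                   [124, 204],    # Mainstream
                   [  7,  24]])   # Elite
n = counts.sum()                  # 394
p = counts / n                    # overall proportions n_ij / n
row_totals = counts.sum(axis=1, keepdims=True)   # n_i+
cond = counts / row_totals        # conditional proportions n_ij / n_i+
print(np.round(p, 3))
print(np.round(cond, 3))
\end{verbatim}
} % End footnotesize
Each row of \texttt{cond} is an observed conditional distribution, given the course.
\end{frame}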
\section{Study Designs and Models} %%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Estimating probabilities}
{ \Large Should we just estimate $\pi_{ij}$ with $p_{ij}=\frac{n_{ij}}{n}$? }
\vspace{10mm}
\begin{itemize}
\item \emph{Sometimes}.
\item It depends on the study design.
\item The study design determines exactly what is in the tables.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Study designs}
%\framesubtitle{Illustrate with medical applications}
\begin{itemize}
\item Cross-sectional
\item Prospective
\item Retrospective
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Cross-sectional design}
%\framesubtitle{}
\begin{itemize}
\item Both variables in the table are measured, with
\begin{itemize}
\item No assignment of cases to experimental conditions
\item No selection of cases based on variable values
\end{itemize}
\item For example, $n$ first-year university students each sign up for one of three calculus courses, and each student either passes the course or does not.
\item Total sample size $n$ is fixed by the design.
\item Multinomial model, with $c = IJ$ categories.
\item Estimate $\pi_{ij}$ with $p_{ij}$
\item Estimating conditional probabilities is easy.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Prospective design}
%\framesubtitle{}
{\footnotesize
\begin{itemize}
\item Prospective means ``looking forward" (from explanatory to response).
\item Groups that define the explanatory variable categories are formed before the response variable is observed.
\item Experimental studies with random assignment are prospective (clinical trials).
\item Cohort studies that follow patients who got different types of surgery.
\item Stratified sampling, like interviewing 200 people from each province.
\item Marginal totals of the explanatory variable are fixed by the design.
\item Assume random sampling within each category defined by the explanatory variable, and independence between categories.
\item \emph{Product multinomial} model: A product of $I$ multinomial likelihoods.
\item Good for estimating the \emph{conditional} probability of response given a value of the explanatory variable.
\end{itemize}
} % End size
\end{frame}

\begin{frame}
\frametitle{Product multinomial}
%\framesubtitle{}
\begin{itemize}
\item Take independent random samples of sizes $n_{1+}, \ldots, n_{I+}$ from $I$ sub-populations.
\item In each, observe a multinomial with $J$ categories. Compare.
\item Example: Sample 100 entering students from each of three campuses. At the end of first year, observe whether they are in good standing, on probation, or have left the university.
\item The $\pi_{ij}$ are now conditional probabilities: $\pi_{i+}=1$ for each $i$.
\item Write the likelihood as
\end{itemize}
\begin{displaymath}
\prod_{i=1}^3 \left[ \pi_{i1}^{n_{i1}} \pi_{i2}^{n_{i2}} (1-\pi_{i1}-\pi_{i2})^{n_{i3}} \right]
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{Retrospective design}
%\framesubtitle{}
\begin{itemize}
\item Retrospective means ``looking backward" (from response to explanatory).
\item In a \emph{case control} study, a sample of patients with a disease is compared to a sample without the disease, to discover variables that might have caused the disease.
\item Vitamin C and Pneumonia (fairly rare, even in the elderly)
\item Marginal totals for the response variable are fixed by the design.
\item Product multinomial again
\item Natural for estimating the conditional probability of the explanatory variable given the response variable.
\item Usually that's not what you want.
\item But if you know the probability of having the disease, you can use Bayes' Theorem to estimate the conditional probabilities in the more interesting direction (see below).
\end{itemize}
\end{frame}
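% Illustration of the Bayes' Theorem step mentioned above. The symbols D (disease)
% and E (exposure) are introduced here just for this illustration.
\begin{frame}
\frametitle{Reversing the conditioning with Bayes' Theorem}
\framesubtitle{A sketch of the last point}
A case-control study lets you estimate $P(E|D)$ and $P(E|\overline{D})$, where $D$
means disease and $E$ means exposure. If $P(D)$ is known from another source, then
\begin{displaymath}
P(D|E) = \frac{P(E|D) \, P(D)}{P(E|D) \, P(D) + P(E|\overline{D}) \, P(\overline{D})}
\end{displaymath}
gives the conditional probability in the more interesting direction.
\end{frame}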
\section{Odds ratio} %%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Meanings of $X$ and $Y$ ``unrelated"}
%\framesubtitle{}
\begin{itemize}
\item Conditional distribution of $Y|X=x$ is the same for every $x$
\item Conditional distribution of $X|Y=y$ is the same for every $y$
\item $X$ and $Y$ are independent (if both are random)
\end{itemize}
\vspace{10mm}
If variables are not unrelated, call them ``related."
\end{frame}

\begin{frame}
\frametitle{Put probabilities in table cells}
%\framesubtitle{They could be conditional probabilities}
\begin{center}
\begin{tabular}{|l|c|c|c|}
\hline
 & $Y=1$ & $Y=2$ & Total \\ \hline
$X=1$ & $\pi_{11}$ & $\pi_{12}$ &{\color{red} $\pi_{11}+\pi_{12}$ } \\ \hline
$X=2$ & $\pi_{21}$ & $\pi_{22}$ &{\color{red} $\pi_{21}+\pi_{22}$ } \\ \hline
Total &{\color{red} $\pi_{11}+\pi_{21}$ } &{\color{red} $\pi_{12}+\pi_{22}$ } & \\ \hline
\end{tabular}
\end{center}
\vspace{10mm}
{\LARGE
\begin{displaymath}
Pr\{Y=1|X=1\} = \frac{\pi_{11}}{\pi_{11}+\pi_{12}}
\end{displaymath}
}
\end{frame}

\begin{frame}
\frametitle{Conditional distribution of $Y$ given $X=x$}
\framesubtitle{Same for all values of $x$}
\begin{center}
{\tiny
\begin{center}
\begin{tabular}{|l|c|c|c|}
\hline
 & $Y=1$ & $Y=2$ & Total \\ \hline
$X=1$ & $\pi_{11}$ & $\pi_{12}$ &{\color{red} $\pi_{11}+\pi_{12}$ } \\ \hline
$X=2$ & $\pi_{21}$ & $\pi_{22}$ &{\color{red} $\pi_{21}+\pi_{22}$ } \\ \hline
Total &{\color{red} $\pi_{11}+\pi_{21}$ } &{\color{red} $\pi_{12}+\pi_{22}$ } & \\ \hline
\end{tabular}
\end{center}
} % End small size
\vspace{10mm}
$Pr\{Y=1|X=1\} = Pr\{Y=1|X=2\}$ \hspace{5mm}
\end{center}
\begin{eqnarray*}
& \Leftrightarrow & \frac{\pi_{11}}{\pi_{11}+\pi_{12}} = \frac{\pi_{21}}{\pi_{21}+\pi_{22}} \\
& \Leftrightarrow & \pi_{11} (\pi_{21}+\pi_{22}) = \pi_{21} (\pi_{11}+\pi_{12}) \\
& \Leftrightarrow & {\color{blue}\pi_{11}\pi_{21}} + \pi_{11}\pi_{22} = {\color{blue}\pi_{11}\pi_{21}} + \pi_{12}\pi_{21} \\
& \Leftrightarrow & \pi_{11}\pi_{22} = \pi_{12}\pi_{21} \\
&& \\
& \Leftrightarrow & \frac{\pi_{11}\pi_{22}}{\pi_{12}\pi_{21}} = \theta = 1
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Cross product ratio}
\begin{center}
\begin{tabular}{|l|c|c|c|}
\hline
 & $Y=1$ & $Y=2$ & Total \\ \hline
$X=1$ & $\pi_{11}$ & $\pi_{12}$ &{\color{red} $\pi_{11}+\pi_{12}$ } \\ \hline
$X=2$ & $\pi_{21}$ & $\pi_{22}$ &{\color{red} $\pi_{21}+\pi_{22}$ } \\ \hline
Total &{\color{red} $\pi_{11}+\pi_{21}$ } &{\color{red} $\pi_{12}+\pi_{22}$ } & \\ \hline
\end{tabular}
\end{center}
\vspace{15mm}
{\LARGE
\begin{displaymath}
\theta = \frac{\pi_{11}\pi_{22}}{\pi_{12}\pi_{21}}
\end{displaymath}
}
\end{frame}

\begin{frame}
\frametitle{Conditional distribution of $X$ given $Y=y$}
\framesubtitle{Same for all values of $y$}
\begin{center}
{\tiny
\begin{tabular}{|l|c|c|c|}
\hline
 & $Y=1$ & $Y=2$ & Total \\ \hline
$X=1$ & $\pi_{11}$ & $\pi_{12}$ &{\color{red} $\pi_{11}+\pi_{12}$ } \\ \hline
$X=2$ & $\pi_{21}$ & $\pi_{22}$ &{\color{red} $\pi_{21}+\pi_{22}$ } \\ \hline
Total &{\color{red} $\pi_{11}+\pi_{21}$ } &{\color{red} $\pi_{12}+\pi_{22}$ } & \\ \hline
\end{tabular}
} % End small size
\vspace{10mm}
$Pr\{X=1|Y=1\} = Pr\{X=1|Y=2\}$ \hspace{5mm}
\end{center}
\begin{eqnarray*}
& \Leftrightarrow & \frac{\pi_{11}}{\pi_{11}+\pi_{21}} = \frac{\pi_{12}}{\pi_{12}+\pi_{22}} \\
& \Leftrightarrow & \pi_{11} (\pi_{12}+\pi_{22}) = \pi_{12} (\pi_{11}+\pi_{21}) \\
& \Leftrightarrow & {\color{blue}\pi_{11}\pi_{12}} + \pi_{11}\pi_{22} = {\color{blue}\pi_{11}\pi_{12}} + \pi_{12}\pi_{21} \\
& \Leftrightarrow & \pi_{11}\pi_{22} = \pi_{12}\pi_{21} \\
&& \\
& \Leftrightarrow & \frac{\pi_{11}\pi_{22}}{\pi_{12}\pi_{21}} = \theta = 1
\end{eqnarray*}
\end{frame}
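% Illustration with made-up probabilities: a table whose rows have the same
% conditional distribution of Y, so the cross-product ratio works out to one.
\begin{frame}
\frametitle{A small numerical check}
\framesubtitle{Hypothetical probabilities, just for illustration}
\begin{center}
\begin{tabular}{|l|c|c|c|}
\hline
 & $Y=1$ & $Y=2$ & Total \\ \hline
$X=1$ & 0.12 & 0.28 &{\color{red} 0.40 } \\ \hline
$X=2$ & 0.18 & 0.42 &{\color{red} 0.60 } \\ \hline
Total &{\color{red} 0.30 } &{\color{red} 0.70 } & {\color{red} 1 } \\ \hline
\end{tabular}
\end{center}
\vspace{5mm}
Both conditional distributions of $Y$ are $(0.3, 0.7)$, and
\begin{displaymath}
\theta = \frac{(0.12)(0.42)}{(0.28)(0.18)} = \frac{0.0504}{0.0504} = 1.
\end{displaymath}
\end{frame}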
\begin{frame}
\frametitle{$X$ and $Y$ independent}
\framesubtitle{Meaningful in a cross-sectional design}
Write the probability table as
\begin{center}
$\boldsymbol{\pi}$ =
\begin{tabular}{|c|c|c|}
\hline
$x$ & $a-x$ &{\color{red} $a$ } \\ \hline
$b-x$ & $1-a-b+x$ &{\color{red} $1-a$ } \\ \hline
{\color{red} $b$ } &{\color{red} $1-b$ } & {\color{red} $1$ } \\ \hline
\end{tabular}
\end{center}
Independence means $P(X=x,Y=y) = P(X=x)P(Y=y)$. If $x=ab$ then
\begin{center}
$\boldsymbol{\pi}$ =
\begin{tabular}{|c|c|c|}
\hline
$ab$ & $a(1-b)$ &{\color{red} $a$ } \\ \hline
$b(1-a)$ & $(1-a)(1-b)$ &{\color{red} $1-a$ } \\ \hline
{\color{red} $b$ } &{\color{red} $1-b$ } & {\color{red} $1$ } \\ \hline
\end{tabular}
\end{center}
And the cross-product ratio $\theta=1$.
\end{frame}

\begin{frame}
\frametitle{Conversely}
{\tiny
\begin{center}
\begin{tabular}{|c|c|c|}
\hline
$x$ & $a-x$ &{\color{red} $a$ } \\ \hline
$b-x$ & $1-a-b+x$ &{\color{red} $1-a$ } \\ \hline
{\color{red} $b$ } &{\color{red} $1-b$ } & {\color{red} $1$ } \\ \hline
\end{tabular}
\end{center}
} % End small size
If $\theta=1$, then
\begin{eqnarray*}
&& x(1-a-b+x) = (a-x)(b-x) \\
& \Leftrightarrow & x-ax-bx+x^2 = ab-ax-bx+x^2 \\
& \Leftrightarrow & x = ab
\end{eqnarray*}
Meaning $X$ and $Y$ are independent.
\end{frame}

\begin{frame}
\frametitle{What we have learned about the cross-product ratio $\theta$}
%\framesubtitle{}
\begin{itemize}
\item In a $2 \times 2$ table, $\theta=1$ if and only if the variables are unrelated, no matter how ``unrelated" is expressed.
\begin{itemize}
\item Conditional distribution of $Y|X=x$ is the same for every $x$
\item Conditional distribution of $X|Y=y$ is the same for every $y$
\item $X$ and $Y$ are independent (if both are random)
\end{itemize}
\item It's meaningful for all three study designs: Prospective, Retrospective and Cross-sectional.
\end{itemize}
\vspace{10mm}
Investigate $\theta$ a bit more.
\end{frame}

\begin{frame}
\frametitle{Odds}
Denoting the probability of an event by $\pi$,
{\LARGE
\begin{displaymath}
\mbox{Odds}=\frac{\pi}{1-\pi}.
\end{displaymath}
}
\begin{itemize}
\item Implicitly, we are saying the odds are $\frac{\pi}{1-\pi}$ ``to one."
\item If the probability of the event is $\pi=2/3$, then the odds are $\frac{2/3}{1/3}=2$, or two to one.
\item Instead of saying the odds are 5 to 2, we'd say 2.5 to one.
\item Instead of saying 1 to 4, we'd say 0.25 to one.
\item The higher the probability, the greater the odds.
\item And as the probability of an event approaches one, the denominator of the odds approaches zero, so the odds increase without limit.
\item This means the odds can be any non-negative number.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Odds ratio}
%\framesubtitle{}
\begin{itemize}
\item \emph{Conditional odds} are a natural idea.
\item Just use a conditional probability to calculate the odds.
\item Consider the \emph{ratio} of the odds of $Y=1$ given $X=1$ to the odds of $Y=1$ given $X=2$.
\item Could say something like ``The odds of cancer are 3.2 times as great for smokers." (Illustrated on the next slide.)
\end{itemize}
\begin{displaymath}
\frac{\mbox{Odds}(Y=1|X=1)}{\mbox{Odds}(Y=1|X=2)} = \frac{P(Y=1|X=1)}{P(Y=2|X=1)}
\left/ \frac{P(Y=1|X=2)}{P(Y=2|X=2)} \right.
\end{displaymath}
\end{frame}
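% Illustration with made-up numbers (not data): where a statement like ``3.2 times
% as great" might come from.
\begin{frame}
\frametitle{Where a number like 3.2 could come from}
\framesubtitle{Hypothetical probabilities, just for illustration}
Suppose $P(\mbox{cancer}|\mbox{smoker}) = 0.24$ and $P(\mbox{cancer}|\mbox{non-smoker}) = 0.09$.
The two conditional odds are
\begin{displaymath}
\frac{0.24}{0.76} \approx 0.32
\qquad \mbox{and} \qquad
\frac{0.09}{0.91} \approx 0.10,
\end{displaymath}
so the ratio of the odds is roughly $0.32/0.10 = 3.2$: the odds of cancer are
about 3.2 times as great for smokers.
\end{frame}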
\begin{frame}
\frametitle{Simplify the odds ratio}
\begin{center}
{\tiny
\begin{tabular}{|l|c|c|c|}
\hline
 & $Y=1$ & $Y=2$ & Total \\ \hline
$X=1$ & $\pi_{11}$ & $\pi_{12}$ &{\color{red} $\pi_{1+}$ } \\ \hline
$X=2$ & $\pi_{21}$ & $\pi_{22}$ &{\color{red} $\pi_{2+}$ } \\ \hline
Total &{\color{red} $\pi_{+1}$ } &{\color{red} $\pi_{+2}$ } & {\color{red} $1$} \\ \hline
\end{tabular}
} % End small size
\end{center}
\begin{eqnarray*}
\frac{\mbox{Odds}(Y=1|X=1)}{\mbox{Odds}(Y=1|X=2)} & = & \frac{P(Y=1|X=1)}{P(Y=2|X=1)} \left/ \frac{P(Y=1|X=2)}{P(Y=2|X=2)} \right. \\
& = & \frac{\pi_{11}/\pi_{1+}}{\pi_{12}/\pi_{1+}} \left/ \frac{\pi_{21}/\pi_{2+}}{\pi_{22}/\pi_{2+}} \right. \\
& = & \frac{\pi_{11} \pi_{22}}{\pi_{12} \pi_{21}} \\
& = & \theta
\end{eqnarray*}
\textbf{So the cross-product ratio is actually the odds ratio.}
\end{frame}

\begin{frame}
\frametitle{The cross-product ratio \emph{is} the odds ratio}
%\framesubtitle{}
\begin{itemize}
\item When $\theta=1$,
\begin{itemize}
\item The odds of $Y=1$ given $X=1$ equal the odds of $Y=1$ given $X=2$.
\item This happens if and only if $X$ and $Y$ are unrelated.
\item Applies to all 3 study designs.
\end{itemize}
\item If $\theta>1$, the odds of $Y=1$ given $X=1$ are greater than the odds of $Y=1$ given $X=2$.
\item If $\theta<1$, the odds of $Y=1$ given $X=1$ are less than the odds of $Y=1$ given $X=2$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Odds ratio applies to larger tables}
%\framesubtitle{}
\begin{center}
\begin{tabular}{|l|r|r|}
\hline
 & Admitted & Not Admitted \\ \hline
Dept. A & 601 & 332 \\ \hline
Dept. B & 370 & 215 \\ \hline
Dept. C & 322 & 596 \\ \hline
Dept. D & 269 & 523 \\ \hline
Dept. E & 147 & 437 \\ \hline
Dept. F & 46 & 668 \\ \hline
\end{tabular}
\end{center}
The (estimated) odds of being admitted are
\begin{displaymath}
\widehat{\theta} = \frac{(601)(668)}{(332)(46)} = 26.3
\end{displaymath}
times as great in Department A, compared to Department F.
\end{frame}

\begin{frame}
\frametitle{Some things to notice}
\framesubtitle{About the odds ratio}
\begin{itemize}
\item The cross-product (odds) ratio is meaningful for large tables; apply it to $2 \times 2$ sub-tables.
\item Re-arrange rows and columns as desired to get the cell you want in the upper left position.
\item Combining rows or columns (especially columns) by adding the frequencies is natural and valid.
\item If you hear something like ``Chances of death before age 50 are four times as great for smokers," most likely they are talking about an odds ratio.
\end{itemize}
\end{frame}

\section{Testing Independence} %%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Testing independence with large samples}
\framesubtitle{For cross-sectional data}
{\small
\begin{center}
\begin{tabular}{|l|c|c|c|}
\multicolumn{1}{c}{} & \multicolumn{2}{c}{\textbf{Passed the Course}} & \multicolumn{1}{c}{} \\ \hline
\textbf{Course} & Did not pass & ~~~Passed~~~ & Total \\ \hline
Catch-up & $\pi_{11}$ & $\pi_{12}$ & {\color{red} $\pi_{1+}$ } \\ \hline
Mainstream & $\pi_{21}$ & $\pi_{22}$ & {\color{red} $\pi_{2+}$ } \\ \hline
Elite & $\pi_{31}$ & $\pi_{32}$ & {\color{red} $1-\pi_{1+}-\pi_{2+}$ } \\ \hline
Total & {\color{red}$\pi_{+1}$}& {\color{red}$1-\pi_{+1}$}& {\color{red} 1 } \\ \hline
\end{tabular}
\end{center}
Under $H_0: \pi_{ij} = \pi_{i+}\pi_{+j}$
\begin{itemize}
\item There are $(I-1)+(J-1)$ free parameters: The marginal probabilities.
\item MLEs of marginal probabilities are $\widehat{\pi}_{i+}=p_{i+}$ and $\widehat{\pi}_{+j}=p_{+j}$
\item Restricted MLEs are $\widehat{\pi}_{ij}=p_{i+} \, p_{+j}$
\item The null hypothesis \emph{reduces} the number of free parameters in the model by $(IJ-1)-(I-1+J-1) = (I-1)(J-1)$
\item So the test has $(I-1)(J-1)$ degrees of freedom.
\end{itemize}
} % End size
\end{frame}

\begin{frame}
\frametitle{Estimated expected frequencies}
\framesubtitle{Under the null hypothesis of independence}
\begin{eqnarray*}
\widehat{\mu}_{ij} & = & n \, \widehat{\pi}_{ij} \\ \\
& = & n \, \widehat{\pi}_{i+} \widehat{\pi}_{+j} \\ \\
& = & n \, p_{i+} p_{+j} \\ \\
& = & n \, \frac{n_{i+}}{n} \frac{n_{+j}}{n} \\ \\
& = & \frac{n_{i+} n_{+j}}{n}
\end{eqnarray*}
\vspace{5mm}
\begin{center}
(Row total) $\times$ (Column total) $\div$ (Total total)
\end{center}
\end{frame}

\begin{frame}
\frametitle{Test statistics}
\framesubtitle{For testing independence}
\begin{displaymath}
G^2 = 2 \sum_{i=1}^I \sum_{j=1}^J n_{ij}\log \left(\frac{n_{ij}}{\widehat{\mu}_{ij}}\right)
~~~~~~~~~~
X^2 = \sum_{i=1}^I \sum_{j=1}^J \frac{(n_{ij}-\widehat{\mu}_{ij})^2}{\widehat{\mu}_{ij}}
\end{displaymath}
\vspace{10mm}
With expected frequencies
\begin{displaymath}
\widehat{\mu}_{ij} = \frac{n_{i+} n_{+j}}{n} = \frac{\mbox{(Row total) (Column total)}}{\mbox{Total total}}
\end{displaymath}
And degrees of freedom
\begin{displaymath}
df = (I-1)(J-1)
\end{displaymath}
\end{frame}
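% Illustration: a minimal Python sketch (any statistical software would do) that
% carries out these calculations for the example table. Variable names are arbitrary.
\begin{frame}[fragile]
\frametitle{Carrying out the calculation}
\framesubtitle{A sketch, using the example table}
{\footnotesize
\begin{verbatim}
import numpy as np

obs = np.array([[ 27.,   8.],
                [124., 204.],
                [  7.,  24.]])               # observed frequencies
n = obs.sum()
row = obs.sum(axis=1, keepdims=True)         # n_i+
col = obs.sum(axis=0, keepdims=True)         # n_+j
mu_hat = row @ col / n                       # (row total)(column total)/n
X2 = ((obs - mu_hat)**2 / mu_hat).sum()      # Pearson chi-squared
G2 = 2.0 * (obs * np.log(obs / mu_hat)).sum()  # likelihood ratio
df = (obs.shape[0] - 1) * (obs.shape[1] - 1)   # (I-1)(J-1) = 2
\end{verbatim}
} % End footnotesize
Under $H_0$, both statistics have approximately a chi-squared distribution with
$df = (I-1)(J-1)$ degrees of freedom.
\end{frame}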
%\section{Tables of Higher Dimension} %%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\section{Copyright}

\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/brunner/oldclass/312f22} {\texttt{http://www.utstat.toronto.edu/brunner/oldclass/312f22}}

\end{frame}

\end{document}