%\documentclass[serif]{beamer} % Get Computer Modern math font.
%\hypersetup{colorlinks,linkcolor=,urlcolor=red}
% \usepackage{amsmath} % Supposedly unnecessary with Beamer

% To create handout using article mode: Comment above and uncomment below (2 places)
\documentclass[12pt]{article}
\usepackage{beamerarticle}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue,
citecolor=blue, urlcolor=red]{hyperref} % For live Web links with href in article mode
\usepackage{amsmath} % For \binom{n}{y}
\usepackage{graphicx} % To include pdf files!
\usepackage{fullpage}

\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
% \mode<handout>{\setbeamercolor{background canvas}{bg=black!5}} % Comment this out for handout

% \title{Contingency Tables: Part Two\footnote{See last slide for copyright information.}}
% \subtitle{STA 312: Fall 2022}
% \date{} % To suppress date

\begin{document}

% More material required for handout in article mode. Also eliminate vspace
\title{Contingency Tables: Part Two\footnote{See last slide for copyright information.}}
\subtitle{STA 312: Fall 2022}
\date{} % To suppress date
\maketitle

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Suggested Reading: Chapter 2}
%\framesubtitle{}
\begin{itemize}
\item Read Section 2.6 about Fisher's exact test.
\item Read Section 2.7 about multi-dimensional tables and Simpson's paradox.
\end{itemize}
\end{frame}

% \section{Overview}
\begin{frame}
% \frametitle{Overview}
\tableofcontents
\end{frame}

\section{Testing for the Product Multinomial}
%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Testing Association for the Product Multinomial}
\framesubtitle{Prospective and retrospective designs}
Prospective design:
\begin{itemize}
\item A conditional multinomial in each row
\item $I$ independent random samples, one for each value of $X$
\item Likelihood is a product of $I$ multinomials
\item Null hypothesis is that all $I$ sets of conditional probabilities are the same.
\end{itemize}
A retrospective design is just like this, but with rows and columns reversed.
\end{frame}

\begin{frame}
\frametitle{Null hypothesis: no differences among the $I$ vectors of conditional probabilities}
{\small
\begin{center}
\begin{tabular}{|l|c|c|c|c|c|} \hline
                  & Attack & Stroke & Both & Neither & Total \\ \hline
Drug              &        &        &      &         & {\color{red} $n_{1+}$ } \\ \hline
Drug and Exercise &        &        &      &         & {\color{red} $n_{2+}$ } \\ \hline
Total &{\color{red} $n_{+1}$}&{\color{red} $n_{+2}$} &{\color{red} $n_{+3}$}&{\color{red} $n_{+4}$} & {\color{red} $n$ } \\ \hline
\end{tabular}
\end{center}
} % End size
\begin{itemize}
\item Both $n_{1+}$ and $n_{2+}$ are fixed by the design. They are \emph{sample sizes}.
\item Under $H_0$, the MLE of the (common) conditional probability is the marginal sample proportion:
\begin{displaymath}
\widehat{\pi}_{ij} = p_{+j} = \frac{n_{+j}}{n}
\end{displaymath}
\item And the expected cell frequency is just
\begin{displaymath}
\widehat{\mu}_{ij} = n_{i+} \, \widehat{\pi}_{ij} = n_{i+} \, \frac{n_{+j}}{n}
                   = \frac{n_{i+}n_{+j}}{n}.
\end{displaymath}
\end{itemize}
\end{frame}
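\begin{frame}[fragile]
\frametitle{Aside: checking $\widehat{\mu}_{ij} = n_{i+}n_{+j}/n$ in R}
%\framesubtitle{}
Just a sketch; the counts and object names below are invented for illustration. It only verifies that the formula agrees with the expected frequencies used by \texttt{chisq.test}.
{\footnotesize
\begin{verbatim}
tab = matrix(c(30, 15, 10, 145,
               20, 25, 15, 140), nrow=2, byrow=TRUE) # Made-up 2 x 4 table
muhat = outer(rowSums(tab), colSums(tab)) / sum(tab) # n_i+ n_+j / n
muhat
chisq.test(tab, correct=F)$expected # Should agree with muhat
\end{verbatim}
} % End size
\end{frame}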
\begin{frame}
\frametitle{Expected frequencies are the same!}
For the test of independence and the test of equal conditional probabilities,
\begin{displaymath}
\widehat{\mu}_{ij} = \frac{n_{i+}n_{+j}}{n}.
\end{displaymath}
The degrees of freedom are the same too. For the product multinomial,
\begin{itemize}
\item There are $I(J-1)$ free parameters in the unconstrained model.
\item There are $J-1$ free parameters under the null hypothesis.
\item $H_0$ imposes $I(J-1)-(J-1) = (I-1)(J-1)$ constraints on the parameter vector.
\item So $df=(I-1)(J-1)$.
\end{itemize}
{\tiny
\begin{center}
\begin{tabular}{|l|c|c|c|c|c|} \hline
                  & Attack & Stroke & Both & Neither & Total \\ \hline
Drug              &        &        &      &         & {\color{red} $n_{1+}$ } \\ \hline
Drug and Exercise &        &        &      &         & {\color{red} $n_{2+}$ } \\ \hline
Total &{\color{red} $n_{+1}$}&{\color{red} $n_{+2}$} &{\color{red} $n_{+3}$}&{\color{red} $n_{+4}$} & {\color{red} $n$ } \\ \hline
\end{tabular}
\end{center}
} % End size
\end{frame}

\begin{frame}
\frametitle{This is very fortunate}
%\framesubtitle{}
\begin{itemize}
\item The cross-sectional, prospective and retrospective designs are different from one another conceptually.
\item The multinomial and product-multinomial models are different from one another technically.
\item But the tests for a relationship between explanatory and response variables are 100\% the same.
\item Same expected frequencies and same degrees of freedom.
\item Therefore we get the same test statistics and $p$-values.
\end{itemize}
\end{frame}

\section{Fisher's Exact Test}
%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Fisher's Exact Test}
%\framesubtitle{}
\begin{itemize}
\item Everything so far is based on large-sample theory.
\item What if the sample is small?
\item Fisher's exact test is good for $2 \times 2$ tables.
\item There are extensions for larger tables.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Fisher's exact test is a permutation test}
\begin{center}
% Left labels, kind of crude
\begin{tabular}{c} ~ \\ ~ \\ $X$ \\ ~ \end{tabular}
\begin{tabular}{c} ~ \\ 1 \\ 2 \\ \end{tabular}
\begin{tabular}{|c|c|c|}
\multicolumn{2}{c}{$Y$} & \multicolumn{1}{c}{~} \\
\multicolumn{1}{c}{1} & \multicolumn{1}{c}{2} & \multicolumn{1}{c}{~} \\ \hline
~~~~~~$x$~~~~~~& $a-x$ &{\color{red} $a$ } \\ \hline
$b-x$ & $n-a-b+x$ &{\color{red} $n-a$ } \\ \hline
{\color{red} $b$ } &{\color{red} $n-b$ } & {\color{red} $n$ } \\ \hline
\end{tabular}
\end{center}
\begin{itemize}
\item Think of a data file with 2 columns, $X$ and $Y$, filled with ones and twos.
\item $X$ has $a$ ones and $Y$ has $b$ ones.
\item Calculate the estimated odds ratio $\widehat{\theta}$.
\item If $X$ and $Y$ are unrelated, all possible pairings of $X$ and $Y$ values should be equally likely.
\item There are $n!$ ways to order the $X$ values, and for each of these, $n!$ ways to order the $Y$ values.
\end{itemize}
\end{frame}
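\begin{frame}[fragile]
\frametitle{Aside: the permutation idea by brute force}
%\framesubtitle{}
A sketch only, with invented data and object names: a Monte Carlo approximation that shuffles the pairing and re-computes the sample odds ratio. It is \emph{not} Fisher's calculation, which comes next.
{\footnotesize
\begin{verbatim}
set.seed(9999) # Arbitrary seed
x = c(rep(1,12), rep(2,8))                    # Made-up X values
y = c(rep(1,7), rep(2,5), rep(1,2), rep(2,6)) # Made-up Y values, paired with x
OR = function(x,y) { tb = table(x,y); tb[1,1]*tb[2,2]/(tb[1,2]*tb[2,1]) }
thetahat = OR(x,y) # Sample odds ratio for the observed pairing
sim = replicate(10000, OR(x, sample(y))) # Shuffle the pairing, recompute
mean(sim >= thetahat) # Approximate upper-tail p-value
\end{verbatim}
} % End size
\end{frame}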
\begin{frame}
\frametitle{Idea of a permutation test}
%\framesubtitle{}
{\tiny
\begin{center}
% Left labels, kind of crude
\begin{tabular}{c} ~ \\ ~ \\ $X$ \\ ~ \end{tabular}
\begin{tabular}{c} ~ \\ 1 \\ 2 \\ \end{tabular}
\begin{tabular}{|c|c|c|}
\multicolumn{2}{c}{$Y$} & \multicolumn{1}{c}{~} \\
\multicolumn{1}{c}{1} & \multicolumn{1}{c}{2} & \multicolumn{1}{c}{~} \\ \hline
~~~~~~$x$~~~~~~& $a-x$ &{\color{red} $a$ } \\ \hline
$b-x$ & $n-a-b+x$ &{\color{red} $n-a$ } \\ \hline
{\color{red} $b$ } &{\color{red} $n-b$ } & {\color{red} $n$ } \\ \hline
\end{tabular}
\end{center}
} % End size
\begin{itemize}
\item There are $(n!)^2$ ways to arrange the $X$ and $Y$ values.
\item For what fraction of these is the (estimated) odds ratio
\begin{itemize}
\item Greater than or equal to $\widehat{\theta}$ (Upper tail $p$-value)
\item Less than or equal to $\widehat{\theta}$ (Lower tail $p$-value)
\end{itemize}
For a 2-sided test, add the probabilities of all the tables less likely than or equally likely to the one we have observed. (This is what R does.)
\end{itemize}
\vspace{10mm}
Nice idea, but hard to compute. Fisher thought of it \emph{and} simplified it.
\end{frame}

\begin{frame}
\frametitle{Let us count together}
{\tiny
\begin{center}
% Left labels, kind of crude
\begin{tabular}{c} ~ \\ ~ \\ $X$ \\ ~ \end{tabular}
\begin{tabular}{c} ~ \\ 1 \\ 2 \\ \end{tabular}
\begin{tabular}{|c|c|c|}
\multicolumn{2}{c}{$Y$} & \multicolumn{1}{c}{~} \\
\multicolumn{1}{c}{1} & \multicolumn{1}{c}{2} & \multicolumn{1}{c}{~} \\ \hline
~~~~~~$x$~~~~~~& $a-x$ &{\color{red} $a$ } \\ \hline
$b-x$ & $n-a-b+x$ &{\color{red} $n-a$ } \\ \hline
{\color{red} $b$ } &{\color{red} $n-b$ } & {\color{red} $n$ } \\ \hline
\end{tabular}
\end{center}
} % End size
{\small
\begin{itemize}
\item The $n!$ orderings of the ones and twos in a column include lots of repeats that look the same.
\item There are $\binom{n}{a}$ ways to choose which cases have $X=1$.
\item For each of these, there are $\binom{n}{b}$ ways to choose which cases have $Y=1$.
\item So the total number of ways to fill in a $2 \times 2$ table with $n$ observations, $n_{1+}=a$ and $n_{+1}=b$ is $\binom{n}{a}\binom{n}{b}$.
\item Of these, the number of ways to get the values in the table is just the multinomial coefficient
\end{itemize}
\begin{displaymath}
\binom{n}{x~~a-x~~b-x~~n-a-b+x} = \frac{n!}{x! (a-x)! (b-x)! (n-a-b+x)!} .
\end{displaymath}
} % End size
\end{frame}

\begin{frame}
\frametitle{Hypergeometric probability}
{\tiny
\begin{center}
% Left labels, kind of crude
\begin{tabular}{c} ~ \\ ~ \\ $X$ \\ ~ \end{tabular}
\begin{tabular}{c} ~ \\ 1 \\ 2 \\ \end{tabular}
\begin{tabular}{|c|c|r|}
\multicolumn{2}{c}{$Y$} & \multicolumn{1}{c}{~} \\
\multicolumn{1}{c}{1} & \multicolumn{1}{c}{2} & \multicolumn{1}{c}{~} \\ \hline
~~~~~~$x$~~~~~~& $a-x$ &{\color{red} $a=n_{1+}$ } \\ \hline
$b-x$ & $n-a-b+x$ &{\color{red} $n-a=n_{2+}$ } \\ \hline
{\color{red} $b=n_{+1}$ } &{\color{red} $n-b=n_{+2}$ } & {\color{red}$n~~~~~~~$ } \\ \hline
\end{tabular}
\end{center}
} % End size
{\small
Dividing the number of ways to get $n_{11}=x$ by the total number of equally likely outcomes,
\begin{eqnarray*}
P(n_{11}= x) & = & \frac{\binom{n}{x~~a-x~~b-x~~n-a-b+x}}{\binom{n}{a}\binom{n}{b}} \\ \\
 & = & \frac{ \frac{n!}{x! (a-x)! (b-x)! (n-a-b+x)!} }
            { \frac{n!}{a!(n-a)!} \frac{n!}{b!(n-b)!} } \\ \\
 & = & \frac{ \binom{a}{x}\binom{n-a}{b-x} } {\binom{n}{b}} \\ \\
 & = & \frac{ \binom{n_{1+}}{n_{11}}\binom{n_{2+}}{n_{+1}-n_{11}} } {\binom{n}{n_{+1}}}
       ~~~~~~~~\mbox{(Eq. 2.11, p. 46)}
\end{eqnarray*}
} % End size
\end{frame}
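\begin{frame}[fragile]
\frametitle{Aside: this is R's \texttt{dhyper} function}
%\framesubtitle{}
Just a sketch with made-up margins $a$, $b$, $n$ and cell value $x$: the probability above is the hypergeometric pmf, so it can be checked against \texttt{dhyper}.
{\footnotesize
\begin{verbatim}
n = 30; a = 12; b = 9; x = 5 # Invented numbers
choose(a,x) * choose(n-a,b-x) / choose(n,b)
dhyper(x, m=a, n=n-a, k=b) # Same thing
\end{verbatim}
} % End size
\end{frame}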
\begin{frame}
\frametitle{Adding up the probabilities}
\framesubtitle{Always remembering that $a$, $b$ and $n$ are fixed}
{\tiny
\begin{center}
% Left labels, kind of crude
\begin{tabular}{c} ~ \\ ~ \\ $X$ \\ ~ \end{tabular}
\begin{tabular}{c} ~ \\ 1 \\ 2 \\ \end{tabular}
\begin{tabular}{|c|c|c|}
\multicolumn{2}{c}{$Y$} & \multicolumn{1}{c}{~} \\
\multicolumn{1}{c}{1} & \multicolumn{1}{c}{2} & \multicolumn{1}{c}{~} \\ \hline
~~~~~~$x$~~~~~~& $a-x$ &{\color{red} $a$ } \\ \hline
$b-x$ & $n-a-b+x$ &{\color{red} $n-a$ } \\ \hline
{\color{red} $b$ } &{\color{red} $n-b$ } & {\color{red} $n$ } \\ \hline
\end{tabular}
\end{center}
} % End size
\begin{itemize}
\item Fortunately, the sample odds ratio $\widehat{\theta}(x) = \frac{x(n-a-b+x)}{(a-x)(b-x)}$ is an increasing function of $x$ (differentiate).
\item So, tables with larger $x$ values than the one observed also have greater sample odds ratios. Add $P(n_{11}= x)$ over $x$ to get tail probabilities.
\item Range of $x$:
\begin{itemize}
\item $a-x \geq 0$ and $b-x \geq 0$, so $x \leq \min(a,b)$.
\item $n_{22}= n-a-b+x \geq 0$, so $x \geq a+b-n$.
\item Thus, $x$ ranges from $\max(0,a+b-n)$ to $\min(a,b)$.
\end{itemize}
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Example: Sinking of the Titanic}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> # help(Titanic)
> dimnames(Titanic)
$Class
[1] "1st"  "2nd"  "3rd"  "Crew"

$Sex
[1] "Male"   "Female"

$Age
[1] "Child" "Adult"

$Survived
[1] "No"  "Yes"

> # Women in 1st class vs Women in crew
> 
> ladies = Titanic[c(1,4),2,2,]
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Just the ladies}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> ladies
      Survived
Class   No Yes
  1st    4 140
  Crew   3  20
> 140/144 # Rich ladies
[1] 0.9722222
> 20/23 # Cleaning ladies
[1] 0.8695652
> X2 = chisq.test(ladies,correct=F); X2
Warning message:
In chisq.test(ladies, correct = F) :
  Chi-squared approximation may be incorrect

        Pearson's Chi-squared test

data:  ladies
X-squared = 5.2043, df = 1, p-value = 0.02253
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Check the expected frequencies}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> X2$expected
      Survived
Class         No       Yes
  1st  6.0359281 137.96407
  Crew 0.9640719  22.03593
> 
> fisher.test(ladies)

        Fisher's Exact Test for Count Data

data:  ladies
p-value = 0.05547
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.03027561 1.41705937
sample estimates:
odds ratio 
 0.1935113 
\end{verbatim}
} % End size
\end{frame}

\begin{frame}
\frametitle{Conclusion}
Though a higher percentage of women in first class survived than of women in the crew, the difference could have been due to chance.
\end{frame}

\begin{frame}
\frametitle{Fisher's exact test makes sense even without pretending we have a random sample}
You could say
\begin{itemize}
\item Assume that status on the ship for these women (First Class passenger vs. crew) is fixed. It was what it was.
\item Survival also was what it was.
\item Given this, is the observed \emph{pairing} of status and survival an unusual one?
\item That is, for what fraction of the possible pairings is the status difference in survival as great or greater than the one we have observed?
\item A little over 5\%? That's a bit unusual, but perhaps not \emph{very} unusual.
\item \textbf{There is not even any need to talk about probability.}
\end{itemize}
% Note how this way of talking implies a different definition of the p-value.
\end{frame}
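\begin{frame}[fragile]
\frametitle{Aside: the ladies table by hand}
%\framesubtitle{}
A sketch, using the \texttt{ladies} table from the earlier slide: add up the hypergeometric probabilities of all tables no more probable than the observed one. Up to numerical tolerance, this should reproduce the two-sided $p$-value reported by \texttt{fisher.test(ladies)}.
{\footnotesize
\begin{verbatim}
a = sum(ladies[1,]); b = sum(ladies[,1]); n = sum(ladies)
xobs = ladies[1,1] # Observed n11
x = max(0,a+b-n):min(a,b) # Possible values of n11
px = dhyper(x, m=a, n=n-a, k=b)
sum(px[px <= dhyper(xobs, a, n-a, b)]) # Two-sided p-value
\end{verbatim}
} % End size
\end{frame}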
\section{Tables of Higher Dimension}
%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Tables of Higher Dimension: Conditional independence}
%\framesubtitle{}
\begin{itemize}
\item Suppose $X$ and $Y$ are related.
\item Are $X$ and $Y$ related \emph{conditionally} on the value of $W$?
\item One sub-table for each value of $W$.
\item $X$ and $Y$ can easily be related unconditionally, but still be conditionally independent.
\item Example: Among adults 18 and older, $X=$ Tattoos and $Y=$ Grey hair are related, because both are related to $W=$ Age.
\item Need a 3-way table, showing the relationship of tattoos and grey hair separately for each age group.
\item Speak of the relationship between $X$ and $Y$ ``controlling for'' $W$, or ``allowing for'' $W$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Was UC Berkeley discriminating against women?}
\framesubtitle{Data from the 1970s}
Data in a 3-dimensional array: Variables are
\begin{itemize}
\item Sex of the person applying for graduate study
\item Department to which the person applied
\item Whether or not the person was admitted
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Berkeley data}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> ##########################################################
> # More than one Explanatory Variable at once             #
> # data() to list the nice data sets that come with R     #
> # help(UCBAdmissions)                                    #
> ##########################################################
> dim(UCBAdmissions)
[1] 2 2 6
> dimnames(UCBAdmissions)
$Admit
[1] "Admitted" "Rejected"

$Gender
[1] "Male"   "Female"

$Dept
[1] "A" "B" "C" "D" "E" "F"

> # Look at gender by admit.
> # Apply sum to rows and columns, obtaining the marginal freqs.
> sexadmit = apply(UCBAdmissions,c(1,2),sum)
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Sex by Admission}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> sexadmit
          Gender
Admit      Male Female
  Admitted 1198    557
  Rejected 1493   1278
> sexadmit = t(sexadmit); sexadmit
        Admit
Gender   Admitted Rejected
  Male       1198     1493
  Female      557     1278
> rowmarg = apply(sexadmit,1,sum); rowmarg
  Male Female 
  2691   1835 
> percentadmit = 100 * sexadmit[,1]/rowmarg ; percentadmit
    Male   Female 
44.51877 30.35422 
\end{verbatim}
} % End size
It certainly looks suspicious.
\end{frame}
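\begin{frame}[fragile]
\frametitle{Aside: the marginal odds ratio}
%\framesubtitle{}
A sketch for reference: the sample (cross-product) odds ratio for this marginal table is roughly 1.84. The \texttt{fisher.test} output on the next slide reports a conditional maximum likelihood estimate, which is close to but not exactly the same number.
{\footnotesize
\begin{verbatim}
sexadmit[1,1]*sexadmit[2,2] / (sexadmit[1,2]*sexadmit[2,1])
\end{verbatim}
} % End size
\end{frame}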
\begin{frame}[fragile]
\frametitle{Test sex by admission}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> chisq.test(sexadmit,correct=F)

        Pearson's Chi-squared test

data:  sexadmit
X-squared = 92.2053, df = 1, p-value < 2.2e-16

> fisher.test(sexadmit) # Gives same p-value

        Fisher's Exact Test for Count Data

data:  sexadmit
p-value < 2.2e-16
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 1.621356 2.091246
sample estimates:
odds ratio 
  1.840856 
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{But look at the whole table}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> UCBAdmissions
, , Dept = A

          Gender
Admit      Male Female
  Admitted  512     89
  Rejected  313     19

, , Dept = B

          Gender
Admit      Male Female
  Admitted  353     17
  Rejected  207      8
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Berkeley table continued}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
, , Dept = C

          Gender
Admit      Male Female
  Admitted  120    202
  Rejected  205    391

, , Dept = D

          Gender
Admit      Male Female
  Admitted  138    131
  Rejected  279    244
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Berkeley table continued some more}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
, , Dept = E

          Gender
Admit      Male Female
  Admitted   53     94
  Rejected  138    299

, , Dept = F

          Gender
Admit      Male Female
  Admitted   22     24
  Rejected  351    317
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Look at Department $A$}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> # Just Department A
> JustA = t(UCBAdmissions[,,1]); JustA
        Admit
Gender   Admitted Rejected
  Male        512      313
  Female       89       19
> JustA[1,1]/sum(JustA[1,]) # Men
[1] 0.6206061
> JustA[2,1]/sum(JustA[2,]) # Women
[1] 0.8240741
> chisq.test(UCBAdmissions[,,1],correct=F)

        Pearson's Chi-squared test

data:  UCBAdmissions[, , 1]
X-squared = 17.248, df = 1, p-value = 3.28e-05
\end{verbatim}
} % End size
Women are more likely to be admitted.
\end{frame}

\begin{frame}[fragile]
\frametitle{Summarize analyses of sub-tables}
\framesubtitle{Just the code, for reference}
{\footnotesize
\begin{verbatim}
# Summarize analyses of sub-tables: Loop over departments
# Sum of chi-squared values in X2
ndepts = dim(UCBAdmissions)[3]
gradschool=NULL; X2=0
for(j in 1:ndepts)
    {
    dept = dimnames(UCBAdmissions)$Dept[j] # A B C etc.
    tabl = t(UCBAdmissions[,,j]) # All rows, all cols, level j
    Rowmarg = apply(tabl,1,sum)
    Percentadmit = round( 100*tabl[,1]/Rowmarg ,1)
    per = round(Percentadmit,2)
    Test = chisq.test(tabl,correct=F)
    tstat = round(Test$statistic,2); pval = round(Test$p.value,5)
    gradschool = rbind(gradschool,c(dept,Percentadmit,tstat,pval))
    X2 = X2+Test$statistic
    } # Next Department
colnames(gradschool) = c("Dept","%MaleAcc","%FemAcc","Chisq","p-value")
noquote(gradschool) # Print character strings without quote marks
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Simpson's paradox}
%\framesubtitle{}
\begin{verbatim}
> noquote(gradschool) # Print character strings without quote marks
     Dept %MaleAcc %FemAcc Chisq p-value
[1,] A    62.1     82.4    17.25 3e-05  
[2,] B    63       68      0.25  0.61447
[3,] C    36.9     34.1    0.75  0.38536
[4,] D    33.1     34.9    0.3   0.58515
[5,] E    27.7     23.9    1     0.31705
[6,] F    5.9      7       0.38  0.53542
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Overall test of conditional independence}
%\framesubtitle{}
Add the chi-squared values and add the degrees of freedom.
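From the preceding slide, that's $17.25+0.25+0.75+0.30+1.00+0.38 \approx 19.93$, on $6 \times 1 = 6$ degrees of freedom.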
{\footnotesize
\begin{verbatim}
> # Overall test of conditional independence
> names(X2) = "Pooled Chi-square"
> df = ndepts ; names(df)="df"
> pval=1-pchisq(X2,df)
> names(pval) = "P-value"
> print(c(X2,df,pval))
Pooled Chi-square                df           P-value 
     19.938413378       6.000000000       0.002840164 
\end{verbatim}
} % End size
Conclusion: Gender and admission are \emph{not} conditionally independent. From the preceding slide, we see that this comes from Department $A$, which was more likely to admit women than men.
\end{frame}

\begin{frame}[fragile]
\frametitle{Track it down}
%\framesubtitle{}
Make a table showing Department, Number of applicants, Percent female applicants and Percent of applicants admitted.
{\footnotesize
\begin{verbatim}
> # What's happening?
> whoapplies = NULL
> for(j in 1:ndepts)
+     {
+     dept = dimnames(UCBAdmissions)$Dept[j]; names(dept) = "Dept"
+     tabl = t(UCBAdmissions[,,j]) # All rows, all cols, level j
+     nj = sum(tabl); names(nj)=" n "
+     mf = apply(tabl,1,sum); femapp = round(100*mf[2]/nj,2)
+     succ = apply(tabl,2,sum); getin = round(100*succ[1]/nj,2)
+     whoapplies = rbind(whoapplies,c(dept,nj,femapp,getin))
+     } # Next Department
> 
\end{verbatim}
} % End size
Now it's in a table called \texttt{whoapplies}.
\end{frame}

\begin{frame}[fragile]
\frametitle{The explanation}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> noquote(whoapplies)
     Dept n    Female Admitted
[1,] A    933  11.58  64.42   
[2,] B    585  4.27   63.25   
[3,] C    918  64.6   35.08   
[4,] D    792  47.35  33.96   
[5,] E    584  67.29  25.17   
[6,] F    714  47.76  6.44    
\end{verbatim}
} % End size
Departments with a higher acceptance rate have a higher percentage of male applicants.
\end{frame}

\begin{frame}
\frametitle{Does this mean that the University of California at Berkeley was \emph{not} discriminating against women?}
\begin{itemize}
\item By no means. Why does a department admit very few applicants relative to the number who apply?
\item Because they do not have enough professors and other resources to offer more classes.
\item This implies that the departments popular with men were getting more resources, relative to the level of interest measured by number of applicants.
\item Why? Maybe because men were running the show.
\item The ``show'' definitely includes the U. S. military, which funds a lot of engineering and similar stuff at big American universities.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Some uncomfortable truths}
%\framesubtitle{}
\begin{itemize}
\item Especially for non-experimental studies, statistical analyses involving just one explanatory variable at a time can be very misleading.
\item When you include a new variable in an analysis, the results could get weaker, they could get stronger, or they could reverse direction --- all depending upon the inter-relations of the explanatory variables and the response variable.
\item Failing to include important explanatory variables in observational studies is a common source of bias.
\item Ask: ``Did you control for \ldots?''
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{At least it's a start}
%\framesubtitle{}
\begin{itemize}
\item We have seen one way to ``control'' for potentially misleading variables (sometimes called ``confounding variables'').
\item It's \emph{control by sub-division}, in which you examine the relationship in question separately for each value of a control variable or variables.
\item We have a good way of pooling the tests within each level of the control variable, to obtain a test of conditional independence. (A related ready-made R test is sketched on the next slide.)
\item There's also model-based control, which is coming next.
\end{itemize}
\end{frame}
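\begin{frame}[fragile]
\frametitle{Aside: a built-in test of conditional independence}
%\framesubtitle{}
For reference only: R's \texttt{mantelhaen.test} applies the Cochran--Mantel--Haenszel test to a $2 \times 2 \times K$ table. It tests the same null hypothesis of conditional independence, but pools across departments differently than the sum of chi-squared statistics above, so the numbers will not match exactly.
{\footnotesize
\begin{verbatim}
mantelhaen.test(UCBAdmissions) # Admit and Gender, given Dept
\end{verbatim}
} % End size
\end{frame}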
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\section{Copyright}

\begin{frame}
\frametitle{Copyright Information}

This document was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/brunner/oldclass/312f22}
{\texttt{http://www.utstat.toronto.edu/brunner/oldclass/312f22}}

\end{frame}

\end{document}