% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{amsfonts} % for \mathbb{R} The set of reals
\usepackage{comment}
\usepackage{alltt} % For colour within verbatim
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Polynomial Regression\footnote{See last slide for copyright information.}}
\subtitle{STA 302 Fall 2020}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Fitting curves}
%\framesubtitle{}
\begin{itemize}
\item ``Linear'' regression means linear in the $\beta_j$. \pause
\item The model can allow for curviness in the $x$ variable(s).
\item Suppose you want to fit the curve $y = g(x)$ by least squares. \pause
\item Calculate a new $x$ variable using $x^* = g(x)$. \pause
\item And do \texttt{lm(y $\sim$ xstar)}. \pause
\item Implicitly, the model is $y_i = \beta_0 + \beta_1 g(x_i) + \epsilon_i$. \pause
\item If you are really sure $\beta_0=0$ and $\beta_1=1$, try \\
\texttt{lm(y $\sim$ 0 + offset(xstar))}. \pause
\item $H_0: \beta_0=0,\beta_1=1$ is testable.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Fitting a curve: $\widehat{y} = \widehat{\beta}_0 + \widehat{\beta}_1 \sqrt{x}$}
\framesubtitle{Transform the explanatory variable}
\begin{center}
\includegraphics[width=3in]{rocks}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Taylor's Theorem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Taylor's Theorem}
%\framesubtitle{}
\begin{itemize}
\item What if you don't know $g(x)$, but want to allow for possible curviness? \pause
\item Taylor's Theorem says
\begin{eqnarray*}
g(x) & = & g(x_0) + g^\prime(x_0)(x-x_0) + g^{\prime\prime}(x_0)\frac{(x-x_0)^2}{2!} \\
     & + & g^{\prime\prime\prime}(x_0)\frac{(x-x_0)^3}{3!} + \cdots
\end{eqnarray*}
\pause
\item The first several terms can approximate $g(x)$ quite well. \pause
\item The first several terms are a polynomial in $x$. \pause
\item So try something like \verb:lm(y ~ x + xsq + xpow3):. % Need fragile.
\end{itemize}
\end{frame}
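%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{comment}
Added sketch, not shown on the slides: R code illustrating the Fitting curves frame
above -- transform x, fit with lm, fit the offset version, and carry out the
full-versus-reduced F test of H0: beta0 = 0, beta1 = 1. The data are simulated with
beta0 = 0 and beta1 = 1, and the object names (xstar, mod1, mod2, Fstat) are made up
for this illustration.

set.seed(9999); n = 100
x = runif(n,0,5); y = sqrt(x) + rnorm(n)    # True curve is y = sqrt(x) + epsilon
xstar = sqrt(x)                             # Transformed explanatory variable
mod1 = lm(y ~ xstar); summary(mod1)         # Full model: beta0 and beta1 free
mod2 = lm(y ~ 0 + offset(xstar))            # Reduced model: beta0 = 0, beta1 = 1
# Full-versus-reduced F test of H0: beta0 = 0, beta1 = 1 (two restrictions)
RSSf = sum(residuals(mod1)^2); dff = mod1$df.residual
RSSr = sum(residuals(mod2)^2)
Fstat = ((RSSr - RSSf)/2) / (RSSf/dff)
c(F = Fstat, p = 1 - pf(Fstat, 2, dff))
# Because the data really were generated with beta0 = 0 and beta1 = 1,
# the test should usually not reject H0.
\end{comment}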
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Try a Quadratic}
%\framesubtitle{}
\begin{center}
\includegraphics[width=3in]{Cropyield1}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Try a Quadratic}
%\framesubtitle{}
\begin{center}
\includegraphics[width=3in]{Cropyield2}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Diminishing Returns}
%\framesubtitle{}
{\footnotesize % or scriptsize
% The alltt environment requires \usepackage{alltt}
\begin{alltt}
{\color{blue}> Seedsq = Seeds^2
> mod = lm(Yield ~ Seeds + Seedsq); summary(mod)
}
Call:
lm(formula = Yield ~ Seeds + Seedsq)

Residuals:
     Min       1Q   Median       3Q      Max
-0.54050 -0.12593 -0.04656  0.15393  0.56948

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   0.2487     0.2217   1.122 0.267621
Seeds         0.7202     0.1620   4.445 5.34e-05 ***
Seedsq       -0.1043     0.0266  -3.922 0.000284 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2333 on 47 degrees of freedom
Multiple R-squared:  0.3623,    Adjusted R-squared:  0.3352
F-statistic: 13.35 on 2 and 47 DF,  p-value: 2.559e-05
\end{alltt}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Warning}
%\framesubtitle{}
Polynomial regression can give very bad predictions outside the range of the data.
\begin{center}
\includegraphics[width=2.7in]{Cubic}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Even worse}
\framesubtitle{An extra term, which is statistically significant}
\begin{center}
\includegraphics[width=3in]{Quartic}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Response Surface Methodology}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Response Surface Methodology} \pause
%\framesubtitle{}
\begin{itemize}
\item Have an experiment with different dosages of two drugs, or different amounts of water and fertilizer. \pause
\item What is the optimal combination? \pause
\item Include quadratic terms \emph{and} an interaction:
\begin{displaymath}
\begin{array}{ccl}
y = \beta_0 & + & \beta_1 x_1 + \beta_2 x_1^2 \\
            & + & \beta_3 x_2 + \beta_4 x_2^2 \\
            & + & \beta_5 x_1x_2 ~~~~~~~~~ + \epsilon
\end{array}
\end{displaymath}
\pause
\item The result is a curvy surface that could have a maximum or minimum. \pause
\item Estimate the $\beta_j$ parameters, differentiate $E(y)$ with respect to $x_1$ and $x_2$, set the derivatives to zero, and solve.
\end{itemize}
\end{frame}
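%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{comment}
For reference (not shown on the slides): setting the two partial derivatives of E(y)
to zero gives a pair of linear equations in x_1 and x_2. The Solution slide that
follows displays their solution.

\begin{displaymath}
\frac{\partial E(y)}{\partial x_1} = \beta_1 + 2\beta_2 x_1 + \beta_5 x_2 = 0
\hspace{10mm}
\frac{\partial E(y)}{\partial x_2} = \beta_3 + 2\beta_4 x_2 + \beta_5 x_1 = 0
\end{displaymath}
\end{comment}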
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Solution}
%\framesubtitle{}
{\LARGE
\begin{eqnarray*}
\widehat{x}_{1} & = & \frac{2 \, \widehat{\beta}_{1} \widehat{\beta}_{4} - \widehat{\beta}_{3} \widehat{\beta}_{5}}
                           {\widehat{\beta}_{5}^{2} - 4 \, \widehat{\beta}_{2} \widehat{\beta}_{4}} \\
 && \\
\widehat{x}_{2} & = & \frac{2 \, \widehat{\beta}_{2}\widehat{\beta}_{3} - \widehat{\beta}_{1} \widehat{\beta}_{5}}
                           {\widehat{\beta}_{5}^{2} - 4 \, \widehat{\beta}_{2} \widehat{\beta}_{4}}
\end{eqnarray*}
\pause
} % End size
\begin{itemize}
\item $\widehat{x}_{1}$ and $\widehat{x}_{2}$ really are estimates -- estimates of non-linear functions of the $\beta_j$. \pause
\item There's more to it -- for example, checking that it's really a maximum. \pause
\item Is the answer in the range of the data?
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{comment}
Requires \usepackage{comment}

# Response surface solution with SageMath

var('x1 x2 beta0 beta1 beta2 beta3 beta4 beta5')
Ey = beta0 + beta1*x1 + beta2*x1^2 + beta3*x2 + beta4*x2^2 + beta5*x1*x2
d1 = derivative(Ey,x1); d2 = derivative(Ey,x2)
eq = [d1==0,d2==0]; eq
sol = solve(eq,[x1,x2]); sol # A list with one element, number zero
print(latex(sol[0])) # Edit this to put hats etc.

$\widehat{x}_{1} = \frac{2 \, \widehat{\beta}_{1} \widehat{\beta}_{4} - \widehat{\beta}_{3} \widehat{\beta}_{5}}
                        {\widehat{\beta}_{5}^{2} - 4 \, \widehat{\beta}_{2} \widehat{\beta}_{4}},
 \widehat{x}_{2} = \frac{2 \, \widehat{\beta}_{2}\widehat{\beta}_{3} - \widehat{\beta}_{1} \widehat{\beta}_{5}}
                        {\widehat{\beta}_{5}^{2} - 4 \, \widehat{\beta}_{2} \widehat{\beta}_{4}}$

$x_{1} = \frac{2 \, \beta_{1} \beta_{4} - \beta_{3} \beta_{5}}
              {\beta_{5}^{2} - 4 \, \beta_{2} \beta_{4}},
 x_{2} = \frac{2 \, \beta_{2}\beta_{3} - \beta_{1} \beta_{5}}
              {\beta_{5}^{2} - 4 \, \beta_{2} \beta_{4}}$

\left[x_{1} = -\frac{2 \, \beta_{1} \beta_{4} - \beta_{3} \beta_{5}}{4 \, \beta_{2} \beta_{4} - \beta_{5}^{2}}, x_{2} = -\frac{2 \, \beta_{2} \beta_{3} - \beta_{1} \beta_{5}}{4 \, \beta_{2} \beta_{4} - \beta_{5}^{2}}\right]

\begin{displaymath}
y_i = \beta_0 + \beta_1 x_{i,1} + \beta_2 x_{i,1}^2 + \beta_3 x_{i,2} + \beta_4 x_{i,2}^2
      + \beta_5 x_{i,1}x_{i,2} + \epsilon_i
\end{displaymath}

\begin{displaymath}
\begin{array}{ccl}
y_i = \beta_0 & + & \beta_1 x_{i,1} + \beta_2 x_{i,1}^2 \\
              & + & \beta_3 x_{i,2} + \beta_4 x_{i,2}^2 \\
              & + & \beta_5 x_{i,1}x_{i,2}+ \epsilon_i
\end{array}
\end{displaymath}

\begin{displaymath}
\begin{array}{ccl}
y_i = \beta_0 & + & \beta_1 x_{i,1} + \beta_2 x_{i,1}^2 \\
              & + & \beta_3 x_{i,2} + \beta_4 x_{i,2}^2 \\
              & + & \beta_5 x_{i,1}x_{i,2} ~~~~~~~~~ + \epsilon_i
\end{array}
\end{displaymath}
\end{comment}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{comment}
Requires \usepackage{comment}
\end{comment}
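%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{comment}
Added sketch, not part of the lecture: fitting the response surface model in R and
locating the estimated optimum. The data, the true parameter values and the object
names (rsmod, b, A, xhat) are all invented for this illustration.

set.seed(101); n = 100
x1 = runif(n,0,4); x2 = runif(n,0,4)
Ey = 1 + 2*x1 - 0.5*x1^2 + 3*x2 - 0.75*x2^2 + 0.25*x1*x2  # True surface has a maximum inside the region
y = Ey + rnorm(n,0,0.5)
rsmod = lm(y ~ x1 + I(x1^2) + x2 + I(x2^2) + I(x1*x2)); summary(rsmod)
b = coefficients(rsmod)   # b[1],...,b[6] estimate beta0,...,beta5
# Stationary point: solve the two linear equations from setting the derivatives to zero
A = matrix(c(2*b[3], b[6], b[6], 2*b[5]), 2, 2)
xhat = solve(A, -c(b[2], b[4])); xhat
# Same answer from the formulas on the Solution slide
(2*b[2]*b[5] - b[4]*b[6]) / (b[6]^2 - 4*b[3]*b[5])   # x1-hat
(2*b[3]*b[4] - b[2]*b[6]) / (b[6]^2 - 4*b[3]*b[5])   # x2-hat
eigen(A)$values   # It is a maximum if both eigenvalues are negative
\end{comment}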
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f20}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f20}}

\end{frame}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{}
%\framesubtitle{}
\begin{itemize}
\item
\item
\item
\end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}

\end{displaymath}
}

\begin{displaymath}
g(x) = g(x_0) + g^\prime(x_0)(x-x_0) + g^{\prime\prime}(x_0)\frac{(x-x_0)^2}{2!}
     + g^{\prime\prime\prime}(x_0)\frac{(x-x_0)^3}{3!} + \cdots
\end{displaymath}

%%%%%%%%%%%%%%%%%%%%%%%% Rocks %%%%%%%%%%%%%%%%
rm(list=ls())
rocks = read.table('http://www.utstat.toronto.edu/~brunner/data/legal/rock1.data.txt')
head(rocks); attach(rocks)
plot(support,bforce, xlab = 'Lateral Support', ylab='Breaking Strength',
     main = 'Lateral Support and Breaking Strength of Rock Cores')
sqrtsup = sqrt(support)
# Fit the model bforce_i = beta_0 + beta_1 sqrtsup_i + epsilon_i
fit = lm(bforce ~ sqrtsup)
betahat = coefficients(fit); betahat
xx = seq(from=0,to=10,by=0.1); yy = betahat[1] + betahat[2]*sqrt(xx)
lines(xx,yy)

#################################
# Testing offset
rm(list=ls())
set.seed(9999); n = 100
x = runif(n,0,5); y = sqrt(x) + rnorm(n)
plot(x,y)
sqrtx = sqrt(x)
mod1 = lm(y ~ sqrtx); summary(mod1)
mod2 = lm(y ~ 0 + offset(sqrtx)); summary(mod2)
summary(lm(y ~ 0 + offset(sqrt(x)))) # But power terms do not seem to work.

#################################
# Simulating diminishing returns for kg seeds planted per acre and crop yield in tons
rm(list=ls())
set.seed(9999); n = 50; sigma=1/4
Seeds = seq(from=1,to=5,length=n)
mu = 1.5 - 0.25^(Seeds-1); plot(Seeds,mu)
Yield = mu + rnorm(length(Seeds),0,sigma); min(Yield)
plot(Seeds,Yield, ylab='Crop Yield')
title('Seeds planted in kg. and crop yield in tons') # This is Cropyield1.pdf
Seedsq = Seeds^2
mod = lm(Yield ~ Seeds + Seedsq); summary(mod)
lines(Seeds,fitted.values(mod)) # Producing Cropyield2.pdf
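# Added sketch, not in the original notes: where does the fitted quadratic say expected
# yield is greatest? Differentiating b0 + b1*Seeds + b2*Seeds^2 and setting the
# derivative to zero gives Seeds = -b1/(2*b2), the one-variable version of the
# response surface solution. The object name b below is made up here.
b = coefficients(mod)
-b[2]/(2*b[3])  # About 3.45 kg with the estimates shown on the Diminishing Returns slide
# Because b[3] < 0 here, the fitted parabola opens downward, so this is a maximum.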
#################################
# Polynomial regression can give very bad predictions outside the range of the data.

# Cubic
rm(list=ls())
n = 300; sigma=1
set.seed(9999)
x = runif(n,-4,5)
mu = x + 3*sin(x); y = mu + rnorm(n,0,sigma)
plot(x,y, xlim=c(-5,10), ylim = c(-10,10))
# Fit a polynomial regression model
x2 = x^2; x3 = x^3
regmod = lm(y ~ x + x2 + x3)
summary(regmod)
# Plot yhat
xval = seq(from=-5,to=10,length=200)
int = numeric(200)+1
X = cbind(int,xval,xval^2,xval^3); head(X)
betahat = as.matrix(coefficients(regmod)); betahat
yhat = as.numeric(X%*%betahat)
lines(xval,yhat)
tstring = expression(paste(hat(y),' = ',hat(beta)[0],' + ',hat(beta)[1],x,
                           ' + ', hat(beta)[2],x^2, ' + ', hat(beta)[3],x^3 ))
title(tstring)

# Quartic
rm(list=ls())
n = 300; sigma=1
set.seed(9999)
x = runif(n,-4,5)
mu = x + 3*sin(x); y = mu + rnorm(n,0,sigma)
plot(x,y, xlim=c(-5,10), ylim = c(-10,10))
# Fit a polynomial regression model
x2 = x^2; x3 = x^3; x4 = x^4
regmod = lm(y ~ x + x2 + x3 + x4)
summary(regmod)
# Plot yhat
xval = seq(from=-5,to=10,length=200)
int = numeric(200)+1
X = cbind(int,xval,xval^2,xval^3,xval^4); head(X)
betahat = as.matrix(coefficients(regmod)); betahat
yhat = as.numeric(X%*%betahat)
lines(xval,yhat)
tstring = expression(paste(hat(y),' = ',hat(beta)[0],' + ',hat(beta)[1],x,
                           ' + ', hat(beta)[2],x^2, ' + ', hat(beta)[3],x^3,
                           ' + ', hat(beta)[4],x^4 ))
title(tstring)
# The quartic looked pretty good within the range of the data:
yhat = fitted.values(regmod)
lines(sort(x), yhat[order(x)])
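#################################
# Added sketch, not in the original notes: the same quartic fit without building the
# power terms and the X matrix by hand, using I() inside the model formula and
# predict() for the plotted curve. The object names regmod2 and yhat2 are made up here.
regmod2 = lm(y ~ x + I(x^2) + I(x^3) + I(x^4))
xval = seq(from=-5,to=10,length=200)
yhat2 = predict(regmod2, newdata = data.frame(x=xval))
plot(x,y, xlim=c(-5,10), ylim = c(-10,10))
lines(xval,yhat2)  # Same curve as the quartic above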