/* sampvar.sas */
title 'Sample variation method for selecting sample size';

/* Suppose we are planning a 2x3x4 analysis of covariance, 
with two covariates, and factors named A, B and C. We 
are setting it up as a regression model, with one dummy 
variable for A, 2 dummy variables for B, and 3 for C. 
Interactions are represented by product terms, and there 
are 2 products for the AxB interaction, 3 for AxC, 6 for 
BxC, and 1*2*3 = 6 for AxBxC. The regression coefficients 
for these plus two for the covariates and one for the 
intercept give us p = 26. The null hypothesis is that of no 
BxC interaction, so s = 6. The "other effects in the 
model" for which we are "controlling" are represented 
by 2 covariates and 17 dummy variables and products of 
dummy variables. */

proc iml;
   title2 'Find n given a';

   /* ---- Inputs ------------------------------------------------- */
   alpha = 0.05;   /* Significance level of the F test              */
   s     = 6;      /* Numerator df: number of betas being tested    */
   p     = 26;     /* Total number of betas in the model            */
   a     = .10;    /* Proportion of remaining variation explained   */
                   /* by the tested variables, after controlling    */
                   /* for all the other variables in the model      */

   /* ---- Search ------------------------------------------------- */
   /* Begin at n = p so that the first sample size tried is p+1,    */
   /* the smallest n with a positive denominator df. The p-value    */
   /* starts at 1 only so that it is defined before the loop.       */
   n      = p;
   pvalue = 1;
   do until (pvalue <= alpha);
      n      = n + 1;
      dendf  = n - p;                        /* Denominator df      */
      fstat  = dendf/s * a/(1-a);            /* F as a function of a */
      pvalue = 1 - probf(fstat, s, dendf);   /* Upper-tail area     */
   end;

   /* n is now the first sample size whose p-value is <= alpha.     */
   print "Required sample size is" n;
quit;

/* In the potato data, there are 3 potatoes per treatment 
   combination in a temperature (2 levels) by bacteria type 
   (3 levels) by oxygen level (3 levels) design, so 
   n = 3*2*3*3 = 54, and p = 18 matches the 2*3*3 = 18 
   treatment combinations. What proportion of remaining 
   variation is required for the main effect of bacteria 
   type (s = 3-1 = 2) to be significant? */

proc iml;
   title2 'Find a given n';
   alpha = 0.05;  /* Significance level.                */
   s = 2;         /* Numerator df = # Expl vars tested. */
   p = 18;        /* There are p beta parameters.       */
   n = 54  ;      /* Sample size                        */

   /* Initializing ... pval starts at 1 so it is defined before */
   /* the loop, as in the "Find n given a" step.                */
   df2 = n-p;  a = 0;  pval = 1;

   /* Try a = .001, .002, ..., .999 and stop at the first value  */
   /* whose F test is significant. a is computed from an integer */
   /* counter rather than by repeated addition of .001, so no    */
   /* rounding error accumulates. The grid stops short of a = 1, */
   /* where a/(1-a) would divide by zero, so the loop always     */
   /* ends even when df2 is too small for any a to qualify.      */
   do step = 1 to 999 until (pval <= alpha);
      a = step/1000;
      F = df2/s * a/(1-a);
      pval = 1-probf(F,s,df2);
   end;

   /* Report the answer, or say so if the whole grid failed.     */
   if pval <= alpha then
      print "Required proportion of remaining variation is" a;
   else
      print "No proportion below 1 is significant at sample size" n;
quit;