/********************* scab2.sas (2020 version) **********************/
title 'Randomization Tests on the Scab Disease Data';

data potato;
     infile '/home/brunner0/441s20/ScabDisease.data.txt';
     length condition $ 10.; /* Default length of character values is 8. */
     input condition $  infection;
     label infection = 'Ave percent surface covered';

ods  select OverallANOVA;
proc glm data=potato order=data;
     title2 'With proc glm for Comparison';
     class condition;
     model infection=condition;
run;     

proc means data = potato order=data;
     class condition; var infection;
run;

proc npar1way wilcoxon data=potato;
     title2 'Classical Kruskal-Wallis Test';
     class condition;
     var infection;
     exact / mc seed=77777 maxtime=300;
     /* It will never take 5 minutes, but just to be safe .... */
run;

proc npar1way scores=data data=potato;
     title2 'Randomization Test';
     class condition;
     var infection;
     exact / mc seed=88888 maxtime=300;
run;

/* Follow up with proc multtest. All 21 pairwise comparisons plus
      * Control vs. Average of Spring
      * Control vs. Average of Fall
      * Average of Spring vs. Average of Fall
 */

proc multtest data=potato order=data
     permutation nsample=20000 seed=99999 ;
     title2 'Randomization multiple comparisons with proc multtest';
     class condition;
     /* Pairwise                                                 Bonferroni */
     contrast 'Control vs. Spring300'    1 -1  0  0  0  0  0;    /* 1.0000  */
     contrast 'Control vs. Spring600'    1  0 -1  0  0  0  0;    /* 1.0000  */
     contrast 'Control vs. Spring1200'   1  0  0 -1  0  0  0;    /* 1.0000  */
     contrast 'Control vs. Fall300'      1  0  0  0 -1  0  0;    /* 0.0888  */
     contrast 'Control vs. Fall600'      1  0  0  0  0 -1  0;    /* 1.0000  */
     contrast 'Control vs. Fall1200'     1  0  0  0  0  0 -1;    /* 0.0096  */
     contrast 'Spring300 vs. Spring600'  0  1 -1  0  0  0  0;
     contrast 'Spring300 vs. Spring1200' 0  1  0 -1  0  0  0;
     contrast 'Spring300 vs. Fall300'    0  1  0  0 -1  0  0;
     contrast 'Spring300 vs. Fall600'    0  1  0  0  0 -1  0;
     contrast 'Spring300 vs. Fall1200'   0  1  0  0  0  0 -1;
     contrast 'Spring600 vs. Spring1200' 0  0  1 -1  0  0  0;
     contrast 'Spring600 vs. Fall300'    0  0  1  0 -1  0  0;
     contrast 'Spring600 vs. Fall600'    0  0  1  0  0 -1  0;
     contrast 'Spring600 vs. Fall1200'   0  0  1  0  0  0 -1;
     contrast 'Spring1200 vs. Fall300'   0  0  0  1 -1  0  0;
     contrast 'Spring1200 vs. Fall600'   0  0  0  1  0 -1  0;
     contrast 'Spring1200 vs. Fall1200'  0  0  0  1  0  0 -1;
     contrast 'Fall300 vs. Fall600'      0  0  0  0  1 -1  0;
     contrast 'Fall300 vs. Fall1200'     0  0  0  0  1  0 -1;
     contrast 'Fall600 vs. Fall1200'     0  0  0  0  0  1 -1;
     /* Averages */
     contrast 'Control vs. Spring'       3 -1 -1 -1  0  0  0;
     contrast 'Control vs. Fall'         3  0  0  0 -1 -1 -1;
     contrast 'Spring vs. Fall'          0  1  1  1 -1 -1 -1;
     test mean(infection); /* Requests t-tests */
run;

/* The only issue is Controll vs. Fall300. p = 0.0519, but it's an 
   ESTIMATE of the p-value from a randomization test. Is it 
   SIGNIFICANTLY above 0.05? */

proc iml;
     title2 'Testing H0: p = 0.05 with a large-sample Z-test for proportions';
     p = 0.0519; n = 20000; Z = sqrt(n)*(p-0.05)/sqrt(0.05*0.95);
     print p Z;
quit;

/* Try a bigger MC sample size: 200 thousand. */

proc multtest data=potato order=data
     permutation nsample=200000 seed=99999 ;
     title2 'Bigger MC sample size: See Control vs. Fall300';
     class condition;
     /* Pairwise                                                 Bonferroni */
     contrast 'Control vs. Spring300'    1 -1  0  0  0  0  0;    /* 1.0000  */
     contrast 'Control vs. Spring600'    1  0 -1  0  0  0  0;    /* 1.0000  */
     contrast 'Control vs. Spring1200'   1  0  0 -1  0  0  0;    /* 1.0000  */
     contrast 'Control vs. Fall300'      1  0  0  0 -1  0  0;    /* 0.0888  */
     contrast 'Control vs. Fall600'      1  0  0  0  0 -1  0;    /* 1.0000  */
     contrast 'Control vs. Fall1200'     1  0  0  0  0  0 -1;    /* 0.0096  */
     contrast 'Spring300 vs. Spring600'  0  1 -1  0  0  0  0;
     contrast 'Spring300 vs. Spring1200' 0  1  0 -1  0  0  0;
     contrast 'Spring300 vs. Fall300'    0  1  0  0 -1  0  0;
     contrast 'Spring300 vs. Fall600'    0  1  0  0  0 -1  0;
     contrast 'Spring300 vs. Fall1200'   0  1  0  0  0  0 -1;
     contrast 'Spring600 vs. Spring1200' 0  0  1 -1  0  0  0;
     contrast 'Spring600 vs. Fall300'    0  0  1  0 -1  0  0;
     contrast 'Spring600 vs. Fall600'    0  0  1  0  0 -1  0;
     contrast 'Spring600 vs. Fall1200'   0  0  1  0  0  0 -1;
     contrast 'Spring1200 vs. Fall300'   0  0  0  1 -1  0  0;
     contrast 'Spring1200 vs. Fall600'   0  0  0  1  0 -1  0;
     contrast 'Spring1200 vs. Fall1200'  0  0  0  1  0  0 -1;
     contrast 'Fall300 vs. Fall600'      0  0  0  0  1 -1  0;
     contrast 'Fall300 vs. Fall1200'     0  0  0  0  1  0 -1;
     contrast 'Fall600 vs. Fall1200'     0  0  0  0  0  1 -1;
     /* Averages */
     contrast 'Control vs. Spring'       3 -1 -1 -1  0  0  0;
     contrast 'Control vs. Fall'         3  0  0  0 -1 -1 -1;
     contrast 'Spring vs. Fall'          0  1  1  1 -1 -1 -1;
     test mean(infection); /* Requests t-tests */
run;

proc iml;
     title2 'Another large-sample Z-test of H0: p = 0.05';
     p = 0.0515; n = 200000; Z = sqrt(n)*(p-0.05)/sqrt(0.05*0.95);
     print p Z;
quit;