function(x=c(.8,.1,.14,.6,.34), y=c(.4,.38,.64,.26,.31,.55), comb.lim=1e6, Nsim=10000){ # This function SiegelTukey performs the Siegel-Tukey test for # dispersion effect, with allowance for ties. It is assumed # a priori that the first input sample vector x has higher # dispersion if there is a treatment effect at all. # The two sample vectors are input as arguments x and y. # This function uses the function ST.rank which is assumed to # be in the workspace. # The p-value is computed either by full enumeration of the # distribution (if the size of that enumeration does not exceed # comb.lim=1e6) or otherwise by using simulation, sampling Nsim times # from that distribution. Change the 1e6 and/or Nsim limits # as is suitable to your machine. # The p-value is also computed by using the normal approximation # (without continuity correction in case of ties. #---------------------------------------------------------------- z=c(x,y) ord=order(z) zz=z[ord] n=length(y) m=length(x) sample.xy=c(rep(1,m),rep(2,n)) sample.ind=(sample.xy)[ord] N=m+n STR=ST.rank(N) zz.u = unique(zz) for(zx in zz.u){ STR[zz==zx]=mean(STR[zz==zx]) } Ws=sum(STR[sample.ind==1]) W.mean=m*(N+1)/2 FLAG=F # FLAG control whether we use a continuity correction or not # in the normal approximation. if(length(zz.u)mn) FLAGmn=F # FLAGmn controls that we take the smaller of the two sample # sizes in getting the p-value. if(choose(N,mn)<=comb.lim){ out=combn(STR,mn,FUN=sum)}else{ for(i in 1:Nsim){ out[i]=sum(sample(STR,mn)) } } NN1=N*(N+1)/2 if(FLAGmn==F){ pval.exact=mean(out<=Ws)}else{ pval.exact=mean(NN1-Ws<=out) } if(FLAG==F){ pval.norm=pnorm((Ws-W.mean+.5)/sqrt(W.var))}else{ pval.norm=pnorm((Ws-W.mean)/sqrt(W.var)) } combs=length(out) out0=c(Ws,W.mean,sqrt(W.var),pval.exact,pval.norm,combs) if(combs!=Nsim){ names(out0)=c("Ws","mean(Ws)", "st.dev(Ws)","pval.exact","pval.norm","n.comb")}else{ names(out0)=c("Ws","mean(Ws)", "st.dev(Ws)","pval.simul", "pval.norm","Nsim") } signif(out0,3) }