#construct a population of points in the unit square and let y=x1+x2 be the variable #of interest, want to estimate the population total for y, tau N<-100 #population size x1<-runif(100) x2<-runif(100) y<-x1+x2+rnorm(100,0,.25) tau<-sum(y) n=30 #sample size # this population can be stratified in different ways,let the number of strata be H=3 # design 1 plot(x1,x2) lines(c(.5,0),c(0,.5)) lines(c(1,.5),c(.5,1)) z1<-c() z1[(x1+x2)<.5]<-1 z1[(x1+x2)>.5 & (x1+x2)<1.5]<-2 z1[(x1+x2)>1.5]<-3 H<-3 nh1<-c() Nh1<-c() tauh1<-c() for (h in 1:H) { Nh1[h]<-length(z1[z1==h]) nh1[h]<-round(n*Nh1[h]/N) s<-sample(1:Nh1[h],nh1[h]) points(x1[z1==h][s],x2[z1==h][s],col="red") tauh1[h]<-Nh1[h]*mean(y[z1==h][s]) } tauhat1<-sum(tauh1) # design 2 plot(x1,x2) lines(c(.5,1),c(0,.5)) lines(c(0,.5),c(.5,1)) z2<-c() z2[(x1-x2)< -.5]<-1 z2[(x1-x2)> -.5 & (x1-x2)<.5]<-2 z2[(x1-x2)>.5]<-3 H<-3 nh2<-c() Nh2<-c() tauh2<-c() for (h in 1:H) { Nh2[h]<-length(z2[z2==h]) nh2[h]<-round(n*Nh2[h]/N) s<-sample(1:Nh2[h],nh2[h]) points(x1[z2==h][s],x2[z2==h][s],col="red") tauh2[h]<-Nh2[h]*mean(y[z2==h][s]) } tauhat2<-sum(tauh2) # design 3 plot(x1,x2) lines(c(.2,.2),c(0,1)) lines(c(.8,.8),c(0,1)) z3<-c() z3[x1<.2]<-1 z3[x1>.2 & x1<.8]<-2 z3[x1>.8]<-3 H<-3 nh3<-c() Nh3<-c() tauh3<-c() for (h in 1:H) { Nh3[h]<-length(z3[z3==h]) nh3[h]<-round(n*Nh3[h]/N) s<-sample(1:Nh3[h],nh3[h]) points(x1[z3==h][s],x2[z3==h][s],col="red") tauh3[h]<-Nh3[h]*mean(y[z3==h][s]) } tauhat3<-sum(tauh3) #simulation study to compare the stratification designs b<-10000 tauhat1<-c() for (i in 1:b){ nh1<-c() Nh1<-c() tauh1<-c() for (h in 1:H) { Nh1[h]<-length(z1[z1==h]) nh1[h]<-round(n*Nh1[h]/N) s<-sample(1:Nh1[h],nh1[h]) tauh1[h]<-Nh1[h]*mean(y[z1==h][s]) } tauhat1[i]<-sum(tauh1) } tauhat2<-c() for (i in 1:b){ nh2<-c() Nh2<-c() tauh2<-c() for (h in 1:H) { Nh2[h]<-length(z2[z2==h]) nh2[h]<-round(n*Nh2[h]/N) s<-sample(1:Nh2[h],nh2[h]) tauh2[h]<-Nh2[h]*mean(y[z2==h][s]) } tauhat2[i]<-sum(tauh2) } tauhat3<-c() for (i in 1:b) { nh3<-c() Nh3<-c() tauh3<-c() for (h in 1:H) { Nh3[h]<-length(z3[z3==h]) nh3[h]<-round(n*Nh3[h]/N) s<-sample(1:Nh3[h],nh3[h]) tauh3[h]<-Nh3[h]*mean(y[z3==h][s]) } tauhat3[i]<-sum(tauh3) } var(tauhat1) var(tauhat2) var(tauhat3) par(mfrow=c(2,2)) l1<-min(c(tauhat1,tauhat2,tauhat3)) l2<-max(c(tauhat1,tauhat2,tauhat3)) hist(tauhat1,xlim=c(l1,l2)) hist(tauhat2,xlim=c(l1,l2)) hist(tauhat3,xlim=c(l1,l2)) par(mfrow=c(1,1)) hist(tauhat2) hist(tauhat1,add=T,col="lightblue") abline(v=tau,col="red",lwd=2) hist(tauhat3,add=T,col="pink") ################## Moose data example and simulation ############### moosedata<-read.table(file="C:/Users/Shirin/Desktop/STUFF/STAT-410/moosedata.txt") stratum<-moosedata$str y<-moosedata$moose n1<-length(y[stratum==1]) n2<-length(y[stratum==2]) n3<-length(y[stratum==3]) tapply(y,stratum,mean) tapply(y,stratum,var) ######construct a population with strata sizes: N1<-122 N2<-57 N3<-22 y1<-y[stratum==1] y2<-y[stratum==2] y3<-y[stratum==3] copy1<-floor(N1/n1) copy2<-floor(N2/n2) copy3<-floor(N3/n3) res1<-N1-copy1*n1 res2<-N2-copy2*n2 res3<-N3-copy3*n3 y1aug<-c(rep(y1,copy1),sample(y1,res1)) y2aug<-c(rep(y2,copy2),sample(y2,res2)) y3aug<-c(rep(y3,copy3),sample(y3,res3)) tau<-sum(y1aug)+sum(y2aug)+sum(y3aug) tauhat<-c() for (k in 1:b) { s1<-sample(N1,n1) tauhat1<-N1*mean(y1aug[s1]) s2<-sample(N2,n2) tauhat2<-N2*mean(y2aug[s2]) s3<-sample(N3,n3) tauhat3<-N3*mean(y3aug[s3]) tauhat[k]<-tauhat1+tauhat2+tauhat3 } hist(tauhat) abline(v=tau,col="red",lwd=2)