#load the artifitial data set rainfall-data load("rainfall-data.gz") y<-rain #this is the variable of interest tau=sum(y)#this is the total amount of rainfall for the year, the quantity that we wish to estimate #we will use two systematic samples and three cluster samples, the number of secondary #units (days) is fixed at m=50 for all the samples, but the number of primary units and #form of them is different under each design; comparisons are made based on the variance #of the estimates, estimated using simulation M<-364 m<-56 #systematic sample with one primary unit k<-floor(M/m) start<-sample(1:k,1) s<-seq(start,M,k) tauhat1<-k*sum(y[s])#unbiased estimate of the total rainfall #systematic sampling with 2 primary units n<-2 k<-floor(M/(m/2)) start<-sample(1:k,n) s1<-seq(start[1],M,k) s2<-seq(start[2],M,k) tauhat2<-k*(mean(c(sum(y[s1]),sum(y[s2])))) #cluster sampling with n clusters of size Mi (here we choose n and Mi such that #n*Mi=56, the number of secondary sampling units) taucluster<-function(n,Mi){ N<-M/Mi #number of clusters z<-c() for (i in 1:N) z<-c(z,rep(i,Mi)) start<-sample(1:N,n) ys<-c() for (i in 1:n){ yi<-y[z==start[i]] ys[i]<-sum(yi) } out<-N*mean(ys) return(out) } #now let's run simulations to make a comparison among the above sampling strategies b<-10000 #systematic with one primary unit k<-floor(M/m) tauhat1<-c() for (j in 1:b) { start<-sample(1:k,1) s<-seq(start,M,k) tauhat1[j]<-k*sum(y[s]) } var(tauhat1) #systematic with 2 primary units n<-2 k<-floor(M/(m/2)) tauhat2<-c() for (j in 1:b){ start<-sample(1:k,n) s1<-seq(start[1],M,k) s2<-seq(start[2],M,k) tauhat2[j]<-k*(mean(c(sum(y[s1]),sum(y[s2])))) } var(tauhat2) #cluster sampling with 5 choices of (n,Mi) tauh1<-c() tauh2<-c() tauh3<-c() tauh4<-c() tauh5<-c() tauh6<-c() for (j in 1:b) { tauh1[j]<-taucluster(56,1) tauh2[j]<-taucluster(28,2) tauh3[j]<-taucluster(14,4) tauh4[j]<-taucluster(7,8) tauh5[j]<-taucluster(4,14) tauh6[j]<-taucluster(2,28) } var(tauh1) var(tauh2) var(tauh3) var(tauh4) var(tauh5) var(tauh6) par(mfrow=c(2,2)) hist(tauhat1) hist(tauhat2) par(mfrow=c(3,2)) hist(tauh1) hist(tauh2) hist(tauh3) hist(tauh4) hist(tauh5) hist(tauh6) par(mfrow=c(1,1)) box<-list(tauhat1,tauhat2,tauh1,tauh2,tauh3,tauh4,tauh5,tauh6) boxplot(box) abline(h=tau,col="red")