#compare the estimators under systematic and cluster sampling #Consider rainfall data again #systematic sample with one primary unit load("rainfall-data.gz") y<-rain #this is the variable of interest tau=sum(y)#this is the total amount of rainfall for the year, the quantity that we wish to estimate M<-364 #(length(y)) #now let's run simulations to make a comparison among the above sampling strategies b<-10000 Mi=2 #systematic with one primary unit n=1 N<-floor(M/Mi/n) tauhatSys1<-rep(NA,b) for (j in 1:b) { start<-sample(1:N,n) s<-seq(start,M,N) tauhatSys1[j]<-N*sum(y[s]) #only one primary unit, so this is just a sum over secondary units within the one primary unit } mean(tauhatSys1) var(tauhatSys1) #systematic with 2 primary units n<-2 N<-floor(M/Mi) tauhatSys2<-rep(NA,b) for (j in 1:b){ ys=rep(NA,n) start<-sample(1:N,n) for (i in 1:n){ s<-seq(start[i],M,N) ys[i]=sum(y[s]) } tauhatSys2[j]<-N*(mean(ys)) #tau_hat=N*y_bar where y_bar is mean of sums of y within a primary unit i i.e., yi=sum_j(yij) } mean(tauhatSys2) var(tauhatSys2) #cluster sampling with n clusters of size Mi (here we choose n and Mi such that #n*Mi=50, the number of secondary sampling units) #in clustering, each primary unit is a cluster taucluster<-function(n,Mi,M){ N<-M/Mi #number of clusters z<-c() for (i in 1:N) z<-c(z,rep(i,Mi)) start<-sample(1:N,n) ys<-rep(NA,n) for (i in 1:n){ yi<-y[z==start[i]] ys[i]<-sum(yi) #sum of yij (secondary units) within a cluster i.e., yi=sum_j(yij) } out<-N*mean(ys) return(out) } #cluster sampling with 5 choices of (n,Mi) nad M=M is held fixed #clusters are of equal sizes tauh_cluster1=tauh_cluster2= tauh_cluster3 = tauh_cluster4 = tauh_cluster5 = tauh_cluster6=rep(NA,b) for (j in 1:b) { tauh_cluster1[j]<-taucluster(56,1,M=M) tauh_cluster2[j]<-taucluster(28,2,M=M) tauh_cluster3[j]<-taucluster(14,4,M=M) tauh_cluster4[j]<-taucluster(7,8,M=M) tauh_cluster5[j]<-taucluster(4,14,M=M) tauh_cluster6[j]<-taucluster(2,28,M=M) } mean(tauh_cluster1) mean(tauh_cluster2) mean(tauh_cluster3) mean(tauh_cluster4) mean(tauh_cluster5) mean(tauh_cluster6) var(tauh_cluster1) var(tauh_cluster2) var(tauh_cluster3) var(tauh_cluster4) var(tauh_cluster5) var(tauh_cluster6) par(mfrow=c(2,2)) hist(tauhatSys1) abline(v=tau,col='red') hist(tauhatSys2) abline(v=tau,col='red') #make a new page for the histograms par(mfrow=c(3,2)) hist(tauh_cluster1) abline(v=tau,col='red') hist(tauh_cluster2) abline(v=tau,col='red') hist(tauh_cluster3) abline(v=tau,col='red') hist(tauh_cluster4) abline(v=tau,col='red') hist(tauh_cluster5) abline(v=tau,col='red') hist(tauh_cluster6) abline(v=tau,col='red') par(mfrow=c(1,1)) box<-list(s1=tauhatSys1,s2=tauhatSys2,cl1=tauh_cluster1,cl2=tauh_cluster2,cl3=tauh_cluster3,cl4=tauh_cluster4,cl5=tauh_cluster5,cl6=tauh_cluster6) boxplot(box) abline(h=tau,col="red")