#Tutorial-9 #we estimate the variance of the estimate of the population total when using a systematic #sample with one primary unit; #the data comprise counts of a certain species of ducks observed in N=200 quadrats #want to estimate the total count using a systematic sample of size n=20 #then we calculate estimates of the variance of this estimate using the following methods #1. simulation #2. treating the systematic sample as a simple random sample #3. post stratify the population and calculate the stratified variance pop<-read.table("bwt100.dat") pop<-data.matrix(pop) tau<-sum(pop) N<-length(c(pop)) n<-20 k<-N/n start<-sample(1:k,1) sys<-seq(start,N,k) tauhat<-k*sum(pop[start,]) #we can actually calculate the exact variance since there are only 10 possible systematic #samples and the exact sampling dist of tauhat can be obtained tauhat<-c() for (i in 1:k) { start<-i sys<-seq(start,N,k) tauhat[i]<-k*sum(pop[start,]) } table(tauhat) vartrue<-var(tauhat) sdtrue<-sqrt(vartrue) #estimate the variance with simulation b<-10000 tauhat<-c() for (i in 1:b) { start<-sample(1:k,1) sys<-seq(start,N,k) tauhat[i]<-k*sum(pop[start,]) } varsim<-var(tauhat) sdsim<-sqrt(varsim) #treat the sys sample as an SRS start<-sample(1:k,1) sys<-seq(start,N,k) tauhat<-k*sum(pop[start]) varsrs<-(N-n)*var(pop[start,])/n varsrs #post-stratification: 1. define each 2-column of the matrix pop as a stratum; calculate #both the stratification and post-stratification variance H<-10 Nh<-rep(20,H) nh<-rep(2,H) start<-sample(1:k,1) sys<-seq(start,N,k) s2h<-c() for (h in 1:H) { samp<-sys[(2*h-1):(2*h)] s2h[h]<-var(pop[samp]) } varst<-sum(Nh*(Nh-nh)*s2h/nh) varst #practice: 1. try this with strata of different sizes, 2. try this on the rainfall data #two-stage sampling for the duck data; let the primary units be the columns of the data #matrix pop, i.e., N=20. There are 10 secondary units in each primary unit, i.e., Mi=10. #We choose n primary units and take m secondary units inside each. #Let the total number of secondary units be fixed at n*m=20. We will try n=5,m=4; n=10,m=2; #n=20,m=1. N=20 M<-10 n=5 m=4 ps<-sample(1:20,n) yhat<-c() for (h in 1:n) { ss<-sample(1:M,m) ys<-pop[ss,ps[h]] yhat[h]<-M*sum(ys)/m } tauhat<-N*sum(yhat)/n twostage<-function(n,m){ ps<-sample(1:20,n) yhat<-c() for (h in 1:n) { ss<-sample(1:M,m) ys<-pop[ss,ps[h]] yhat[h]<-M*sum(ys)/m } tauhat<-N*sum(yhat)/n return(tauhat) } b<-10000 tauhat1<-c() tauhat2<-c() tauhat3<-c() for (i in 1:b) { tauhat1[i]<-twostage(5,4) tauhat2[i]<-twostage(10,2) tauhat3[i]<-twostage(20,1) } box<-list(tauhat1,tauhat2,tauhat3) boxplot(box)