#consider the data given by the pdf file "map" in form of a map of a region in which we #want to estimate the number of certain type of trees; the region is devided to farms of #different areas. We want to estimate the number of trees in the region by selecting n=5 #farms and counting the number of trees in them. We will use the area of the land as an #auxiliary variable with respect to which unequal probabilities are defined. y<-c(9,7,1,16,7,5,7,0,7,6,18,5,3,9)#unknown population values tau<-sum(y)#true value of the parameter we want to estimate x.p<-c(.08,.08,.07,.15,.09,.03,.06,.02,.04,.04,.16,.09,.02,.07)#known auxiliary variable p<-x.p/sum(x.p)#unequal probabilities n<-5#sample size N<-length(y)#population size #first we ignore the fact that we have farms of different sizes and take an SRS and use #sample mean to estimate the population total s<-sample(1:N,n) s y.s<-y[s] tauhat<-N*mean(y.s) tauhat varhat<-N*(N-n)*var(y.s)/n #now take a sample with respect to the probabilities proportional to the area of the lands #and calculate the Hansen-Hurvitz estimator p<-x.p/sum(x.p) s<-sample(1:N,n,replace=TRUE,prob=p) s y.s<-y[s] p.s<-p[s] tauhat.hh<-mean(y.s/p.s) tauhat.hh varhat.hh<-var(y[s]/p[s])/n #let's calculate the Hurvitz-Thompson estimator pi<-1-(1-p)^n u.s<-unique(s) nu<-length(u.s) pi.s<-pi[u.s] ys<-y[u.s] tauhat.ht<-sum(ys/pi.s) pij<-array(dim=c(N,N)) for (i in 1:N) { for (j in 1:N) { if (i==j) pij[i,j]<-pi[i] if (i!=j) pij[i,j]<-pi[i]+pi[j]-(1-(1-p[i]-p[j])^n) } } #calculate the variance of HT estimator using formula (6.5) M<-pij term1<-sum(((1-pi)/pi)*y^2) k<-0 term2<-c() for (i in 1:N) { for (j in 1:N) { if (j!=i) { k<-k+1 term2[k]<-((M[i,j]-M[i,i]*M[j,j])/(M[i,i]*M[j,j]))*y[i]*y[j] } } } var.ht<-term1+sum(term2) #calculate the estimate of variance of HT estimator using formula (6.6) G<-pij[u.s,u.s] G term1<-sum(((1-pi.s)/pi.s^2)*ys^2) k<-0 term2<-c() for (i in 1:nu) { for (j in 1:nu) { if (j!=i) { k<-k+1 term2[k]<-((G[i,j]-G[i,i]*G[j,j])/(G[i,i]*G[j,j]))*ys[i]*ys[j]/G[i,j] } } } varhat.ht<-term1+sum(term2) #calculate the approximation to the estimate of the varianve of HT estimator using formula (6.8) t<-nu*ys/pi.s vartild.ht<-((N-nu)/N)*var(t)/nu #let's run some simulations: HH<-function(y,n,b,p){ N<-length(y) tauhh<-c() for (i in 1:b){ s<-sample(1:N,n,replace=TRUE,prob=p) tauhh[i]<-mean(y[s]/p[s]) } a<-print.noquote(paste("(a) HH","E(ybarhh)=",mean(tauhh))) return(tauhh) } b<-10000 thh<-HH(y,n,b,p) hist(thh) points(tau,0,pch=17) HT<-function(y,n,b,p){ N<-length(y) pi<-1-(1-p)^n tauht<-c() for (i in 1:b){ s<-sample(1:N,n,replace=TRUE,prob=pi) s<-unique(s) nu<-length(s) t<-nu*y[s]/pi[s] tauht[i]<-mean(t) } a<-print.noquote(paste("(b) HT","E(ybarht)=",mean(tauht))) return(tauht) } tht<-HT(y,n,b,p) hist(tht) points(tau,0,pch=17)