#Example 1: #consider the data in form of a map of a region tht is divided into farms of different arreas #We are going to estimate the number of trees in the region by selecting n=5 farms and #estimating the number of trees in them. The area of the farm land will be used as an #auxiliary variable to estimate the unequal probabilities z_pop<-c(9,7,1,16,7,5,7,0,7,6,18,5,3,9)#unknown population values tau<-sum(z_pop)#true value of the parameter we want to estimate x<-c(.08,.08,.07,.15,.09,.03,.06,.02,.04,.04,.16,.09,.02,.07)#known auxiliary variable such as #area of the farm p<-x/sum(x)#obtain unequal probabilities n<-5#sample size N<-length(z_pop)#population size #lets see what will be the sample total popoulation estimate #if we ignore the size of the farms #and take SRS #sample mean to estimate the population total (s<-sample(1:N,n)) smpl<-z_pop[s] (tau_hat<-N*mean(smpl)) varhat<-N*(N-n)*var(smpl)/n #using formula 2.9 remember tau_hat=N*y_bar #now take into account the size of the farms, i.e, take a sample using the unequal #probabilities and calculate the Hansen-Hurvitz estimator p<-x/sum(x) (smpl<-sample(1:N,n,replace=TRUE,prob=p)) y_s<-z_pop[smpl] p_s<-p[smpl] (tauhat.hh<-mean(y_s/p_s)) varhat.hh<-var(y_s/p_s)/n #let's calculate the Hurvitz-Thompson estimator #obtain the inclusion probabilities pi<-1-(1-p)^n unique_s<-unique(smpl) y_s<-z_pop[unique_s] nu<-length(unique_s) pi_s<-pi[unique_s] tauhat.ht<-sum(y_s/pi_s) #Now obtain variance of the tauhat.ht #First obtain the joint inclusion prob #pii=pi on diagonal #pij=pi+pj-(1-(1-pi-pj)^n), off-dagonal t<-array(dim=c(N,N)) pij=matrix(NA,N,N) for (i in 1:N) { for (j in 1:N) { if (i==j) pij[i,j]<-pi[i] if (i!=j) pij[i,j]<-pi[i]+pi[j]-(1-(1-p[i]-p[j])^n) } } #calculate the variance of HT estimator using formula (6.5) page 70 M<-pij term1<-sum(((1-pi)/pi)*z_pop^2) k<-0 term2<-c() for (i in 1:N) { for (j in 1:N) { if (j!=i) { k<-k+1 term2[k]<-((M[i,j]-M[i,i]*M[j,j])/(M[i,i]*M[j,j]))*z_pop[i]*z_pop[j] } } } var.ht<-term1+sum(term2) #next, calculate the estimate of variance of HT estimator using formula (6.6) page 70 (G<-pij[unique_s,unique_s]) term1<-sum(((1-pi_s)/pi_s^2)*y_s^2) k<-0 term2<-c() for (i in 1:nu) { for (j in 1:nu) { if (j!=i) { k<-k+1 term2[k]<-((G[i,j]-G[i,i]*G[j,j])/(G[i,i]*G[j,j]))*y_s[i]*y_s[j]/G[i,j] } } } varhat.ht<-term1+sum(term2) #calculate the approximation to the estimate of the variance of HT estimator #using formula (6.8) page 70 t<-nu*y_s/pi_s tHT=mean(t) vartild.ht<-((N-nu)/N)*var(t)/nu #Now we can run some simulations HH<-function(y,n,b,p){ N<-length(y) tauhh<-rep(NA,b) for (i in 1:b){ s<-sample(1:N,n,replace=TRUE,prob=p) tauhh[i]<-mean(y[s]/p[s]) } return(tauhh) } b<-10000 thh<-HH(z_pop,n,b,p) hist(thh) points(tau,0,pch=17, col='red') #Horwitz - Thompson HT<-function(y,n,b,p){ N = length(y) pi = 1-(1-p)^n tauht = rep(NA,b) var_tauht = rep(NA,b) for (i in 1:b){ smpl = sample(1:N,n,replace=TRUE,prob=pi) s = unique(smpl) nu = length(s) #approximate tauHT estimator page 70 t = nu*y[s]/pi[s] tauht[i] = mean(t) var_tauht[i] = ((N-nu)/N)*var(t)/nu #variance of the tauHT formula (6.8) page 70 } return(list(tauht=tauht,var_tauht=var_tauht)) } tht<-HT(z_pop,n,b,p) #mean(tht$tauht) hist(tht$tauht) points(tau,0,pch=17,col='red') #Next function calculates biased HH. What is making this HH estimator biased? HHbiased<-function(y,n,b,p){ N<-length(y) tau_hat<-rep(NA,b) for (i in 1:b){ s<-sample(1:N,n,replace=TRUE,prob=p) s<-unique(s) tau_hat[i]<-mean(y[s]/p[s]) } out<-tau_hat return(out) } hhb<-HHbiased(z_pop,5,b,p) hist(hhb,col="cyan",prob=TRUE) points(tau,0,pch=17,col='red')