#consider the data given by the pdf file "map" in form of a map of a region in which we 
#want to estimate the number of certain type of trees; the region is devided to farms of
#different areas. We want to estimate the number of trees in the region by selecting n=5
#farms and counting the number of trees in them. We will use the area of the land as an 
#auxiliary variable with respect to which unequal probabilities are defined.


y<-c(9,7,1,16,7,5,7,0,7,6,18,5,3,9)#unknown population values
tau<-sum(y)#true value of the parameter we want to estimate
x.p<-c(.08,.08,.07,.15,.09,.03,.06,.02,.04,.04,.16,.09,.02,.07)#known auxiliary variable
p<-x.p/sum(x.p)#unequal probabilities
n<-5#sample size
N<-length(y)#population size

#first we ignore the fact that we have farms of different sizes and take an SRS and use 
#sample mean to estimate the population total

s<-sample(1:N,n)
s
y.s<-y[s]
tauhat<-N*mean(y.s)
tauhat
varhat<-N*(N-n)*var(y.s)/n

#now take a sample with respect to the probabilities proportional to the area of the lands
#and calculate the Hansen-Hurvitz estimator

p<-x.p/sum(x.p)
s<-sample(1:N,n,replace=TRUE,prob=p)
s
y.s<-y[s]
p.s<-p[s]
tauhat.hh<-mean(y.s/p.s)
tauhat.hh
varhat.hh<-var(y[s]/p[s])/n

#let's calculate the Hurvitz-Thompson estimator

pi<-1-(1-p)^n
u.s<-unique(s)
nu<-length(u.s)
pi.s<-pi[u.s]
ys<-y[u.s]
tauhat.ht<-sum(ys/pi.s)
pij<-array(dim=c(N,N))
for (i in 1:N) {
  for (j in 1:N) {
    if (i==j) pij[i,j]<-pi[i]
    if (i!=j) pij[i,j]<-pi[i]+pi[j]-(1-(1-p[i]-p[j])^n)
  }
}

#calculate the variance of HT estimator using formula (6.5)
M<-pij
term1<-sum(((1-pi)/pi)*y^2)
k<-0
term2<-c()
for (i in 1:N) {
  for (j in 1:N) {
    if (j!=i) {
      k<-k+1
      term2[k]<-((M[i,j]-M[i,i]*M[j,j])/(M[i,i]*M[j,j]))*y[i]*y[j]
    }
  }
}
var.ht<-term1+sum(term2)

#calculate the estimate of variance of HT estimator using formula (6.6)
G<-pij[u.s,u.s]
G
term1<-sum(((1-pi.s)/pi.s^2)*ys^2)
k<-0
term2<-c()
for (i in 1:nu) {
  for (j in 1:nu) {
    if (j!=i) {
      k<-k+1
      term2[k]<-((G[i,j]-G[i,i]*G[j,j])/(G[i,i]*G[j,j]))*ys[i]*ys[j]/G[i,j]
    }
  }
}
varhat.ht<-term1+sum(term2)

#calculate the approximation to the estimate of the varianve of HT estimator using formula (6.8)
t<-nu*ys/pi.s
vartild.ht<-((N-nu)/N)*var(t)/nu

#let's run some simulations:
HH<-function(y,n,b,p){
  N<-length(y)
  tauhh<-c()
  for (i in 1:b){
    s<-sample(1:N,n,replace=TRUE,prob=p)
    tauhh[i]<-mean(y[s]/p[s])
  }
  a<-print.noquote(paste("(a) HH","E(ybarhh)=",mean(tauhh)))
  return(tauhh)
}
b<-10000
thh<-HH(y,n,b,p)
hist(thh)
points(tau,0,pch=17)

HT<-function(y,n,b,p){
  N<-length(y)
  pi<-1-(1-p)^n
  tauht<-c()
  for (i in 1:b){
    s<-sample(1:N,n,replace=TRUE,prob=pi)
    s<-unique(s)
    nu<-length(s)
    t<-nu*y[s]/pi[s]
    tauht[i]<-mean(t)
  }
  a<-print.noquote(paste("(b) HT","E(ybarht)=",mean(tauht)))
  return(tauht)
}

tht<-HT(y,n,b,p)
hist(tht)
points(tau,0,pch=17)