#load the artifitial data set rainfall-data
load("rainfall-data.gz")
y<-rain #this is the variable of interest
tau=sum(y)#this is the total amount of rainfall for the year, the quantity that we wish to estimate

#we will use two systematic samples and three cluster samples, the number of secondary
#units (days) is fixed at m=50 for all the samples, but the number of primary units and 
#form of them is different under each design; comparisons are made based on the variance
#of the estimates, estimated using simulation

M<-364
m<-56

#systematic sample with one primary unit

k<-floor(M/m)
start<-sample(1:k,1)
s<-seq(start,M,k)
tauhat1<-k*sum(y[s])#unbiased estimate of the total rainfall

#systematic sampling with 2 primary units
n<-2
k<-floor(M/(m/2))
start<-sample(1:k,n)
s1<-seq(start[1],M,k)
s2<-seq(start[2],M,k)
tauhat2<-k*(mean(c(sum(y[s1]),sum(y[s2]))))

#cluster sampling with n clusters of size Mi (here we choose n and Mi such that 
#n*Mi=56, the number of secondary sampling units)

taucluster<-function(n,Mi){
  N<-M/Mi #number of clusters
  z<-c()
  for (i in 1:N) z<-c(z,rep(i,Mi))
  start<-sample(1:N,n)
  ys<-c()
  for (i in 1:n){
    yi<-y[z==start[i]]
    ys[i]<-sum(yi)
  }
  out<-N*mean(ys)
  return(out)
}

#now let's run simulations to make a comparison among the above sampling strategies
b<-10000

#systematic with one primary unit
k<-floor(M/m)
tauhat1<-c()
for (j in 1:b) {
  start<-sample(1:k,1)
  s<-seq(start,M,k)
  tauhat1[j]<-k*sum(y[s])
}
var(tauhat1)

#systematic with 2 primary units
n<-2
k<-floor(M/(m/2))
tauhat2<-c()
for (j in 1:b){
  start<-sample(1:k,n)
  s1<-seq(start[1],M,k)
  s2<-seq(start[2],M,k)
  tauhat2[j]<-k*(mean(c(sum(y[s1]),sum(y[s2]))))
}
var(tauhat2)

#cluster sampling with 5 choices of (n,Mi)
tauh1<-c()
tauh2<-c()
tauh3<-c()
tauh4<-c()
tauh5<-c()
tauh6<-c()
for (j in 1:b) {
  tauh1[j]<-taucluster(56,1)
  tauh2[j]<-taucluster(28,2)
  tauh3[j]<-taucluster(14,4)
  tauh4[j]<-taucluster(7,8)
  tauh5[j]<-taucluster(4,14)
  tauh6[j]<-taucluster(2,28)
}
var(tauh1)
var(tauh2)
var(tauh3)
var(tauh4)
var(tauh5)
var(tauh6)

par(mfrow=c(2,2))
hist(tauhat1)
hist(tauhat2)
par(mfrow=c(3,2))
hist(tauh1)
hist(tauh2)
hist(tauh3)
hist(tauh4)
hist(tauh5)
hist(tauh6)

par(mfrow=c(1,1))
box<-list(tauhat1,tauhat2,tauh1,tauh2,tauh3,tauh4,tauh5,tauh6)
boxplot(box)
abline(h=tau,col="red")