#Tutorial-1

# Define a numeric vector.
x <- c(10, 12, 23, 14)
x[2] # print the second component of x

# Alternative: read the same values with scan().
# Interactively you would run `x <- scan()`, type `10 12 23 14` at the prompt
# and finish with an empty line (press Enter twice). Passing the values via
# the `text` argument gives the same vector and keeps this file runnable with
# source() / Rscript, where there is no prompt to type into.
x <- scan(text = "10 12 23 14")


# Generate a population of N = 100 units uniformly distributed in the unit
# square.
a <- 0 # lower limit for the generated values
b <- 1 # upper limit for the generated values
N <- 100 # population size
# Draw N coordinates (not a hard-coded 100) so that changing N above really
# changes the size of the generated population.
x.p <- runif(N, a, b)
y.p <- runif(N, a, b)
plot(x.p, y.p)

# Take a sample (without replacement) of size n = 10 from this population and
# highlight the selected units on the existing scatter plot.
n <- 10
samp1 <- sample(seq_len(N), size = n)
# Variant drawing with replacement:
# samp2 <- sample(seq_len(N), size = n, replace = TRUE)
points(
  x = x.p[samp1],
  y = y.p[samp1],
  col = "blue",
  pch = 15,
  cex = 1.5
)


# Now construct a population of N real numbers. The values are generated from
# a Normal(5, 1) distribution, but from here on they are treated as a fixed
# population.
y.p <- rnorm(N, mean = 5, sd = 1)
ybar.p <- mean(y.p) # true population mean
var.p <- var(y.p) # true population variance

# Now assume that all we have is a sample of size n from this population.
i <- sample(seq_len(N), size = n)
y.s <- y.p[i]
muhat <- mean(y.s) # sample mean
s2 <- var(y.s) # sample variance
# Sample mean and the estimate of the variance of the sample mean, for a
# sample taken without replacement from a population of size N.
#
# Arguments:
#   s  numeric vector holding the sampled values (at least one value)
#   N  size of the population the sample was taken from (N >= length(s))
# Value:
#   A list with two components:
#     mean  the sample mean, mean(s)
#     var   (N - n) / N * var(s) / n, where n = length(s); this is NA when
#           the sample has a single value because var(s) is NA then
meanvar <- function(s, N) {
  # Reject inputs for which the formula is meaningless: an empty sample gives
  # NaN/NA, and a sample larger than the population makes (N - n) / N negative
  # and would silently return a negative "variance".
  stopifnot(
    is.numeric(s), length(s) >= 1,
    is.numeric(N), length(N) == 1, N >= length(s)
  )
  n <- length(s)
  fpc <- (N - n) / N # finite population correction
  list(mean = mean(s), var = fpc * (var(s) / n))
}

# Apply meanvar() to the sample y.s; N is the population size defined earlier.
meanvar(y.s, N)

# Variance of the sample mean for samples of size n taken without replacement,
# computed from the full population values.
#
# Arguments:
#   pop  numeric vector holding every value in the population
#   n    sample size, a single number with 1 <= n <= length(pop)
# Value:
#   A single number: (N - n) / N * var(pop) / n, where N = length(pop).
truevar <- function(pop, n) {
  # n = 0 would divide by zero and n > N would give a negative result, so
  # fail loudly instead of returning a meaningless number.
  stopifnot(
    is.numeric(pop),
    is.numeric(n), length(n) == 1, n >= 1, n <= length(pop)
  )
  N <- length(pop)
  fpc <- (N - n) / N # finite population correction
  fpc * (var(pop) / n)
}
# Use n rather than a literal 10 so this stays in step with the sample size
# used for y.s if n is changed.
truevar(y.p, n)
#practice: write a function that takes the sample and population size as input and gives
#the sample mean, sample variance, estimated variance of the sample mean, estimate of the
#population total and its estimated variance

# Simulation: repeat what we did b = 10000 times to study the sampling
# distribution of the sample mean and the effectiveness of the sampling
# strategy. We can also see how close the theoretical variance of the sample
# mean is to the variance obtained from the simulation.

b <- 10000 # number of replications (b no longer holds the upper limit 1)
ybar <- numeric(b) # preallocate one slot per replication instead of growing
for (j in seq_len(b)) {
  s <- sample(y.p, n)
  ybar[j] <- mean(s)
}

# Look at the sampling distribution of ybar.
hist(ybar)
# Estimate of the variance of ybar from the simulation.
var(ybar)

#practice: change the sample size n and look at the histogram and variance of sample mean

# Now let's try the trees dataset, treating its Volume column as the
# population.
trees
y.p <- trees$Volume
N <- length(y.p)
n <- 10
y.s <- sample(y.p, n)
meanvar(y.s, N)

# Repeat the sampling b times; the result vector is preallocated rather than
# grown inside the loop.
ybar.trees <- numeric(b)
for (j in seq_len(b)) {
  s <- sample(y.p, n)
  ybar.trees[j] <- mean(s)
}
hist(ybar.trees)
# Simulated variance of the sample mean, followed by the value from truevar().
var(ybar.trees)
truevar(y.p, n)