# Tutorial 1 ----
# Simple random sampling without replacement from a finite population:
# drawing samples, estimating the population mean, and checking the
# theoretical variance of the sample mean against a simulation.

x <- c(10, 12, 23, 14)  # define a vector
x[2]                    # print the second component of x

# Alternatively, enter the values interactively (kept as a comment so that
# the script can also be run unattended):
#   x <- scan()
#   10 12 23 14
#   <press Enter twice to finish>

# Generate a population of N = 100 units uniformly distributed in the unit
# square.
a <- 0    # lower limit for the generated values
b <- 1    # upper limit for the generated values
N <- 100  # population size
x.p <- runif(N, a, b)
y.p <- runif(N, a, b)
plot(x.p, y.p)

# Take a sample (without replacement) of size n = 10 from this population
# and highlight the sampled units on the plot.
n <- 10
samp1 <- sample(seq_len(N), n)
# samp2 <- sample(seq_len(N), n, replace = TRUE)
points(x.p[samp1], y.p[samp1], col = "blue", pch = 15, cex = 1.5)

# Now construct a population of size N = 100 of real numbers (we generate
# these numbers from a Normal(5, 1) distribution but then treat them as a
# fixed population).
y.p <- rnorm(N, 5, 1)
ybar.p <- mean(y.p)  # true population mean
var.p <- var(y.p)    # true population variance (divisor N - 1)

# Now assume that all we have is a sample of size n = 10 from this
# population.
i <- sample(seq_len(N), n)
y.s <- y.p[i]
muhat <- mean(y.s)  # sample mean
s2 <- var(y.s)      # sample variance

# Sample mean and estimated variance of the sample mean.
#
# Arguments:
#   s  numeric vector holding the sampled values.
#   N  size of the population the sample was drawn from.
#
# Returns a list with two components:
#   mean  the sample mean of s;
#   var   the estimate ((N - n) / N) * var(s) / n of the variance of the
#         sample mean, where n = length(s).
#
# Stops with an error if the sample is larger than the population, since the
# finite population correction (N - n) / N would then be negative.
meanvar <- function(s, N) {
  n <- length(s)
  stopifnot(N >= n)
  fpc <- (N - n) / N  # finite population correction
  list(mean = mean(s), var = fpc * var(s) / n)
}

meanvar(y.s, N)

# Variance of the sample mean computed from the whole population.
#
# Arguments:
#   pop  numeric vector holding every value in the population.
#   n    sample size.
#
# Returns ((N - n) / N) * var(pop) / n, where N = length(pop).
#
# Stops with an error if n exceeds the population size.
truevar <- function(pop, n) {
  N <- length(pop)
  stopifnot(n <= N)
  fpc <- (N - n) / N  # finite population correction
  fpc * var(pop) / n
}

truevar(y.p, n)

# Practice: write a function that takes the sample and population size as
# input and gives the sample mean, sample variance, estimated variance of the
# sample mean, estimate of the population total and its estimated variance.

# Simulation: we want to repeat what we did b = 10000 times to be able to
# study the sampling distribution of the sample mean and the effectiveness of
# the sampling strategy; we can also see how close the theoretical variance
# of the sample mean is to the variance obtained from the simulation.
# Note: b is reused here; above it held the upper limit of the uniform
# distribution.
b <- 10000
ybar <- numeric(b)  # preallocate instead of growing an empty vector
for (j in seq_len(b)) {
  s <- sample(y.p, n)
  ybar[j] <- mean(s)
}

# Look at the sampling distribution of ybar.
hist(ybar)

# Estimate of the variance of ybar from the simulation.
var(ybar)

# Practice: change the sample size n and look at the histogram and variance
# of the sample mean.

# Now let's try the trees dataset.
trees
y.p <- trees$Volume
N <- length(y.p)
n <- 10
y.s <- sample(y.p, n)
meanvar(y.s, N)

ybar.trees <- numeric(b)
for (j in seq_len(b)) {
  s <- sample(y.p, n)
  ybar.trees[j] <- mean(s)
}
hist(ybar.trees)
var(ybar.trees)
truevar(y.p, n)