#Tutorial-3 # Example 1: See if CLT holds for SRS without replacement on the islands data set # Example 2:- Estimate coverage probabilities for 95% CI # - Calculate coverage probabilities for several sample sizes and make plots rm(list=ls()) #Example 1: #we will use the data set islands which is part of R basic dataset package #Finite Population CLT in SRS without replacement zpop<-islands####The areas in thousands of square miles of the landmasses which #exceed 10,000 square miles.################## hist(zpop) mu<-mean(zpop) sigma2<-var(zpop) N<-length(zpop) n<-7 smpl_index<-sample(1:N,n) s<-zpop[smpl_index] #### See if it holds finite population CLT for SRS without replacement smpl_mean_var <- function(zpop,n,b) { N<-length(zpop) sigma2<-var(zpop) ybar<-rep(NA,b) for (i in 1:b) { s<-sample(1:N,n) ybar[i]<-mean(zpop[s]) } ybar_var<-((N-n)/N)*(sigma2/n) out<-list(ybar=ybar,ybar_var=ybar_var) return(out) } #now use the function smpl_mean_var to simulate sample mean from b samples #of sample sizes c(5,10,15,24,35,45) #and plot the standardized ybar against standard normal to see if they match par(mfrow=c(2,3)) # splits the screen into a matrix with dimension (2,3) #so that we can accomodate histograms in each of the cells of the matrix b<-10000 for (n in c(5,10,15,24,35,45)) { #simulate a vector of sample means of b samples of size n from zpop simul<-smpl_mean_var(zpop,n,b) ybar_standardize<-(simul[[1]]-mu)/sqrt(simul[[2]]) #see the distribution of the stanardized ybar hist(ybar_standardize,prob=T,ylim=c(0,1.2),main=n) #create a grid of length 100 z<-seq(min(ybar_standardize),max(ybar_standardize),length=100) #evaluate standard normal distribution at z values from the grid lines(z,dnorm(z),col="red") # the lines function assumes that there is already a plot or histogram # so it just drws a line on the existing histogram } #Q: Does the histograms follow the standard normal (red line) as sample size n increases? #i.e., does the CLT hold under the SRS without replacement? #Example 2: #Check this web site for data sets #https://vincentarelbundock.github.io/Rdatasets/datasets.html #Estimate the exact coverage probability for the approximate (1-alpha)% CIs through simulation catsDataSet<-read.table(file="catsM.csv",header=T,sep = ",") head(catsDataSet) # the head function returns the first few rows of the data set together with the # structure of the table #three columns: Sex (F or M), Bwt (Body weight in kg), Hwt (heart weight in g) #lets take the heart weight and use it as our population zpop<-catsDataSet$Hwt #population mu<-mean(zpop) #population mean N<-length(zpop) #population size sigma2=var(zpop) alpha<-.05 #so that (1-alpha) is .95 n<-7 #sample size b<-10000 #number of iterations in simulation counter_true<-0 lower<-rep(NA,b) upper<-rep(NA,b) for (i in 1:b) { smpl_index<-sample(1:N,n) s=zpop[smpl_index] ybar<-mean(s) var_hat_ybar<-((N-n)/N)*var(s)/n lower[i]<-ybar-qnorm((1-alpha/2))*sqrt(var_hat_ybar) upper[i]<-ybar+qnorm((1-alpha/2))*sqrt(var_hat_ybar) if (mu>lower[i] & mulower[i] & mu