# you can use this online resource to practice # introduction into R # http://tryr.codeschool.com/ ############################################################################ # BINOMIAL DISTRIBUTION IN R ############################################################################ # X- number of successes in n trials ############################################################################ #example: Each of the students in today's tutorial toss a coin. # Let heads denote a success.Count the number of heads (successes) in # number-of-students number of trials. #set the working directory #be careful with this path, it will change according your #personal computer folder structure setwd('Z:/STAT380/tutorials/week1') #binomial distribution functions in R are built in the 'base' package #so we do not need any additional package ############################################################################ # Each of the students in today's tutorial is tossing a coin # We denote 'success' to be head, 'failure' is tail ############################################################################ n=17 p=1/2 ############################################################################ # generate a binomial random variable (n,p) # and save it in a variable X ############################################################################ # one realization from binomial distribution rbinom(1,n,p) #use the ? before the function name to call help file #about that function ?rbinom X=rbinom(10000,n,p) #print the variable X values X #another way to print the output (variable X values) is (X=rbinom(10000,n,p)) ############################################################################ ############################################################################ #Make a histogram using the generated variable X ############################################################################ #Once we have generated the binomial random variable #we can make plots, calculate quantiles #check the documentation about hist function ?hist #use nclass to define the number of bins of the histogram hist(X,nclass=25,col='purple',main='Histogram of X') hist(X,nclass=100,col='purple',main='Histogram of X') ############################################################################ # Make plots against a predifined grid # using dbinom ############################################################################ #define the grid grid = seq(0, 17, by = 1) #evaluate log binomial against the defined grid and make a plot evalXlog=dbinom(grid, n, p, log = TRUE) plot(grid, evalXlog, type = "l", ylab = "log density", main = "log density from dbinom", col='red') #evaluate binomial against the defined grid and make a plot evalX=dbinom(grid, n, p, log = FALSE) plot(grid, evalX,col='blue',type = "l", ylab = " density", main = "density from dbinom") ############################################################################ ############################################################################ # dbinom is evaluating the density, so we can use it to calculate # probabilities that X is exactly some value, or it is smaller or bigger # Example: Calculate the probability that there were 2 successes # that is we need to calculate P(X=2)= ( n!/2!(n-2)!) * p^2*(1-p)^(n-2) ############################################################################ #Calculate P(X=2) using the hard coded PMF of binomial (probXis2fla= factorial(n)/(factorial(2)*factorial(n-2))* p^2*(1-p)^(n-2) ) #Calculate P(X=2) using the R function dbinom (probXis2=dbinom(2, n, p, log = F)) ############################################################################ ############################################################################ ##calculate P( 6 <=X<= 8) #Several ways to do it! I will show you 3 today! ############################################################################ #1. #use the hard coded PMF of binomial (probXbtw6and8fla= (factorial(n)/(factorial(6)*factorial(n-6)))* p^6*(1-p)^(n-6) + (factorial(n)/(factorial(7)*factorial(n-7)))* p^7*(1-p)^(n-7) + (factorial(n)/(factorial(8)*factorial(n-8)))* p^8*(1-p)^(n-8)) #2. #We can write a parametrized function that can be recalled each time we need it. #This techique will be helful when you work when the distribution function #such as dbinom not available in R #This is just a more useful technique than the first one. #It makes it easier to calculate much more probabilities like P( 6 <=X<= 17 ) myDistBinom=function(numb,n,p){ (factorial(n)/(factorial(numb)*factorial(n-numb)))* p^numb*(1-p)^(n-numb) } (probXbtw6and8MyFla=myDistBinom(6,n,p)+myDistBinom(7,n,p)+myDistBinom(8,n,p)) #or we can write a for loop to make a summation probXbtw6and8MyFla1=0 for (i in (6:8)){ probXbtw6and8MyFla1=probXbtw6and8MyFla+myDistBinom(i,n,p) } # Note: # In general R is very slow when it comes to using for loop # There are more sofisticated functions such as apply, sapply, lapply.. # that can be used to avoid the for loop. We will get there soon. #3. #use dbinom #provided a vector 6:8 #dbinom will calculate probabilities for each values in the vector 6:8 dbinom(6:8, n, p, log = F) #so to calculate prob P( 6 <=X<= 8) we will need to sum up probabilities #returned by dbinom evaluated at each of the values 6:8 (probXbtw6and8=sum(dbinom(6:8, n, p, log = F))) ############################################################################ #calculate 75 percentile ############################################################################ qbinom(0.75,n,p) ############################################################################ #END ############################################################################