############################################################################ # NEGATIVE BINOMIAL DISTRIBUTION IN R ############################################################################ # X - Negative binomial (r,p) represents the number of failures which occur # in a sequence of Bernoulli trial before a prespecified number of # successes (r) is reached ############################################################################ #example: each student toss a coin. The success is defined as 'three heads'. # the r.v will cont number of tails until the third head appears #we need to call the package 'stats' #This might be already on your computer #but here I am showing you because R has a big repository #with free packages #install.packages(stats) #library(stats) n=17 p=0.5 r=3 ############################################################################ # generate a negative binomial random variable (r,p) # and save it in a variable X ############################################################################ # one realization from negative binomial distribution rnbinom(1,r,p) #use the ? before the function name to call help file #about that function ?rnbinom (X=rnbinom(10000,r,p)) #print the variable X values ############################################################################ ############################################################################ #Make a histogram using the generated variable X ############################################################################ #Once we have generated the negative binomial random variable (r,p) #we can make plots, calculate quantiles #check the documentation about hist function ?hist #use nclass to define the number of bins of the histogram hist(X,nclass=25,col='purple',main='Histogram of X') hist(X,nclass=100,col='purple',main='Histogram of X') ############################################################################ # Make plots against a predifined grid # using dnbinom ############################################################################ #define the grid grid = seq(0, 17, by = 1) #evaluate log binomial against the defined grid and make a plot evalXlog=dnbinom(grid, r, p, log = TRUE) plot(grid, evalXlog, type = "l", ylab = "log density", main = "log density from dnbinom", col='red') #evaluate binomial against the defined grid and make a plot evalX=dnbinom(grid, r, p, log = FALSE) plot(grid, evalX,col='blue',type = "l", ylab = " density", main = "density from dnbinom") ############################################################################ ############################################################################ # dnbinom is evaluating the density, so we can use it to calculate # probabilities that X is exactly some value, or it is smaller or bigger # Example: Calculate the probability that there were 2 failures before # the third head occured # that is we need to calculate # P(X=2)=(gamma(2+r)/(gamma(r)*factorial(2)) ) * p^r*(1-p)^2 ############################################################################ #Calculate P(X=2) using the hard coded PMF of binomial (probXis2fla= (gamma(2+r)/(gamma(r)*factorial(2)) ) * p^r*(1-p)^2 ) #Calculate P(X=2) using the R function dbinom (probXis2=dnbinom(2, r, p, log = F)) ############################################################################ ############################################################################ ##calculate P( 6 <=X<= 8) #Several ways to do it! I will show you 3 today! ############################################################################ #1. #use the hard coded PMF of negative binomial (probXbtw6and8fla= (gamma(6+r)/(gamma(r)*factorial(6)))* p^r*(1-p)^6+ (gamma(7+r)/(gamma(r)*factorial(7)))* p^r*(1-p)^7+ (gamma(8+r)/(gamma(r)*factorial(8)))* p^r*(1-p)^8) #2. #We can write a parametrized function that can be recalled each time we need it. #This techique will be helful when you work when the distribution function #such as dbinom not available in R #This is just a more useful technique than the first one. #It makes it easier to calculate much more probabilities like P( 6 <=X<= 17 ) myDistNBinom=function(numb,r,p){ (gamma(numb+r)/(gamma(r)*factorial(numb)))* p^r*(1-p)^numb } (probXbtw6and8MyFla=myDistNBinom(6,r,p)+myDistNBinom(7,r,p)+myDistNBinom(8,r,p)) #or we can write a for loop to make a summation probXbtw6and8MyFla1=0 for (i in (6:8)){ probXbtw6and8MyFla1=probXbtw6and8MyFla1+myDistNBinom(i,r,p) } # Note: # In general R is very slow when it comes to using for loop # There are more sofisticated functions such as apply, sapply, lapply.. # that can be used to avoid the for loop. We will get there soon. #3. #use dnbinom #provided a vector 6:8 #dnbinom will calculate probabilities for each values in the vector 6:8 dnbinom(6:8, r, p, log = F) #so to calculate prob P( 6 <=X<= 8) we will need to sum up probabilities #returned by dbinom evaluated at each of the values 6:8 (probXbtw6and8=sum(dnbinom(6:8, r, p, log = F))) ############################################################################ #calculate 75 percentile ############################################################################ qnbinom(0.75,r,p) ############################################################################ #END ############################################################################