install.packages("gamlss.dist") install.packages("gamlss") library(gamlss.dist) library(gamlss) setwd("C:\\Users\\authorized user\\Dropbox\\STAT445\\week10\\tutorials") ################################################################################## #examples using Poisson Inverse Gaussian #using PIG R function from the gamlss.dist R package ################################################################################## #plot pdf of Poisson Inverse Gaussian and Negative binomial par(mfrow=c(1,2)) fy=function(y) dPIG(y, mu=15, sigma = 1 ) fx=function(x) dnbinom(x, size=50, mu=15, log = FALSE) plot(fy, from=0, to=50, n=50+1, type="h",main="Poisson inverse Gaussian") # pdf plot(fx,from=0,to=50,n=50+1,type="h",col="red",main="Negative Binomial" ) # generate random sample from PIG # generate random sample from NB par(mfrow=c(1,2)) tbl <- table(pig <- rPIG(100, mu=15, sigma=1)) barplot(tbl, col='lightblue',main="Random sample Poisson Inverse Gaussian") tbl1 <- table(nbi <- rnbinom(100, size=50, mu=15)) barplot(tbl1, col='purple',main="Random sample from Negative binomial") #try to fit the model on data generated from rPIG mPIG=gamlss(pig~1,family=PIG) mNBI=gamlss(pig~1,family=NBI) #see which one is the best fit using AIC function AIC(mNBI, mPIG) #try to fit the model on data generated from rnbinom mPIG=gamlss(nbi~1,family=PIG) mNBI=gamlss(nbi~1,family=NBI) #see which one is the best fit using AIC function AIC(mNBI, mPIG) #try to fit the data on the lice data set #which model will fit better? data("lice") mNBI = gamlss(head ~ 1, data = lice, family = NBI, weights = freq,trace = FALSE) mPIG = gamlss(head ~ 1, data = lice, family = PIG, weights = freq,trace = FALSE) AIC(mNBI, mPIG) #zero inflated distribution ########################################################## #The state wildlife biologists want to model how many fish #are being caught by fishermen at a state park. #Visitors are asked how long they stayed, #how many people were in the group, #were there children in the group and #how many fish were caught. #Some visitors do not fish, but there is no data on #whether a person fished or not. #Some visitors who did fish did not catch any fish so #there are excess zeros in the data because of the people #that did not fish. ########################################################## zinf <- read.csv("fish.csv") zinf <- within(zinf, { nofish <- factor(nofish) livebait <- factor(livebait) camper <- factor(camper) }) summary(zinf) par(mfrow=c(1,1)) #histogram of counts of fish hist(zinf$count,nclass=200, col="purple",main="Zero-inflated distribution") d=zinf$count mPIG=gamlss(d~1,family=PIG) mNBI=gamlss(d~1,family=NBI) #see which one is the best fit using AIC function AIC(mNBI, mPIG) (m2 <- matrix(1:20, 4, 4)) lower.tri(m2) m2[lower.tri(m2)] <- NA m2