This example illustrate three ways of obtaining the GLMM
parameters using R
. We analyze the popular motor insurance data motorins{araway}
. The data is divided into five groups on account of variable Kilometres
representing kilometers per year as \(1: < 1000; 2: 1000-15,000; 3: 15,000-20,000; 4: 20,000-25,000; 5: > 25,000\). The response variable of interest is Claims
being the number of claims. In the model, we include exposure log(Insured)
, the log-number of insured in a policy-year, as the offset.
options(digits=2)
library(lme4)
library(MASS)
library(glmmML)
library(faraway)
attach(motorins)
glmm1 <-glmer(Claims~Kilometres+(1|Bonus), family=poisson(link="log"), offset=log(Insured), data=motorins)
glmm2 <-glmmPQL(fixed=Claims~Kilometres+offset(log(Insured)), random=~1|Bonus, family=poisson(link="log"), data=motorins)
glmm3 <-glmmML(Claims~Kilometres, cluster=Bonus, offset=log(Insured), method = c("Laplace"), family=poisson(link="log"), data=motorins)
##Summary
summ <-rbind(cbind(glmm1@beta, glmm2$coefficients$fixed, glmm3$coefficients),
cbind(glmm1@theta, glmm2$sigma, glmm3$sigma))
col <-c("AG-H quadrature","PQL", "Laplace")
row <-c("Intercept", "K2", "K3", "K4", "K5", "Dispersion")
rownames(summ)<-row
colnames(summ)<-col
summ
## AG-H quadrature PQL Laplace
## Intercept -2.700 -2.700 -2.699
## K2 0.445 0.445 0.445
## K3 -0.040 -0.040 -0.040
## K4 0.070 0.070 0.070
## K5 0.004 0.004 0.004
## Dispersion 0.389 2.305 0.389
##random effects
cbind("glmer"=glmm1@u, "glmmPQL"=glmm2$coefficients$random$Bonus)
## glmer (Intercept)
## 1 1.91 0.743
## 2 0.66 0.258
## 3 0.14 0.052
## 4 -0.18 -0.070
## 5 -0.43 -0.165
## 6 -0.59 -0.228
## 7 -1.52 -0.590
library(hglm)
library(insuranceData)
data(dataCar)
attach(dataCar)
names(dataCar)
## [1] "veh_value" "exposure" "clm" "numclaims" "claimcst0"
## [6] "veh_body" "veh_age" "gender" "area" "agecat"
## [11] "X_OBSTAT_"
Y <-claimcst0[claimcst0>0]
X <-as.factor(veh_age)[claimcst0>0]
Z <-as.factor(veh_body)[claimcst0>0]
hglm.ig <- hglm(fixed=Y~X,
random=~1|veh_body[claimcst0>0], vcovmat = TRUE,
family=inverse.gaussian(log), disp.family=Gamma(log),
data=dataCar)
print(summary(hglm.ig), print.ranef = TRUE)
## Call:
## hglm.formula(family = inverse.gaussian(log), fixed = Y ~ X, random = ~1 |
## veh_body[claimcst0 > 0], data = dataCar, vcovmat = TRUE,
## disp.family = Gamma(log))
##
## ----------
## MEAN MODEL
## ----------
##
## Summary of the fixed effects estimates:
##
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 7.56714 0.06495 116.502 <2e-16 ***
## X2 0.05134 0.07309 0.702 0.4825
## X3 0.05545 0.07212 0.769 0.4420
## X4 0.14424 0.07583 1.902 0.0572 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Note: P-values are based on 4618 degrees of freedom
##
## Summary of the random effects estimates:
##
## Estimate Std. Error
## as.factor(veh_body[claimcst0 > 0])BUS -0.0043 0.069
## as.factor(veh_body[claimcst0 > 0])CONVT 0.0007 0.069
## as.factor(veh_body[claimcst0 > 0])COUPE 0.0282 0.066
## as.factor(veh_body[claimcst0 > 0])HBACK -0.0009 0.048
## as.factor(veh_body[claimcst0 > 0])HDTOP 0.0164 0.063
## as.factor(veh_body[claimcst0 > 0])MCARA -0.0148 0.069
## as.factor(veh_body[claimcst0 > 0])MIBUS 0.0167 0.067
## as.factor(veh_body[claimcst0 > 0])PANVN -0.0018 0.066
## as.factor(veh_body[claimcst0 > 0])RDSTR -0.0023 0.069
## as.factor(veh_body[claimcst0 > 0])SEDAN -0.0964 0.046
## as.factor(veh_body[claimcst0 > 0])STNWG -0.0192 0.048
## as.factor(veh_body[claimcst0 > 0])TRUCK 0.0448 0.064
## as.factor(veh_body[claimcst0 > 0])UTE 0.0329 0.059
##
## ----------------
## DISPERSION MODEL
## ----------------
##
## NOTE: h-likelihood estimates through EQL can be biased.
##
## Dispersion parameter for the mean model:
## [1] 0.0014
##
## Model estimates for the dispersion term:
##
## Link = log
##
## Effects:
## Estimate Std. Error
## -6.578 0.021
##
## Dispersion = 1 is used in Gamma model on deviances to calculate the standard error(s).
##
## Dispersion parameter for the random effects:
## [1] 0.005813
##
## Dispersion model for the random effects:
##
## Link = log
##
## Effects:
## .|Random1
## Estimate Std. Error
## -5.1 0.9
##
## Dispersion = 1 is used in Gamma model on deviances to calculate the standard error(s).
##
## EQL estimation converged in 4 iterations.
library(insuranceData)
library(MASS)
library(fitdistrplus) #requires R >= 3.2.0
library(vcd)
data(SingaporeAuto)
claims<-SingaporeAuto$Clm_Count
fitdistr(claims, "Negative Binomial") #MLEs with BFGS method
## size mu
## 0.8399 0.0699
## (0.2510) (0.0032)
fitdist(claims, "nbinom",method = "mle",optim.method="Nelder-Mead")
## Fitting of the distribution ' nbinom ' by maximum likelihood
## Parameters:
## estimate Std. Error
## size 0.87 0.2755
## mu 0.07 0.0032
fitdist(claims, "nbinom",method = "mle",optim.method="BFGS")
## Fitting of the distribution ' nbinom ' by maximum likelihood
## Parameters:
## estimate Std. Error
## size 0.84 0.2520
## mu 0.07 0.0032
fitdist(claims, "nbinom",method = "mle",optim.method="SANN")
## Fitting of the distribution ' nbinom ' by maximum likelihood
## Parameters:
## estimate Std. Error
## size 0.88 0.2778
## mu 0.07 0.0032
fitdist(claims, "nbinom",method = "mme") # Moment matching estimation
## Fitting of the distribution ' nbinom ' by matching moments
## Parameters:
## estimate
## size 0.84
## mu 0.07
out <-goodfit(claims, type = "nbinomial", method = "MinChisq") #ML or Minchisq method
summary(out)
##
## Goodness-of-fit test for nbinomial distribution
##
## X^2 df P(> X^2)
## Pearson 1.4 1 0.24
out$par
## $size
## [1] 0.81
##
## $prob
## [1] 0.92
out
##
## Observed and fitted values for nbinomial distribution
## with parameters estimated by `MinChisq'
##
## count observed fitted pearson residual
## 0 6996 6995.9 0.0009
## 1 455 451.9 0.1439
## 2 28 32.5 -0.7921
## 3 4 2.4 0.8547