Chapter 1 - examples

Example - estimation of \((\lambda, \alpha)'\) in Gamma Distribution

There are at least three equivalent methods in R that can be used to obtain ML estimates of the parameters in Gamma distribution. In the first method, we implement the two-parameter log-likelihood of Gamma distribution, and basic function optim is applied.

options(digits=2)

y.gam <- rgamma(100, 2)

l.gamma <- function(y,par) {n<-length(y)
-n*par[2]*log(par[1])+n*log(gamma(par[2]))-(par[2]-1)*sum(log(y))+par[1]*sum(y)} # -log-likelihood function         
result <- optim( par = c(1,2), l.gamma, y = y.gam, method = "Nelder-Mead")
result[1]

## $par
## [1] 0.89 1.65

result[2]

## $value
## [1] 155

library(stats4) 
l.gamma<-function(lambda,alfa) {y<-y.gam
n<-length(y)
-n*alfa*log(lambda)+n*log(gamma(alfa))-(alfa-1)*sum(log(y))+lambda*sum(y)} # -log-likelihood function   
est<-mle(minuslog=l.gamma, start=list(lambda=1,alfa=2))
summary(est)

## Maximum likelihood estimation
## 
## Call:
## mle(minuslogl = l.gamma, start = list(lambda = 1, alfa = 2))
## 
## Coefficients:
##        Estimate Std. Error
## lambda     0.89       0.14
## alfa       1.65       0.21
## 
## -2 log L: 309

library(MASS) 
fitdistr(y.gam,"gamma", method = "Nelder-Mead") # fitting gamma pdf parameters

##   shape    rate 
##   1.65    0.89  
##  (0.21)  (0.14)

Example - fitting GLM Inverse-Gaussian

In this example, data set dataCar taken from package insuranceData is based on one-year vehicle insurance policies taken out in 2004 or 2005. There are 67,856 policies, of which 4624 (6.8%) had at least one claim.

options(digits=2)

library(insuranceData)

data(dataCar)
attach(dataCar)

glm.ig <- glm(claimcst0[dataCar$claimcst0>0]~as.factor(veh_age[dataCar$claimcst0>0]), 
family=inverse.gaussian(link="log"), data=dataCar)
summary(glm.ig)

## 
## Call:
## glm(formula = claimcst0[dataCar$claimcst0 > 0] ~ as.factor(veh_age[dataCar$claimcst0 > 
##     0]), family = inverse.gaussian(link = "log"), data = dataCar)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.06419  -0.04364  -0.02216   0.00102   0.11552  
## 
## Coefficients:
##                                            Estimate Std. Error t value
## (Intercept)                                  7.5418     0.0603  125.07
## as.factor(veh_age[dataCar$claimcst0 > 0])2   0.0464     0.0783    0.59
## as.factor(veh_age[dataCar$claimcst0 > 0])3   0.0570     0.0773    0.74
## as.factor(veh_age[dataCar$claimcst0 > 0])4   0.1402     0.0810    1.73
##                                            Pr(>|t|)    
## (Intercept)                                  <2e-16 ***
## as.factor(veh_age[dataCar$claimcst0 > 0])2    0.553    
## as.factor(veh_age[dataCar$claimcst0 > 0])3    0.461    
## as.factor(veh_age[dataCar$claimcst0 > 0])4    0.084 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for inverse.gaussian family taken to be 0.0016)
## 
##     Null deviance: 6.4422  on 4623  degrees of freedom
## Residual deviance: 6.4369  on 4620  degrees of freedom
## AIC: 77190
## 
## Number of Fisher Scoring iterations: 9

mean.group <-tapply(claimcst0[dataCar$claimcst0>0], as.factor(veh_age[dataCar$claimcst0>0]), mean)
data.frame("mean"=mean.group, "fitted values"=unique(glm.ig$fitted.values))

##   mean fitted.values
## 1 1885          1996
## 2 1975          1975
## 3 1996          1885
## 4 2169          2169

mean.group

##    1    2    3    4 
## 1885 1975 1996 2169

Example - Density of Tweedie Distribution

The graph of Tweedie density assuming different powers \(p\).

library(tweedie)
y=seq(0,4,length=1000)

d1 <-dtweedie( y, power=1.5, mu=1, phi=1)
d2 <-dtweedie( y, power=2, mu=1, phi=1)
d3 <-dtweedie( y, power=2.5, mu=1, phi=1)
d4 <-dtweedie( y, power=5, mu=1, phi=1)

Example - fitting DGLM Poisson

We fit the parameters of Poisson in which the response variable of interest is Claims. Variable Insured is treated as the exposure to risk, and the dispersion is modeled with variable Make. The R code and output are presented below. Figure shows the diagnostic plots.

library(dglm)
library(faraway)

attach(motorins)
Kilom <-as.factor(Kilometres)
Bonus <-as.factor(Bonus)
Make <-as.factor(Make)

dglm.claims <-dglm(Claims~Kilom+Bonus+Make, offset=log(motorins$Insured), 
~Make, family=poisson(link="log"), data=motorins)
summary(dglm.claims)

## 
## Call: dglm(formula = Claims ~ Kilom + Bonus + Make, dformula = ~Make, 
##     family = poisson(link = "log"), data = motorins, offset = log(motorins$Insured))
## 
## Mean Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)    2.537      0.091    27.8 2.5e-170
## Kilom.L       -0.896      0.063   -14.1  2.4e-45
## Kilom.Q       -0.333      0.059    -5.6  1.8e-08
## Kilom.C        0.410      0.060     6.9  6.4e-12
## Kilom^4        0.092      0.053     1.7  8.6e-02
## Bonus          0.278      0.013    21.9 1.1e-106
## Make2         -1.377      0.090   -15.2  2.5e-52
## Make3         -1.639      0.097   -16.9  3.4e-64
## Make4         -1.484      0.131   -11.4  6.7e-30
## Make5         -1.237      0.089   -13.9  5.0e-44
## Make6         -0.857      0.093    -9.3  2.0e-20
## Make7         -1.568      0.089   -17.6  5.6e-69
## Make8         -2.156      0.098   -22.0 7.5e-107
## Make9          1.935      0.091    21.3 4.0e-101
## (Dispersion Parameters for poisson family estimated as below )
## 
##     Scaled Null Deviance: 4193 on 1796 degrees of freedom
## Scaled Residual Deviance: 1797 on 1783 degrees of freedom
## 
## Dispersion Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)     3.83      0.094    40.8  0.0e+00
## Make2          -1.38      0.136   -10.1  5.5e-24
## Make3          -1.52      0.142   -10.8  4.9e-27
## Make4          -0.53      0.147    -3.6  2.9e-04
## Make5          -1.33      0.136    -9.8  1.8e-22
## Make6          -0.76      0.135    -5.6  1.7e-08
## Make7          -1.66      0.138   -12.0  2.3e-33
## Make8          -1.99      0.143   -14.0  2.7e-44
## Make9           2.05      0.130    15.7  1.2e-55
## (Dispersion parameter for Gamma family taken to be 2 )
## 
##     Scaled Null Deviance: 4490 on 1796 degrees of freedom
## Scaled Residual Deviance: 2642 on 1788 degrees of freedom
## 
## Minus Twice the Log-Likelihood: 14997 
## Number of Alternating Iterations: 6

##Figure
par(mfrow=c(2,2))
plot(dglm.claims)

glm.claims <-glm(Claims~Kilom+Bonus+Make, offset=log(motorins$Insured),
family=poisson(link="log"), data=motorins)

plot(quantile(dglm.claims$fitted.values, probs=seq(0,1,0.00001)), 
quantile(glm.claims$fitted.values, probs=seq(0,1,0.00001)),
xlab="DGLM", 
ylab="GLM" )
abline(0,1,lwd=3,col="gray")

Example - the estimation of \(\lambda\) of Poisson distribution

In order to estimate parameter \(\lambda\) in the Poisson distribution, the most-general way is the straightforward implementation of the log-likelihood function and applying function optizime. In the code below, the proper log-likelihood is denoted as l.pois.

k <- rpois(20,4)        
l.lambda <- function(lambda) {log(prod(((lambda^k)*exp(-lambda))/factorial(k)))}        
optimize(f=l.lambda, interval = c(0,10), maximum=TRUE)

## $maximum
## [1] 4.4
## 
## $objective
## [1] -43

Example - estimation of parameters in ZI distribution

We investigate the number of claims taken from data set dataOhlsson{insuranceData}. This data comes from the former Swedish insurance company Wasa and concerns partial casco insurance (this time, for motorcycles). It contains aggregated data on all insurance policies and claims during the period of 1994-1998.

library(pscl)
library(insuranceData)
data(dataOhlsson)
number.of.claims <- dataOhlsson$antskad

table(number.of.claims)

## number.of.claims
##     0     1     2 
## 63878   643    27

zip.pois <-zeroinfl(number.of.claims~1, dist = "poisson")  
zip.nb <-zeroinfl(number.of.claims~1, dist = "negbin")  

lambda.pois <-exp(zip.pois$coefficients$count)
logit.tau.pois <-zip.pois$coefficients$zero
tau.pois <-exp(logit.tau.pois)/(1+exp(logit.tau.pois)) 

lambda.nb <-exp(zip.nb$coefficients$count)
logit.tau.nb <-zip.nb$coefficients$zero
tau.nb <-exp(logit.tau.nb)/(1+exp(logit.tau.nb)) 

##summary
cbind(lambda.pois, tau.pois, lambda.nb, tau.nb)

##             lambda.pois tau.pois lambda.nb tau.nb
## (Intercept)        0.08     0.86      0.08   0.86

Example - simulation random sample from ZINB and ZIP

Two simulations for the ZINB and ZIGP distributions are carried out. In both models, we assume equivalent parameters: mean, dispersion parameter, and the fraction of non-zero observations. In Figure, the results are presented in the form of histograms. The generated histograms are very similar, with a high proportion of zeros and a few non-zero values.

library(gamlss.dist)
library(pscl)

count.zinb <- rZINBI(1000, mu=5, sigma=1, nu=0.5) #random sample from ZINB distribution
count.zip <- rZIP(1000, mu=4, sigma=0.5) #random sample from ZIP distribution

## Figure
par(mfrow=c(1,2))
hist(count.zinb, main="")
hist(count.zip, main="")

Example - the probability mass function of ZTPois

dztpois <- function (y, lambda) 
{
    n <- length(y)
    dztp <- vector(length=n)
    dztp[y>0] <- dpois(y[y>0], lambda)/(1 - dpois(0, lambda))
    return(dztp)
}
k <- rpois(5,lambda=5)
round(dztpois(k[k>0], lambda=5), digits=4)

## [1] 0.177 0.105 0.066 0.085 0.177

k.plot <- 1:10  
dk <- length(10) 
for (i in 1:10) {
dk[i] <- dztpois(i, lambda=5)
}
dk

##  [1] 0.034 0.085 0.141 0.177 0.177 0.147 0.105 0.066 0.037 0.018

##Figure
plot(k.plot, dk, type="h", xlab="k", ylab="dztpois", axes = FALSE)
axis(1, 1:10, 1:10)
axis(2, round(dk, digits=2))
par(new = T)
plot(k.plot, dk, type="p", xlab="", ylab="")

Example - the estimation of \(\lambda\) in ZTPois

k <- rpois(200,3)
k.nz <- as.data.frame(k[k>0])
X <- model.matrix(~1, k.nz)

beta <- coef(glm(k[k>0] ~ 1, family = poisson(link=log)))

ll.ztp <- function(beta){
lambda <- exp(X %*% beta)   
ll <-  sum(k[k>0]*log(lambda) - log(exp(lambda)-1)-log(factorial(k[k>0])))
return(-ll)
}
opt <- optim(beta, ll.ztp, hessian = TRUE, method = "BFGS")
log.likelihood <- (-opt$value)
opt$par

## (Intercept) 
##         1.1

log.likelihood

## [1] -342

opt$hessian

##             (Intercept)
## (Intercept)         496

Example - simulation Tweedie’s compound Poisson distribution

library(tweedie)
library(statmod)

lambda=2
iota=30
varsigma=.2

mu=lambda*iota*varsigma
p=(iota+2)/(iota+1)
phi=lambda^(1-p)*(iota*varsigma)^(2-p)/(2-p)

n=1000
S=NULL
for (i in 1:n){
S[i] <-sum(rgamma(rpois(n,lambda),shape=iota,scale=varsigma))
S=cbind(S,S[i])
 }

##Figure
hist(S, breaks=30, main="")
abline(v=mean(S), lty=2, lwd=3)

Example - estimation of Tweedie’s compound Poisson parameters

Response Claim from portfolio IndustryAuto{insuranceData} is modeled assuming a TCPoisson model without regressors. We fit the model parameters using function cpglm.

library(insuranceData)
library(cplm)
data(IndustryAuto)

cpglm <- cpglm(Claim~1, link="log", data=IndustryAuto)
summary(cpglm)

## 
## Call:
## cpglm(formula = Claim ~ 1, link = "log", data = IndustryAuto)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -119.1   -23.6    18.1    30.9    60.0  
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   10.595      0.034     312   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Estimated dispersion parameter: 2404.5
## Estimated index parameter: 1.01 
## 
## Residual deviance: 139097  on 54  degrees of freedom
## AIC:  1174 
## 
## Number of Fisher Scoring iterations:  4

cbind("Intercept"=cpglm$coefficients, "Power p"=cpglm$p, "Dispersion"=cpglm$phi)

##             Intercept Power p Dispersion
## (Intercept)        11       1       2405

##Figure
hist(IndustryAuto$Claim, main="")

Fixed Effect Models in Ratemaking

Example - Zero-inflated TCPoisson Model

Here, we analyze the response variable that is the aggregate value of claims claimscst0 for the single risk observed in insurance portfolio ’dataCar{insuranceData}`. We start with the calculation of the expected value and the variance of this variable.

library(insuranceData)
data(dataCar)
attach(dataCar)

group <-interaction(gender,veh_age,agecat)

mean(claimcst0)

## [1] 137

var(claimcst0)

## [1] 1115765

tweedie.zi <-zcpglm(claimcst0~gender+as.factor(veh_age)+agecat + offset(exposure)||agecat, data=dataCar)
tweedie.zi

## 
## Call:
## zcpglm(formula = claimcst0 ~ gender + as.factor(veh_age) + agecat + 
##     offset(exposure) || agecat, data = dataCar)
## 
## Zero-inflation model coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   -6.647      2.326   -2.86   0.0043 **
## agecat         0.744      0.412    1.81   0.0705 . 
## 
## Compound Poisson model coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           4.7328     0.0782   60.49  < 2e-16 ***
## genderM               0.1940     0.0442    4.39  1.1e-05 ***
## as.factor(veh_age)2   0.1639     0.0685    2.39    0.017 *  
## as.factor(veh_age)3   0.0628     0.0666    0.94    0.346    
## as.factor(veh_age)4   0.0817     0.0673    1.21    0.224    
## agecat               -0.1401     0.0173   -8.10  5.5e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Estimated dispersion parameter: 267.73
## Estimated index parameter: 1.5657

The full code of the chapter 1 is available on GitHub.