#File: Rexamples.txt #Author: Juha Karvanen #Date: 2009-04-22 #Updated: #Summary: R examples for the course Generalized linear models, University of Helsinki, spring 2009 #Example of a GLM set.seed(3000) b<-3; n<-500; x<-rnorm(n); y<-runif(n)74"),ordered=TRUE) finrisk$agegro2<-C(finrisk$agegro,poly,1) chd1<-subset(finrisk,subset=(endpoint=="CHD1")) m1<-glm(events~rua+sex+agegr+year+offset(log(followupyears)),data=chd1,family=poisson(link = "log")) boxplot(residuals(m1,"deviance")~chd1$year) abline(h=0) m1b<-glm(events~rua+sex+agegr+year+log(followupyears),data=chd1,family=poisson(link = "log")) m1c<-glm(events~rua+sex+agegr+year+followupyears,data=chd1,family=poisson(link = "log")) m1d<-glm(events~rua+sex+agegr+I(year-1982)+offset(log(followupyears)),data=chd1,family=poisson(link = "log")) m3<-glm(events~rua+sex+agegr*year+offset(log(followupyears)),data=chd1,family=poisson(link = "log")) boxplot(residuals(m3,"deviance")~chd1$year) m4<-glm(events~rua+sex*agegr+year+offset(log(followupyears)),data=chd1,family=poisson(link = "log")) boxplot(residuals(m4,"deviance")~chd1$year) m5<-glm(events~rua+sex+agegro+year+offset(log(followupyears)),data=chd1,family=poisson(link = "log")) m5b<-glm(events~rua+sex+agegro2+year+offset(log(followupyears)),data=chd1,family=poisson(link = "log")) m6<-glm(cbind(events,round(followupyears-events))~rua+sex+agegr+year,data=chd1,binomial(link = "logit")) plot(chd1$events,predict(m1,type="response")) plot(chd1$events,predict(m4,type="response")) #Pneumo library(nnet) library(VGAM) data(pneumo) pneumo <- transform(pneumo, let=log(exposure.time)) m1<-multinom(cbind(normal, mild, severe) ~ let, data=pneumo) m2<-vglm(cbind(normal, mild, severe) ~ let, multinomial(ref=1), pneumo) m3<-vglm(cbind(normal,mild,severe) ~ let, cumulative(parallel=TRUE, reverse=TRUE), pneumo) m4<-glm(cbind(normal,mild)~let,family=binomial,data=pneumo) m5<-glm(cbind(normal,mild+severe)~let,family=binomial,data=pneumo) m6<-glm(cbind(mild,severe)~let,family=binomial,data=pneumo) #Switching2 con <- url("http://www.tilastotiede.fi/data/switching2.Rdata") print(load(con)) close(con) m1<-glm(cbind(responses,trials-responses)~current,data=switching2,binomial(link = "logit")) m2<-glm(cbind(responses,trials-responses)~current,data=switching2,binomial(link = "cloglog")) m2b<-glm(cbind(responses,trials-responses)~current,data=switching2[1:29,],binomial(link = "cloglog")) m3<-glm(cbind(responses,trials-responses)~current,data=switching2[1:29,],quasibinomial(link = "logit")) m4<-glm(cbind(responses,trials-responses)~current,data=switching2[1:29,],quasibinomial(link = "cloglog")) #Example of pdfs with positive support #Gamma, shape parameter varied y<-seq(0,10,by=0.01) plot(y,dgamma(y,scale=1,shape=0.5),type="l",ylim=c(0,1),ylab="density") lines(y,dgamma(y,scale=1,shape=3),col="red") lines(y,dgamma(y,scale=1,shape=6),col="blue") abline(h=0,lty=2) abline(v=0,lty=2) legend("topright",lty=c(1,1,1),col=c("black","red","blue"),legend=c("scale=1,shape=0.5","scale=1,shape=3","scale=1,shape=6")) #Gamma, scale parameter varied y<-seq(0,10,by=0.01) plot(y,dgamma(y,scale=0.5,shape=3),type="l",ylim=c(0,1),ylab="density") lines(y,dgamma(y,scale=1,shape=3),col="red") lines(y,dgamma(y,scale=2,shape=3),col="blue") abline(h=0,lty=2) abline(v=0,lty=2) legend("topright",lty=c(1,1,1),col=c("black","red","blue"),legend=c("scale=0.5,shape=3","scale=1,shape=3","scale=2,shape=3")) #Lognormal, location parameter varied y<-seq(0,10,by=0.01) plot(y,dlnorm(y,meanlog=0.1,sdlog=1),type="l",ylim=c(0,1),ylab="density") lines(y,dlnorm(y,meanlog=1,sdlog=1),col="red") lines(y,dlnorm(y,meanlog=2,sdlog=1),col="blue") abline(h=0,lty=2) legend("topright",lty=c(1,1,1),col=c("black","red","blue"),legend=c("meanlog=0.1,sdlog=1","meanlog=1,sdlog=1","meanlog=2,sdlog=1")) #Lognormal, scale parameter varied y<-seq(0,10,by=0.01) plot(y,dlnorm(y,meanlog=1,sdlog=0.5),type="l",ylim=c(0,1),ylab="density") lines(y,dlnorm(y,meanlog=1,sdlog=1),col="red") lines(y,dlnorm(y,meanlog=1,sdlog=2),col="blue") abline(h=0,lty=2) legend("topright",lty=c(1,1,1),col=c("black","red","blue"),legend=c("meanlog=1,sdlog=0.5","meanlog=1,sdlog=1","meanlog=1,sdlog=2")) library(statmod) #Inverse Gaussian, location parameter varied y<-seq(0,10,by=0.01) plot(y,dinvgauss(y,mu=1,lambda=3),type="l",ylim=c(0,1),ylab="density") lines(y,dinvgauss(y,mu=2,lambda=3),col="red") lines(y,dinvgauss(y,mu=3,lambda=3),col="blue") abline(h=0,lty=2) legend("topright",lty=c(1,1,1),col=c("black","red","blue"),legend=c("mu=1,lambda=3","mu=2,lambda=3","mu=3,lambda=3")) #Inverse Gaussian, shape parameter varied y<-seq(0,10,by=0.01) plot(y,dinvgauss(y,mu=3,lambda=1),type="l",ylim=c(0,1),ylab="density") lines(y,dinvgauss(y,mu=3,lambda=3),col="red") lines(y,dinvgauss(y,mu=3,lambda=30),col="blue") lines(y,dinvgauss(y,mu=3,lambda=150),col="magenta") abline(h=0,lty=2) legend("topright",lty=c(1,1,1,1),col=c("black","red","blue","magenta"),legend=c("mu=3,lambda=1","mu=3,lambda=3","mu=3,lambda=30","mu=3,lambda=150")) #Example with gamma distributed response set.seed(33101) b<-1 n<-500; x<-rnorm(n,10,sd=2); shape<-2; y<-rgamma(n,shape,scale=b*x/shape) m1<-glm(y~x,gaussian(link = "identity")) m2<-glm(y ~x, family=quasi(variance="mu", link="identity")) m3<-glm(y~log(x),Gamma(link = "log")) m4<-glm(y~x,Gamma(link = "identity")) m4b<-glm(y~log(x),Gamma(link = "identity")) m5<-glm(y~I(1/x),Gamma(link = "inverse")) m5b<-glm(y~x,Gamma(link = "inverse")) m6<-glm(y~x,inverse.gaussian(link = "identity")) m7<-glm(y~I(1/x^2),inverse.gaussian(link = "1/mu^2")) m8<-glm(y~log(x),inverse.gaussian(link = "log")) plot(x,residuals(m3,type="deviance"),ylab="Deviance residuals",main="Model 3: Gamma"); plot(x,residuals(m4,type="deviance"),ylab="Deviance residuals",main="Model 4: Gamma"); plot(x,residuals(m4b,type="deviance"),ylab="Deviance residuals",main="Model 4b: Gamma"); plot(x,residuals(m6,type="deviance"),ylab="Deviance residuals",main="Model 6: Inverse Gaussian"); #Canadian Automobile Insurance Claims for 1957-1958 #library(statmod) #for tweedie family #library(dglm) #for double GLM con <- url("http://www.statsci.org/data/general/carinsca.txt") carinsca<-read.table(con,header=TRUE) carinsca$Merit <- ordered(carinsca$Merit) carinsca$Class <- factor(carinsca$Class) options(contrasts=c("contr.treatment","contr.treatment")) tinsured<-tapply(carinsca$Insured,list(carinsca$Merit,carinsca$Class),mean); m1 <- glm(Claims~offset(log(Insured))+Merit+Class,data=carinsca,family="poisson") summary(m1) tapply(residuals(m1),list(carinsca$Merit,carinsca$Class),mean); prclaims<-tapply(predict(m1,type="response"),list(carinsca$Merit,carinsca$Class),mean); prclaims/tinsured carinsca$avcost<-carinsca$Cost/carinsca$Claims; m2<-glm(avcost~Merit+Class,data=carinsca,family=Gamma(link="log"),weights=Claims); summary(m2) tapply(residuals(m2),list(carinsca$Merit,carinsca$Class),mean) tapply(predict(m2,type="response"),list(carinsca$Merit,carinsca$Class),mean) #Example with the SUPPORT data set con <- url("http://biostat.mc.vanderbilt.edu/twiki/pub/Main/DataSets/support.sav") print(load(con)) close(con) library(survival) library(Design) dd <- datadist(support) options(datadist='dd') m1<-coxph(Surv(d.time,death)~age+sex,data=support) m2<-cph(Surv(d.time,death)~age+sex,data=support)