library(Epi)
library(splines)
load(file="../data/inc.Rdata")
levels( inc$sex ) <- c("F","M")
str(inc)
YY <- xtabs( well.Y ~ sex + floor(A) + floor(P), data=inc )
YY <- data.frame( YY )
names(YY) <- c("sex","A","P","Y")
DD <- xtabs(      D ~ sex + floor(A) + floor(P), data=inc )
DD <- data.frame( DD )
names(DD) <- c("sex","A","P","D")
DMDK <- merge( DD, YY )
head( DMDK )
summary( DMDK )
DMDK <- transform( DMDK, A = as.numeric(as.character(A)),
                         P = as.numeric(as.character(P)),
                         rate=D/Y*1000 )
ord <- with( DMDK, order(sex,P,A) )
DMDK <- DMDK[ord,]
head( DMDK )

# Plot the average rates for the entire period
YY <- xtabs( well.Y ~ sex + floor(A), data=inc )
YY <- data.frame( YY )
names(YY) <- c("sex","A","Y")
DD <- xtabs(      D ~ sex + floor(A), data=inc )
DD <- data.frame( DD )
names(DD) <- c("sex","A","D")
DMDKm <- merge( DD, YY )
head( DMDKm )
summary( DMDKm )
DMDKm  <- transform( DMDKm, A = as.numeric(as.character(A)),
                            rate=D/Y*1000 )
ord <- with( DMDKm, order(sex,A) )
DMDKm <- DMDKm[ord,]
head( DMDKm )

pdf( "avg-rates.pdf" )
par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 )
with( DMDKm, plot( A+0.5, rate, type="n", log="y", ylim=c(0.1,20),
                  xlab="Age", ylab="Incidence rate per 1000 PY" ) )
with( subset(DMDKm, sex=="M" & D>0), lines( A+0.5, rate, lwd=3, col="blue" ) )
with( subset(DMDKm, sex=="F" & D>0), lines( A+0.5, rate, lwd=3, col="red"  ) )
dev.off()

source( "c:/stat/r/bxc/library.sources/useful/R/Ns.R" )
Ns

pdf( "fit-rates.pdf" )
par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 )
Apr <- seq(0.5,99.5,0.5)
Akn <- seq(5,95,5) # with( inc, quantile( rep(A,D), probs=0:25/25 ) )
Pkn <- with( inc, quantile( rep(P,D), probs=1:4/5 ) )
mM <- glm( D ~ Ns(A,knots=Akn) + Ns(P,knots=Pkn),
               offset = log(well.Y),
           data = subset(inc,sex=="M"),
           family=poisson )
mF <- glm( D ~ Ns(A,knots=Akn) + Ns(P,knots=Pkn),
               offset = log(well.Y),
           data = subset(inc,sex=="F"),
           family=poisson )

plot( NA, type="n", log="y", ylim=c(0.1,20), xlim=c(0,100),
          xlab="Age", ylab="Incidence rate per 1000 PY", las=1 )
abline( h=c(1:9/19,1:9,1:9*10), v=0:10*10, col=gray(0.8) )
rect(0,6,10,30,col="white",border=gray(0.8))
ff <- 1.25
txty = 7/ff
for( pp in 1995+seq(0,15,3) )
{
lines( Apr,
       predict.glm( mM, newdata = data.frame(A=Apr,P=pp,well.Y=1000),
                               type="response" ),
       lwd=1+(pp%%2), col="blue")
lines( Apr,
       predict.glm( mF, newdata = data.frame(A=Apr,P=pp,well.Y=1000),
                               type="response" ),
       lwd=1+(pp%%2), col="red")
txty <- txty*ff
text( 5, txty, paste(pp), font=1+pp%%2 )
}
box()
dev.off()

new <- expand.grid( A=Apr,
                    P=1995:2010,
                    well.Y=1000,
                    sex=levels(inc$sex) )
head(new)

new <- cbind( new[,c(4,1,2)],
rate = c( predict.glm( mF, newdata = subset(new,sex="F"), type="response" ),
          predict.glm( mM, newdata = subset(new,sex="M"), type="response" ) ) )
str( new )
head( new )

write.csv( new, row.names=FALSE, file="DMDK.csv" )
