bsml {bsml} | R Documentation |
Basis Selection from Multiple Libraries Ver. 1.0-5
Description
This is the implementation of the namesake methodology. Multiple libraries may be used to approximate the mean function. The program will adaptively select basis functions from each library and find the optimal model complexity by using the Generalized Degree of Freedom.
Usage
bsml(y,baseslist,method="bsmlc",maxbas=30,sub.maxbas=c(),backward=NULL,
has.control=list(crit="GCV",idf=1.2,ridge=F),
bsmlc.approach="gdf",
gdf.control=list(nrep=100,tauhat="Rice"),
gcvpk.control=list(delta=.1,job=1005,lamlim=c(-6,2),tau=10,ntbl=100,maxtbl=100))
Arguments
y |
Responses. |
baseslist |
List of libraries. |
method |
Three methods available: HAS, BSML-C and BSML-S. Use "has", "bsmlc" and "bsmls" to specify. |
maxbas |
Maximum number of bases to be selected. |
sub.maxbas |
Maximum number of bases to be pre-selected for each library. |
backward |
Backward elimination criterion. Available options are "AIC", "BIC", "Cp" and "GCV". Default is NULL, when no backward elimination is carried out. |
has.control |
Includes three controlling arguments: "crit" for basis selection criterion for HAS; Available options are "AIC", "BIC", "Cp" and "GCV". "idf" for IDF value for HAS; Default is 1.2; If value NA is supplied, it will be estimated. "ridge" for use ridge regression or not. TRUE or FALSE. |
bsmlc.approach |
GDF approach or Covariance Penalty approach. For BSML-C only. |
gdf.control |
Controlling parameters for GDF estimation. |
gcvpk.control |
Controlling parameters for HAS. |
Value
coefficients |
Coefficients |
residuals |
Residuals |
fitted.values |
Fitted Values |
chosen.bases.full |
Full list of indices of chosen bases, excluding null bases. |
chosen.bases.trim |
List of indices of chosen bases in the final model, excluding null bases. |
chosen.bases.matrix |
Matrix of chosen bases in final model, excluding null bases. |
score |
Scores used to determine optimal model complexity. The first score is for the null model. |
idf |
Inflated Degree of Freedom for each chosen basis |
gdf |
Cumulated Generalized Degree of Freedom for the corresponding number of chosen bases in the model. |
nb |
Total number of bases in the model, including both null and chosen bases. |
sigma_sq |
Estimated variance. |
dof |
Degree of Freedom for the model. |
index.bas |
HAS only. Indices of chosen bases in one-dimensional designations. For internal use. |
Author(s)
Junqing Wu, Jeff Sklar, Wendy Meiring, Yuedong Wang
Examples
##############################
# Heavisine Function Example #
##############################
# Install packages nlme and assist first
library(assist)
n=128
x=seq(0,1,length=n)
pt=(rep(1,n)%o%x)[,-c(n)]
f=2.2*(4*sin(4*pi*x)-sign(x-.3)-sign(.72-x))
set.seed(123)
sigma=3
y=f+sigma*rnorm(n)
lib1=stdz(cbind(rep(1,length=n)))
lib2=stdz(1*cbind((x-pt)>0))
lib3=stdz(periodic(x)[,-n])
baseslist=list(lib1,lib2,lib3)
has.obj=bsml(y,baseslist,method="has")
bsmlc.obj=bsml(y,baseslist,method="bsmlc")
bsmls.obj=bsml(y,baseslist,method="bsmls")
plot(x,y)
lines(x,has.obj$fit,col="red")
lines(x,bsmlc.obj$fit,col="green")
lines(x,bsmls.obj$fit,col="blue")
#######################
# Geyser Data Example #
#######################
# Install packages assist, nlme and MASS first
library(assist)
library(MASS)
data(geyser)
n <- nrow(geyser)
y <- geyser[,1]
x <- (geyser[,2]-min(geyser[,2]))/(max(geyser[,2])-min(geyser[,2]))/1.1
bas.nul <- stdz(cbind(1,x-.5))
bas.cub <- stdz(cubic(x,unique(x)))
# remove the first few and the last few points
loc <- seq(.2,.8,len=10)
pt <- rep(1,n)%o%loc
bas.con <- stdz((x-pt)*(x-pt>0))
baslist <- list(bas.nul,bas.cub,bas.con)
# HAS
baslist.has <- list(bas.nul,bas.cub)
has.obj <- bsml(y, baslist.has, maxbas=50, method="has",gcvpk.control=list(delta=.001,job=1001,lamlim=c(-6,2),tau=10,ntbl=100,maxtbl=100))
# BSML-C
bsmlc.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmlc")
# BSML-S
bsmls.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmls")
plot(geyser[,2], y, xlab="duration (min)", ylab="waiting time (min)", xlim=c(0.8,5.5), type="n")
points(geyser[,2], y, cex=.4)
f <- ssr(y~x, cubic(x))
xx <- x*(max(geyser[,2])-min(geyser[,2]))+min(geyser[,2])
ord <- order(xx)
xx <- xx[ord]
lines(xx,f$fit[ord],lty=2)
lines(xx,bsmls.obj$fit[ord],lty=1,col="blue")
lines(xx,bsmlc.obj$fit[ord],lty=3,col="green")
lines(xx,has.obj$fit[ord],lty=4,col="red")
legend(1,65,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8,col=c("black","blue","green","red"))
###########################
# Motorcycle Data Example #
###########################
# Install packages assist, MASS and nlme first
library(assist)
library(MASS)
data(mcycle)
n <- nrow(mcycle)
y <- mcycle[,2]
x <- (mcycle[,1]-min(mcycle[,1]))/(max(mcycle[,1])-min(mcycle[,1]))/1.1
bas.nul <- stdz(cbind(1,x-.5))
bas.cub <- stdz(cubic(x,unique(x)))
# remove the first few and the last few points
pt <- (rep(1,n)%o%unique(x))[,-c(1:19,86:length(unique(x)))]
bas.con <- stdz((x-pt)*(x-pt>0))
baslist <- list(bas.nul,bas.cub,bas.con)
# HAS
baslist.has <- list(bas.nul,bas.cub)
has.obj <- bsml(y, baslist.has, maxbas=50, method="has",gcvpk.control=list(delta=.001,job=1001,lamlim=c(-6,2),tau=10,ntbl=100,maxtbl=100))
# BSML-C
bsmlc.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmlc")
# BSML-S
bsmls.obj <- bsml(y, baslist, maxbas=30, gdf.control=list(nrep=500,tauhat="Rice"),method="bsmls")
plot(mcycle[,1], y, xlab="time (ms)", ylab="acceleration (g)", xlim=c(0,60), type="n")
points(mcycle[,1], y, cex=.4)
f <- ssr(y~x, cubic(x))
xx <- x*(max(mcycle[,1])-min(mcycle[,1]))+min(mcycle[,1])
lines(xx,f$fit,lty=2)
lines(xx,bsmls.obj$fit,lty=1,col="blue")
lines(xx,bsmlc.obj$fit,lty=3,col="green")
lines(xx,has.obj$fit,lty=4,col="red")
legend(40,-80,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8,col=c("black","blue","green","red"))
######################
# Ozone Data Example #
######################
# Install packages assist, nlme and mda first
library(assist)
library(mda)
data(Arosa)
attach(Arosa)
csmonth <- (month-0.5)/12
csyear <- (year-1)/45
x <- (month+12*(year-1)-.5)/(46*12)
grid <- seq(0,1,len=100)
bas.nul <- cbind(1,x-.5)
bas.per <- periodic(csmonth,unique(csmonth))
bas.cub <- cubic(x,unique(x))
baslist <- list(bas.nul,bas.per,bas.cub)
#### fit with one variable as long time series
# fit SS ANOVA
ozone.ssanova.fit1 <- ssr(thick~x, rk=list(periodic(csmonth),cubic(x)))
ssanovafit <- ozone.ssanova.fit1$fit
m1 <- sapply(split(thick,month),mean)
m1 <- m1-mean(m1)
m2 <- sapply(split(thick,year),mean)
m2 <- m2-mean(m2)
grid1 <- data.frame(csmonth=grid, x=.5)
grid2 <- data.frame(csmonth=.5, x=grid)
p.ozone.ssanova11 <- predict(ozone.ssanova.fit1,grid1,terms=c(0,0,1,0))
p.ozone.ssanova12 <- predict(ozone.ssanova.fit1,grid2,terms=c(0,1,0,1))
ssanovafitmonth <- p.ozone.ssanova11$fit <- p.ozone.ssanova11$fit-mean(p.ozone.ssanova11$fit)
ssanovafityear <- p.ozone.ssanova12$fit <- p.ozone.ssanova12$fit-mean(p.ozone.ssanova12$fit)
# fit HAS
ozone.bsmlfit1 <- bsml(thick,baslist,method="has")
hasfit <- ozone.bsmlfit1$fit
finsel <- ozone.bsmlfit1$chosen.bases.matrix
finselper <- ozone.bsmlfit1$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit1$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit1$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit1$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit1$coef[!is.na(ozone.bsmlfit1$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
hasfityear <- ozone.bsmlfit1$coef[2]*grid+cubic(grid,unique(x)[ozone.bsmlfit1$chosen.bases.trim[,2][ozone.bsmlfit1$chosen.bases.trim[,1]==3]])%*%fincocub
hasfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit1$chosen.bases.trim[,2][ozone.bsmlfit1$chosen.bases.trim[,1]==2]])%*%fincoper
hasfityear <- hasfityear-mean(hasfityear)
hasfitmonth <- hasfitmonth-mean(hasfitmonth)
# fit BSML-C
ozone.bsmlfit2 <- bsml(thick,baslist,method="bsmlc")
bsmlcfit <- ozone.bsmlfit2$fit
finsel <- ozone.bsmlfit2$chosen.bases.matrix
finselper <- ozone.bsmlfit2$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit2$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit2$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit2$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit2$coef[!is.na(ozone.bsmlfit2$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
bsmlcfityear <- ozone.bsmlfit2$coef[2]*grid+cubic(grid,unique(x)[ozone.bsmlfit2$chosen.bases.trim[,2][ozone.bsmlfit2$chosen.bases.trim[,1]==3]])%*%fincocub
bsmlcfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit2$chosen.bases.trim[,2][ozone.bsmlfit2$chosen.bases.trim[,1]==2]])%*%fincoper
bsmlcfityear <- bsmlcfityear-mean(bsmlcfityear)
bsmlcfitmonth <- bsmlcfitmonth-mean(bsmlcfitmonth)
# fit BSML-S
ozone.bsmlfit3 <- bsml(thick,baslist,method="bsmls")
bsmlsfit <-ozone.bsmlfit3$fit
finsel <- ozone.bsmlfit3$chosen.bases.matrix
finselper <- ozone.bsmlfit3$chosen.bases.trim[,1]==2
finselcub <- ozone.bsmlfit3$chosen.bases.trim[,1]==3
finbasper <- ozone.bsmlfit3$chosen.bases.matrix[,finselper]
finbascub <- ozone.bsmlfit3$chosen.bases.matrix[,finselcub]
finco <- ozone.bsmlfit3$coef[!is.na(ozone.bsmlfit3$coef)][-c(1:ncol(bas.nul))]
fincoper <- finco[finselper]
fincocub <- finco[finselcub]
bsmlsfityear <- ozone.bsmlfit3$coef[2]*grid
bsmlsfitmonth <- periodic(grid,unique(csmonth)[ozone.bsmlfit3$chosen.bases.trim[,2][ozone.bsmlfit3$chosen.bases.trim[,1]==2]])%*%fincoper
bsmlsfityear <- bsmlsfityear-mean(bsmlsfityear)
bsmlsfitmonth <- bsmlsfitmonth-mean(bsmlsfitmonth)
# Overall fit
plot(x,thick,xlab="time",ylab="thickness",pch=".",type="l")
lines(x,ssanovafit)
lines(x, hasfit,col="red")
lines(x, bsmlcfit,col="green")
lines(x, bsmlsfit,col="blue")
mtext("Observations and overall fit",cex=1.2)
# Fits of components
par(mfrow=c(1,2), mgp=c(2,1,0), mar=c(3,3,3,1)+0.1)
plot(unique(csmonth),m1,xlim=c(0,1),ylim=c(-55,50),xlab="month",ylab="thickness",type="n")
points(unique(csmonth),m1,pch="o",cex=0.6)
lines(grid, ssanovafitmonth, lty=2)
lines(grid, hasfitmonth, lty=4,col="red")
lines(grid, bsmlcfitmonth, lty=3,col="green")
lines(grid, bsmlsfitmonth, lty=1,col="blue")
legend(.1,44,lty=c(2,1,3,4),legend=c("spline","BSML-S","BSML-C","HAS"),cex=.8,col=c("black","blue","green","red"))
mtext("Main effect of month",cex=1.2)
plot(unique(csyear),m2,xlim=c(0,1),ylim=c(-55,50),xlab="year",ylab="thickness",type="n")
points(unique(csyear),m2,pch="o",cex=0.6)
lines(grid, ssanovafityear, lty=2)
lines(grid, hasfityear, lty=4,col="red")
lines(grid, bsmlcfityear, lty=3,col="green")
lines(grid, bsmlsfityear, lty=1,col="blue")
mtext("Main effect of year",cex=1.2)