### file scatter.wslope.R: This function makes a scatterplot of y versus x, then superimposes ### either the adjusted or unadjusted regression line (or both), with or without confidence bands ### for the regression line(s). The adjusted line, which adjusts for all covariates identified ### in the "covnames" arguments, shows the predicted line at mean values of all covname variables. ### Also user has the option to add a legend to print the slope and corresponding p-value. ### (If user wants both adjusted and unadjusted lines, and wants the legend, it will show both.) scatterWslope <- function(tdat, yname, xname, covnames=NULL, txlab=NULL, tylab=NULL, ttitle=NULL, cex.title=1, adjusted=TRUE, unadjusted=FALSE, col.adj="blue", col.unadj="darkgreen", conf.bands.adj=TRUE, conf.bands.unadj=TRUE, cex.xylab=1.5, pch.points=1, cex.points=1, print.results=FALSE, check.line=FALSE, save.plots=FALSE, type.plots="pdf", basename.plot=NULL, show.legend=FALSE, legend.loc="topleft", cex.legend=1, round.slope=2,...) { ### ARGUMENTS: ### tdat is database ### yname is name of y variable in tdat ### xname is name of x variable in tdat ### covnames is vector of names of remaining variables ### txlab is name to go on x-axis of plot ### tylab is name to go on y-axis of plot ### adjusted=TRUE if want adjusted line ### unadjusted=TRUE if want unadjusted line (note: can show both adjusted and unadjusted) ### col.adj=color to show adjusted regression line (and associated legend) ### col.unadj=color to show unadjusted regression line (and associated legend) ### conf.bands.adj=TRUE if want confidence band for adjusted regression lines ### conf.bands.unadj=TRUE if want confidence band for unadjusted regression lines ### cex.xylab=size of x and y axis labels ### pch.points=plotting character for the points (pch.points=1 is circles) ### cex.points=size of points ### ### (Next arguments apply to optional checks) ### print.results=TRUE means the program will output the sample size and regression summary ### check.line=TRUE means to also show (overlay) the regression line using the abline command ### (in addition to line using predict command: normally this should be FALSE) ### (For the adjusted line, will not be the same unless covnames are centered.) ### ### (Next arguments apply to the case when user wants to save the file) ### save.plots=TRUE means the user wants to create a file of the plot ### type.plots="eps" or "pdf" are the two options if save.plots=TRUE ### basename.plot=base name of the file, e.g. basename.plot=silly and type.plots="pdf" will create silly.pdf ### ### (Next arguments apply to legend showing slope and p-value) ### show.legend=TRUE means put slope and p-value on plot ### legend.loc=location of legend ### cex.legend=size of legend ### round.slope=n means to round slope in legend to n digits (default=2) if (print.results) cat("Number of observations in original data is",nrow(tdat),"\n") ### Check to see if variable names are valid if (is.element(yname,colnames(tdat))==FALSE) stop("yname not a name in database") if (is.element(xname,colnames(tdat))==FALSE) stop("xname not a name in database") ### Allow null covnames; in this case make unadjusted plot. ### However, if covnames is null, adjusted=TRUE and unadjusted=FALSE, warn user if (is.null(covnames)) { if (adjusted & !unadjusted) stop("Function called with adjusted=TRUE, unadjusted=FALSE, but covnames=NULL") unadjusted <- TRUE adjusted <- FALSE } ### Check for validity of covnames if (!is.null(covnames)) { for (i in 1:length(covnames)) { if (is.element(covnames[i],colnames(tdat))==FALSE) stop("an element of covnames not a name in database") } } # Make sure EITHER adjusted=TRUE or unadjusted=TRUE, or both if (adjusted==FALSE & unadjusted==FALSE) stop("Must show either adjusted or unadjusted line or both") ### If legend.loc is not specified, make it topleft. Also check to see if it is valid if (is.null(legend.loc)) legend.loc <- "topleft" possible.legend.loc <- c("topleft","topright","bottomleft","bottomright","top","bottom","left","right","center") if (is.element(legend.loc,possible.legend.loc)==FALSE & show.legend==TRUE) stop("Legend location must be correctly specified\n") # Exclude missing values use1.dat <- as.data.frame(tdat) if (adjusted) { use.dat <- na.exclude(use1.dat[,c(yname,xname,covnames)]) } else { ### Only interested in unadjusted model use.dat <- na.exclude(use1.dat[,c(yname,xname)]) } if (print.results) cat("Number of observations used in the model (with complete data)",nrow(use.dat),"\n") ### get or create name for x=axis and y-axis if (is.null(txlab)) txlab <- xname if (is.null(tylab)) tylab <- yname ################################# ################################# plot.line <- function(use.dat,yname,xname,covnames=NULL,tmodel,col.lines="blue",conf.bands=TRUE) { ### Arguments: use.dat=dataset ### yname = name of y variable ### xname = name of x variable ### covnames = name of other covariates that model adjusts for ### tmodel is model fit from lm or glm if (!is.null(covnames)) { ### User wants an adjusted regression ### Create a covariate "dataset" with mean values of all covariates mean.covars.dat <- matrix(nrow=nrow(use.dat),ncol=length(covnames)) colnames(mean.covars.dat) <- covnames for (j in 1:length(covnames)) { thiscov <- covnames[j] mean.covars.dat[,thiscov] <- mean(use.dat[,thiscov]) } ### Create dataset of just x and y nocovars.dat <- as.data.frame(use.dat[,c(yname,xname)]) ### Will want to create predicted values at mean values of covariates, so put in the mean values ### for covariates other than x and y topred.dat <- as.data.frame(cbind(nocovars.dat,mean.covars.dat)) topred.order.dat <- topred.dat[order(topred.dat[,xname]),] } else { ### User wants an unadjusted regression ### Need to get the name of the x-variable from the lm object xname <- names(coef(tmodel))[2] # This will be "xvar" (no quotes) ### Create dataset of just x and y use.dat <- as.data.frame(cbind(yvar,xvar)) ### Put dataset in order by x variable topred.order.dat <- use.dat[order(xvar),] } yfit <- predict(tmodel,newdata=topred.order.dat,type="response",interval="confidence") lines(x=topred.order.dat[,xname],y=yfit[,"fit"],lty=1,col=col.lines,lwd=3) if (conf.bands) { lines(x=topred.order.dat[,xname],y=yfit[,"lwr"],lty=2,col=col.lines,lwd=2) lines(x=topred.order.dat[,xname],y=yfit[,"upr"],lty=2,col=col.lines,lwd=2) } ### Note: the following 2 lines show the same slope, but different intercept ### Probably the reason is that the following does not apply to mean values of other covariates # yfit2 <- predict(tmodel,type="terms",interval="confidence") # lines(x=use.dat[,"x"],y=yfit2[,"x"],col="red") } # end of function plot.adj.line ################################# ################################# add.legend <- function(tmodel.adj=NULL,tmodel.unadj=NULL, col.adj="blue",col.unadj="darkgreen", text.adj="slope",text.unadj="slope", cex.legend=1,legend.loc) { ### Add a legend to the plot ### 3 possibilities: (1) only want legend to print adjusted results; ### (2) only want legend to print unadjusted results; (3) want legend with both results ### If want to print both adjusted and unadjusted slopes, might want to call ### this with arguments text.adj="slope (adj)" and text.unadj="slope (unadj)" if (!is.null(tmodel.adj)) { ### There is an adjusted model tslope.adj <- round(coef(summary(tmodel.adj))[xname,"Estimate"],round.slope) tslope.adj <- paste(text.adj,"=",tslope.adj,sep=" ") tpval.adj <- coef(summary(tmodel.adj))[xname,"Pr(>|t|)"] ### Round p-value, but show as "<0.01" if so if (tpval.adj < 0.01) { tpval.adj <- "p < 0.01" } else { tpval.adj <- paste("p =",round(tpval.adj,2),sep=" ") } } if (!is.null(tmodel.unadj)) { ### There is an unadjusted model ### Get the name of the xvariable (will be called xvar) xname <- names(coef(tmodel.unadj))[2] tslope.unadj <- round(coef(summary(tmodel.unadj))[xname,"Estimate"],round.slope) tslope.unadj <- paste(text.unadj,"=",tslope.unadj,sep=" ") tpval.unadj <- coef(summary(tmodel.unadj))[xname,"Pr(>|t|)"] ### Again, round p-value, but show as <0.01 if so if (tpval.unadj < 0.01) { tpval.unadj <- "p < 0.01" } else { tpval.unadj <- paste("p =",round(tpval.unadj,2),sep=" ") } } ### Here is where the function either shows adjusted OR unadjusted OR both if (!is.null(tmodel.adj) & !is.null(tmodel.unadj)) { ### User has both adjusted and unadjusted models on plot ### Put together slope and p-values on same line text.adj <- paste(tslope.adj,", ",tpval.adj,sep="") text.unadj <- paste(tslope.unadj,", ",tpval.unadj,sep="") legend(legend.loc,c(text.adj,text.unadj),text.col=c(col.adj,col.unadj),cex=cex.legend,bty="n") } else { ### User only shows adjusted or unadjusted model if (!is.null(tmodel.adj)) legend(legend.loc,c(tslope.adj,tpval.adj),text.col=c(rep(col.adj,2)),cex=cex.legend,bty="n") if (!is.null(tmodel.unadj)) legend(legend.loc,c(tslope.unadj,tpval.unadj),text.col=c(rep(col.unadj,2)),cex=cex.legend,bty="n") } } # end of add.legend function ###################### ###################### ### If saving plots, open up a pdf or eps file ### Make pdf be the default (e.g. if type.plots is misspelled) if (save.plots) { if (type.plots=="eps") ending <- ".eps" else ending <- ".pdf" fname <- (paste(basename.plot,ending,sep="")) if (type.plots=="eps") postscript(fname) else pdf(fname) ### if latex, print line to be able to include plot in LaTeX # if (latex) { # cat(" \n") # cat("\n \n") # cat("\n \\includegraphics{",fname,"}\n\n",sep="") # } } ### Start the plot ### First set up plot "shell", including x and y axis labels, with size cex.xylab plot(x=use.dat[,xname],y=use.dat[,yname],type="n",xlab=txlab,ylab=tylab,cex.lab=cex.xylab) ### Now plot the points using the character given by pch.points points(x=use.dat[,xname],y=use.dat[,yname],pch=pch.points,cex=cex.points) title(ttitle,cex.main=cex.title) ### define y and x for model: I believe this is needed in order to ### be able to plot the unadjusted line with confidence bandd yvar <- use.dat[,yname] xvar <- use.dat[,xname] ################# if (adjusted) { ### If user wants adjusted regression, fit regression model XX <- use.dat[,c(xname,covnames)] tmodel.adj <- lm(yvar~.,data=XX) if (print.results) { ### Show results, if print.results=TRUE cat("\nCoefficients from adjusted regression\n") print(round(summary.lm(tmodel.adj)$coefficients,4)) } ### Now call on plot.line to show the fitted line, with confidence bands if desired plot.line(use.dat,yname,xname,covnames,tmodel=tmodel.adj,col.lines=col.adj,conf.bands=conf.bands.adj) } if (unadjusted) { ### If user wants unadjusted regression, fit regression model tmodel.unadj <- lm(yvar~xvar) if (print.results) { ### Show results, if print.results=TRUE cat("\nCoefficients from unadjusted regression\n") print(round(summary.lm(tmodel.unadj)$coefficients,4)) } ### Again call on plot.line to show the fitted line, with confidence bands if desired plot.line(use.dat,yname,xname,tmodel=tmodel.unadj,col.lines=col.unadj,conf.bands=conf.bands.unadj) } ################ ### If user wants a legend printing values of the slope and p-value, will call add.legend to do this if (show.legend) { ### If user wanted both adjusted and unadjusted regressions, give these arguments to add.legend if (adjusted & unadjusted) add.legend(tmodel.adj,tmodel.unadj,col.adj,col.unadj, text.adj="slope (adj)",text.unadj="slope (unadj)",legend.loc=legend.loc,cex.legend=cex.legend) else { ### Otherwise, just call add.legend with the relevant arguments for adjusted or unadjusted regression if (adjusted) add.legend(tmodel.adj=tmodel.adj,col.adj=col.adj,legend.loc=legend.loc,cex.legend=cex.legend) if (unadjusted) add.legend(tmodel.unadj=tmodel.unadj,col.unadj=col.unadj,legend.loc=legend.loc,cex.legend=cex.legend) } } ### The next part is just a check to show the line using abline. Note that ### abline can't show the confidence interval for the regression . Also, abline will ### not go through the points for the adjusted regression, unless the other covariates ### are centered. ### if check.line, draw adjusted line without using predict command if (check.line) { if (adjusted) { intercpt <- coef(tmodel.adj)["(Intercept)"] cat("Check.line=TRUE, so showing adjusted regression line in red on plot\n") cat("The line will not go through the points if the mean of the other variables is not zero\n") abline(intercpt,coef(tmodel.adj)[xname],lty=3,lwd=5,col="red") } if (unadjusted) { intercpt <- coef(tmodel.unadj)["(Intercept)"] xname <- names(coef(tmodel.unadj))[2] cat("Check.line=TRUE, so showing unadjusted regression line in brown on plot\n") abline(intercpt,coef(tmodel.unadj)[xname],lty=3,lwd=5,col="brown") } } ### If user wanted to save the plots in a pdf or eps file, need to now close it. if (save.plots) dev.off() } # end of function scatter.wslope