# load internal data set named LifeCycleSavings
# LifeCycleSavings is a data frame
data(LifeCycleSavings)
# start the pdf graphics device and generate a 7x7 inch pdf file
# that will contain all graphics in multiple pages
par(ask=TRUE)
#pdf(file="NumericGraph.pdf",width=7,height=7)
# other graphical devices also are available. see help(Devices)
# histogram of Savings Rate (sr) with the target number of subintervals
# set to 10. The actual number may differ slightly
# the argument main specifies a main title for the graphic
hist(LifeCycleSavings$sr,breaks=10,xlab="",
main="Savings Ratio of 50 Countries, 1960-70",col="green")
# an expression can be split over several lines. It is not evaluated
# until the R parser reaches a valid end of the expression.
# Indentation of a continued expression makes it easier to read
#
# add a vertical line to the plot located at the mean savings rate
# using color red
abline(v=mean(LifeCycleSavings$sr),col="red")
# end of page 1
# repeat for dpi
hist(LifeCycleSavings$dpi,breaks=10,xlab="",
main="Per Capita Disposable Income of 50 Countries, 1960-70",col="green")
abline(v=mean(LifeCycleSavings$dpi),col="red")
# end of page 2
#
# load a saved R object from a file. This file is not a table of data
# but has an object named fuel.frame saved in a special R format.
source("http://www.utdallas.edu/~ammann/fuel.R")
print(fuel.frame)
Weight = fuel.frame$Weight
Mwgt = mean(Weight)
# set graphical parameter mfrow so that 4 histograms appear on 1 page
par(mfrow=c(2,2))
# first histogram.
# argument xlim is a vector of length 2 that specifies
# the range of values on the x-axis
# argument breaks is a vector that defines the breakpoints for the histogram
hist(Weight,xlab="",col="green",xlim=c(1500,4500),breaks=seq(1500,4500,by=250))
# add a vertical line at mean(Weight)
abline(v=Mwgt,col="blue")
# define value to be added to the Weight vector
nwgt = 10000
# make new Weight vector with an additional component given by nwgt
Weight1 = c(Weight,nwgt)
# second histogram
hist(Weight,xlab="",col="green",xlim=c(1500,4500),breaks=seq(1500,4500,by=250))
# add a subtitle
title(sub=paste("Added",nwgt,"to data"))
# add vertical line at original mean
abline(v=Mwgt,col="blue")
# add vertical line at new mean
abline(v=mean(Weight1),col="red")
# repeat with larger value
nwgt = 25000
Weight1 = c(Weight,nwgt)
hist(Weight,xlab="",col="green",xlim=c(1500,4500),breaks=seq(1500,4500,by=250))
title(sub=paste("Added",nwgt,"to data"))
abline(v=Mwgt,col="blue")
abline(v=mean(Weight1),col="red")
# repeat with larger value
nwgt = 75000
Weight1 = c(Weight,nwgt)
hist(Weight,xlab="",col="green",xlim=c(1500,4500),breaks=seq(1500,4500,by=250))
title(sub=paste("Added",nwgt,"to data"))
abline(v=Mwgt,col="blue")
abline(v=mean(Weight1),col="red")
# repeat these 4 histograms using median instead of mean
Medwgt = median(Weight)
# note that parameter mfrow is still in effect
hist(Weight,xlab="",col="green",xlim=c(1500,4500),breaks=seq(1500,4500,by=250))
abline(v=Mwgt,col="blue")
abline(v=Medwgt)
nwgt = 10000
Weight1 = c(Weight,nwgt)
hist(Weight,xlab="",col="green",xlim=c(1500,4500),breaks=seq(1500,4500,by=250))
title(sub=paste("Added",nwgt,"to data"))
abline(v=Mwgt,col="blue")
abline(v=mean(Weight1),col="red")
abline(v=Medwgt)
nwgt = 25000
Weight1 = c(Weight,nwgt)
hist(Weight,xlab="",col="green",xlim=c(1500,4500),breaks=seq(1500,4500,by=250))
title(sub=paste("Added",nwgt,"to data"))
abline(v=Mwgt,col="blue")
abline(v=mean(Weight1),col="red")
abline(v=Medwgt)
nwgt = 75000
Weight1 = c(Weight,nwgt)
hist(Weight,xlab="",col="green",xlim=c(1500,4500),breaks=seq(1500,4500,by=250))
title(sub=paste("Added",nwgt,"to data"))
abline(v=Mwgt,col="blue")
abline(v=mean(Weight1),col="red")
abline(v=Medwgt)
# construct plot showing distance between c and data
# we need to reset mfrow for 1 plot per page
par(mfrow=c(1,1))
mWgt = mean(Weight)
c1 = min(Weight)
c2 = max(Weight)
# construct vector of length 200 over range of Weight
cloc = seq(c1,c2,length=200)
Scdist = 5000000
cXdist = outer(Weight,cloc,"-")^2
cXdist = apply(cXdist,2,sum)/Scdist
aXdist = pretty(Scdist*cXdist)
hist(Weight,breaks=seq(min(Weight)-1,max(Weight)+1,length=length(Weight)),col="green",
main="Distance Between Weight data and Location c",ylim=c(0,max(cXdist)))
lines(cloc,cXdist,col="blue")
title(sub=paste("Mean(Weight) =",round(mWgt,1)),col.sub="red")
abline(v=mWgt,col="red")
text(cloc[5],cXdist[5],"D(c)",pos=4,col="blue")
axis(4,pretty(cXdist),labels=aXdist,line=-1,col.axis="blue")
mtext("Distance",4,adj=.5,line=1,col="blue")
# extract Mileage
Mileage = fuel.frame$Mileage
# plot Weight vs Mileage using plot(x,y) syntax
# argument pch specifies different plot symbol
plot(Weight,Mileage,pch=19)
title("Scatterplot of Weight vs Mileage")
abline(h=mean(Mileage),col=gray(.5))
abline(v=mean(Weight),col=gray(.5))
corMW = cor(Weight,Mileage)
title(sub=paste("r =",round(corMW,3)))
# replot deviations from respective means
MileageD = Mileage - mean(Mileage)
WeightD = Weight - mean(Weight)
# arguments xlab and ylab specify axis labels
plot(WeightD,MileageD,xlab="Deviation of Weight from mean(Weight)",
ylab="Deviation of Mileage from mean(Mileage)",pch=19)
title("Deviations from the Means: Weight vs Mileage")
abline(h=0,col=gray(.5))
abline(v=0,col=gray(.5))
title(sub=paste("r =",round(corMW,3)))
# replot and add least squares regression line
plot(Mileage ~ Weight,data=fuel.frame,pch=19)
title("Scatterplot of Weight vs Mileage")
Mileage.lm = lm(Mileage~Weight,data=fuel.frame)
Mileage.coef = coef(Mileage.lm)
abline(Mileage.coef,col="red")
# add subtitle that prints correlation
# function paste() combines its arguments into a single string
title(sub=paste("Slope = ",round(Mileage.coef[2],5),"; Intercept = ",round(Mileage.coef[1],2)))
# plot Mileage vs Type using formula. In this case, the terms in the formula:
# Mileage ~ Type
# are part of the data frame named fuel.frame, so we can use the data=
# argument to specify that data.frame
plot(Mileage ~ Type,data=fuel.frame,col="cyan")
# since Type is a factor, this gives boxplots of Mileage by Type
title("Boxplots of Mileage by Auto Type")
# plot residuals vs predicted values
plot(Mileage.lm,which=1)
# quantile-quantile plot
plot(Mileage.lm,which=2)
# just consider cars with Disp < 225
ndx = fuel.frame$Disp < 225
fuel1 = fuel.frame[ndx,]
plot(Mileage ~ Weight,data=fuel1,pch=19)
title("Scatterplot of Weight vs Mileage")
Disp.lm = lm(Mileage~Weight,data=fuel1)
Disp.coef = coef(Disp.lm)
abline(Disp.coef,col="red")
plot(residuals(Disp.lm) ~ Weight,data=fuel1,pch=19,ylab="Residuals")
abline(h=0,col="red")
title("Residuals vs Weight\nData = fuel1")
# qqnorm plot
qqnorm(residuals(Disp.lm),pch=19)
qqline(residuals(Disp.lm),col="red")
# Mileage vs Disp
par(ask=FALSE)
#graphics.off()