# _codes for the talk on introdiction to R _MacEwan_ March 2_ 2011

Document Sample

```					#codes for the talk on introdiction to R
#MacEwan, March 2, 2011

?lm
help(lm)

2+1
2^2
(3-1)*3
1-2*3
2/3

sqrt(2)   #sqrt root of 2
exp(2)    #exponential function
sin(2)    # sin function
log(10)   # natural logarithms of 10

x=2
x+3
x<-exp(1)
x

#vector
Age=c(1,1,3,4,4)
Price=c(13990,13495,12999,9500,10495)
c(Age,Price)
cbind(Age,Price)

data

x=c(1,2,3)
x
y=c(5,2,3)
y
x+y # we get   1+5,   2+2,   3+3
x/y # we get   1/5,   2/2,   3/3
x*y # we get   1*5,   2*2,   3*3
x^2 # we get   1^2,   2^2,   3^2

#an easier example
vec=c(2,1,3)
length(vec) #length of a vector
sum(vec) #sum of the observations in a vector
mean(vec) #mean of the observations in a vector
var(vec) #sample variance of 3 observations
sort(vec) #sort 3 obervations increasingly
min(vec) #minimum value of 3 observations
max(vec) #maximum value of 3 observations
summary(vec) #summary of the observations
#original example, replaced by an easier one
sum(Price) #sum of the observations in a vector
length(Price) # length of a data vector
mean(Price) # mean of the observations in a vector
sort(Price) # sort 9 obervations increasingly
min(Price) # the minimum value of 9 strengths
max(Price) # the maximum value of 9 strengths
var(Price) # sample variance of 9 strengths
summary(Price) #five number summary

1:10 #1 to 10, increase by 1
10:1 #10 to 1, decrease by 1
seq(1,9,by=2) #sequence from 1 to 9
seq(1,10,by=2) #commom difference=2
seq(1,9,length=5) #of length 5
seq(1,10,length=5)

vec=1:5
vec[1] #1st element of vec
vec[1:4] # the first four elements
vec[c(1,3,5)] # the 1st, 3rd and 5th elements
vec[-(1:2)] # all except the first 2 elements
vec[vec>3] # all elements that are greater than 3

matrix(1,2,3) #a 2 by 3 matrix with all elements 1
vec=c(1:6)
matrix(vec,2,3) #create a 2 by 3 matrix by columns
matrix(vec,2,3,byrow=T) #by rows
diag(3) #3 by 3 identity matrix

out=cbind(c(1,2,3),c(4,5,6),c(7,8,9))
out
out[1:2,] #Take the 1st row and 2nd row
out[,c(1,3)] #Take the 1st column and 3rd column
out[1,3] #Take (1,3) element of the matrix out

x =cbind(c(1,2),c(3,4))
x
y =rbind(c(6,8),c(7,9))
y

x+y    # return the sum of x and y

x%*%y # return the matrix product of x and y

x*y    # not matrix product of x and y

t(x)   # transpose of x
det(x) #determinant of x

solve(x) #inverse of x

#Normal probability
dnorm(1,mean=0, sd=1) #pdf of N(0,1) at z=1
pnorm(1.96,mean=0, sd=1) #cdf of N(0,1) at z=1.96
qnorm(0.975,mean=0,sd=1) #0.975 quantile of N(0,1)
rnorm(5,mean=0,sd=1) #generate 5 rvs from N(0,1)
rnorm(5,mean=0,sd=1) #another 5 random numbers

set.seed(649) #set the seed
rnorm(5, mean=0,sd=1)
set.seed(649) #set the same seed
rnorm(5, mean=0,sd=1) #obtain the same numbers
#binomial
dbinom(2,200,1/365) #find p(X=2)
#aproximat by possion
dpois(2,200/365) #approximate by possion P(X=2)
1-dpois(0,200/365)-dpois(1,200/365) #P(X>=2)=1-P(X=0)-P(X=1)
ppois(1,200/365,lower.tail=F) #P(X>1)

#plots
x=seq(-2*pi,2*pi,length=100)
y=sin(x)
z=cos(x)
plot(x,y,type="p") #scatter plot, type=points
plot(x,y,type="l") #scatter plot, type=line
plot(x,y,type="b") #scatter plot, type=line and points

plot(x,y,pch=19,xlab="x",ylab="f(x)") #plot the points using solid
circles
lines(x,y,lty=1,col="red") #add a red solid line
lines(x,z,lty=2,col="blue") #add a blue dashed line
title("Sin & Cos") #add a title
par(mfrow=c(2,3)) #plot 6 graphs, 2 row and 3 columns
postscript(file="/Users/wsu/MacEwan/talk/R/plot.ps",height=8,width=8,h
orizontal=F)#start the graphic device
par(mfrow=c(2,1))#two figures in one plot
plot(x,y,pch=1,xlab="x",ylab="sin(x)")
lines(x,y,lty=1,col="red")
title("Sin(x)")#title of the first figure
plot(x,z,pch=1,xlab="z",ylab="cos(z)")
lines(x,z,lty=1,col="blue")
title("Cos(z)")#title of the second figure
dev.off()#close the graphic device

#more useful functions relative to stat 151 and stat 252, stat 265
#summarize the data
attach(data)

hist(income) #histogram of the variable income
stem(income) #stem-and-leaf diagram
boxplot(income~race,data=data) #side-by-side boxplot, by race
tapply(income,race,mean) #sample means for levels of race
tapply(income,region,mean) #sample means for region
table(race,region) #contingence table by race and region

#integration, root finding and optimization
myfun=function(x) {x} #define integrand
integrate(f=myfun,-1,1) #integration from -1 to 1

fun=function(x){2*x^2+x-1} #define the equation
uniroot(f=fun,c(0,2)) #find the root within (0,2)
polyroot(c(-1,1,2)) #find all roots of the polynomial

optimize(f=fun,c(-2,2)) #find minimum within (-2,2)
nlm(f=fun,-1) #find the minimum by Newton's algorithm with starting
point -1
std=function(x){
m=mean(x)
s=sqrt(var(x))
result=(x-m)/s
return(result)
}

x=1:5
y=std(xvar(y)

age=data[,1]
price=data[,2]
model=lm(price~age,data=data) #fit a linear model
names(model)
postscript(file="/Users/wsu/MacEwan/talk/R/car.ps",height=8,width=8,ho
rizontal=F)
plot(age,price,xlab="Age (year)",ylab="Price (\$)",pch=19,cex.lab=1.3)
abline(coef=model\$coef,col="red",lwd=2)
title("Scatterplot of Used Car",cex.main=1.5)
dev.off()
summary(model) #summary of the model

#poisson example
#plot the distribution
rate=5
xvec=seq(0,20,by=1)
yvec=dpois(xvec,rate)
postscript(file="/Users/wsu/MacEwan/talk/R/poisson.ps",height=8,width=
8,horizontal=F)
plot(xvec,yvec,xlab="y",ylab="p(y)",pch=19)
abline(v=5,col="red",lwd=2)
dev.off()

#find quantile
f=function(y) #define the equation
{
2*y^3-3*y^2+0.95
}
uniroot(f=f,c(0,1)) #root within [0, 1]

xvec=seq(0,1,length=100)
yvec=f(xvec)
postscript(file="/Users/wsu/MacEwan/talk/R/quantile.ps",height=8,width
=8,horizontal=F)
plot(xvec,yvec,xlab="y",ylab="f(y)",type="l")
abline(h=0,col="red",lwd=2)
dev.off()

#two-way ANOVA