# R Scripts for Chapter 4

# Document Sample
# R Scripts for Chapter 4 (Powered By Docstoc)
# R Scripts
# Script 4.1 for Chapter 4: creating value labels
# and analyzing categorical data

# Load the helper functions used by this script. indexdisp(), called
# further down, is not defined in this file, so it is presumably
# provided by this file of functions (TODO confirm).
source("c:/r/functions.txt")

# Read in the data. "header = TRUE" tells R to take the variable names
# from the first line of the file.
ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE)

# The data frame is deliberately not attach()ed. attach() copies the
# columns as they are at that moment onto the search path, so the factor
# columns added to ecls200 below would not be visible through it. Every
# later step of this script reaches the variables through ecls200$... or
# with(ecls200, ...).
# Make labels for gender.
# This creates a new variable, f.gender, in the data frame ecls200, based
# on the values of the variable gender in that data frame.
# The order of the levels is fixed by "levels" (1 = Male, 2 = Female), so
# they stay in that order instead of being alphabetized. "ordered = TRUE"
# is not needed for that: it would declare the categories ordinal
# (Male < Female), which is not meaningful for a nominal variable.
ecls200$f.gender <- factor(ecls200$gender, levels = 1:2,
  labels = c("Male", "Female"))
# Make labels for race (nominal; the remarks for f.gender apply here too)
ecls200$f.race <- factor(ecls200$race, levels = 1:2,
  labels = c("White", "AA"))
# Make labels for childcare, built from p1center (nominal as well)
ecls200$f.childcare <- factor(ecls200$p1center, levels = 1:2,
  labels = c("Yes", "No"))
# Make labels for family type using a different "style": a named vector
# whose names are the labels and whose values are the codes.
# Note the use of periods in place of spaces in the labels.
vals <- c("2Par.Sibs" = 1, "2Par.NoSibs" = 2, "1Par.Sibs" = 3,
  "1Par.NoSibs" = 4, "Other" = 5)
# Family type is nominal, so a plain (unordered) factor is created; the
# order of its levels still follows the order of the codes in "vals".
ecls200$f.famtype <- factor(ecls200$p1hfamil, levels = vals,
  labels = names(vals))
# Code WKMOMED and WKDADED by reusing labels, helpful when many
# variables have the same value labels. The labels are listed in the
# order of the codes they stand for (first label = code 1, and so on).
# Again, periods are used throughout in place of blanks.
ednames <- c(
  "8th.Grade.or.Less",
  "9th.to.12th.Grade",
  "HS.Grad/GED",
  "Voc/Tech.Prog",
  "Some.College",
  "Coll.Grad",
  "Some.Grad/Prof.School",
  "Master's",
  "Doctoral/Prof.Deg"
)
# Mother's and father's education share the label set in ednames; the
# codes 1, 2, ... are matched to the labels in order. Educational level
# is ordinal, so both variables are built as ordered factors.
ecls200$f.momed <- ordered(ecls200$wkmomed, levels = seq_along(ednames),
  labels = ednames)
ecls200$f.daded <- ordered(ecls200$wkdaded, levels = seq_along(ednames),
  labels = ednames)
# SES labels: all five share the second word "Quintile", so the labels
# are generated by pasting it onto each first word -- yet another option
sesnames <- paste0(c("Lowest", "Second", "Third", "Fourth", "Highest"),
  ".Quintile")
# SES quintile as an ordered factor: codes 1 to 5 map onto sesnames
ecls200$f.ses <- ordered(ecls200$wksesq5, levels = seq_len(5),
  labels = sesnames)
# Distribution of mother's educational level within each race.
# The object "t" holds the cross-tabulation: one row per level of
# f.momed and one column per level of f.race.
t <- table(ecls200$f.momed, ecls200$f.race, dnn = c("f.momed", "f.race"))
t
# Index of dispersion (IoD) for the White subsample, with the
# frequencies typed in as a literal vector
w.vals <- c(2, 7, 26, 8, 23, 22, 5, 7, 0)
indexdisp(w.vals)
# IoD for the AA subsample, same approach
aa.vals <- c(0, 9, 35, 4, 30, 16, 3, 3, 0)
indexdisp(aa.vals)
# Given that "t" has one column per race, the frequencies can also be
# taken straight from it: column 1 holds the White counts and column 2
# the AA counts. as.vector() drops the row labels so that a plain
# vector of counts is passed on.
w.vals1 <- as.vector(t[, 1])
aa.vals1 <- as.vector(t[, 2])
indexdisp(w.vals1)
indexdisp(aa.vals1)
# Bar plots of the proportion of mothers at each educational level:
# first for the White subsample, then for the African American one.
# local() keeps the helper objects out of the workspace.
local({
  # One vector of f.momed values per race group
  momed.by.race <- split(ecls200$f.momed, ecls200$f.race)
  plot.labels <- c(
    "White" = "EDLEVEL for White Mothers",
    "AA" = "EDLEVEL for African American Mothers"
  )
  for (grp in names(plot.labels)) {
    momed <- momed.by.race[[grp]]
    # Dividing the counts by the subsample size turns them into proportions
    barplot(table(momed) / length(momed),
      xlab = plot.labels[[grp]], ylab = "Proportions")
  }
})


# Script 4.2: Additional Exercise for Chapter 4


# Describing categorical variables - hisp.asian.all
# The workspace is deliberately not wiped with rm(list = ls()): that
# would delete the user's own objects, and this script creates every
# object it uses. Start a fresh R session if a clean slate is wanted.
library(car)
# The "car" package has many useful procedures
# Read the functions from the text file
source("C:/R/functions.txt")
# Suppress the significance stars in printed output
options(show.signif.stars = FALSE)
# Reading in the data
hisp.asian <- read.table("c:/rbook/hisp.asian.all.txt", header = TRUE)
# Create factor-type variables with labels.
# The source columns are read through hisp.asian$... instead of through
# attach(): an attached copy is taken at attach time, so it would not
# contain the f.* columns that are added here.
# Race, home language and school type are nominal, so plain (unordered)
# factors are used; the order of the levels is fixed by "levels".
hisp.asian$f.race <- factor(hisp.asian$race, levels = c(3, 5),
  labels = c("Hispanic", "Asian"))
hisp.asian$f.lang <- factor(hisp.asian$wklangst, levels = 1:2,
  labels = c("~English", "English"))
hisp.asian$f.school <- factor(hisp.asian$s2kpupri, levels = 1:2,
  labels = c("Public", "Private"))
# Set up labels for educational level, listed in the order of the codes
# they stand for; periods are used in place of blanks
ednames <- c("8th.Grade.or.Less",
  "9th.to.12th.Grade", "HS.Grad/GED",
  "Voc/Tech.Prog", "Some.College",
  "Coll.Grad", "Some.Grad/Prof.School",
  "Master's", "Doctoral/Prof.Deg")
# Educational level is ordinal, so this one is an ordered factor
hisp.asian$f.momed <- ordered(hisp.asian$wkmomed,
  levels = seq_along(ednames), labels = ednames)
# One-way frequency tables.
# with() evaluates each call inside the current hisp.asian, so the f.*
# factor columns are found without (re)attaching the data frame.
# attach() copies the data frame as it is at that moment, and repeated
# attach() calls stack masked copies on the search path.
with(hisp.asian, table(f.race))
with(hisp.asian, table(f.lang))
with(hisp.asian, table(f.school))
with(hisp.asian, table(f.momed))
# Two-way frequency tables by race, saved for the IOD computations
t.lang <- with(hisp.asian, table(f.lang, f.race))
t.lang
t.school <- with(hisp.asian, table(f.school, f.race))
t.school
t.momed <- with(hisp.asian, table(f.momed, f.race))
t.momed
# Getting barplots, laid out as one row of two panels
par(mfrow = c(1, 2))
# The four plots differ only in the variable, the race group and the
# axis label, so they are described in a small list and drawn in a loop.
# local() keeps the helper objects out of the workspace.
local({
  panels <- list(
    # Home Language - Hispanic, then Asian children
    list(var = "f.lang", race = "Hispanic",
      xlab = "Home Lang. for Hispanic Children"),
    list(var = "f.lang", race = "Asian",
      xlab = "Home Lang. for Asian Children"),
    # School Type - Hispanic, then Asian children
    list(var = "f.school", race = "Hispanic",
      xlab = "Sch. Type for Hispanic Children"),
    list(var = "f.school", race = "Asian",
      xlab = "Sch. Type for Asian Children")
  )
  for (p in panels) {
    # Values of the plotted variable for the children in this race group
    in.group <- which(hisp.asian$f.race == p$race)
    vals <- hisp.asian[[p$var]][in.group]
    # Counts divided by the group size give proportions
    barplot(table(vals) / length(vals),
      xlab = p$xlab,
      ylab = "Proportions", ylim = c(0, 1))
  }
})
# Back to a single panel per page: mother's educational level has too
# many bars to share a page with a second plot
par(mfrow = c(1, 1))
# Mother's Educational Level - Hispanic Students
# (counts divided by the subsample size give proportions)
with(subset(hisp.asian, subset = f.race == "Hispanic"),
  barplot(table(f.momed) / length(f.momed),
    xlab = "Mother's Educational Level for Hispanic Children",
    ylab = "Proportions", ylim = c(0, 1)))
# Mother's Educational Level - Asian Students
with(subset(hisp.asian, subset = f.race == "Asian"),
  barplot(table(f.momed) / length(f.momed),
    xlab = "Mother's Educational Level for Asian Children",
    ylab = "Proportions", ylim = c(0, 1)))
# Indices of dispersion (IOD).
# In each saved table the columns are the race groups: column 1 holds
# the Hispanic counts and column 2 the Asian counts. as.vector() drops
# the row labels so that a plain vector of counts is passed on.
# For Language
h.vals.lang <- as.vector(t.lang[, 1])
a.vals.lang <- as.vector(t.lang[, 2])
# IOD for Language - Hispanic Children
indexdisp(h.vals.lang)
# IOD for Language - Asian Children
indexdisp(a.vals.lang)
# For School Type
h.vals.sch <- as.vector(t.school[, 1])
a.vals.sch <- as.vector(t.school[, 2])
# IOD for School Type - Hispanic Children
indexdisp(h.vals.sch)
# IOD for School Type - Asian Children
indexdisp(a.vals.sch)
# For Mother's Educational Level
h.vals.ed <- as.vector(t.momed[, 1])
a.vals.ed <- as.vector(t.momed[, 2])
# IOD for Mother's Educational Level - Hispanic Children
indexdisp(h.vals.ed)
# IOD for Mother's Educational Level - Asian Children
indexdisp(a.vals.ed)

				
# DOCUMENT INFO
# Shared By:
# Categories:
# Tags:
# Stats:
# views: 5
# posted: 3/26/2013
# language: Unknown
# pages: 5