Collier and Hoeffler 2004 Lasso by 3t6txH

VIEWS: 17 PAGES: 3

									##########################
##r code for greed and grievance replication/extension file
##James E. Yonamine
##4/13/2012
##data from http://www.apsanet.org/content_29436.cfm
##code takes 20 minutes to run on a 2.3 ghz intel core i5 with 4gb ram
##########################

library(foreign)
library(ROCR)
library(fields)
library(car)
library(lattice)
library(vioplot)
library(Zelig)
library(Amelia)
library(stats)
library(lars)
library(lattice)
library(MASS)

rm(list=ls())
set.seed(2012)

data <- read.dta("collier and hoeffler 2004.dta")
data<-data[complete.cases(data$warsa),] #this drops all rows with NA for warsa,
 i.e. drops all continuations. C&H use this approach, which is reasonable


m=100
a.out <- amelia(data, m = m, ts = "year", cs = "country")

history <- matrix(data=0, nrow=m, ncol=24)
colnames(history) <-c("sxp", "sxp2", "secm", "gy1", "oilsxp", "oilsxp2", "greedxb",
 "diahpeaa", "difdpeaa", "coldwar", "prevwar", "peace", "mount", "geogia", "lnpop",
 "frac", "elfo", "rf", "pol16", "etdo4590", "dem", "ygini","lgini", "grievxb")

for (j in 1:m){
 data<-a.out$imputations[[j]]

data.x<-data.frame(data$sxp, data$sxp2, data$secm, data$gy1, data$oilsxp,
 data$oilsxp2,
data$greedxb, data$diahpeaa, data$difdpeaa, data$coldwar, data$prevwar,
 data$peace, data$mount, data$geogia,
data$lnpop, data$frac, data$elfo, data$rf, data$pol16, data$etdo4590, data$dem,
 data$ygini, data$lgini,
data$grievxb)


   for (i in 1:ncol(data.x)) {
           data.x[,i] <- (data.x[,i] - mean(data.x[,i])) / sd(data.x[,i])
   }

data.x<-as.matrix(data.x)
data.y<-data$warsa
data.y.0 <- data.y - mean(data.y)
lasso<-lars(data.x, data.y.0,type="lasso", trace=TRUE)
#lasso$lambda
summary(lasso)
#lasso$beta

lasso.matrix<-as.matrix(summary(lasso))
 count=1

   for (k in 1:nrow(lasso.matrix)){
   if (lasso.matrix[k,3]==min(lasso.matrix[,3])) {
        trial<-count
        break
        }
        else {
            count=count+1
        }
   }

best.lasso.coef<-lasso$beta[trial,] #selects the trial # with the lowest Cp score
lasso.coef<-as.matrix(best.lasso.coef)

history[j,]<-lasso.coef[,1]
}

history.1<-history
for (i in 1:nrow(history.1)){
 for (j in 1:ncol(history.1)){
   if(history.1[i,j]!=0){
        history.1[i,j]<-1
   }
 }
}

names<-c("sxp", "sxp2", "secm", "gy1", "oilsxp", "oilsxp2", "greedxb", "diahpeaa",
"difdpeaa", "coldwar", "prevwar", "peace", "mount", "geogia", "lnpop", "frac", "elfo",
"rf", "pol16", "etdo4590", "dem", "ygini","lgini", "grievxb")

colors=c("black", "black","black", "black","black", "black","black", "black","black",
 "grey","grey", "grey"," grey", "grey","grey", "white","white", "white","white",
 "white","white", "white","white", "white")

##plotting number of times each variable is selected into lasso

summary <- matrix(data=0, nrow=1, ncol=24)
colnames(summary) <-c("sxp", "sxp2", "secm", "gy1", "oilsxp", "oilsxp2", "greedxb",
 "diahpeaa", "difdpeaa", "coldwar", "prevwar", "peace", "mount", "geogia", "lnpop",
 "frac", "elfo", "rf", "pol16", "etdo4590", "dem", "ygini","lgini", "grievxb")
for (j in 1:ncol(summary)){
 summary[1,j]<-sum(history.1[,j])
}
legend = c("Grievance variables","'Other' variables","Greed variables")
summary<-c(summary)
  barplot(summary, las=2, ylab="# of times selected by Lasso out of 100
 Imputations", col=colors, names.arg=names)
legend(16, 100, legend, fill=c("black","grey","white"))
  title(main="The Importance Standardized 'Opportunity' and 'Grievance' Variables \n
 Indicated by Lasso Selection (Larger Values Reflect Higher Importance)")

								
To top