443-970-2353
[email protected]
CV Resume
Let’s visualize the murder rate in the US using ggplot2 (a package I love). The data used here is provided by the U.S. Census Bureau and the FBI. Details about the data can be found here.
library(ggplot2)
library(maps)
library(ggmap)
require(downloader)
# Fetch the US state outlines as a data frame via map_data().
# Swap "state" for "world" to get a world map instead.
statesMap <- map_data(map = "state")
# Show a compact overview of the columns that came back.
str(object = statesMap)
We can now plot the states using ggplot2
# Draw every state outline from statesMap with a black border.
# NOTE(review): `fill` is set outside aes() to the raw `group` values, so no
# fill scale or legend is involved; presumably the numbers end up being used
# as colour indices -- confirm that this colouring is intended.
ggplot(data = statesMap, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = statesMap$group) +
  theme(
    # Axis titles: dark grey, size 15; the y title is rotated by 90 degrees.
    axis.title.x = element_text(
      colour = "grey20", size = 15, angle = 0,
      hjust = 0.5, vjust = 1, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 15, angle = 90,
      hjust = 0.5, vjust = 1, face = "plain"
    ),
    # Tick labels: same colour and size; the x labels are tilted by 60 degrees.
    axis.text.x = element_text(
      colour = "grey20", size = 15, angle = 60,
      hjust = 0.5, vjust = 0.5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 15, angle = 0,
      hjust = 1, vjust = 0, face = "plain"
    )
  )
Now, let’s download the murder data.
# Download the murders data set to the working directory and load it into a
# data frame.
# NOTE(review): the address below contains "[email protected]", which looks like
# an e-mail-obfuscation artefact rather than part of a real URL -- restore the
# original address before running this.
url <- "http://courses.edx.org/asset-v1:[email protected]+block/murders.csv"
# `destfile` is written out in full: the abbreviated `dest` only worked through
# partial argument matching once the argument reached download.file().
download(url, destfile = "murders.csv")
murders <- read.csv("murders.csv")
# Show a compact overview of the columns that were read.
str(murders)
Now, let’s create a new variable called region with the lowercase names to match the statesMap.
# Add a lower-cased copy of the State column under the name `region`, the key
# that the later merge with statesMap joins on.
murders$region <- tolower(x = murders$State)
We have to join the statesMap data and the murders data into one dataframe to use ggplot2.
# Attach the murders statistics to every outline point of the matching state.
# With merge()'s defaults only regions present in both data frames are kept.
murderMap <- merge(statesMap, murders, by = "region")
# merge() sorts the result on the key and does not promise to keep the outline
# points of a state in their original sequence; polygons drawn from shuffled
# points come out scrambled. map_data() output is documented to carry an
# `order` column giving the drawing sequence, so sort on it when it is present.
if ("order" %in% names(murderMap)) {
  murderMap <- murderMap[order(murderMap[["order"]]), ]
}
# Show a compact overview of the combined data frame.
str(murderMap)
Now, let’s plot the number of murders on our map of the United States.
# Map of the states shaded by the Murders column: light (sky blue) for low
# values through to blue for high values, with black state borders.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = Murders)
) +
  geom_polygon(colour = "black") +
  # guide = "legend" asks for a stepped legend instead of a continuous bar.
  scale_fill_gradient(high = "blue", low = "skyblue", guide = "legend") +
  theme(
    # Axis titles: dark grey, size 15; the y title is rotated by 90 degrees.
    axis.title.x = element_text(
      colour = "grey20", size = 15, angle = 0,
      hjust = 0.5, vjust = 1, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 15, angle = 90,
      hjust = 0.5, vjust = 1, face = "plain"
    ),
    # Tick labels: same colour and size; the x labels are tilted by 60 degrees.
    axis.text.x = element_text(
      colour = "grey20", size = 15, angle = 60,
      hjust = 0.5, vjust = 0.5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 15, angle = 0,
      hjust = 1, vjust = 0, face = "plain"
    )
  )
Now, let’s see a map of the population.
# Map of the states shaded by the Population column on a grey-to-black
# gradient, with black state borders.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = Population)
) +
  geom_polygon(colour = "black") +
  # guide = "legend" asks for a stepped legend instead of a continuous bar.
  scale_fill_gradient(high = "black", low = "gray", guide = "legend") +
  theme(
    # Axis titles: dark grey, size 15; the y title is rotated by 90 degrees.
    axis.title.x = element_text(
      colour = "grey20", size = 15, angle = 0,
      hjust = 0.5, vjust = 1, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 15, angle = 90,
      hjust = 0.5, vjust = 1, face = "plain"
    ),
    # Tick labels: same colour and size; the x labels are tilted by 60 degrees.
    axis.text.x = element_text(
      colour = "grey20", size = 15, angle = 60,
      hjust = 0.5, vjust = 0.5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 15, angle = 0,
      hjust = 1, vjust = 0, face = "plain"
    )
  )
Now, let’s create a new variable which is the number of murders per 100,000 population.
# Murders per 100,000 inhabitants: the murders-to-population ratio scaled
# by 1e5, stored as a new MurderRate column.
murderMap$MurderRate <- (murderMap$Murders / murderMap$Population) * 1e5
Now, let’s generate a plot of the murder rate.
# Map of the states shaded by the MurderRate column: sky blue for low rates
# through to blue for high rates, with black state borders.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = MurderRate)
) +
  geom_polygon(colour = "black") +
  # guide = "legend" asks for a stepped legend instead of a continuous bar.
  scale_fill_gradient(high = "blue", low = "skyblue", guide = "legend") +
  theme(
    # Axis titles: dark grey, size 15; the y title is rotated by 90 degrees.
    axis.title.x = element_text(
      colour = "grey20", size = 15, angle = 0,
      hjust = 0.5, vjust = 1, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 15, angle = 90,
      hjust = 0.5, vjust = 1, face = "plain"
    ),
    # Tick labels: same colour and size; the x labels are tilted by 60 degrees.
    axis.text.x = element_text(
      colour = "grey20", size = 15, angle = 60,
      hjust = 0.5, vjust = 0.5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 15, angle = 0,
      hjust = 1, vjust = 0, face = "plain"
    )
  )
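Let’s restrict the colour scale to murder rates between 0 and 10. States with a rate above 10 are not removed from the map: they fall outside the scale limits and are drawn in the scale’s missing-value colour (grey by default).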
# Same murder-rate map, but with the fill scale limited to the range 0-10.
# Rates outside the limits are handled by the scale's out-of-bounds rule,
# which by default treats them as missing and draws them in the NA colour.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = MurderRate)
) +
  geom_polygon(colour = "black") +
  # guide = "legend" asks for a stepped legend instead of a continuous bar.
  scale_fill_gradient(
    high = "blue", low = "skyblue",
    limits = c(0, 10), guide = "legend"
  ) +
  theme(
    # Axis titles: dark grey, size 15; the y title is rotated by 90 degrees.
    axis.title.x = element_text(
      colour = "grey20", size = 15, angle = 0,
      hjust = 0.5, vjust = 1, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 15, angle = 90,
      hjust = 0.5, vjust = 1, face = "plain"
    ),
    # Tick labels: same colour and size; the x labels are tilted by 60 degrees.
    axis.text.x = element_text(
      colour = "grey20", size = 15, angle = 60,
      hjust = 0.5, vjust = 0.5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 15, angle = 0,
      hjust = 1, vjust = 0, face = "plain"
    )
  )