443-970-2353
[email protected]
CV Resume
The grammar of graphics package (ggplot2) is the best data visualization library in R. The concept of grammar of graphics is also implemented in Python with the library ggplot and it has similar commands to ggplot2.
Let's see some examples.
The data used is from here.
ggplot can be installed by simply using this command:
pip install ggplot
Learning ggplot is really easy, especially for people who already know how to use ggplot2 in R.
from ggplot import *
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
# Load the climate-change dataset directly from the edX course assets.
# NOTE(review): the URL in the published post was mangled by e-mail
# obfuscation (the segment around "@" was replaced by a placeholder). The path
# below is a reconstruction of the edX asset URL -- confirm that the
# course/run segment is correct and that the link still resolves.
clim = pd.read_csv(r'https://courses.edx.org/asset-v1:MITx+15.071x_2a+2T2015+type@asset+block/climate_change.csv')
A quick look at the data
# Preview the first three rows of the dataset.
clim.head(3)

# Temp against Year: green line, points, and a blue smoothing layer.
(
    ggplot(clim, aes('Year', 'Temp'))
    + geom_line(color='green')
    + geom_point()
    + ggtitle('Temperature Change')
    + xlab('')
    + ylab('Temperature')
    + stat_smooth(colour='blue', span=0.2)
)

# CO2 against Year, with dashed green vertical markers at 1990 and 2000.
(
    ggplot(clim, aes('Year', 'CO2'))
    + geom_line(color='black')
    + geom_point(color='red')
    + ggtitle("Carbondioxide Concentration")
    + xlab('')
    + ylab('ppm')
    + geom_vline(xintercept=[1990, 2000], linetype='dashed', color='green')
)

# Density of the CO2 column.
(
    ggplot(clim, aes('CO2'))
    + geom_density(fill='orange')
    + ggtitle("Carbondioxide Concentration")
    + xlab('')
)
Let's create a new variable that indicates whether the temperature anomaly is negative or positive.
# Boolean flag: True for rows whose Temp value is below zero.
clim['below_zero'] = clim['Temp'] < 0

# CO2 densities filled by the below_zero flag, drawn semi-transparent.
(
    ggplot(clim, aes('CO2', fill='below_zero'))
    + geom_density(alpha=0.5)
    + ggtitle("Carbondioxide Concentration")
)
We can also use the 'meat' dataset that comes with ggplot.
# Melt `meat` to long form (every non-date column becomes a
# variable/value pair) and draw one line per variable.
(
    ggplot(
        pd.melt(meat, id_vars=['date']),
        aes(x='date', y='value', color='variable'),
    )
    + geom_line()
)

# Long-form table restricted to three series, then a density per series.
meat_lng = pd.melt(
    meat[['date', 'beef', 'broilers', 'pork']],
    id_vars=['date'],
)
p = ggplot(
    aes(x='value', colour='variable', fill=True, alpha=0.3),
    data=meat_lng,
)
p + geom_density()
Now let's regenerate the figures above in R with ggplot2; only minor changes to the Python ggplot code are needed.
library(ggplot2)

# Restrict the plot output sent to the Jupyter front end to PNG.
options(jupyter.plot_mimetypes = "image/png")

# Directory holding the local copy of the data. The file is read through an
# explicit path rather than via setwd(), so the working directory of the
# session is left untouched.
data_dir <- "C:/Fish/Python/Python_vs_R"
clim <- read.csv(file.path(data_dir, "climate_change.csv"))

# Show the column names of the loaded data frame.
names(clim)
# Plot size used for both time-series figures below.
options(repr.plot.width = 8, repr.plot.height = 6)

# Temp against Year: green line, points, and a blue smoothing layer.
ggplot(clim, aes(Year, Temp)) +
  geom_line(color = "green") +
  geom_point() +
  ggtitle("Temperature Change") +
  xlab("") +
  ylab("Temperature") +
  stat_smooth(colour = "blue", span = 0.2)

# CO2 against Year, with long-dashed green markers at 1990 and 2000.
ggplot(clim, aes(Year, CO2)) +
  geom_line(color = "black") +
  geom_point(color = "red") +
  ggtitle("Carbondioxide Concentration") +
  xlab("") +
  ylab("ppm") +
  geom_vline(
    xintercept = c(1990, 2000),
    colour = "green",
    linetype = "longdash"
  )
# Logical flag: TRUE for rows whose Temp value is below zero.
clim$below_zero <- clim$Temp < 0

# Smaller plot size for the density figure.
options(repr.plot.width = 6, repr.plot.height = 4)

# CO2 densities filled by the below_zero flag, drawn semi-transparent.
ggplot(clim, aes(CO2, fill = below_zero)) +
  geom_density(alpha = 0.5) +
  ggtitle("Carbondioxide Concentration")
data(mpg)

# Base plot: hwy against displ, coloured by year treated as a factor.
g <- ggplot(mpg, aes(displ, hwy, color = factor(year)))

# Plain scatter plot.
g + geom_point()

# Scatter split into a drv-by-cyl grid, with margin panels included.
g +
  geom_point() +
  facet_grid(drv ~ cyl, margins = TRUE)

# Same grid plus a black linear fit (no confidence band) and axis labels.
g +
  geom_point() +
  facet_grid(drv ~ cyl, margins = TRUE) +
  geom_smooth(method = "lm", se = FALSE, size = 2, color = "black") +
  labs(x = "Displacement", y = "Highway Mileage")
data(diamonds)

# Price against depth, with points drawn semi-transparent.
g <- ggplot(diamonds, aes(depth, price))
g + geom_point(alpha = 1 / 3)

# Split carat into three bins at its tertiles. include.lowest = TRUE closes
# the first interval on the left, so rows holding the minimum carat fall in
# the first bin instead of becoming NA (which would add a spurious NA facet).
cutpoints <- quantile(diamonds$carat, seq(0, 1, length.out = 4), na.rm = TRUE)
diamonds$car2 <- cut(diamonds$carat, cutpoints, include.lowest = TRUE)

# Rebuild the base plot: the earlier `g` was created before `car2` was added
# to the data frame.
g <- ggplot(diamonds, aes(depth, price))

# One panel per cut (rows) and carat bin (columns).
g +
  geom_point(alpha = 1 / 3) +
  facet_grid(cut ~ car2)

# Same grid with a pink linear fit in every panel.
g +
  geom_point(alpha = 1 / 3) +
  facet_grid(cut ~ car2) +
  geom_smooth(method = "lm", size = 3, color = "pink")
# Box plot of price against carat, one panel per cut.
# NOTE(review): carat looks numeric here; with a continuous x and no group
# aesthetic, ggplot2 presumably draws a single box per panel -- confirm that
# is the intended figure.
ggplot(diamonds, aes(carat, price)) +
  geom_boxplot() +
  facet_grid(. ~ cut)