# Short R demonstration for I-TIPP / SHINE Fellows (find the slides in the file list above)
# October 11, 2015
# Uncomment the code for the first time you download the data:
# create a temporary file to store our downloaded crime data
# tmp<-tempfile()
# go to http://www.atlantapd.org/crimedatadownloads.aspx to find latest file path
# they change it each time they upload new data
# crimeURL<-"http://www.atlantapd.org/pdf/crime-data-downloads/85FB2BA9-E419-489A-8079-4ACEB419D937.zip"
# download file, unzip
# download.file(crimeURL,tmp)
# crimefile<-unzip(tmp, list=TRUE)
# crimeatl <- read.csv(unz(tmp, crimefile$Name[1]))
# write.csv(crimeatl, "~/Desktop/crimeatl.csv", row.names=FALSE)
# alternatively, download and unzip the file by hand and load the CSV directly
crimeatl <- read.csv(file = "~/Desktop/crimeatl.csv", header = TRUE)
# number of rows and columns in the raw data
dim(crimeatl)
## [1] 230779 23
# per-column overview of the raw data; note that many columns contain the
# literal string "NULL" and that x / y are summarised as categories, not numbers
summary(crimeatl)
## MI_PRINX offense_id
## -84.36061 : 1 Min. :-8.500e+01
## -84.37037 : 1 1st Qu.: 1.020e+08
## -84.39575 : 1 Median : 1.206e+08
## -84.39589 : 1 Mean : 4.248e+08
## -84.47724 : 1 3rd Qu.: 1.333e+08
## (230758 row(s) affected): 1 Max. : 1.527e+11
## (Other) :230773 NA's :15
## rpt_date
## 11/17/2009 : 171
## 06/01/2009 : 155
## 08/29/2011 : 155
## 07/14/2009 : 154
## 07/27/2009 : 152
## 06/29/2009 : 151
## (Other) :229841
## occur_date
## 08/01/2011 : 158
## 08/05/2009 : 152
## 12/04/2009 : 152
## 05/22/2009 : 151
## 11/17/2009 : 150
## 01/26/2009 : 146
## (Other) :229870
## occur_time
## 12:00:00 : 8819
## 18:00:00 : 6950
## 20:00:00 : 6677
## 22:00:00 : 6465
## 17:00:00 : 6355
## 23:00:00 : 6190
## (Other) :189323
## poss_date
## 11/17/2009 : 167
## 07/27/2009 : 157
## 01/02/2009 : 148
## 01/26/2009 : 148
## 06/09/2009 : 147
## 08/01/2011 : 147
## (Other) :229865
## poss_time beat apt_office_prefix
## 08:00:00 : 5178 Min. : 50.0 NULL :223251
## 12:00:00 : 4991 1st Qu.:208.0 APT : 3690
## 09:00:00 : 4215 Median :401.0 B : 257
## 15:00:00 : 4027 Mean :359.7 A : 213
## 07:00:00 : 4000 3rd Qu.:506.0 D : 187
## 16:00:00 : 3901 Max. :902.0 STE : 182
## (Other) :204467 NA's :24 (Other): 2999
## apt_office_num location
## NULL :181978 1801 HOWELL MILL RD NW : 1707
## A : 1536 3393 PEACHTREE RD NE : 1674
## B : 1411 2685 METROPOLITAN PKWY SW : 801
## 1 : 808 3393 PEACHTREE RD NE @LENOX MALL: 705
## 2 : 802 590 CASCADE AVE SW : 694
## 3 : 622 1275 CAROLINE ST NE : 609
## (Other): 43622 (Other) :224589
## MinOfucr MinOfibr_code dispo_code MaxOfnum_victims
## 0640 :53164 2305 :53104 NULL :202659 1 :203160
## 0511 :29923 2202 :29904 10 : 17449 2 : 18935
## 0710 :24428 2404 :19406 20 : 4106 3 : 3432
## 0630 :19293 2303 :19261 50 : 3829 0 : 2127
## 0690 :18560 2399 :17977 60 : 1733 4 : 1120
## 0670 :12841 2308 :12807 30 : 727 NULL : 1086
## (Other):72570 (Other):78320 (Other): 276 (Other): 919
## Shift Avg.Day loc_type
## Eve :74866 Sat :31292 20 :50448
## Morn :61208 Fri :30728 18 :44802
## Day :58025 Tue :30565 13 :29183
## Unk :36639 Wed :29770 26 :26864
## : 21 Mon :29668 NULL :22197
## 1 : 11 Thu :29653 99 : 7094
## (Other): 9 (Other):49103 (Other):50191
## UC2.Literal neighborhood npu
## LARCENY-FROM VEHICLE:63712 Downtown : 17117 M : 28434
## LARCENY-NON VEHICLE :55533 Midtown : 11931 E : 21618
## BURGLARY-RESIDENCE :38062 NULL : 9888 B : 17536
## AUTO THEFT :33031 Old Fourth Ward: 7023 V : 12386
## AGG ASSAULT :16277 West End : 5974 T : 12035
## ROBBERY-PEDESTRIAN :12372 Lenox : 4106 R : 10866
## (Other) :11792 (Other) :174740 (Other):127904
## x y
## -84.36212: 2945 33.84676: 2945
## -84.41278: 1768 33.80388: 1751
## -84.40902: 838 33.68274: 856
## -84.43276: 835 33.73812: 842
## -84.49773: 831 33.68677: 834
## -84.39201: 744 33.7532 : 716
## (Other) :222818 (Other) :222835
# peek at the first few records
head(crimeatl)
## MI_PRINX offense_id rpt_date
## 1 1160569 90360664 02/05/2009
## 2 1160570 90370891 02/06/2009
## 3 1160572 91681984 06/17/2009
## 4 1160573 72692336 02/24/2010
## 5 1160574 80081069 10/06/2010
## 6 1160575 82040835 02/27/2009
## occur_date occur_time
## 1 02/03/2009 13:50:00
## 2 02/06/2009 08:50:00
## 3 06/17/2009 14:00:00
## 4 02/24/2010 23:29:00
## 5 01/08/2008 13:14:00
## 6 07/21/2008 18:00:00
## poss_date poss_time beat
## 1 02/03/2009 15:00:00 305
## 2 02/06/2009 10:45:00 502
## 3 06/17/2009 15:00:00 604
## 4 02/24/2010 23:30:00 303
## 5 01/08/2008 13:15:00 603
## 6 07/21/2008 18:00:00 104
## apt_office_prefix apt_office_num location MinOfucr
## 1 NULL NULL 55 MCDONOUGH BLVD SW 0670
## 2 NULL NULL 464 ANSLEY WALK TER NW 0640
## 3 NULL 816 375 AUBURN AVE 0670
## 4 NULL NULL 600 MARTIN ST 0420
## 5 NULL NULL 447 ARNOLD STREET NE 0511
## 6 NULL NULL 1721 BROWNING ST 0531
## MinOfibr_code dispo_code MaxOfnum_victims Shift Avg.Day loc_type
## 1 2308 NULL 1 Day Tue 35
## 2 2305 NULL 1 Day Fri 18
## 3 2308 NULL 1 Day Wed NULL
## 4 1315K NULL 1 Morn Wed 26
## 5 2202 NULL 1 Day Tue 20
## 6 2202A 50 1 Eve Mon 20
## UC2.Literal neighborhood npu x y
## 1 LARCENY-NON VEHICLE South Atlanta Y -84.38654 33.72024
## 2 LARCENY-FROM VEHICLE Ansley Park E -84.37276 33.79685
## 3 LARCENY-NON VEHICLE Sweet Auburn M -84.37521 33.7554
## 4 AGG ASSAULT Pittsburgh V -84.3946 33.72212
## 5 BURGLARY-RESIDENCE Old Fourth Ward M -84.36896 33.76658
## 6 BURGLARY-RESIDENCE Mozley Park K -84.44342 33.75265
# frequency of each value in the day-of-week column; besides weekday names the
# output also lists values such as "Day", "Eve", "Morn" and "Unk"
table(crimeatl[["Avg.Day"]])
##
## 1 Day Eve Fri Mon Morn NULL Sat Sun Thu Tue
## 21 5 4 3 30728 29668 3 1 31292 27701 29653 30565
## Unk Wed
## 21365 29770
# uncomment if this is the first time you are using these packages:
# install.packages(c("ggplot2","scales","ggmap","ggthemes","lubridate","hexbin"))
library(ggplot2)
library(scales)
library(ggmap)
library(ggthemes)
# x and y were read in as factors rather than as numeric coordinates
summary(crimeatl[c("x", "y")])
## x y
## -84.36212: 2945 33.84676: 2945
## -84.41278: 1768 33.80388: 1751
## -84.40902: 838 33.68274: 856
## -84.43276: 835 33.73812: 842
## -84.49773: 831 33.68677: 834
## -84.39201: 744 33.7532 : 716
## (Other) :222818 (Other) :222835
# convert to numeric; go through character first so we get the printed
# values rather than the factor's internal integer codes
crimeatl[["lon"]] <- as.numeric(as.character(crimeatl[["x"]]))
## Warning: NAs introduced by coercion
# same conversion for the latitude column
crimeatl[["lat"]] <- as.numeric(as.character(crimeatl[["y"]]))
## Warning: NAs introduced by coercion
# something is still off: the minimum latitude is about -84, possibly a typo
summary(crimeatl[c("lon", "lat")])
## lon lat
## Min. :-84.55 Min. :-84.50
## 1st Qu.:-84.43 1st Qu.: 33.73
## Median :-84.40 Median : 33.76
## Mean :-84.41 Mean : 33.75
## 3rd Qu.:-84.37 3rd Qu.: 33.78
## Max. :-84.29 Max. : 33.89
## NA's :41 NA's :27
# we will just drop the rows whose latitude does not make sense.
# A bare `lat > 33` index is NA wherever the latitude is missing, and R turns
# every NA row index into an all-NA row (the source of the "Removed 27 rows
# containing missing values" plot warnings); which() keeps only the rows where
# the comparison is TRUE, so missing latitudes are dropped as well.
crimeatl <- crimeatl[which(crimeatl$lat > 33), ]
# plot every incident as a single point
ggplot(crimeatl, aes(lon, lat)) +
  geom_point()
## Warning: Removed 27 rows containing missing values (geom_point).
# bin the points into hexagons to cut down on overplotting
ggplot(crimeatl, aes(lon, lat)) +
  geom_hex()
## Warning: Removed 27 rows containing missing values (stat_hexbin).
# we can add a map projection, to fix the aspect ratio, and a yellow-to-red
# fill scale so the densest bins stand out.
# coord_quickmap() is a quick approximation of a map projection that keeps
# straight lines straight while setting a sensible lon/lat aspect ratio.
ggplot(data = crimeatl, aes(x = lon, y = lat)) +
  geom_hex() +
  coord_quickmap() +
  scale_fill_distiller(palette = "YlOrRd", breaks = pretty_breaks(n = 10))
## Warning: Removed 27 rows containing missing values (stat_hexbin).
# work out the extent of the data, ignoring missing coordinates
r.lon <- range(crimeatl[["lon"]], na.rm = TRUE)
r.lat <- range(crimeatl[["lat"]], na.rm = TRUE)
# bounding box ordered as: min lon, min lat, max lon, max lat
bounds <- c(
  r.lon[1], r.lat[1],
  r.lon[2], r.lat[2]
)
# get the map tiles for the bounding box.
# "toner" tiles are only available from the Stamen source, so name the source
# explicitly instead of relying on get_map() to reset it for us.
# NOTE(review): tile hosting may differ in newer ggmap releases -- confirm
# against the installed ggmap version.
atl.map <- get_map(location = bounds, source = "stamen", maptype = "toner",
                   zoom = 13, crop = FALSE)
## maptype = "toner" is only available with source = "stamen".
## resetting to source = "stamen"...
## 56 tiles needed, this may take a while (try a smaller zoom).
## Map from URL : http://tile.stamen.com/toner/13/2172/3275.png
## Map from URL : http://tile.stamen.com/toner/13/2173/3275.png
## Map from URL : http://tile.stamen.com/toner/13/2174/3275.png
## Map from URL : http://tile.stamen.com/toner/13/2175/3275.png
## Map from URL : http://tile.stamen.com/toner/13/2176/3275.png
## Map from URL : http://tile.stamen.com/toner/13/2177/3275.png
## Map from URL : http://tile.stamen.com/toner/13/2178/3275.png
## Map from URL : http://tile.stamen.com/toner/13/2172/3276.png
## Map from URL : http://tile.stamen.com/toner/13/2173/3276.png
## Map from URL : http://tile.stamen.com/toner/13/2174/3276.png
## Map from URL : http://tile.stamen.com/toner/13/2175/3276.png
## Map from URL : http://tile.stamen.com/toner/13/2176/3276.png
## Map from URL : http://tile.stamen.com/toner/13/2177/3276.png
## Map from URL : http://tile.stamen.com/toner/13/2178/3276.png
## Map from URL : http://tile.stamen.com/toner/13/2172/3277.png
## Map from URL : http://tile.stamen.com/toner/13/2173/3277.png
## Map from URL : http://tile.stamen.com/toner/13/2174/3277.png
## Map from URL : http://tile.stamen.com/toner/13/2175/3277.png
## Map from URL : http://tile.stamen.com/toner/13/2176/3277.png
## Map from URL : http://tile.stamen.com/toner/13/2177/3277.png
## Map from URL : http://tile.stamen.com/toner/13/2178/3277.png
## Map from URL : http://tile.stamen.com/toner/13/2172/3278.png
## Map from URL : http://tile.stamen.com/toner/13/2173/3278.png
## Map from URL : http://tile.stamen.com/toner/13/2174/3278.png
## Map from URL : http://tile.stamen.com/toner/13/2175/3278.png
## Map from URL : http://tile.stamen.com/toner/13/2176/3278.png
## Map from URL : http://tile.stamen.com/toner/13/2177/3278.png
## Map from URL : http://tile.stamen.com/toner/13/2178/3278.png
## Map from URL : http://tile.stamen.com/toner/13/2172/3279.png
## Map from URL : http://tile.stamen.com/toner/13/2173/3279.png
## Map from URL : http://tile.stamen.com/toner/13/2174/3279.png
## Map from URL : http://tile.stamen.com/toner/13/2175/3279.png
## Map from URL : http://tile.stamen.com/toner/13/2176/3279.png
## Map from URL : http://tile.stamen.com/toner/13/2177/3279.png
## Map from URL : http://tile.stamen.com/toner/13/2178/3279.png
## Map from URL : http://tile.stamen.com/toner/13/2172/3280.png
## Map from URL : http://tile.stamen.com/toner/13/2173/3280.png
## Map from URL : http://tile.stamen.com/toner/13/2174/3280.png
## Map from URL : http://tile.stamen.com/toner/13/2175/3280.png
## Map from URL : http://tile.stamen.com/toner/13/2176/3280.png
## Map from URL : http://tile.stamen.com/toner/13/2177/3280.png
## Map from URL : http://tile.stamen.com/toner/13/2178/3280.png
## Map from URL : http://tile.stamen.com/toner/13/2172/3281.png
## Map from URL : http://tile.stamen.com/toner/13/2173/3281.png
## Map from URL : http://tile.stamen.com/toner/13/2174/3281.png
## Map from URL : http://tile.stamen.com/toner/13/2175/3281.png
## Map from URL : http://tile.stamen.com/toner/13/2176/3281.png
## Map from URL : http://tile.stamen.com/toner/13/2177/3281.png
## Map from URL : http://tile.stamen.com/toner/13/2178/3281.png
## Map from URL : http://tile.stamen.com/toner/13/2172/3282.png
## Map from URL : http://tile.stamen.com/toner/13/2173/3282.png
## Map from URL : http://tile.stamen.com/toner/13/2174/3282.png
## Map from URL : http://tile.stamen.com/toner/13/2175/3282.png
## Map from URL : http://tile.stamen.com/toner/13/2176/3282.png
## Map from URL : http://tile.stamen.com/toner/13/2177/3282.png
## Map from URL : http://tile.stamen.com/toner/13/2178/3282.png
# draw the basemap and overlay the hexbin counts
ggmap(atl.map) +
  geom_hex(aes(x = lon, y = lat), data = crimeatl) +
  scale_fill_distiller(breaks = pretty_breaks(n = 10), palette = "YlOrRd")
## Warning: Removed 27 rows containing missing values (stat_hexbin).
# same map with the lat/lon marks removed, semi-transparent hexagons,
# and a different number of bins
ggmap(atl.map) +
  geom_hex(aes(x = lon, y = lat), data = crimeatl, bins = 100, alpha = .7) +
  scale_fill_distiller(breaks = pretty_breaks(n = 10), palette = "YlOrRd") +
  theme_map() +
  theme(legend.position = "right")
## Warning: Removed 27 rows containing missing values (stat_hexbin).
# just look at homicide.
# which() keeps only rows where the comparison is TRUE; indexing with the raw
# logical vector would turn every NA comparison into an all-NA row in the result.
murderatl <- crimeatl[which(crimeatl$UC2.Literal == "HOMICIDE"), ]
# is the yearly number of homicides going up or down?
library(lubridate)
# parse the month/day/year occurrence date, then pull out the year
murderatl[["murderdate"]] <- mdy(murderatl[["occur_date"]])
murderatl[["murderyear"]] <- year(murderatl[["murderdate"]])
table(murderatl[["murderyear"]])
##
## 2001 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
## 1 2 2 2 76 88 88 82 83 92 69
# the 2015 data only run through week 40, so scale the 69 homicides recorded
# so far up to a full 52-week year
69 / (40 / 52)
## [1] 89.7
# homicides by year (rows) and calendar month (columns)
murderatl[["murdermonth"]] <- month(murderatl[["murderdate"]])
table(murderatl[["murderyear"]], murderatl[["murdermonth"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 2001 0 0 0 0 0 0 0 1 0 0 0 0
## 2006 0 0 0 1 0 0 1 0 0 0 0 0
## 2007 0 1 0 0 0 0 0 1 0 0 0 0
## 2008 0 0 0 1 0 0 0 0 1 0 0 0
## 2009 6 9 5 5 9 9 7 6 4 8 1 7
## 2010 5 4 9 9 6 6 7 10 11 9 6 6
## 2011 6 3 6 7 8 11 7 4 8 7 10 11
## 2012 5 3 10 5 5 9 8 8 6 10 8 5
## 2013 7 7 3 6 7 7 8 9 5 7 10 7
## 2014 6 1 12 4 4 12 8 8 10 9 12 6
## 2015 6 8 5 8 11 5 14 4 7 1 0 0
# map the homicides on the same basemap, with coarser bins and a red scale
ggmap(atl.map) +
  geom_hex(aes(x = lon, y = lat), data = murderatl, bins = 40, alpha = .7) +
  scale_fill_distiller(palette = "Reds") +
  theme_map() +
  theme(legend.position = "right")
## Warning: Removed 27 rows containing missing values (stat_hexbin).