GWRFC is a function that replaces the linear regression model of Geographically Weighted Regression (GWR; Fotheringham, Charlton, and Brunsdon 1998) with the random forest algorithm (RF; Breiman 2001). To do this, it applies case weights, following the weighting scheme of GWR, in the bagging step of RF. As a result, GWRFC produces spatial representations of variable importance, classification probabilities and accuracy of RF models at the local level. To improve processing speed, GWRFC uses the ranger package to train RF models, together with parallel computing (only available on Windows OS).
To cite this work, please use: Santos F, Graw V, Bonilla S (2019) A geographically weighted random forest approach for evaluate forest change drivers in the Northern Ecuadorian Amazon. PLOS ONE 14(12): e0226224. https://doi.org/10.1371/journal.pone.0226224
To use GWRFC with the latest updates, please run or adapt the code below.
# Required libraries ----
# Packages needed by the demo below. Each name appears once (the list
# previously repeated "raster").
list.of.packages <- c(
  "caret", "digest", "doParallel", "foreach", "foreign", "fpc", "ggplot2",
  "gtools", "GWmodel", "jpeg", "kohonen", "mclust", "NbClust", "parallel",
  "plyr", "pracma", "ranger", "raster", "reshape", "rgdal", "rgeos",
  "scales", "spdep", "spgwr", "stringr", "tmap", "zoo"
)

# Install only the packages that are not already in the local library.
new.packages <- list.of.packages[
  !(list.of.packages %in% installed.packages()[, "Package"])
]
if (length(new.packages) > 0) {
  install.packages(new.packages)
}

# Attach every package. require() is kept (rather than library()) so that a
# single unavailable package does not abort the whole script; the packages
# that failed to load are then reported together in one warning instead of
# printing a list of TRUE/FALSE values.
pkg.loaded <- vapply(
  list.of.packages,
  function(pkg) require(pkg, character.only = TRUE),
  logical(1)
)
if (!all(pkg.loaded)) {
  warning(
    "These packages could not be loaded: ",
    paste(list.of.packages[!pkg.loaded], collapse = ", "),
    call. = FALSE
  )
}
# Install GWRFC ----
# devtools provides install_github(). Install it on demand so the GitHub
# install below does not fail with "could not find function" when devtools
# is missing; library() then stops with a clear error if it still cannot be
# attached.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)

# Fetch GWRFC from its GitHub repository and attach it.
install_github("FSantosCodes/GWRFC")
library(GWRFC)
# View the deforestation data ----
# Load the example dataset (presumably bundled with GWRFC -- confirm) and draw
# it with tmap in "view" mode over an OpenStreetMap basemap.
data(list = "deforestation")
tmap_mode(mode = "view")

# Polygons are coloured by the categorical "fao" column.
tm_basemap(server = "OpenStreetMap") +
  tm_shape(shp = deforestation) +
  tm_polygons(
    col = "fao",
    style = "cat",
    palette = "YlOrRd",
    title = "Annual deforestation rate 2000-2010 (FAO) - categorical (quantiles)"
  )
# Run GWRFC ----
# Logical arguments are spelled TRUE/FALSE: T and F are ordinary variables
# that can be reassigned, TRUE and FALSE cannot.
GWRFC(
  # Can be a spatial data frame (points or polygons) or the complete filename
  # of the shapefile to analyze.
  input_shapefile = deforestation,
  # Variables to remove when they are not informative. Use NA to keep all.
  remove_columns = c("ID_grid", "L_oth"),
  # The dependent variable to evaluate. It should be of factor or character
  # data type.
  dependent_varName = "fao",
  # The weighting function. See the help page for other available functions.
  kernel_function = "exponential",
  # TRUE for an adaptive kernel distance, FALSE for a fixed kernel distance.
  kernel_adaptative = TRUE,
  # As the kernel is adaptive, 400 is the minimum number of observations to
  # use in modelling.
  kernel_bandwidth = 400,
  # Improves accuracy (recommended) but is somewhat more costly to compute.
  upsampling = TRUE,
  # Save the RF models. Beware of hard disk space and extra processing time.
  save_models = TRUE,
  # Experimental; use with caution as it is sensitive to noise.
  enable_pdp = FALSE,
  # Number of CPU cores to use.
  number_cores = 3,
  # Check this folder for the GWRFC outputs.
  output_folder = "E:/demo/deforestation"
)
# Cluster the GWRFC LVI outputs ----
# plots is spelled TRUE rather than T: T is an ordinary variable that can be
# reassigned, TRUE cannot.
LVIclust(
  # Filename of the GWRFC LVI output.
  input_LVI = "E:/demo/deforestation/GWRFC_ADP_400_EX_LVI.shp",
  # No columns are removed before clustering.
  remove_columns = NA,
  # Hierarchical clustering is applied here.
  method_clustering = "ward.D2",
  # Number of clusters.
  ncluster = 4,
  # Available only for the hierarchical clustering methods and kohonen.
  plots = TRUE,
  # Check this folder for the outputs generated by the function.
  output_folder = "E:/demo/deforestation"
)