generate csv files
Opened this issue · 0 comments
Hello,
I am trying to generate the two CSV files with this script:
`
#' Attach a package, installing it from CRAN first when it is not available.
#'
#' Failure is not fatal: when the package still cannot be attached after the
#' installation attempt, `require()` warns and `FALSE` is returned.
#'
#' @param x Package name, given unquoted (`packages(knitr)`) or as a string.
#' @return Invisibly, `TRUE` if the package is attached, `FALSE` otherwise.
packages <- function(x) {
  # Capture the argument unevaluated so that a bare package name can be passed.
  pkg <- as.character(substitute(x))
  # require() (not library()) on purpose: its logical result decides whether
  # an installation attempt is needed.
  attached <- require(pkg, character.only = TRUE)
  if (!attached) {
    # Download over HTTPS rather than plain HTTP.
    install.packages(pkgs = pkg, repos = "https://cran.r-project.org")
    attached <- require(pkg, character.only = TRUE)
  }
  invisible(attached)
}
packages(knitr)
packages(sf)
packages(rmarkdown)
packages(rworldxtra)
packages(tidyverse)
packages(shiny)
packages(rgeos)
library("readxl")
select <- dplyr::select
source("functions_needed.R")
# Load the species-level occurrence table.
phy_sp <- read_excel("data/derived_data/lucru_phy_sp.xlsx")

# Names of the (at most) 100 species with the most records, most frequent
# first. pull() keeps the names only; unlist() on the whole summary table
# would also append the record counts (coerced to strings) to the list.
orderedSpeciesList <- phy_sp %>%
  ungroup() %>%
  group_by(scientificnameaccepted) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(100) %>%
  pull(scientificnameaccepted)

# Keep only the records that belong to those species.
commonSpecies <- phy_sp[phy_sp$scientificnameaccepted %in% orderedSpeciesList, ]
# For every common species, write two CSV files:
#   product/csv_files/<species>-<begin>-<end>.csv   records, completed with
#                                                   zero-occurrence rows
#   product/dupl/Dupl_<species> <begin>-<end>.csv   suspected duplicates
#                                                   (only when any are found)
begin <- 1956 # Range of years to export (also used in the file names)
end <- 1960

if (nrow(commonSpecies) > 0) {
  # Fail early, with one clear message, when an input column used below is
  # absent (instead of a dplyr error in the middle of the pipeline).
  required_cols <- c(
    "datasetID", "abbr", "year", "aphiaid", "scientificnameaccepted", "date",
    "decimallongitude", "decimallatitude", "season", "eventid", "mrgid",
    "month", "occurrence"
  )
  missing_cols <- setdiff(required_cols, names(commonSpecies))
  if (length(missing_cols) > 0) {
    stop(
      "commonSpecies is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # write.csv() does not create missing directories.
  for (out_dir in c("product/dupl", "product/csv_files")) {
    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
  }
}

species_names <- unique(commonSpecies$scientificnameaccepted)
for (ii in seq_along(species_names)) {
  targetSpecies <- species_names[ii] # Species to work with

  # Datasets in which the target species occurs at least once. `abbr` is used
  # as the dataset key throughout (presumably a dataset abbreviation --
  # TODO confirm against the data preparation step).
  selectedDatasets <- commonSpecies %>%
    ungroup() %>%
    dplyr::filter(scientificnameaccepted == targetSpecies) %>%
    distinct(abbr) %>%
    pull(abbr)

  # Within each dataset and year, add a row with occurrence = 0 for every
  # taxon x sampling event (date, position, season) combination that has no
  # record, then keep the rows of the target species.
  phy_c <- commonSpecies %>%
    dplyr::filter(year %in% begin:end) %>%
    dplyr::filter(abbr %in% selectedDatasets) %>%
    group_by(abbr, year) %>%
    tidyr::complete(
      nesting(aphiaid, scientificnameaccepted),
      nesting(date, decimallongitude, decimallatitude, season),
      fill = list(occurrence = 0)
    ) %>%
    ungroup() %>%
    unite(
      date_decimallongitude_decimallatitude,
      date, decimallongitude, decimallatitude,
      remove = FALSE
    ) %>%
    dplyr::filter(scientificnameaccepted == targetSpecies)

  # Rows that are identical in everything except datasetID are reported as
  # suspected duplicates, together with the row that precedes each of them.
  phy_sorted <- phy_c %>%
    arrange(aphiaid, date, decimallongitude, decimallatitude, occurrence, season)
  dup_zero <- phy_sorted %>%
    dplyr::select(-datasetID) %>%
    duplicated() %>%
    which()
  # union() keeps each row once even when three or more duplicates are
  # adjacent.
  dbs_zero <- phy_sorted %>%
    slice(sort(union(dup_zero - 1, dup_zero)))

  if (nrow(dbs_zero) > 0) {
    write.csv(
      dbs_zero,
      paste0("product/dupl/Dupl_", targetSpecies, " ", begin, "-", end, ".csv"),
      row.names = FALSE
    )
  }

  # Collapse repeated records and keep the export columns only.
  phy_c <- phy_c %>%
    distinct(
      aphiaid, scientificnameaccepted, date, decimallongitude,
      decimallatitude, year, season, occurrence,
      .keep_all = TRUE
    ) %>%
    dplyr::select(
      datasetID, abbr, year, aphiaid, scientificnameaccepted, date,
      decimallongitude, decimallatitude, season, eventid, mrgid, month,
      occurrence
    )
  write.csv(
    phy_c,
    paste0("product/csv_files/", targetSpecies, "-", begin, "-", end, ".csv"),
    row.names = FALSE
  )
}
# Free the species-level objects before loading the genus-level table. Only
# objects that actually exist are removed, so nothing is reported when the
# species loop produced no `phy_c`.
rm(list = intersect(c("phy_c", "phy_sp"), ls()))

# Load the genus-level occurrence table.
phy_gen <- read_excel("data/derived_data/lucru_phy_gen.xlsx")

# Names of the (at most) 100 genera with the most records, most frequent
# first. pull() keeps the names only; unlist() on the whole summary table
# would also append the record counts (coerced to strings) to the list.
orderedGenusList <- phy_gen %>%
  ungroup() %>%
  group_by(genus) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(100) %>%
  pull(genus)

# Keep only the records that belong to those genera.
commonGenus <- phy_gen[phy_gen$genus %in% orderedGenusList, ]
# For every common genus, write two CSV files:
#   product/csv_files/<genus>-<begin>-<end>.csv   records, completed with
#                                                 zero-occurrence rows
#   product/dupl/Dupl_<genus> <begin>-<end>.csv   suspected duplicates
#                                                 (only when any are found)
begin <- 1956 # Range of years to export (also used in the file names)
end <- 1960

if (nrow(commonGenus) > 0) {
  # Fail early, with one clear message, when an input column used below is
  # absent (instead of a dplyr error in the middle of the pipeline).
  required_cols <- c(
    "datasetID", "abbr", "year", "genus", "aphiaid", "scientificnameaccepted",
    "date", "decimallongitude", "decimallatitude", "season", "eventid",
    "mrgid", "month", "occurrence"
  )
  missing_cols <- setdiff(required_cols, names(commonGenus))
  if (length(missing_cols) > 0) {
    stop(
      "commonGenus is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # write.csv() does not create missing directories.
  for (out_dir in c("product/dupl", "product/csv_files")) {
    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
  }
}

genus_names <- unique(commonGenus$genus)
for (ii in seq_along(genus_names)) {
  targetGen <- genus_names[ii] # Genus to work with

  # Datasets in which the target genus occurs at least once. `abbr` is used
  # as the dataset key throughout (presumably a dataset abbreviation --
  # TODO confirm against the data preparation step).
  selectedDatasets <- commonGenus %>%
    ungroup() %>%
    dplyr::filter(genus == targetGen) %>%
    distinct(abbr) %>%
    pull(abbr)

  # Within each dataset and year, add a row with occurrence = 0 for every
  # genus x sampling event (date, position, season) combination that has no
  # record, then keep the rows of the target genus.
  phy_c_g <- commonGenus %>%
    dplyr::filter(year %in% begin:end) %>%
    dplyr::filter(abbr %in% selectedDatasets) %>%
    group_by(abbr, year) %>%
    tidyr::complete(
      nesting(genus),
      nesting(date, decimallongitude, decimallatitude, season),
      fill = list(occurrence = 0)
    ) %>%
    ungroup() %>%
    unite(
      date_decimallongitude_decimallatitude,
      date, decimallongitude, decimallatitude,
      remove = FALSE
    ) %>%
    dplyr::filter(genus == targetGen)

  # Rows that are identical in everything except datasetID are reported as
  # suspected duplicates, together with the row that precedes each of them.
  phy_sorted_g <- phy_c_g %>%
    arrange(genus, date, decimallongitude, decimallatitude, occurrence, season)
  dup_zero_g <- phy_sorted_g %>%
    dplyr::select(-datasetID) %>%
    duplicated() %>%
    which()
  # union() keeps each row once even when three or more duplicates are
  # adjacent.
  dbs_zero_g <- phy_sorted_g %>%
    slice(sort(union(dup_zero_g - 1, dup_zero_g)))

  if (nrow(dbs_zero_g) > 0) {
    # Use the loop's own variables: no `params` object exists in this script.
    write.csv(
      dbs_zero_g,
      paste0("product/dupl/Dupl_", targetGen, " ", begin, "-", end, ".csv"),
      row.names = FALSE
    )
  }

  # Collapse repeated records and keep the export columns only.
  phy_c_g <- phy_c_g %>%
    distinct(
      genus, date, decimallongitude, decimallatitude, year, season,
      occurrence,
      .keep_all = TRUE
    ) %>%
    dplyr::select(
      datasetID, abbr, year, aphiaid, scientificnameaccepted, date,
      decimallongitude, decimallatitude, season, eventid, mrgid, month,
      occurrence
    )
  write.csv(
    phy_c_g,
    paste0("product/csv_files/", targetGen, "-", begin, "-", end, ".csv"),
    row.names = FALSE
  )
}`
But I got two errors like this:
Error: `distinct()` must use existing variables.
x `abbr` not found in `.data`.
What does `abbr` represent? I have attached the Excel files that I used in the script above.
Cheers!
George
lucru_phy_gen.xlsx
lucru_phy_sp.xlsx