EMODnet/EMODnet-Biology-Phytoplankton-Greater-NorthSea

generate csv files

Opened this issue · 0 comments

Hello,

I am trying to generate the two CSV files with this script:
`
# Attach a package, installing it from CRAN first when it is not available.
#
# x: the package name, given either unquoted (packages(sf)) or as a string.
#
# Returns TRUE invisibly once the package is attached. Stops with an error if
# the package still cannot be loaded after the installation attempt.
packages <- function(x) {
  # Read the argument from the call itself so that a bare name is not evaluated.
  pkg <- as.character(match.call()[[2]])
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkgs = pkg, repos = "https://cran.r-project.org")
  }
  # library() raises an error on failure, whereas require() only returns FALSE
  # and would let the script continue without the package.
  library(pkg, character.only = TRUE)
  invisible(TRUE)
}

# Attach the packages this script relies on; packages() installs any that are
# not yet available before attaching them.
packages(knitr)
packages(sf)
packages(rmarkdown)
packages(rworldxtra)
packages(tidyverse)
packages(shiny)
packages(rgeos)
# readxl is attached with a plain library() call, so it must already be installed.
library("readxl")
# Bind the bare name `select` to dplyr's implementation.
# NOTE(review): presumably guards against another attached package masking
# select() - confirm.
select <- dplyr::select

# Run the project's helper script (its contents are not visible here).
source("functions_needed.R")

# Read the species-level records from the derived-data workbook.
phy_sp <- read_excel("data/derived_data/lucru_phy_sp.xlsx")

# Names of the (at most) 100 accepted species with the most records.
# pull() extracts only the name column; unlist() on the two-column summary
# would also mix the counts, converted to strings, into this vector.
orderedSpeciesList <- phy_sp %>%
  ungroup() %>%
  group_by(scientificnameaccepted) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(100) %>%
  pull(scientificnameaccepted)

# Keep only the records that belong to those species.
commonSpecies <- phy_sp[phy_sp$scientificnameaccepted %in% orderedSpeciesList, ]

# For every species in commonSpecies, write two CSV files for the years
# begin:end:
#   * product/csv_files/<species>-<begin>-<end>.csv - records completed with
#     occurrence = 0 for sampling events where the species was not recorded;
#   * product/dupl/Dupl_<species> <begin>-<end>.csv - rows flagged as
#     duplicates, written only when there are any.

# Inclusive range of years to export.
begin <- 1956
end <- 1960

# Fail early, with a clear message, when a column used below is absent;
# otherwise the pipeline stops part-way with e.g. "abbr not found in .data".
# NOTE(review): abbr is presumably a dataset abbreviation added during the
# upstream data preparation - confirm against the project's data-preparation scripts.
requiredCols <- c(
  "datasetID", "abbr", "year", "aphiaid", "scientificnameaccepted", "date",
  "decimallongitude", "decimallatitude", "season", "eventid", "mrgid",
  "month", "occurrence"
)
missingCols <- setdiff(requiredCols, names(commonSpecies))
if (length(missingCols) > 0) {
  stop(
    "commonSpecies is missing required column(s): ",
    paste(missingCols, collapse = ", "),
    call. = FALSE
  )
}

# write.csv() does not create directories, so make sure the targets exist.
for (outDir in c("product/dupl", "product/csv_files")) {
  dir.create(outDir, recursive = TRUE, showWarnings = FALSE)
}

speciesNames <- unique(commonSpecies$scientificnameaccepted)

# seq_along() yields an empty loop when there are no species, whereas
# 1:length() would iterate over c(1, 0).
for (ii in seq_along(speciesNames)) {
  targetSpecies <- speciesNames[ii] # Species to work with

  # Datasets (abbr) that contain at least one record of the target species.
  selectedDatasets <- commonSpecies %>%
    ungroup() %>%
    dplyr::filter(scientificnameaccepted == targetSpecies) %>%
    distinct(abbr) %>%
    pull(abbr)

  phy_c <- commonSpecies %>%
    dplyr::filter(year %in% begin:end) %>%
    dplyr::filter(abbr %in% selectedDatasets) %>%
    group_by(abbr, year) %>%
    # Within each dataset and year, pair every observed taxon with every
    # observed sampling event; pairs without a record get occurrence = 0.
    tidyr::complete(
      nesting(aphiaid, scientificnameaccepted),
      nesting(date, decimallongitude, decimallatitude, season),
      fill = list(occurrence = 0)
    ) %>%
    ungroup() %>%
    unite(
      date_decimallongitude_decimallatitude,
      date, decimallongitude, decimallatitude,
      remove = FALSE
    ) %>%
    dplyr::filter(scientificnameaccepted == targetSpecies)

  # Sort once and reuse the result, so the duplicate positions found below
  # index the same row order that is sliced afterwards.
  phy_sorted <- phy_c %>%
    arrange(aphiaid, date, decimallongitude, decimallatitude, occurrence, season)

  # Positions of rows that repeat an earlier row in every column but datasetID.
  dup_zero <- phy_sorted %>%
    select(-datasetID) %>%
    duplicated() %>%
    which()

  # Report each flagged row together with the row directly before it;
  # unique() keeps a row from being listed twice when flagged rows are adjacent.
  dbs_zero <- phy_sorted %>%
    slice(sort(unique(c(dup_zero, dup_zero - 1))))

  if (nrow(dbs_zero) > 0) {
    write.csv(
      dbs_zero,
      paste0("product/dupl/Dupl_", targetSpecies, " ", begin, "-", end, ".csv"),
      row.names = FALSE
    )
  }

  # Drop repeated records and keep only the columns wanted in the export.
  phy_c <- phy_c %>%
    distinct(
      aphiaid, scientificnameaccepted, date, decimallongitude,
      decimallatitude, year, season, occurrence,
      .keep_all = TRUE
    ) %>%
    select(
      datasetID, abbr, year, aphiaid, scientificnameaccepted, date,
      decimallongitude, decimallatitude, season, eventid, mrgid, month,
      occurrence
    )

  write.csv(
    phy_c,
    paste0("product/csv_files/", targetSpecies, "-", begin, "-", end, ".csv"),
    row.names = FALSE
  )
}

# Drop the species-level tables before the genus-level data is loaded.
rm(phy_c)
rm(phy_sp)

# Read the genus-level records from the derived-data workbook.
phy_gen <- read_excel("data/derived_data/lucru_phy_gen.xlsx")

# Names of the (at most) 100 genera with the most records.
# pull() extracts only the genus column; unlist() on the two-column summary
# would also mix the counts, converted to strings, into this vector.
orderedGenusList <- phy_gen %>%
  ungroup() %>%
  group_by(genus) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(100) %>%
  pull(genus)

# Keep only the records that belong to those genera.
commonGenus <- phy_gen[phy_gen$genus %in% orderedGenusList, ]

# For every genus in commonGenus, write two CSV files for the years begin:end:
#   * product/csv_files/<genus>-<begin>-<end>.csv - records completed with
#     occurrence = 0 for sampling events where the genus was not recorded;
#   * product/dupl/Dupl_<genus> <begin>-<end>.csv - rows flagged as
#     duplicates, written only when there are any.

# Inclusive range of years to export.
begin <- 1956
end <- 1960

# Fail early, with a clear message, when a column used below is absent;
# otherwise the pipeline stops part-way with e.g. "abbr not found in .data".
# NOTE(review): abbr is presumably a dataset abbreviation added during the
# upstream data preparation - confirm against the project's data-preparation scripts.
requiredCols <- c(
  "datasetID", "abbr", "year", "aphiaid", "scientificnameaccepted", "genus",
  "date", "decimallongitude", "decimallatitude", "season", "eventid",
  "mrgid", "month", "occurrence"
)
missingCols <- setdiff(requiredCols, names(commonGenus))
if (length(missingCols) > 0) {
  stop(
    "commonGenus is missing required column(s): ",
    paste(missingCols, collapse = ", "),
    call. = FALSE
  )
}

# write.csv() does not create directories, so make sure the targets exist.
for (outDir in c("product/dupl", "product/csv_files")) {
  dir.create(outDir, recursive = TRUE, showWarnings = FALSE)
}

genusNames <- unique(commonGenus$genus)

# seq_along() yields an empty loop when there are no genera, whereas
# 1:length() would iterate over c(1, 0).
for (ii in seq_along(genusNames)) {
  targetGen <- genusNames[ii] # Genus to work with

  # Datasets (abbr) that contain at least one record of the target genus.
  selectedDatasets <- commonGenus %>%
    ungroup() %>%
    dplyr::filter(genus == targetGen) %>%
    distinct(abbr) %>%
    pull(abbr)

  phy_c_g <- commonGenus %>%
    dplyr::filter(year %in% begin:end) %>%
    dplyr::filter(abbr %in% selectedDatasets) %>%
    group_by(abbr, year) %>%
    # Within each dataset and year, pair every observed genus with every
    # observed sampling event; pairs without a record get occurrence = 0.
    tidyr::complete(
      nesting(genus),
      nesting(date, decimallongitude, decimallatitude, season),
      fill = list(occurrence = 0)
    ) %>%
    ungroup() %>%
    unite(
      date_decimallongitude_decimallatitude,
      date, decimallongitude, decimallatitude,
      remove = FALSE
    ) %>%
    dplyr::filter(genus == targetGen)

  # Sort once and reuse the result, so the duplicate positions found below
  # index the same row order that is sliced afterwards.
  phy_sorted_g <- phy_c_g %>%
    arrange(genus, date, decimallongitude, decimallatitude, occurrence, season)

  # Positions of rows that repeat an earlier row in every column but datasetID.
  dup_zero_g <- phy_sorted_g %>%
    select(-datasetID) %>%
    duplicated() %>%
    which()

  # Report each flagged row together with the row directly before it;
  # unique() keeps a row from being listed twice when flagged rows are adjacent.
  dbs_zero_g <- phy_sorted_g %>%
    slice(sort(unique(c(dup_zero_g, dup_zero_g - 1))))

  if (nrow(dbs_zero_g) > 0) {
    # Build the name from the loop's own variables: this script defines no
    # `params` object, so params$targetGen etc. cannot be used here.
    write.csv(
      dbs_zero_g,
      paste0("product/dupl/Dupl_", targetGen, " ", begin, "-", end, ".csv"),
      row.names = FALSE
    )
  }

  # Drop repeated records and keep only the columns wanted in the export.
  phy_c_g <- phy_c_g %>%
    distinct(
      genus, date, decimallongitude, decimallatitude, year, season,
      occurrence,
      .keep_all = TRUE
    ) %>%
    select(
      datasetID, abbr, year, aphiaid, scientificnameaccepted, date,
      decimallongitude, decimallatitude, season, eventid, mrgid, month,
      occurrence
    )

  write.csv(
    phy_c_g,
    paste0("product/csv_files/", targetGen, "-", begin, "-", end, ".csv"),
    row.names = FALSE
  )
}`

But I got two errors like this:

Error: distinct() must use existing variables.
x abbr not found in .data.

What does `abbr` represent? I have attached the Excel files that I used in the script above.

Cheers!
George
lucru_phy_gen.xlsx
lucru_phy_sp.xlsx