Iterating through a shapefile in a loop
javedali99 opened this issue · 1 comments
javedali99 commented
Hi, I am trying to use a loop to iterate through a shapefile and select a particular area to subset the netcdf data using salem.roi()
function. See the code below:
Code
# import libraries
import xarray as xr
import geopandas as gpd
import pandas as pd
from tqdm import tqdm
import salem
import os
# path to the folder containing the netcdf files
data_path = "data/processed-data/final-data/"
# path to the folder to save the csv files
save_path = "data/processed-data/final-data/CSVs/Counties"
# list of netcdf files to convert to csv
file_list = ["precipitation_us_coastline.nc", "wind_us_coastline_daily_max.nc", "rh_us_coastline_daily_max.nc",
"tmin_us_coastline.nc", "tmax_us_coastline.nc", "discharge_us_coastline.nc"]
# path to the shapefile
shapefile_path = "data/US_counties.shp"
# load the shapefile
us_counties = salem.read_shapefile('data/shapefile.shp')
# loop
for i in tqdm(range(len(us_counties))):
# select the county
county = us_counties[i]
# create a loop for each dataset in file_list
for file in file_list:
# open the netcdf file
data = xr.open_dataset(data_path + file)
# subset the netcdf data for the county
data_subset = data.salem.roi(shape=county.geometry)
# convert the subset data to a dataframe
df = data_subset.to_dataframe()
# save the dataframe as a csv file
df.to_csv(save_path + file[:-3] + "_" + str(i) + ".csv")
# close the netcdf files
data.close()
data_subset.close()
Running this code I am getting the following error. I do not know how to solve this issue. Could you please help me how to iterate through each row of a shapefile and subset the data based on that row (county) coordinates?
0%| | 0/248 [00:00<?, ?it/s]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~\AppData\Roaming\Python\Python38\site-packages\pandas\core\indexes\base.py:3361, in Index.get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
File ~\AppData\Roaming\Python\Python38\site-packages\pandas\_libs\index.pyx:76, in pandas._libs.index.IndexEngine.get_loc()
File ~\AppData\Roaming\Python\Python38\site-packages\pandas\_libs\index.pyx:108, in pandas._libs.index.IndexEngine.get_loc()
File pandas\_libs\hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas\_libs\hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Input In [35], in <cell line: 2>()
1 # create a loop for selecting each county coordinates from a shapefile and subset the netcdf data from each dataset in file_list for that county and save each county's data as a csv file
2 for i in tqdm(range(len(us_counties))):
3
4 # select the county coordinates
----> 5 county = us_counties[i]
7 # create a loop for each dataset in file_list
8 for file in file_list:
9
10 # open the netcdf file
File ~\AppData\Roaming\Python\Python38\site-packages\geopandas\geodataframe.py:1299, in GeoDataFrame.__getitem__(self, key)
1293 def __getitem__(self, key):
1294 """
1295 If the result is a column containing only 'geometry', return a
1296 GeoSeries. If it's a DataFrame with a 'geometry' column, return a
1297 GeoDataFrame.
1298 """
-> 1299 result = super(GeoDataFrame, self).__getitem__(key)
1300 geo_col = self._geometry_column_name
1301 if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
File ~\AppData\Roaming\Python\Python38\site-packages\pandas\core\frame.py:3458, in DataFrame.__getitem__(self, key)
3456 if self.columns.nlevels > 1:
3457 return self._getitem_multilevel(key)
-> 3458 indexer = self.columns.get_loc(key)
3459 if is_integer(indexer):
3460 indexer = [indexer]
File ~\AppData\Roaming\Python\Python38\site-packages\pandas\core\indexes\base.py:3363, in Index.get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3365 if is_scalar(key) and isna(key) and not self.hasnans:
3366 raise KeyError(key)
KeyError: 0
Shapefile data looks like this:
fmaussion commented
Hi, this does not look a salem issue - you need to learn how to iterate over pandas dataframes first: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iterrows.html