DOC: Example of thresholds functionality?
DamienIrving opened this issue · 2 comments
- icclim version: 6.4.0
- Python version: 3.10.8
Description
I'm trying to use the thresholds functionality described here and not having success, so I was wondering if it would be possible to add an example to the documentation?
Minimal reproducible example
In this example (see full notebook), I want to calculate the TX90p
index for a future period using the same 90th percentile thresholds that I used for an historical period.
import icclim
import xarray as xr
hist_files = [
'tasmax_historical_day_19800101-19801231.nc',
'tasmax_historical_day_19810101-19811231.nc',
'tasmax_historical_day_19820101-19821231.nc',
]
ds_hist = xr.open_mfdataset(hist_files)
icclim.index(
index_name="TX90p",
in_files=ds_hist,
var_name="tasmax",
slice_mode="year",
out_file='TX90p.nc',
save_thresholds=True,
)
ds_thresholds = xr.open_dataset('TX90p.nc')
ds_thresholds = ds_thresholds[['tasmax_thresholds']].squeeze()
ds_thresholds = ds_thresholds.rename({'tasmax_thresholds': 'tasmax'})
ssp_files = [
'tasmax_ssp370_day_20950101-20951231.nc',
'tasmax_ssp370_day_20960101-20961231.nc',
'tasmax_ssp370_day_20970101-20971231.nc',
]
ds_ssp = xr.open_mfdataset(ssp_files)
icclim.index(
index_name="TX90p",
in_files={'tasmax': ds_ssp, 'thresholds': ds_thresholds},
slice_mode="year",
out_file='result.nc',
)
Output received
2023-08-25 16:42:09,330 --- icclim 6.4.0
2023-08-25 16:42:09,333 --- BEGIN EXECUTION
2023-08-25 16:42:09,334 Processing: 0%
---------------------------------------------------------------------------
MergeError Traceback (most recent call last)
Cell In [8], line 1
----> 1 icclim.index(
2 index_name="TX90p",
3 in_files={'tasmax': ds_ssp, 'thresholds': ds_thresholds},
4 slice_mode="year",
5 out_file='result.nc',
6 )
File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/icclim/main.py:413, in index(***failed resolving arguments***)
411 elif isinstance(threshold, Sequence):
412 threshold = [build_configured_threshold(t) for t in threshold]
--> 413 climate_vars_dict = read_in_files(
414 in_files=in_files,
415 var_names=var_name,
416 threshold=threshold,
417 standard_index=standard_index,
418 )
419 # We use groupby instead of resample when there is a single variable that must be
420 # compared to its reference period values.
421 is_compared_to_reference = _must_add_reference_var(
422 climate_vars_dict, base_period_time_range
423 )
File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/icclim/models/climate_variable.py:216, in read_in_files(in_files, var_names, threshold, standard_index)
213 return in_files
214 else:
215 # case of in_files={tasmax: "tasmax.nc"}
--> 216 return _build_in_file_dict(
217 in_files=list(in_files.values()),
218 standard_index=standard_index,
219 threshold=threshold,
220 var_names=list(in_files.keys()),
221 )
222 else:
223 # case of in_files="tasmax.nc" and var_names="tasmax"
224 return _build_in_file_dict(in_files, var_names, threshold, standard_index)
File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/icclim/models/climate_variable.py:236, in _build_in_file_dict(in_files, var_names, threshold, standard_index)
227 def _build_in_file_dict(
228 in_files: InFileBaseType,
229 var_names: Sequence[str],
230 threshold: Threshold | Sequence[Threshold] | None,
231 standard_index: StandardIndex | None,
232 ):
233 standard_var = (
234 standard_index.input_variables[0] if standard_index is not None else None
235 )
--> 236 input_dataset = read_dataset(
237 in_files=in_files, standard_var=standard_var, var_name=var_names
238 )
239 var_names = guess_var_names(
240 ds=input_dataset, standard_index=standard_index, var_names=var_names
241 )
242 if threshold is not None:
File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/icclim/pre_processing/input_parsing.py:127, in read_dataset(in_files, standard_var, var_name)
125 ds = xr.open_zarr(in_files)
126 elif isinstance(in_files, (list, tuple)):
--> 127 return xr.merge(
128 [
129 read_dataset(in_file, standard_var, var_name[i])
130 for i, in_file in enumerate(in_files)
131 ]
132 )
133 else:
134 raise NotImplementedError(
135 f"`in_files` format {type(in_files)} was not" f" recognized."
136 )
File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/xarray/core/merge.py:1025, in merge(objects, compat, join, fill_value, combine_attrs)
1022 obj = obj.to_dataset(promote_attrs=True) if isinstance(obj, DataArray) else obj
1023 dict_like_objects.append(obj)
-> 1025 merge_result = merge_core(
1026 dict_like_objects,
1027 compat,
1028 join,
1029 combine_attrs=combine_attrs,
1030 fill_value=fill_value,
1031 )
1032 return Dataset._construct_direct(**merge_result._asdict())
File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/xarray/core/merge.py:757, in merge_core(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value)
755 collected = collect_variables_and_indexes(aligned, indexes=indexes)
756 prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
--> 757 variables, out_indexes = merge_collected(
758 collected, prioritized, compat=compat, combine_attrs=combine_attrs
759 )
761 dims = calculate_dimensions(variables)
763 coord_names, noncoord_names = determine_coords(coerced)
File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/xarray/core/merge.py:302, in merge_collected(grouped, prioritized, compat, combine_attrs, equals)
300 variables = [variable for variable, _ in elements_list]
301 try:
--> 302 merged_vars[name] = unique_variable(
303 name, variables, compat, equals.get(name, None)
304 )
305 except MergeError:
306 if compat != "minimal":
307 # we need more than "minimal" compatibility (for which
308 # we drop conflicting coordinates)
File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/xarray/core/merge.py:156, in unique_variable(name, variables, compat, equals)
153 break
155 if not equals:
--> 156 raise MergeError(
157 f"conflicting values for variable {name!r} on objects to be combined. "
158 "You can skip this check by specifying compat='override'."
159 )
161 if combine_method:
162 for var in variables[1:]:
MergeError: conflicting values for variable 'tasmax' on objects to be combined. You can skip this check by specifying compat='override'.
Hi @DamienIrving and sorry for the late reply.
To me it seems the error is due to the merge of the 3 ssp370 files.
I guess there is an issue with the coordinates or attributes of tasmax
variable in one of the file.
Or, icclim uses the wrong merge strategy in which case we need to fix this.
I can only find aggregated data for tasmax_day_ACCESS-ESM1-5_ssp370_r6i1p1f1_gn_20650101-21001231.nc.
If you have a link to the 3 ssp files that you used, that would be helpful to reproduce the issue. Otherwise I will try to do it with another dataset.
After some experimentation, this is indeed not an issue with icclim.
I was able to reproduce the error with the following MRE, using xarray only. The MergeError is raised if the datasets subject to the merge (the 3 ssp370 datasets in your case) have overlapping dimensions and incompatible values for the overlaps.
In the case below, this is cause by the time dimension overlapping for year 2044 in both dataset and by having different values for these overlaps (1 in da1
and 2 in da2
).
import numpy as np
import xarray as xr
import pandas as pd
da1 = xr.DataArray(
data=(np.full(365 * 5, 1).reshape((365 * 2, 1, 1))),
dims=["time", "lat", "lon"],
coords={
"lat": [42],
"lon": [42],
"time": pd.date_range("2043-01-01", periods=365 * 2, freq="D"),
},
attrs={"units": "K"},
name="tas",
)
da2 = xr.DataArray(
data=(np.full(365 * 5, 2).reshape((365 * 2, 1, 1))),
dims=["time", "lat", "lon"],
coords={
"lat": [42],
"lon": [42],
"time": pd.date_range("2043-01-01", periods=365 * 2, freq="D"),
},
attrs={"units": "K"},
name="tas",
)
xr.merge([da1, da2])
# ^ MergeError
Stacktrace:
Traceback (most recent call last):
File "/home/bzah/workspace/cerfacs/icclim/mre_277.py", line 29, in <module>
xr.merge([da1, da2])
File "/home/bzah/micromamba/envs/icclim-dev/lib/python3.11/site-packages/xarray/core/merge.py", line 996, in merge
merge_result = merge_core(
^^^^^^^^^^^
File "/home/bzah/micromamba/envs/icclim-dev/lib/python3.11/site-packages/xarray/core/merge.py", line 720, in merge_core
variables, out_indexes = merge_collected(
^^^^^^^^^^^^^^^^
File "/home/bzah/micromamba/envs/icclim-dev/lib/python3.11/site-packages/xarray/core/merge.py", line 290, in merge_collected
merged_vars[name] = unique_variable(
^^^^^^^^^^^^^^^^
File "/home/bzah/micromamba/envs/icclim-dev/lib/python3.11/site-packages/xarray/core/merge.py", line 144, in unique_variable
raise MergeError(
xarray.core.merge.MergeError: conflicting values for variable 'tas' on objects to be combined. You can skip this check by specifying compat='override'.