stat_summary_bin multiple errors
Closed this issue · 1 comments
This may actually be two separate issues, but since I'm not sure about the second one, I'll combine them both into one report.
`stat_summary_bin`'s documentation seems to say that `bins`, `binwidth`, and `breaks` are effectively exclusive (since `breaks` overrides `binwidth`, which in turn overrides `bins`), and that all of them can be tuples. From the docs:
class stat_summary_bin(stat):
"""
Summarise y values at x intervals
{usage}
Parameters
----------
{common_parameters}
binwidth : float | tuple, default=None
The width of the bins. The default is to use bins bins that
cover the range of the data. You should always override this
value, exploring multiple widths to find the best to illustrate
the stories in your data.
bins : int | tuple, default=30
Number of bins. Overridden by binwidth.
breaks : array_like | tuple[array_like, array_like], default=None
Bin boundaries. This supercedes the `binwidth`, `bins`
and `boundary` arguments.
...
Well, neither of these things seems to be true in 0.13.6.
bins, binwidth and breaks
When passing `binwidth` such that the resulting number of bins is < 30, or `breaks` with fewer than 30 bins, then unless `bins` is also explicitly set to the matching number of bins, we get an IndexError:
Traceback
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[251], line 53
30 g = (
31 p9.ggplot(condense_for_onset(dev))
32 + p9.aes(x=\"prog_onset_P\", y=\"temperature\")
(...)
50 # + p9.geom_line(data=manual_fit_data())
51 )
52 g
---> 53 g.draw(True)
54 g._build()
55 g.layers[0].data
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/ggplot.py:272, in ggplot.draw(self, show)
270 self = deepcopy(self)
271 with plot_context(self, show=show):
--> 272 self._build()
274 # setup
275 self.figure, self.axs = self.facet.setup(self)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/ggplot.py:376, in ggplot._build(self)
373 layout.map_position(layers)
375 # Apply and map statistics
--> 376 layers.compute_statistic(layout)
377 layers.map_statistic(self)
379 # Prepare data in geoms
380 # e.g. from y and width to ymin and ymax
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/layer.py:461, in Layers.compute_statistic(self, layout)
459 def compute_statistic(self, layout: Layout):
460 for l in self:
--> 461 l.compute_statistic(layout)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/layer.py:284, in layer.compute_statistic(self, layout)
282 data = self.stat.use_defaults(data)
283 data = self.stat.setup_data(data)
--> 284 data = self.stat.compute_layer(data, params, layout)
285 self.data = data
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/stats/stat.py:308, in stat.compute_layer(cls, data, params, layout)
305 pscales = layout.get_scales(pdata[\"PANEL\"].iloc[0])
306 return cls.compute_panel(pdata, pscales, **params)
--> 308 return groupby_apply(data, \"PANEL\", fn)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/_utils/__init__.py:666, in groupby_apply(df, cols, func, *args, **kwargs)
662 lst = []
663 for _, d in df.groupby(cols, observed=True):
664 # function fn should be free to modify dataframe d, therefore
665 # do not mark d as a slice of df i.e no SettingWithCopyWarning
--> 666 lst.append(func(d, *args, **kwargs))
667 return pd.concat(lst, axis=axis, ignore_index=True)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/stats/stat.py:306, in stat.compute_layer.<locals>.fn(pdata)
304 return pdata
305 pscales = layout.get_scales(pdata[\"PANEL\"].iloc[0])
--> 306 return cls.compute_panel(pdata, pscales, **params)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/stats/stat.py:343, in stat.compute_panel(cls, data, scales, **params)
341 stats = []
342 for _, old in data.groupby(\"group\"):
--> 343 new = cls.compute_group(old, scales, **params)
344 unique = uniquecols(old)
345 missing = unique.columns.difference(new.columns)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/stats/stat_summary_bin.py:168, in stat_summary_bin.compute_group(cls, data, scales, **params)
166 out["width"] = 0.9
167 else:
--> 168 out["width"] = np.diff(breaks)[bins - 1]
170 return out
IndexError: index 29 is out of bounds for axis 0 with size 19
Code for clarity:
import plotnine as p9
# this works, and gives 30 bins:
p9.ggplot(df) + p9.aes(x="x", y="y") + p9.stat_summary_bin()
# this fails unless the number of resulting bins is > 30:
p9.ggplot(df) + p9.aes(x="x", y="y") + p9.stat_summary_bin(binwidth=10)
# this also fails:
p9.ggplot(df) + p9.aes(x="x", y="y") + p9.stat_summary_bin(breaks = np.linspace(df.x.min(), df.x.max(), 29)) # any int smaller than 30 fails
tuples
When passing a tuple to any of the three, the function just fails with a traceback like this one:
Traceback 2
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[252], line 53
30 g = (
31 p9.ggplot(condense_for_onset(dev))
32 + p9.aes(x=\"prog_onset_P\", y=\"temperature\")
(...)
50 # + p9.geom_line(data=manual_fit_data())
51 )
52 g
---> 53 g.draw(True)
54 g._build()
55 g.layers[0].data
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/ggplot.py:272, in ggplot.draw(self, show)
270 self = deepcopy(self)
271 with plot_context(self, show=show):
--> 272 self._build()
274 # setup
275 self.figure, self.axs = self.facet.setup(self)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/ggplot.py:376, in ggplot._build(self)
373 layout.map_position(layers)
375 # Apply and map statistics
--> 376 layers.compute_statistic(layout)
377 layers.map_statistic(self)
379 # Prepare data in geoms
380 # e.g. from y and width to ymin and ymax
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/layer.py:461, in Layers.compute_statistic(self, layout)
459 def compute_statistic(self, layout: Layout):
460 for l in self:
--> 461 l.compute_statistic(layout)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/layer.py:284, in layer.compute_statistic(self, layout)
282 data = self.stat.use_defaults(data)
283 data = self.stat.setup_data(data)
--> 284 data = self.stat.compute_layer(data, params, layout)
285 self.data = data
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/stats/stat.py:308, in stat.compute_layer(cls, data, params, layout)
305 pscales = layout.get_scales(pdata[\"PANEL\"].iloc[0])
306 return cls.compute_panel(pdata, pscales, **params)
--> 308 return groupby_apply(data, \"PANEL\", fn)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/_utils/__init__.py:666, in groupby_apply(df, cols, func, *args, **kwargs)
662 lst = []
663 for _, d in df.groupby(cols, observed=True):
664 # function fn should be free to modify dataframe d, therefore
665 # do not mark d as a slice of df i.e no SettingWithCopyWarning
--> 666 lst.append(func(d, *args, **kwargs))
667 return pd.concat(lst, axis=axis, ignore_index=True)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/stats/stat.py:306, in stat.compute_layer.<locals>.fn(pdata)
304 return pdata
305 pscales = layout.get_scales(pdata[\"PANEL\"].iloc[0])
--> 306 return cls.compute_panel(pdata, pscales, **params)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/stats/stat.py:343, in stat.compute_panel(cls, data, scales, **params)
341 stats = []
342 for _, old in data.groupby(\"group\"):
--> 343 new = cls.compute_group(old, scales, **params)
344 unique = uniquecols(old)
345 missing = unique.columns.difference(new.columns)
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/stats/stat_summary_bin.py:143, in stat_summary_bin.compute_group(cls, data, scales, **params)
133 boundary = params["boundary"]
135 func = make_summary_fun(
136 params["fun_data"],
137 params["fun_y"],
(...)
140 params["fun_args"],
141 )
--> 143 breaks = fuzzybreaks(scales.x, breaks, boundary, binwidth, bins)
144 data["bin"] = pd.cut(
145 data["x"],
146 bins=breaks, # pyright: ignore
147 labels=False,
148 include_lowest=True,
149 )
151 def func_wrapper(data: pd.DataFrame) -> pd.DataFrame:
File ~/.cache/micromamba/lib/python3.12/site-packages/plotnine/stats/binning.py:305, in fuzzybreaks(scale, breaks, boundary, binwidth, bins, right)
299 bins = int(np.ceil((srange[1] - boundary) / binwidth))
301 # To minimise precision errors, we do not pass the boundary and
302 # binwidth into np.arange as params. The resulting breaks
303 # can then be adjusted with finer(epsilon based rather than
304 # some arbitrary small number) precision.
--> 305 breaks = np.arange(boundary, srange[1] + binwidth, binwidth)
306 return _adjust_breaks(breaks, right)
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
I haven't looked at the code in depth, but my guess is that there is no handling for tuples.