1313
1414def func_nullable_series_contains (fn : Callable ) -> Callable :
1515 @functools .wraps (fn )
16- def inner (
17- config : Settings , series : pd .Series , state : dict , * args , ** kwargs
18- ) -> bool :
16+ def inner (config : Settings , series : pd .Series , state : dict , * args , ** kwargs ) -> bool :
1917 if series .hasnans :
2018 series = series .dropna ()
2119 if series .empty :
@@ -32,18 +30,12 @@ def safe_histogram(
3230 weights : Optional [np .ndarray ] = None ,
3331 density : bool = False ,
3432) -> Tuple [np .ndarray , np .ndarray ]:
35- """
36- Wrapper to avoid
37- ValueError: Too many bins for data range. Cannot create N finite-sized bins.
38- """
3933 try :
4034 return np .histogram (values , bins = bins , weights = weights , density = density )
4135 except ValueError as exc :
4236 if "Too many bins for data range" in str (exc ):
4337 try :
44- return np .histogram (
45- values , bins = "auto" , weights = weights , density = density
46- )
38+ return np .histogram (values , bins = "auto" , weights = weights , density = density )
4739 except ValueError :
4840 finite = values [np .isfinite (values )]
4941 if finite .size == 0 :
@@ -55,9 +47,7 @@ def safe_histogram(
5547 bin_edges = np .array ([vmin - eps , vmin + eps ])
5648 else :
5749 bin_edges = np .array ([vmin , vmax ])
58- return np .histogram (
59- values , bins = bin_edges , weights = weights , density = density
60- )
50+ return np .histogram (values , bins = bin_edges , weights = weights , density = density )
6151 raise
6252
6353
@@ -69,18 +59,20 @@ def histogram_compute(
6959 weights : Optional [np .ndarray ] = None ,
7060) -> dict :
7161 stats = {}
62+
7263 if len (finite_values ) == 0 :
7364 return {name : []}
7465
7566 hist_config = config .plot .histogram
7667
77- # Compute data range
7868 finite = finite_values [np .isfinite (finite_values )]
69+ if len (finite ) == 0 :
70+ return {name : []}
71+
7972 vmin = float (np .min (finite ))
8073 vmax = float (np .max (finite ))
8174 data_range = vmax - vmin
8275
83- # Choose of Bins based on observed data values
8476 if data_range == 0 :
8577 eps = 0.5 if vmin == 0 else abs (vmin ) * 0.1
8678 bins = np .array ([vmin - eps , vmin + eps ])
@@ -89,14 +81,13 @@ def histogram_compute(
8981
9082 if isinstance (requested_bins , int ):
9183 safe_bins = min (requested_bins , n_unique , hist_config .max_bins )
92-
9384 safe_bins = max (1 , safe_bins )
94-
9585 bins = np .linspace (vmin , vmax , safe_bins + 1 )
9686 else :
97- bins = np .histogram_bin_edges (finite_values , bins = "auto" )
98- if len (bins ) - 1 > hist_config .max_bins :
99- bins = np .linspace (vmin , vmax , hist_config .max_bins + 1 )
87+ bins = np .histogram_bin_edges (
88+ finite_values ,
89+ bins = min (len (finite_values ), hist_config .max_bins ),
90+ )
10091
10192 hist = np .histogram (
10293 finite_values ,
@@ -113,16 +104,13 @@ def chi_square(
113104 values : Optional [np .ndarray ] = None ,
114105 histogram : Optional [np .ndarray ] = None ,
115106) -> dict :
116- # Case 1: histogram not passed → we compute it
117107 if histogram is None :
118108 if values is None :
119109 return {"statistic" : 0 , "pvalue" : 0 }
120110
121- # Try NumPy "auto" binning (may fail under NumPy 2)
122111 try :
123112 bins = np .histogram_bin_edges (values , bins = "auto" )
124113 except ValueError :
125- # Fallback: basic 1-bin histogram covering the min→max range
126114 finite = values [np .isfinite (values )]
127115 if finite .size == 0 :
128116 return {"statistic" : 0 , "pvalue" : 0 }
@@ -136,141 +124,101 @@ def chi_square(
136124
137125 histogram , _ = np .histogram (values , bins = bins )
138126
139- # Case 2: histogram exists but is empty
140127 if histogram .size == 0 or histogram .sum () == 0 :
141128 return {"statistic" : 0 , "pvalue" : 0 }
142129
143130 return dict (chisquare (histogram )._asdict ())
144131
145132
def series_hashable(
    fn: Callable[[Settings, pd.Series, dict], Tuple[Settings, pd.Series, dict]]
) -> Callable[[Settings, pd.Series, dict], Tuple[Settings, pd.Series, dict]]:
    """Decorator that only runs ``fn`` when the summary marks the series hashable.

    Args:
        fn: Summary step taking and returning ``(config, series, summary)``.

    Returns:
        A wrapper with the same call signature as ``fn``.

    Raises:
        KeyError: from the wrapper, when ``summary`` has no ``"hashable"`` key.
    """

    @functools.wraps(fn)
    def inner(
        config: Settings, series: pd.Series, summary: dict
    ) -> Tuple[Settings, pd.Series, dict]:
        # A falsy "hashable" flag short-circuits: the inputs are handed back
        # unchanged and ``fn`` is never called.
        if not summary["hashable"]:
            return config, series, summary
        return fn(config, series, summary)

    return inner
158141
159142
def series_handle_nulls(
    fn: Callable[[Settings, pd.Series, dict], Tuple[Settings, pd.Series, dict]]
) -> Callable[[Settings, pd.Series, dict], Tuple[Settings, pd.Series, dict]]:
    """Decorator for nullable series: drop missing values before calling ``fn``.

    Args:
        fn: Summary step taking and returning ``(config, series, summary)``.

    Returns:
        A wrapper with the same call signature as ``fn``; it forwards
        ``config`` and ``summary`` untouched.
    """

    @functools.wraps(fn)
    def inner(
        config: Settings, series: pd.Series, summary: dict
    ) -> Tuple[Settings, pd.Series, dict]:
        # Only call dropna() when the series actually reports missing values;
        # otherwise the very same object is passed through.
        if series.hasnans:
            series = series.dropna()
        return fn(config, series, summary)

    return inner
175151
176152
def named_aggregate_summary(series: pd.Series, key: str) -> dict:
    """Return the max, mean, median and min of ``series`` under named keys.

    Args:
        series: Values to aggregate.
        key: Suffix appended to each statistic name.

    Returns:
        A dict with the keys ``max_<key>``, ``mean_<key>``, ``median_<key>``
        and ``min_<key>``, each mapped to the matching NumPy aggregate.
    """
    # Keys are built without any surrounding whitespace so that lookups such
    # as ``summary["max_length"]`` match exactly.
    return {
        f"max_{key}": np.max(series),
        f"mean_{key}": np.mean(series),
        f"median_{key}": np.median(series),
        f"min_{key}": np.min(series),
    }
187161
@multimethod
def describe_counts(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_counts`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
193165
194166
@multimethod
def describe_supported(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_supported`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
200170
201171
@multimethod
def describe_generic(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_generic`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
207175
208176
@multimethod
def describe_numeric_1d(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_numeric_1d`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
214180
215181
@multimethod
def describe_text_1d(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict, Any]:
    """Base definition of the ``describe_text_1d`` multimethod.

    NOTE(review): the return annotation has four elements, unlike the
    sibling ``describe_*`` definitions which declare three -- confirm
    which shape the registered implementations actually return.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
221185
222186
@multimethod
def describe_date_1d(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_date_1d`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
228190
229191
@multimethod
def describe_categorical_1d(
    config: Settings, series: pd.Series, summary: dict
) -> Tuple[Settings, pd.Series, dict]:
    """Base definition of the ``describe_categorical_1d`` multimethod.

    Unlike most sibling ``describe_*`` definitions, ``series`` is annotated
    as ``pd.Series`` here rather than ``Any``.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
235195
236196
@multimethod
def describe_url_1d(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_url_1d`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
242200
243201
@multimethod
def describe_file_1d(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_file_1d`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
249205
250206
@multimethod
def describe_path_1d(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_path_1d`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
256210
257211
@multimethod
def describe_image_1d(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_image_1d`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
263215
264216
@multimethod
def describe_boolean_1d(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_boolean_1d`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
270220
271221
@multimethod
def describe_timeseries_1d(
    config: Settings, series: Any, summary: dict
) -> Tuple[Settings, Any, dict]:
    """Base definition of the ``describe_timeseries_1d`` multimethod.

    NOTE(review): concrete implementations are presumably registered
    elsewhere for specific series types -- confirm against the backends.

    Raises:
        NotImplementedError: always, from this base definition.
    """
    raise NotImplementedError()
0 commit comments