When a categorical variable contains `NaN`s, a ValueError with the following traceback is shown:
In categorical columns, replacing `np.NaN` with the string `"NaN"` helped to remove this error.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | --------------------------------------------------------------------------- ValueError Traceback (most recent call last) 6 target_entity=MONTH_ENTITY_NAME, 7 features_only=not RUN_DFS, ----> 8 **dfs_definition 9 ) 10 c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\synthesis\dfs.py in dfs(entities, relationships, entityset, target_entity, cutoff_time, instance_ids, agg_primitives, trans_primitives, groupby_trans_primitives, allowed_paths, max_depth, ignore_entities, ignore_variables, seed_features, drop_contains, drop_exact, where_primitives, max_features, cutoff_time_in_index, save_progress, features_only, training_window, approximate, chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types) 233 n_jobs=n_jobs, 234 dask_kwargs=dask_kwargs, --> 235 verbose=verbose) 236 return feature_matrix, features c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\calculate_feature_matrix.py in calculate_feature_matrix(features, entityset, cutoff_time, instance_ids, entities, relationships, cutoff_time_in_index, training_window, approximate, save_progress, verbose, chunk_size, n_jobs, dask_kwargs, profile) 252 target_time=target_time, 253 pass_columns=pass_columns, --> 254 dask_kwargs=dask_kwargs or {}) 255 else: 256 feature_matrix = 
linear_calculate_chunks(chunks=chunks, c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\calculate_feature_matrix.py in parallel_calculate_chunks(chunks, features, approximate, training_window, verbose, save_progress, entityset, n_jobs, no_unapproximated_aggs, cutoff_df_time_var, target_time, pass_columns, dask_kwargs) 600 pbar = make_tqdm_iterator(total=len(_chunks), bar_format=pbar_str) 601 for batch in iterator: --> 602 results = client.gather(batch) 603 for result in results: 604 feature_matrix.append(result) c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\distributed\client.py in gather(self, futures, errors, maxsize, direct, asynchronous) 1654 return self.sync(self._gather, futures, errors=errors, 1655 direct=direct, local_worker=local_worker, -> 1656 asynchronous=asynchronous) 1657 1658 .coroutine c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\distributed\client.py in sync(self, func, *args, **kwargs) 674 return future 675 else: --> 676 return sync(self.loop, func, *args, **kwargs) 677 678 def __repr__(self): c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\distributed\utils.py in sync(loop, func, *args, **kwargs) 275 e.wait(10) 276 if error[0]: --> 277 six.reraise(*error[0]) 278 else: 279 return result[0] ~\AppData\Roaming\Python\Python37\site-packages\six.py in reraise(tp, value, tb) 691 if value.__traceback__ is not tb: 692 raise value.with_traceback(tb) --> 693 raise value 694 finally: 695 value = None c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\distributed\utils.py in f() 260 if timeout is not None: 261 future = gen.with_timeout(timedelta(seconds=timeout), future) --> 262 result[0] = yield future 263 except Exception as exc: 264 error[0] = sys.exc_info() c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\tornado\gen.py in run(self) 727 
728 try: --> 729 value = future.result() 730 except Exception: 731 exc_info = sys.exc_info() c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\tornado\gen.py in run(self) 734 if exc_info is not None: 735 try: --> 736 yielded = self.gen.throw(*exc_info) # type: ignore 737 finally: 738 # Break up a reference to itself c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\distributed\client.py in _gather(self, futures, errors, direct, local_worker) 1495 six.reraise(type(exception), 1496 exception, -> 1497 traceback) 1498 if errors == 'skip': 1499 bad_keys.add(key) ~\AppData\Roaming\Python\Python37\site-packages\six.py in reraise(tp, value, tb) 690 value = tp() 691 if value.__traceback__ is not tb: --> 692 raise value.with_traceback(tb) 693 raise value 694 finally: c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\dask\compatibility.py in apply() 91 def apply(func, args, kwargs=None): 92 if kwargs: ---> 93 return func(*args, **kwargs) 94 else: 95 return func(*args) c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\calculate_feature_matrix.py in calculate_chunk() 349 ids, 350 precalculated_features=precalculated_features, --> 351 training_window=window) 352 353 id_name = _feature_matrix.index.name c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\utils.py in wrapped() 34 def wrapped(*args, **kwargs): 35 if save_progress is None: ---> 36 r = method(*args, **kwargs) 37 else: 38 time = args[0].to_pydatetime() c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\calculate_feature_matrix.py in calc_results() 323 precalculated_features=precalculated_features, 324 ignored=all_approx_feature_set, --> 325 profile=profile) 326 return matrix 327 
c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\pandas_backend.py in calculate_all_features() 195 196 handler = self._feature_type_handler(test_feature) --> 197 result_frame = handler(group, input_frames) 198 199 output_frames_type = self.feature_tree.output_frames_type(test_feature) c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\pandas_backend.py in _calculate_transform_features() 321 values = feature_func(*variable_data, time=self.time_last) 322 else: --> 323 values = feature_func(*variable_data) 324 325 # if we don't get just the values, the assignment breaks when indexes don't match c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\primitives\standard\binary_transform.py in equal_scalar() 159 def equal_scalar(vals): 160 # case to correct pandas type for comparison --> 161 return pd.Series(vals).astype(pd.Series([self.value]).dtype) == self.value 162 return equal_scalar 163 c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\generic.py in astype() 5689 # else, only a single dtype is given 5690 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors, -> 5691 **kwargs) 5692 return self._constructor(new_data).__finalize__(self) 5693 c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\internals\managers.py in astype() 529 530 def astype(self, dtype, **kwargs): --> 531 return self.apply('astype', dtype=dtype, **kwargs) 532 533 def convert(self, **kwargs): c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\internals\managers.py in apply() 393 copy=align_copy) 394 --> 395 applied = getattr(b, f)(**kwargs) 396 result_blocks = _extend_blocks(applied, result_blocks) 397 
c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\internals\blocks.py in astype() 532 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): 533 return self._astype(dtype, copy=copy, errors=errors, values=values, --> 534 **kwargs) 535 536 def _astype(self, dtype, copy=False, errors='raise', values=None, c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\internals\blocks.py in _astype() 631 632 # _astype_nansafe works fine with 1-d only --> 633 values = astype_nansafe(values.ravel(), dtype, copy=True) 634 635 # TODO(extension) c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe() 700 if copy or is_object_dtype(arr) or is_object_dtype(dtype): 701 # Explicit copy, or required since NumPy can't view from / to object. --> 702 return arr.astype(dtype, copy=True) 703 704 return arr.view(dtype) ValueError: could not convert string to float: 'Sázava' |
该提问来源于开源项目:FeatureLabs/featuretools
This should have been fixed by #504, closing