热门标签 | HotTags
当前位置:  开发笔记 > 编程语言 > 正文

Improve error message when Categorical variable has NaNs

Bug Description: When categorical variable has ...


Bug Description

When a categorical variable has `NaN`s, a ValueError with the following traceback is shown:

In categorical columns, replacing `np.NaN` with the string `NaN` helped to remove this error.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input> in <module>

      6     target_entity=MONTH_ENTITY_NAME,

      7     features_only=not RUN_DFS,

----> 8     **dfs_definition

      9 )

     10



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\synthesis\dfs.py in dfs(entities, relationships, entityset, target_entity, cutoff_time, instance_ids, agg_primitives, trans_primitives, groupby_trans_primitives, allowed_paths, max_depth, ignore_entities, ignore_variables, seed_features, drop_contains, drop_exact, where_primitives, max_features, cutoff_time_in_index, save_progress, features_only, training_window, approximate, chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types)

    233                                                   n_jobs=n_jobs,

    234                                                   dask_kwargs=dask_kwargs,

--> 235                                                   verbose=verbose)

    236     return feature_matrix, features



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\calculate_feature_matrix.py in calculate_feature_matrix(features, entityset, cutoff_time, instance_ids, entities, relationships, cutoff_time_in_index, training_window, approximate, save_progress, verbose, chunk_size, n_jobs, dask_kwargs, profile)

    252                                                    target_time=target_time,

    253                                                    pass_columns=pass_columns,

--> 254                                                    dask_kwargs=dask_kwargs or {})

    255     else:

    256         feature_matrix = linear_calculate_chunks(chunks=chunks,



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\calculate_feature_matrix.py in parallel_calculate_chunks(chunks, features, approximate, training_window, verbose, save_progress, entityset, n_jobs, no_unapproximated_aggs, cutoff_df_time_var, target_time, pass_columns, dask_kwargs)

    600             pbar = make_tqdm_iterator(total=len(_chunks), bar_format=pbar_str)

    601         for batch in iterator:

--> 602             results = client.gather(batch)

    603             for result in results:

    604                 feature_matrix.append(result)



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\distributed\client.py in gather(self, futures, errors, maxsize, direct, asynchronous)

   1654             return self.sync(self._gather, futures, errors=errors,

   1655                              direct=direct, local_worker=local_worker,

-> 1656                              asynchronous=asynchronous)

   1657

   1658     @gen.coroutine



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\distributed\client.py in sync(self, func, *args, **kwargs)

    674             return future

    675         else:

--> 676             return sync(self.loop, func, *args, **kwargs)

    677

    678     def __repr__(self):



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\distributed\utils.py in sync(loop, func, *args, **kwargs)

    275             e.wait(10)

    276     if error[0]:

--> 277         six.reraise(*error[0])

    278     else:

    279         return result[0]



~\AppData\Roaming\Python\Python37\site-packages\six.py in reraise(tp, value, tb)

    691             if value.__traceback__ is not tb:

    692                 raise value.with_traceback(tb)

--> 693             raise value

    694         finally:

    695             value = None



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\distributed\utils.py in f()

    260             if timeout is not None:

    261                 future = gen.with_timeout(timedelta(seconds=timeout), future)

--> 262             result[0] = yield future

    263         except Exception as exc:

    264             error[0] = sys.exc_info()



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\tornado\gen.py in run(self)

    727

    728                     try:

--> 729                         value = future.result()

    730                     except Exception:

    731                         exc_info = sys.exc_info()



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\tornado\gen.py in run(self)

    734                     if exc_info is not None:

    735                         try:

--> 736                             yielded = self.gen.throw(*exc_info)  # type: ignore

    737                         finally:

    738                             # Break up a reference to itself



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\distributed\client.py in _gather(self, futures, errors, direct, local_worker)

   1495                             six.reraise(type(exception),

   1496                                         exception,

-> 1497                                         traceback)

   1498                     if errors == 'skip':

   1499                         bad_keys.add(key)



~\AppData\Roaming\Python\Python37\site-packages\six.py in reraise(tp, value, tb)

    690                 value = tp()

    691             if value.__traceback__ is not tb:

--> 692                 raise value.with_traceback(tb)

    693             raise value

    694         finally:



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\dask\compatibility.py in apply()

     91     def apply(func, args, kwargs=None):

     92         if kwargs:

---> 93             return func(*args, **kwargs)

     94         else:

     95             return func(*args)



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\calculate_feature_matrix.py in calculate_chunk()

    349                                            ids,

    350                                            precalculated_features=precalculated_features,

--> 351                                            training_window=window)

    352

    353             id_name = _feature_matrix.index.name



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\utils.py in wrapped()

     34         def wrapped(*args, **kwargs):

     35             if save_progress is None:

---> 36                 r = method(*args, **kwargs)

     37             else:

     38                 time = args[0].to_pydatetime()



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\calculate_feature_matrix.py in calc_results()

    323                                                     precalculated_features=precalculated_features,

    324                                                     ignored=all_approx_feature_set,

--> 325                                                     profile=profile)

    326             return matrix

    327



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\pandas_backend.py in calculate_all_features()

    195

    196                     handler = self._feature_type_handler(test_feature)

--> 197                     result_frame = handler(group, input_frames)

    198

    199                     output_frames_type = self.feature_tree.output_frames_type(test_feature)



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\computational_backends\pandas_backend.py in _calculate_transform_features()

    321                 values = feature_func(*variable_data, time=self.time_last)

    322             else:

--> 323                 values = feature_func(*variable_data)

    324

    325             # if we don't get just the values, the assignment breaks when indexes don't match



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\featuretools\primitives\standard\binary_transform.py in equal_scalar()

    159         def equal_scalar(vals):

    160             # case to correct pandas type for comparison

--> 161             return pd.Series(vals).astype(pd.Series([self.value]).dtype) == self.value

    162         return equal_scalar

    163



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\generic.py in astype()

   5689             # else, only a single dtype is given

   5690             new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,

-> 5691                                          **kwargs)

   5692             return self._constructor(new_data).__finalize__(self)

   5693



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\internals\managers.py in astype()

    529

    530     def astype(self, dtype, **kwargs):

--> 531         return self.apply('astype', dtype=dtype, **kwargs)

    532

    533     def convert(self, **kwargs):



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\internals\managers.py in apply()

    393                                             copy=align_copy)

    394

--> 395             applied = getattr(b, f)(**kwargs)

    396             result_blocks = _extend_blocks(applied, result_blocks)

    397



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\internals\blocks.py in astype()

    532     def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):

    533         return self._astype(dtype, copy=copy, errors=errors, values=values,

--> 534                             **kwargs)

    535

    536     def _astype(self, dtype, copy=False, errors='raise', values=None,



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\internals\blocks.py in _astype()

    631

    632                     # _astype_nansafe works fine with 1-d only

--> 633                     values = astype_nansafe(values.ravel(), dtype, copy=True)

    634

    635                 # TODO(extension)



c:\users\jan.hynek\appdata\local\programs\python\python37-32\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe()

    700     if copy or is_object_dtype(arr) or is_object_dtype(dtype):

    701         # Explicit copy, or required since NumPy can't view from / to object.

--> 702         return arr.astype(dtype, copy=True)

    703

    704     return arr.view(dtype)



ValueError: could not convert string to float: 'Sázava'


该提问来源于开源项目:FeatureLabs/featuretools

This should have been fixed by #504, closing





   



推荐阅读
author-avatar
去奥迪店买本驰
这个家伙很懒,什么也没留下!
PHP1.CN | 中国最专业的PHP中文社区 | DevBox开发工具箱 | json解析格式化 |PHP资讯 | PHP教程 | 数据库技术 | 服务器技术 | 前端开发技术 | PHP框架 | 开发工具 | 在线工具
Copyright © 1998 - 2020 PHP1.CN. All Rights Reserved | 京公网安备 11010802041100号 | 京ICP备19059560号-4 | PHP1.CN 第一PHP社区 版权所有