작성
·
69
0
# 사이킷런 래퍼 XGBoost 클래스인 XGBClassifier 임포트
from xgboost import XGBClassifier
# Warning 메시지를 없애기 위해 eval_metric 값을 XGBClassifier 생성 인자로 입력. 미 입력해도 수행에 문제 없음.
xgb_wrapper = XGBClassifier(n_estimators=400, learning_rate=0.05, max_depth=3, eval_metric='logloss')
xgb_wrapper.fit(X_train, y_train, verbose=True)
w_preds = xgb_wrapper.predict(X_test)
w_pred_proba = xgb_wrapper.predict_proba(X_test)[:, 1]
위 코드 수행시 다음과 같은 오류가 발생해서 아래의 코드로 수정하여 고쳤는데도 오류가 발생하는데, 찾아봐도 무엇이 문제인지 알 수 가 없어 질문을 남깁니다.
어떤 이유인지 몰라 질문 자세히 못드리는 점 죄송합니다..
수정된 코드
# early_stopping_rounds를 10으로 설정하고 재 학습.
xgb_wrapper.fit(X_tr.values, y_tr.values, early_stopping_rounds=10,
eval_metric="logloss", eval_set=evals,verbose=True)
ws10_preds = xgb_wrapper.predict(X_test.values)
ws10_pred_proba = xgb_wrapper.predict_proba(X_test.values)[:, 1]
get_clf_eval(y_test , ws10_preds, ws10_pred_proba)
AttributeError Traceback (most recent call last)
Cell In[40], line 16
14 # XGBoost 모델 학습
15 xgb_wrapper = XGBClassifier(n_estimators=400, learning_rate=0.05, max_depth=3)
---> 16 xgb_wrapper.fit(X_tr.values, y_tr.values, early_stopping_rounds=10,
17 eval_metric="logloss", eval_set=evals, verbose=True)
19 # 예측 및 확률 추출
20 ws10_preds = xgb_wrapper.predict(X_test.values)
File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\core.py:506, in _deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
504 for k, arg in zip(sig.parameters, args):
505 kwargs[k] = arg
--> 506 return f(**kwargs)
File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\sklearn.py:1231, in XGBClassifier.fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, base_margin_eval_set, feature_weights, callbacks)
1228 label_transform = lambda x: x
1230 model, feval, params = self._configure_fit(xgb_model, eval_metric, params)
-> 1231 train_dmatrix, evals = _wrap_evaluation_matrices(
1232 missing=self.missing,
1233 X=X,
1234 y=y,
1235 group=None,
1236 qid=None,
1237 sample_weight=sample_weight,
1238 base_margin=base_margin,
1239 feature_weights=feature_weights,
1240 eval_set=eval_set,
1241 sample_weight_eval_set=sample_weight_eval_set,
1242 base_margin_eval_set=base_margin_eval_set,
1243 eval_group=None,
1244 eval_qid=None,
1245 create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
1246 enable_categorical=self.enable_categorical,
1247 label_transform=label_transform,
1248 )
1250 self._Booster = train(
1251 params,
1252 train_dmatrix,
(...)
1261 callbacks=callbacks,
1262 )
1264 if not callable(self.objective):
File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\sklearn.py:334, in _wrap_evaluation_matrices(missing, X, y, group, qid, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, base_margin_eval_set, eval_group, eval_qid, create_dmatrix, enable_categorical, label_transform)
332 evals.append(train_dmatrix)
333 else:
--> 334 m = create_dmatrix(
335 data=valid_X,
336 label=label_transform(valid_y),
337 weight=sample_weight_eval_set[i],
338 group=eval_group[i],
339 qid=eval_qid[i],
340 base_margin=base_margin_eval_set[i],
341 missing=missing,
342 enable_categorical=enable_categorical,
343 )
344 evals.append(m)
345 nevals = len(evals)
File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\sklearn.py:1245, in XGBClassifier.fit.<locals>.<lambda>(**kwargs)
1228 label_transform = lambda x: x
1230 model, feval, params = self._configure_fit(xgb_model, eval_metric, params)
1231 train_dmatrix, evals = _wrap_evaluation_matrices(
1232 missing=self.missing,
1233 X=X,
1234 y=y,
1235 group=None,
1236 qid=None,
1237 sample_weight=sample_weight,
1238 base_margin=base_margin,
1239 feature_weights=feature_weights,
1240 eval_set=eval_set,
1241 sample_weight_eval_set=sample_weight_eval_set,
1242 base_margin_eval_set=base_margin_eval_set,
1243 eval_group=None,
1244 eval_qid=None,
-> 1245 create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
1246 enable_categorical=self.enable_categorical,
1247 label_transform=label_transform,
1248 )
1250 self._Booster = train(
1251 params,
1252 train_dmatrix,
(...)
1261 callbacks=callbacks,
1262 )
1264 if not callable(self.objective):
File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\core.py:506, in _deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
504 for k, arg in zip(sig.parameters, args):
505 kwargs[k] = arg
--> 506 return f(**kwargs)
File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\core.py:616, in DMatrix.__init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, group, qid, label_lower_bound, label_upper_bound, feature_weights, enable_categorical)
613 assert self.handle is not None
614 return
--> 616 handle, feature_names, feature_types = dispatch_data_backend(
617 data,
618 missing=self.missing,
619 threads=self.nthread,
620 feature_names=feature_names,
621 feature_types=feature_types,
622 enable_categorical=enable_categorical,
623 )
624 assert handle is not None
625 self.handle = handle
File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\data.py:707, in dispatch_data_backend(data, missing, threads, feature_names, feature_types, enable_categorical)
705 return _from_tuple(data, missing, threads, feature_names, feature_types)
706 if _is_pandas_df(data):
--> 707 return _from_pandas_df(data, enable_categorical, missing, threads,
708 feature_names, feature_types)
709 if _is_pandas_series(data):
710 return _from_pandas_series(data, missing, threads, feature_names,
711 feature_types)
File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\data.py:297, in _from_pandas_df(data, enable_categorical, missing, nthread, feature_names, feature_types)
289 def _from_pandas_df(
290 data,
291 enable_categorical: bool,
(...)
295 feature_types: Optional[List[str]],
296 ):
--> 297 data, feature_names, feature_types = _transform_pandas_df(
298 data, enable_categorical, feature_names, feature_types)
299 return _from_numpy_array(data, missing, nthread, feature_names,
300 feature_types)
File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\data.py:250, in _transform_pandas_df(data, enable_categorical, feature_names, feature_types, meta, meta_type)
246 if isinstance(data.columns, pd.MultiIndex):
247 feature_names = [
248 ' '.join([str(x) for x in i]) for i in data.columns
249 ]
--> 250 elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
251 feature_names = list(map(str, data.columns))
252 else:
AttributeError: module 'pandas' has no attribute 'Int64Index'
답변