강의

멘토링

로드맵

인프런 커뮤니티 질문&답변

JUNWON님의 프로필 이미지
JUNWON

작성한 질문수

pandas 오류 - 사이킷런 XGBoost Wrapper 강의에서

작성

·

69

0

# 사이킷런 래퍼 XGBoost 클래스인 XGBClassifier 임포트
from xgboost import XGBClassifier

# Warning 메시지를 없애기 위해 eval_metric 값을 XGBClassifier 생성 인자로 입력. 미 입력해도 수행에 문제 없음.   
xgb_wrapper = XGBClassifier(n_estimators=400, learning_rate=0.05, max_depth=3, eval_metric='logloss')
xgb_wrapper.fit(X_train, y_train, verbose=True)
w_preds = xgb_wrapper.predict(X_test)
w_pred_proba = xgb_wrapper.predict_proba(X_test)[:, 1]

위 코드 수행 시 다음과 같은 오류가 발생해서 아래의 코드로 수정했는데도 오류가 발생합니다. 찾아봐도 무엇이 문제인지 알 수가 없어 질문을 남깁니다.

어떤 이유인지 몰라 질문을 자세히 드리지 못하는 점 죄송합니다.


수정된 코드

# early_stopping_rounds를 10으로 설정하고 재 학습. 
xgb_wrapper.fit(X_tr.values, y_tr.values, early_stopping_rounds=10, 
                eval_metric="logloss", eval_set=evals,verbose=True)

ws10_preds = xgb_wrapper.predict(X_test.values)
ws10_pred_proba = xgb_wrapper.predict_proba(X_test.values)[:, 1]
get_clf_eval(y_test , ws10_preds, ws10_pred_proba)
AttributeError                            Traceback (most recent call last)
Cell In[40], line 16
     14 # XGBoost 모델 학습
     15 xgb_wrapper = XGBClassifier(n_estimators=400, learning_rate=0.05, max_depth=3)
---> 16 xgb_wrapper.fit(X_tr.values, y_tr.values, early_stopping_rounds=10, 
     17                 eval_metric="logloss", eval_set=evals, verbose=True)
     19 # 예측 및 확률 추출
     20 ws10_preds = xgb_wrapper.predict(X_test.values)

File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\core.py:506, in _deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
    504 for k, arg in zip(sig.parameters, args):
    505     kwargs[k] = arg
--> 506 return f(**kwargs)

File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\sklearn.py:1231, in XGBClassifier.fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, base_margin_eval_set, feature_weights, callbacks)
   1228     label_transform = lambda x: x
   1230 model, feval, params = self._configure_fit(xgb_model, eval_metric, params)
-> 1231 train_dmatrix, evals = _wrap_evaluation_matrices(
   1232     missing=self.missing,
   1233     X=X,
   1234     y=y,
   1235     group=None,
   1236     qid=None,
   1237     sample_weight=sample_weight,
   1238     base_margin=base_margin,
   1239     feature_weights=feature_weights,
   1240     eval_set=eval_set,
   1241     sample_weight_eval_set=sample_weight_eval_set,
   1242     base_margin_eval_set=base_margin_eval_set,
   1243     eval_group=None,
   1244     eval_qid=None,
   1245     create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
   1246     enable_categorical=self.enable_categorical,
   1247     label_transform=label_transform,
   1248 )
   1250 self._Booster = train(
   1251     params,
   1252     train_dmatrix,
   (...)
   1261     callbacks=callbacks,
   1262 )
   1264 if not callable(self.objective):

File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\sklearn.py:334, in _wrap_evaluation_matrices(missing, X, y, group, qid, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, base_margin_eval_set, eval_group, eval_qid, create_dmatrix, enable_categorical, label_transform)
    332         evals.append(train_dmatrix)
    333     else:
--> 334         m = create_dmatrix(
    335             data=valid_X,
    336             label=label_transform(valid_y),
    337             weight=sample_weight_eval_set[i],
    338             group=eval_group[i],
    339             qid=eval_qid[i],
    340             base_margin=base_margin_eval_set[i],
    341             missing=missing,
    342             enable_categorical=enable_categorical,
    343         )
    344         evals.append(m)
    345 nevals = len(evals)

File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\sklearn.py:1245, in XGBClassifier.fit.<locals>.<lambda>(**kwargs)
   1228     label_transform = lambda x: x
   1230 model, feval, params = self._configure_fit(xgb_model, eval_metric, params)
   1231 train_dmatrix, evals = _wrap_evaluation_matrices(
   1232     missing=self.missing,
   1233     X=X,
   1234     y=y,
   1235     group=None,
   1236     qid=None,
   1237     sample_weight=sample_weight,
   1238     base_margin=base_margin,
   1239     feature_weights=feature_weights,
   1240     eval_set=eval_set,
   1241     sample_weight_eval_set=sample_weight_eval_set,
   1242     base_margin_eval_set=base_margin_eval_set,
   1243     eval_group=None,
   1244     eval_qid=None,
-> 1245     create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
   1246     enable_categorical=self.enable_categorical,
   1247     label_transform=label_transform,
   1248 )
   1250 self._Booster = train(
   1251     params,
   1252     train_dmatrix,
   (...)
   1261     callbacks=callbacks,
   1262 )
   1264 if not callable(self.objective):

File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\core.py:506, in _deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
    504 for k, arg in zip(sig.parameters, args):
    505     kwargs[k] = arg
--> 506 return f(**kwargs)

File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\core.py:616, in DMatrix.__init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, group, qid, label_lower_bound, label_upper_bound, feature_weights, enable_categorical)
    613     assert self.handle is not None
    614     return
--> 616 handle, feature_names, feature_types = dispatch_data_backend(
    617     data,
    618     missing=self.missing,
    619     threads=self.nthread,
    620     feature_names=feature_names,
    621     feature_types=feature_types,
    622     enable_categorical=enable_categorical,
    623 )
    624 assert handle is not None
    625 self.handle = handle

File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\data.py:707, in dispatch_data_backend(data, missing, threads, feature_names, feature_types, enable_categorical)
    705     return _from_tuple(data, missing, threads, feature_names, feature_types)
    706 if _is_pandas_df(data):
--> 707     return _from_pandas_df(data, enable_categorical, missing, threads,
    708                            feature_names, feature_types)
    709 if _is_pandas_series(data):
    710     return _from_pandas_series(data, missing, threads, feature_names,
    711                                feature_types)

File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\data.py:297, in _from_pandas_df(data, enable_categorical, missing, nthread, feature_names, feature_types)
    289 def _from_pandas_df(
    290     data,
    291     enable_categorical: bool,
   (...)
    295     feature_types: Optional[List[str]],
    296 ):
--> 297     data, feature_names, feature_types = _transform_pandas_df(
    298         data, enable_categorical, feature_names, feature_types)
    299     return _from_numpy_array(data, missing, nthread, feature_names,
    300                              feature_types)

File ~\anaconda3\envs\sklearn_env\lib\site-packages\xgboost\data.py:250, in _transform_pandas_df(data, enable_categorical, feature_names, feature_types, meta, meta_type)
    246 if isinstance(data.columns, pd.MultiIndex):
    247     feature_names = [
    248         ' '.join([str(x) for x in i]) for i in data.columns
    249     ]
--> 250 elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
    251     feature_names = list(map(str, data.columns))
    252 else:

AttributeError: module 'pandas' has no attribute 'Int64Index'

답변

답변을 기다리고 있는 질문이에요
첫번째 답변을 남겨보세요!
JUNWON님의 프로필 이미지
JUNWON

작성한 질문수

질문하기