https://dataq.goorm.io/exam/116674/체험하기/quiz/4
여기서 하는 성별예측 문제에서
import pandas as pd
train = pd.read_csv("data/customer_train.csv")
test = pd.read_csv("data/customer_test.csv")
# print(train.shape, test.shape)
# print(train.info())
# print(train.isnull().sum())
# print(test.isnull().sum())
#결측치 처리 (환불금액)
train['환불금액'] = train['환불금액'].fillna(0)
test['환불금액'] = test['환불금액'].fillna(0)
# print(test.isnull().sum())
# print(train['성별'].value_counts())
#인코딩
target = train.pop('성별')
train = pd.get_dummies(train)
test = pd.get_dummies(test)
#검증 데이터 분리
from sklearn.model_selection import train_test_split
X_tr, X_val, y_tr, y_val = train_test_split(train, target, test_size=0.2, random_state=0)
# print(X_tr.shape, X_val.shape, y_tr.shape, y_val.shape)
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=0)
rf.fit(X_tr, y_tr)
pred = rf.predict_proba(X_val)
# print(pred)
from sklearn.metrics import roc_auc_score
roc = roc_auc_score(y_val, pred[:,1])
#예측 및 파일생성
pred = rf.predict_proba(test)
submit = pd.DataFrame({
"pred":pred[:,1]
})
submit.to_csv("result.csv", index=False)
# print(pd.read_csv("result.csv"))
이렇게 코딩을 했는데 예측 및 파일 생성에서
pred = rf.predict_proba(test) 여기 부분이 오류가 뜨는데 왜 그런가요 ㅠㅠ
> Makefile:6: recipe for target 'py3_run' failed
make: *** [py3_run] Error 1
Traceback (most recent call last):
File "/goorm/Main.out", line 43, in <module>
pred = rf.predict_proba(test)
File "/usr/local/lib/python3.9/dist-packages/sklearn/ensemble/_forest.py", line 674, in predict_proba
X = self._validate_X_predict(X)
File "/usr/local/lib/python3.9/dist-packages/sklearn/ensemble/_forest.py", line 422, in validateX_predict
return self.estimators_[0]._validate_X_predict(X, check_input=True)
File "/usr/local/lib/python3.9/dist-packages/sklearn/tree/_classes.py", line 407, in validateX_predict
X = self._validate_data(X, dtype=DTYPE, accept_sparse="csr",
File "/usr/local/lib/python3.9/dist-packages/sklearn/base.py", line 437, in validatedata
self._check_n_features(X, reset=reset)
File "/usr/local/lib/python3.9/dist-packages/sklearn/base.py", line 365, in checkn_features
raise ValueError(
ValueError: X has 73 features, but DecisionTreeClassifier is expecting 74 features as input.
이렇게 뜹니다,,
고민하기 카테고리는 제게 미답변으로 나타나지 않아 답변이 늦었어요
카테고리가 달라 에러가 발생했어요
강의 영상중 꿀팁 섹션 인코딩에서 에러 발생한다면 영상 확인 부탁드립니다.
답글