예시문제 신용카드 데이터 문제 문의

Question

# 출력을 원하실 경우 print() 함수 활용 # 예시) print(df.head()) # getcwd(), chdir() 등 작업 폴더 설정 불필요 # 파일 경로 상 내부 드라이브 경로(C: 등) 접근 불가 # 데이터 파일 읽기 예제 import pandas as pd X_test = pd.read_csv("data/X_test.csv") X_train = pd.read_csv("data/X_train.csv") y_train = pd.read_csv("data/y_train.csv") # 사용자 코딩 # 답안 제출 참고 # 아래 코드 예측변수와 수험번호를 개인별로 변경하여 활용 # pd.DataFrame({'cust_id': X_test.cust_id, 'gender': pred}).to_csv('003000000.csv', index=False) # print(X_test.shape, X_train.shape, y_train.shape) # print(X_test.isnull().sum()) # print(X_test['환불금액'].describe()) # print(X_train.isnull().sum()) # print(y_train.isnull().sum()) # print(X_test.shape, X_train.shape, y_train.shape) # print(X_train.isnull().sum()) X_test['환불금액'] = X_test['환불금액'].fillna(0) X_train['환불금액'] = X_train['환불금액'].fillna(0) # print(X_train.isnull().sum()) # print(X_test.shape, X_train.shape) # print(X_test.describe(include ='object')) # print(X_train.describe(include ='object')) # print(X_test.shape, X_train.shape) # con_data = X_train.concat(X_test.) print(X_test.shape, X_train.shape) cust_ID= X_test.pop('cust_id') X_train = X_train.drop('cust_id',axis =1) y_cust_ID = y_train.pop('cust_id') print(X_test.shape, X_train.shape) print(y_train.head()) X_com = pd.concat([X_test, X_train],axis=0) X_com = pd.get_dummies(X_com) print(X_com.shape) X_test = X_com.iloc[0:2482,:] X_train = X_com.iloc[2482:5982,:] print(X_test.shape, X_train.shape,y_train.shape) from sklearn.model_selection import train_test_split X_tr, X_val, y_tr, y_val =train_test_split(X_train,y_train,test_size =0.1,random_state =0) print(X_tr.shape, X_val.shape, y_tr.shape, y_val.shape) import lightgbm as lgm model = lgm.LGBMClassifier() model.fit(X_tr, y_tr) pred = model.predict_proba(X_val) 마지막에서 오류가 발생했습니다. DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). 
  return f(*args, **kwargs)

# from sklearn.ensemble import RandomForestClassifier
# rf = RandomForestClassifier()
# rf.fit (X_tr, y_tr)
# pred = rf.predict_proba(X_val)

랜덤 포레스트로 돌려도 같은 오류가 발생합니다. 어떤 걸 잘못한 건가요?

퇴근후딴짓 · Answer

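train_test_split(X_train, y_train['타겟 컬럼'])처럼 y에는 타겟 컬럼 하나만 선택해서 넘겨야 합니다. y_train['타겟 컬럼'] -> 이 부분이 시리즈(1차원) 형태여야 합니다. 현재는 cust_id만 pop 한 y_train 전체를 넘기고 있어서 데이터 프레임(2차원, 열 벡터) 형태이고, 그래서 "column-vector y was passed" 경고가 발생합니다. 이 문제에서는 제출 양식 기준으로 타겟 컬럼이 'gender'이므로 y_train['gender']를 사용하면 됩니다.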