# Tags: object cti use features line ros test linear values
# House-price regression script.
#
# Reads the train/test CSVs, one-hot encodes and standardizes the combined
# feature table, fits a Ridge model and a random forest on log1p(SalePrice),
# and averages the two back-transformed predictions into `submission_df`.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

# Both files use their first column as the index; the test index is later
# emitted as the "Id" column of the submission frame.
train_data = pd.read_csv(
    "C:/Users/Liubotao/Desktop/House_price/input/train.csv", index_col=0
)
test_data = pd.read_csv(
    "C:/Users/Liubotao/Desktop/House_price/input/test.csv", index_col=0
)

# Histograms of the raw target next to its log1p transform.
prices = pd.DataFrame(
    {
        "price": train_data["SalePrice"],
        "log(price+1)": np.log1p(train_data["SalePrice"]),
    }
)
prices.hist()

# The models are trained on log1p(SalePrice). pop() also removes the target
# column from train_data, so it is not part of the feature table below.
y_train = np.log1p(train_data.pop("SalePrice"))

# Preprocess train and test together so both get the same dummy columns and
# the same scaling.
all_data = pd.concat((train_data, test_data), axis=0)

# Cast MSSubClass to str so get_dummies one-hot encodes it instead of passing
# it through as a number.
all_data["MSSubClass"] = all_data["MSSubClass"].astype(str)
all_dummy_data = pd.get_dummies(all_data)

# Replace missing values with the per-column mean.
mean_col = all_dummy_data.mean()
all_dummy_data = all_dummy_data.fillna(mean_col)

# Standardize (zero mean, unit std) the columns that were non-object in the
# un-encoded table; the dummy columns are left untouched.
numeric_col = all_data.columns[all_data.dtypes != "object"]
numeric_col_means = all_dummy_data.loc[:, numeric_col].mean()
numeric_col_std = all_dummy_data.loc[:, numeric_col].std()
# Plain column assignment replaces the columns outright, so integer columns
# can take the float results. (`.loc[:, cols] = ...` writes in place, and
# newer pandas releases object to that dtype change.)
all_dummy_data[numeric_col] = (
    all_dummy_data[numeric_col] - numeric_col_means
) / numeric_col_std

# Split the combined table back into its train and test rows by index label.
dummy_train_data = all_dummy_data.loc[train_data.index]
dummy_test_data = all_dummy_data.loc[test_data.index]

X_train = dummy_train_data.values
X_test = dummy_test_data.values

ridge = Ridge(alpha=15)
rf = RandomForestRegressor(n_estimators=500, max_features=.3)

ridge.fit(X_train, y_train)
rf.fit(X_train, y_train)

# expm1 undoes the log1p applied to the target, giving prices again.
y_ridge = np.expm1(ridge.predict(X_test))
y_rf = np.expm1(rf.predict(X_test))

# Simple two-model ensemble: unweighted mean of both predictions.
y_final = (y_ridge + y_rf) / 2

# NOTE(review): the frame is only built here; nothing in the visible script
# writes it to disk.
submission_df = pd.DataFrame(
    data={"Id": test_data.index, "SalePrice": y_final}
)
# Tags: object cti use features line ros test linear values
# Original article: https://www.cnblogs.com/L-BT/p/9772864.html