Machine Learning Error Plotting
[Code] Plotting prediction-error diagnostics (predicted vs. true values and residual plots) for several regression models.
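The snippets below assume a standard set of imports; here is a minimal sketch (standard package names, adjust to your environment):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor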
Plotting class
# Define the ModelComparisonPlot class
class ModelComparisonPlot:
    def __init__(self, model_name):
        self.model_name = model_name

    def plot_comparison(self, y_val, y_pred, mse, mae, r2):
        # Create a figure with two subplots
        fig, axes = plt.subplots(1, 2, figsize=(11, 5))
        # Plot the predicted vs true values
        sns.regplot(x=y_val, y=y_pred, color='blue', scatter_kws={'alpha': 0.5}, ax=axes[0])
        axes[0].plot([y_val.min(), y_val.max()], [y_val.min(), y_val.max()], 'k--', lw=2)
        axes[0].set_xlabel('True values', fontsize=12)
        axes[0].set_ylabel('Predicted values', fontsize=12)
        axes[0].set_title('Predicted vs true values')
        axes[0].grid(color='lightgray', linestyle='--', linewidth=0.5)
        # Plot the residuals vs predicted values
        residuals = y_val - y_pred
        sns.residplot(x=y_pred, y=residuals, color='blue', scatter_kws={'alpha': 0.5}, ax=axes[1])
        axes[1].plot([y_val.min(), y_val.max()], [0, 0], 'k--', lw=2)
        axes[1].set_xlabel('Predicted values', fontsize=12)
        axes[1].set_ylabel('Residuals', fontsize=12)
        axes[1].set_title('Residual plot', fontsize=15)
        axes[1].grid(color='lightgray', linestyle='--', linewidth=0.5)
        # Add a title to the figure, including the evaluation metrics
        fig.suptitle('Comparison of Predicted vs True Values and Residual Plot\n'
                     '{} (MSE={:.3f}, MAE={:.3f}, R2={:.3f})'.format(self.model_name, mse, mae, r2),
                     fontsize=15)
        # Adjust the spacing between subplots
        plt.subplots_adjust(wspace=0.4)
        # Display the figure with the title
        plt.show()
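For illustration, here is a hypothetical call with synthetic data (everything suffixed with _demo is made up and not part of the dataset used below):
# Demo call with synthetic data, purely illustrative
rng = np.random.default_rng(0)
y_val_demo = pd.Series(rng.normal(size=100))
y_pred_demo = y_val_demo + rng.normal(scale=0.3, size=100)
demo_plot = ModelComparisonPlot('DemoRegressor')
demo_plot.plot_comparison(
    y_val_demo, y_pred_demo,
    mean_squared_error(y_val_demo, y_pred_demo),
    mean_absolute_error(y_val_demo, y_pred_demo),
    r2_score(y_val_demo, y_pred_demo)
)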
One-hot encoding
# Assuming the '实体' (Entity) column is categorical
X_encoded = pd.get_dummies(df, columns=['实体'], drop_first=True)
Defining the features and target
# The target here is the '人均国内生产总值' (GDP per capita) column
X_encoded = X_encoded.fillna(0)  # Replace with your preferred imputation method
X = X_encoded.drop('人均国内生产总值', axis=1)
y = X_encoded['人均国内生产总值']
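Instead of filling missing values with zeros, a scikit-learn imputer could be applied before splitting off X and y. A sketch with median imputation (the strategy is an arbitrary choice, and it assumes all columns are numeric at this point, so a column such as 'Density' would need converting first):
from sklearn.impute import SimpleImputer

# Hypothetical alternative: median imputation instead of fillna(0)
imputer = SimpleImputer(strategy='median')
X_encoded = pd.DataFrame(imputer.fit_transform(X_encoded),
                         columns=X_encoded.columns, index=X_encoded.index)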
Splitting the training and test sets
# Assuming X and y are features and labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
Machine learning code
ExtraTreesRegressor
# Create an Extra Trees Regressor model
model_ETR = ExtraTreesRegressor(
    max_depth=None,
    max_features=None,
    min_samples_leaf=1,
    min_samples_split=2,
    n_estimators=300
)
# Fit the model
model_ETR.fit(X_train, y_train)
# Make predictions
y_pred = model_ETR.predict(X_test)
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# Print evaluation metrics
print(f"Model: {type(model_ETR).__name__}, mse: {mse}")
print(f"Model: {type(model_ETR).__name__}, mae: {mae}")
print(f"Model: {type(model_ETR).__name__}, r2: {r2}")
model_ETR_plot = ModelComparisonPlot('ExtraTreesRegressor')
# Set Seaborn style
sns.set(style="whitegrid", rc={"axes.facecolor": "white", "grid.color": "#D3D3D3"})
# Create a figure with two subplots
fig, axes = plt.subplots(1, 2, figsize=(11, 5), facecolor="#F5F5F5")
# Plot the predicted vs true values
sns.regplot(x=y_test, y=y_pred, color='red', scatter_kws={'alpha': 0.5}, ax=axes[0])
axes[0].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)
axes[0].set_xlabel('True values', fontsize=12, color="red")
axes[0].set_ylabel('Predicted values', fontsize=12, color="red")
axes[0].set_title('Predicted vs true values', color="red")
axes[0].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Plot the residuals vs predicted values
residuals = y_test - y_pred
sns.residplot(x=y_pred, y=residuals, color='green', scatter_kws={'alpha': 0.5}, ax=axes[1])
axes[1].plot([y_test.min(), y_test.max()], [0, 0], 'k--', lw=2)
axes[1].set_xlabel('Predicted values', fontsize=12, color="green")
axes[1].set_ylabel('Residuals', fontsize=12, color="green")
axes[1].set_title('Residual plot', fontsize=15, color="green")
axes[1].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Add a title to the figure
fig.suptitle('Comparison of Predicted vs True Values and Residual Plot\n{}'.format(model_ETR_plot.model_name), fontsize=15, color="black")
# Adjust the spacing between subplots
plt.subplots_adjust(wspace=0.4)
# Display the figure with the title
plt.show()
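Note that an equivalent two-panel figure (with the class's default colours) can be produced with the ModelComparisonPlot instance created above, which avoids repeating the plotting code for every model:
model_ETR_plot.plot_comparison(y_test, y_pred, mse, mae, r2)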
DecisionTreeRegressor
# Create a decision tree regression model
dt_model = DecisionTreeRegressor()
# Fit the model
dt_model.fit(X_train, y_train)
# Make predictions
predictions = dt_model.predict(X_test)
# Evaluate the model
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
# Print evaluation metrics
print(f"Model: {type(dt_model).__name__}, mse: {mse}")
print(f"Model: {type(dt_model).__name__}, mae: {mae}")
print(f"Model: {type(dt_model).__name__}, r2: {r2}")
dt_model_plot = ModelComparisonPlot('DecisionTreeRegressor')
# Set Seaborn style
sns.set(style="whitegrid", rc={"axes.facecolor": "white", "grid.color": "#D3D3D3"})
# Create a figure with two subplots
fig, axes = plt.subplots(1, 2, figsize=(11, 5), facecolor="#F5F5F5")
# Plot the predicted vs true values
sns.regplot(x=y_test, y=predictions, color='red', scatter_kws={'alpha': 0.5}, ax=axes[0])
axes[0].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)
axes[0].set_xlabel('True values', fontsize=12, color="red")
axes[0].set_ylabel('Predicted values', fontsize=12, color="red")
axes[0].set_title('Predicted vs true values', color="red")
axes[0].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Plot the residuals vs predicted values
residuals = y_test - predictions
sns.residplot(x=predictions, y=residuals, color='green', scatter_kws={'alpha': 0.5}, ax=axes[1])
axes[1].plot([y_test.min(), y_test.max()], [0, 0], 'k--', lw=2)
axes[1].set_xlabel('Predicted values', fontsize=12, color="green")
axes[1].set_ylabel('Residuals', fontsize=12, color="green")
axes[1].set_title('Residual plot', fontsize=15, color="green")
axes[1].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Add a title to the figure
fig.suptitle('Comparison of Predicted vs True Values and Residual Plot\n{}'.format(dt_model_plot.model_name), fontsize=15, color="black")
# Adjust the spacing between subplots
plt.subplots_adjust(wspace=0.4)
# Display the figure with the title
plt.show()
LinearRegression
# Create a linear regression model
linear_model = LinearRegression()
# Fit the model
linear_model.fit(X_train, y_train)
# Make predictions
predictions_linear = linear_model.predict(X_test)
# Evaluate the model
mse_linear = mean_squared_error(y_test, predictions_linear)
mae_linear = mean_absolute_error(y_test, predictions_linear)
r2_linear = r2_score(y_test, predictions_linear)
# Print evaluation metrics
print(f"Model: {type(linear_model).__name__}, mse: {mse_linear}")
print(f"Model: {type(linear_model).__name__}, mae: {mae_linear}")
print(f"Model: {type(linear_model).__name__}, r2: {r2_linear}")
linear_model_plot = ModelComparisonPlot('LinearRegression')
# Set Seaborn style
sns.set(style="whitegrid", rc={"axes.facecolor": "white", "grid.color": "#D3D3D3"})
# Create a figure with two subplots
fig, axes = plt.subplots(1, 2, figsize=(11, 5), facecolor="#F5F5F5")
# Plot the predicted vs true values
sns.regplot(x=y_test, y=predictions_linear, color='red', scatter_kws={'alpha': 0.5}, ax=axes[0])
axes[0].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)
axes[0].set_xlabel('True values', fontsize=12, color="red")
axes[0].set_ylabel('Predicted values', fontsize=12, color="red")
axes[0].set_title('Predicted vs true values', color="red")
axes[0].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Plot the residuals vs predicted values
residuals_linear = y_test - predictions_linear
sns.residplot(x=predictions_linear, y=residuals_linear, color='green', scatter_kws={'alpha': 0.5}, ax=axes[1])
axes[1].plot([y_test.min(), y_test.max()], [0, 0], 'k--', lw=2)
axes[1].set_xlabel('Predicted values', fontsize=12, color="green")
axes[1].set_ylabel('Residuals', fontsize=12, color="green")
axes[1].set_title('Residual plot', fontsize=15, color="green")
axes[1].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Add a title to the figure
fig.suptitle('Comparison of Predicted vs True Values and Residual Plot\n{}'.format(linear_model_plot.model_name), fontsize=15, color="black")
# Adjust the spacing between subplots
plt.subplots_adjust(wspace=0.4)
# Display the figure with the title
plt.show()
KNeighborsRegressor
# Create a KNN regression model
knn_model = KNeighborsRegressor()
# Fit the model
knn_model.fit(X_train, y_train)
# Make predictions
predictions_knn = knn_model.predict(X_test)
# Evaluate the model
mse_knn = mean_squared_error(y_test, predictions_knn)
mae_knn = mean_absolute_error(y_test, predictions_knn)
r2_knn = r2_score(y_test, predictions_knn)
# Print evaluation metrics
print(f"Model: {type(knn_model).__name__}, mse: {mse_knn}")
print(f"Model: {type(knn_model).__name__}, mae: {mae_knn}")
print(f"Model: {type(knn_model).__name__}, r2: {r2_knn}")
knn_model_plot = ModelComparisonPlot('KNeighborsRegressor')
# Set Seaborn style
sns.set(style="whitegrid", rc={"axes.facecolor": "white", "grid.color": "#D3D3D3"})
# Create a figure with two subplots
fig, axes = plt.subplots(1, 2, figsize=(11, 5), facecolor="#F5F5F5")
# Plot the predicted vs true values
sns.regplot(x=y_test, y=predictions_knn, color='red', scatter_kws={'alpha': 0.5}, ax=axes[0])
axes[0].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)
axes[0].set_xlabel('True values', fontsize=12, color="red")
axes[0].set_ylabel('Predicted values', fontsize=12, color="red")
axes[0].set_title('Predicted vs true values', color="red")
axes[0].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Plot the residuals vs predicted values
residuals_knn = y_test - predictions_knn
sns.residplot(x=predictions_knn, y=residuals_knn, color='green', scatter_kws={'alpha': 0.5}, ax=axes[1])
axes[1].plot([y_test.min(), y_test.max()], [0, 0], 'k--', lw=2)
axes[1].set_xlabel('Predicted values', fontsize=12, color="green")
axes[1].set_ylabel('Residuals', fontsize=12, color="green")
axes[1].set_title('Residual plot', fontsize=15, color="green")
axes[1].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Add a title to the figure
fig.suptitle('Comparison of Predicted vs True Values and Residual Plot\n{}'.format(knn_model_plot.model_name), fontsize=15, color="black")
# Adjust the spacing between subplots
plt.subplots_adjust(wspace=0.4)
# Display the figure with the title
plt.show()
XGBoost
# Convert the 'Density' column to numeric
X_train['Density'] = pd.to_numeric(X_train['Density'], errors='coerce')
X_test['Density'] = pd.to_numeric(X_test['Density'], errors='coerce')
# Drop rows with missing values after conversion, and keep the labels aligned with the remaining rows
X_train = X_train.dropna()
X_test = X_test.dropna()
y_train = y_train.loc[X_train.index]
y_test = y_test.loc[X_test.index]
# Create an XGBoost regression model
xgb_model = XGBRegressor()
# Fit the model
xgb_model.fit(X_train, y_train)
# Make predictions
predictions_xgb = xgb_model.predict(X_test)
# Evaluate the model
mse_xgb = mean_squared_error(y_test, predictions_xgb)
mae_xgb = mean_absolute_error(y_test, predictions_xgb)
r2_xgb = r2_score(y_test, predictions_xgb)
# Print evaluation metrics
print(f"Model: {type(xgb_model).__name__}, mse: {mse_xgb}")
print(f"Model: {type(xgb_model).__name__}, mae: {mae_xgb}")
print(f"Model: {type(xgb_model).__name__}, r2: {r2_xgb}")
xgb_model_plot = ModelComparisonPlot('XGBRegressor')
# Set Seaborn style
sns.set(style="whitegrid", rc={"axes.facecolor": "white", "grid.color": "#D3D3D3"})
# Create a figure with two subplots
fig, axes = plt.subplots(1, 2, figsize=(11, 5), facecolor="#F5F5F5")
# Plot the predicted vs true values
sns.regplot(x=y_test, y=predictions_xgb, color='red', scatter_kws={'alpha': 0.5}, ax=axes[0])
axes[0].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)
axes[0].set_xlabel('True values', fontsize=12, color="red")
axes[0].set_ylabel('Predicted values', fontsize=12, color="red")
axes[0].set_title('Predicted vs true values', color="red")
axes[0].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Plot the residuals vs predicted values
residuals_xgb = y_test - predictions_xgb
sns.residplot(x=predictions_xgb, y=residuals_xgb, color='green', scatter_kws={'alpha': 0.5}, ax=axes[1])
axes[1].plot([y_test.min(), y_test.max()], [0, 0], 'k--', lw=2)
axes[1].set_xlabel('Predicted values', fontsize=12, color="green")
axes[1].set_ylabel('Residuals', fontsize=12, color="green")
axes[1].set_title('Residual plot', fontsize=15, color="green")
axes[1].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Add a title to the figure
fig.suptitle('Comparison of Predicted vs True Values and Residual Plot\n{}'.format(xgb_model_plot.model_name), fontsize=15, color="black")
# Adjust the spacing between subplots
plt.subplots_adjust(wspace=0.4)
# Display the figure with the title
plt.show()
Gaussian Naive Bayes
# Convert continuous labels to binary categories
# (the training-set mean is used as the threshold for both splits to avoid leakage)
y_train_binary = (y_train > y_train.mean()).astype(int)
y_test_binary = (y_test > y_train.mean()).astype(int)
# Create a Naive Bayes model
nb_model = GaussianNB()
# Fit the model
nb_model.fit(X_train, y_train_binary)
# Make predictions
predictions = nb_model.predict(X_test)
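Since GaussianNB is a classifier working on binarised labels here, classification metrics may be more informative than regression-style residuals. A brief sketch using scikit-learn's standard metrics:
from sklearn.metrics import accuracy_score, confusion_matrix

# Classification view of the GaussianNB predictions
print("GaussianNB accuracy:", accuracy_score(y_test_binary, predictions))
print("Confusion matrix:\n", confusion_matrix(y_test_binary, predictions))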
Cross-validation model comparison
# Convert 'Density' to numeric and impute before redefining the features and target
X_encoded['Density'] = pd.to_numeric(X_encoded['Density'], errors='coerce')
X_encoded = X_encoded.fillna(0)
# This time the target is '国内生产总值增长率' (GDP growth)
X = X_encoded.drop('国内生产总值增长率', axis=1)
y = X_encoded['国内生产总值增长率']
# Assuming X and y are your features and labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# List of algorithms to check
algorithms = [
    LinearRegression(),
    DecisionTreeRegressor(),
    KNeighborsRegressor(),
    XGBRegressor()
]
best_mse = float('inf')
best_model = None
# Loop through each algorithm
for model in algorithms:
    # Fit the model on the training split
    model.fit(X_train, y_train)
    # Make predictions on the hold-out split
    # (named predictions_cv so the GaussianNB predictions above are not overwritten)
    predictions_cv = model.predict(X_test)
    # Evaluate the model using cross-validation with mean squared error
    mse_scores = -cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error')
    mean_mse = np.mean(mse_scores)
    # Print the cross-validation mean squared error
    print(f"{model.__class__.__name__} - Cross-Validation MSE: {mean_mse}")
    # Update the best model if the current model has lower mean squared error
    if mean_mse < best_mse:
        best_mse = mean_mse
        best_model = model
# Print the best model and its mean squared error
print("\nBest Model:")
print(best_model)
print("Best Cross-Validation MSE:", best_mse)
GaussianNB error plots
# Set Seaborn style
sns.set(style="whitegrid", rc={"axes.facecolor": "white", "grid.color": "#D3D3D3"})
# Create a figure with two subplots
fig, axes = plt.subplots(1, 2, figsize=(11, 5), facecolor="#F5F5F5")
# Plot the predicted vs true values
sns.regplot(x=y_test_binary, y=predictions, color='red', scatter_kws={'alpha': 0.5}, ax=axes[0])
axes[0].plot([y_test_binary.min(), y_test_binary.max()], [y_test_binary.min(), y_test_binary.max()], 'k--', lw=2)
axes[0].set_xlabel('True values', fontsize=12, color="red")
axes[0].set_ylabel('Predicted values', fontsize=12, color="red")
axes[0].set_title('Predicted vs true values', color="red")
axes[0].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Plot the residuals vs predicted values
residuals = y_test_binary - predictions
sns.residplot(x=predictions, y=residuals, color='green', scatter_kws={'alpha': 0.5}, ax=axes[1])
axes[1].plot([y_test_binary.min(), y_test_binary.max()], [0, 0], 'k--', lw=2)
axes[1].set_xlabel('Predicted values', fontsize=12, color="green")
axes[1].set_ylabel('Residuals', fontsize=12, color="green")
axes[1].set_title('Residual plot', fontsize=15, color="green")
axes[1].grid(color='lightgray', linestyle='--', linewidth=0.5)
# Add a title to the figure
fig.suptitle('Comparison of Predicted vs True Values and Residual Plot\nGaussianNB', fontsize=15, color="black")
# Adjust the spacing between subplots
plt.subplots_adjust(wspace=0.4)
# Display the figure with the title
plt.show()