欧美一区二区不卡在线,国产精品,人妻内射视频麻豆,欧美xxxx精品另类

import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# pandas is needed by read_excel below; it was not imported before this point,
# so the original script stopped with a NameError on `pd`.
import pandas as pd
from sklearn.model_selection import train_test_split, KFold

# Global matplotlib settings used by every figure drawn later in the script.
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['axes.unicode_minus'] = False

# Load the regression dataset.
# NOTE(review): the file name and the target column name look mis-encoded
# (pinyin fragments in parentheses). They are left untouched because they must
# match the real file/column on disk -- confirm against the actual data.
df = pd.read_excel('2024-11-6-公眾號(hào)Python機(jī)器學(xué)習(xí)AI—regression.xlsx')

# Every column except the target is a feature.
X = df.drop(['待預(yù)測變量Y'], axis=1)
y = df['待預(yù)測變量Y']

# Hold out 20% of the rows as the test split; fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

从文件中加载数据，分离出特征和目标变量“待预测变量Y”，并将数据集划分为训练集（80%）和测试集（20%），为后续的机器学习建模做准备。

利用贝叶斯优化提升XGBoost(回归)模型性能

from hyperopt import fmin, tpe, hp

from xgboost import XGBRegressor

from sklearn.metrics import mean_squared_error



# Hyperopt search space for the XGBoost regressor.
# hp.choice picks one of the listed values; hp.uniform samples from [low, high].
parameter_space_xgb = dict(
    n_estimators=hp.choice('n_estimators', [50, 100, 200, 300]),    # number of trees
    max_depth=hp.choice('max_depth', [3, 5, 10, 15]),               # maximum tree depth
    learning_rate=hp.uniform('learning_rate', 0.01, 0.3),           # learning rate
    subsample=hp.uniform('subsample', 0.5, 1.0),                    # row sampling ratio per tree
    colsample_bytree=hp.uniform('colsample_bytree', 0.5, 1.0),      # feature sampling ratio per tree
    gamma=hp.uniform('gamma', 0, 5),                                # minimum loss reduction needed to keep a split
)



# 定義目標(biāo)函數(shù)

def objective(params):
    """Hyperopt objective for the regressor: lower is better.

    Builds an XGBRegressor from the sampled hyperparameters, fits it on the
    module-level training split and returns the mean squared error of its
    predictions on the module-level test split.

    Args:
        params: mapping with the keys 'n_estimators', 'max_depth',
            'learning_rate', 'subsample', 'colsample_bytree' and 'gamma'.

    Returns:
        The MSE on (X_test, y_test), which Hyperopt minimises.
    """
    # NOTE(review): the score is computed on the test split, so the later
    # test-set metrics are not an unbiased estimate -- consider a validation
    # split or cross-validation here.
    tuned_keys = (
        'n_estimators',
        'max_depth',
        'learning_rate',
        'subsample',
        'colsample_bytree',
        'gamma',
    )
    regressor = XGBRegressor(
        random_state=42,
        **{key: params[key] for key in tuned_keys},
    )
    regressor.fit(X_train, y_train)
    return mean_squared_error(y_test, regressor.predict(X_test))



from hyperopt import space_eval

# Run the hyperparameter search.
best_params = fmin(
    fn=objective,                # objective to minimise
    space=parameter_space_xgb,   # search space
    algo=tpe.suggest,            # TPE suggestion algorithm
    max_evals=100,               # maximum number of evaluations
)

# Show the raw result. For hp.choice parameters fmin reports the *index* of
# the chosen option, not the option itself.
print("Best hyperparameters:", best_params)

# Translate the raw result back into concrete values using the search space
# itself, so the option lists are not duplicated by hand and cannot drift
# out of sync with parameter_space_xgb.
best_config_regression = space_eval(parameter_space_xgb, best_params)

# Rebuild the model with the best hyperparameters.
best_model_regression = XGBRegressor(
    n_estimators=best_config_regression['n_estimators'],
    max_depth=best_config_regression['max_depth'],
    learning_rate=best_config_regression['learning_rate'],
    subsample=best_config_regression['subsample'],
    colsample_bytree=best_config_regression['colsample_bytree'],
    gamma=best_config_regression['gamma'],
    random_state=42,
)

# Fit the final model on the training split.
best_model_regression.fit(X_train, y_train)

使用Hyperopt库中的贝叶斯优化方法，对XGBoost回归模型的超参数进行自动化调优，目标是最小化模型在测试集上的均方误差（MSE）。首先，定义超参数搜索空间，包括决策树的数量、最大深度、学习率、采样比例、特征采样比例和剪枝阈值等关键参数。然后，编写目标函数，该函数使用给定参数组合构建XGBoost模型，在训练集上拟合并在测试集上进行预测，最后返回测试误差MSE。通过Hyperopt的 fmin 函数，在100次评估内找到能最小化MSE的最佳参数组合。找到最佳参数后，代码会重新构建并训练XGBoost模型，以确保最终模型能够在最佳参数下实现最优的预测性能。这种优化流程不仅节省了手动调参的时间，还提高了模型的准确性，为后续的预测任务打下了坚实基础。

评估模型性能：训练集与测试集的回归指标对比

from sklearn import metrics



# Predict on both splits with the tuned regressor.
y_pred_train = best_model_regression.predict(X_train)
y_pred_test = best_model_regression.predict(X_test)

# Plain-list copies of the predictions; the metric calls below consume these.
y_pred_train_list, y_pred_test_list = y_pred_train.tolist(), y_pred_test.tolist()

# Training-split metrics (RMSE is derived from MSE).
mse_train = metrics.mean_squared_error(y_train, y_pred_train_list)
mae_train = metrics.mean_absolute_error(y_train, y_pred_train_list)
r2_train = metrics.r2_score(y_train, y_pred_train_list)
rmse_train = np.sqrt(mse_train)

# Test-split metrics, same four quantities.
mse_test = metrics.mean_squared_error(y_test, y_pred_test_list)
mae_test = metrics.mean_absolute_error(y_test, y_pred_test_list)
r2_test = metrics.r2_score(y_test, y_pred_test_list)
rmse_test = np.sqrt(mse_test)

# Report: training split first, then test split, in MSE / RMSE / MAE / R^2 order.
# NOTE(review): the printed labels look mis-encoded (pinyin in parentheses);
# they are runtime output and are reproduced unchanged here.
print("訓(xùn)練集評(píng)價(jià)指標(biāo):")
print("均方誤差 (MSE):", mse_train)
print("均方根誤差 (RMSE):", rmse_train)
print("平均絕對(duì)誤差 (MAE):", mae_train)
print("擬合優(yōu)度 (R-squared):", r2_train)

print("\n測試集評(píng)價(jià)指標(biāo):")
print("均方誤差 (MSE):", mse_test)
print("均方根誤差 (RMSE):", rmse_test)
print("平均絕對(duì)誤差 (MAE):", mae_test)
print("擬合優(yōu)度 (R-squared):", r2_test)

计算并输出XGBoost回归模型在训练集和测试集上的性能指标，包括均方误差（MSE）、均方根误差（RMSE）、平均绝对误差（MAE）和拟合优度（R-squared），以评估模型在训练集和测试集上的预测准确度和拟合效果，从而帮助判断模型的泛化能力。

模型预测可视化

通过计算置信区间来可视化XGBoost回归模型的预测效果，展示预测值与真实值之间的拟合关系。首先，计算模型的拟合线，并使用残差和样本量估计出预测的均方误差，进而基于95%置信水平和t分布计算出置信区间。在绘图部分，生成一个包含以下元素的图表：实际观测值与预测值的散点图、一条理想的1:1对角线、预测值的拟合线（包括模型的R²和MAE值），以及带有扩展置信区间的阴影区域。最终，这个图不仅展示了模型的拟合效果，还直观地表明模型预测的可靠性和不确定性。

分类模型

利用贝叶斯优化提升XGBoost(分类)模型性能

import pandas as pd

import numpy as np

from hyperopt import fmin, tpe, hp

from sklearn.model_selection import train_test_split

from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score



# Load the classification dataset.
# NOTE(review): the file name and label column name look mis-encoded; they are
# kept exactly as-is because they must match the data on disk -- confirm.
df = pd.read_excel('2024-11-6-公眾號(hào)Python機(jī)器學(xué)習(xí)AI—class.xlsx')

# The label column is the target; every other column is a feature.
y = df['目標(biāo)']
X = df.drop(columns=['目標(biāo)'])

# 80/20 split, stratified on the label so both splits keep the class ratios.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)



# Hyperopt search space for the XGBoost classifier.
# hp.choice picks one of the listed values; hp.uniform samples from [low, high].
parameter_space_xgb = dict(
    n_estimators=hp.choice('n_estimators', [50, 100, 200, 300]),    # number of trees
    max_depth=hp.choice('max_depth', [3, 5, 10, 15]),               # maximum depth
    learning_rate=hp.uniform('learning_rate', 0.01, 0.3),           # learning rate
    subsample=hp.uniform('subsample', 0.5, 1.0),                    # row sampling ratio per tree
    colsample_bytree=hp.uniform('colsample_bytree', 0.5, 1.0),      # feature sampling ratio per tree
    gamma=hp.uniform('gamma', 0, 5),                                # minimum loss reduction needed to keep a split
)



# 定義目標(biāo)函數(shù)

def objective(params):
    """Hyperopt objective for the classifier: lower is better.

    Builds an XGBClassifier from the sampled hyperparameters, fits it on the
    module-level training split and scores its predictions on the module-level
    test split.

    Args:
        params: mapping with the keys 'n_estimators', 'max_depth',
            'learning_rate', 'subsample', 'colsample_bytree' and 'gamma'.

    Returns:
        The negated accuracy on (X_test, y_test); negated because Hyperopt
        minimises the objective.
    """
    # NOTE(review): the score is computed on the test split, so the later
    # test-set metrics are not an unbiased estimate -- consider a validation
    # split or cross-validation here.
    tuned_keys = (
        'n_estimators',
        'max_depth',
        'learning_rate',
        'subsample',
        'colsample_bytree',
        'gamma',
    )
    classifier = XGBClassifier(
        random_state=42,
        use_label_encoder=False,
        eval_metric='logloss',
        **{key: params[key] for key in tuned_keys},
    )
    classifier.fit(X_train, y_train)
    return -accuracy_score(y_test, classifier.predict(X_test))



from hyperopt import space_eval

# Run the hyperparameter search.
best_params = fmin(
    fn=objective,                # objective to minimise
    space=parameter_space_xgb,   # search space
    algo=tpe.suggest,            # TPE suggestion algorithm
    max_evals=100,               # maximum number of evaluations
)

# Show the raw result. For hp.choice parameters fmin reports the *index* of
# the chosen option, not the option itself.
print("Best hyperparameters:", best_params)

# Translate the raw result back into concrete values using the search space
# itself, so the option lists are not duplicated by hand and cannot drift
# out of sync with parameter_space_xgb.
best_config_class = space_eval(parameter_space_xgb, best_params)

# Build the final classifier with the best hyperparameters.
best_model_class = XGBClassifier(
    n_estimators=best_config_class['n_estimators'],
    max_depth=best_config_class['max_depth'],
    learning_rate=best_config_class['learning_rate'],
    subsample=best_config_class['subsample'],
    colsample_bytree=best_config_class['colsample_bytree'],
    gamma=best_config_class['gamma'],
    random_state=42,
    use_label_encoder=False,
    eval_metric='logloss',
)

# Fit the final model on the training split.
best_model_class.fit(X_train, y_train)

使用贝叶斯优化（Hyperopt库）对XGBoost分类模型的超参数进行自动调优，以最大化分类模型在测试集上的准确率。首先，从Excel中加载数据并将其分为特征和目标变量，再划分为训练集和测试集。然后，定义超参数搜索空间，涵盖决策树数量、树的最大深度、学习率、样本和特征采样比例，以及剪枝所需的损失减少量等关键参数。定义的目标函数会根据每组超参数组合训练模型，并返回测试集上的负准确率，以便Hyperopt最小化该值来寻找最佳超参数组合。完成优化后，代码构建并训练一个包含最佳超参数的最终XGBoost分类模型，以确保模型在最佳条件下达到最高的分类准确率。这种自动化调参方法不仅显著提高模型性能，还减少了手动调参的时间和工作量。

模型性能评估：多维度分类指标分析

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report

# Predict the test split with the tuned classifier. The original code read a
# module-level `y_pred` that was never assigned (the only `y_pred` in the
# script is a local variable inside objective()), which raises NameError.
y_pred = best_model_class.predict(X_test)

# Label-based metrics on the test split (sklearn defaults, i.e. binary averaging).
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# AUC needs predicted probabilities; column 1 is used as the positive-class
# score, which is only meaningful for binary classification.
try:
    y_pred_proba = best_model_class.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, y_pred_proba)
except AttributeError:
    # Model exposes no predict_proba: report AUC as unavailable.
    auc = None

# Collect everything into a two-column table (metric name, value).
metrics_df = pd.DataFrame({
    'Metric': ['AUC', 'Precision', 'Recall', 'F1 Score', 'Accuracy'],
    'Value': [auc, precision, recall, f1, accuracy],
})

计算XGBoost分类模型在测试集上的多个评估指标，包括准确率（accuracy）、精确率（precision）、召回率（recall）、F1分数（F1 Score）和AUC（仅适用于二分类）。这些指标各自反映模型不同方面的性能：准确率表示整体预测的准确程度，精确率关注正类预测的准确性，召回率衡量模型找到所有正类的能力，F1分数则平衡了精确率和召回率的权重。AUC则进一步评估了模型在不同阈值下区分正负类的能力。因此，使用多个指标能够更全面地了解模型的表现，从而帮助我们避免因单一指标误导而造成的模型优化偏差。

模型预测可视化