欧美超级乱婬视频播放,欧美爱爱免费视频,欧洲美熟女乱又伦

Python 實現 LightGBM

我們將使用 Kaggle 數據集 “Bike Sharing Dataset” 來訓練一個 LightGBM 模型，用于預測自行車共享的使用量。

首先，導入數據并對數據進行基本處理。我們會選擇少量特征進行簡化操作。

在訓練過程中，我們手動實現一個簡化的分裂算法。我們將對每個特征進行分裂，計算不同分裂點的殘差平方和來選擇最佳分裂點。

通過計算殘差并訓練新樹來改進模型。

我們可以生成以下 4 個分析圖表：

特征分布圖：顯示主要特征（如溫度、濕度）和自行車使用量的分布情況。

損失函數下降圖：展示模型迭代過程中損失函數的變化趨勢。

特征重要性圖：分析哪些特征在模型中最重要。

預測值與實際值比較圖：展示預測結果與實際值之間的差異。

完整代碼給到大家~

"""Hand-rolled gradient-boosted regression trees on the Bike Sharing dataset.

The module defines a small regression tree (``SimpleTree``) that searches every
unique feature value for the split with the lowest weighted MSE, and a boosting
wrapper (``SimpleGBM``) that repeatedly fits such trees to the current
residuals.  Run as a script, it trains on ``bike_sharing.csv`` and shows four
diagnostic plots.
"""

import numpy as np
import pandas as pd

# Columns read from the CSV, with the label and colour used for each in plots.
FEATURE_COLUMNS = ['temp', 'hum', 'windspeed']
FEATURE_LABELS = ['Temperature', 'Humidity', 'Windspeed']
FEATURE_COLORS = ['blue', 'green', 'red']
TARGET_COLUMN = 'cnt'


def mse(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``."""
    return np.mean((y_true - y_pred) ** 2)


def gradient(y_true, y_pred):
    """Return the residuals ``y_true - y_pred`` that the next tree is fit to.

    For squared-error loss this is the negative gradient with respect to the
    prediction (up to a constant factor), hence the name.
    """
    return y_true - y_pred


class SimpleTree:
    """Depth-limited regression tree using exhaustive threshold search.

    After ``fit`` a node is either a leaf (``value`` set, ``left``/``right``
    are ``None``) or an internal node (``feature``, ``threshold``, ``left``,
    ``right`` and ``gain`` set).
    """

    def __init__(self, max_depth=3, min_samples_split=10):
        """Store the growth limits.

        Args:
            max_depth: Nodes at this depth or deeper always become leaves.
            min_samples_split: Nodes with fewer samples always become leaves.
        """
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self._reset()

    def _reset(self):
        """Clear fitted state so a re-fit never sees stale leaf/split data."""
        self.feature = None
        self.threshold = None
        self.left = None
        self.right = None
        self.value = None
        self.gain = 0.0

    def _make_leaf(self, y):
        """Turn this node into a leaf predicting the mean of ``y``."""
        self.value = y.mean()
        return self

    def _best_split(self, X, y):
        """Return ``(feature, threshold, weighted_mse)`` of the best split.

        ``threshold`` is ``None`` when no candidate separates the samples into
        two non-empty groups.
        """
        n_samples, n_features = X.shape
        best_mse, best_threshold, best_feature = float('inf'), None, None
        for feature in range(n_features):
            column = X[:, feature]
            for threshold in np.unique(column):
                left = y[column <= threshold]
                right = y[column > threshold]
                # A one-sided "split" (e.g. threshold == column max) has an
                # empty child whose mean is NaN; it can never win, so skip it
                # instead of triggering empty-slice warnings.
                if len(left) == 0 or len(right) == 0:
                    continue
                mse_val = (len(left) * mse(left, left.mean())
                           + len(right) * mse(right, right.mean())) / n_samples
                if mse_val < best_mse:
                    best_mse = mse_val
                    best_threshold = threshold
                    best_feature = feature
        return best_feature, best_threshold, best_mse

    def fit(self, X, y, depth=0):
        """Grow the subtree rooted at this node and return ``self``.

        Args:
            X: 2-D array of features, one row per sample.
            y: 1-D array of targets aligned with the rows of ``X``.
            depth: Depth of this node; callers normally leave it at 0.
        """
        self._reset()
        if depth >= self.max_depth or len(y) < self.min_samples_split:
            return self._make_leaf(y)

        feature, threshold, split_mse = self._best_split(X, y)
        if threshold is None:
            return self._make_leaf(y)

        self.feature = feature
        self.threshold = threshold
        # Reduction in the sum of squared errors achieved by this split;
        # clamped because rounding can make a zero gain slightly negative.
        parent_sse = np.sum((y - y.mean()) ** 2)
        self.gain = max(float(parent_sse - split_mse * len(y)), 0.0)

        left_idx = X[:, feature] <= threshold
        right_idx = X[:, feature] > threshold
        self.left = SimpleTree(self.max_depth, self.min_samples_split).fit(
            X[left_idx], y[left_idx], depth + 1)
        self.right = SimpleTree(self.max_depth, self.min_samples_split).fit(
            X[right_idx], y[right_idx], depth + 1)
        return self

    def predict(self, X):
        """Return a 1-D float array with one prediction per row of ``X``.

        Raises:
            AttributeError: If the tree has not been fitted.
        """
        if self.left is None:
            if self.value is None:
                raise AttributeError("SimpleTree has not been fitted")
            return np.full(X.shape[0], self.value)
        mask = X[:, self.feature] <= self.threshold
        y_pred = np.empty(X.shape[0])
        y_pred[mask] = self.left.predict(X[mask])
        y_pred[~mask] = self.right.predict(X[~mask])
        return y_pred

    def accumulate_gain(self, totals):
        """Add every split's gain in this subtree to ``totals[feature]``."""
        if self.left is None:
            return
        totals[self.feature] += self.gain
        self.left.accumulate_gain(totals)
        self.right.accumulate_gain(totals)


class SimpleGBM:
    """Gradient boosting for squared-error regression built on ``SimpleTree``."""

    def __init__(self, n_estimators=10, learning_rate=0.1, max_depth=3):
        """Store the boosting hyper-parameters.

        Args:
            n_estimators: Number of trees to fit.
            learning_rate: Factor applied to every tree's contribution.
            max_depth: ``max_depth`` passed to each ``SimpleTree``.
        """
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        # Baseline prediction; 0.0 until fit() replaces it with the target mean.
        self.init_ = 0.0
        self.n_features_ = None

    def fit(self, X, y):
        """Fit the ensemble to ``(X, y)`` and return ``self``.

        Boosting starts from the mean of ``y`` rather than zero, so a small
        number of trees with a small learning rate is not biased towards 0.
        Any trees from a previous fit are discarded.
        """
        self.trees = []
        self.init_ = float(np.mean(y)) if len(y) else 0.0
        self.n_features_ = X.shape[1]
        y_pred = np.full(len(y), self.init_)
        for _ in range(self.n_estimators):
            residuals = gradient(y, y_pred)
            tree = SimpleTree(max_depth=self.max_depth).fit(X, residuals)
            y_pred += self.learning_rate * tree.predict(X)
            self.trees.append(tree)
        return self

    def staged_predict(self, X):
        """Yield the prediction for ``X`` after each successive tree.

        Each yielded array is a fresh copy, so callers may keep them all.
        """
        y_pred = np.full(X.shape[0], self.init_)
        for tree in self.trees:
            y_pred += self.learning_rate * tree.predict(X)
            yield y_pred.copy()

    def predict(self, X):
        """Return the full-ensemble prediction for each row of ``X``."""
        y_pred = np.full(X.shape[0], self.init_)
        for tree in self.trees:
            y_pred += self.learning_rate * tree.predict(X)
        return y_pred

    def feature_importances(self):
        """Return each feature's share of the total split gain.

        The result sums to 1, or is all zeros when no split reduced the error.

        Raises:
            AttributeError: If the model has not been fitted.
        """
        if self.n_features_ is None:
            raise AttributeError("SimpleGBM has not been fitted")
        totals = np.zeros(self.n_features_)
        for tree in self.trees:
            tree.accumulate_gain(totals)
        total = totals.sum()
        return totals / total if total > 0 else totals


def load_data(path="bike_sharing.csv"):
    """Read the CSV and return ``(data, X, y)``.

    Column names are stripped of surrounding whitespace before the feature
    and target columns are selected.
    """
    data = pd.read_csv(path)
    data.columns = data.columns.str.strip()
    X = data[FEATURE_COLUMNS].values
    y = data[TARGET_COLUMN].values
    return data, X, y


def plot_feature_distribution(plt, data):
    """Figure 1: scatter each feature against the usage count."""
    plt.figure(figsize=(10, 5))
    for column, color, label in zip(FEATURE_COLUMNS, FEATURE_COLORS,
                                    FEATURE_LABELS):
        plt.scatter(data[column], data[TARGET_COLUMN], color=color,
                    label=label, alpha=0.5)
    plt.title('Feature Distribution')
    plt.xlabel('Feature Values')
    plt.ylabel('Bicycle Usage Count')
    plt.legend()
    plt.grid()
    plt.show()


def plot_loss_curve(plt, model, X, y):
    """Figure 2: training MSE after each boosting iteration.

    Uses the staged predictions of the already-trained model instead of
    retraining a separate model for every iteration count.
    """
    loss = [mse(y, staged) for staged in model.staged_predict(X)]
    plt.figure(figsize=(10, 5))
    plt.plot(range(1, len(loss) + 1), loss, color='purple', marker='o')
    plt.title('Loss Function Decrease')
    plt.xlabel('Iteration')
    plt.ylabel('Loss Value')
    plt.grid()
    plt.show()


def plot_feature_importance(plt, model):
    """Figure 3: gain-based feature importance of the fitted model."""
    importance = model.feature_importances()
    plt.figure(figsize=(10, 5))
    plt.bar(FEATURE_LABELS, importance, color=FEATURE_COLORS)
    plt.title('Feature Importance')
    plt.xlabel('Features')
    plt.ylabel('Importance')
    plt.grid()
    plt.show()


def plot_predictions(plt, y, predictions):
    """Figure 4: predicted values overlaid on the actual values."""
    plt.figure(figsize=(10, 5))
    plt.plot(y, label='Actual Value', color='black')
    plt.plot(predictions, label='Predicted Value', color='orange')
    plt.title('Predicted vs Actual Values')
    plt.xlabel('Sample Points')
    plt.ylabel('Bicycle Usage Count')
    plt.legend()
    plt.grid()
    plt.show()


def main():
    """Train the model on ``bike_sharing.csv`` and show the four plots."""
    # Imported here so the model classes above stay importable on machines
    # without matplotlib or a display backend.
    import matplotlib.pyplot as plt

    data, X, y = load_data()

    model = SimpleGBM(n_estimators=10, learning_rate=0.1, max_depth=3)
    model.fit(X, y)
    predictions = model.predict(X)

    plot_feature_distribution(plt, data)
    plot_loss_curve(plt, model, X, y)
    plot_feature_importance(plt, model)
    plot_predictions(plt, y, predictions)


if __name__ == "__main__":
    main()