Coverage for wifa_uq/postprocessing/error_predictor/error_predictor.py: 89%

722 statements  

coverage.py v7.13.0, created at 2025-12-19 02:10 +0000

1import numpy as np 

2import pandas as pd 

3from pathlib import Path 

4import matplotlib.pyplot as plt 

5import shap 

6 

7from sklearn.base import BaseEstimator, RegressorMixin 

8from sklearn.pipeline import Pipeline 

9from sklearn.preprocessing import StandardScaler, PolynomialFeatures 

10from sklearn.linear_model import LinearRegression 

11from sklearn.exceptions import NotFittedError 

12 

13import xgboost as xgb 

14from sklearn.model_selection import KFold, LeaveOneGroupOut 

15from sklearn.metrics import mean_absolute_error, r2_score 

16from wifa_uq.postprocessing.PCE_tool.pce_utils import construct_PCE_ot 

17from wifa_uq.postprocessing.calibration import ( 

18 MinBiasCalibrator, 

19 # DefaultParams, 

20 # LocalParameterPredictor, 

21) 

22from sliced import SlicedInverseRegression 

23 

24""" 

25This script contains: 

26- PCERegressor, LinearRegressor and SIRPolynomialRegressor (NEW) classes 

27- Calibrator classes (imported from wifa_uq.postprocessing.calibration) 

28- BiasPredictor class 

29- MainPipeline class 

30- Cross validation routine 

31- SHAP/SIR sensitivity analysis functions 

32- Multi-farm CV visualization functions (NEW) 

33""" 

34 

35 

36class PCERegressor(BaseEstimator, RegressorMixin): 

37 """ 

38 A scikit-learn compatible regressor that wraps the OpenTURNS-based PCE 

39 from PCE_tool. 

40 

41 Safety guard: 

42 - By default, refuses to run if the number of input features > max_features 

43 unless allow_high_dim=True is explicitly set. 

44 """ 

45 

46 def __init__( 

47 self, 

48 degree=5, 

49 marginals="kernel", 

50 copula="independent", 

51 q=1.0, 

52 max_features=5, # safety limit on input dimension 

53 allow_high_dim=False, # must be True to allow > max_features 

54 ): 

55 self.degree = degree 

56 self.marginals = marginals 

57 self.copula = copula 

58 self.q = q 

59 self.max_features = max_features 

60 self.allow_high_dim = allow_high_dim 

61 

62 def fit(self, X, y): 

63 X = np.asarray(X) 

64 y = np.asarray(y).ravel() 

65 

66 n_features = X.shape[1] 

67 

68 # --- Safety check on dimensionality --- 

69 if n_features > self.max_features and not self.allow_high_dim: 

70 raise ValueError( 

71 f"PCERegressor refused to run: number of input variables = {n_features}, " 

72 f"which exceeds the default safety limit of {self.max_features}. " 

73 f"Set allow_high_dim=True or increase max_features to override." 

74 ) 

75 

76 marginals = [self.marginals] * n_features 

77 

78 # Construct the PCE using the construct_PCE_ot helper from PCE_tool 

79 self.pce_result_ = construct_PCE_ot( 

80 input_array=X, 

81 output_array=y, 

82 marginals=marginals, 

83 copula=self.copula, 

84 degree=self.degree, 

85 q=self.q, 

86 ) 

87 self.metamodel_ = self.pce_result_.getMetaModel() 

88 return self 

89 

90 def predict(self, X): 

91 if not hasattr(self, "metamodel_"): 

92 raise NotFittedError("PCERegressor instance is not fitted yet.") 

93 X = np.asarray(X) 

94 preds = np.zeros(X.shape[0]) 

95 for i, xi in enumerate(X): 

96 preds[i] = self.metamodel_(xi)[0] 

97 return preds 
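
# Illustrative usage sketch (not part of the pipeline): fit the PCE wrapper on a
# tiny synthetic 2-D problem. The data and settings are placeholders and assume
# that construct_PCE_ot accepts the default "kernel" marginals.
def _pce_regressor_sketch():
    rng = np.random.default_rng(0)
    X_demo = rng.uniform(size=(60, 2))
    y_demo = X_demo[:, 0] ** 2 + 0.5 * X_demo[:, 1]
    reg = PCERegressor(degree=3)  # 2 features <= max_features, so the guard stays quiet
    reg.fit(X_demo, y_demo)
    return reg.predict(X_demo[:5])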

98 

99 

100class LinearRegressor(BaseEstimator, RegressorMixin): 

101 """ 

102 A simple linear regression wrapper with optional regularization. 

103 

104 Supports: 'ols' (ordinary least squares), 'ridge', 'lasso', 'elasticnet' 

105 """ 

106 

107 def __init__(self, method="ols", alpha=1.0, l1_ratio=0.5): 

108 self.method = method 

109 self.alpha = alpha 

110 self.l1_ratio = l1_ratio # Only used for elasticnet 

111 

112 def fit(self, X, y): 

113 from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet 

114 

115 if self.method == "ols": 

116 self.model_ = LinearRegression() 

117 elif self.method == "ridge": 

118 self.model_ = Ridge(alpha=self.alpha) 

119 elif self.method == "lasso": 

120 self.model_ = Lasso(alpha=self.alpha) 

121 elif self.method == "elasticnet": 

122 self.model_ = ElasticNet(alpha=self.alpha, l1_ratio=self.l1_ratio) 

123 else: 

124 raise ValueError( 

125 f"Unknown method '{self.method}'. Use 'ols', 'ridge', 'lasso', or 'elasticnet'." 

126 ) 

127 

128 self.scaler_ = StandardScaler() 

129 X_scaled = self.scaler_.fit_transform(X) 

130 self.model_.fit(X_scaled, y) 

131 return self 

132 

133 def predict(self, X): 

134 if not hasattr(self, "model_"): 

135 raise NotFittedError("LinearRegressor is not fitted yet.") 

136 X_scaled = self.scaler_.transform(X) 

137 return self.model_.predict(X_scaled) 

138 

139 def get_feature_importance(self, feature_names): 

140 """Return absolute coefficients as feature importance.""" 

141 if not hasattr(self, "model_"): 

142 raise NotFittedError("LinearRegressor is not fitted yet.") 

143 return pd.Series(np.abs(self.model_.coef_), index=feature_names) 
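
# Illustrative usage sketch (not part of the pipeline): ridge variant on synthetic
# data, then coefficient-based importances. Feature names are placeholders.
def _linear_regressor_sketch():
    rng = np.random.default_rng(0)
    X_demo = rng.normal(size=(80, 3))
    y_demo = 2.0 * X_demo[:, 0] - X_demo[:, 2] + 0.1 * rng.normal(size=80)
    reg = LinearRegressor(method="ridge", alpha=0.5)
    reg.fit(X_demo, y_demo)
    return reg.get_feature_importance(["f0", "f1", "f2"])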

144 

145 

146## ------------------------------------------------------------------ ## 

147## NEW REGRESSOR CLASS 

148## ------------------------------------------------------------------ ## 

149class SIRPolynomialRegressor(BaseEstimator, RegressorMixin): 

150 """ 

151 A scikit-learn compatible regressor that first applies Sliced Inverse 

152 Regression (SIR) for dimension reduction and then fits a polynomial 

153 regression on the reduced direction(s). 

154 """ 

155 

156 def __init__(self, n_directions=1, degree=2): 

157 self.n_directions = n_directions 

158 self.degree = degree 

159 

160 def fit(self, X, y): 

161 # 1. Standard Scaler 

162 self.scaler_ = StandardScaler() 

163 X_scaled = self.scaler_.fit_transform(X) 

164 

165 # 2. Sliced Inverse Regression 

166 self.sir_ = SlicedInverseRegression(n_directions=self.n_directions) 

167 # Sliced package expects y as a 1D array 

168 y_ravel = np.ravel(y) 

169 X_sir = self.sir_.fit_transform(X_scaled, y_ravel) 

170 

171 # Store the directions (feature importance) 

172 # We take the absolute value for importance ranking 

173 self.sir_directions_ = np.abs(self.sir_.directions_[0, :]) 

174 

175 # 3. Polynomial Regression 

176 self.poly_reg_ = Pipeline( 

177 [ 

178 ("poly", PolynomialFeatures(degree=self.degree, include_bias=False)), 

179 ("lin_reg", LinearRegression()), 

180 ] 

181 ) 

182 self.poly_reg_.fit(X_sir, y) 

183 

184 return self 

185 

186 def predict(self, X): 

187 if not hasattr(self, "scaler_"): 

188 raise NotFittedError( 

189 "This SIRPolynomialRegressor instance is not fitted yet." 

190 ) 

191 

192 X_scaled = self.scaler_.transform(X) 

193 X_sir = self.sir_.transform(X_scaled) 

194 return self.poly_reg_.predict(X_sir) 

195 

196 def get_feature_importance(self, feature_names): 

197 if not hasattr(self, "sir_directions_"): 

198 raise NotFittedError( 

199 "This SIRPolynomialRegressor instance is not fitted yet." 

200 ) 

201 

202 return pd.Series(self.sir_directions_, index=feature_names) 
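
# Illustrative usage sketch (not part of the pipeline): the response depends on a
# single linear combination of the inputs, the setting SIR targets. The data,
# direction and feature names are synthetic placeholders.
def _sir_polynomial_sketch():
    rng = np.random.default_rng(0)
    X_demo = rng.normal(size=(200, 4))
    true_direction = np.array([1.0, -0.5, 0.0, 0.25])
    y_demo = (X_demo @ true_direction) ** 3 + 0.1 * rng.normal(size=200)
    reg = SIRPolynomialRegressor(n_directions=1, degree=2)
    reg.fit(X_demo, y_demo)
    importance = reg.get_feature_importance(["f0", "f1", "f2", "f3"])
    return reg.predict(X_demo[:5]), importance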

203 

204 

205## ------------------------------------------------------------------ ## 

206 

207 

208class BiasPredictor: 

209 """ 

210 Predict bias as a function of features and parameter samples 

211 """ 

212 

213 def __init__(self, regressor_pipeline): 

214 self.pipeline = regressor_pipeline 

215 

216 def fit(self, X_train, y_train): 

217 # print('shape of X and Y training: ',X_train.shape, y_train.shape) 

218 self.pipeline.fit(X_train, y_train) 

219 return self 

220 

221 def predict(self, X_test): 

222 y_pred = self.pipeline.predict(X_test) 

223 return y_pred 

224 

225 

226class MainPipeline: 

227 """ 

228 Main pipeline that combines calibration and bias prediction. 

229 

230 Supports two calibration modes: 

231 1. Global calibration (MinBiasCalibrator, DefaultParams): 

232 - Single parameter set for all cases 

233 - calibrator.best_idx_ gives the sample index 

234 

235 2. Local calibration (LocalParameterPredictor): 

236 - Different optimal parameters per case 

237 - calibrator.get_optimal_indices() gives per-case indices 

238 """ 

239 

240 def __init__( 

241 self, 

242 calibrator, 

243 bias_predictor, 

244 features_list: list, 

245 calibration_mode: str = "global", 

246 ): 

247 """ 

248 Args: 

249 calibrator: Calibrator instance (already initialized with dataset_train) 

250 bias_predictor: BiasPredictor instance 

251 features_list: List of feature names to use for bias prediction 

252 calibration_mode: "global" or "local" 

253 """ 

254 self.calibrator = calibrator 

255 self.bias_predictor = bias_predictor 

256 self.features_list = features_list 

257 self.calibration_mode = calibration_mode 

258 

259 if not self.features_list: 

260 raise ValueError("features_list cannot be empty.") 

261 

262 def fit(self, dataset_train, dataset_test): 

263 """ 

264 Fit the calibrator and bias predictor. 

265 

266 Returns: 

267 X_test, y_test, idxs (for compatibility with cross-validation) 

268 """ 

269 # 1. Fit calibrator 

270 self.calibrator.fit() 

271 

272 if self.calibration_mode == "global": 

273 return self._fit_global(dataset_train, dataset_test) 

274 elif self.calibration_mode == "local": 

275 return self._fit_local(dataset_train, dataset_test) 

276 else: 

277 raise ValueError(f"Unknown calibration_mode: {self.calibration_mode}") 

278 

279 def _fit_global(self, dataset_train, dataset_test): 

280 """Fit using global calibration (single parameter set).""" 

281 idxs = self.calibrator.best_idx_ 

282 

283 # Select the calibrated sample for train and test 

284 dataset_train_cal = dataset_train.sel(sample=idxs) 

285 dataset_test_cal = dataset_test.sel(sample=idxs) 

286 

287 # Prepare features 

288 X_train_df = dataset_train_cal.to_dataframe().reset_index() 

289 X_test_df = dataset_test_cal.to_dataframe().reset_index() 

290 

291 X_train = self._extract_features(X_train_df) 

292 X_test = self._extract_features(X_test_df) 

293 

294 y_train = dataset_train_cal["model_bias_cap"].values 

295 y_test = dataset_test_cal["model_bias_cap"].values 

296 

297 # Fit bias predictor 

298 self.bias_predictor.fit(X_train, y_train) 

299 

300 # Store for predict() 

301 self.X_test_ = X_test 

302 

303 return X_test, y_test, idxs 

304 

305 def _fit_local(self, dataset_train, dataset_test): 

306 """Fit using local calibration (per-case optimal parameters).""" 

307 # 1. Get per-case optimal sample indices from the local calibrator 

308 train_optimal_indices = self.calibrator.get_optimal_indices() 

309 n_train_cases = len(dataset_train.case_index) 

310 

311 # 2. Build training feature matrix from sample=0 

312 # (features do not depend on sampled parameters) 

313 train_base = dataset_train.isel(sample=0) 

314 train_df = train_base.to_dataframe().reset_index() 

315 X_train = self._extract_features(train_df) 

316 

317 # 3. Build training targets: bias at the optimal sample for each case 

318 y_train = np.zeros(n_train_cases) 

319 for case_idx, sample_idx in enumerate(train_optimal_indices): 

320 y_train[case_idx] = float( 

321 dataset_train["model_bias_cap"] 

322 .isel(case_index=case_idx, sample=sample_idx) 

323 .values 

324 ) 

325 

326 # 4. Test features (also from sample=0) 

327 X_test_df = dataset_test.isel(sample=0).to_dataframe().reset_index() 

328 X_test_features = self._extract_features(X_test_df) 

329 

330 # 5. Predict optimal parameters for test cases, 

331 # then find the closest sampled parameter set in the database 

332 predicted_params = self.calibrator.predict(X_test_features) 

333 test_optimal_indices = self._find_closest_samples( 

334 dataset_test, predicted_params 

335 ) 

336 

337 # 6. Build test targets: bias at the chosen sample for each test case 

338 n_test_cases = len(dataset_test.case_index) 

339 y_test = np.zeros(n_test_cases) 

340 for case_idx, sample_idx in enumerate(test_optimal_indices): 

341 y_test[case_idx] = float( 

342 dataset_test["model_bias_cap"] 

343 .isel(case_index=case_idx, sample=sample_idx) 

344 .values 

345 ) 

346 

347 # 7. Fit the bias predictor on per-case data 

348 self.bias_predictor.fit(X_train, y_train) 

349 

350 # Store X_test_ so .predict() can be called without args 

351 self.X_test_ = X_test_features 

352 

353 # Return in the same shape run_cross_validation expects 

354 return X_test_features, y_test, test_optimal_indices 

355 

356 def _extract_features(self, df): 

357 """Extract and clean features from dataframe.""" 

358 try: 

359 X = df[self.features_list].copy() 

360 except KeyError as e: 

361 print(f"Error: Feature not found in dataset: {e}") 

362 print(f"Available columns: {list(df.columns)}") 

363 raise 

364 

365 # Clean string-like columns 

366 for col in X.columns: 

367 if X[col].dtype == "object": 

368 if X[col].dropna().empty: 

369 continue 

370 first_item = X[col].dropna().iloc[0] 

371 if isinstance(first_item, str): 

372 X[col] = X[col].str.replace(r"[\[\]]", "", regex=True).astype(float) 

373 else: 

374 X[col] = X[col].astype(float) 

375 

376 return X 

377 

378 def _find_closest_samples(self, dataset, predicted_params): 

379 """ 

380 Find the sample index closest to predicted parameters for each case. 

381 

382 Args: 

383 dataset: xarray Dataset with 'sample' dimension 

384 predicted_params: DataFrame with predicted optimal parameters 

385 

386 Returns: 

387 Array of sample indices (one per case) 

388 

389 Raises: 

390 ValueError: If no valid parameters found for distance calculation 

391 """ 

392 n_cases = len(predicted_params) 

393 n_samples = len(dataset.sample) 

394 swept_params = self.calibrator.swept_params 

395 

396 # Validation 1: Check swept_params is not empty 

397 if not swept_params: 

398 raise ValueError( 

399 "No swept parameters defined. Cannot find closest samples. " 

400 "Check that the database has 'swept_params' in attrs or that " 

401 "parameters were correctly inferred." 

402 ) 

403 

404 # Validation 2: Check which parameters are actually available 

405 available_params = [] 

406 missing_in_dataset = [] 

407 missing_in_predictions = [] 

408 

409 for param_name in swept_params: 

410 in_dataset = param_name in dataset.coords 

411 in_predictions = param_name in predicted_params.columns 

412 

413 if in_dataset and in_predictions: 

414 available_params.append(param_name) 

415 elif not in_dataset: 

416 missing_in_dataset.append(param_name) 

417 elif not in_predictions: 

418 missing_in_predictions.append(param_name) 

419 

420 # Validation 3: Ensure we have at least one parameter to use 

421 if not available_params: 

422 raise ValueError( 

423 f"No valid parameters for distance calculation.\n" 

424 f" Swept params: {swept_params}\n" 

425 f" Missing in dataset.coords: {missing_in_dataset}\n" 

426 f" Missing in predicted_params: {missing_in_predictions}" 

427 ) 

428 

429 # Warn about partial matches (some params missing) 

430 if missing_in_dataset or missing_in_predictions: 

431 import warnings 

432 

433 warnings.warn( 

434 f"Some swept parameters unavailable for distance calculation:\n" 

435 f" Using: {available_params}\n" 

436 f" Missing in dataset: {missing_in_dataset}\n" 

437 f" Missing in predictions: {missing_in_predictions}", 

438 UserWarning, 

439 ) 

440 

441 # Calculate distances using only available parameters 

442 closest_indices = np.zeros(n_cases, dtype=int) 

443 

444 for case_idx in range(n_cases): 

445 target_params = predicted_params.iloc[case_idx] 

446 

447 distances = np.zeros(n_samples) 

448 for param_name in available_params: 

449 sample_values = dataset.coords[param_name].values 

450 target_value = target_params[param_name] 

451 

452 # Normalize by parameter range to handle different scales 

453 param_range = sample_values.max() - sample_values.min() 

454 if param_range > 0: 

455 normalized_diff = (sample_values - target_value) / param_range 

456 else: 

457 # All samples have same value for this param 

458 normalized_diff = np.zeros_like(sample_values) 

459 

460 distances += normalized_diff**2 

461 

462 closest_indices[case_idx] = int(np.argmin(distances)) 

463 

464 return closest_indices 

465 

466 def predict(self, X=None): 

467 """Predict bias for test data.""" 

468 if X is None: 

469 X = self.X_test_ 

470 return self.bias_predictor.predict(X) 
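
# Illustrative wiring sketch (not executed anywhere): a global-mode MainPipeline
# assembled the same way run_cross_validation does per fold. The xgboost settings
# are placeholders; dataset_train / dataset_test are assumed to be xarray Datasets
# with the 'sample', 'case_index' and 'model_bias_cap' structure used above.
def _main_pipeline_sketch(dataset_train, dataset_test, features_list):
    regressor = Pipeline(
        [
            ("scaler", StandardScaler()),
            ("model", xgb.XGBRegressor(max_depth=3, n_estimators=200)),
        ]
    )
    pipeline = MainPipeline(
        calibrator=MinBiasCalibrator(dataset_train),
        bias_predictor=BiasPredictor(regressor),
        features_list=features_list,
        calibration_mode="global",
    )
    X_test, y_test, idxs = pipeline.fit(dataset_train, dataset_test)
    return pipeline.predict(X_test), y_test, idxs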

471 

472 

473def compute_metrics(y_true, bias_samples, pw, ref, data_driv=None): 

474 mse = ((y_true - bias_samples) ** 2).mean() 

475 rmse = np.sqrt(mse) 

476 mae = mean_absolute_error(y_true, bias_samples) 

477 r2 = r2_score(y_true, bias_samples) 

478 

479 if pw is not None and ref is not None and data_driv is None: 

480 pw_bias = np.mean(pw - ref) 

481 pw_bias_corrected = np.mean((pw - bias_samples) - ref) 

482 else: 

483 pw_bias = None 

484 pw_bias_corrected = None 

485 

486 if data_driv is not None and ref is not None: 

487 data_driv_bias = np.mean(data_driv - ref) 

488 else: 

489 data_driv_bias = None 

490 

491 return { 

492 "rmse": rmse, 

493 "mse": mse, 

494 "mae": mae, 

495 "r2": r2, 

496 "pw_bias": pw_bias, 

497 "pw_bias_corrected": pw_bias_corrected, 

498 "data_driv_bias": data_driv_bias, 

499 } 
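
# Illustrative check (made-up numbers): with y_true = [1, 2] and predictions
# [1, 3] the function returns mse = 0.5, rmse ~= 0.707, mae = 0.5 and r2 = -1.0,
# and all power-related entries stay None because pw/ref are not given.
def _compute_metrics_sketch():
    return compute_metrics(
        y_true=np.array([1.0, 2.0]),
        bias_samples=np.array([1.0, 3.0]),
        pw=None,
        ref=None,
    )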

500 

501 

502## ------------------------------------------------------------------ ## 

503## MULTI-FARM CV VISUALIZATION FUNCTIONS (NEW) 

504## ------------------------------------------------------------------ ## 

505 

506 

507def plot_multi_farm_cv_metrics( 

508 cv_results: pd.DataFrame, 

509 fold_labels: list, 

510 output_dir: Path, 

511 splitting_mode: str = "LeaveOneGroupOut", 

512): 

513 """ 

514 Create visualizations for multi-farm cross-validation results. 

515 

516 Shows per-fold (per-group) performance metrics to understand 

517 how well the model generalizes across different wind farms. 

518 

519 Args: 

520 cv_results: DataFrame with metrics per fold (rmse, r2, mae, etc.) 

521 fold_labels: List of strings identifying each fold (e.g., group names left out) 

522 output_dir: Directory to save plots 

523 splitting_mode: CV splitting mode for title annotation 

524 """ 

525 output_dir = Path(output_dir) 

526 n_folds = len(cv_results) 

527 

528 # --- 1. Per-Fold Metrics Bar Chart --- 

529 fig, axes = plt.subplots(1, 3, figsize=(15, 5)) 

530 fig.suptitle( 

531 f"Cross-Validation Performance by Fold ({splitting_mode})", fontsize=14 

532 ) 

533 

534 metrics = ["rmse", "r2", "mae"] 

535 colors = ["#1f77b4", "#2ca02c", "#ff7f0e"] 

536 

537 x = np.arange(n_folds) 

538 

539 for ax, metric, color in zip(axes, metrics, colors): 

540 values = cv_results[metric].values 

541 bars = ax.bar(x, values, color=color, alpha=0.7, edgecolor="black") 

542 

543 # Add value labels on bars 

544 for bar, val in zip(bars, values): 

545 height = bar.get_height() 

546 ax.annotate( 

547 f"{val:.4f}", 

548 xy=(bar.get_x() + bar.get_width() / 2, height), 

549 xytext=(0, 3), 

550 textcoords="offset points", 

551 ha="center", 

552 va="bottom", 

553 fontsize=8, 

554 rotation=45, 

555 ) 

556 

557 # Add mean line 

558 mean_val = values.mean() 

559 ax.axhline( 

560 mean_val, 

561 color="red", 

562 linestyle="--", 

563 linewidth=2, 

564 label=f"Mean: {mean_val:.4f}", 

565 ) 

566 

567 ax.set_xlabel("Fold (Left-Out Group)") 

568 ax.set_ylabel(metric.upper()) 

569 ax.set_title(f"{metric.upper()} per Fold") 

570 ax.set_xticks(x) 

571 ax.set_xticklabels(fold_labels, rotation=45, ha="right", fontsize=8) 

572 ax.legend(loc="best") 

573 ax.grid(axis="y", alpha=0.3) 

574 

575 plt.tight_layout() 

576 plt.savefig(output_dir / "cv_fold_metrics.png", dpi=150, bbox_inches="tight") 

577 plt.close(fig) 

578 print(f" Saved per-fold metrics plot to: {output_dir / 'cv_fold_metrics.png'}") 

579 

580 # --- 2. Metrics Comparison Heatmap --- 

581 fig, ax = plt.subplots(figsize=(10, 6)) 

582 

583 # Orient metrics for the heatmap so that higher always means better 

584 metrics_for_heatmap = ["rmse", "mae", "r2"] 

585 heatmap_data = cv_results[metrics_for_heatmap].copy() 

586 

587 # For r2, higher is better; for rmse/mae, lower is better 

588 # Normalize so that "better" is always higher for visualization 

589 heatmap_normalized = heatmap_data.copy() 

590 heatmap_normalized["rmse"] = -heatmap_data["rmse"] # Negate so higher = better 

591 heatmap_normalized["mae"] = -heatmap_data["mae"] # Negate so higher = better 

592 

593 # Create heatmap 

594 im = ax.imshow(heatmap_normalized.T, cmap="RdYlGn", aspect="auto") 

595 

596 # Add colorbar 

597 cbar = plt.colorbar(im, ax=ax) 

598 cbar.set_label( 

599 "Performance (normalized, higher = better)", rotation=270, labelpad=15 

600 ) 

601 

602 # Set ticks and labels 

603 ax.set_xticks(np.arange(n_folds)) 

604 ax.set_xticklabels(fold_labels, rotation=45, ha="right") 

605 ax.set_yticks(np.arange(len(metrics_for_heatmap))) 

606 ax.set_yticklabels([m.upper() for m in metrics_for_heatmap]) 

607 

608 # Add text annotations with actual values 

609 for i in range(len(metrics_for_heatmap)): 

610 for j in range(n_folds): 

611 ax.text( 

612 j, 

613 i, 

614 f"{heatmap_data.iloc[j, i]:.3f}", 

615 ha="center", 

616 va="center", 

617 color="black", 

618 fontsize=8, 

619 ) 

620 

621 ax.set_xlabel("Fold (Left-Out Group)") 

622 ax.set_title("Performance Heatmap Across CV Folds\n(Green = Better Performance)") 

623 

624 plt.tight_layout() 

625 plt.savefig(output_dir / "cv_fold_heatmap.png", dpi=150, bbox_inches="tight") 

626 plt.close(fig) 

627 print(f" Saved heatmap plot to: {output_dir / 'cv_fold_heatmap.png'}") 

628 

629 # --- 3. Box Plot Summary --- 

630 fig, ax = plt.subplots(figsize=(8, 6)) 

631 

632 # Prepare data for box plot 

633 metrics_data = [cv_results[m].values for m in ["rmse", "mae", "r2"]] 

634 

635 bp = ax.boxplot(metrics_data, labels=["RMSE", "MAE", "R²"], patch_artist=True) 

636 

637 # Color the boxes 

638 colors_box = ["#1f77b4", "#ff7f0e", "#2ca02c"] 

639 for patch, color in zip(bp["boxes"], colors_box): 

640 patch.set_facecolor(color) 

641 patch.set_alpha(0.7) 

642 

643 # Add individual points 

644 for i, (metric_vals, color) in enumerate(zip(metrics_data, colors_box)): 

645 x_jitter = np.random.normal(i + 1, 0.04, size=len(metric_vals)) 

646 ax.scatter( 

647 x_jitter, metric_vals, alpha=0.6, color=color, edgecolor="black", s=50 

648 ) 

649 

650 ax.set_ylabel("Metric Value") 

651 ax.set_title(f"Distribution of CV Metrics Across {n_folds} Folds") 

652 ax.grid(axis="y", alpha=0.3) 

653 

654 plt.tight_layout() 

655 plt.savefig(output_dir / "cv_metrics_boxplot.png", dpi=150, bbox_inches="tight") 

656 plt.close(fig) 

657 print(f" Saved boxplot to: {output_dir / 'cv_metrics_boxplot.png'}") 

658 

659 

660def plot_farm_wise_predictions( 

661 y_tests: list, 

662 y_preds: list, 

663 fold_labels: list, 

664 fold_farm_names: list, 

665 output_dir: Path, 

666): 

667 """ 

668 Create scatter plots showing predictions vs true values, colored by farm/group. 

669 

670 Args: 

671 y_tests: List of test targets per fold 

672 y_preds: List of predictions per fold 

673 fold_labels: List of fold identifiers (left-out groups) 

674 fold_farm_names: List of arrays, each containing farm names for test cases in that fold 

675 output_dir: Directory to save plots 

676 """ 

677 output_dir = Path(output_dir) 

678 

679 # Combine all folds 

680 all_y_test = np.concatenate(y_tests) 

681 all_y_pred = np.concatenate(y_preds) 

682 all_fold_ids = np.concatenate( 

683 [np.full(len(y_tests[i]), fold_labels[i]) for i in range(len(y_tests))] 

684 ) 

685 

686 # Get unique fold labels for coloring 

687 unique_folds = np.unique(all_fold_ids) 

688 n_folds = len(unique_folds) 

689 

690 # Create colormap 

691 cmap = plt.cm.get_cmap("tab10" if n_folds <= 10 else "tab20") 

692 colors = {fold: cmap(i / n_folds) for i, fold in enumerate(unique_folds)} 

693 

694 # --- Main scatter plot colored by fold --- 

695 fig, ax = plt.subplots(figsize=(10, 8)) 

696 

697 for fold_label in unique_folds: 

698 mask = all_fold_ids == fold_label 

699 ax.scatter( 

700 all_y_test[mask], 

701 all_y_pred[mask], 

702 c=[colors[fold_label]], 

703 label=f"Left out: {fold_label}", 

704 alpha=0.6, 

705 s=30, 

706 edgecolor="white", 

707 linewidth=0.5, 

708 ) 

709 

710 # Add 1:1 line 

711 min_val = min(all_y_test.min(), all_y_pred.min()) 

712 max_val = max(all_y_test.max(), all_y_pred.max()) 

713 ax.plot( 

714 [min_val, max_val], [min_val, max_val], "k--", linewidth=2, label="1:1 Line" 

715 ) 

716 

717 # Calculate overall metrics 

718 overall_rmse = np.sqrt(np.mean((all_y_test - all_y_pred) ** 2)) 

719 overall_r2 = r2_score(all_y_test, all_y_pred) 

720 

721 ax.set_xlabel("True Bias", fontsize=12) 

722 ax.set_ylabel("Predicted Bias", fontsize=12) 

723 ax.set_title( 

724 f"Predictions vs True Values by Left-Out Group\n" 

725 f"Overall RMSE: {overall_rmse:.4f}, R²: {overall_r2:.4f}", 

726 fontsize=14, 

727 ) 

728 ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", fontsize=9) 

729 ax.grid(alpha=0.3) 

730 ax.set_aspect("equal", adjustable="box") 

731 

732 plt.tight_layout() 

733 plt.savefig(output_dir / "cv_predictions_by_fold.png", dpi=150, bbox_inches="tight") 

734 plt.close(fig) 

735 print( 

736 f" Saved predictions scatter plot to: {output_dir / 'cv_predictions_by_fold.png'}" 

737 ) 

738 

739 # --- Per-fold subplots --- 

740 n_cols = min(3, n_folds) 

741 n_rows = (n_folds + n_cols - 1) // n_cols 

742 

743 fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows)) 

744 if n_folds == 1: 

745 axes = np.array([[axes]]) 

746 elif n_rows == 1: 

747 axes = axes.reshape(1, -1) 

748 

749 for idx, fold_label in enumerate(unique_folds): 

750 row, col = divmod(idx, n_cols) 

751 ax = axes[row, col] 

752 

753 mask = all_fold_ids == fold_label 

754 y_test_fold = all_y_test[mask] 

755 y_pred_fold = all_y_pred[mask] 

756 

757 ax.scatter( 

758 y_test_fold, 

759 y_pred_fold, 

760 c=[colors[fold_label]], 

761 alpha=0.6, 

762 s=30, 

763 edgecolor="white", 

764 linewidth=0.5, 

765 ) 

766 

767 # 1:1 line 

768 min_v = min(y_test_fold.min(), y_pred_fold.min()) 

769 max_v = max(y_test_fold.max(), y_pred_fold.max()) 

770 ax.plot([min_v, max_v], [min_v, max_v], "k--", linewidth=1.5) 

771 

772 # Per-fold metrics 

773 fold_rmse = np.sqrt(np.mean((y_test_fold - y_pred_fold) ** 2)) 

774 fold_r2 = r2_score(y_test_fold, y_pred_fold) if len(y_test_fold) > 1 else 0 

775 

776 ax.set_xlabel("True Bias") 

777 ax.set_ylabel("Predicted Bias") 

778 ax.set_title( 

779 f"Left Out: {fold_label}\nRMSE: {fold_rmse:.4f}, R²: {fold_r2:.4f}" 

780 ) 

781 ax.grid(alpha=0.3) 

782 ax.set_aspect("equal", adjustable="box") 

783 

784 # Hide unused subplots 

785 for idx in range(n_folds, n_rows * n_cols): 

786 row, col = divmod(idx, n_cols) 

787 axes[row, col].set_visible(False) 

788 

789 plt.suptitle("Prediction Quality per CV Fold", fontsize=14, y=1.02) 

790 plt.tight_layout() 

791 plt.savefig( 

792 output_dir / "cv_predictions_per_fold.png", dpi=150, bbox_inches="tight" 

793 ) 

794 plt.close(fig) 

795 print( 

796 f" Saved per-fold predictions to: {output_dir / 'cv_predictions_per_fold.png'}" 

797 ) 

798 

799 

800def plot_generalization_matrix( 

801 cv_results: pd.DataFrame, fold_labels: list, output_dir: Path 

802): 

803 """ 

804 Create a generalization analysis visualization showing how training on 

805 certain farm groups affects prediction on others. 

806 

807 Args: 

808 cv_results: DataFrame with metrics per fold 

809 fold_labels: List of fold identifiers (left-out groups) 

810 output_dir: Directory to save plots 

811 """ 

812 output_dir = Path(output_dir) 

813 n_folds = len(fold_labels) 

814 

815 # Create summary table 

816 fig, ax = plt.subplots(figsize=(12, max(4, n_folds * 0.5))) 

817 

818 # Prepare data for table 

819 table_data = [] 

820 for i, label in enumerate(fold_labels): 

821 row = [ 

822 label, 

823 f"{cv_results['rmse'].iloc[i]:.4f}", 

824 f"{cv_results['r2'].iloc[i]:.4f}", 

825 f"{cv_results['mae'].iloc[i]:.4f}", 

826 ] 

827 table_data.append(row) 

828 

829 # Add mean row 

830 table_data.append( 

831 [ 

832 "MEAN", 

833 f"{cv_results['rmse'].mean():.4f}", 

834 f"{cv_results['r2'].mean():.4f}", 

835 f"{cv_results['mae'].mean():.4f}", 

836 ] 

837 ) 

838 

839 # Add std row 

840 table_data.append( 

841 [ 

842 "STD", 

843 f"{cv_results['rmse'].std():.4f}", 

844 f"{cv_results['r2'].std():.4f}", 

845 f"{cv_results['mae'].std():.4f}", 

846 ] 

847 ) 

848 

849 columns = ["Left-Out Group", "RMSE", "R²", "MAE"] 

850 

851 # Hide axes 

852 ax.axis("off") 

853 

854 # Create table 

855 table = ax.table( 

856 cellText=table_data, 

857 colLabels=columns, 

858 loc="center", 

859 cellLoc="center", 

860 ) 

861 

862 # Style the table 

863 table.auto_set_font_size(False) 

864 table.set_fontsize(10) 

865 table.scale(1.2, 1.5) 

866 

867 # Color header row 

868 for j, col in enumerate(columns): 

869 table[(0, j)].set_facecolor("#4472C4") 

870 table[(0, j)].set_text_props(color="white", weight="bold") 

871 

872 # Color mean/std rows 

873 for j in range(len(columns)): 

874 table[(n_folds + 1, j)].set_facecolor("#E2EFDA") # Mean row 

875 table[(n_folds + 2, j)].set_facecolor("#FCE4D6") # Std row 

876 

877 # Color-code RMSE cells based on value 

878 rmse_values = cv_results["rmse"].values 

879 rmse_min, rmse_max = rmse_values.min(), rmse_values.max() 

880 

881 for i in range(n_folds): 

882 # Normalize RMSE (lower is better, so invert for color) 

883 if rmse_max > rmse_min: 

884 norm_val = (rmse_values[i] - rmse_min) / (rmse_max - rmse_min) 

885 else: 

886 norm_val = 0.5 

887 

888 # Color from green (good) to red (bad) 

889 color = plt.cm.RdYlGn(1 - norm_val) 

890 table[(i + 1, 1)].set_facecolor(color) 

891 

892 plt.title( 

893 "Cross-Validation Generalization Summary\n" 

894 "(Testing on each group after training on all others)", 

895 fontsize=14, 

896 pad=20, 

897 ) 

898 

899 plt.tight_layout() 

900 plt.savefig( 

901 output_dir / "cv_generalization_summary.png", dpi=150, bbox_inches="tight" 

902 ) 

903 plt.close(fig) 

904 print( 

905 f" Saved generalization summary to: {output_dir / 'cv_generalization_summary.png'}" 

906 ) 

907 

908 

909## ------------------------------------------------------------------ ## 

910 

911 

912def run_observation_sensitivity( 

913 database, 

914 features_list, 

915 ml_pipeline, 

916 model_type, 

917 output_dir, 

918 method: str = "auto", 

919 pce_config: dict = None, 

920): 

921 """ 

922 Sensitivity analysis on observations. 

923 

924 Args: 

925 database: xarray Dataset 

926 features_list: List of feature names 

927 ml_pipeline: ML pipeline (used for shap/sir methods) 

928 model_type: "tree" or "sir" (used for shap/sir methods) 

929 output_dir: Where to save plots 

930 method: "auto", "shap", "sir", or "pce_sobol" 

931 "auto" uses shap for tree models, sir directions for sir models 

932 pce_config: Config dict for PCE (only used if method="pce_sobol") 

933 """ 

934 print(f"--- Running Observation Sensitivity (method={method}) ---") 

935 

936 # Prepare data (sample 0 = default params) 

937 data = database.isel(sample=0).to_dataframe().reset_index() 

938 X = data[features_list] 

939 y = data["ref_power_cap"].values # observations 

940 

941 # Determine method 

942 if method == "auto": 

943 if model_type == "tree": 

944 method = "shap" 

945 elif model_type == "sir": 

946 method = "sir" 

947 elif model_type == "pce": 

948 # For PCE models, default "auto" to PCE-based Sobol SA 

949 method = "pce_sobol" 

950 else: 

951 raise ValueError( 

952 f"Unknown model_type '{model_type}' for 'auto' sensitivity. " 

953 f"Expected one of ['tree', 'sir', 'pce']." 

954 ) 

955 

956 if method == "pce_sobol": 

957 # PCE-based Sobol indices 

958 from wifa_uq.postprocessing.PCE_tool.pce_utils import run_pce_sensitivity 

959 

960 run_pce_sensitivity( 

961 database, 

962 feature_names=features_list, 

963 pce_config=pce_config or {}, 

964 output_dir=output_dir, 

965 ) 

966 

967 elif method == "shap": 

968 # Train model, then SHAP 

969 ml_pipeline.fit(X, y) 

970 

971 if hasattr(ml_pipeline, "named_steps"): 

972 model = ml_pipeline.named_steps["model"] 

973 else: 

974 model = ml_pipeline 

975 

976 # Get scaled inputs for SHAP (assumes a 'scaler' step in the Pipeline) 

977 X_scaled = ml_pipeline.named_steps["scaler"].transform(X) 

979 

980 explainer = shap.TreeExplainer(model) 

981 shap_values = explainer.shap_values(X_scaled) 

982 

983 # Plot 

984 shap.summary_plot(shap_values, X, feature_names=features_list, show=False) 

985 plt.savefig(Path(output_dir) / "observation_sensitivity_shap.png", dpi=150) 

986 plt.close() 

987 print(f" Saved SHAP plot to {output_dir}/observation_sensitivity_shap.png") 

988 

989 elif method == "sir": 

990 # Train SIR model, use direction coefficients 

991 scaler = StandardScaler() 

992 X_scaled = scaler.fit_transform(X) 

993 ml_pipeline.fit(X_scaled, y) 

994 

995 # Get SIR direction coefficients as importance 

996 directions = ml_pipeline.sir_.directions_.flatten() 

997 importance = np.abs(directions) 

998 

999 # Identify the feature with the largest influence on the first direction 

1000 top_idx = np.argmax(importance) 

1001 top_feature_name = features_list[top_idx] 

1002 print(f" Dominant feature identified: {top_feature_name}") 

1003 

1004 # Plot 

1005 sorted_idx = np.argsort(importance) 

1006 plt.figure(figsize=(8, 6)) 

1007 plt.barh(range(len(features_list)), importance[sorted_idx]) 

1008 plt.yticks(range(len(features_list)), [features_list[i] for i in sorted_idx]) 

1009 plt.xlabel("Absolute SIR Direction Coefficient") 

1010 plt.title("Observation Sensitivity (SIR)") 

1011 plt.tight_layout() 

1012 plt.savefig(Path(output_dir) / "observation_sensitivity_sir.png", dpi=150) 

1013 plt.close() 

1014 print(f" Saved SIR plot to {output_dir}/observation_sensitivity_sir.png") 

1015 

1016 # 2. Shadow Plot (Error vs. First Eigenvector, Colored by Top Feature) 

1017 # Project data onto the first found direction 

1018 X_projected = ml_pipeline.sir_.transform(X_scaled) 

1019 first_component = X_projected[:, 0] 

1020 

1021 # Get values of the top feature for coloring 

1022 # X is likely a DataFrame here given the setup code 

1023 color_values = X.iloc[:, top_idx].values 

1024 

1025 plt.figure(figsize=(9, 7)) 

1026 # 

1027 scatter = plt.scatter( 

1028 first_component, 

1029 y, 

1030 c=color_values, 

1031 cmap="viridis", 

1032 alpha=0.7, 

1033 edgecolor="k", 

1034 linewidth=0.5, 

1035 ) 

1036 

1037 cbar = plt.colorbar(scatter) 

1038 cbar.set_label(f"Feature Value: {top_feature_name}", rotation=270, labelpad=15) 

1039 

1040 plt.xlabel(r"Projected Input $\beta_1^T \mathbf{x}$ (1st SIR Direction)") 

1041 plt.ylabel("Observed Error (y)") 

1042 plt.title(f"SIR Shadow Plot\nColored by dominant feature '{top_feature_name}'") 

1043 plt.grid(True, alpha=0.3) 

1044 plt.tight_layout() 

1045 

1046 shadow_plot_path = Path(output_dir) / "observation_sensitivity_sir_shadow.png" 

1047 plt.savefig(shadow_plot_path, dpi=150) 

1048 plt.close() 

1049 print(f" Saved SIR shadow plot to {shadow_plot_path}") 

1050 

1051 else: 

1052 raise ValueError( 

1053 f"Unknown method: {method}. Use 'auto', 'shap', 'sir', or 'pce_sobol'" 

1054 ) 
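
# Example invocation sketch (hypothetical data/paths): with a tree pipeline such
# as the pipe_xgb defined in the __main__ block, "auto" resolves to SHAP:
#
#     run_observation_sensitivity(
#         database=xr_data,
#         features_list=["turbulence_intensity"],
#         ml_pipeline=pipe_xgb,            # Pipeline with 'scaler' + 'model' steps
#         model_type="tree",
#         output_dir=Path("./test_results"),
#         method="auto",                   # -> "shap" for tree models
#     )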

1055 

1056 

1057def run_cross_validation( 

1058 xr_data, 

1059 ML_pipeline, 

1060 model_type, 

1061 Calibrator_cls, 

1062 BiasPredictor_cls, 

1063 MainPipeline_cls, 

1064 cv_config: dict, 

1065 features_list: list, 

1066 output_dir: Path, 

1067 sa_config: dict, 

1068 calibration_mode: str = "global", 

1069 local_regressor: str = None, 

1070 local_regressor_params: dict = None, 

1071): 

1072 validation_data = xr_data[ 

1073 ["turb_rated_power", "pw_power_cap", "ref_power_cap", "case_index"] 

1074 ] 

1075 groups = xr_data["wind_farm"].values 

1076 

1077 splitting_mode = cv_config.get("splitting_mode", "kfold_shuffled") 

1078 

1079 # Track fold labels for multi-farm visualization 

1080 fold_labels = [] 

1081 is_multi_farm = False 

1082 

1083 if splitting_mode == "LeaveOneGroupOut": 

1084 is_multi_farm = True 

1085 groups = xr_data["wind_farm"].values 

1086 

1087 groups_cfg = cv_config.get("groups") 

1088 if groups_cfg: 

1089 # Flatten config into a name -> label mapping 

1090 wf_to_group = {} 

1091 for group_label, wf_list in groups_cfg.items(): 

1092 for wf in wf_list: 

1093 wf_to_group[wf] = group_label # can stay as string! 

1094 

1095 default_group = "__OTHER__" 

1096 manual_groups = np.array( 

1097 [wf_to_group.get(str(w), default_group) for w in groups] 

1098 ) 

1099 else: 

1100 # Fully generic fallback: each wind_farm is its own group 

1101 manual_groups = groups 

1102 

1103 cv = LeaveOneGroupOut() 

1104 splits = list(cv.split(xr_data.case_index, groups=manual_groups)) 

1105 n_splits = cv.get_n_splits(groups=manual_groups) 

1106 

1107 # Extract fold labels (the left-out group for each fold) 

1108 unique_groups = np.unique(manual_groups) 

1109 for train_idx, test_idx in splits: 

1110 # Find which group is left out (present in test but not in train) 

1111 test_groups = np.unique(manual_groups[test_idx]) 

1112 fold_labels.append( 

1113 str(test_groups[0]) if len(test_groups) == 1 else str(test_groups) 

1114 ) 

1115 

1116 print(f"Using LeaveOneGroupOut with {n_splits} groups: {list(unique_groups)}") 

1117 

1118 elif splitting_mode == "kfold_shuffled": 

1119 n_splits = cv_config.get("n_splits", 5) 

1120 cv = KFold(n_splits=n_splits, shuffle=True, random_state=42) 

1121 splits = list(cv.split(xr_data.case_index)) 

1122 fold_labels = [f"Fold {i + 1}" for i in range(n_splits)] 

1123 print(f"Using KFold with {n_splits} splits.") 

else:

    raise ValueError(f"Unknown splitting_mode: {splitting_mode!r}")

1124 

1125 stats_cv, y_preds, y_tests, pw_all, ref_all = [], [], [], [], [] 

1126 fold_farm_names = [] # Track farm names per fold for visualization 

1127 

1128 # --- Add lists to store items for SHAP --- 

1129 all_models = [] 

1130 all_xtest_scaled = [] # Only used for tree models 

1131 all_features_df = [] 

1132 

1133 # --- Add lists for local calibration parameter prediction tracking --- 

1134 all_predicted_params = [] # Predicted parameter values per fold 

1135 all_actual_optimal_params = [] # Actual optimal parameter values per fold 

1136 swept_params = xr_data.attrs.get("swept_params", []) 

1137 

1138 for i, (train_idx_locs, test_idx_locs) in enumerate(splits): 

1139 # Get the actual case_index *values* at these integer locations 

1140 train_indices = xr_data.case_index.values[train_idx_locs] 

1141 test_indices = xr_data.case_index.values[test_idx_locs] 

1142 

1143 dataset_train = xr_data.where(xr_data.case_index.isin(train_indices), drop=True) 

1144 dataset_test = xr_data.where(xr_data.case_index.isin(test_indices), drop=True) 

1145 

1146 # Track farm names for this fold (for visualization) 

1147 if "wind_farm" in xr_data.coords: 

1148 fold_farm_names.append(xr_data.wind_farm.values[test_idx_locs]) 

1149 

1150 if calibration_mode == "local": 

1151 calibrator = Calibrator_cls( 

1152 dataset_train, 

1153 feature_names=features_list, 

1154 regressor_name=local_regressor, 

1155 regressor_params=local_regressor_params, 

1156 ) 

1157 else: 

1158 calibrator = Calibrator_cls(dataset_train) 

1159 

1160 bias_pred = BiasPredictor_cls(ML_pipeline) 

1161 

1162 # --- Pass features_list to MainPipeline --- 

1163 main_pipe = MainPipeline_cls( 

1164 calibrator, 

1165 bias_pred, 

1166 features_list=features_list, 

1167 calibration_mode=calibration_mode, 

1168 ) 

1169 

1170 x_test, y_test, idxs = main_pipe.fit(dataset_train, dataset_test) 

1171 y_pred = main_pipe.predict(x_test) 

1172 


1177 

1178 if calibration_mode == "global": 

1179 # Single sample index for all cases 

1180 val_data_fold = validation_data.sel(sample=idxs).where( 

1181 validation_data.case_index.isin(test_indices), drop=True 

1182 ) 

1183 

1184 pw = val_data_fold["pw_power_cap"].values 

1185 ref = val_data_fold["ref_power_cap"].values 

1186 else: 

1187 # Local calibration: idxs is an array of per-case sample indices 

1188 # We must build pw/ref per test case using those indices. 

1189 


1192 idxs = np.asarray(idxs) 

1193 if idxs.shape[0] != len(test_indices): 

1194 raise ValueError( 

1195 f"Local calibration returned {idxs.shape[0]} indices, " 

1196 f"but there are {len(test_indices)} test cases." 

1197 ) 

1198 

1199 # --- Track predicted vs actual optimal parameters for this fold --- 

1200 X_test_features = main_pipe._extract_features( 

1201 dataset_test.isel(sample=0).to_dataframe().reset_index() 

1202 ) 

1203 predicted_params_fold = main_pipe.calibrator.predict(X_test_features) 

1204 all_predicted_params.append(predicted_params_fold) 

1205 

1206 # Get actual optimal parameters (from the sample indices we chose) 

1207 actual_params_fold = {p: [] for p in swept_params} 

1208 for sample_idx in idxs: 

1209 for param_name in swept_params: 

1210 if param_name in dataset_test.coords: 

1211 actual_params_fold[param_name].append( 

1212 float( 

1213 dataset_test.coords[param_name] 

1214 .isel(sample=sample_idx) 

1215 .values 

1216 ) 

1217 ) 

1218 actual_params_df = pd.DataFrame(actual_params_fold) 

1219 all_actual_optimal_params.append(actual_params_df) 

1220 

1221 pw_list = [] 

1222 ref_list = [] 

1223 

1224 # dataset_test.case_index is the subset used inside MainPipeline 

1225 local_case_indices = dataset_test.case_index.values 

1226 

1227 for local_case_idx, sample_idx in enumerate(idxs): 

1228 case_index_val = local_case_indices[local_case_idx] 

1229 

1230 # Pick the appropriate sample & case_index from validation_data 

1231 this_point = validation_data.sel( 

1232 sample=int(sample_idx), case_index=case_index_val 

1233 ) 

1234 

1235 pw_list.append(float(this_point["pw_power_cap"].values)) 

1236 ref_list.append(float(this_point["ref_power_cap"].values)) 

1237 

1238 pw = np.array(pw_list) 

1239 ref = np.array(ref_list) 

1240 

1241 stats = compute_metrics(y_test, y_pred, pw=pw, ref=ref) 

1242 stats_cv.append(stats) 

1243 

1244 y_preds.append(y_pred) 

1245 y_tests.append(y_test) 

1246 pw_all.append(pw) 

1247 ref_all.append(ref) 

1248 

1249 # --- Store model and data for SHAP / SIR global importance --- 

1250 all_models.append(main_pipe.bias_predictor.pipeline) 

1251 all_features_df.append(x_test) 

1252 if model_type == "tree": 

1253 X_test_scaled = main_pipe.bias_predictor.pipeline.named_steps[ 

1254 "scaler" 

1255 ].transform(x_test) 

1256 all_xtest_scaled.append(X_test_scaled) 

1257 

1258 cv_results = pd.DataFrame(stats_cv) 

1259 

1260 # --- START PLOTTING BLOCK (Visualization) --- 

1261 

1262 # Flatten all fold results into single arrays for plotting 

1263 y_preds_flat = np.concatenate(y_preds) 

1264 y_tests_flat = np.concatenate(y_tests) 

1265 

1266 have_power = all(p is not None for p in pw_all) and all( 

1267 r is not None for r in ref_all 

1268 ) 

1269 

1270 if have_power: 

1271 pw_flat = np.concatenate(pw_all) 

1272 ref_flat = np.concatenate(ref_all) 

1273 corrected_power_flat = pw_flat - y_preds_flat 

1274 

1275 fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6)) 

1276 fig.suptitle("Cross-Validation Model Performance", fontsize=16) 

1277 

1278 # 1. Predicted vs. True Bias 

1279 ax1.scatter(y_tests_flat, y_preds_flat, alpha=0.5, s=10) 

1280 min_bias = min(y_tests_flat.min(), y_preds_flat.min()) 

1281 max_bias = max(y_tests_flat.max(), y_preds_flat.max()) 

1282 ax1.plot([min_bias, max_bias], [min_bias, max_bias], "r--", label="1:1 Line") 

1283 ax1.set_xlabel("True Bias (PyWake - Ref)") 

1284 ax1.set_ylabel("Predicted Bias (ML)") 

1285 ax1.set_title("ML Model Performance") 

1286 ax1.grid(True) 

1287 ax1.legend() 

1288 ax1.axis("equal") 

1289 

1290 # 2. Uncorrected Power vs. Reference 

1291 ax2.scatter(ref_flat, pw_flat, alpha=0.5, s=10, label="Data") 

1292 min_power = min(ref_flat.min(), pw_flat.min()) 

1293 max_power = max(ref_flat.max(), pw_flat.max()) 

1294 ax2.plot([min_power, max_power], [min_power, max_power], "r--", label="1:1 Line") 

1295 ax2.set_xlabel("Reference Power (Truth)") 

1296 ax2.set_ylabel("Uncorrected Power (Calibrated)") 

1297 ax2.set_title("Uncorrected Model") 

1298 ax2.grid(True) 

1299 ax2.legend() 

1300 ax2.axis("equal") 

1301 

1302 # 3. Corrected Power vs. Reference 

1303 ax3.scatter(ref_flat, corrected_power_flat, alpha=0.5, s=10, label="Data") 

1304 ax3.plot([min_power, max_power], [min_power, max_power], "r--", label="1:1 Line") 

1305 ax3.set_xlabel("Reference Power (Truth)") 

1306 ax3.set_ylabel("Corrected Power (Calibrated + ML)") 

1307 ax3.set_title("Corrected Model") 

1308 ax3.grid(True) 

1309 ax3.legend() 

1310 ax3.axis("equal") 

1311 

1312 plt.tight_layout(rect=[0, 0.03, 1, 0.95]) 

1313 plot_path = output_dir / "correction_results.png" 

1314 plt.savefig(plot_path, dpi=150) 

1315 print(f"Saved correction plot to: {plot_path}") 

1316 plt.close(fig) # Close the figure 

1317 

1318 # --- MULTI-FARM CV VISUALIZATION (NEW) --- 

1319 if is_multi_farm or splitting_mode == "LeaveOneGroupOut": 

1320 print("--- Generating Multi-Farm CV Visualizations ---") 

1321 

1322 # 1. Per-fold metrics visualization 

1323 plot_multi_farm_cv_metrics( 

1324 cv_results=cv_results, 

1325 fold_labels=fold_labels, 

1326 output_dir=output_dir, 

1327 splitting_mode=splitting_mode, 

1328 ) 

1329 

1330 # 2. Predictions colored by fold 

1331 plot_farm_wise_predictions( 

1332 y_tests=y_tests, 

1333 y_preds=y_preds, 

1334 fold_labels=fold_labels, 

1335 fold_farm_names=fold_farm_names, 

1336 output_dir=output_dir, 

1337 ) 

1338 

1339 # 3. Generalization summary table 

1340 plot_generalization_matrix( 

1341 cv_results=cv_results, fold_labels=fold_labels, output_dir=output_dir 

1342 ) 

1343 

1344 # --- PARAMETER PREDICTION PLOT (Local Calibration Only) --- 

1345 if calibration_mode == "local" and all_predicted_params and swept_params: 

1346 print("--- Generating Parameter Prediction Quality Plot ---") 

1347 

1348 # Concatenate all folds 

1349 predicted_all = pd.concat(all_predicted_params, axis=0, ignore_index=True) 

1350 actual_all = pd.concat(all_actual_optimal_params, axis=0, ignore_index=True) 

1351 

1352 n_params = len(swept_params) 

1353 fig, axes = plt.subplots(1, n_params, figsize=(6 * n_params, 5)) 

1354 if n_params == 1: 

1355 axes = [axes] 

1356 

1357 fig.suptitle("Local Calibration: Parameter Prediction Quality", fontsize=14) 

1358 

1359 for idx, param_name in enumerate(swept_params): 

1360 ax = axes[idx] 

1361 pred_vals = predicted_all[param_name].values 

1362 actual_vals = actual_all[param_name].values 

1363 

1364 ax.scatter(actual_vals, pred_vals, alpha=0.5, s=15) 

1365 

1366 # 1:1 line 

1367 min_val = min(actual_vals.min(), pred_vals.min()) 

1368 max_val = max(actual_vals.max(), pred_vals.max()) 

1369 ax.plot([min_val, max_val], [min_val, max_val], "r--", label="1:1 Line") 

1370 

1371 # Calculate R² for parameter prediction 

1372 ss_res = np.sum((actual_vals - pred_vals) ** 2) 

1373 ss_tot = np.sum((actual_vals - actual_vals.mean()) ** 2) 

1374 r2_param = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0 

1375 

1376 ax.set_xlabel(f"Actual Optimal {param_name}") 

1377 ax.set_ylabel(f"Predicted {param_name}") 

1378 ax.set_title(f"{param_name} (R² = {r2_param:.3f})") 

1379 ax.legend() 

1380 ax.grid(True) 

1381 ax.set_aspect("equal", adjustable="box") 

1382 

1383 plt.tight_layout(rect=[0, 0.03, 1, 0.95]) 

1384 param_plot_path = output_dir / "local_parameter_prediction.png" 

1385 plt.savefig(param_plot_path, dpi=150) 

1386 print(f"Saved parameter prediction plot to: {param_plot_path}") 

1387 plt.close(fig) 

1388 

1389 # --- END PLOTTING BLOCK --- 

1390 

1391 # --- START SHAP ON BIAS BLOCK --- 

1392 if sa_config.get("run_bias_sensitivity", False): 

1393 print(f"--- Running Bias Sensitivity (Model Type: {model_type}) ---") 

1394 

1395 if model_type == "tree": 

1396 try: 

1397 print("--- Calculating Bias SHAP (TreeExplainer) ---") 

1398 all_shap_values = [] 

1399 

1400 for i in range(n_splits): 

1401 model = all_models[i].named_steps["model"] 

1402 X_test_scaled = all_xtest_scaled[i] 

1403 

1404 explainer = shap.TreeExplainer(model) 

1405 shap_values_fold = explainer.shap_values(X_test_scaled) 

1406 all_shap_values.append(shap_values_fold) 

1407 

1408 final_shap_values = np.concatenate(all_shap_values, axis=0) 

1409 

1410 final_features_df = pd.concat(all_features_df, axis=0) 

1411 # ... (string cleaning logic) ... 

1412 for col in final_features_df.columns: 

1413 if final_features_df[col].dtype == "object": 

1414 if final_features_df[col].dropna().empty: 

1415 continue 

1416 first_item = final_features_df[col].dropna().iloc[0] 

1417 if isinstance(first_item, str): 

1418 print(f" Cleaning string column in SHAP: {col}") 

1419 final_features_df[col] = ( 

1420 final_features_df[col] 

1421 .str.replace(r"[\[\]]", "", regex=True) 

1422 .astype(float) 

1423 ) 

1424 else: 

1425 final_features_df[col] = final_features_df[col].astype( 

1426 float 

1427 ) 

1428 final_features_df = final_features_df.astype(float) # Final cast 

1429 

1430 # --- 1. GENERATE AND SAVE BAR PLOT --- 

1431 print("--- Calculating Bias SHAP Global Feature Importance ---") 

1432 mean_abs_shap = np.mean(np.abs(final_shap_values), axis=0) 

1433 shap_scores = pd.Series( 

1434 mean_abs_shap, index=final_features_df.columns 

1435 ).sort_values(ascending=True) 

1436 

1437 fig, ax = plt.subplots(figsize=(10, 8)) 

1438 shap_scores.plot(kind="barh", ax=ax) 

1439 ax.set_title( 

1440 "Global SHAP Feature Importance (Mean Absolute SHAP Value)" 

1441 ) 

1442 ax.set_xlabel("Mean |SHAP Value| (Impact on Bias Prediction)") 

1443 plt.tight_layout() 

1444 

1445 bar_plot_path = output_dir / "bias_prediction_shap_importance.png" 

1446 plt.savefig(bar_plot_path, dpi=150, bbox_inches="tight") 

1447 plt.close(fig) 

1448 print(f"Saved SHAP importance bar plot to: {bar_plot_path}") 

1449 

1450 # --- 2. GENERATE AND SAVE BEESWARM PLOT --- 

1451 shap.summary_plot(final_shap_values, final_features_df, show=False) 

1452 plot_path = output_dir / "bias_prediction_shap.png" 

1453 plt.savefig(plot_path, dpi=150, bbox_inches="tight") 

1454 plt.close() 

1455 print(f"Saved bias SHAP beeswarm plot to: {plot_path}") 

1456 

1457 except Exception as e: 

1458 print(f"Could not run bias SHAP (Tree) analysis: {e}") 

1459 raise e 

1460 

1461 elif model_type == "linear": 

1462 try: 

1463 print( 

1464 "--- Calculating Bias Linear Feature Importance (Coefficients) ---" 

1465 ) 

1466 all_linear_scores = [] 

1467 feature_names = all_features_df[0].columns 

1468 

1469 for i in range(n_splits): 

1470 model = all_models[i] 

1471 fold_scores = model.get_feature_importance(feature_names) 

1472 all_linear_scores.append(fold_scores) 

1473 

1474 # Average importances across folds 

1475 importance_scores = pd.concat(all_linear_scores, axis=1).mean(axis=1) 

1476 importance_scores = importance_scores.sort_values(ascending=True) 

1477 

1478 fig, ax = plt.subplots(figsize=(10, 8)) 

1479 importance_scores.plot(kind="barh", ax=ax) 

1480 ax.set_title("Linear Model Feature Importance (Mean |Coefficient|)") 

1481 ax.set_xlabel("Mean |Coefficient| (Impact on Bias Prediction)") 

1482 plt.tight_layout() 

1483 

1484 bar_plot_path = output_dir / "bias_prediction_linear_importance.png" 

1485 plt.savefig(bar_plot_path, dpi=150, bbox_inches="tight") 

1486 plt.close(fig) 

1487 print(f"Saved linear importance plot to: {bar_plot_path}") 

1488 

1489 except Exception as e: 

1490 print(f"Could not run bias linear analysis: {e}") 

1491 raise e 

1492 elif model_type == "sir": 

1493 try: 

1494 print( 

1495 "--- Calculating Bias SIR Feature Importance (Averaged over folds) ---" 

1496 ) 

1497 all_sir_scores = [] 

1498 feature_names = all_features_df[0].columns  # Get feature names from first fold 

1501 

1502 for i in range(n_splits): 

1503 model = all_models[i] # This is the SIRPolynomialRegressor instance 

1504 fold_scores = model.get_feature_importance(feature_names) 

1505 all_sir_scores.append(fold_scores) 

1506 

1507 # Average the importances across all folds 

1508 shap_scores = pd.concat(all_sir_scores, axis=1).mean(axis=1) 

1509 shap_scores = shap_scores.sort_values(ascending=True) # Sort for barh 

1510 

1511 # Generate and save the bar plot 

1512 fig, ax = plt.subplots(figsize=(10, 8)) 

1513 shap_scores.plot(kind="barh", ax=ax) 

1514 ax.set_title( 

1515 "Global SIR Feature Importance (Mean Absolute Direction Coefficient)" 

1516 ) 

1517 ax.set_xlabel( 

1518 "Mean |SIR Direction Coefficient| (Impact on Bias Prediction)" 

1519 ) 

1520 plt.tight_layout() 

1521 

1522 bar_plot_path = output_dir / "bias_prediction_sir_importance.png" 

1523 plt.savefig(bar_plot_path, dpi=150, bbox_inches="tight") 

1524 plt.close(fig) 

1525 print(f"Saved SIR importance bar plot to: {bar_plot_path}") 

1526 print("--- NOTE: Beeswarm plot is not available for SIR model. ---") 

1527 

1528 except Exception as e: 

1529 print(f"Could not run bias SIR analysis: {e}") 

1530 raise e 

1531 

1532 # --- END SHAP ON BIAS BLOCK --- 

1533 

1534 return cv_results, y_preds, y_tests 

1535 

1536 

1537# Testing 

1538if __name__ == "__main__": 

1539 import xarray as xr 

1540 

1541 xr_data = xr.load_dataset("results_stacked_hh.nc") 

1542 pipe_xgb = Pipeline( 

1543 [ 

1544 ("scaler", StandardScaler()), 

1545 ("model", xgb.XGBRegressor(max_depth=3, n_estimators=500)), 

1546 ] 

1547 ) 

1548 

1549 # Create a dummy output_dir for testing 

1550 test_output_dir = Path("./test_results") 

1551 test_output_dir.mkdir(exist_ok=True) 

1552 

1553 cv_results, y_preds, y_tests = run_cross_validation( 

1554 xr_data, 

1555 ML_pipeline=pipe_xgb, 

1556 model_type="tree", # <-- Need to provide this for the test 

1557 Calibrator_cls=MinBiasCalibrator, 

1558 BiasPredictor_cls=BiasPredictor, 

1559 MainPipeline_cls=MainPipeline, 

1560 cv_config={"splitting_mode": "kfold_shuffled", "n_splits": 5}, 

1561 features_list=["turbulence_intensity"], # Example features 

1562 output_dir=test_output_dir, 

1563 sa_config={"run_bias_shap": True}, # Example config 

1564 ) 

1565 print(cv_results.mean())