Coverage for wifa_uq / model_error_database / run_sweep.py: 64%
90 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-19 02:10 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-19 02:10 +0000
1import numpy as np
2import xarray as xr
3from typing import Dict, List
4from windIO.yaml import load_yaml
5import time
6from pathlib import Path
7from wifa import run_pywake, run_foxes
8import json
9import argparse
12def set_nested_dict_value(d: dict, path: List[str], value: float) -> None:
13 """Set value in nested dictionary using a path list."""
14 current = d
15 for key in path[:-1]:
16 current = current[key]
17 current[path[-1]] = value
20def create_parameter_samples(
21 param_config: Dict[str, dict], n_samples: int, seed: int = None
22) -> Dict[str, np.ndarray]:
23 """
24 Create samples for multiple parameters based on their ranges.
26 Args:
27 param_config: Dictionary mapping parameter paths to (min, max) tuples
28 n_samples: Number of samples to generate
29 seed: Random seed for reproducibility
31 Returns:
32 Dictionary mapping parameter paths to arrays of samples
33 """
35 if seed is not None:
36 np.random.seed(seed)
38 samples = {}
39 for param_path, config in param_config.items():
40 min_val, max_val = config["range"]
41 samples[param_path] = np.random.uniform(min_val, max_val, n_samples)
43 # First sample is always the default (for baseline comparison)
44 if "default" in config:
45 samples[param_path][0] = config["default"]
47 return samples
50def run_parameter_sweep(
51 run_func: callable,
52 turb_rated_power,
53 dat: dict,
54 param_config: Dict[str, dict],
55 reference_power: dict,
56 reference_physical_inputs: dict,
57 n_samples: int = 100,
58 seed: int = None,
59 output_dir="cases/default/sampling/",
60 run_func_kwargs={},
61) -> List[xr.Dataset]:
62 """
63 run the wifa api for a range of parameter samples
64 compare reference power to engineering wake model power
65 calculate the RMSE over the entire farm
66 normalize based on rated power
67 return the power errors for each sample as a netcdf
69 Args:
70 run_func: callable to run the simulation (run_foxes or run_pywake)
71 turb_rated_power: rated power of a single turbine in the park (for normalizing power errors)
72 dat: windIO system dat file
73 param_config: specifying which parameters to sample from and ranges of values
74 reference_power: xarray with the power values from the reference simulation
75 reference_physical_inputs: xarray with physical inputs to the reference simulations
76 n_samples: number of parameter samples
77 seed: random seed for generating parameter samples
78 run_func_kwargs: additional keyword arguments to pass to the run_func
80 """
82 samples = create_parameter_samples(param_config, n_samples, seed)
83 n_flow_cases = reference_power.time.size
84 sample_coords = np.arange(n_samples, dtype=np.float64)
85 flow_case_coords = np.arange(n_flow_cases, dtype=np.float64)
87 bias_cap = np.zeros((n_samples, n_flow_cases), dtype=np.float64)
88 pw = np.zeros((n_samples, n_flow_cases), dtype=np.float64)
89 ref = np.zeros((n_samples, n_flow_cases), dtype=np.float64)
91 # Run the first sample with specific (default) samples
93 for i in range(n_samples):
94 # Update all parameters for this sample
95 for param_path, param_samples in samples.items():
96 # Convert string path to list of keys
97 path = param_path.split(".")
98 set_nested_dict_value(dat, path, param_samples[i])
100 sample_dir = output_dir / f"sample_{i}"
102 # Run simulation
103 run_func(dat, output_dir=sample_dir, **run_func_kwargs)
105 # Process results (in terms of power)
106 pw_power = xr.open_dataset(sample_dir / "turbine_data.nc").power.values
108 ref_power = reference_power.power.values
109 # workaround for some cases
110 if ref_power.shape == (pw_power.shape[1], pw_power.shape[0]):
111 ref_power = ref_power.T
113 model_err = pw_power - ref_power
114 # # in case there are nan reference values (relevant for scada)
115 # masked_model_err = np.ma.masked_array(model_err, mask=np.isnan(ref_power))
117 """.
118 In the context of bias-correction, dividing by the model value makes more sense
119 If bias = 10%, model overpredicts by 10% (of the model value)
120 Therefore, a multiplication by 0.9 would correct the bias.
122 In the context of forecasting accuracy... it seems more intuitive to divide by the reference value?
123 If bias = 10%, model output is 10% higher than the actual value
124 """
126 # calculating farm level bias
127 model_bias_cap = np.nanmean(model_err, axis=0) / turb_rated_power
129 # Fill pre-allocated arrays for all samples
130 bias_cap[i, :] = model_bias_cap
131 # pywake or foxes power (farm average)
132 pw[i, :] = np.nanmean(pw_power, axis=0) / turb_rated_power
133 # reference power (farm average)
134 ref[i, :] = np.nanmean(ref_power, axis=0) / turb_rated_power
136 # Build parameter coordinates: one coordinate per swept parameter
137 param_coords = {}
138 for param_path, param_samples in samples.items():
139 cfg = param_config.get(param_path, {})
140 short_name = cfg.get("short_name", param_path.split(".")[-1])
142 param_coords[short_name] = xr.DataArray(
143 param_samples, dims=["sample"], coords={"sample": sample_coords}
144 )
146 # Build dataset directly from NumPy arrays (bias_cap, pw, ref)
147 merged_data = xr.Dataset(
148 data_vars={
149 "model_bias_cap": (("sample", "flow_case"), bias_cap),
150 "pw_power_cap": (("sample", "flow_case"), pw),
151 "ref_power_cap": (("sample", "flow_case"), ref),
152 },
153 coords={
154 "sample": sample_coords,
155 "flow_case": flow_case_coords,
156 **param_coords,
157 },
158 )
160 # Store metadata
161 merged_data.attrs["swept_params"] = [
162 param_config[p]["short_name"] for p in param_config.keys()
163 ]
164 merged_data.attrs["param_paths"] = list(param_config.keys())
165 merged_data.attrs["param_defaults"] = json.dumps(
166 {
167 param_config[p]["short_name"]: param_config[p].get("default")
168 for p in param_config.keys()
169 }
170 )
172 return merged_data
175if __name__ == "__main__":
176 parser = argparse.ArgumentParser()
177 parser.add_argument(
178 "tool",
179 help="The simulation tool, either 'foxes' or 'pywake'",
180 )
181 parser.add_argument(
182 "-e",
183 "--example",
184 help="The sub folder name within examples/data",
185 default="EDF_datasets",
186 )
187 parser.add_argument(
188 "-c",
189 "--case",
190 help="The case name within the example folder",
191 default="HR1",
192 )
193 parser.add_argument(
194 "-o",
195 "--out_name",
196 help="The name of the samples output folder within the case folder",
197 default="samples",
198 )
199 args = parser.parse_args()
201 if args.tool == "foxes":
202 run_func = run_foxes
203 run_func_kwargs = {"verbosity": 0}
204 elif args.tool == "pywake":
205 run_func = run_pywake
206 run_func_kwargs = {}
207 else:
208 raise ValueError(
209 "Invalid simulation tool specified. Choose either 'foxes' or 'pywake'."
210 )
212 # Example usage:
213 param_config = {
214 "attributes.analysis.wind_deficit_model.wake_expansion_coefficient.k_b": {
215 "range": [0.01, 0.03],
216 "default": 0.04,
217 "short_name": "k_b",
218 },
219 "attributes.analysis.blockage_model.ss_alpha": {
220 "range": [0.75, 1.25],
221 "default": 0.875,
222 "short_name": "ss_alpha",
223 },
224 }
226 # navigating to a file containing metadata required to run wifa api
227 case = args.case
228 base_dir = Path(__file__).parent.parent.parent
229 edf_dir = base_dir / "examples" / "data" / args.example
230 case_dir = edf_dir / case
231 meta_file = case_dir / "meta.yaml"
232 meta = load_yaml(Path(meta_file))
234 print(f"metadata for flow case: {meta}")
235 dat = load_yaml(case_dir / f"{meta['system']}")
236 reference_physical_inputs = xr.load_dataset(case_dir / f"{meta['ref_resource']}")
237 turb_rated_power = meta["rated_power"]
238 reference_power = xr.load_dataset(case_dir / f"{meta['ref_power']}")
239 output_dir = case_dir / args.out_name
241 start = time.time()
242 print(f"Output directory to save results: {output_dir}")
243 results = run_parameter_sweep(
244 run_func,
245 turb_rated_power,
246 dat,
247 param_config,
248 reference_power,
249 reference_physical_inputs,
250 n_samples=10,
251 seed=3,
252 output_dir=output_dir,
253 run_func_kwargs=run_func_kwargs,
254 )
255 print("Time taken for parameter sweep:", time.time() - start)
256 results.to_netcdf("results.nc")