Coverage for wifa_uq/model_error_database/run_sweep.py: 64%

90 statements  

coverage.py v7.13.0, created at 2025-12-19 02:10 +0000

import argparse
import json
import time
from pathlib import Path
from typing import Callable, Dict, List

import numpy as np
import xarray as xr
from windIO.yaml import load_yaml

from wifa import run_pywake, run_foxes


def set_nested_dict_value(d: dict, path: List[str], value: float) -> None:
    """Set a value in a nested dictionary using a list of keys as the path."""
    current = d
    for key in path[:-1]:
        current = current[key]
    current[path[-1]] = value
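
# Illustrative example of the path-based update (the dict contents below are
# made up; run_parameter_sweep uses this helper to write sampled values into
# the windIO system dict):
#
#   >>> d = {"attributes": {"analysis": {"k_b": 0.04}}}
#   >>> set_nested_dict_value(d, ["attributes", "analysis", "k_b"], 0.02)
#   >>> d["attributes"]["analysis"]["k_b"]
#   0.02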


def create_parameter_samples(
    param_config: Dict[str, dict], n_samples: int, seed: int = None
) -> Dict[str, np.ndarray]:
    """
    Create uniform random samples for multiple parameters based on their ranges.

    Args:
        param_config: Dictionary mapping dotted parameter paths to config
            dicts with a "range" [min, max] entry and optional "default"
            and "short_name" entries
        n_samples: Number of samples to generate per parameter
        seed: Random seed for reproducibility

    Returns:
        Dictionary mapping parameter paths to arrays of samples
    """
    if seed is not None:
        np.random.seed(seed)

    samples = {}
    for param_path, config in param_config.items():
        min_val, max_val = config["range"]
        samples[param_path] = np.random.uniform(min_val, max_val, n_samples)

        # The first sample is always the default (for baseline comparison),
        # even if the default lies outside the sampled range
        if "default" in config:
            samples[param_path][0] = config["default"]

    return samples
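
# Illustrative sketch (the path and values are made up; see the __main__
# block below for the real configuration format):
#
#   >>> cfg = {"a.b.k": {"range": [0.01, 0.03], "default": 0.02}}
#   >>> s = create_parameter_samples(cfg, n_samples=5, seed=0)
#   >>> s["a.b.k"].shape
#   (5,)
#   >>> float(s["a.b.k"][0])  # pinned to the default value
#   0.02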


def run_parameter_sweep(
    run_func: Callable,
    turb_rated_power: float,
    dat: dict,
    param_config: Dict[str, dict],
    reference_power: xr.Dataset,
    reference_physical_inputs: xr.Dataset,
    n_samples: int = 100,
    seed: int = None,
    output_dir: Path = Path("cases/default/sampling/"),
    run_func_kwargs: dict = None,
) -> xr.Dataset:
    """
    Run the wifa API for a range of parameter samples, compare the reference
    power to the engineering wake model power, compute the farm-mean power
    bias for each flow case, normalize by rated power, and return the power
    errors for all samples as an xarray Dataset.

    Args:
        run_func: callable that runs the simulation (run_foxes or run_pywake)
        turb_rated_power: rated power of a single turbine in the park
            (for normalizing power errors)
        dat: windIO system dict
        param_config: specifies which parameters to sample and their value ranges
        reference_power: xarray Dataset with power values from the reference
            simulation
        reference_physical_inputs: xarray Dataset with physical inputs to the
            reference simulations (currently unused in this function)
        n_samples: number of parameter samples
        seed: random seed for generating parameter samples
        output_dir: directory below which per-sample results are written
        run_func_kwargs: additional keyword arguments to pass to run_func
    """

    if run_func_kwargs is None:
        run_func_kwargs = {}

    samples = create_parameter_samples(param_config, n_samples, seed)
    n_flow_cases = reference_power.time.size
    sample_coords = np.arange(n_samples, dtype=np.float64)
    flow_case_coords = np.arange(n_flow_cases, dtype=np.float64)

    # Pre-allocate (sample, flow_case) arrays for the normalized results
    bias_cap = np.zeros((n_samples, n_flow_cases), dtype=np.float64)
    pw = np.zeros((n_samples, n_flow_cases), dtype=np.float64)
    ref = np.zeros((n_samples, n_flow_cases), dtype=np.float64)

    # The first sample (i == 0) runs with the default parameter values,
    # as set in create_parameter_samples
    for i in range(n_samples):
        # Update all parameters for this sample
        for param_path, param_samples in samples.items():
            # Convert the dotted string path to a list of keys
            path = param_path.split(".")
            set_nested_dict_value(dat, path, param_samples[i])

        sample_dir = output_dir / f"sample_{i}"

        # Run the simulation
        run_func(dat, output_dir=sample_dir, **run_func_kwargs)

        # Load the modeled power (turbines along axis 0, flow cases along axis 1)
        pw_power = xr.open_dataset(sample_dir / "turbine_data.nc").power.values

        ref_power = reference_power.power.values
        # Workaround for cases where the reference power is stored transposed
        if ref_power.shape == (pw_power.shape[1], pw_power.shape[0]):
            ref_power = ref_power.T

        model_err = pw_power - ref_power
        # In case there are NaN reference values (relevant for SCADA):
        # masked_model_err = np.ma.masked_array(model_err, mask=np.isnan(ref_power))

117 """. 

118 In the context of bias-correction, dividing by the model value makes more sense 

119 If bias = 10%, model overpredicts by 10% (of the model value) 

120 Therefore, a multiplication by 0.9 would correct the bias. 

121 

122 In the context of forecasting accuracy... it seems more intuitive to divide by the reference value? 

123 If bias = 10%, model output is 10% higher than the actual value 

124 """ 

        # Farm-level bias per flow case, normalized by a single turbine's
        # rated power (hence the "_cap" suffix)
        model_bias_cap = np.nanmean(model_err, axis=0) / turb_rated_power

        # Fill the pre-allocated arrays for this sample
        bias_cap[i, :] = model_bias_cap
        # pywake or foxes power (farm average)
        pw[i, :] = np.nanmean(pw_power, axis=0) / turb_rated_power
        # reference power (farm average)
        ref[i, :] = np.nanmean(ref_power, axis=0) / turb_rated_power

    # Build parameter coordinates: one coordinate per swept parameter,
    # keyed by its short name
    param_coords = {}
    for param_path, param_samples in samples.items():
        cfg = param_config.get(param_path, {})
        short_name = cfg.get("short_name", param_path.split(".")[-1])

        param_coords[short_name] = xr.DataArray(
            param_samples, dims=["sample"], coords={"sample": sample_coords}
        )

    # Build the dataset directly from the NumPy arrays (bias_cap, pw, ref)
    merged_data = xr.Dataset(
        data_vars={
            "model_bias_cap": (("sample", "flow_case"), bias_cap),
            "pw_power_cap": (("sample", "flow_case"), pw),
            "ref_power_cap": (("sample", "flow_case"), ref),
        },
        coords={
            "sample": sample_coords,
            "flow_case": flow_case_coords,
            **param_coords,
        },
    )

    # Store sweep metadata; the defaults are JSON-encoded because netCDF
    # attributes cannot hold nested dictionaries
    merged_data.attrs["swept_params"] = [
        param_config[p]["short_name"] for p in param_config.keys()
    ]
    merged_data.attrs["param_paths"] = list(param_config.keys())
    merged_data.attrs["param_defaults"] = json.dumps(
        {
            param_config[p]["short_name"]: param_config[p].get("default")
            for p in param_config.keys()
        }
    )

    return merged_data
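

# Schematic structure of the returned Dataset (coordinate names for the swept
# parameters depend on the configured short names; "k_b" is one example):
#
#   Dimensions:         (sample: n_samples, flow_case: n_flow_cases)
#   Coordinates:
#       sample          (sample) float64
#       flow_case       (flow_case) float64
#       k_b             (sample) float64   # one coordinate per swept parameter
#   Data variables:
#       model_bias_cap  (sample, flow_case) float64
#       pw_power_cap    (sample, flow_case) float64
#       ref_power_cap   (sample, flow_case) float64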


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "tool",
        help="The simulation tool, either 'foxes' or 'pywake'",
    )
    parser.add_argument(
        "-e",
        "--example",
        help="The sub folder name within examples/data",
        default="EDF_datasets",
    )
    parser.add_argument(
        "-c",
        "--case",
        help="The case name within the example folder",
        default="HR1",
    )
    parser.add_argument(
        "-o",
        "--out_name",
        help="The name of the samples output folder within the case folder",
        default="samples",
    )
    args = parser.parse_args()
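
    # Example invocation (illustrative; these are the defaults shown above):
    #   python run_sweep.py pywake -e EDF_datasets -c HR1 -o samples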

    if args.tool == "foxes":
        run_func = run_foxes
        run_func_kwargs = {"verbosity": 0}
    elif args.tool == "pywake":
        run_func = run_pywake
        run_func_kwargs = {}
    else:
        raise ValueError(
            "Invalid simulation tool specified. Choose either 'foxes' or 'pywake'."
        )

    # Example configuration: each entry maps a dotted parameter path in the
    # windIO dict to its sampling range; sample 0 is pinned to the default,
    # which may lie outside the sampled range (as for k_b here)
    param_config = {
        "attributes.analysis.wind_deficit_model.wake_expansion_coefficient.k_b": {
            "range": [0.01, 0.03],
            "default": 0.04,
            "short_name": "k_b",
        },
        "attributes.analysis.blockage_model.ss_alpha": {
            "range": [0.75, 1.25],
            "default": 0.875,
            "short_name": "ss_alpha",
        },
    }

    # Locate the file containing the metadata required to run the wifa API
    case = args.case
    base_dir = Path(__file__).parent.parent.parent
    edf_dir = base_dir / "examples" / "data" / args.example
    case_dir = edf_dir / case
    meta_file = case_dir / "meta.yaml"
    meta = load_yaml(meta_file)
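
    # The meta.yaml file is expected to provide at least the keys used below
    # (the file names and the rated-power value here are illustrative):
    #
    #   system: system.yaml        # windIO system description
    #   ref_resource: resource.nc  # physical inputs of the reference run
    #   ref_power: power.nc        # reference power dataset
    #   rated_power: 2000.0        # rated power of a single turbine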

    print(f"metadata for flow case: {meta}")
    dat = load_yaml(case_dir / meta["system"])
    reference_physical_inputs = xr.load_dataset(case_dir / meta["ref_resource"])
    turb_rated_power = meta["rated_power"]
    reference_power = xr.load_dataset(case_dir / meta["ref_power"])
    output_dir = case_dir / args.out_name

    start = time.time()
    print(f"Output directory to save results: {output_dir}")
    results = run_parameter_sweep(
        run_func,
        turb_rated_power,
        dat,
        param_config,
        reference_power,
        reference_physical_inputs,
        n_samples=10,
        seed=3,
        output_dir=output_dir,
        run_func_kwargs=run_func_kwargs,
    )
    print("Time taken for parameter sweep:", time.time() - start)

    results.to_netcdf(output_dir / "results.nc")