Coverage for wifa_uq / model_error_database / path_inference.py: 82%

146 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-19 02:10 +0000

1""" 

2Shared utilities for inferring paths from windIO system configurations. 

3 

4These functions parse windIO YAML files to auto-detect paths to: 

5- reference_power (turbine_data.nc) 

6- reference_resource (wind resource NetCDF) 

7- wind_farm_layout (wind_farm.yaml) 

8""" 

9 

10from __future__ import annotations 

11 

12import logging 

13import re 

14from pathlib import Path 

15 

16import xarray as xr 

17 

18logger = logging.getLogger(__name__) 

19 

20 

# Matches ``key: !include filename`` at any indentation level; the filename may
# be wrapped in single or double quotes and ends at whitespace or a ``#``.
_WINDIO_INCLUDE_RE = re.compile(
    r'^\s*(\w+):\s*!include\s+["\']?([^"\'\s\n#]+)["\']?', re.MULTILINE
)


def _collect_windio_includes(yaml_path: Path, ancestors: frozenset) -> dict[str, Path]:
    """Collect includes of ``yaml_path``, skipping files already on the include chain."""
    base_dir = yaml_path.parent
    # Resolved paths of every file currently being parsed (this one included).
    # Only files on the *current* chain are skipped, so a file that is included
    # from two different places is still parsed in both places.
    chain = ancestors | {yaml_path.resolve()}
    includes: dict[str, Path] = {}

    # YAML is read as raw text: only the !include directives matter here.
    with open(yaml_path, "r", encoding="utf-8") as f:
        content = f.read()

    for match in _WINDIO_INCLUDE_RE.finditer(content):
        key, filename = match.group(1), match.group(2)
        # Include targets are relative to the directory of the including file.
        file_path = base_dir / filename
        if not file_path.exists():
            continue
        includes[key] = file_path

        # Only YAML targets can contain further !include directives.
        if not filename.endswith((".yaml", ".yml")):
            continue
        # A file that includes one of its own ancestors would recurse forever.
        if file_path.resolve() in chain:
            continue

        try:
            nested = _collect_windio_includes(file_path, chain)
        except Exception as e:
            # Best effort: an unreadable nested file must not hide the
            # includes that were already found.
            logger.debug(f"Could not parse nested YAML {file_path}: {e}")
            continue

        # Add nested includes, but don't overwrite keys that are already known.
        for nested_key, nested_path in nested.items():
            includes.setdefault(nested_key, nested_path)

    return includes


def extract_include_paths_windio(yaml_path: Path) -> dict[str, Path]:
    """
    Parse a windIO YAML file and extract paths from !include directives.

    The file is scanned as text for ``key: !include filename`` entries.
    Included YAML files (``.yaml`` / ``.yml``) are scanned recursively, and
    each include is resolved relative to the file that contains it. Targets
    that do not exist on disk are ignored. Keys found in nested files never
    replace a key that was already collected. Circular include chains are
    detected and not followed.

    Args:
        yaml_path: Path (or path string) of the YAML file to scan.

    Returns:
        A dict mapping include keys to file paths, with keys like:
        - 'site': path to site yaml
        - 'wind_farm': path to wind farm yaml
        - 'energy_resource': path to energy resource yaml
        - 'turbine_data': path to turbine data nc
        - 'wind_resource': path to wind resource nc (the actual resource file)

    Raises:
        OSError: If ``yaml_path`` itself cannot be opened. Failures while
            reading nested files are logged at debug level and skipped.
    """
    return _collect_windio_includes(Path(yaml_path), frozenset())

68 

69 

_NETCDF_SUFFIXES = (".nc", ".netcdf")
_YAML_SUFFIXES = (".yaml", ".yml")


def _resource_via_energy_resource(found: dict[str, Path]) -> Path | None:
    """Resolve the resource file through the 'energy_resource' entry of ``found``."""
    if "energy_resource" not in found:
        return None

    target = found["energy_resource"]
    # The energy resource may itself be the NetCDF file ...
    if target.suffix in _NETCDF_SUFFIXES:
        return target
    # ... or a YAML file that points at it through 'wind_resource'.
    if target.suffix in _YAML_SUFFIXES:
        inner = extract_include_paths_windio(target)
        if "wind_resource" in inner:
            return inner["wind_resource"]
    return None


def find_resource_file_from_windio(system_yaml_path: Path) -> Path | None:
    """
    Locate the wind resource NetCDF file referenced by a windIO system file.

    The usual include chain is:
        wind_energy_system.yaml
          -> site: !include energy_site.yaml
               -> energy_resource: !include energy_resource.yaml
                    -> wind_resource: !include <resource_file>.nc

    Lookup order:
        1. a 'wind_resource' include with a .nc / .netcdf suffix,
        2. the 'energy_resource' include (NetCDF directly, or a YAML whose
           'wind_resource' include is returned),
        3. the 'energy_resource' include found inside the 'site' YAML.

    Returns the path that was found, or None when no step succeeds.
    """
    top_level = extract_include_paths_windio(system_yaml_path)

    # Step 1: direct wind_resource reference.
    if (
        "wind_resource" in top_level
        and top_level["wind_resource"].suffix in _NETCDF_SUFFIXES
    ):
        return top_level["wind_resource"]

    # Step 2: energy_resource known at the system level.
    resolved = _resource_via_energy_resource(top_level)
    if resolved is not None:
        return resolved

    # Step 3: energy_resource declared inside the site YAML.
    if "site" in top_level and top_level["site"].suffix in _YAML_SUFFIXES:
        return _resource_via_energy_resource(
            extract_include_paths_windio(top_level["site"])
        )

    return None

118 

119 

120def infer_paths_from_system_config( 

121 system_config_path: Path, 

122 explicit_paths: dict[str, Path | str] | None = None, 

123) -> dict[str, Path]: 

124 """ 

125 Infer all required paths from a windIO system config file. 

126 

127 Explicit paths override inferred ones (for backward compatibility). 

128 

129 Args: 

130 system_config_path: Path to the wind_energy_system.yaml file 

131 explicit_paths: Optional dict with explicit path overrides: 

132 - reference_power 

133 - reference_resource 

134 - wind_farm_layout 

135 

136 Returns: 

137 Dict with resolved paths: 

138 - system_config 

139 - reference_power 

140 - reference_resource 

141 - wind_farm_layout 

142 

143 Raises: 

144 FileNotFoundError: If required paths cannot be found 

145 """ 

146 system_config_path = Path(system_config_path) 

147 farm_dir = system_config_path.parent 

148 explicit_paths = explicit_paths or {} 

149 

150 paths = { 

151 "system_config": system_config_path, 

152 } 

153 

154 # Use explicit paths if provided (convert to Path) 

155 for key in ["reference_power", "reference_resource", "wind_farm_layout"]: 

156 if key in explicit_paths and explicit_paths[key] is not None: 

157 paths[key] = Path(explicit_paths[key]) 

158 

159 # For missing paths, try to infer from windIO structure 

160 missing_keys = {"reference_power", "reference_resource", "wind_farm_layout"} - set( 

161 paths.keys() 

162 ) 

163 

164 if missing_keys: 

165 logger.info(f"Inferring paths for: {missing_keys}") 

166 try: 

167 includes = extract_include_paths_windio(system_config_path) 

168 logger.debug(f"Found windIO includes: {list(includes.keys())}") 

169 

170 # Reference power: simulation_output.turbine_data 

171 if "reference_power" not in paths: 

172 if "turbine_data" in includes: 

173 paths["reference_power"] = includes["turbine_data"] 

174 logger.info( 

175 f"Found reference_power: {paths['reference_power'].name}" 

176 ) 

177 

178 # Reference resource: Follow the windIO chain to find the NC file 

179 if "reference_resource" not in paths: 

180 resource_path = find_resource_file_from_windio(system_config_path) 

181 if resource_path and resource_path.exists(): 

182 paths["reference_resource"] = resource_path 

183 logger.info( 

184 f"Found reference_resource: {paths['reference_resource'].name}" 

185 ) 

186 

187 # Wind farm layout 

188 if "wind_farm_layout" not in paths: 

189 if "wind_farm" in includes: 

190 paths["wind_farm_layout"] = includes["wind_farm"] 

191 logger.info( 

192 f"Found wind_farm_layout: {paths['wind_farm_layout'].name}" 

193 ) 

194 

195 except Exception as e: 

196 logger.warning(f"Could not parse windIO structure: {e}") 

197 

198 # Final fallback: pattern matching on common filenames 

199 if "reference_power" not in paths: 

200 for name in [ 

201 "turbine_data.nc", 

202 "power.nc", 

203 "ref_power.nc", 

204 "observedPower*.nc", 

205 ]: 

206 candidates = list(farm_dir.glob(name)) 

207 if candidates: 

208 paths["reference_power"] = candidates[0] 

209 logger.info( 

210 f"Found reference_power by pattern: {paths['reference_power'].name}" 

211 ) 

212 break 

213 

214 if "reference_resource" not in paths: 

215 # Try common names first 

216 for name in ["resource.nc", "energy_resource.nc", "originalData.nc"]: 

217 candidate = farm_dir / name 

218 if candidate.exists(): 

219 paths["reference_resource"] = candidate 

220 logger.info(f"Found reference_resource by pattern: {candidate.name}") 

221 break 

222 

223 # If still not found, look for any NC file with resource-like variables 

224 if "reference_resource" not in paths: 

225 for nc_file in farm_dir.glob("*.nc"): 

226 if nc_file.name in ["turbine_data.nc"]: 

227 continue # Skip power files 

228 if "reference_power" in paths and nc_file == paths["reference_power"]: 

229 continue 

230 try: 

231 with xr.open_dataset(nc_file) as ds: 

232 resource_vars = [ 

233 "wind_speed", 

234 "WS", 

235 "ws", 

236 "u", 

237 "U", 

238 "wind_direction", 

239 "WD", 

240 "wd", 

241 "potential_temperature", 

242 "temperature", 

243 ] 

244 if any( 

245 v in ds.data_vars or v in ds.coords for v in resource_vars 

246 ): 

247 paths["reference_resource"] = nc_file 

248 logger.info( 

249 f"Found reference_resource by content: {nc_file.name}" 

250 ) 

251 break 

252 except Exception: 

253 continue 

254 

255 if "wind_farm_layout" not in paths: 

256 for name in [ 

257 "wind_farm.yaml", 

258 "layout.yaml", 

259 "plant_wind_farm.yaml", 

260 "*wind_farm*.yaml", 

261 ]: 

262 candidates = list(farm_dir.glob(name)) 

263 if candidates: 

264 paths["wind_farm_layout"] = candidates[0] 

265 logger.info( 

266 f"Found wind_farm_layout by pattern: {paths['wind_farm_layout'].name}" 

267 ) 

268 break 

269 

270 return paths 

271 

272 

273def validate_required_paths( 

274 paths: dict[str, Path], 

275 required: list[str] | None = None, 

276) -> None: 

277 """ 

278 Validate that all required paths exist. 

279 

280 Args: 

281 paths: Dict of path names to Path objects 

282 required: List of required keys (default: all standard paths) 

283 

284 Raises: 

285 FileNotFoundError: If any required path is missing or doesn't exist 

286 """ 

287 if required is None: 

288 required = [ 

289 "system_config", 

290 "reference_power", 

291 "reference_resource", 

292 "wind_farm_layout", 

293 ] 

294 

295 missing = [] 

296 not_found = [] 

297 

298 for key in required: 

299 if key not in paths: 

300 missing.append(key) 

301 elif not paths[key].exists(): 

302 not_found.append(f"{key}: {paths[key]}") 

303 

304 if missing or not_found: 

305 # Get the directory for helpful error message 

306 if "system_config" in paths: 

307 farm_dir = paths["system_config"].parent 

308 existing_files = sorted(farm_dir.glob("*")) 

309 file_list = "\n ".join(str(f.name) for f in existing_files[:20]) 

310 if len(existing_files) > 20: 

311 file_list += f"\n ... and {len(existing_files) - 20} more files" 

312 else: 

313 file_list = "(unknown directory)" 

314 

315 error_msg = "Could not find required paths:\n" 

316 if missing: 

317 error_msg += f" Missing: {missing}\n" 

318 if not_found: 

319 error_msg += f" Not found: {not_found}\n" 

320 error_msg += f"\nFiles in directory:\n {file_list}" 

321 error_msg += "\n\nYou can specify these paths explicitly in your config file." 

322 

323 raise FileNotFoundError(error_msg)