Coverage for wifa_uq / model_error_database / path_inference.py: 82%
146 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-19 02:10 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-19 02:10 +0000
1"""
2Shared utilities for inferring paths from windIO system configurations.
4These functions parse windIO YAML files to auto-detect paths to:
5- reference_power (turbine_data.nc)
6- reference_resource (wind resource NetCDF)
7- wind_farm_layout (wind_farm.yaml)
8"""
10from __future__ import annotations
12import logging
13import re
14from pathlib import Path
16import xarray as xr
18logger = logging.getLogger(__name__)
# Matches ``key: !include filename`` at any indentation; the filename may be
# wrapped in single or double quotes and stops at whitespace, a quote, or '#'.
_WINDIO_INCLUDE_PATTERN = re.compile(
    r'^\s*(\w+):\s*!include\s+["\']?([^"\'\s\n#]+)["\']?', re.MULTILINE
)


def _collect_windio_includes(
    yaml_path: Path, ancestors: frozenset[Path]
) -> dict[str, Path]:
    """Scan one YAML file for ``!include`` directives, recursing into YAML includes.

    ``ancestors`` holds the resolved paths of the files currently being
    scanned further up the include chain; a YAML include that points back at
    one of them is recorded but not scanned again.
    """
    base_dir = yaml_path.parent
    includes: dict[str, Path] = {}

    # The file is scanned as raw text (not loaded with a YAML parser), so the
    # custom ``!include`` tag needs no constructor.
    with open(yaml_path, encoding="utf-8") as f:
        content = f.read()

    for match in _WINDIO_INCLUDE_PATTERN.finditer(content):
        key, filename = match.group(1), match.group(2)
        file_path = base_dir / filename  # includes are relative to the including file

        if not file_path.exists():
            continue
        includes[key] = file_path

        if not filename.endswith((".yaml", ".yml")):
            continue

        try:
            resolved = file_path.resolve()
            if resolved in ancestors:
                # Circular include: keep the entry, but do not recurse forever.
                continue
            nested = _collect_windio_includes(file_path, ancestors | {resolved})
        except (OSError, ValueError, RuntimeError) as exc:
            # Best effort: an unreadable or undecodable nested file must not
            # discard what was already collected.
            logger.debug(f"Could not parse nested YAML {file_path}: {exc}")
            continue

        # Add nested includes, but don't overwrite keys already recorded here.
        for nested_key, nested_path in nested.items():
            includes.setdefault(nested_key, nested_path)

    return includes


def extract_include_paths_windio(yaml_path: Path) -> dict[str, Path]:
    """
    Extract the files referenced by ``!include`` directives in a windIO YAML file.

    The file is scanned with a regular expression for lines of the form
    ``key: !include filename``. Each filename is resolved relative to the
    directory of the file that contains the directive, and only files that
    exist on disk are recorded. Included ``.yaml``/``.yml`` files are scanned
    recursively; their entries are merged in without replacing keys that were
    already recorded. Circular include chains are detected and not followed.

    Args:
        yaml_path: Path to the YAML file to scan (a ``str`` is accepted too).

    Returns:
        Dict mapping the key in front of each ``!include`` to the included
        file's path, with keys like:
        - 'site': path to site yaml
        - 'wind_farm': path to wind farm yaml
        - 'energy_resource': path to energy resource yaml
        - 'turbine_data': path to turbine data nc
        - 'wind_resource': path to wind resource nc (the actual resource file)

    Raises:
        OSError: If ``yaml_path`` itself cannot be opened.
        UnicodeDecodeError: If ``yaml_path`` itself is not valid UTF-8.
    """
    yaml_path = Path(yaml_path)
    return _collect_windio_includes(yaml_path, frozenset({yaml_path.resolve()}))
def find_resource_file_from_windio(system_yaml_path: Path) -> Path | None:
    """
    Locate the wind resource file reachable from a windIO system YAML.

    The include chain usually looks like:
        wind_energy_system.yaml
          -> site: !include energy_site.yaml
            -> energy_resource: !include energy_resource.yaml
              -> wind_resource: !include <resource_file>.nc

    Lookup order:
        1. a 'wind_resource' include with a .nc/.netcdf suffix,
        2. the 'energy_resource' include (used directly when it is a NetCDF
           file, otherwise its own 'wind_resource' include),
        3. the 'energy_resource' include found inside the 'site' YAML,
           handled the same way as in step 2.

    Returns the path that was found, or None when no step yields one.
    """
    netcdf_suffixes = (".nc", ".netcdf")
    yaml_suffixes = (".yaml", ".yml")

    def resource_behind(energy_resource: Path) -> Path | None:
        # An energy_resource include is either the NetCDF file itself or a
        # YAML file that in turn includes the wind_resource.
        if energy_resource.suffix in netcdf_suffixes:
            return energy_resource
        if energy_resource.suffix in yaml_suffixes:
            return extract_include_paths_windio(energy_resource).get("wind_resource")
        return None

    top_level = extract_include_paths_windio(system_yaml_path)

    # Step 1: wind_resource referenced directly (or surfaced from a nested file).
    direct = top_level.get("wind_resource")
    if direct is not None and direct.suffix in netcdf_suffixes:
        return direct

    # Step 2: energy_resource may be a YAML or the NetCDF file itself.
    if "energy_resource" in top_level:
        found = resource_behind(top_level["energy_resource"])
        if found is not None:
            return found

    # Step 3: look for energy_resource inside the site YAML.
    site = top_level.get("site")
    if site is not None and site.suffix in yaml_suffixes:
        site_level = extract_include_paths_windio(site)
        if "energy_resource" in site_level:
            return resource_behind(site_level["energy_resource"])

    return None
120def infer_paths_from_system_config(
121 system_config_path: Path,
122 explicit_paths: dict[str, Path | str] | None = None,
123) -> dict[str, Path]:
124 """
125 Infer all required paths from a windIO system config file.
127 Explicit paths override inferred ones (for backward compatibility).
129 Args:
130 system_config_path: Path to the wind_energy_system.yaml file
131 explicit_paths: Optional dict with explicit path overrides:
132 - reference_power
133 - reference_resource
134 - wind_farm_layout
136 Returns:
137 Dict with resolved paths:
138 - system_config
139 - reference_power
140 - reference_resource
141 - wind_farm_layout
143 Raises:
144 FileNotFoundError: If required paths cannot be found
145 """
146 system_config_path = Path(system_config_path)
147 farm_dir = system_config_path.parent
148 explicit_paths = explicit_paths or {}
150 paths = {
151 "system_config": system_config_path,
152 }
154 # Use explicit paths if provided (convert to Path)
155 for key in ["reference_power", "reference_resource", "wind_farm_layout"]:
156 if key in explicit_paths and explicit_paths[key] is not None:
157 paths[key] = Path(explicit_paths[key])
159 # For missing paths, try to infer from windIO structure
160 missing_keys = {"reference_power", "reference_resource", "wind_farm_layout"} - set(
161 paths.keys()
162 )
164 if missing_keys:
165 logger.info(f"Inferring paths for: {missing_keys}")
166 try:
167 includes = extract_include_paths_windio(system_config_path)
168 logger.debug(f"Found windIO includes: {list(includes.keys())}")
170 # Reference power: simulation_output.turbine_data
171 if "reference_power" not in paths:
172 if "turbine_data" in includes:
173 paths["reference_power"] = includes["turbine_data"]
174 logger.info(
175 f"Found reference_power: {paths['reference_power'].name}"
176 )
178 # Reference resource: Follow the windIO chain to find the NC file
179 if "reference_resource" not in paths:
180 resource_path = find_resource_file_from_windio(system_config_path)
181 if resource_path and resource_path.exists():
182 paths["reference_resource"] = resource_path
183 logger.info(
184 f"Found reference_resource: {paths['reference_resource'].name}"
185 )
187 # Wind farm layout
188 if "wind_farm_layout" not in paths:
189 if "wind_farm" in includes:
190 paths["wind_farm_layout"] = includes["wind_farm"]
191 logger.info(
192 f"Found wind_farm_layout: {paths['wind_farm_layout'].name}"
193 )
195 except Exception as e:
196 logger.warning(f"Could not parse windIO structure: {e}")
198 # Final fallback: pattern matching on common filenames
199 if "reference_power" not in paths:
200 for name in [
201 "turbine_data.nc",
202 "power.nc",
203 "ref_power.nc",
204 "observedPower*.nc",
205 ]:
206 candidates = list(farm_dir.glob(name))
207 if candidates:
208 paths["reference_power"] = candidates[0]
209 logger.info(
210 f"Found reference_power by pattern: {paths['reference_power'].name}"
211 )
212 break
214 if "reference_resource" not in paths:
215 # Try common names first
216 for name in ["resource.nc", "energy_resource.nc", "originalData.nc"]:
217 candidate = farm_dir / name
218 if candidate.exists():
219 paths["reference_resource"] = candidate
220 logger.info(f"Found reference_resource by pattern: {candidate.name}")
221 break
223 # If still not found, look for any NC file with resource-like variables
224 if "reference_resource" not in paths:
225 for nc_file in farm_dir.glob("*.nc"):
226 if nc_file.name in ["turbine_data.nc"]:
227 continue # Skip power files
228 if "reference_power" in paths and nc_file == paths["reference_power"]:
229 continue
230 try:
231 with xr.open_dataset(nc_file) as ds:
232 resource_vars = [
233 "wind_speed",
234 "WS",
235 "ws",
236 "u",
237 "U",
238 "wind_direction",
239 "WD",
240 "wd",
241 "potential_temperature",
242 "temperature",
243 ]
244 if any(
245 v in ds.data_vars or v in ds.coords for v in resource_vars
246 ):
247 paths["reference_resource"] = nc_file
248 logger.info(
249 f"Found reference_resource by content: {nc_file.name}"
250 )
251 break
252 except Exception:
253 continue
255 if "wind_farm_layout" not in paths:
256 for name in [
257 "wind_farm.yaml",
258 "layout.yaml",
259 "plant_wind_farm.yaml",
260 "*wind_farm*.yaml",
261 ]:
262 candidates = list(farm_dir.glob(name))
263 if candidates:
264 paths["wind_farm_layout"] = candidates[0]
265 logger.info(
266 f"Found wind_farm_layout by pattern: {paths['wind_farm_layout'].name}"
267 )
268 break
270 return paths
273def validate_required_paths(
274 paths: dict[str, Path],
275 required: list[str] | None = None,
276) -> None:
277 """
278 Validate that all required paths exist.
280 Args:
281 paths: Dict of path names to Path objects
282 required: List of required keys (default: all standard paths)
284 Raises:
285 FileNotFoundError: If any required path is missing or doesn't exist
286 """
287 if required is None:
288 required = [
289 "system_config",
290 "reference_power",
291 "reference_resource",
292 "wind_farm_layout",
293 ]
295 missing = []
296 not_found = []
298 for key in required:
299 if key not in paths:
300 missing.append(key)
301 elif not paths[key].exists():
302 not_found.append(f"{key}: {paths[key]}")
304 if missing or not_found:
305 # Get the directory for helpful error message
306 if "system_config" in paths:
307 farm_dir = paths["system_config"].parent
308 existing_files = sorted(farm_dir.glob("*"))
309 file_list = "\n ".join(str(f.name) for f in existing_files[:20])
310 if len(existing_files) > 20:
311 file_list += f"\n ... and {len(existing_files) - 20} more files"
312 else:
313 file_list = "(unknown directory)"
315 error_msg = "Could not find required paths:\n"
316 if missing:
317 error_msg += f" Missing: {missing}\n"
318 if not_found:
319 error_msg += f" Not found: {not_found}\n"
320 error_msg += f"\nFiles in directory:\n {file_list}"
321 error_msg += "\n\nYou can specify these paths explicitly in your config file."
323 raise FileNotFoundError(error_msg)