""" Prometheus API client for ServerManager. Provides instant queries, range queries, target discovery, alert listing, and rule inspection via the Prometheus HTTP API. """ from __future__ import annotations from typing import Any from core.logger import log class PrometheusClient: """Client for interacting with a Prometheus instance via its HTTP API.""" def __init__(self, server: dict) -> None: """ Initialize the Prometheus client. Args: server: dict with keys: ip, port, use_ssl """ self.ip: str = server["ip"] self.port: int = int(server["port"]) self.use_ssl: bool = bool(server.get("use_ssl", False)) scheme = "https" if self.use_ssl else "http" self.base_url: str = f"{scheme}://{self.ip}:{self.port}" self.timeout: int = 10 def _get(self, path: str, params: dict | None = None) -> Any: """Send a GET request to the Prometheus API.""" import requests url = f"{self.base_url}{path}" log.debug("Prometheus GET %s", url) resp = requests.get(url, params=params, timeout=self.timeout) resp.raise_for_status() return resp.json() def check_connection(self) -> bool: """ Check connectivity to Prometheus via GET /-/healthy. Returns: True if Prometheus responds successfully, False otherwise. """ import requests try: url = f"{self.base_url}/-/healthy" log.debug("Prometheus health check: %s", url) resp = requests.get(url, timeout=self.timeout) healthy = resp.status_code == 200 log.info("Prometheus health check: %s", "OK" if healthy else "FAIL") return healthy except Exception as exc: log.error("Prometheus health check failed: %s", exc) return False def query(self, promql: str) -> dict: """ Execute an instant query via GET /api/v1/query. Args: promql: PromQL expression string. Returns: API response dict with 'status', 'data', etc., or empty dict on error. """ try: result = self._get("/api/v1/query", params={"query": promql}) log.info("Prometheus query: %s -> status=%s", promql, result.get("status")) return result except Exception as exc: log.error("Prometheus query(%s) failed: %s", promql, exc) return {} def query_range( self, promql: str, start: str, end: str, step: str ) -> dict: """ Execute a range query via GET /api/v1/query_range. Args: promql: PromQL expression string. start: Start timestamp (RFC3339 or unix timestamp). end: End timestamp (RFC3339 or unix timestamp). step: Query resolution step (e.g. '15s', '1m'). Returns: API response dict, or empty dict on error. """ try: result = self._get( "/api/v1/query_range", params={"query": promql, "start": start, "end": end, "step": step}, ) log.info("Prometheus query_range: %s -> status=%s", promql, result.get("status")) return result except Exception as exc: log.error("Prometheus query_range(%s) failed: %s", promql, exc) return {} def targets(self) -> dict: """ List all scrape targets via GET /api/v1/targets. Returns: API response dict with active/dropped targets, or empty dict on error. """ try: result = self._get("/api/v1/targets") active = len(result.get("data", {}).get("activeTargets", [])) log.info("Prometheus: %d active targets", active) return result except Exception as exc: log.error("Prometheus targets failed: %s", exc) return {} def alerts(self) -> dict: """ List active alerts via GET /api/v1/alerts. Returns: API response dict with alerts, or empty dict on error. """ try: result = self._get("/api/v1/alerts") count = len(result.get("data", {}).get("alerts", [])) log.info("Prometheus: %d active alerts", count) return result except Exception as exc: log.error("Prometheus alerts failed: %s", exc) return {} def rules(self) -> dict: """ List all rules (recording + alerting) via GET /api/v1/rules. Returns: API response dict with rule groups, or empty dict on error. """ try: result = self._get("/api/v1/rules") groups = len(result.get("data", {}).get("groups", [])) log.info("Prometheus: %d rule groups", groups) return result except Exception as exc: log.error("Prometheus rules failed: %s", exc) return {}