226 lines
7.3 KiB
Python
Executable File
226 lines
7.3 KiB
Python
Executable File
"""
System Service Module

Provides system monitoring and health check functionality for the Remote Access API.
Includes checks for database connectivity, daemon status, and system resources.
"""
|
|
|
|
import socket
|
|
import psutil
|
|
from datetime import datetime
|
|
from typing import Dict, Any, Optional
|
|
import structlog
|
|
|
|
logger = structlog.get_logger(__name__)
|
|
|
|
|
|
class SystemService:
    """Service for system health checks and monitoring.

    Every check returns a plain dictionary carrying at least a ``"status"``
    or ``"message"`` key and never raises: failures are logged through the
    module-level ``logger`` and reported in the returned dictionary.
    """

    def __init__(self, service_start_time: Optional[datetime] = None):
        """
        Initialize SystemService.

        Args:
            service_start_time: Service startup time used for the uptime
                calculation. Defaults to the current local time when omitted.
        """
        self.service_start_time = service_start_time or datetime.now()

    @staticmethod
    def check_database_connection(guacamole_client: Any, guacamole_url: str) -> Dict[str, Any]:
        """
        Check Guacamole database connectivity.

        The check asks the client for a system token and treats a truthy
        token as a healthy connection (obtaining the token presumably
        requires database access on the Guacamole side).

        Args:
            guacamole_client: Object exposing ``get_system_token()``.
            guacamole_url: Guacamole base URL. Not used by the check itself;
                kept so existing callers keep working.

        Returns:
            ``{"status": "ok", ...}`` when a token was obtained, otherwise
            ``{"status": "error", ...}``; the ``"error"`` key holds the
            exception text when the token request raised.
        """
        try:
            token = guacamole_client.get_system_token()
        except Exception as e:
            logger.error("Database connection check failed", error=str(e))
            return {
                "status": "error",
                "error": str(e),
                "message": "Database connection failed",
            }

        if token:
            return {
                "status": "ok",
                "message": "Database connection healthy",
            }
        return {
            "status": "error",
            "message": "Failed to obtain system token",
        }

    @staticmethod
    def check_guacd_daemon(
        host: str = "localhost",
        port: int = 4822,
        timeout: float = 2.0,
    ) -> Dict[str, Any]:
        """
        Check if the guacd daemon is accepting TCP connections.

        Args:
            host: Host name or address to probe.
            port: TCP port to probe (4822 by default).
            timeout: Socket timeout in seconds for the connection attempt.

        Returns:
            Status dictionary: ``"ok"`` when the TCP connect succeeds,
            ``"error"`` when it is refused/times out or the probe itself
            fails (in which case ``"error"`` holds the exception text).
        """
        try:
            # The context manager closes the socket even when settimeout or
            # connect_ex raises (e.g. on a name-resolution failure).
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(timeout)
                # connect_ex returns an errno instead of raising for
                # ordinary connection failures; 0 means connected.
                result = sock.connect_ex((host, port))
        except Exception as e:
            logger.error("guacd daemon check failed", error=str(e))
            return {
                "status": "error",
                "error": str(e),
                "message": "Failed to check guacd daemon",
            }

        if result == 0:
            return {
                "status": "ok",
                "message": "guacd daemon is running",
                "port": port,
            }
        return {
            "status": "error",
            "message": "guacd daemon is not accessible",
            "port": port,
        }

    @staticmethod
    def check_system_resources() -> Dict[str, Any]:
        """
        Check system resources (CPU, RAM, Disk).

        Usage above 90% marks the overall status ``"critical"``; usage above
        80% marks it ``"warning"`` unless something already escalated it.

        Returns:
            Status dictionary with rounded usage percentages, available
            memory and free disk space in GiB, an optional ``"warnings"``
            list, and a ``"message"`` when everything is healthy. On failure
            an ``"error"`` status dictionary is returned instead.
        """
        try:
            # interval=1 makes psutil sample CPU usage over one second,
            # so this call blocks for that long.
            cpu_percent = psutil.cpu_percent(interval=1)
            memory = psutil.virtual_memory()
            memory_percent = memory.percent
            disk = psutil.disk_usage('/')
            disk_percent = disk.percent

            status = "ok"
            warnings = []
            readings = (
                ("CPU", cpu_percent),
                ("Memory", memory_percent),
                ("Disk", disk_percent),
            )
            for label, percent in readings:
                if percent > 90:
                    # Critical always wins over any earlier warning.
                    status = "critical"
                    warnings.append(f"{label} usage critical: {percent}%")
                elif percent > 80:
                    # Never downgrade an earlier "critical" to "warning".
                    if status == "ok":
                        status = "warning"
                    warnings.append(f"{label} usage high: {percent}%")

            result = {
                "status": status,
                "cpu_percent": round(cpu_percent, 2),
                "memory_percent": round(memory_percent, 2),
                "disk_percent": round(disk_percent, 2),
                "memory_available_gb": round(memory.available / (1024**3), 2),
                "disk_free_gb": round(disk.free / (1024**3), 2),
            }

            if warnings:
                result["warnings"] = warnings

            if status == "ok":
                result["message"] = "System resources healthy"

            return result

        except Exception as e:
            logger.error("System resources check failed", error=str(e))
            return {
                "status": "error",
                "error": str(e),
                "message": "Failed to check system resources",
            }

    def get_system_info(self) -> Dict[str, Any]:
        """
        Get system information (uptime, version, etc.).

        Returns:
            Dictionary with service uptime (seconds and formatted), the
            running Python version, CPU count and system boot time, or an
            ``"error"``/``"message"`` dictionary when gathering fails.
        """
        # Function-scope import keeps this block self-contained.
        import platform

        try:
            # Take "now" in the same tzinfo as the start time so that both
            # naive and timezone-aware start times can be subtracted.
            now = datetime.now(tz=self.service_start_time.tzinfo)
            elapsed = (now - self.service_start_time).total_seconds()
            # Clamp so a start time in the future never yields negative uptime.
            uptime_seconds = max(0, int(elapsed))

            return {
                "uptime_seconds": uptime_seconds,
                "uptime_formatted": self._format_uptime(uptime_seconds),
                # Report the interpreter version; the previous value was
                # psutil's procfs path, which is not a version at all.
                "python_version": platform.python_version(),
                "cpu_count": psutil.cpu_count(),
                "boot_time": datetime.fromtimestamp(psutil.boot_time()).isoformat(),
            }

        except Exception as e:
            logger.error("Failed to get system info", error=str(e))
            return {
                "error": str(e),
                "message": "Failed to retrieve system information",
            }

    @staticmethod
    def _format_uptime(seconds: int) -> str:
        """
        Format uptime seconds to a human-readable string.

        Zero-valued units are omitted, e.g. ``3600`` -> ``"1h"`` and
        ``90061`` -> ``"1d 1h 1m 1s"``. Negative input is treated as zero
        and fractional seconds are truncated.

        Args:
            seconds: Uptime in seconds.

        Returns:
            Formatted uptime string; ``"0s"`` when there is nothing to show.
        """
        remaining = max(0, int(seconds))
        days, remaining = divmod(remaining, 86400)
        hours, remaining = divmod(remaining, 3600)
        minutes, secs = divmod(remaining, 60)

        units = ((days, "d"), (hours, "h"), (minutes, "m"), (secs, "s"))
        parts = [f"{value}{suffix}" for value, suffix in units if value > 0]

        return " ".join(parts) if parts else "0s"
|
|
|
|
|