# Status Monitoring

The Status API provides information about Kibana server health and operational metrics. This is essential for monitoring, alerting, and troubleshooting your Kibana deployment.

## Overview

The Status API allows you to:
- Check Kibana server health
- Monitor service availability
- Retrieve operational statistics
- Implement health checks for automation
- Track resource usage and performance

## Checking Kibana Status

### Basic Status Check

```python
from kibana import Kibana

client = Kibana("http://localhost:5601", api_key="your_api_key")

# Get current status
response = client.status.get_status()
status = response.body

# Check overall health
overall_status = status["status"]["overall"]["level"]
print(f"Kibana status: {overall_status}")

client.close()
```

### Status Levels

Kibana reports three status levels:

- **`available`**: All services are operational
- **`degraded`**: Some services are experiencing issues but Kibana is still functional
- **`unavailable`**: Kibana is not operational

### Detailed Status Information

```python
response = client.status.get_status()
status = response.body

# Overall status
print(f"Overall: {status['status']['overall']['level']}")
print(f"Summary: {status['status']['overall']['summary']}")

# Core services status
for service_name, service_info in status['status']['core'].items():
    print(f"{service_name}: {service_info['level']}")
    if service_info.get('summary'):
        print(f"  Summary: {service_info['summary']}")

# Plugin statuses
for plugin_name, plugin_info in status['status'].get('plugins', {}).items():
    print(f"Plugin {plugin_name}: {plugin_info['level']}")
```

### Version Information

```python
response = client.status.get_status()
status = response.body

# Kibana version
version_info = status['version']
print(f"Kibana version: {version_info['number']}")
print(f"Build number: {version_info['build_number']}")
print(f"Build hash: {version_info['build_hash']}")
```

## Getting Operational Statistics

### Basic Statistics

```python
# Get detailed statistics
response = client.status.get_stats()
stats = response.body

# Process information
process = stats['process']
print(f"Uptime: {process['uptime_in_millis'] / 1000:.2f} seconds")
print(f"Memory used: {process['memory']['heap']['used_bytes'] / (1024**2):.2f} MB")
print(f"Memory total: {process['memory']['heap']['total_bytes'] / (1024**2):.2f} MB")

# OS information
os_info = stats['os']
print(f"Platform: {os_info['platform']}")
print(f"Load average (1m): {os_info['load']['1m']}")
print(f"Load average (5m): {os_info['load']['5m']}")
print(f"Load average (15m): {os_info['load']['15m']}")
```

### Memory Statistics

```python
response = client.status.get_stats()
stats = response.body

memory = stats['process']['memory']
heap = memory['heap']

print(f"Heap used: {heap['used_bytes'] / (1024**2):.2f} MB")
print(f"Heap total: {heap['total_bytes'] / (1024**2):.2f} MB")
print(f"Heap limit: {heap['size_limit'] / (1024**2):.2f} MB")
print(f"Heap usage: {(heap['used_bytes'] / heap['size_limit']) * 100:.1f}%")
```

### Request Statistics

```python
response = client.status.get_stats()
stats = response.body

# HTTP request statistics
requests = stats.get('requests', {})
print(f"Total requests: {requests.get('total', 0)}")
print(f"Disconnects: {requests.get('disconnects', 0)}")
print(f"Status codes: {requests.get('statusCodes', {})}")
```

## Health Check Patterns

### Simple Health Check

```python
def is_kibana_healthy(client):
    """Check if Kibana is healthy."""
    try:
        response = client.status.get_status()
        status_level = response.body['status']['overall']['level']
        return status_level == 'available'
    except Exception:
        return False

# Usage
if is_kibana_healthy(client):
    print("✅ Kibana is healthy")
else:
    print("❌ Kibana is unhealthy")
```

### Detailed Health Check

```python
def check_kibana_health(client):
    """Perform detailed health check."""
    try:
        response = client.status.get_status()
        status = response.body

        overall = status['status']['overall']['level']

        result = {
            'healthy': overall == 'available',
            'status': overall,
            'version': status['version']['number'],
            'services': {}
        }

        # Check core services
        for service_name, service_info in status['status']['core'].items():
            result['services'][service_name] = {
                'level': service_info['level'],
                'summary': service_info.get('summary', '')
            }

        return result

    except Exception as e:
        return {
            'healthy': False,
            'error': str(e)
        }

# Usage
health = check_kibana_health(client)
print(f"Healthy: {health['healthy']}")
print(f"Status: {health.get('status', 'unknown')}")
for service, info in health.get('services', {}).items():
    print(f"  {service}: {info['level']}")
```

### Monitoring with Alerts

```python
import time

def monitor_kibana(client, check_interval=60, alert_threshold=3):
    """Monitor Kibana and alert on issues."""
    consecutive_failures = 0

    while True:
        try:
            response = client.status.get_status()
            status_level = response.body['status']['overall']['level']

            if status_level == 'available':
                consecutive_failures = 0
                print(f"✅ Kibana is healthy")
            elif status_level == 'degraded':
                consecutive_failures += 1
                print(f"⚠️  Kibana is degraded ({consecutive_failures}/{alert_threshold})")

                if consecutive_failures >= alert_threshold:
                    send_alert("Kibana is degraded")
            else:  # unavailable
                consecutive_failures += 1
                print(f"❌ Kibana is unavailable ({consecutive_failures}/{alert_threshold})")

                if consecutive_failures >= alert_threshold:
                    send_alert("Kibana is unavailable")

        except Exception as e:
            consecutive_failures += 1
            print(f"❌ Failed to check status: {e}")

            if consecutive_failures >= alert_threshold:
                send_alert(f"Cannot connect to Kibana: {e}")

        time.sleep(check_interval)

def send_alert(message):
    """Send alert notification."""
    print(f"🚨 ALERT: {message}")
    # Implement your alerting logic here
    # (email, Slack, PagerDuty, etc.)
```

## Performance Monitoring

### Memory Usage Monitoring

```python
def check_memory_usage(client, threshold_percent=80):
    """Check if memory usage exceeds threshold."""
    response = client.status.get_stats()
    stats = response.body

    heap = stats['process']['memory']['heap']
    used = heap['used_bytes']
    limit = heap['size_limit']
    usage_percent = (used / limit) * 100

    if usage_percent > threshold_percent:
        print(f"⚠️  High memory usage: {usage_percent:.1f}%")
        return False
    else:
        print(f"✅ Memory usage OK: {usage_percent:.1f}%")
        return True
```

### Load Average Monitoring

```python
def check_load_average(client, threshold=2.0):
    """Check if system load is high."""
    response = client.status.get_stats()
    stats = response.body

    load_1m = stats['os']['load']['1m']

    if load_1m > threshold:
        print(f"⚠️  High load average: {load_1m}")
        return False
    else:
        print(f"✅ Load average OK: {load_1m}")
        return True
```

## Integration with Monitoring Systems

### Prometheus Metrics

```python
def export_prometheus_metrics(client):
    """Export Kibana metrics in Prometheus format."""
    response = client.status.get_stats()
    stats = response.body

    metrics = []

    # Memory metrics
    heap = stats['process']['memory']['heap']
    metrics.append(f'kibana_heap_used_bytes {heap["used_bytes"]}')
    metrics.append(f'kibana_heap_total_bytes {heap["total_bytes"]}')
    metrics.append(f'kibana_heap_limit_bytes {heap["size_limit"]}')

    # Uptime metric
    uptime = stats['process']['uptime_in_millis'] / 1000
    metrics.append(f'kibana_uptime_seconds {uptime}')

    # Load average
    load = stats['os']['load']
    metrics.append(f'kibana_load_1m {load["1m"]}')
    metrics.append(f'kibana_load_5m {load["5m"]}')
    metrics.append(f'kibana_load_15m {load["15m"]}')

    return '\n'.join(metrics)

# Usage
metrics = export_prometheus_metrics(client)
print(metrics)
```

### Health Check Endpoint

```python
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/health')
def health_check():
    """Health check endpoint for load balancers."""
    try:
        response = client.status.get_status()
        status_level = response.body['status']['overall']['level']

        if status_level == 'available':
            return jsonify({'status': 'healthy'}), 200
        elif status_level == 'degraded':
            return jsonify({'status': 'degraded'}), 200
        else:
            return jsonify({'status': 'unhealthy'}), 503

    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)}), 503

@app.route('/metrics')
def metrics():
    """Metrics endpoint for monitoring."""
    try:
        response = client.status.get_stats()
        stats = response.body

        return jsonify({
            'uptime_seconds': stats['process']['uptime_in_millis'] / 1000,
            'memory_used_mb': stats['process']['memory']['heap']['used_bytes'] / (1024**2),
            'memory_limit_mb': stats['process']['memory']['heap']['size_limit'] / (1024**2),
            'load_1m': stats['os']['load']['1m']
        }), 200

    except Exception as e:
        return jsonify({'error': str(e)}), 500
```

## Best Practices

### 1. Implement Regular Health Checks

```python
# Check health before critical operations
if not is_kibana_healthy(client):
    print("Kibana is unhealthy, skipping operation")
    return

# Proceed with operation
result = client.actions.create(...)
```

### 2. Monitor Key Metrics

```python
def monitor_key_metrics(client):
    """Monitor key Kibana metrics."""
    response = client.status.get_stats()
    stats = response.body

    # Memory usage
    heap = stats['process']['memory']['heap']
    memory_usage = (heap['used_bytes'] / heap['size_limit']) * 100

    # Load average
    load_1m = stats['os']['load']['1m']

    # Uptime
    uptime_hours = stats['process']['uptime_in_millis'] / (1000 * 60 * 60)

    return {
        'memory_usage_percent': memory_usage,
        'load_1m': load_1m,
        'uptime_hours': uptime_hours
    }
```

### 3. Set Up Alerts

```python
def check_and_alert(client):
    """Check metrics and send alerts if needed."""
    metrics = monitor_key_metrics(client)

    if metrics['memory_usage_percent'] > 80:
        send_alert(f"High memory usage: {metrics['memory_usage_percent']:.1f}%")

    if metrics['load_1m'] > 2.0:
        send_alert(f"High load average: {metrics['load_1m']}")
```

### 4. Log Status Information

```python
import logging

logger = logging.getLogger(__name__)

def log_status(client):
    """Log Kibana status information."""
    try:
        response = client.status.get_status()
        status = response.body

        logger.info(
            "Kibana status check",
            extra={
                'status': status['status']['overall']['level'],
                'version': status['version']['number'],
                'uptime': status.get('metrics', {}).get('process', {}).get('uptime_in_millis')
            }
        )
    except Exception as e:
        logger.error(f"Failed to check Kibana status: {e}")
```

## Troubleshooting

### Connection Issues

**Problem**: Cannot connect to Kibana

**Solutions**:
- Verify Kibana URL is correct
- Check network connectivity
- Verify authentication credentials
- Check firewall rules

### Degraded Status

**Problem**: Kibana reports degraded status

**Solutions**:
- Check individual service statuses
- Review Kibana server logs
- Verify Elasticsearch connectivity
- Check resource availability (memory, disk)

### High Memory Usage

**Problem**: Memory usage is consistently high

**Solutions**:
- Increase heap size in Kibana configuration
- Review and optimize dashboards and visualizations
- Check for memory leaks
- Consider scaling horizontally

## Next Steps

- Learn about [Error Handling](error-handling.md) for comprehensive error management
- Explore [Observability](observability.md) for distributed tracing
- Check [Advanced Usage](advanced-usage.md) for performance optimization
- See [Examples](../examples/index.md) for practical code samples