ingress-debugger/.null-ls_818840_ingress_debugger.py

#!/usr/bin/env -S uv run --quiet --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "typer",
#     "rich",
#     "httpx",
#     "kubernetes",
#     "pydantic",
# ]
# ///

"""
k8s-ingress-debugger

Given a Deployment name, this tool inspects the Kubernetes objects around it and
runs a set of connectivity checks:

• Does an Ingress point to it?
• What are the Ingress hosts?
• What's the healthcheck route (from readiness/liveness HTTP probes)?
• Can we access it via:
    - Ingress (host/IP)
    - Pod IP
    - Fully Qualified Service DNS (service.ns.svc.cluster.local)
• Provides a convenient log fetcher (the `logs` command)

It works both in-cluster and from a developer machine (tries in-cluster first,
then falls back to local kubeconfig). All checker functions are importable and
usable outside of Typer.

Examples
--------
Inspect with rich table output:
    ./k8s_ingress_debug.py inspect my-deployment -n default

Print JSON (for automation):
    ./k8s_ingress_debug.py inspect my-deployment -n default --json

Stream logs from all pods of the deployment:
    ./k8s_ingress_debug.py logs my-deployment -n default -f --tail 200
"""

from __future__ import annotations

import json
import socket
import time
from dataclasses import dataclass
from typing import Iterable, List, Optional, Tuple

import httpx
import typer
from pydantic import BaseModel
from rich import box
from rich.console import Console
from rich.markup import escape
from rich.panel import Panel
from rich.table import Table

from kubernetes import client, config
from kubernetes.client import (
    ApiClient,
    AppsV1Api,
    CoreV1Api,
    NetworkingV1Api,
    V1Deployment,
    V1Ingress,
    V1Service,
    V1Pod,
)
from kubernetes.stream import stream

# Typer application that the `inspect` and `logs` commands register on via
# @app.command; shell-completion options are disabled (add_completion=False).
app = typer.Typer(add_completion=False, help="Kubernetes Ingress Debugger")
# Shared Rich console that the CLI commands use to print results and errors.
console = Console()


@dataclass
class KubeCtx:
    """Bundle of Kubernetes API clients plus a flag recording how they were configured."""

    # Underlying client; load_kube_ctx() hands this same instance to the three
    # API wrappers below.
    api_client: ApiClient
    # Core (v1) API wrapper.
    core: CoreV1Api
    # Apps (apps/v1) API wrapper.
    apps: AppsV1Api
    # Networking (networking.k8s.io/v1) API wrapper.
    net: NetworkingV1Api
    # True when load_kube_ctx() loaded the in-cluster configuration, False when
    # it fell back to the local kubeconfig.
    in_cluster: bool


def load_kube_ctx() -> KubeCtx:
    """Build a ``KubeCtx``, preferring the in-cluster configuration.

    The in-cluster configuration is tried first. If loading it raises any
    exception, the local kubeconfig is loaded instead; an error raised by that
    fallback is not caught and propagates to the caller.

    Returns:
        A context whose ``core``, ``apps`` and ``net`` wrappers share a single
        ``ApiClient``; ``in_cluster`` tells which configuration was loaded.
    """
    try:
        config.load_incluster_config()
    except Exception:
        # In-cluster configuration is unavailable: use the local kubeconfig.
        config.load_kube_config()
        running_in_cluster = False
    else:
        running_in_cluster = True

    shared_client = client.ApiClient()
    core_api = CoreV1Api(shared_client)
    apps_api = AppsV1Api(shared_client)
    net_api = NetworkingV1Api(shared_client)
    return KubeCtx(
        api_client=shared_client,
        core=core_api,
        apps=apps_api,
        net=net_api,
        in_cluster=running_in_cluster,
    )


class ProbeInfo(BaseModel):
    """Health-probe details attached to an inspection report.

    NOTE(review): the code that fills this model is not visible here; per the
    module docstring it presumably comes from readiness/liveness HTTP probes.
    """

    # Probe kind label (presumably "readiness" or "liveness" -- TODO confirm).
    kind: str
    # Request path of the probe, if one is known.
    path: str | None = None
    # Probe port, as a number or a string (presumably a named port -- TODO confirm).
    port: str | int | None = None
    # URL scheme; "http" unless set otherwise.
    scheme: str = "http"


class ServiceBinding(BaseModel):
    """One Service port entry listed in ``InspectionReport.services``."""

    # Service name.
    service: str
    # Namespace the Service lives in.
    namespace: str
    # Port number exposed by the Service.
    port: int
    # Target port, as a number or a string (presumably a named container
    # port -- TODO confirm); None when not set.
    target_port: str | int | None = None
    # Transport protocol; defaults to "TCP".
    protocol: str = "TCP"


class IngressBinding(BaseModel):
    """One Ingress entry listed in ``InspectionReport.ingresses``.

    NOTE(review): presumably one host/path rule together with the Service
    backend it routes to; the extraction code is not visible here.
    """

    # Ingress object name.
    ingress: str
    # Namespace the Ingress lives in.
    namespace: str
    # Host of the rule.
    host: str
    # URL path of the rule.
    path: str
    # Whether TLS applies (presumably: the host is covered by the Ingress TLS
    # section -- TODO confirm).
    tls: bool
    # Name of the backend Service.
    service: str
    # Numeric port of the backend Service.
    service_port: int


class Reachability(BaseModel):
    """Outcome of a single connectivity check listed in ``InspectionReport.reachability``."""

    # Route the check went through (presumably Ingress, Pod IP or Service DNS,
    # per the module docstring -- TODO confirm exact labels).
    via: str
    # What was checked (host, IP or DNS name; exact format set by the checker).
    target: str
    # URL that was requested, when the check has one.
    url: str | None = None
    # Whether the check succeeded; False unless set otherwise.
    ok: bool = False
    # Status code of the response, when one was received (presumably HTTP).
    status: int | None = None
    # Error description, when one was recorded.
    error: str | None = None
    # Latency in milliseconds, when measured.
    latency_ms: int | None = None


class InspectionReport(BaseModel):
    """Everything collected while inspecting one deployment.

    This is what the ``inspect`` command prints, either as tables or via
    ``model_dump()`` as JSON.
    """

    # Name and namespace of the inspected deployment.
    deployment: str
    namespace: str
    # Whether the inspection ran with in-cluster configuration.
    in_cluster: bool
    # Names of the deployment's pods.
    pods: list[str]
    # Looked up by pod name in cli_inspect to get the pod's IP.
    pod_ips: dict
    # Looked up by pod name in cli_inspect to get an iterable of that pod's ports.
    container_ports: dict
    # Health probe details, or None when no probe was recorded.
    health_probe: ProbeInfo | None = None
    # The list defaults below are plain literals; pydantic copies field
    # defaults per instance, so they are not shared between reports.
    services: list[ServiceBinding] = []
    ingresses: list[IngressBinding] = []
    reachability: list[Reachability] = []


# === Functions omitted for brevity ===
# They include: find_deployment, pods_for_deployment, services_for_deployment,
# ingresses_for_services, extract_probe, resolve_service_bindings,
# extract_ingress_bindings, dns_resolves, http_check, tcp_check,
# try_exec_http_from_pod, inspect_deployment, print_pod_logs

# === CLI commands ===

def _cell(value: object) -> str:
    """Return *value* as markup-safe cell text, using "-" for missing values."""
    if value is None or value == "":
        return "-"
    if isinstance(value, bool):
        return "yes" if value else "no"
    # Escape so that brackets in names, URLs or error text are printed
    # literally instead of being parsed as Rich style tags.
    return escape(str(value))


def _build_table(title: str, headings: tuple[str, ...], rows: list[tuple[str, ...]]) -> Table:
    """Build a simple Rich table from pre-rendered (already escaped) rows."""
    table = Table(title=title, box=box.SIMPLE)
    for heading in headings:
        table.add_column(heading)
    for row in rows:
        table.add_row(*row)
    return table


def _print_section(title: str, headings: tuple[str, ...], rows: list[tuple[str, ...]]) -> None:
    """Print *rows* as a titled table, or a one-line note when there are none."""
    if not rows:
        console.print(f"[yellow]{title}: none found[/yellow]")
        return
    console.print(_build_table(title, headings, rows))


@app.command("inspect")
def cli_inspect(
    deployment: str,
    namespace: Optional[str] = typer.Option(None, "--namespace", "-n", help="Namespace of the Deployment"),
    timeout: float = typer.Option(5.0, help="HTTP/TCP timeout (seconds)"),
    insecure: bool = typer.Option(False, help="Skip TLS verification"),
    output_json: bool = typer.Option(False, "--json", help="Print JSON report"),
) -> None:
    """Inspect a Deployment and print its pods, services, ingresses and reachability checks.

    With --json the full report is printed as JSON instead of tables.
    Exits with status 1 when the inspection raises an error.
    """
    try:
        report = inspect_deployment(deployment, namespace=namespace, timeout=timeout, verify_tls=(not insecure))
    except Exception as e:
        # Escape the message: exception text may contain "[...]" sequences that
        # Rich would otherwise interpret as markup.
        console.print(f"[red]Error:[/red] {escape(str(e))}")
        raise typer.Exit(1) from e

    if output_json:
        # print_json re-parses and indents the document itself.
        console.print_json(report.model_dump_json())
        return

    probe = report.health_probe
    if probe is None:
        probe_text = "-"
    else:
        probe_text = (
            f"{_cell(probe.kind)} {_cell(probe.scheme)} "
            f"port={_cell(probe.port)} path={_cell(probe.path)}"
        )
    summary_lines = [
        f"Deployment: {_cell(report.deployment)}",
        f"Namespace: {_cell(report.namespace)}",
        f"In cluster: {_cell(report.in_cluster)}",
        f"Health probe: {probe_text}",
    ]
    console.print(Panel("\n".join(summary_lines), border_style="cyan"))

    # The pods table is always printed, even when the deployment has no pods.
    pod_rows = [
        (
            _cell(pod),
            _cell(report.pod_ips.get(pod)),
            _cell(", ".join(str(p) for p in report.container_ports.get(pod) or ())),
        )
        for pod in report.pods
    ]
    console.print(_build_table("Pods", ("Pod", "IP", "Ports"), pod_rows))

    service_rows = [
        (_cell(s.service), _cell(s.namespace), _cell(s.port), _cell(s.target_port), _cell(s.protocol))
        for s in report.services
    ]
    _print_section("Services", ("Service", "Namespace", "Port", "Target port", "Protocol"), service_rows)

    ingress_rows = [
        (
            _cell(i.ingress),
            _cell(i.namespace),
            _cell(i.host),
            _cell(i.path),
            _cell(i.tls),
            _cell(i.service),
            _cell(i.service_port),
        )
        for i in report.ingresses
    ]
    _print_section(
        "Ingresses",
        ("Ingress", "Namespace", "Host", "Path", "TLS", "Service", "Service port"),
        ingress_rows,
    )

    reach_rows = [
        (
            _cell(r.via),
            _cell(r.target),
            _cell(r.url),
            "[green]yes[/green]" if r.ok else "[red]no[/red]",
            _cell(r.status),
            _cell(r.latency_ms),
            _cell(r.error),
        )
        for r in report.reachability
    ]
    _print_section(
        "Reachability",
        ("Via", "Target", "URL", "OK", "Status", "Latency (ms)", "Error"),
        reach_rows,
    )


@app.command("logs")
def cli_logs(
    deployment: str,
    namespace: str = typer.Option(..., "--namespace", "-n"),
    container: Optional[str] = typer.Option(None, "--container", "-c"),
    tail: Optional[int] = typer.Option(None, "--tail"),
    since: Optional[int] = typer.Option(None, "--since", help="Only show logs from the last N seconds"),
    follow: bool = typer.Option(False, "--follow", "-f"),
) -> None:
    """Print logs from the pods of a Deployment.

    Exits with status 1 when fetching the logs raises an error.
    """
    try:
        # --since is forwarded as seconds (since_seconds); every other option is passed through as-is.
        print_pod_logs(deployment, namespace, container=container, tail=tail, since_seconds=since, follow=follow)
    except Exception as e:
        # Escape the message: exception text may contain "[...]" sequences that
        # Rich would otherwise interpret as markup.
        console.print(f"[red]Error fetching logs:[/red] {escape(str(e))}")
        raise typer.Exit(1) from e


# Run the Typer CLI only when this file is executed as a script, so that
# importing the module does not start command-line parsing.
if __name__ == "__main__":
    app()