// container orchestration at scale — every component, every concept, every YAML
// A Kubernetes cluster consists of a control plane and one or more worker nodes. The control plane manages the cluster state; worker nodes run containerized workloads.
// The components that form the cluster's "brain" — responsible for global decisions and responding to cluster events.
// Run on every node, maintaining running pods and providing the Kubernetes runtime environment.
// Kubernetes offers several built-in workload resources for different application patterns and deployment strategies.
| Resource | Type | Use Case | Key Features | Scaling |
|---|---|---|---|---|
| Pod | ATOMIC | Smallest deployable unit. One or more containers sharing network and storage. | Shared IP, shared volumes, sidecar pattern, init containers | Manual |
| Deployment | STATELESS | Web servers, APIs, microservices. Rolling updates and rollbacks. | ReplicaSet management, rolling update, rollback history | HPA / Manual |
| StatefulSet | STATEFUL | Databases (MySQL, PostgreSQL), Kafka, Zookeeper. Needs stable identity. | Stable network identity, ordered rollout, persistent volumes per pod | Manual |
| DaemonSet | DAEMON | Node-level agents: log collectors, monitoring agents, network plugins. | One pod per node, runs on all/selected nodes, node affinity | Node count |
| ReplicaSet | STATELESS | Maintain a stable set of replica Pods. Usually managed by Deployments. | Pod template, replica count, label selectors | HPA / Manual |
| Job | BATCH | One-time batch processing tasks. Database migrations, report generation. | Completion guarantee, parallelism, retry on failure | Fixed |
| CronJob | SCHEDULED | Recurring tasks. Backups, cleanup jobs, scheduled reports. | Cron schedule syntax, concurrency policy, history limits | Schedule-based |
// Every Pod gets its own IP. All Pods can communicate with all other Pods without NAT. Nodes can communicate with all Pods. The IP a Pod sees itself as is the same IP others see it as.
// The 4C's of Cloud Native Security: Code, Container, Cluster, Cloud. Defense in depth — security at every layer.
// Kubernetes abstracts storage from compute. Volumes attach to pods, PersistentVolumes are cluster-level resources, PersistentVolumeClaims are user requests for storage.
// Production-ready YAML configurations for all major Kubernetes resources. Copy, adapt, deploy.
---
# Stateless web workload: zero-downtime rolling updates, hardened pod/container
# security context, probes, and config/secret injection via envFrom.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: web-app
  namespace: production
  labels:
    app: web-app
    version: v1.2.0
spec:
  replicas: 3
  selector:
    matchLabels:
      app: web-app
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1          # one extra pod during rollout
      maxUnavailable: 0    # never drop below desired capacity
  template:
    metadata:
      labels:
        app: web-app
    spec:
      serviceAccountName: web-app-sa
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        fsGroup: 2000
      containers:
        - name: web-app
          image: myapp:1.2.0
          ports:
            - containerPort: 8080
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "256Mi"
              cpu: "500m"
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /ready
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop: ["ALL"]
          envFrom:
            - configMapRef:
                name: web-app-config
            - secretRef:
                name: web-app-secrets
---
# Stateful workload: stable network identity via headless service and one
# PersistentVolumeClaim per replica from volumeClaimTemplates.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: postgres
  namespace: database
spec:
  serviceName: postgres-headless   # headless Service providing per-pod DNS
  replicas: 3
  selector:
    matchLabels:
      app: postgres
  template:
    metadata:
      labels:
        app: postgres
    spec:
      containers:
        - name: postgres
          image: postgres:15-alpine
          ports:
            - containerPort: 5432
          env:
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: postgres-secret
                  key: password
          volumeMounts:
            # NOTE(review): mounting directly at the data dir — the postgres
            # image may need PGDATA pointed at a subdirectory; verify.
            - name: data
              mountPath: /var/lib/postgresql/data
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: fast-ssd
        resources:
          requests:
            storage: 10Gi
---
# ClusterIP Service fronting the web-app pods.
apiVersion: v1
kind: Service
metadata:
  name: web-app-svc
  namespace: production
spec:
  selector:
    app: web-app
  ports:
    - protocol: TCP
      port: 80          # service port
      targetPort: 8080  # container port
  type: ClusterIP
---
# TLS-terminating Ingress; cert issued automatically by cert-manager.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: web-app-ingress
  namespace: production
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - app.example.com
      secretName: app-tls
  rules:
    - host: app.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: web-app-svc
                port:
                  number: 80
          - path: /api
            pathType: Prefix
            backend:
              service:
                name: api-svc
                port:
                  number: 8080
---
# Deny all ingress and egress by default: empty podSelector matches every pod.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny-all
  namespace: production
spec:
  podSelector: {}
  policyTypes:
    - Ingress
    - Egress
---
# Allow web-app to receive traffic from the ingress controller.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-ingress-controller
  namespace: production
spec:
  podSelector:
    matchLabels:
      app: web-app
  policyTypes:
    - Ingress
  ingress:
    - from:
        # Two separate entries = OR semantics.
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: ingress-nginx
        - podSelector:
            matchLabels:
              app.kubernetes.io/name: ingress-nginx
      ports:
        - protocol: TCP
          port: 8080
---
# Allow egress to the database namespace only (plus DNS).
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-db-egress
  namespace: production
spec:
  podSelector:
    matchLabels:
      app: web-app
  policyTypes:
    - Egress
  egress:
    - to:
        # Fixed: select the namespace by the automatic
        # kubernetes.io/metadata.name label (as the ingress policy above does)
        # instead of an ad-hoc "name" label that may not exist.
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: database
      ports:
        - protocol: TCP
          port: 5432
    - to:  # Allow DNS
        - namespaceSelector: {}
      ports:
        - protocol: UDP
          port: 53
---
# Workload identity: annotated for AWS IRSA (IAM Roles for Service Accounts).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: web-app-sa
  namespace: production
  annotations:
    eks.amazonaws.com/role-arn: arn:aws:iam::123:role/web-app
---
# Least-privilege Role: read config, read ONE named secret, list pods.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: web-app-role
  namespace: production
rules:
  - apiGroups: [""]
    resources: ["configmaps"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["secrets"]
    resourceNames: ["web-app-secrets"]  # restrict to a single secret
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list"]
---
# Bind the Role to the ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: web-app-binding
  namespace: production
subjects:
  - kind: ServiceAccount
    name: web-app-sa
    namespace: production
roleRef:
  kind: Role
  name: web-app-role
  apiGroup: rbac.authorization.k8s.io
---
# Autoscale on CPU utilization and absolute memory; slow scale-down.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: web-app-hpa
  namespace: production
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: web-app
  minReplicas: 3
  maxReplicas: 20
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: AverageValue
          averageValue: 200Mi
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300  # wait 5m before scaling down
---
# Keep at least 2 pods alive during voluntary disruptions (drains, upgrades).
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: web-app-pdb
  namespace: production
spec:
  minAvailable: 2
  selector:
    matchLabels:
      app: web-app
---
# Nightly database dump streamed to S3.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: db-backup
  namespace: production
spec:
  schedule: "0 2 * * *"  # 2 AM daily
  timeZone: "UTC"
  concurrencyPolicy: Forbid            # never overlap backup runs
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  startingDeadlineSeconds: 300         # skip run if it can't start within 5m
  jobTemplate:
    spec:
      backoffLimit: 3
      activeDeadlineSeconds: 3600      # kill jobs running longer than 1h
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: backup
              # NOTE(review): postgres:15-alpine does not ship the aws CLI —
              # confirm the image actually provides it or use a custom image.
              image: postgres:15-alpine
              command:
                - /bin/sh
                - -c
                - pg_dump -h postgres-0.postgres-headless -U postgres mydb | gzip | aws s3 cp - s3://backups/$(date +%Y%m%d).sql.gz
              env:
                - name: PGPASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: postgres-secret
                      key: password
---
# Node-level log collector: one pod per node, host log paths mounted read-only.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: fluent-bit
  namespace: logging
  labels:
    app: fluent-bit
spec:
  selector:
    matchLabels:
      app: fluent-bit
  template:
    metadata:
      labels:
        app: fluent-bit
    spec:
      tolerations:
        # Run on control-plane nodes too
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
      containers:
        - name: fluent-bit
          image: fluent/fluent-bit:2.2
          volumeMounts:
            - name: varlog
              mountPath: /var/log
              readOnly: true
            - name: containers
              mountPath: /var/lib/docker/containers
              readOnly: true
          resources:
            limits:
              memory: 200Mi
            requests:
              cpu: 100m
              memory: 100Mi
      volumes:
        - name: varlog
          hostPath:
            path: /var/log
        - name: containers
          hostPath:
            path: /var/lib/docker/containers
---
# Application configuration; all data values are strings.
apiVersion: v1
kind: ConfigMap
metadata:
  name: web-app-config
  namespace: production
data:
  APP_ENV: production
  LOG_LEVEL: info
  MAX_CONNECTIONS: "100"  # quoted: numeric-looking values must stay strings
  app.properties: |
    server.port=8080
    cache.ttl=300
    feature.flags=auth,metrics
immutable: false  # object-level field, not part of data
---
apiVersion: v1
kind: Secret
metadata:
  name: web-app-secrets
  namespace: production
  annotations:
    reloader.stakater.com/match: "true"
type: Opaque
stringData:  # plain text (auto base64)
  DATABASE_URL: postgresql://user:pass@postgres:5432/db
  API_KEY: supersecretapikey123
  JWT_SECRET: myverysecretjwtkey
---
# Hard caps on aggregate resource consumption in the namespace.
apiVersion: v1
kind: ResourceQuota
metadata:
  name: production-quota
  namespace: production
spec:
  hard:
    requests.cpu: "20"
    requests.memory: 40Gi
    limits.cpu: "40"
    limits.memory: 80Gi
    persistentvolumeclaims: "10"
    pods: "100"
    services: "20"
    services.loadbalancers: "2"
    secrets: "50"
---
# Per-container defaults and bounds; applied when pods omit resources.
apiVersion: v1
kind: LimitRange
metadata:
  name: default-limits
  namespace: production
spec:
  limits:
    - type: Container
      default:             # default limits
        cpu: 500m
        memory: 256Mi
      defaultRequest:      # default requests
        cpu: 100m
        memory: 128Mi
      max:
        cpu: "4"
        memory: 4Gi
      min:
        cpu: 10m
        memory: 32Mi
// The kube-scheduler places Pods onto nodes through a filtering + scoring pipeline. Advanced controls let you influence exactly where and how workloads run.
---
# Cluster-scoped priority level; higher value = scheduled first, may preempt.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
value: 1000000
globalDefault: false
preemptionPolicy: PreemptLowerPriority
description: "Critical production workloads"
---
# Deployment with topology spread + priority
apiVersion: apps/v1
kind: Deployment
metadata:
  name: critical-app
spec:
  replicas: 6
  selector:
    matchLabels:
      app: critical-app
  template:
    metadata:
      labels:
        app: critical-app
    spec:
      priorityClassName: high-priority
      topologySpreadConstraints:
        # Hard requirement: spread evenly across zones.
        - maxSkew: 1
          topologyKey: topology.kubernetes.io/zone
          whenUnsatisfiable: DoNotSchedule
          labelSelector:
            matchLabels:
              app: critical-app
        # Soft preference: spread across nodes.
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: critical-app
      containers:
        - name: app
          image: myapp:latest  # NOTE(review): pin a version tag in production
// Kubernetes is designed to be extensible. Add new resource types, custom controllers, admission logic, and API endpoints without modifying core code.
---
# Custom resource type with OpenAPI validation, a status subresource,
# and extra kubectl-get columns.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: databases.mycompany.io
spec:
  group: mycompany.io
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              required: ["engine", "version"]
              properties:
                engine:
                  type: string
                  enum: ["postgres", "mysql"]
                version:
                  type: string
                replicas:
                  type: integer
                  minimum: 1
                  maximum: 5
                storageGB:
                  type: integer
            status:
              type: object
              properties:
                phase:
                  type: string
                readyReplicas:
                  type: integer
      subresources:
        status: {}  # /status updated separately from spec
      additionalPrinterColumns:
        - name: Engine
          type: string
          jsonPath: .spec.engine
        - name: Phase
          type: string
          jsonPath: .status.phase
  scope: Namespaced
  names:
    plural: databases
    singular: database
    kind: Database
    shortNames: [db]
---
# Custom Resource instance
apiVersion: mycompany.io/v1
kind: Database
metadata:
  name: my-postgres
  namespace: production
spec:
  engine: postgres
  version: "15"  # quoted: keep as string, not int
  replicas: 3
  storageGB: 50
---
# Validating webhook: Deployments in opted-in namespaces must pass /validate.
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:
  name: resource-limits-enforcer
webhooks:
  - name: check-limits.mycompany.io
    admissionReviewVersions: ["v1"]
    sideEffects: None
    failurePolicy: Fail  # reject the request if the webhook is unreachable
    rules:
      - apiGroups: ["apps"]
        apiVersions: ["v1"]
        operations: ["CREATE", "UPDATE"]
        resources: ["deployments"]
    namespaceSelector:
      matchLabels:
        admission-webhook: enabled
    clientConfig:
      service:
        name: webhook-service
        namespace: webhook-system
        path: /validate
        port: 443
      caBundle: LS0t...  # base64 CA cert
    timeoutSeconds: 5
---
# Mutating webhook: sidecar injection on pod creation.
apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration
metadata:
  name: sidecar-injector
webhooks:
  - name: inject.istio.io
    admissionReviewVersions: ["v1"]
    sideEffects: None
    rules:
      - apiGroups: [""]
        apiVersions: ["v1"]
        operations: ["CREATE"]
        resources: ["pods"]
    clientConfig:
      service:
        name: istiod
        namespace: istio-system
        path: /inject
---
# Gateway API: HTTPS listener terminating TLS with the app-tls secret.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: prod-gateway
  namespace: production
spec:
  gatewayClassName: cilium
  listeners:
    - name: https
      protocol: HTTPS
      port: 443
      tls:
        certificateRefs:
          - name: app-tls
---
# Weighted routing: 90/10 stable-vs-canary traffic split.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: web-app-route
  namespace: production
spec:
  parentRefs:
    - name: prod-gateway
  hostnames: ["app.example.com"]
  rules:
    - matches:
        - path:
            type: PathPrefix
            value: /
      backendRefs:
        - name: web-app-stable
          port: 80
          weight: 90
        - name: web-app-canary
          port: 80
          weight: 10  # 10% canary traffic
// Day-2 operations: bootstrapping, node lifecycle management, upgrades, etcd backup, multi-tenancy, and cluster-level policies.
---
# Container lifecycle hooks plus the three probe types.
apiVersion: v1
kind: Pod
metadata:
  name: lifecycle-demo
spec:
  terminationGracePeriodSeconds: 60  # must exceed the preStop sleep
  containers:
    - name: app
      image: myapp:1.0
      lifecycle:
        postStart:
          exec:
            command:
              - /bin/sh
              - -c
              - echo "Container started" >> /var/log/lifecycle.log
        preStop:
          exec:
            command:
              - /bin/sh
              - -c
              - |
                # Graceful shutdown: stop accepting new connections,
                # wait for in-flight requests to complete
                kill -SIGTERM 1
                sleep 30
      startupProbe:
        # Gates the other probes until the app has booted.
        httpGet:
          path: /startup
          port: 8080
        failureThreshold: 30
        periodSeconds: 10
      livenessProbe:
        httpGet:
          path: /healthz
          port: 8080
        initialDelaySeconds: 0
        periodSeconds: 10
---
# Pod Security Standards via namespace labels: enforce + audit + warn.
apiVersion: v1
kind: Namespace
metadata:
  name: production
  labels:
    # Enforce: reject violating pods
    pod-security.kubernetes.io/enforce: restricted
    pod-security.kubernetes.io/enforce-version: v1.35
    # Audit: log violations
    pod-security.kubernetes.io/audit: restricted
    # Warn: show warnings
    pod-security.kubernetes.io/warn: restricted
---
# Restricted-compliant pod (all security requirements met)
apiVersion: v1
kind: Pod
metadata:
  name: restricted-pod
  namespace: production
spec:
  securityContext:
    runAsNonRoot: true
    runAsUser: 1000
    seccompProfile:
      type: RuntimeDefault
  containers:
    - name: app
      image: myapp:1.0
      securityContext:
        allowPrivilegeEscalation: false
        readOnlyRootFilesystem: true
        capabilities:
          drop: ["ALL"]
// The three pillars of observability in Kubernetes: metrics for dashboards & alerting, logs for debugging, traces for distributed request flows.
---
# Prometheus Operator scrape config for the web-app Service.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: web-app-monitor
  namespace: production
  labels:
    release: kube-prometheus-stack  # must match the Prometheus selector
spec:
  selector:
    matchLabels:
      app: web-app
  endpoints:
    - port: http
      path: /metrics
      interval: 30s
      scrapeTimeout: 10s
  namespaceSelector:
    matchNames:
      - production
---
# Alerting rules evaluated by Prometheus.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: web-app-alerts
  namespace: production
spec:
  groups:
    - name: web-app.rules
      rules:
        - alert: HighErrorRate
          expr: |
            rate(http_requests_total{status=~"5.."}[5m])
            / rate(http_requests_total[5m]) > 0.05
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: High HTTP error rate on web-app
        - alert: PodCrashLooping
          expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
          for: 5m
          labels:
            severity: warning
---
# OTel Collector as a DaemonSet: OTLP in, traces to Jaeger, metrics to
# Prometheus remote-write.
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
  name: otel-collector
  namespace: observability
spec:
  mode: DaemonSet
  config: |
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
    processors:
      batch:
        timeout: 1s
      memory_limiter:
        limit_mib: 400
    exporters:
      # NOTE(review): the dedicated jaeger exporter was removed in newer
      # collector releases in favor of OTLP — confirm the collector version.
      jaeger:
        endpoint: jaeger-collector:14250
        tls:
          insecure: true
      prometheusremotewrite:
        endpoint: http://prometheus:9090/api/v1/write
    service:
      pipelines:
        traces:
          receivers: [otlp]
          processors: [memory_limiter, batch]
          exporters: [jaeger]
        metrics:
          receivers: [otlp]
          processors: [batch]
          exporters: [prometheusremotewrite]
// The primary command-line tool for interacting with Kubernetes clusters. Essential commands, flags, and patterns for daily operations.
# ── CONTEXT & CLUSTER ────────────────────────────────────────
kubectl config get-contexts                    # list all contexts
kubectl config use-context my-cluster          # switch context
kubectl config set-context --current --namespace=prod   # set default ns
kubectl cluster-info                           # cluster endpoints
kubectl api-resources                          # all resource types
kubectl api-versions                           # all API versions

# ── GET / INSPECT ─────────────────────────────────────────────
kubectl get pods -A -o wide                    # all pods, all namespaces
kubectl get pod my-pod -o yaml                 # full pod spec
kubectl describe pod my-pod                    # events + status detail
kubectl get events --sort-by=.lastTimestamp    # sorted events
kubectl get all -n production                  # all resources in ns
kubectl top nodes                              # node resource usage
kubectl top pods --containers                  # container-level usage

# ── APPLY / MANAGE ──────────────────────────────────────────
kubectl apply -f manifest.yaml                 # declarative apply
kubectl apply -f ./k8s/                        # apply whole directory
kubectl delete -f manifest.yaml                # delete from file
kubectl patch deploy my-app -p '{"spec":{"replicas":5}}'
kubectl scale deploy my-app --replicas=5
kubectl set image deploy/my-app app=myapp:2.0  # update image
kubectl label node node1 disktype=ssd          # label a node

# ── ROLLOUTS ─────────────────────────────────────────────────
kubectl rollout status deploy/my-app
kubectl rollout history deploy/my-app
kubectl rollout undo deploy/my-app
kubectl rollout undo deploy/my-app --to-revision=3
kubectl rollout restart deploy/my-app          # rolling restart
kubectl rollout pause deploy/my-app            # pause rollout

# ── DEBUG / TROUBLESHOOT ──────────────────────────────────────
kubectl logs my-pod -c my-container -f         # follow logs
kubectl logs my-pod --previous                 # crashed container logs
kubectl exec -it my-pod -- /bin/sh             # interactive shell
kubectl exec my-pod -- env                     # list env vars
kubectl debug my-pod -it --image=busybox       # ephemeral debug container
kubectl port-forward svc/my-svc 8080:80        # local port forward
kubectl cp my-pod:/app/logs.txt ./logs.txt     # copy from pod

# ── NODE MANAGEMENT ───────────────────────────────────────────
kubectl cordon node1                           # mark unschedulable
kubectl drain node1 --ignore-daemonsets --delete-emptydir-data
kubectl uncordon node1                         # re-enable scheduling
kubectl taint nodes node1 key=val:NoSchedule   # add taint
kubectl taint nodes node1 key=val:NoSchedule-  # remove taint

# ── GENERATING YAML ──────────────────────────────────────────
kubectl create deploy my-app --image=nginx --dry-run=client -o yaml
kubectl create svc clusterip my-svc --tcp=80:8080 --dry-run=client -o yaml
kubectl create secret generic my-secret --from-literal=key=val --dry-run=client -o yaml

# ── USEFUL OUTPUT FORMATS ────────────────────────────────────
kubectl get pods -o jsonpath='{.items[*].metadata.name}'
kubectl get nodes -o custom-columns='NAME:.metadata.name,STATUS:.status.conditions[-1].type'
kubectl get pods --sort-by='.status.startTime'
// Additional production patterns: Kyverno policies, etcd backup jobs, namespace setup, and node management.
---
# Kyverno: two validate rules plus one mutate rule, enforced at admission.
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: enforce-registry-and-limits
spec:
  validationFailureAction: Enforce  # block non-compliant resources
  background: true                  # also scan existing resources
  rules:
    - name: restrict-image-registries
      match:
        any:
          - resources:
              kinds: ["Pod"]
      validate:
        message: "Only images from approved registries allowed"
        pattern:
          spec:
            containers:
              - image: "registry.mycompany.io/* | gcr.io/*"
    - name: require-resource-limits
      match:
        any:
          - resources:
              kinds: ["Pod"]
      validate:
        message: "CPU and memory limits are required"
        pattern:
          spec:
            containers:
              - resources:
                  limits:
                    cpu: "?*"     # any non-empty value
                    memory: "?*"
    - name: add-default-labels  # mutate rule
      match:
        any:
          - resources:
              kinds: ["Deployment"]
      mutate:
        patchStrategicMerge:
          metadata:
            labels:
              managed-by: kyverno
---
# Periodic etcd snapshots taken on a control-plane node and shipped to S3.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: etcd-backup
  namespace: kube-system
spec:
  schedule: "0 */6 * * *"  # every 6 hours
  jobTemplate:
    spec:
      template:
        spec:
          hostNetwork: true  # reach etcd on 127.0.0.1
          restartPolicy: OnFailure
          nodeSelector:
            node-role.kubernetes.io/control-plane: ""
          tolerations:
            - key: node-role.kubernetes.io/control-plane
              operator: Exists
              effect: NoSchedule
          containers:
            - name: etcd-backup
              # NOTE(review): verify this image ships the aws CLI used below.
              image: bitnami/etcd:3.5
              command:
                - /bin/sh
                - -c
                - |
                  BACKUP_FILE="/tmp/etcd-$(date +%Y%m%d-%H%M%S).db"
                  ETCDCTL_API=3 etcdctl snapshot save $BACKUP_FILE \
                    --endpoints=https://127.0.0.1:2379 \
                    --cacert=/etc/kubernetes/pki/etcd/ca.crt \
                    --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \
                    --key=/etc/kubernetes/pki/etcd/healthcheck-client.key
                  aws s3 cp $BACKUP_FILE s3://my-etcd-backups/
                  echo "Backup complete: $BACKUP_FILE"
              volumeMounts:
                - name: etcd-certs
                  mountPath: /etc/kubernetes/pki/etcd
                  readOnly: true
          volumes:
            - name: etcd-certs
              hostPath:
                path: /etc/kubernetes/pki/etcd
---
# 1. Namespace
apiVersion: v1
kind: Namespace
metadata:
  name: team-alpha
  labels:
    team: alpha
    pod-security.kubernetes.io/enforce: baseline
---
# 2. Team RBAC - developers get edit rights
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: team-alpha-developers
  namespace: team-alpha
subjects:
  - kind: Group
    name: team-alpha-devs
    apiGroup: rbac.authorization.k8s.io
roleRef:
  kind: ClusterRole
  name: edit  # built-in aggregated ClusterRole
  apiGroup: rbac.authorization.k8s.io
---
# 3. Quota
apiVersion: v1
kind: ResourceQuota
metadata:
  name: team-alpha-quota
  namespace: team-alpha
spec:
  hard:
    requests.cpu: "10"
    requests.memory: 20Gi
    limits.cpu: "20"
    limits.memory: 40Gi
    pods: "50"
---
# 4. Namespace isolation network policy
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: namespace-isolation
  namespace: team-alpha
spec:
  podSelector: {}
  policyTypes: [Ingress]
  ingress:
    - from:
        - namespaceSelector:
            matchLabels:
              team: alpha  # only same-team ns
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: ingress-nginx
---
# VolumeSnapshotClass (CSI driver dependent)
apiVersion: snapshot.storage.k8s.io/v1
kind: VolumeSnapshotClass
metadata:
  name: csi-aws-vsc
driver: ebs.csi.aws.com
deletionPolicy: Delete
---
# Take a snapshot of existing PVC
apiVersion: snapshot.storage.k8s.io/v1
kind: VolumeSnapshot
metadata:
  name: postgres-snapshot-20240101
  namespace: database
spec:
  volumeSnapshotClassName: csi-aws-vsc
  source:
    persistentVolumeClaimName: data-postgres-0
---
# Restore: create new PVC from snapshot
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: postgres-restored
  namespace: database
spec:
  accessModes: ["ReadWriteOnce"]
  storageClassName: fast-ssd
  resources:
    requests:
      storage: 10Gi
  dataSource:
    name: postgres-snapshot-20240101
    kind: VolumeSnapshot
    apiGroup: snapshot.storage.k8s.io
// Multiple ways to run Kubernetes depending on your use case — local development, bare metal, or cloud-managed. Choose the right tool for the right environment.
| Tool | Best For | Complexity | Production? | Notes |
|---|---|---|---|---|
| k3d | Local dev, CI/CD testing | LOW | ❌ | k3s in Docker containers — fastest spin-up (<30s) |
| kind | Local dev, e2e testing | LOW | ❌ | Kubernetes IN Docker — used by k8s upstream CI |
| minikube | Local dev, learning | LOW | ❌ | Single-node, many drivers (Docker, VM, Podman) |
| k3s | Edge, IoT, bare-metal, RPi | MEDIUM | ✅ | Lightweight k8s — 40MB binary, SQLite or etcd |
| kubeadm | Self-managed production clusters | HIGH | ✅ | Official bootstrap tool — full control, manual upgrades |
| RKE2 | Enterprise, FIPS-compliant | MEDIUM | ✅ | Rancher's hardened k8s distribution |
| EKS / GKE / AKS | Cloud-managed, teams | LOW | ✅ | Managed control plane — pay for worker nodes only |
// k3d wraps k3s (a lightweight Kubernetes) inside Docker containers. Create full multi-node clusters on your laptop in seconds.
# ── INSTALL k3d ──────────────────────────────────────────────
# Linux / macOS
curl -s https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash
# macOS via Homebrew
brew install k3d

# Verify installation
k3d version
kubectl version --client

# ── CREATE CLUSTERS ──────────────────────────────────────────
# Simple single-server cluster
k3d cluster create mycluster

# Production-like: 1 server + 3 agents + port mappings
k3d cluster create devcluster \
  --servers 1 \
  --agents 3 \
  --port "80:80@loadbalancer" \
  --port "443:443@loadbalancer" \
  --api-port 6550 \
  --k3s-arg "--disable=traefik@server:0"   # disable built-in Traefik

# With local registry (for custom images without pushing to remote)
k3d registry create myregistry --port 5050
k3d cluster create devcluster \
  --registry-use k3d-myregistry:5050 \
  --agents 2

# ── CLUSTER MANAGEMENT ───────────────────────────────────────
k3d cluster list                  # list all clusters
k3d cluster stop devcluster       # stop cluster (keep state)
k3d cluster start devcluster      # restart cluster
k3d cluster delete devcluster     # delete cluster
k3d node list                     # list all nodes
k3d node add --cluster devcluster # add a worker node

# ── KUBECONFIG ───────────────────────────────────────────────
# Automatically merged into ~/.kube/config
kubectl config use-context k3d-devcluster
kubectl get nodes

# ── LOAD IMAGES INTO CLUSTER ─────────────────────────────────
# Build locally and import into k3d (no registry push needed)
docker build -t myapp:dev .
k3d image import myapp:dev --cluster devcluster
---
# k3d-config.yaml
apiVersion: k3d.io/v1alpha5
kind: Simple
metadata:
  name: dev-cluster
servers: 1
agents: 2
kubeAPI:
  hostPort: "6550"
ports:
  # Quoted: unquoted digit:digit values hit the YAML 1.1 sexagesimal trap
  # (e.g. 8080:80 would not parse as the intended string).
  - port: "8080:80"
    nodeFilters: [loadbalancer]
  - port: "8443:443"
    nodeFilters: [loadbalancer]
volumes:
  - volume: "/tmp/k3dvol:/data"
    nodeFilters: ["server:*", "agent:*"]
registries:
  use: [k3d-myregistry:5050]
  config: |
    mirrors:
      "docker.io":
        endpoint:
          - "https://mirror.gcr.io"
options:
  k3s:
    extraArgs:
      - arg: --disable=traefik
        nodeFilters: ["server:*"]
      - arg: --cluster-cidr=10.20.0.0/16
        nodeFilters: ["server:*"]
# Create from config file:
#   k3d cluster create --config k3d-config.yaml
// kubeadm is the official Kubernetes cluster bootstrapping tool. Use it to set up production-grade clusters on bare metal, VMs, or cloud instances.
## ═══════════════════════════════════════════════════ ## RUN ON ALL NODES (control-plane + workers) ## ═══════════════════════════════════════════════════ # 1. Disable swap (required by kubelet) swapoff -a sed -i '/swap/d' /etc/fstab # 2. Enable required kernel modules cat <# 3. Kernel networking params cat < # 4. Install containerd runtime apt-get install -y containerd mkdir -p /etc/containerd containerd config default | tee /etc/containerd/config.toml # Enable SystemdCgroup (critical!) sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' \ /etc/containerd/config.toml systemctl restart containerd && systemctl enable containerd # 5. Install kubeadm, kubelet, kubectl apt-get install -y apt-transport-https ca-certificates curl curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.35/deb/Release.key \ | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.35/deb/ /' \ | tee /etc/apt/sources.list.d/kubernetes.list apt-get update apt-get install -y kubelet=1.30.0-1.1 kubeadm=1.30.0-1.1 kubectl=1.30.0-1.1 apt-mark hold kubelet kubeadm kubectl # prevent auto-upgrade ## ═══════════════════════════════════════════════════ ## RUN ON CONTROL-PLANE NODE ONLY ## ═══════════════════════════════════════════════════ # 6. Initialize the cluster kubeadm init \ --control-plane-endpoint "k8s-api.example.com:6443" \ --pod-network-cidr "10.244.0.0/16" \ --service-cidr "10.96.0.0/12" \ --upload-certs # needed for HA: share certs with other control-planes # 7. Set up kubeconfig for root mkdir -p $HOME/.kube cp /etc/kubernetes/admin.conf $HOME/.kube/config chown $(id -u):$(id -g) $HOME/.kube/config # 8. Install CNI plugin (Calico) kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.28.0/manifests/tigera-operator.yaml kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.28.0/manifests/custom-resources.yaml # 9. 
Verify control plane is ready kubectl get nodes kubectl get pods -n kube-system ## ═══════════════════════════════════════════════════ ## RUN ON EACH WORKER NODE ## ═══════════════════════════════════════════════════ # 10. Join worker nodes (token from kubeadm init output) kubeadm join k8s-api.example.com:6443 \ --token abc123.0123456789abcdef \ --discovery-token-ca-cert-hash sha256:<hash> # Regenerate join token if expired (24h TTL) kubeadm token create --print-join-command
---
# kubeadm-config.yaml
# Migrated to kubeadm.k8s.io/v1beta4: v1beta3 was removed in kubeadm >= v1.31,
# so it cannot bootstrap the v1.35.0 cluster requested below. In v1beta4,
# extraArgs fields take a list of name/value pairs instead of a mapping.
apiVersion: kubeadm.k8s.io/v1beta4
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.1.10
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///run/containerd/containerd.sock
  kubeletExtraArgs:
    - name: node-labels
      value: "node-role=control-plane"
---
apiVersion: kubeadm.k8s.io/v1beta4
kind: ClusterConfiguration
clusterName: production
kubernetesVersion: v1.35.0
controlPlaneEndpoint: "k8s-api.example.com:6443"
networking:
  podSubnet: 10.244.0.0/16
  serviceSubnet: 10.96.0.0/12
  dnsDomain: cluster.local
etcd:
  local:
    dataDir: /var/lib/etcd
    extraArgs:
      - name: auto-compaction-retention
        value: "8"
      - name: quota-backend-bytes
        value: "8589934592"  # 8Gi
apiServer:
  certSANs:
    - k8s-api.example.com
    - 192.168.1.10
    - 192.168.1.11
    - 127.0.0.1
  extraArgs:
    - name: audit-log-path
      value: /var/log/kubernetes/audit.log
    - name: audit-policy-file
      value: /etc/kubernetes/audit-policy.yaml
    - name: encryption-provider-config
      value: /etc/kubernetes/encryption.yaml
    - name: enable-admission-plugins
      value: NodeRestriction,PodSecurity
  extraVolumes:
    - name: audit-logs
      hostPath: /var/log/kubernetes
      mountPath: /var/log/kubernetes
controllerManager:
  extraArgs:
    - name: bind-address
      value: 0.0.0.0
scheduler:
  extraArgs:
    - name: bind-address
      value: 0.0.0.0
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
containerLogMaxSize: 100Mi
containerLogMaxFiles: 5
maxPods: 110
kubeReserved:
  cpu: 200m
  memory: 500Mi
systemReserved:
  cpu: 200m
  memory: 500Mi
evictionHard:
  memory.available: "300Mi"
  nodefs.available: "10%"
# Run: kubeadm init --config kubeadm-config.yaml --upload-certs
// k3s is a CNCF-certified, fully conformant Kubernetes distribution packaged as a single binary. Ideal for edge computing, IoT, Raspberry Pi, and resource-constrained environments.
# ── INSTALL k3s SERVER (Control Plane) ───────────────────────
# Single command install — runs as systemd service.
# Fixed: a comment cannot follow a backslash line-continuation; the
# "disable flannel (use Calico instead)" note is moved above the flag.
# --flannel-backend=none disables flannel (use Calico instead)
curl -sfL https://get.k3s.io | sh -s - server \
  --cluster-init \
  --tls-san k3s.example.com \
  --disable traefik \
  --disable servicelb \
  --flannel-backend=none \
  --write-kubeconfig-mode 644

# Get node token (needed for agents to join)
cat /var/lib/rancher/k3s/server/node-token

# Get kubeconfig
cat /etc/rancher/k3s/k3s.yaml

# ── JOIN AGENT NODES ─────────────────────────────────────────
# Run on each worker node
curl -sfL https://get.k3s.io | K3S_URL=https://k3s.example.com:6443 \
  K3S_TOKEN=<node-token> sh -

# ── HA k3s WITH EMBEDDED etcd ────────────────────────────────
# First server (bootstraps etcd)
curl -sfL https://get.k3s.io | sh -s - server \
  --cluster-init \
  --token my-shared-secret

# Additional control plane servers join the cluster
curl -sfL https://get.k3s.io | sh -s - server \
  --server https://first-server:6443 \
  --token my-shared-secret

# ── k3s MANAGEMENT ───────────────────────────────────────────
kubectl get nodes              # k3s bundles kubectl
systemctl status k3s           # service status
systemctl restart k3s          # restart server
k3s kubectl get pods -A        # alternative kubectl

# ── UNINSTALL ────────────────────────────────────────────────
/usr/local/bin/k3s-uninstall.sh        # server
/usr/local/bin/k3s-agent-uninstall.sh  # agent
// Kubernetes uses TLS everywhere for secure communication between all components. Understanding the PKI is essential for troubleshooting, rotating certs, and securing clusters.
# ── CHECK CERTIFICATE EXPIRY ─────────────────────────────────
kubeadm certs check-expiration

# Manual check with openssl
openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -dates
openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -text \
  | grep -A2 "Subject Alternative"

# Check all certs in /etc/kubernetes/pki
for cert in /etc/kubernetes/pki/*.crt; do
  echo "=== $cert ==="
  openssl x509 -in $cert -noout -subject -dates 2>/dev/null
done

# ── RENEW ALL CERTIFICATES ───────────────────────────────────
# Renew all control plane certs (run on control-plane node)
kubeadm certs renew all

# Renew specific cert
kubeadm certs renew apiserver
kubeadm certs renew apiserver-kubelet-client
kubeadm certs renew front-proxy-client

# After renewal: restart control plane components
kubectl -n kube-system delete pod -l component=kube-apiserver
kubectl -n kube-system delete pod -l component=kube-controller-manager
kubectl -n kube-system delete pod -l component=kube-scheduler

# Update kubeconfig after renewal
kubeadm kubeconfig user --client-name admin > ~/.kube/config

# ── CREATE A NEW USER WITH CERT AUTH ─────────────────────────
# Step 1: Generate user private key
openssl genrsa -out alice.key 4096

# Step 2: Create CSR (CN=username, O=group)
openssl req -new -key alice.key \
  -subj "/CN=alice/O=team-alpha" \
  -out alice.csr

# Step 3: Submit as Kubernetes CSR object
# (heredoc reconstructed — original was garbled; standard CSR flow)
cat <<EOF | kubectl apply -f -
apiVersion: certificates.k8s.io/v1
kind: CertificateSigningRequest
metadata:
  name: alice
spec:
  request: $(base64 -w0 < alice.csr)
  signerName: kubernetes.io/kube-apiserver-client
  expirationSeconds: 86400
  usages: ["client auth"]
EOF

# Step 4: Approve the CSR
kubectl certificate approve alice

# Step 5: Download signed cert
kubectl get csr alice -o jsonpath='{.status.certificate}' | \
  base64 -d > alice.crt

# Step 6: Add to kubeconfig
kubectl config set-credentials alice \
  --client-certificate=alice.crt \
  --client-key=alice.key \
  --embed-certs=true
kubectl config set-context alice-context \
  --cluster=my-cluster \
  --user=alice \
  --namespace=team-alpha
# Install cert-manager
# kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.0/cert-manager.yaml
---
# ── ClusterIssuer: Let's Encrypt Production ──────────────────
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: [email protected]
    privateKeySecretRef:
      name: letsencrypt-prod-key
    solvers:
      - http01:  # HTTP-01 challenge via Ingress
          ingress:
            ingressClassName: nginx
      - dns01:   # DNS-01 for wildcard certs
          route53:
            region: us-east-1
            hostedZoneID: YOURZONEID
---
# ── Internal CA Issuer ───────────────────────────────────────
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: internal-ca
spec:
  ca:
    secretName: internal-ca-key-pair
---
# ── Certificate Resource ─────────────────────────────────────
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: app-tls
  namespace: production
spec:
  secretName: app-tls
  duration: 2160h     # 90 days
  renewBefore: 360h   # renew 15 days before expiry
  subject:
    organizations: [MyCompany]
  isCA: false
  privateKey:
    algorithm: RSA
    encoding: PKCS1
    size: 2048
  usages: ["server auth", "client auth"]
  dnsNames:
    - app.example.com
    - www.app.example.com
  issuerRef:
    name: letsencrypt-prod
    kind: ClusterIssuer
    group: cert-manager.io
// Helm is the package manager for Kubernetes. Charts are packages of pre-configured Kubernetes resources. Releases track deployed instances. Repositories store and share charts.
# ── INSTALLATION ─────────────────────────────────────────────
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
brew install helm                         # macOS
helm version

# ── REPOSITORIES ─────────────────────────────────────────────
helm repo add stable https://charts.helm.sh/stable
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo update                          # fetch latest chart versions
helm repo list                            # list configured repos
helm repo remove bitnami                  # remove repo
helm search repo nginx                    # search in repos
helm search hub wordpress                 # search artifact hub

# ── INSTALL / DEPLOY ─────────────────────────────────────────
helm install my-nginx ingress-nginx/ingress-nginx \
  --namespace ingress-nginx \
  --create-namespace \
  --version 4.10.0 \
  -f values-prod.yaml \
  --set controller.replicaCount=2

# Upsert (install or upgrade if exists)
helm upgrade --install my-app ./my-chart \
  --namespace production \
  --create-namespace \
  --atomic \
  --timeout 5m \
  -f values.yaml \
  --set image.tag=v1.2.3

# ── INSPECT BEFORE INSTALLING ────────────────────────────────
helm show chart bitnami/postgresql        # chart metadata
helm show values bitnami/postgresql       # all default values
helm template my-release ./my-chart \
  -f values.yaml > rendered.yaml          # render locally
helm install my-release ./my-chart --dry-run --debug

# ── MANAGE RELEASES ──────────────────────────────────────────
helm list -A                              # all releases, all namespaces
helm status my-app -n production          # release status
helm history my-app -n production         # revision history
helm rollback my-app 2 -n production      # rollback to revision 2
helm uninstall my-app -n production       # remove release
helm get values my-app -n production      # get user-supplied values
helm get manifest my-app -n production    # get rendered manifests

# ── CHART DEVELOPMENT ────────────────────────────────────────
helm create my-chart                      # scaffold new chart
helm lint ./my-chart                      # validate chart
# (rejoined: this command was split across two lines in the original)
helm package ./my-chart                   # create .tgz package
helm push my-chart-1.0.0.tgz oci://registry.example.com/charts  # push to OCI

# ── PLUGINS ──────────────────────────────────────────────────
helm plugin install https://github.com/databus23/helm-diff
helm diff upgrade my-app ./my-chart -f values.yaml  # show diff before upgrade
helm plugin install https://github.com/jkroepke/helm-secrets
helm secrets upgrade my-app ./my-chart -f secrets.enc.yaml
# Chart.yaml
apiVersion: v2
name: web-app
description: A Helm chart for web-app microservice
type: application
version: 1.4.2        # chart version (semver)
appVersion: "2.1.0"   # app version (informational)
keywords: [web, api, microservice]
maintainers:
  - name: Platform Team
    email: [email protected]
dependencies:
  - name: postgresql
    version: "~14.x.x"
    repository: https://charts.bitnami.com/bitnami
    condition: postgresql.enabled
  - name: redis
    version: "~18.x.x"
    repository: https://charts.bitnami.com/bitnami
    condition: redis.enabled
---
# values.yaml
replicaCount: 2
image:
  repository: registry.example.com/web-app
  pullPolicy: IfNotPresent
  tag: ""             # overridden by CI with .Chart.AppVersion
service:
  type: ClusterIP
  port: 80
  targetPort: 8080
ingress:
  enabled: true
  className: nginx
  host: app.example.com
  tls: true
resources:
  requests: { cpu: 100m, memory: 128Mi }
  limits: { cpu: 500m, memory: 256Mi }
autoscaling:
  enabled: true
  minReplicas: 2
  maxReplicas: 10
  targetCPUUtilizationPercentage: 70
postgresql:
  enabled: true
  auth:
    database: mydb
    existingSecret: postgres-secret
{{/* templates/deployment.yaml */}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "web-app.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "web-app.labels" . | nindent 4 }}
  annotations:
    app.kubernetes.io/managed-by: {{ .Release.Service }}
    helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }}
spec:
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "web-app.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "web-app.selectorLabels" . | nindent 8 }}
      annotations:
        {{/* Force pod restart when configmap changes */}}
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
    spec:
      containers:
      - name: {{ .Chart.Name }}
        image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
        imagePullPolicy: {{ .Values.image.pullPolicy }}
        ports:
        - containerPort: {{ .Values.service.targetPort }}
        {{- with .Values.resources }}
        resources:
          {{- toYaml . | nindent 10 }}
        {{- end }}
        {{- if .Values.envFrom }}
        envFrom:
          {{- toYaml .Values.envFrom | nindent 10 }}
        {{- end }}
---
{{/* templates/_helpers.tpl */}}
{{- define "web-app.fullname" -}}
{{- printf "%s-%s" .Release.Name .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- define "web-app.labels" -}}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }}
{{ include "web-app.selectorLabels" . }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{- define "web-app.selectorLabels" -}}
app.kubernetes.io/name: {{ .Chart.Name }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
// Advanced topics for senior engineers and platform teams: performance tuning, GitOps, multi-cluster, eBPF networking, cost optimization, and production hardening.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: web-app-production
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: production
  source:
    repoURL: https://github.com/myorg/k8s-gitops
    targetRevision: HEAD
    path: apps/web-app
    helm:
      valueFiles:
        - values-prod.yaml
      parameters:
        - name: image.tag
          value: v2.1.0
  destination:
    server: https://kubernetes.default.svc
    namespace: production
  syncPolicy:
    automated:
      prune: true     # delete resources removed from git
      selfHeal: true  # revert manual cluster changes
    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - ApplyOutOfSyncOnly=true
    retry:
      limit: 5
      backoff:
        duration: 5s
        maxDuration: 3m
        factor: 2
  revisionHistoryLimit: 10
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
  name: web-app-rollout
  namespace: production
spec:
  replicas: 10
  selector:
    matchLabels:
      app: web-app
  template:
    metadata:
      labels:
        app: web-app
    spec:
      containers:
        - name: web-app
          image: myapp:v2.0.0
  strategy:
    canary:
      canaryService: web-app-canary
      stableService: web-app-stable
      trafficRouting:
        nginx:
          stableIngress: web-app-ingress
      steps:
        - setWeight: 5            # 5% traffic to canary
        - pause: {duration: 5m}
        - analysis:               # run automated analysis
            templates:
              - templateName: success-rate
        - setWeight: 20
        - pause: {duration: 10m}
        - setWeight: 50
        - pause: {duration: 10m}
        - setWeight: 100
---
apiVersion: argoproj.io/v1alpha1
kind: AnalysisTemplate
metadata:
  name: success-rate
spec:
  metrics:
    - name: success-rate
      interval: 1m
      successCondition: result[0] >= 0.95
      failureLimit: 3
      provider:
        prometheus:
          address: http://prometheus:9090
          query: |
            sum(rate(http_requests_total{status!~"5.."}[5m])) / sum(rate(http_requests_total[5m]))
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: kafka-consumer-scaler
  namespace: production
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: kafka-consumer
  pollingInterval: 15
  cooldownPeriod: 300
  minReplicaCount: 0    # scale to zero!
  maxReplicaCount: 50
  triggers:
    - type: kafka
      metadata:
        bootstrapServers: kafka:9092
        consumerGroup: my-consumer-group
        topic: orders
        lagThreshold: "100"   # 1 replica per 100 messages lag
        offsetResetPolicy: latest
---
# ScaledJob: scale Jobs (not Deployments) for batch processing
apiVersion: keda.sh/v1alpha1
kind: ScaledJob
metadata:
  name: sqs-processor
spec:
  jobTargetRef:
    template:
      spec:
        containers:
          - name: processor
            image: myprocessor:latest
        restartPolicy: Never
  maxReplicaCount: 100
  triggers:
    - type: aws-sqs-queue
      metadata:
        queueURL: https://sqs.us-east-1.amazonaws.com/123/my-queue
        targetQueueLength: "1"
        awsRegion: us-east-1
# Crossplane lets you provision cloud infra as K8s resources
# Developers request infrastructure via K8s objects
apiVersion: database.example.com/v1alpha1
kind: PostgreSQLInstance
metadata:
  name: my-db
  namespace: production
spec:
  parameters:
    storageGB: 20
    size: db.t3.medium
    version: "15"
  compositionSelector:
    matchLabels:
      provider: aws
      env: production
  writeConnectionSecretToRef:
    name: my-db-conn    # K8s Secret with DB connection string
---
# Kustomize overlay structure example
# base/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - deployment.yaml
  - service.yaml
  - configmap.yaml
---
# overlays/production/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: production
namePrefix: prod-
resources:
  - ../../base
patches:
  - patch: |
      - op: replace
        path: /spec/replicas
        value: 5
    target:
      kind: Deployment
images:
  - name: myapp
    newTag: v2.1.0
// A service mesh is a dedicated infrastructure layer for handling service-to-service communication. It provides traffic management, security (mTLS), and observability — without changing application code.
// Major service mesh implementations — choose based on complexity tolerance, performance needs, and feature requirements.
| Mesh | Data Plane | Architecture | Strengths | Trade-offs | Best For |
|---|---|---|---|---|---|
| Istio | Envoy | Sidecar + Istiod control plane | Most features, rich traffic mgmt, large community, Gateway API support | High resource overhead, complexity, steep learning curve | Large enterprises needing full feature set |
| Linkerd | Rust proxy | Sidecar + linkerd-control-plane | Ultra-lightweight, simple install, excellent performance, CNCF graduated | Fewer advanced features than Istio, no Envoy ecosystem | Teams wanting simplicity and low overhead |
| Cilium | eBPF | No sidecar — kernel-level eBPF | Zero sidecar overhead, highest performance, L3-L7, NetworkPolicy, Gateway API | Requires Linux kernel ≥5.10, newer project | Performance-critical, CNI + mesh in one |
| Consul Connect | Envoy | Sidecar + Consul server | Multi-platform (VMs + K8s), HashiCorp ecosystem, service catalog | Requires Consul cluster, more ops burden | Hybrid cloud / VM + Kubernetes environments |
| AWS App Mesh | Envoy | Sidecar + AWS managed CP | Native AWS integration, managed control plane, no CP ops | AWS-only, less flexible, fewer features | AWS-native teams wanting managed option |
| Kuma / Kong | Envoy | Sidecar or sidecarless | Multi-zone mesh, universal (K8s + VMs), Kong ecosystem integration | Smaller community than Istio | Multi-zone deployments, Kong API Gateway users |
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
  name: web-app-vs
  namespace: production
spec:
  hosts:
    - web-app
  http:
    # Testers with X-Canary header go to v2
    - match:
        - headers:
            x-canary:
              exact: "true"
      route:
        - destination:
            host: web-app
            subset: v2
    # Everyone else: 90/10 split
    - route:
        - destination:
            host: web-app
            subset: v1
          weight: 90
        - destination:
            host: web-app
            subset: v2
          weight: 10
      timeout: 30s
      retries:
        attempts: 3
        perTryTimeout: 10s
        retryOn: gateway-error,connect-failure,retriable-4xx
      mirror:
        host: web-app-shadow
        subset: v2
      mirrorPercentage:
        value: 10.0   # Mirror 10% to shadow
apiVersion: networking.istio.io/v1alpha3
kind: DestinationRule
metadata:
  name: web-app-dr
  namespace: production
spec:
  host: web-app
  trafficPolicy:
    connectionPool:
      tcp:
        maxConnections: 100
      http:
        http1MaxPendingRequests: 50
        http2MaxRequests: 1000
        maxRequestsPerConnection: 10
    outlierDetection:   # Circuit breaker
      consecutiveGatewayErrors: 5
      consecutive5xxErrors: 5
      interval: 30s
      baseEjectionTime: 30s
      maxEjectionPercent: 50
    loadBalancer:
      simple: LEAST_CONN
  subsets:
    - name: v1
      labels:
        version: v1
      trafficPolicy:
        loadBalancer:
          simple: ROUND_ROBIN
    - name: v2
      labels:
        version: v2
      trafficPolicy:
        loadBalancer:
          consistentHash:
            httpHeaderName: x-user-id   # Sticky by user
# Enforce STRICT mTLS for entire namespace
apiVersion: security.istio.io/v1beta1
kind: PeerAuthentication
metadata:
  name: default
  namespace: production
spec:
  mtls:
    mode: STRICT
---
# Allow frontend → backend on /api/* GET only
apiVersion: security.istio.io/v1beta1
kind: AuthorizationPolicy
metadata:
  name: backend-authz
  namespace: production
spec:
  selector:
    matchLabels:
      app: backend
  action: ALLOW
  rules:
    - from:
        - source:
            principals:
              - cluster.local/ns/production/sa/frontend-sa
      to:
        - operation:
            methods: ["GET", "POST"]
            paths: ["/api/*"]
---
# Deny all other traffic to backend (default deny)
apiVersion: security.istio.io/v1beta1
kind: AuthorizationPolicy
metadata:
  name: backend-deny-all
  namespace: production
spec:
  selector:
    matchLabels:
      app: backend
  action: DENY
  rules:
    - from:
        - source:
            notPrincipals:
              - cluster.local/ns/production/sa/frontend-sa
# Istio Ingress Gateway
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
  name: main-gateway
  namespace: istio-system
spec:
  selector:
    istio: ingressgateway
  servers:
    - port:
        number: 443
        name: https
        protocol: HTTPS
      tls:
        mode: SIMPLE
        credentialName: app-tls-cert
      hosts:
        - app.example.com
    - port:
        number: 80
        name: http
        protocol: HTTP
      tls:
        httpsRedirect: true   # Force HTTPS
      hosts:
        - app.example.com
---
# Allow egress to external Stripe API
apiVersion: networking.istio.io/v1alpha3
kind: ServiceEntry
metadata:
  name: stripe-api
  namespace: production
spec:
  hosts:
    - api.stripe.com
  ports:
    - number: 443
      name: https
      protocol: HTTPS
  resolution: DNS
  location: MESH_EXTERNAL
# Inject 3s delay for 10% of requests to ratings service
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
  name: ratings-fault-injection
spec:
  hosts:
    - ratings
  http:
    - fault:
        delay:
          percentage:
            value: 10.0
          fixedDelay: 3s
        abort:
          percentage:
            value: 5.0
          httpStatus: 503
      route:
        - destination:
            host: ratings
            subset: v1
---
# Sidecar resource scoping for large clusters
apiVersion: networking.istio.io/v1alpha3
kind: Sidecar
metadata:
  name: web-app-sidecar
  namespace: production
spec:
  workloadSelector:
    labels:
      app: web-app
  egress:
    - hosts:
        - ./backend               # same namespace
        - ./postgres
        - istio-system/*
        - monitoring/prometheus
# Only these — Envoy won't load other services
# Enable Linkerd injection for a namespace
apiVersion: v1
kind: Namespace
metadata:
  name: production
  annotations:
    linkerd.io/inject: enabled
---
# Per-pod injection control
apiVersion: apps/v1
kind: Deployment
metadata:
  name: web-app
spec:
  template:
    metadata:
      annotations:
        linkerd.io/inject: enabled
        config.linkerd.io/proxy-cpu-request: "10m"
        config.linkerd.io/proxy-memory-request: "20Mi"
---
# ServiceProfile for per-route observability & retries
apiVersion: linkerd.io/v1alpha2
kind: ServiceProfile
metadata:
  name: web-app.production.svc.cluster.local
  namespace: production
spec:
  routes:
    - name: GET /api/users
      condition:
        method: GET
        pathRegex: /api/users.*
      isRetryable: true
      timeout: 30s
    - name: POST /api/orders
      condition:
        method: POST
        pathRegex: /api/orders
      isRetryable: false   # Non-idempotent
      timeout: 60s
| Scenario | Without Mesh | With Mesh | Verdict |
|---|---|---|---|
| mTLS between services | Manual cert management per service | Automatic, zero-config mTLS | USE MESH |
| Canary deployments | Needs two services + Ingress hacks | VirtualService weight split | USE MESH |
| Distributed tracing | Instrument every app with SDK | Automatic from sidecar | USE MESH |
| Small cluster (1–5 services) | Simple, low overhead | Adds 50–100ms latency, complexity | SKIP MESH |
| Circuit breaking | Implement per-service (Hystrix, Resilience4j) | One DestinationRule for all | USE MESH |
| Compliance (SOC2, PCI-DSS) | Hard to prove in-transit encryption | mTLS + audit logs prove it | USE MESH |
| Resource-constrained edge/IoT | Direct pod communication | Sidecar doubles memory per pod | SKIP MESH |
// kubectl plugins extend the CLI with new commands. Krew is the official plugin manager — over 200 community plugins available. Any executable named kubectl-* in your PATH becomes a kubectl subcommand.
# kubectl discovers plugins by scanning $PATH for executables whose
# names start with "kubectl-" — no registration needed.  A file named
# kubectl-ns-switch becomes the command `kubectl ns-switch`: hyphens in
# the filename turn into spaces in the command.  The file must be
# executable.  Krew-managed plugins install with: kubectl krew install <name>

# ── INSTALL KREW (macOS/Linux) ───────────────────────────────
(
  set -x; cd "$(mktemp -d)" &&
  OS="$(uname | tr '[:upper:]' '[:lower:]')" &&
  ARCH="$(uname -m | sed 's/x86_64/amd64/;s/arm.*/arm/;s/aarch64$/arm64/')" &&
  KREW="krew-${OS}_${ARCH}" &&
  curl -fsSLO "https://github.com/kubernetes-sigs/krew/releases/latest/download/${KREW}.tar.gz" &&
  tar zxvf "${KREW}.tar.gz" &&
  ./"${KREW}" install krew
)

# Add to shell profile (.bashrc / .zshrc)
export PATH="${KREW_ROOT:-$HOME/.krew}/bin:$PATH"

# ── KREW COMMANDS ────────────────────────────────────────────
kubectl krew version                 # show krew version
kubectl krew update                  # update plugin index
kubectl krew search                  # list all available plugins
kubectl krew search <keyword>        # search plugins by keyword
kubectl krew info <plugin>           # details about a plugin
kubectl krew install <plugin>        # install a plugin
kubectl krew install <p1> <p2> <p3>  # install multiple at once
kubectl krew upgrade                 # upgrade all installed plugins
kubectl krew upgrade <plugin>        # upgrade specific plugin
kubectl krew uninstall <plugin>      # remove a plugin
kubectl krew list                    # list installed plugins

# ── DISCOVER ALL PLUGINS (without krew) ──────────────────────
kubectl plugin list                  # show all plugins in PATH

# ── INSTALL ESSENTIAL PLUGINS IN ONE GO ──────────────────────
kubectl krew install \
  ctx ns stern neat tree \
  who-can access-matrix rbac-view \
  resource-capacity node-shell \
  images outdated \
  konfig view-secret \
  doctor popeye
# ── CONTEXT, NAMESPACE & CLUSTER NAVIGATION ──────────────────
kubectl ctx                          # list all contexts
kubectl ctx prod                     # switch to prod
kubectl ctx -                        # switch to previous
kubectl ctx -d old-ctx               # delete context
#   install: kubectl krew install ctx
kubectl ns                           # list all namespaces
kubectl ns production                # switch namespace
kubectl ns -                         # switch to previous ns
#   install: kubectl krew install ns
kubectl konfig merge a.yaml b.yaml
kubectl konfig split                 # split into files
kubectl konfig import --save cfg.yaml
#   install: kubectl krew install konfig

# ── LOGS, DEBUGGING & TROUBLESHOOTING ────────────────────────
stern web-app                        # tail all pods matching name
stern . -n prod                      # all pods in namespace
stern web --since 15m                # last 15 minutes
stern web -c sidecar                 # specific container
#   install: kubectl krew install stern
kubectl tree deploy web-app
#   install: kubectl krew install tree
kubectl get pod web -o yaml | kubectl neat
#   install: kubectl krew install neat
kubectl doctor                       # full cluster scan
#   install: kubectl krew install doctor
kubectl node-shell node1
#   install: kubectl krew install node-shell
kubectl popeye                       # full scan with report
kubectl popeye -n production
#   install: kubectl krew install popeye

# ── SECURITY, RBAC & ACCESS AUDITING ─────────────────────────
kubectl who-can get pods
kubectl who-can delete secrets -n prod
kubectl who-can create deployments
#   install: kubectl krew install who-can
kubectl access-matrix
kubectl access-matrix --sa mysa -n prod
#   install: kubectl krew install access-matrix
kubectl rbac-view                    # opens browser UI
#   install: kubectl krew install rbac-view
kubectl view-secret my-secret
kubectl view-secret my-secret key
#   install: kubectl krew install view-secret

# ── RESOURCES, CAPACITY & IMAGE MANAGEMENT ───────────────────
kubectl resource-capacity            # node overview
kubectl resource-capacity --pods
kubectl resource-capacity --util     # live usage
#   install: kubectl krew install resource-capacity
kubectl images                       # all images cluster-wide
kubectl images -n prod
kubectl images --no-trunc            # full names
#   install: kubectl krew install images
kubectl outdated                     # full cluster scan
#   install: kubectl krew install outdated
kubectl pv-migrate migrate src-pvc dst-pvc
#   install: kubectl krew install pv-migrate

# ── NETWORKING, CERTIFICATES & CLUSTER TOOLS ─────────────────
kubectl ingress-nginx backends
kubectl ingress-nginx conf --host foo.com
kubectl ingress-nginx logs
kubectl ingress-nginx exec -- nginx -T
#   install: kubectl krew install ingress-nginx
kubectl cert-manager status certificate tls
kubectl cert-manager renew tls-cert
kubectl cert-manager inspect secret tls
#   install: kubectl krew install cert-manager
kubectl np-viewer -n production
#   install: kubectl krew install np-viewer
kubectl view-cert my-tls-secret
#   install: kubectl krew install view-cert

# Plugins can be simple shell scripts or full Go binaries. Any
# executable named kubectl-* in your PATH works instantly.
# kubectl podfull — shows detailed pod info with node, IP, age, and
# resource usage in one view.
#!/usr/bin/env bash
# File: kubectl-podfull (chmod +x, place in PATH)
# Usage: kubectl podfull [namespace]
set -euo pipefail

NS="${1:--A}"   # default: all namespaces
FLAG="--all-namespaces"
[ "$NS" != "-A" ] && FLAG="-n $NS"

echo ""
printf "%-50s %-15s %-15s %-8s %-8s\n" \
  "POD" "NODE" "IP" "STATUS" "RESTARTS"
echo "$(printf '─%.0s' {1..100})"

# FIX: the custom-columns spec must not contain spaces after commas —
# kubectl's spec parser rejects padded specs (verify against your
# kubectl version; the original spec had ", NODE:" etc.).
kubectl get pods $FLAG \
  -o custom-columns='NAME:.metadata.name,NODE:.spec.nodeName,IP:.status.podIP,STATUS:.status.phase,RESTARTS:.status.containerStatuses[0].restartCount' \
  --sort-by='.status.phase' 2>/dev/null

echo ""
echo "Resource usage:"
kubectl top pods $FLAG 2>/dev/null || \
  echo "(metrics-server not available)"
#!/usr/bin/env python3
# File: kubectl-ns-cleanup (chmod +x, place in PATH)
# Usage: kubectl ns-cleanup [namespace] [--delete]
"""List (and optionally delete) Succeeded/Failed/Evicted pods in a namespace."""
import json
import subprocess
import sys


def find_pods_to_clean(pods):
    """Return pod names with phase Succeeded/Failed or reason Evicted.

    Deduplicates while preserving order: Evicted pods also report
    phase=Failed, and the original appended such pods twice, which made
    the delete loop fail on the second (already-gone) delete.
    """
    names = []
    for pod in pods:
        status = pod.get("status", {})
        phase = status.get("phase", "")
        reason = status.get("reason", "")
        if phase in ("Succeeded", "Failed") or reason == "Evicted":
            name = pod["metadata"]["name"]
            if name not in names:
                names.append(name)
    return names


def main():
    # BUG FIX: the original took sys.argv[1] blindly, so invoking
    # `kubectl ns-cleanup --delete` used "--delete" as the namespace.
    # Only non-flag arguments are namespace candidates.
    positional = [a for a in sys.argv[1:] if not a.startswith("--")]
    ns = positional[0] if positional else "default"
    do_delete = "--delete" in sys.argv

    # check=True: fail loudly instead of feeding empty stdout to json.
    result = subprocess.run(
        ["kubectl", "get", "pods", "-n", ns, "-o", "json"],
        capture_output=True, text=True, check=True,
    )
    pods = json.loads(result.stdout)["items"]
    to_clean = find_pods_to_clean(pods)

    print(f"Found {len(to_clean)} pods to clean in '{ns}':")
    for p in to_clean:
        print(f"  - {p}")

    if do_delete and to_clean:
        for p in to_clean:
            subprocess.run(
                ["kubectl", "delete", "pod", p, "-n", ns],
                check=True,
            )
        print(f"\n✓ Deleted {len(to_clean)} pods")


if __name__ == "__main__":
    main()
// File: kubectl-whoami/main.go // Build: go build -o kubectl-whoami && mv to PATH package main import ( "context" "fmt" "os" authv1 "k8s.io/api/authorization/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/clientcmd" ) func main() { kubeconfig := os.Getenv("KUBECONFIG") if kubeconfig == "" { kubeconfig = os.Getenv("HOME") + "/.kube/config" } config, err := clientcmd.BuildConfigFromFlags("", kubeconfig) if err != nil { panic(err) } client, err := kubernetes.NewForConfig(config) if err != nil { panic(err) } // SelfSubjectReview — who am I? review, err := client.AuthenticationV1(). SelfSubjectReviews(). Create(context.TODO(), &authv1.SelfSubjectReview{}, metav1.CreateOptions{}) if err != nil { panic(err) } fmt.Printf("Username : %s\n", review.Status.UserInfo.Username) fmt.Printf("Groups : %v\n", review.Status.UserInfo.Groups) fmt.Printf("UID : %s\n", review.Status.UserInfo.UID) }
apiVersion: krew.googlecontainertools.github.com/v1alpha2
kind: Plugin
metadata:
  name: podfull
spec:
  version: v1.0.0
  homepage: https://github.com/myuser/kubectl-podfull
  shortDescription: Show detailed pod info with nodes and resources
  description: |
    kubectl-podfull displays comprehensive pod information including
    node placement, IPs, status, restart counts, and live resource
    usage in a single command.
  platforms:
    - selector:
        matchLabels:
          os: linux
          arch: amd64
      uri: https://github.com/myuser/kubectl-podfull/releases/download/v1.0.0/kubectl-podfull_linux_amd64.tar.gz
      sha256: abc123...
      bin: kubectl-podfull
    - selector:
        matchLabels:
          os: darwin
          arch: amd64
      uri: https://github.com/myuser/kubectl-podfull/releases/download/v1.0.0/kubectl-podfull_darwin_amd64.tar.gz
      sha256: def456...
      bin: kubectl-podfull
    - selector:
        matchLabels:
          os: windows
          arch: amd64
      uri: https://github.com/myuser/kubectl-podfull/releases/download/v1.0.0/kubectl-podfull_windows_amd64.zip
      sha256: ghi789...
      bin: kubectl-podfull.exe
| Plugin | Category | Key Command | What It Solves | Install |
|---|---|---|---|---|
| ctx / kubectx | NAVIGATION | `kubectl ctx prod` | Fast cluster/context switching | `krew install ctx` |
| ns / kubens | NAVIGATION | `kubectl ns staging` | Fast namespace switching | `krew install ns` |
| stern | LOGS | `stern web-app -n prod` | Multi-pod log tailing | `krew install stern` |
| neat | DEBUG | `kubectl get pod x -oyaml \| kubectl neat` | Clean up noisy kubectl YAML output | `krew install neat` |
| tree | DEBUG | `kubectl tree deploy app` | Visualize resource ownership tree | `krew install tree` |
| popeye | AUDIT | `kubectl popeye -n prod` | Cluster health & best-practice scan | `krew install popeye` |
| who-can | SECURITY | `kubectl who-can delete pods` | RBAC: who can do what | `krew install who-can` |
| access-matrix | SECURITY | `kubectl access-matrix --sa mysa` | Full RBAC permission grid | `krew install access-matrix` |
| view-secret | SECURITY | `kubectl view-secret my-secret` | Decode Secrets without manual base64 | `krew install view-secret` |
| resource-capacity | RESOURCES | `kubectl resource-capacity --util` | CPU/memory requests, limits & usage | `krew install resource-capacity` |
| node-shell | DEBUG | `kubectl node-shell node1` | SSH-less shell on any node | `krew install node-shell` |
| images | RESOURCES | `kubectl images -n prod` | List all container images in cluster | `krew install images` |
| outdated | RESOURCES | `kubectl outdated` | Detect stale/outdated container images | `krew install outdated` |
| konfig | NAVIGATION | `kubectl konfig merge a.yaml b.yaml` | Merge & manage kubeconfig files | `krew install konfig` |
| np-viewer | NETWORKING | `kubectl np-viewer -n prod` | Visualize NetworkPolicy rules | `krew install np-viewer` |
| cert-manager | SECURITY | `kubectl cert-manager renew cert` | Manage cert-manager certificates | `krew install cert-manager` |
| pv-migrate | STORAGE | `kubectl pv-migrate migrate src dst` | Migrate PVC data between namespaces/clusters | `krew install pv-migrate` |
| ingress-nginx | NETWORKING | `kubectl ingress-nginx backends` | Debug NGINX Ingress configuration | `krew install ingress-nginx` |
// CRDs let you extend the Kubernetes API with your own resource types. Once registered, your custom objects are stored in etcd, managed by kubectl, protected by RBAC, and can drive custom controllers — exactly like built-in resources.
| Field | Location | Required | Description | Example |
|---|---|---|---|---|
| group | spec | YES | API group — use a domain you own. Reverse DNS style. | mycompany.io |
| versions[].name | spec.versions | YES | Version string. Follow: v1alpha1 → v1beta1 → v1 | v1, v1beta1 |
| versions[].served | spec.versions | YES | Whether this version is served by the API. False = deprecated. | true / false |
| versions[].storage | spec.versions | YES | Exactly ONE version must be the storage version. | true (only one) |
| openAPIV3Schema | spec.versions[].schema | YES | Full structural schema for validation. Required for all served versions. | type: object, properties: ... |
| scope | spec | YES | Namespaced (per ns) or Cluster (global like Nodes/PVs). | Namespaced / Cluster |
| names.kind | spec.names | YES | CamelCase singular name used in YAML kind: field. | Database |
| names.plural | spec.names | YES | Lowercase plural — used in URL path and kubectl get. | databases |
| names.shortNames | spec.names | NO | Short aliases for kubectl. Like po=pods, svc=services. | ["db", "dbs"] |
| names.categories | spec.names | NO | Group into categories. kubectl get all uses "all" category. | ["all"] |
| subresources.status | spec.versions[].subresources | NO | Enables separate /status endpoint. Best practice for all CRDs. | status: {} |
| subresources.scale | spec.versions[].subresources | NO | Enables HPA + kubectl scale support. | specReplicasPath, statusReplicasPath |
| additionalPrinterColumns | spec.versions[] | NO | Custom kubectl get columns via JSONPath. | name: Phase, jsonPath: .status.phase |
| x-kubernetes-validations | schema properties | NO | CEL rules for cross-field validation (v1.25+). | rule: self.max >= self.min |
| conversion.strategy | spec.conversion | NO | None (no conversion) or Webhook (call conversion webhook). | Webhook |
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: webapps.apps.mycompany.io   # plural.group
spec:
  group: apps.mycompany.io
  scope: Namespaced
  names:
    plural: webapps
    singular: webapp
    kind: WebApp
    shortNames: [wa]
    categories: [all, mycompany]
  versions:
    - name: v1
      served: true
      storage: true
      subresources:
        status: {}
        scale:
          specReplicasPath: .spec.replicas
          statusReplicasPath: .status.readyReplicas
          labelSelectorPath: .status.selector
      additionalPrinterColumns:
        - name: Replicas
          type: integer
          jsonPath: .spec.replicas
        - name: Ready
          type: integer
          jsonPath: .status.readyReplicas
        - name: Phase
          type: string
          jsonPath: .status.phase
        - name: Image
          type: string
          jsonPath: .spec.image
          priority: 1   # Only with -o wide
        - name: Age
          type: date
          jsonPath: .metadata.creationTimestamp
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              required: ["image"]
              x-kubernetes-validations:
                - rule: self.maxReplicas >= self.minReplicas
                  message: maxReplicas must be >= minReplicas
                - rule: self.replicas >= self.minReplicas && self.replicas <= self.maxReplicas
                  message: replicas must be between min and max
              properties:
                image:
                  type: string
                replicas:
                  type: integer
                  minimum: 0
                  maximum: 100
                  default: 1
                minReplicas:
                  type: integer
                  default: 1
                maxReplicas:
                  type: integer
                  default: 10
                port:
                  type: integer
                  minimum: 1
                  maximum: 65535
                  default: 8080
                env:
                  type: array
                  items:
                    type: object
                    required: ["name", "value"]
                    properties:
                      name:
                        type: string
                      value:
                        type: string
                ingress:
                  type: object
                  properties:
                    enabled:
                      type: boolean
                      default: false
                    host:
                      type: string
                    tlsEnabled:
                      type: boolean
                      default: true
            status:
              type: object
              properties:
                phase:
                  type: string
                  enum: [Pending, Running, Degraded, Failed]
                readyReplicas:
                  type: integer
                selector:
                  type: string
                conditions:
                  type: array
                  items:
                    type: object
                    properties:
                      type:
                        type: string
                      status:
                        type: string
                      reason:
                        type: string
                      message:
                        type: string
                      lastTransitionTime:
                        type: string
                        format: date-time
# Create a WebApp custom resource instance
apiVersion: apps.mycompany.io/v1
kind: WebApp
metadata:
  name: my-web-app
  namespace: production
  labels:
    team: frontend
spec:
  image: registry.mycompany.io/webapp:v2.1.0
  replicas: 3
  minReplicas: 2
  maxReplicas: 10
  port: 8080
  env:
    - name: LOG_LEVEL
      value: info
  ingress:
    enabled: true
    host: myapp.example.com
    tlsEnabled: true
---
# ClusterRole for the controller
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: webapp-controller
rules:
  - apiGroups: ["apps.mycompany.io"]
    resources: ["webapps"]
    verbs: ["get", "list", "watch", "update", "patch"]
  - apiGroups: ["apps.mycompany.io"]
    resources: ["webapps/status"]   # status subresource
    verbs: ["get", "update", "patch"]
  - apiGroups: ["apps.mycompany.io"]
    resources: ["webapps/finalizers"]
    verbs: ["update"]
  - apiGroups: ["apps"]
    resources: ["deployments"]
    verbs: ["get", "list", "create", "update", "patch", "delete"]
---
# ClusterRole for developers — no delete
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: webapp-developer
rules:
  - apiGroups: ["apps.mycompany.io"]
    resources: ["webapps"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: webapps.apps.mycompany.io
spec:
  group: apps.mycompany.io
  scope: Namespaced
  names:
    plural: webapps
    kind: WebApp
  conversion:
    strategy: Webhook
    webhook:
      conversionReviewVersions: ["v1"]
      clientConfig:
        service:
          name: webapp-conversion-webhook
          namespace: webapp-system
          path: /convert
          port: 443
        caBundle: LS0t...
  versions:
    - name: v1
      served: true
      storage: true          # Storage version
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                image:
                  type: string
                containerPort:   # Renamed from v1beta1 "port"
                  type: integer
    - name: v1beta1
      served: true
      storage: false           # Not storage version
      deprecated: true
      deprecationWarning: "v1beta1 deprecated, migrate to v1"
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                image:
                  type: string
                port:            # Old field name
                  type: integer
// controllers/webapp_controller.go package controllers import ( "context" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" myv1 "mycompany.io/webapp-operator/api/v1" ) type WebAppReconciler struct { client.Client Scheme *runtime.Scheme } // +kubebuilder:rbac:groups=apps.mycompany.io,resources=webapps,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=apps.mycompany.io,resources=webapps/status,verbs=get;update;patch // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete func (r *WebAppReconciler) Reconcile( ctx context.Context, req ctrl.Request, ) (ctrl.Result, error) { // 1. Fetch the WebApp CR webapp := &myv1.WebApp{} if err := r.Get(ctx, req.NamespacedName, webapp); err != nil { return ctrl.Result{}, client.IgnoreNotFound(err) } // 2. Define the desired Deployment replicas := int32(webapp.Spec.Replicas) desired := &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: webapp.Name, Namespace: webapp.Namespace, }, Spec: appsv1.DeploymentSpec{ Replicas: &replicas, Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"app": webapp.Name}, }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": webapp.Name}, }, Spec: corev1.PodSpec{ Containers: []corev1.Container{{ Name: "webapp", Image: webapp.Spec.Image, }}, }, }, }, } // Set WebApp as owner — GC when CR deleted ctrl.SetControllerReference(webapp, desired, r.Scheme) // 3. Create or Update existing := &appsv1.Deployment{} if err := r.Get(ctx, req.NamespacedName, existing); err != nil { r.Create(ctx, desired) } else { existing.Spec = desired.Spec r.Update(ctx, existing) } // 4. 
Update status (use Status().Update, not Update) webapp.Status.Phase = "Running" r.Status().Update(ctx, webapp) return ctrl.Result{}, nil } func (r *WebAppReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&myv1.WebApp{}). Owns(&appsv1.Deployment{}). // Watch child Deployments Complete(r) }
spec:
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              x-kubernetes-validations:
                # Cross-field: max must be >= min. Guarded with has() — both
                # fields are optional and CEL errors on access to absent fields,
                # which would reject any CR that omits them.
                - rule: "!has(self.minReplicas) || !has(self.maxReplicas) || self.maxReplicas >= self.minReplicas"
                  message: maxReplicas must be >= minReplicas
                # Transition rule: minReplicas cannot decrease. Rules that
                # reference oldSelf are skipped automatically on create.
                - rule: "!has(oldSelf.minReplicas) || !has(self.minReplicas) || oldSelf.minReplicas <= self.minReplicas"
                  message: minReplicas cannot be decreased
                  fieldPath: .minReplicas
                # targetCPUPercent defaults to 70, and defaulting runs before
                # validation, so the field is always present here.
                - rule: "self.targetCPUPercent >= 10 && self.targetCPUPercent <= 95"
                  message: targetCPUPercent must be between 10 and 95
              properties:
                minReplicas:
                  type: integer
                  minimum: 1
                maxReplicas:
                  type: integer
                  maximum: 1000
                targetCPUPercent:
                  type: integer
                  default: 70
                scaleDownCooldown:
                  type: string
                  pattern: '^[0-9]+(s|m|h)$'
                  default: "5m"
                  x-kubernetes-validations:
                    - rule: self.matches('^[0-9]+(s|m|h)$')
                      message: Must be like 30s, 5m, or 1h
---
# Finalizer on a CR — the API server blocks actual deletion until the
# controller's cleanup finishes and removes the finalizer entry.
apiVersion: apps.mycompany.io/v1
kind: WebApp
metadata:
  name: my-app
  finalizers:
    - webapps.apps.mycompany.io/finalizer
spec:
  image: myapp:latest
  replicas: 3
---
# kubectl CRD commands cheat sheet

# Discover
kubectl get crds
kubectl api-resources --api-group=apps.mycompany.io
kubectl explain webapp.spec            # field docs
kubectl explain webapp.spec.ingress

# Manage instances
kubectl get webapps -A -o wide
kubectl get wa                         # shortName
kubectl describe webapp my-web-app
kubectl get webapp my-web-app -o jsonpath='{.status.phase}'

# Patch spec
kubectl patch webapp my-web-app \
  --type=merge -p '{"spec":{"replicas":5}}'

# Patch status subresource (controller pattern)
kubectl patch webapp my-web-app \
  --subresource=status --type=merge \
  -p '{"status":{"phase":"Running"}}'

# Scale (if scale subresource enabled)
kubectl scale webapp my-web-app --replicas=5

# Cleanup — WARNING: deletes ALL CRs!
kubectl delete crd webapps.apps.mycompany.io

# kubebuilder bootstrap
kubebuilder init --domain mycompany.io
kubebuilder create api --group apps --version v1 --kind WebApp
make generate && make manifests && make install
make run
---
# Versioned CRD: v1alpha1 is deprecated-but-served, v1 is the storage version.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: databases.storage.mycompany.io
spec:
  group: storage.mycompany.io
  scope: Namespaced
  names:
    plural: databases
    singular: database
    kind: Database
    shortNames: [db]
  conversion:
    strategy: Webhook
    webhook:
      conversionReviewVersions: ["v1"]
      clientConfig:
        service:
          name: database-operator-webhook
          namespace: operators
          path: /convert
  versions:
    - name: v1alpha1
      served: true  # still served but deprecated
      storage: false
      deprecated: true
      deprecationWarning: "v1alpha1 deprecated, migrate to v1"
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                dbType:  # old field name
                  type: string
    - name: v1
      served: true
      storage: true  # current storage version
      subresources:
        status: {}
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              required: ["engine"]
              properties:
                engine:  # renamed from dbType
                  type: string
                  enum: ["postgres", "mysql", "redis"]
                version:
                  type: string
                replicas:
                  type: integer
                  default: 1
                  minimum: 1
---
# CRD demonstrating CEL validation at three levels: root, object, and field.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: clusters.infra.mycompany.io
spec:
  group: infra.mycompany.io
  scope: Namespaced
  names:
    plural: clusters
    kind: Cluster
  versions:
    - name: v1
      served: true
      storage: true
      subresources:
        status: {}
      schema:
        openAPIV3Schema:
          type: object
          x-kubernetes-validations:
            - rule: "self.spec.maxNodes >= self.spec.minNodes"
              message: "maxNodes must be >= minNodes"
            - rule: "!(self.spec.highAvailability && self.spec.minNodes < 3)"
              message: "HA clusters require at least 3 nodes"
          properties:
            spec:
              type: object
              required: ["region", "minNodes", "maxNodes"]
              x-kubernetes-validations:
                # Transition rule — only evaluated on update, so it enforces
                # immutability without blocking creation.
                - rule: "self.region == oldSelf.region"
                  message: "region is immutable after creation"
              properties:
                region:
                  type: string
                  x-kubernetes-validations:
                    - rule: "self.matches('^[a-z]+-[a-z]+-[0-9]+$')"
                      message: "region must match pattern like us-east-1"
                minNodes:
                  type: integer
                  minimum: 1
                maxNodes:
                  type: integer
                  maximum: 1000
                highAvailability:
                  type: boolean
                  default: false
                nodeType:
                  type: string
                  enum: ["standard", "memory", "compute"]
                  default: standard
// types.go — markers drive CRD YAML generation // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.readyReplicas // +kubebuilder:resource:scope=Namespaced,shortName=wa,categories=all // +kubebuilder:printcolumn:name="Replicas",type=integer,JSONPath=`.spec.replicas` // +kubebuilder:printcolumn:name="Ready",type=integer,JSONPath=`.status.readyReplicas` // +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase` // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` type WebApp struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec WebAppSpec `json:"spec,omitempty"` Status WebAppStatus `json:"status,omitempty"` } type WebAppSpec struct { // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 Image string `json:"image"` // +kubebuilder:validation:Minimum=1 // +kubebuilder:validation:Maximum=50 // +kubebuilder:default=1 Replicas int32 `json:"replicas,omitempty"` // +kubebuilder:validation:Enum=RollingUpdate;Recreate // +kubebuilder:default=RollingUpdate Strategy string `json:"strategy,omitempty"` } type WebAppStatus struct { ReadyReplicas int32 `json:"readyReplicas,omitempty"` Phase string `json:"phase,omitempty"` Conditions []metav1.Condition `json:"conditions,omitempty"` } // Reconciler with RBAC markers — make manifests generates ClusterRole // +kubebuilder:rbac:groups=apps.mycompany.io,resources=webapps,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=apps.mycompany.io,resources=webapps/status,verbs=get;update;patch // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete func (r *WebAppReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { var webapp 
appsv1.WebApp if err := r.Get(ctx, req.NamespacedName, &webapp); err != nil { return reconcile.Result{}, client.IgnoreNotFound(err) } // reconcile logic here... // Write status via status subresource (not regular Update) webapp.Status.Phase = "Running" if err := r.Status().Update(ctx, &webapp); err != nil { return reconcile.Result{}, err } return reconcile.Result{RequeueAfter: time.Minute * 5}, nil }
---
# CRD with status conditions following the standard metav1.Condition shape.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: pipelines.ci.mycompany.io
spec:
  group: ci.mycompany.io
  scope: Namespaced
  names:
    plural: pipelines
    kind: Pipeline
    shortNames: [pl]
  versions:
    - name: v1
      served: true
      storage: true
      subresources:
        status: {}
      additionalPrinterColumns:
        - name: Phase
          type: string
          jsonPath: .status.phase
        # NOTE(review): CRD printer columns support only a simple JSONPath
        # subset — filter expressions like [?(@.type=="Ready")] are generally
        # not evaluated server-side; confirm this column actually renders.
        - name: Ready
          type: string
          jsonPath: .status.conditions[?(@.type=="Ready")].status
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              required: ["repository"]
              properties:
                repository:
                  type: string
                branch:
                  type: string
                  default: main
            status:
              type: object
              properties:
                phase:
                  type: string
                  enum: ["Pending", "Running", "Succeeded", "Failed"]
                conditions:
                  type: array
                  items:
                    type: object
                    required: ["type", "status"]
                    properties:
                      type:
                        type: string
                      status:
                        type: string
                        enum: ["True", "False", "Unknown"]
                      reason:
                        type: string
                      message:
                        type: string
                      lastTransitionTime:
                        type: string
                        format: date-time
---
# Wait for condition to become True
kubectl wait pipeline my-pipeline --for=condition=Ready=True --timeout=120s
---
# Cluster-scoped CRD: instances are not namespaced, so RBAC must use
# ClusterRoles (a namespaced Role cannot grant access to them).
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: cloudproviders.infra.mycompany.io
spec:
  group: infra.mycompany.io
  scope: Cluster  # NOT Namespaced
  names:
    plural: cloudproviders
    kind: CloudProvider
    shortNames: [cp]
  versions:
    - name: v1
      served: true
      storage: true
      subresources:
        status: {}
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              required: ["provider", "region"]
              properties:
                provider:
                  type: string
                  enum: ["aws", "gcp", "azure"]
                region:
                  type: string
---
# Controller ClusterRole (cluster-scoped needs ClusterRole)
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cloudprovider-controller
rules:
  - apiGroups: ["infra.mycompany.io"]
    resources: ["cloudproviders"]
    verbs: ["get", "list", "watch", "update", "patch"]
  - apiGroups: ["infra.mycompany.io"]
    resources: ["cloudproviders/status"]
    verbs: ["get", "update", "patch"]
  - apiGroups: ["infra.mycompany.io"]
    resources: ["cloudproviders/finalizers"]
    verbs: ["update"]
---
# Read-only role for humans and dashboards.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cloudprovider-viewer
rules:
  - apiGroups: ["infra.mycompany.io"]
    resources: ["cloudproviders"]
    verbs: ["get", "list", "watch"]
# ── PROJECT INIT ─────────────────────────────────────────────
mkdir webapp-operator && cd webapp-operator
kubebuilder init --domain mycompany.io --repo github.com/mycompany/webapp-operator

# ── CREATE API (generates types.go + controller skeleton) ────
kubebuilder create api --group apps --version v1 --kind WebApp --resource --controller

# ── CREATE WEBHOOK ───────────────────────────────────────────
# --defaulting               → MutatingWebhook
# --programmatic-validation  → ValidatingWebhook
# (fixed: a comment after a trailing backslash breaks the continuation,
#  turning the next flag into a separate, invalid command)
kubebuilder create webhook --group apps --version v1 --kind WebApp \
  --defaulting --programmatic-validation

# ── GENERATE ─────────────────────────────────────────────────
make generate      # generate DeepCopyObject methods
make manifests     # generate CRD YAML + RBAC from markers

# ── INSTALL CRD into cluster ─────────────────────────────────
make install       # kubectl apply -f config/crd/bases/

# ── RUN LOCALLY (out-of-cluster for development) ─────────────
make run           # runs controller process locally

# ── INTEGRATION TESTS with envtest ───────────────────────────
make test          # downloads envtest binaries, runs tests
go test ./... -v -run TestReconcile

# ── BUILD AND PUSH IMAGE ─────────────────────────────────────
make docker-build docker-push IMG=registry.io/webapp-op:v1.0.0

# ── DEPLOY TO CLUSTER ────────────────────────────────────────
make deploy IMG=registry.io/webapp-op:v1.0.0

# ── OLM BUNDLE for OperatorHub distribution ──────────────────
make bundle IMG=registry.io/webapp-op:v1.0.0
make bundle-build BUNDLE_IMG=registry.io/webapp-op-bundle:v1.0.0
operator-sdk scorecard bundle/   # validate bundle quality

# ── USEFUL RUNTIME COMMANDS ──────────────────────────────────
kubectl get crds
kubectl explain webapp.spec --api-version=apps.mycompany.io/v1
kubectl get webapps -A -o wide
kubectl scale webapp my-app --replicas=5
kubectl wait webapp my-app --for=condition=Ready --timeout=60s
kubectl describe crd webapps.apps.mycompany.io
// When to use CRDs vs API Aggregation vs other patterns — pick the right extension point.
| Mechanism | Storage | Validation | Custom Logic | kubectl Support | Best For |
|---|---|---|---|---|---|
| CRD | etcd (via API server) | OpenAPI v3 + CEL | External controller | FULL | 90% of cases — domain objects, Operators, config |
| API Aggregation (AA) | Own backend (any store) | Custom — full control | Built-in to server | FULL | Custom storage, non-standard REST (metrics-server) |
| Built-in Resource (upstream) | etcd | Hardcoded | Core controllers | FULL | Contributing new features to Kubernetes itself |
| ConfigMap (workaround) | etcd | None | App reads directly | LIMITED | Simple config only — avoid for structured domain data |
| Annotations / Labels | etcd (on existing obj) | None | Controller reads | LIMITED | Small metadata additions on existing resources |
// Every essential kubectl command organized by Kubernetes component — pods, deployments, services, configmaps, secrets, nodes, namespaces, and more.
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get pods                            # list pods in current ns
kubectl get pods -A                         # all namespaces
kubectl get pods -o wide                    # show node, IP
kubectl get pods -l app=nginx               # filter by label
kubectl get pods --field-selector=status.phase=Running
kubectl get pod my-pod -o yaml              # full YAML spec
kubectl get pod my-pod -o jsonpath='{.status.podIP}'
kubectl describe pod my-pod                 # detailed info + events

# ── CREATE & DELETE ───────────────────────────────────────────
kubectl run nginx --image=nginx             # quick pod creation
kubectl run nginx --image=nginx --dry-run=client -o yaml  # generate YAML
kubectl run tmp --image=busybox --rm -it -- sh            # temp interactive pod
kubectl delete pod my-pod                   # delete a pod
kubectl delete pod my-pod --grace-period=0 --force        # force delete
kubectl delete pods -l app=old-app          # delete by label

# ── LOGS & DEBUG ──────────────────────────────────────────────
kubectl logs my-pod                         # view logs
kubectl logs my-pod -c sidecar              # specific container
kubectl logs my-pod -f                      # follow / stream
kubectl logs my-pod --previous              # previous crashed container
kubectl logs my-pod --since=1h              # last hour
kubectl logs my-pod --tail=100              # last 100 lines
kubectl logs -l app=nginx --all-containers  # logs by label

# ── EXEC & INTERACT ───────────────────────────────────────────
kubectl exec my-pod -- ls /app              # run command
kubectl exec -it my-pod -- /bin/sh          # interactive shell
kubectl exec -it my-pod -c sidecar -- bash  # specific container
kubectl cp my-pod:/var/log/app.log ./app.log  # copy from pod
kubectl cp ./config.yaml my-pod:/etc/config/  # copy to pod
kubectl port-forward my-pod 8080:80         # forward local port
kubectl debug my-pod -it --image=busybox    # ephemeral debug container
kubectl top pod my-pod --containers         # resource usage
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get deployments                     # list deployments
kubectl get deploy -A                       # all namespaces
kubectl get deploy my-app -o yaml           # full spec
kubectl describe deploy my-app              # detail + events

# ── CREATE & UPDATE ───────────────────────────────────────────
kubectl create deploy my-app --image=nginx  # imperative create
kubectl create deploy my-app --image=nginx --replicas=3 --dry-run=client -o yaml
kubectl apply -f deployment.yaml            # declarative apply
kubectl set image deploy/my-app app=nginx:1.25  # update image
kubectl scale deploy my-app --replicas=5    # scale up/down
kubectl autoscale deploy my-app --min=2 --max=10 --cpu-percent=80
kubectl patch deploy my-app -p '{"spec":{"replicas":3}}'

# ── ROLLOUTS ──────────────────────────────────────────────────
kubectl rollout status deploy/my-app        # watch rollout
kubectl rollout history deploy/my-app       # revision history
kubectl rollout history deploy/my-app --revision=3  # specific revision
kubectl rollout undo deploy/my-app          # rollback to previous
kubectl rollout undo deploy/my-app --to-revision=2  # rollback to specific
kubectl rollout restart deploy/my-app       # rolling restart
kubectl rollout pause deploy/my-app         # pause rollout
kubectl rollout resume deploy/my-app        # resume rollout

# ── DELETE ────────────────────────────────────────────────────
kubectl delete deploy my-app                # delete deployment
kubectl delete -f deployment.yaml           # delete from file
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get services                        # list services
kubectl get svc -A                          # all namespaces
kubectl get svc my-svc -o yaml              # full spec
kubectl describe svc my-svc                 # detail + endpoints
kubectl get endpoints my-svc                # backing pod IPs

# ── CREATE ────────────────────────────────────────────────────
kubectl expose deploy my-app --port=80 --target-port=8080  # ClusterIP
kubectl expose deploy my-app --port=80 --type=NodePort
kubectl expose deploy my-app --port=80 --type=LoadBalancer
kubectl create svc clusterip my-svc --tcp=80:8080 --dry-run=client -o yaml
kubectl create svc nodeport my-svc --tcp=80:8080 --node-port=30080

# ── ACCESS & DEBUG ────────────────────────────────────────────
kubectl port-forward svc/my-svc 8080:80     # local access
kubectl run curl --image=curlimages/curl --rm -it -- curl my-svc:80  # test from cluster

# ── DELETE ────────────────────────────────────────────────────
kubectl delete svc my-svc
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get configmaps                      # list configmaps
kubectl get cm -A                           # all namespaces
kubectl get cm my-config -o yaml            # view data
kubectl describe cm my-config

# ── CREATE ────────────────────────────────────────────────────
kubectl create configmap my-config --from-literal=key1=val1 --from-literal=key2=val2
kubectl create cm my-config --from-file=config.properties
kubectl create cm my-config --from-file=app-config=./config.yaml
kubectl create cm my-config --from-env-file=.env
kubectl create cm my-config --from-literal=key=val --dry-run=client -o yaml

# ── UPDATE & DELETE ───────────────────────────────────────────
kubectl edit cm my-config                   # edit in $EDITOR
kubectl patch cm my-config -p '{"data":{"key1":"newval"}}'
kubectl delete cm my-config
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get secrets                         # list secrets
kubectl get secret my-secret -o yaml        # view (base64 encoded)
kubectl get secret my-secret -o jsonpath='{.data.password}' | base64 -d  # decode
kubectl describe secret my-secret           # metadata only

# ── CREATE ────────────────────────────────────────────────────
kubectl create secret generic my-secret --from-literal=user=admin --from-literal=pass=s3cret
# $HOME instead of ~: the shell does not tilde-expand "~" in the middle of
# an ordinary argument word, so kubectl would receive the literal path.
kubectl create secret generic my-secret --from-file=ssh-key=$HOME/.ssh/id_rsa
kubectl create secret docker-registry regcred \
  --docker-server=registry.io --docker-username=user \
  --docker-password=pass --docker-email=user@example.com
kubectl create secret tls my-tls --cert=tls.crt --key=tls.key
kubectl create secret generic my-secret --from-literal=key=val --dry-run=client -o yaml

# ── UPDATE & DELETE ───────────────────────────────────────────
kubectl edit secret my-secret
kubectl delete secret my-secret
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get namespaces                      # list all namespaces
kubectl get ns                              # shorthand
kubectl describe ns my-namespace

# ── CREATE & SWITCH ───────────────────────────────────────────
kubectl create namespace staging
kubectl create ns staging --dry-run=client -o yaml
kubectl config set-context --current --namespace=staging  # set default

# ── DELETE ────────────────────────────────────────────────────
kubectl delete ns staging                   # deletes ALL resources in ns
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get nodes                           # list nodes
kubectl get nodes -o wide                   # IPs, OS, kernel, runtime
kubectl describe node node-1                # capacity, conditions, pods
kubectl top nodes                           # CPU & memory usage
kubectl get node node-1 -o jsonpath='{.status.allocatable}'

# ── LABELS & TAINTS ───────────────────────────────────────────
kubectl label node node-1 disktype=ssd      # add label
kubectl label node node-1 disktype-         # remove label
kubectl taint nodes node-1 dedicated=gpu:NoSchedule
kubectl taint nodes node-1 dedicated=gpu:NoSchedule-  # remove taint

# ── MAINTENANCE ───────────────────────────────────────────────
kubectl cordon node-1                       # mark unschedulable
kubectl drain node-1 --ignore-daemonsets --delete-emptydir-data
kubectl uncordon node-1                     # re-enable scheduling
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get replicasets                     # list replicasets
kubectl get rs -A                           # all namespaces
kubectl get rs my-rs -o yaml                # full spec
kubectl describe rs my-rs                   # detail + events

# ── SCALE & DELETE ────────────────────────────────────────────
kubectl scale rs my-rs --replicas=5         # scale (prefer deploy)
kubectl delete rs my-rs                     # delete replicaset
kubectl delete rs my-rs --cascade=orphan    # keep pods running
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get statefulsets                    # list statefulsets
kubectl get sts -A                          # all namespaces
kubectl get sts my-db -o yaml               # full spec
kubectl describe sts my-db

# ── SCALE & ROLLOUT ───────────────────────────────────────────
kubectl scale sts my-db --replicas=5
kubectl rollout status sts/my-db
kubectl rollout history sts/my-db
kubectl rollout undo sts/my-db
kubectl rollout restart sts/my-db
kubectl patch sts my-db -p '{"spec":{"replicas":3}}'

# ── DELETE ────────────────────────────────────────────────────
kubectl delete sts my-db                    # deletes pods too
kubectl delete sts my-db --cascade=orphan   # keep pods
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get daemonsets                      # list daemonsets
kubectl get ds -A                           # all namespaces
kubectl get ds my-agent -o yaml
kubectl describe ds my-agent

# ── ROLLOUT ───────────────────────────────────────────────────
kubectl rollout status ds/my-agent
kubectl rollout history ds/my-agent
kubectl rollout undo ds/my-agent
kubectl rollout restart ds/my-agent

# ── DELETE ────────────────────────────────────────────────────
kubectl delete ds my-agent
# ── JOBS ──────────────────────────────────────────────────────
kubectl get jobs                            # list jobs
kubectl get job my-job -o yaml
kubectl describe job my-job
kubectl create job my-job --image=busybox -- echo "hello"
kubectl create job my-job --from=cronjob/my-cron  # manual trigger
kubectl logs job/my-job                     # view job logs
kubectl delete job my-job

# ── CRONJOBS ──────────────────────────────────────────────────
kubectl get cronjobs                        # list cronjobs
kubectl get cj -A
kubectl get cj my-cron -o yaml
kubectl describe cj my-cron
kubectl create cronjob my-cron --image=busybox --schedule="*/5 * * * *" -- echo "tick"
kubectl patch cj my-cron -p '{"spec":{"suspend":true}}'   # suspend
kubectl patch cj my-cron -p '{"spec":{"suspend":false}}'  # resume
kubectl delete cj my-cron
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get ingress                         # list ingress resources
kubectl get ing -A
kubectl get ing my-ingress -o yaml
kubectl describe ing my-ingress

# ── CREATE & DELETE ───────────────────────────────────────────
kubectl create ingress my-ingress \
  --rule="myapp.example.com/=my-svc:80" \
  --annotation nginx.ingress.kubernetes.io/rewrite-target=/
kubectl create ingress my-ingress \
  --rule="myapp.example.com/*=my-svc:80,tls=my-tls-secret"
kubectl delete ing my-ingress
# ── PERSISTENT VOLUMES ────────────────────────────────────────
kubectl get pv                              # list persistent volumes
kubectl get pv my-pv -o yaml
kubectl describe pv my-pv

# ── PERSISTENT VOLUME CLAIMS ──────────────────────────────────
kubectl get pvc                             # list claims
kubectl get pvc -A
kubectl get pvc my-claim -o yaml
kubectl describe pvc my-claim
kubectl delete pvc my-claim

# ── STORAGE CLASSES ───────────────────────────────────────────
kubectl get storageclass                    # list storage classes
kubectl get sc
kubectl describe sc standard
# ── ROLES & CLUSTERROLES ──────────────────────────────────────
kubectl get roles -A                        # list roles
kubectl get clusterroles                    # list cluster roles
kubectl describe role my-role -n my-ns
kubectl describe clusterrole admin
kubectl create role pod-reader --verb=get,list,watch --resource=pods
kubectl create clusterrole node-reader --verb=get,list --resource=nodes

# ── BINDINGS ──────────────────────────────────────────────────
kubectl get rolebindings -A
kubectl get clusterrolebindings
kubectl create rolebinding my-rb --role=pod-reader --user=jane -n my-ns
kubectl create clusterrolebinding my-crb --clusterrole=node-reader --user=jane

# ── SERVICE ACCOUNTS ──────────────────────────────────────────
kubectl get serviceaccounts                 # list service accounts
kubectl get sa -A
kubectl create sa my-sa
kubectl describe sa my-sa
kubectl create token my-sa                  # generate token (v1.24+)

# ── AUTH CHECK ────────────────────────────────────────────────
kubectl auth can-i create pods              # check own permissions
kubectl auth can-i get pods --as=jane       # impersonate user
kubectl auth can-i '*' '*' --as=system:serviceaccount:default:my-sa
kubectl auth whoami                         # current identity (v1.27+)
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get networkpolicies                 # list network policies
kubectl get netpol -A
kubectl get netpol my-policy -o yaml
kubectl describe netpol my-policy

# ── DELETE ────────────────────────────────────────────────────
kubectl delete netpol my-policy
# ── RESOURCE QUOTAS ───────────────────────────────────────────
kubectl get resourcequotas                  # list quotas
kubectl get quota -A
kubectl describe quota my-quota
kubectl create quota my-quota --hard=pods=10,requests.cpu=4,requests.memory=8Gi

# ── LIMIT RANGES ──────────────────────────────────────────────
kubectl get limitranges
kubectl get limits -A
kubectl describe limits my-limits
# ── LIST & INSPECT ────────────────────────────────────────────
kubectl get hpa                             # list autoscalers
kubectl get hpa -A
kubectl describe hpa my-hpa

# ── CREATE & MANAGE ───────────────────────────────────────────
kubectl autoscale deploy my-app --min=2 --max=10 --cpu-percent=80
kubectl patch hpa my-hpa -p '{"spec":{"maxReplicas":20}}'
kubectl delete hpa my-hpa