fix: increase Loki install timeout and add failure diagnostics
This commit is contained in:
@@ -73,9 +73,80 @@
|
||||
--version {{ loki_chart_version }}
|
||||
--values /tmp/loki-values.yaml
|
||||
--wait
|
||||
--timeout 10m
|
||||
--timeout 20m
|
||||
register: loki_install
|
||||
failed_when: false
|
||||
changed_when: true
|
||||
|
||||
# Diagnostic step: lists the pods labelled app.kubernetes.io/name=loki in the
# observability namespace. Runs only when the registered loki_install result
# has a non-zero rc. It never fails the play and never reports a change; the
# output is kept in loki_pods_status.
- name: Show Loki pods on install failure
  when: loki_install.rc != 0
  command: >-
    kubectl -n {{ observability_namespace }} get pods
    -l app.kubernetes.io/name=loki -o wide
  register: loki_pods_status
  failed_when: false
  changed_when: false
|
||||
|
||||
# Diagnostic step: lists every PVC in the observability namespace. Runs only
# when the registered loki_install result has a non-zero rc. Errors are
# ignored and no change is reported; the output is kept in loki_pvc_status.
- name: Show observability PVCs on Loki install failure
  when: loki_install.rc != 0
  command: >-
    kubectl -n {{ observability_namespace }} get pvc -o wide
  register: loki_pvc_status
  failed_when: false
  changed_when: false
|
||||
|
||||
# Diagnostic step: looks up the name of the first pod labelled
# app.kubernetes.io/name=loki and stores the result in loki_pod_name.
# Runs only when the registered loki_install result has a non-zero rc.
#
# Uses `command` with an explicit argv list instead of `shell`: nothing here
# needs shell features, and argv keeps the interpolated namespace from being
# parsed by a shell.
#
# failed_when is false, so a kubectl error does not stop the play; the later
# describe/logs tasks are gated on loki_pod_name.stdout being non-empty.
- name: Get Loki pod name on install failure
  when: loki_install.rc != 0
  command:
    argv:
      - kubectl
      - "-n"
      - "{{ observability_namespace }}"
      - get
      - pods
      - "-l"
      - app.kubernetes.io/name=loki
      - "-o"
      - "jsonpath={.items[0].metadata.name}"
  register: loki_pod_name
  changed_when: false
  failed_when: false
|
||||
|
||||
# Diagnostic step: runs `kubectl describe pod` on the pod whose name was
# registered in loki_pod_name. Both conditions must hold: the install returned
# a non-zero rc, and a pod name was actually found (non-empty stdout).
# Errors are ignored and no change is reported; the output is kept in
# loki_pod_describe.
- name: Describe Loki pod on install failure
  when:
    - loki_install.rc != 0
    - loki_pod_name.stdout | length > 0
  command: >-
    kubectl -n {{ observability_namespace }}
    describe pod {{ loki_pod_name.stdout }}
  register: loki_pod_describe
  failed_when: false
  changed_when: false
|
||||
|
||||
# Diagnostic step: fetches the last 200 log lines of the pod whose name was
# registered in loki_pod_name. Both conditions must hold: the install returned
# a non-zero rc, and a pod name was actually found (non-empty stdout).
# Errors are ignored and no change is reported; the output is kept in
# loki_pod_logs.
- name: Show Loki pod logs on install failure
  when:
    - loki_install.rc != 0
    - loki_pod_name.stdout | length > 0
  command: >-
    kubectl -n {{ observability_namespace }}
    logs {{ loki_pod_name.stdout }} --tail=200
  register: loki_pod_logs
  failed_when: false
  changed_when: false
|
||||
|
||||
# Diagnostic step: lists the events of the observability namespace, sorted by
# .lastTimestamp. Runs only when the registered loki_install result has a
# non-zero rc. Errors are ignored and no change is reported; the output is
# kept in loki_events.
- name: Show observability events on Loki install failure
  when: loki_install.rc != 0
  command: >-
    kubectl -n {{ observability_namespace }}
    get events --sort-by=.lastTimestamp
  register: loki_events
  failed_when: false
  changed_when: false
|
||||
|
||||
# Final step of the failure path: aborts the play when the registered
# loki_install result has a non-zero rc, printing the Helm stderr together
# with the output of the diagnostic tasks above. Each section falls back to a
# default ('' for stderr, 'n/a' otherwise) when the corresponding registered
# result has no stdout/stderr, e.g. because that diagnostic task was skipped.
- name: Fail with Loki diagnostics
  when: loki_install.rc != 0
  fail:
    msg: |
      Loki install failed.
      Helm stderr:
      {{ loki_install.stderr | default('') }}

      Loki pods:
      {{ loki_pods_status.stdout | default('n/a') }}

      PVCs:
      {{ loki_pvc_status.stdout | default('n/a') }}

      Loki pod describe:
      {{ loki_pod_describe.stdout | default('n/a') }}

      Loki pod logs:
      {{ loki_pod_logs.stdout | default('n/a') }}

      Events:
      {{ loki_events.stdout | default('n/a') }}
|
||||
|
||||
- name: Write Promtail values
|
||||
template:
|
||||
src: promtail-values.yaml.j2
|
||||
|
||||
@@ -4,6 +4,8 @@ loki:
|
||||
auth_enabled: false
|
||||
commonConfig:
|
||||
replication_factor: 1
|
||||
limits_config:
|
||||
retention_period: 168h
|
||||
schemaConfig:
|
||||
configs:
|
||||
- from: "2024-01-01"
|
||||
@@ -27,6 +29,13 @@ write:
|
||||
|
||||
singleBinary:
|
||||
replicas: 1
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 1Gi
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClass: {{ loki_storage_class }}
|
||||
|
||||
Reference in New Issue
Block a user