fix: increase Loki install timeout and add failure diagnostics
This commit is contained in:
@@ -73,9 +73,80 @@
|
|||||||
--version {{ loki_chart_version }}
|
--version {{ loki_chart_version }}
|
||||||
--values /tmp/loki-values.yaml
|
--values /tmp/loki-values.yaml
|
||||||
--wait
|
--wait
|
||||||
--timeout 10m
|
--timeout 20m
|
||||||
|
register: loki_install
|
||||||
|
failed_when: false
|
||||||
changed_when: true
|
changed_when: true
|
||||||
|
|
||||||
|
- name: Show Loki pods on install failure
|
||||||
|
command: kubectl -n {{ observability_namespace }} get pods -l app.kubernetes.io/name=loki -o wide
|
||||||
|
register: loki_pods_status
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
when: loki_install.rc != 0
|
||||||
|
|
||||||
|
- name: Show observability PVCs on Loki install failure
|
||||||
|
command: kubectl -n {{ observability_namespace }} get pvc -o wide
|
||||||
|
register: loki_pvc_status
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
when: loki_install.rc != 0
|
||||||
|
|
||||||
|
- name: Get Loki pod name on install failure
|
||||||
|
shell: kubectl -n {{ observability_namespace }} get pods -l app.kubernetes.io/name=loki -o jsonpath='{.items[0].metadata.name}'
|
||||||
|
register: loki_pod_name
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
when: loki_install.rc != 0
|
||||||
|
|
||||||
|
- name: Describe Loki pod on install failure
|
||||||
|
command: kubectl -n {{ observability_namespace }} describe pod {{ loki_pod_name.stdout }}
|
||||||
|
register: loki_pod_describe
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
when:
|
||||||
|
- loki_install.rc != 0
|
||||||
|
- loki_pod_name.stdout | length > 0
|
||||||
|
|
||||||
|
- name: Show Loki pod logs on install failure
|
||||||
|
command: kubectl -n {{ observability_namespace }} logs {{ loki_pod_name.stdout }} --tail=200
|
||||||
|
register: loki_pod_logs
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
when:
|
||||||
|
- loki_install.rc != 0
|
||||||
|
- loki_pod_name.stdout | length > 0
|
||||||
|
|
||||||
|
- name: Show observability events on Loki install failure
|
||||||
|
command: kubectl -n {{ observability_namespace }} get events --sort-by=.lastTimestamp
|
||||||
|
register: loki_events
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
when: loki_install.rc != 0
|
||||||
|
|
||||||
|
- name: Fail with Loki diagnostics
|
||||||
|
fail:
|
||||||
|
msg: |
|
||||||
|
Loki install failed.
|
||||||
|
Helm stderr:
|
||||||
|
{{ loki_install.stderr | default('') }}
|
||||||
|
|
||||||
|
Loki pods:
|
||||||
|
{{ loki_pods_status.stdout | default('n/a') }}
|
||||||
|
|
||||||
|
PVCs:
|
||||||
|
{{ loki_pvc_status.stdout | default('n/a') }}
|
||||||
|
|
||||||
|
Loki pod describe:
|
||||||
|
{{ loki_pod_describe.stdout | default('n/a') }}
|
||||||
|
|
||||||
|
Loki pod logs:
|
||||||
|
{{ loki_pod_logs.stdout | default('n/a') }}
|
||||||
|
|
||||||
|
Events:
|
||||||
|
{{ loki_events.stdout | default('n/a') }}
|
||||||
|
when: loki_install.rc != 0
|
||||||
|
|
||||||
- name: Write Promtail values
|
- name: Write Promtail values
|
||||||
template:
|
template:
|
||||||
src: promtail-values.yaml.j2
|
src: promtail-values.yaml.j2
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ loki:
|
|||||||
auth_enabled: false
|
auth_enabled: false
|
||||||
commonConfig:
|
commonConfig:
|
||||||
replication_factor: 1
|
replication_factor: 1
|
||||||
|
limits_config:
|
||||||
|
retention_period: 168h
|
||||||
schemaConfig:
|
schemaConfig:
|
||||||
configs:
|
configs:
|
||||||
- from: "2024-01-01"
|
- from: "2024-01-01"
|
||||||
@@ -27,6 +29,13 @@ write:
|
|||||||
|
|
||||||
singleBinary:
|
singleBinary:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 256Mi
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 1Gi
|
||||||
persistence:
|
persistence:
|
||||||
enabled: true
|
enabled: true
|
||||||
storageClass: {{ loki_storage_class }}
|
storageClass: {{ loki_storage_class }}
|
||||||
|
|||||||
Reference in New Issue
Block a user