summaryrefslogtreecommitdiffstats
path: root/roles/os_zabbix/vars/template_openshift_node.yml
blob: 66bd3a14739d805a7ff459547d6e4613e382fe8f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
---
# Zabbix template definition for OpenShift nodes: the monitored items
# (zitems) and the alerting triggers (ztriggers) built on top of them.
g_template_openshift_node:
  # This name is embedded verbatim in every trigger expression
  # ('{Template Openshift Node:<item key>...}'); keep them in sync on rename.
  name: Template Openshift Node
  # Items belonging to this template. Each entry provides the item key, a
  # human-readable description, the value type, and the application(s) the
  # item is grouped under.
  zitems:
  # --- Node process ---
  - key: 'openshift.node.process.count'
    description: 'Shows number of OpenShift Node processes running'
    type: int
    applications:
    - 'Openshift Node'

  # --- Open vSwitch (OVS) ---
  - key: 'openshift.node.ovs.pids.count'
    description: 'Shows number of ovs process ids running'
    type: int
    applications:
    - 'Openshift Node'

  - key: 'openshift.node.ovs.ports.count'
    description: 'Shows number of OVS ports defined'
    type: int
    applications:
    - 'Openshift Node'

  - key: 'openshift.node.ovs.stray.rules'
    description: 'Number of OVS stray rules found/removed'
    type: int
    applications:
    - 'Openshift Node'

  # --- Docker registry, as observed from the node ---
  - key: 'openshift.node.registry-pods.healthy_pct'
    description: 'Shows the percentage of healthy registries in the cluster'
    type: int
    applications:
    - 'Openshift Node'

  - key: 'openshift.node.registry.service.ping'
    description: 'Ping docker-registry service from node'
    type: int
    applications:
    - 'Openshift Node'

  # Triggers evaluated against the items of this template. Each entry has a
  # display name, a Zabbix trigger expression, a link to the matching SOP
  # document, and a priority.
  #
  # Expression notes (Zabbix trigger functions): last(#N) is the Nth most
  # recent value; min(#N)/max(#N) are taken over the N most recent values.
  # Requiring a condition on several consecutive values avoids alerting on a
  # single bad sample.
  #
  # All SOP links point at the 'master' branch of the ops-sop repository,
  # consistently across the registry and node triggers.
  ztriggers:
  # Both of the two latest samples report fewer than 100% healthy registries.
  - name: 'One or more Docker Registries is unhealthy according to {HOST.NAME}'
    expression: '{Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#2)}<100 and {Template Openshift Node:openshift.node.registry-pods.healthy_pct.last(#1)}<100'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
    priority: avg

  # Both of the two latest registry service ping samples are below 1.
  - name: 'Docker Registry service is unhealthy according to {HOST.NAME}'
    expression: '{Template Openshift Node:openshift.node.registry.service.ping.last(#2)}<1 and {Template Openshift Node:openshift.node.registry.service.ping.last(#1)}<1'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
    priority: avg

  # The highest of the last three process counts is below 1, i.e. no node
  # process was seen in any of them.
  - name: 'Openshift Node process not running on {HOST.NAME}'
    expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
    priority: high

  # The lowest of the last three process counts is above 1, i.e. more than
  # one node process was seen in every one of them.
  - name: 'Too many Openshift Node processes running on {HOST.NAME}'
    expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
    priority: high

  # Both of the two latest OVS pid counts differ from 4.
  # NOTE(review): 4 is the expected number of OVS process ids; which
  # processes make up that count is not visible here - confirm before
  # changing.
  - name: '[Heal] OVS may not be running on {HOST.NAME}'
    expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last(#1)}<>4 and {Template Openshift Node:openshift.node.ovs.pids.count.last(#2)}<>4'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
    priority: high

  # The most recent sample reports zero OVS ports (fires on a single sample).
  - name: 'Number of OVS ports is 0 on {HOST.NAME}'
    expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0'
    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
    priority: high