---
# Monitoring template definition for OpenShift master hosts.
#
# Layout:
#   name      - template name; every trigger expression below references it
#               as '{Template Openshift Master:<item key>.<function>}'.
#   zitems    - metrics collected by the template, grouped by application
#               ("Openshift Master" or "Openshift Etcd").
#   ztriggers - alert rules evaluated over those items; each links to its SOP
#               document through 'url'.
#
# NOTE(review): the z-prefixed keys and the '{HOST.NAME}' / 'last(#N)' syntax
# look like Zabbix conventions - confirm against the role that consumes this
# variable.
g_template_openshift_master:
  name: Template Openshift Master

  zitems:
    # NOTE(review): unlike every other item, this one has a 'name' but no
    # 'description' or 'type' - confirm the consumer's defaults are intended.
    - name: create_app
      key: create_app
      applications:
        - Openshift Master

    # --- Master process / API -------------------------------------------
    - key: openshift.master.process.count
      description: Shows number of master processes running
      type: int
      applications:
        - Openshift Master

    # Double-quoted because the text contains both an apostrophe and ': '.
    - key: openshift.master.api.healthz
      description: "Checks the healthz check of the master's api: https://master_host/healthz"
      type: int
      data_type: bool
      applications:
        - Openshift Master

    # --- Cluster inventory counters -------------------------------------
    - key: openshift.master.user.count
      description: Shows number of users in a cluster
      type: int
      applications:
        - Openshift Master

    - key: openshift.master.pod.running.count
      description: Shows number of pods running
      type: int
      applications:
        - Openshift Master

    - key: openshift.master.pod.user.running.count
      description: Shows number of user pods running (non infrastructure pods)
      type: int
      applications:
        - Openshift Master

    - key: openshift.master.pod.total.count
      description: Shows total number of pods (running and non running)
      type: int
      applications:
        - Openshift Master

    - key: openshift.project.count
      description: Shows number of projects on a cluster
      type: int
      applications:
        - Openshift Master

    # --- Etcd action counters (success / fail per action) ---------------
    - key: openshift.master.etcd.create.success
      description: Show number of successful create actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.create.fail
      description: Show number of failed create actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.delete.success
      description: Show number of successful delete actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.delete.fail
      description: Show number of failed delete actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.get.success
      description: Show number of successful get actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.get.fail
      description: Show number of failed get actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.set.success
      description: Show number of successful set actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.set.fail
      description: Show number of failed set actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.update.success
      description: Show number of successful update actions
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.update.fail
      description: Show number of failed update actions
      type: int
      applications:
        - Openshift Etcd

    # --- Etcd health ----------------------------------------------------
    - key: openshift.master.etcd.watchers
      description: Show number of etcd watchers
      type: int
      applications:
        - Openshift Etcd

    - key: openshift.master.etcd.ping
      description: etcd ping
      type: int
      applications:
        - Openshift Etcd

  ztriggers:
    # Expressions are kept on a single line and single-quoted: they contain
    # '{', '}' and '#', which must not be interpreted by the YAML parser.

    # Compares both last(#1) and last(#2) of create_app against 1.
    - name: 'Application creation has failed on {HOST.NAME}'
      expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
      priority: avg

    # max(#3) < 1 on the healthz item.
    - name: 'Openshift Master API health check is failing on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
      priority: high

    # max(#3) < 1 on the process count.
    - name: 'Openshift Master process not running on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
      priority: high

    # min(#3) > 1 on the process count.
    - name: 'Too many Openshift Master processes running on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
      priority: high

    # NOTE(review): the name does not state the alert condition; the
    # expression fires when the user count equals 0. The name is left
    # unchanged here because the consumer may use it to match existing
    # triggers - confirm before rewording.
    - name: 'Number of users for Openshift Master on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.user.count.last()}=0'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
      priority: info

    - name: 'There are no projects running on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.project.count.last()}=0'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
      priority: info

    # Compares both last(#1) and last(#2) of the watcher count against 10.
    - name: 'Low number of etcd watchers on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
      priority: avg

    # Compares both last(#1) and last(#2) of the ping item against 0.
    - name: 'Etcd ping failed on {HOST.NAME}'
      expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
      priority: high