summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker/openshift_checks/etcd_traffic.py
blob: 40c87873dbddc1bfe5cce37c243cdf673988784e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""Check that scans journalctl for messages caused as a symptom of increased etcd traffic."""

from openshift_checks import OpenShiftCheck, get_var


class EtcdTraffic(OpenShiftCheck):
    """Check if host is being affected by an increase in etcd traffic.

    The check runs the ``search_journalctl`` module against the etcd unit and
    fails when the "sync duration ... expected less than 1s" message is found.
    """

    name = "etcd_traffic"
    tags = ["health", "etcd"]

    @classmethod
    def is_active(cls, task_vars):
        """Skip hosts that do not have etcd in their group names.

        The check is active only when the parent class reports it active, the
        host is in the "etcd" group, and the OpenShift short version is one of
        3.4, 3.5, 1.4 or 1.5.
        """
        group_names = get_var(task_vars, "group_names", default=[])
        valid_group_names = "etcd" in group_names

        # No default is supplied here, so the lookup is delegated entirely to
        # get_var. NOTE(review): presumably it raises when the variable is
        # missing -- confirm against get_var's implementation.
        version = get_var(task_vars, "openshift", "common", "short_version")
        valid_version = version in ("3.4", "3.5", "1.4", "1.5")

        return super(EtcdTraffic, cls).is_active(task_vars) and valid_group_names and valid_version

    def run(self, tmp, task_vars):
        """Search the etcd unit's journal for slow-sync messages.

        Returns a dict with ``failed`` set to True and a ``msg`` when either
        the traffic symptom was matched or the module itself reported a
        failure; returns an empty dict otherwise.
        """
        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
        # Containerized installs use a differently named unit for etcd.
        unit = "etcd_container" if is_containerized else "etcd"

        # NOTE(review): "start_regexp" presumably bounds the search to entries
        # logged after the most recent etcd start -- confirm in the
        # search_journalctl module.
        log_matchers = [{
            "start_regexp": r"Starting Etcd Server",
            "regexp": r"etcd: sync duration of [^,]+, expected less than 1s",
            "unit": unit
        }]

        match = self.execute_module("search_journalctl", {
            "log_matchers": log_matchers,
        }, task_vars)

        if match.get("matched"):
            msg = ("Higher than normal etcd traffic detected.\n"
                   "OpenShift 3.4 introduced an increase in etcd traffic.\n"
                   "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n"
                   "Please refer to https://access.redhat.com/solutions/2916381 for more information.")
            return {"failed": True, "msg": msg}

        if match.get("failed"):
            # A failed result is not guaranteed to carry an "errors" list;
            # joining None would raise TypeError and hide the real failure.
            # Prefer "errors", then the result's own "msg", then a generic text.
            errors = match.get("errors")
            if errors:
                msg = "\n".join(errors)
            else:
                msg = match.get("msg") or "The search_journalctl module failed without reporting any errors."
            return {"failed": True, "msg": msg}

        return {}