summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker/openshift_checks/etcd_volume.py
blob: 7452c9cc125d77d2a0a6a1aa62e9b100c25776c6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""A health check for OpenShift clusters."""

from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var


class EtcdVolume(OpenShiftCheck):
    """Ensures etcd storage usage does not exceed a given threshold.

    The check looks up the mount that holds etcd data among the host's
    ``ansible_mounts`` facts, computes the percentage of used space from the
    mount's ``size_total`` and ``size_available`` values, and fails when that
    percentage is above ``etcd_device_usage_threshold_percent`` (or
    ``default_threshold_percent`` when the variable is not set).
    """

    name = "etcd_volume"
    tags = ["etcd", "health"]

    # Default device usage threshold. Value should be in the range [0, 100].
    default_threshold_percent = 90
    # Where to find etcd data, higher priority first.
    supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]

    @classmethod
    def is_active(cls, task_vars):
        """Return whether this check should run on the current host.

        The check is active when the parent class reports it as active and
        the host's ``ansible_ssh_host`` is listed in the ``etcd`` group, or in
        the ``masters`` group when the ``etcd`` group is empty or missing.
        """
        etcd_hosts = (
            get_var(task_vars, "groups", "etcd", default=[])
            or get_var(task_vars, "groups", "masters", default=[])
            or []
        )
        is_etcd_host = get_var(task_vars, "ansible_ssh_host") in etcd_hosts
        return super(EtcdVolume, cls).is_active(task_vars) and is_etcd_host

    def run(self, tmp, task_vars):
        """Compare etcd storage usage against the configured threshold.

        Args:
            tmp: Unused by this check.
            task_vars: Variables for the current host.

        Returns:
            ``{"failed": True, "msg": ...}`` when usage is above the
            threshold, ``{"changed": False}`` otherwise.

        Raises:
            OpenShiftCheckException: If no supported mount point is found,
                if the threshold is not a number, or if the mount reports a
                non-positive total size.
        """
        mount_info = self._etcd_mount_info(task_vars)
        available = mount_info["size_available"]
        total = mount_info["size_total"]

        raw_threshold = get_var(
            task_vars,
            "etcd_device_usage_threshold_percent",
            default=self.default_threshold_percent
        )
        # The threshold may be supplied as a string (e.g. from an inventory
        # file or the command line); coerce it so that the comparison and the
        # "{:.1f}" formatting below are well defined.
        try:
            threshold = float(raw_threshold)
        except (TypeError, ValueError):
            msg = "Invalid value for etcd_device_usage_threshold_percent: {!r}. Expected a number.".format(
                raw_threshold
            )
            raise OpenShiftCheckException(msg)

        # Guard the division below: a mount reporting no capacity cannot be
        # expressed as a usage percentage.
        if total <= 0:
            msg = "Unable to compute etcd storage usage: mount {} reports a total size of {}.".format(
                mount_info.get("mount", "unknown"), total
            )
            raise OpenShiftCheckException(msg)

        used = total - available
        used_percent = 100.0 * used / total

        if used_percent > threshold:
            device = mount_info.get("device", "unknown")
            mount = mount_info.get("mount", "unknown")
            msg = "etcd storage usage ({:.1f}%) is above threshold ({:.1f}%). Device: {}, mount: {}.".format(
                used_percent, threshold, device, mount
            )
            return {"failed": True, "msg": msg}

        return {"changed": False}

    def _etcd_mount_info(self, task_vars):
        """Return the ``ansible_mounts`` entry that holds etcd data.

        The first path of ``supported_mount_paths`` that is mounted wins.

        Raises:
            OpenShiftCheckException: If none of the supported paths is
                mounted; the message lists the mounted paths that were seen.
        """
        ansible_mounts = get_var(task_vars, "ansible_mounts")
        # Entries without a "mount" key can never match a supported path, and
        # a None key would break the sorted()/join() used for the error
        # message below, so leave them out.
        mounts = {
            mnt["mount"]: mnt
            for mnt in ansible_mounts
            if mnt.get("mount") is not None
        }

        for path in self.supported_mount_paths:
            if path in mounts:
                return mounts[path]

        paths = ', '.join(sorted(mounts)) or 'none'
        msg = "Unable to find etcd storage mount point. Paths mounted: {}.".format(paths)
        raise OpenShiftCheckException(msg)