summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker/openshift_checks/disk_availability.py
blob: 6e30a861099366e6440436e3d6035f7845560085 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""Check that there is enough disk space in predefined paths."""

import tempfile
import os.path

from openshift_checks import OpenShiftCheck, OpenShiftCheckException


class DiskAvailability(OpenShiftCheck):
    """Check that recommended disk space is available before a first-time install.

    For every path listed in ``recommended_disk_space_bytes`` the free space
    computed from the ``ansible_mounts`` variable is compared against a
    minimum. The minimum is, in order of precedence:

    1. the user-configured value from ``openshift_check_min_host_disk_gb``
       (given in GB), if it is non-zero for the host's groups;
    2. the value from ``recommended_disk_upgrade_bytes`` when the playbook
       context is ``"upgrade"`` and the path has an upgrade recommendation;
    3. the value from ``recommended_disk_space_bytes``.

    A failure is registered for each path whose free space is below the
    minimum.
    """

    name = "disk_availability"
    tags = ["preflight"]

    # Minimum free space in bytes, keyed by path and then by host group name.
    # Values taken from the official installation documentation:
    # https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements
    recommended_disk_space_bytes = {
        '/var': {
            'oo_masters_to_config': 40 * 10**9,
            'oo_nodes_to_config': 15 * 10**9,
            'oo_etcd_to_config': 20 * 10**9,
        },
        # Used to copy client binaries into,
        # see roles/lib_utils/library/openshift_container_binary_sync.py.
        '/usr/local/bin': {
            'oo_masters_to_config': 1 * 10**9,
            'oo_nodes_to_config': 1 * 10**9,
            'oo_etcd_to_config': 1 * 10**9,
        },
        # Used as temporary storage in several cases.
        # NOTE(review): gettempdir() is evaluated once, when this class body is
        # executed on the machine importing this module; confirm that this
        # matches the temporary directory of the hosts being checked.
        tempfile.gettempdir(): {
            'oo_masters_to_config': 1 * 10**9,
            'oo_nodes_to_config': 1 * 10**9,
            'oo_etcd_to_config': 1 * 10**9,
        },
    }

    # recommended disk space for each location under an upgrade context
    # (same layout as recommended_disk_space_bytes; paths not listed here keep
    # their regular recommendation during an upgrade).
    recommended_disk_upgrade_bytes = {
        '/var': {
            'oo_masters_to_config': 10 * 10**9,
            'oo_nodes_to_config': 5 * 10 ** 9,
            'oo_etcd_to_config': 5 * 10 ** 9,
        },
    }

    def is_active(self):
        """Skip hosts that do not have recommended disk space requirements.

        Returns True only if the parent class reports the check as active and
        the host belongs to at least one group that appears in
        ``recommended_disk_space_bytes``.
        """
        group_names = self.get_var("group_names", default=[])
        active_groups = set()
        for recommendation in self.recommended_disk_space_bytes.values():
            active_groups.update(recommendation.keys())
        has_disk_space_recommendation = bool(active_groups.intersection(group_names))
        return super(DiskAvailability, self).is_active() and has_disk_space_recommendation

    @staticmethod
    def _max_for_groups(thresholds, group_names):
        """Return the largest threshold among the given groups, or 0 if none apply.

        Groups missing from ``thresholds`` count as 0. The ``or [0]`` fallback
        keeps ``max()`` from raising ValueError when ``group_names`` is empty.
        """
        return max([thresholds.get(name, 0) for name in group_names] or [0])

    def run(self):
        """Compare free disk space against the minimum for each known path.

        Registers one failure per path that is below its minimum and always
        returns an empty dict.

        Raises:
            OpenShiftCheckException: if ``openshift_check_min_host_disk_gb`` is
                a string that cannot be parsed as a number, or if free space
                for a path cannot be determined (see ``free_bytes``).
        """
        group_names = self.get_var("group_names")
        user_config = self.get_var("openshift_check_min_host_disk_gb", default={})
        try:
            # For backwards-compatibility, if openshift_check_min_host_disk_gb
            # is a number, then it overrides the required config for '/var'.
            number = float(user_config)
        except TypeError:
            # If it is not a number, then it should be a nested dict.
            pass
        except ValueError:
            # float() raises ValueError (not TypeError) for strings that are
            # not numeric; report that as a check failure with a clear message
            # instead of letting an unexpected exception escape.
            raise OpenShiftCheckException(
                'Invalid value for openshift_check_min_host_disk_gb: {!r}.\n'
                'Expected a number (in GB) or a mapping of paths to per-group values.'
                ''.format(user_config)
            )
        else:
            user_config = {
                '/var': {
                    'oo_masters_to_config': number,
                    'oo_nodes_to_config': number,
                    'oo_etcd_to_config': number,
                },
            }

        self.register_log("recommended thresholds", self.recommended_disk_space_bytes)
        if user_config:
            self.register_log("user-configured thresholds", user_config)

        # if an "upgrade" context is set, update the minimum disk requirement
        # as this signifies an in-place upgrade - the node might have the
        # required total disk space, but some of that space may already be
        # in use by the existing OpenShift deployment.
        # The context does not depend on the path, so it is read once here.
        context = self.get_var("r_openshift_health_checker_playbook_context", default="")
        is_upgrade = context == "upgrade"

        # TODO: as suggested in
        # https://github.com/openshift/openshift-ansible/pull/4436#discussion_r122180021,
        # maybe we could support checking disk availability in paths that are
        # not part of the official recommendation but present in the user
        # configuration.
        for path, recommendation in self.recommended_disk_space_bytes.items():
            free_bytes = self.free_bytes(path)

            # NOTE: the user config is in GB, but we compare bytes, thus the
            # conversion.
            config_bytes = self._max_for_groups(user_config.get(path, {}), group_names) * 10**9

            upgrade_recommendation = {}
            if is_upgrade:
                upgrade_recommendation = self.recommended_disk_upgrade_bytes.get(path, {})

            # A non-zero user-configured value always wins; otherwise use the
            # upgrade recommendation when one exists for this path, falling
            # back to the regular recommendation.
            if config_bytes:
                recommended_bytes = config_bytes
            elif upgrade_recommendation:
                recommended_bytes = self._max_for_groups(upgrade_recommendation, group_names)
            else:
                recommended_bytes = self._max_for_groups(recommendation, group_names)

            if free_bytes < recommended_bytes:
                free_gb = float(free_bytes) / 10**9
                recommended_gb = float(recommended_bytes) / 10**9
                msg = (
                    'Available disk space in "{}" ({:.1f} GB) '
                    'is below minimum recommended ({:.1f} GB)'
                ).format(path, free_gb, recommended_gb)

                # warn if check failed under an "upgrade" context
                # due to limits imposed by the user config
                if config_bytes and is_upgrade:
                    # The variable is expressed in GB, so report the value in
                    # GB rather than the internal byte count.
                    config_gb = float(config_bytes) / 10**9
                    msg += ('\n\nMake sure to account for decreased disk space during an upgrade\n'
                            'due to an existing OpenShift deployment. Please check the value of\n'
                            '  openshift_check_min_host_disk_gb={:.1f}\n'
                            'in your Ansible inventory, and lower the recommended disk space availability\n'
                            'if necessary for this upgrade.').format(config_gb)

                self.register_failure(msg)

        return {}

    def find_ansible_submounts(self, path):
        """Return a list of ansible_mounts that are below the given path."""
        # Joining with "" appends a trailing separator, so "/var" matches
        # "/var/lib" but neither "/var" itself nor "/variable".
        base = os.path.join(path, "")
        return [
            mount
            for mount in self.get_var("ansible_mounts")
            if mount["mount"].startswith(base)
        ]

    def free_bytes(self, path):
        """Return the size available in path based on ansible_mounts.

        The result is the ``size_available`` of the mount returned by
        ``find_ansible_mount`` (an inherited helper not shown in this file)
        plus the ``size_available`` of every mount below ``path``; submounts
        lacking that field contribute 0.

        Raises:
            OpenShiftCheckException: if the mount found for ``path`` has no
                ``size_available`` field.
        """
        submounts = sum(mnt.get('size_available', 0) for mnt in self.find_ansible_submounts(path))
        mount = self.find_ansible_mount(path)
        try:
            return mount['size_available'] + submounts
        except KeyError:
            raise OpenShiftCheckException(
                'Unable to retrieve disk availability for "{path}".\n'
                'Ansible facts included a matching mount point for this path:\n'
                '  {mount}\n'
                'however it is missing the size_available field.\n'
                'To investigate, you can inspect the output of `ansible -m setup <host>`'
                ''.format(path=path, mount=mount)
            )