summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
blob: 7c8ac78fe2c7a321d5171d53d866b6f52d7ff3e4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
"""Check that required Docker images are available."""

from pipes import quote
from ansible.module_utils import six
from openshift_checks import OpenShiftCheck
from openshift_checks.mixins import DockerHostMixin


# Component suffixes substituted for "${component}" in the image URL template
# when computing the images required on hosts in the "oo_nodes_to_config" group.
NODE_IMAGE_SUFFIXES = [
    "haproxy-router",
    "docker-registry",
    "deployer",
    "pod",
]

# Image naming details keyed by the value of "openshift_deployment_type".
# Only deployment types listed here are supported by the check:
#   namespace              - image namespace, e.g. "openshift" in "openshift/origin"
#   name                   - base image name, e.g. "origin" in "openshift/origin"
#   registry_console_image - full (untagged) name of the registry console image
DEPLOYMENT_IMAGE_INFO = {
    "origin": dict(
        namespace="openshift",
        name="origin",
        registry_console_image="cockpit/kubernetes",
    ),
    "openshift-enterprise": dict(
        namespace="openshift3",
        name="ose",
        registry_console_image="registry.access.redhat.com/openshift3/registry-console",
    ),
}


class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
    """Check that required Docker images are available.

    Determine docker images that an install would require and check that they
    are either present in the host's docker index, or available for the host to pull
    with known registries as defined in our inventory file (or defaults).

    Helpers such as get_var, normalize, ensure_dependencies, execute_module and
    execute_module_with_retries are not defined in this class; they are inherited
    from the base classes.
    """

    name = "docker_image_availability"
    tags = ["preflight"]
    # we use python-docker-py to check local docker for images, and skopeo
    # to look for images available remotely without waiting to pull them.
    dependencies = ["python-docker-py", "skopeo"]
    # command for checking if remote registries have an image, without docker pull
    skopeo_command = "timeout 10 skopeo inspect --tls-verify={tls} {creds} docker://{registry}/{image}"
    # sanitized form of the command above, shown to the user in failure messages
    skopeo_example_command = "skopeo inspect [--tls-verify=false] [--creds=<user>:<pass>] docker://<registry>/<image>"

    def __init__(self, *args, **kwargs):
        """Read the registry-related inventory variables used by the check.

        Sets up:
          * self.registries: dict with the keys "insecure" (set), "blocked" (set),
            "configured" (ordered list) and "oreg" (registry host taken from
            oreg_url, or "" when oreg_url does not include a registry).
          * self.skopeo_command_creds: "--creds=<user>:<pass>" option for skopeo, or ""
            when oreg_auth_user / oreg_auth_password are not both set.
          * self.reachable_registries: cache of registry -> bool connectivity results.
        """
        super(DockerImageAvailability, self).__init__(*args, **kwargs)

        self.registries = dict(
            # set of registries that need to be checked insecurely (note: not accounting for CIDR entries)
            insecure=set(self.ensure_list("openshift_docker_insecure_registries")),
            # set of registries that should never be queried even if given in the image
            blocked=set(self.ensure_list("openshift_docker_blocked_registries")),
        )

        # ordered list of registries (according to inventory vars) that docker will try for unscoped images
        regs = self.ensure_list("openshift_docker_additional_registries")
        # currently one of these registries is added whether the user wants it or not.
        deployment_type = self.get_var("openshift_deployment_type")
        if deployment_type == "origin" and "docker.io" not in regs:
            regs.append("docker.io")
        elif deployment_type == 'openshift-enterprise' and "registry.access.redhat.com" not in regs:
            regs.append("registry.access.redhat.com")
        self.registries["configured"] = regs

        # for the oreg_url registry there may be credentials specified.
        # "or ''" tolerates a variable that is defined but empty/null, matching
        # how required_images() treats oreg_url.
        components = (self.get_var("oreg_url", default="") or "").split('/')
        # only a URL of the form registry/namespace/name carries a registry part
        self.registries["oreg"] = "" if len(components) < 3 else components[0]
        self.skopeo_command_creds = ""
        oreg_auth_user = self.get_var('oreg_auth_user', default='')
        oreg_auth_password = self.get_var('oreg_auth_password', default='')
        # truthiness (rather than != '') also skips null values, which must not reach quote()
        if oreg_auth_user and oreg_auth_password:
            # NOTE(review): the credentials become part of the skopeo command line;
            # confirm that exposing them in task arguments/output is acceptable.
            self.skopeo_command_creds = "--creds={}:{}".format(quote(oreg_auth_user), quote(oreg_auth_password))

        # record whether we could reach a registry or not (and remember results)
        self.reachable_registries = {}

    def is_active(self):
        """Skip hosts with unsupported deployment types.

        Returns a truthy value only when the parent classes consider the check
        active and openshift_deployment_type is a key of DEPLOYMENT_IMAGE_INFO.
        """
        deployment_type = self.get_var("openshift_deployment_type")
        has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO

        return super(DockerImageAvailability, self).is_active() and has_valid_deployment_type

    def run(self):
        """Run the check.

        Returns: an empty dict when every required image is found locally or in a
        registry; otherwise a dict with "failed": True and a "msg" describing either
        the missing dependencies or the unavailable images.
        """
        msg, failed = self.ensure_dependencies()
        if failed:
            return {
                "failed": True,
                "msg": "Some dependencies are required in order to check Docker image availability.\n" + msg
            }

        required_images = self.required_images()
        missing_images = set(required_images) - set(self.local_images(required_images))

        # exit early if all images were found locally
        if not missing_images:
            return {}

        available_images = self.available_images(missing_images)
        unavailable_images = set(missing_images) - set(available_images)

        if unavailable_images:
            # sort registries so the message is deterministic, like the image list below
            unreachable = sorted(reg for reg, reachable in self.reachable_registries.items() if not reachable)
            blocked = sorted(self.registries["blocked"])
            unreachable_msg = "Failed connecting to: {}\n".format(", ".join(unreachable))
            blocked_msg = "Blocked registries: {}\n".format(", ".join(blocked))
            msg = (
                "One or more required container images are not available:\n    {missing}\n"
                "Checked with: {cmd}\n"
                "Default registries searched: {registries}\n"
                "{blocked}"
                "{unreachable}"
            ).format(
                missing=",\n    ".join(sorted(unavailable_images)),
                cmd=self.skopeo_example_command,
                registries=", ".join(self.registries["configured"]),
                blocked=blocked_msg if blocked else "",
                unreachable=unreachable_msg if unreachable else "",
            )

            return dict(failed=True, msg=msg)

        return {}

    def required_images(self):
        """
        Determine which images we expect to need for this host.
        Returns: a set of required images like 'openshift/origin:v3.6'

        The thorny issue of determining the image names from the variables is under consideration
        via https://github.com/openshift/openshift-ansible/issues/4415

        For now we operate as follows:
        * For containerized components (master, node, ...) we look at the deployment type and
          use openshift/origin or openshift3/ose as the base for those component images. The
          version is openshift_image_tag as determined by the openshift_version role.
        * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if
          it is defined; otherwise we again use the base that depends on the deployment type.
        Registry is not included in constructed images. It may be in oreg_url or etcd image.
        """
        required = set()
        deployment_type = self.get_var("openshift_deployment_type")
        host_groups = self.get_var("group_names")
        is_node = 'oo_nodes_to_config' in host_groups
        # containerized etcd may not have openshift_image_tag, see bz 1466622
        image_tag = self.get_var("openshift_image_tag", default="latest")
        image_info = DEPLOYMENT_IMAGE_INFO[deployment_type]

        # template for images that run on top of OpenShift; "${component}" and
        # "${version}" are placeholders substituted below, the same ones oreg_url uses
        image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}")
        image_url = self.get_var("oreg_url", default="") or image_url
        if is_node:
            for suffix in NODE_IMAGE_SUFFIXES:
                required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag))
            # The registry-console is for some reason not prefixed with ose- like the other components.
            # Nor is it versioned the same, so just look for latest.
            # Also a completely different name is used for Origin.
            required.add(image_info["registry_console_image"])

        # images for containerized components
        if self.get_var("openshift", "common", "is_containerized"):
            components = set()
            if is_node:
                components.update(["node", "openvswitch"])
            if 'oo_masters_to_config' in host_groups:  # name is "origin" or "ose"
                components.add(image_info["name"])
            for component in components:
                required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag))
            if 'oo_etcd_to_config' in host_groups:  # special case, note it is the same for origin/enterprise
                required.add("registry.access.redhat.com/rhel7/etcd")  # and no image tag

        return required

    def local_images(self, images):
        """Filter a list of images and return those available locally.

        Returns: list of the given image names (as passed in, without any registry
        prefix) that were found in the local docker index.
        """
        found_images = []
        for image in images:
            # docker could have the image name as-is or prefixed with any registry
            imglist = [image] + [reg + "/" + image for reg in self.registries["configured"]]
            if self.is_image_local(imglist):
                found_images.append(image)
        return found_images

    def is_image_local(self, image):
        """Check if image is already in local docker index.

        `image` is handed to the docker_image_facts module as its "name" argument;
        local_images() passes a list of candidate names rather than a single name.
        Returns True when the module reports at least one image and did not fail.
        """
        result = self.execute_module("docker_image_facts", {"name": image})
        return bool(result.get("images")) and not result.get("failed")

    def ensure_list(self, registry_param):
        """Return the task var as a list.

        `registry_param` is the name of the variable to read; a missing variable
        yields an empty list.
        """
        # https://bugzilla.redhat.com/show_bug.cgi?id=1497274
        # If the result was a string type, place it into a list. We must do this
        # as using list() on a string will split the string into its characters.
        # Otherwise cast to a list as was done previously.
        registry = self.get_var(registry_param, default=[])
        if not isinstance(registry, six.string_types):
            return list(registry)
        return self.normalize(registry)

    def available_images(self, images):
        """Search remotely for images. Returns: list of images found."""
        return [
            image for image in images
            if self.is_available_skopeo_image(image)
        ]

    def is_available_skopeo_image(self, image):
        """Use Skopeo to determine if required image exists in known registry(s).

        Returns True as soon as one registry reports the image, False when none does.
        Side effect: updates self.reachable_registries with connectivity results.
        """
        registries = self.registries["configured"]
        # If image already includes a registry, only use that.
        # NOTE: This logic would incorrectly identify images that do not use a namespace, e.g.
        # registry.access.redhat.com/rhel7 as if the registry were a namespace.
        # It's not clear that there's any way to distinguish them, but fortunately
        # the current set of images all look like [registry/]namespace/name[:version].
        if image.count("/") > 1:
            registry, image = image.split("/", 1)
            registries = [registry]

        for registry in registries:
            if registry in self.registries["blocked"]:
                continue  # blocked will never be consulted
            if registry not in self.reachable_registries:
                self.reachable_registries[registry] = self.connect_to_registry(registry)
            if not self.reachable_registries[registry]:
                continue  # do not keep trying unreachable registries

            args = dict(registry=registry, image=image)
            args["tls"] = "false" if registry in self.registries["insecure"] else "true"
            # credentials are only ever sent to the registry named in oreg_url
            args["creds"] = self.skopeo_command_creds if registry == self.registries["oreg"] else ""

            result = self.execute_module_with_retries("command", {"_raw_params": self.skopeo_command.format(**args)})
            if result.get("rc", 0) == 0 and not result.get("failed"):
                return True
            if result.get("rc") == 124:  # RC 124 == timed out; mark unreachable
                self.reachable_registries[registry] = False

        return False

    def connect_to_registry(self, registry):
        """Use ansible wait_for module to test connectivity from host to registry. Returns bool.

        `registry` is "host" or "host:port"; port 443 is assumed when none is given.
        """
        # test a simple TCP connection
        host, _, port = registry.partition(":")
        port = port or 443
        args = dict(host=host, port=port, state="started", timeout=30)
        result = self.execute_module("wait_for", args)
        return result.get("rc", 0) == 0 and not result.get("failed")