summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_health_checker')
-rw-r--r--roles/openshift_health_checker/action_plugins/openshift_health_check.py2
-rw-r--r--roles/openshift_health_checker/openshift_checks/__init__.py17
-rw-r--r--roles/openshift_health_checker/openshift_checks/docker_image_availability.py89
-rw-r--r--roles/openshift_health_checker/openshift_checks/mixins.py2
-rw-r--r--roles/openshift_health_checker/openshift_checks/package_availability.py2
-rw-r--r--roles/openshift_health_checker/openshift_checks/package_update.py2
-rw-r--r--roles/openshift_health_checker/openshift_checks/package_version.py2
-rw-r--r--roles/openshift_health_checker/test/action_plugin_test.py9
-rw-r--r--roles/openshift_health_checker/test/docker_image_availability_test.py187
-rw-r--r--roles/openshift_health_checker/test/package_availability_test.py4
-rw-r--r--roles/openshift_health_checker/test/package_update_test.py4
-rw-r--r--roles/openshift_health_checker/test/package_version_test.py8
12 files changed, 184 insertions, 144 deletions
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
index 8d35db6b5..d02a43655 100644
--- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py
+++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
@@ -187,7 +187,7 @@ def normalize(checks):
def run_check(name, check, user_disabled_checks):
"""Run a single check if enabled and return a result dict."""
- if name in user_disabled_checks:
+ if name in user_disabled_checks or '*' in user_disabled_checks:
return dict(skipped=True, skipped_reason="Disabled by user request")
# pylint: disable=broad-except; capturing exceptions broadly is intentional,
diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py
index 02ee1d0f9..987c955b6 100644
--- a/roles/openshift_health_checker/openshift_checks/__init__.py
+++ b/roles/openshift_health_checker/openshift_checks/__init__.py
@@ -4,6 +4,7 @@ Health checks for OpenShift clusters.
import operator
import os
+import time
from abc import ABCMeta, abstractmethod, abstractproperty
from importlib import import_module
@@ -57,6 +58,9 @@ class OpenShiftCheck(object):
self._execute_module = execute_module
self.task_vars = task_vars or {}
self.tmp = tmp
+ # mainly for testing purposes; see execute_module_with_retries
+ self._module_retries = 3
+ self._module_retry_interval = 5 # seconds
# set to True when the check changes the host, for accurate total "changed" count
self.changed = False
@@ -115,6 +119,19 @@ class OpenShiftCheck(object):
)
return self._execute_module(module_name, module_args, self.tmp, self.task_vars)
+ def execute_module_with_retries(self, module_name, module_args):
+ """Run execute_module and retry on failure."""
+ result = {}
+ tries = 0
+ while True:
+ res = self.execute_module(module_name, module_args)
+ if tries > self._module_retries or not res.get("failed"):
+ result.update(res)
+ return result
+ result["last_failed"] = res
+ tries += 1
+ time.sleep(self._module_retry_interval)
+
def get_var(self, *keys, **kwargs):
"""Get deeply nested values from task_vars.
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 866c74d7c..9c35f0f92 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -32,7 +32,12 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
# we use python-docker-py to check local docker for images, and skopeo
# to look for images available remotely without waiting to pull them.
dependencies = ["python-docker-py", "skopeo"]
- skopeo_img_check_command = "timeout 10 skopeo inspect --tls-verify=false"
+ skopeo_img_check_command = "timeout 10 skopeo inspect --tls-verify=false docker://{registry}/{image}"
+
+ def __init__(self, *args, **kwargs):
+ super(DockerImageAvailability, self).__init__(*args, **kwargs)
+ # record whether we could reach a registry or not (and remember results)
+ self.reachable_registries = {}
def is_active(self):
"""Skip hosts with unsupported deployment types."""
@@ -64,15 +69,21 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
unavailable_images = set(missing_images) - set(available_images)
if unavailable_images:
- return {
- "failed": True,
- "msg": (
- "One or more required Docker images are not available:\n {}\n"
- "Configured registries: {}\n"
- "Checked by: {}"
- ).format(",\n ".join(sorted(unavailable_images)), ", ".join(registries),
- self.skopeo_img_check_command),
- }
+ registries = [
+ reg if self.reachable_registries.get(reg, True) else reg + " (unreachable)"
+ for reg in registries
+ ]
+ msg = (
+ "One or more required Docker images are not available:\n {}\n"
+ "Configured registries: {}\n"
+ "Checked by: {}"
+ ).format(
+ ",\n ".join(sorted(unavailable_images)),
+ ", ".join(registries),
+ self.skopeo_img_check_command
+ )
+
+ return dict(failed=True, msg=msg)
return {}
@@ -128,31 +139,31 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
def local_images(self, images):
"""Filter a list of images and return those available locally."""
- return [
- image for image in images
- if self.is_image_local(image)
- ]
+ registries = self.known_docker_registries()
+ found_images = []
+ for image in images:
+ # docker could have the image name as-is or prefixed with any registry
+ imglist = [image] + [reg + "/" + image for reg in registries]
+ if self.is_image_local(imglist):
+ found_images.append(image)
+ return found_images
def is_image_local(self, image):
"""Check if image is already in local docker index."""
result = self.execute_module("docker_image_facts", {"name": image})
- if result.get("failed", False):
- return False
-
- return bool(result.get("images", []))
+ return bool(result.get("images")) and not result.get("failed")
def known_docker_registries(self):
"""Build a list of docker registries available according to inventory vars."""
- docker_facts = self.get_var("openshift", "docker")
- regs = set(docker_facts["additional_registries"])
+ regs = list(self.get_var("openshift.docker.additional_registries", default=[]))
deployment_type = self.get_var("openshift_deployment_type")
- if deployment_type == "origin":
- regs.update(["docker.io"])
- elif "enterprise" in deployment_type:
- regs.update(["registry.access.redhat.com"])
+ if deployment_type == "origin" and "docker.io" not in regs:
+ regs.append("docker.io")
+ elif "enterprise" in deployment_type and "registry.access.redhat.com" not in regs:
+ regs.append("registry.access.redhat.com")
- return list(regs)
+ return regs
def available_images(self, images, default_registries):
"""Search remotely for images. Returns: list of images found."""
@@ -165,17 +176,35 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
"""Use Skopeo to determine if required image exists in known registry(s)."""
registries = default_registries
- # if image already includes a registry, only use that
+ # If image already includes a registry, only use that.
+ # NOTE: This logic would incorrectly identify images that do not use a namespace, e.g.
+ # registry.access.redhat.com/rhel7 as if the registry were a namespace.
+ # It's not clear that there's any way to distinguish them, but fortunately
+ # the current set of images all look like [registry/]namespace/name[:version].
if image.count("/") > 1:
registry, image = image.split("/", 1)
registries = [registry]
for registry in registries:
- args = {
- "_raw_params": self.skopeo_img_check_command + " docker://{}/{}".format(registry, image)
- }
- result = self.execute_module("command", args)
+ if registry not in self.reachable_registries:
+ self.reachable_registries[registry] = self.connect_to_registry(registry)
+ if not self.reachable_registries[registry]:
+ continue
+
+ args = {"_raw_params": self.skopeo_img_check_command.format(registry=registry, image=image)}
+ result = self.execute_module_with_retries("command", args)
if result.get("rc", 0) == 0 and not result.get("failed"):
return True
+ if result.get("rc") == 124: # RC 124 == timed out; mark unreachable
+ self.reachable_registries[registry] = False
return False
+
+ def connect_to_registry(self, registry):
+ """Use ansible wait_for module to test connectivity from host to registry. Returns bool."""
+ # test a simple TCP connection
+ host, _, port = registry.partition(":")
+ port = port or 443
+ args = dict(host=host, port=port, state="started", timeout=30)
+ result = self.execute_module("wait_for", args)
+ return result.get("rc", 0) == 0 and not result.get("failed")
diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py
index e9bae60a3..24f1d938a 100644
--- a/roles/openshift_health_checker/openshift_checks/mixins.py
+++ b/roles/openshift_health_checker/openshift_checks/mixins.py
@@ -36,7 +36,7 @@ class DockerHostMixin(object):
# NOTE: we would use the "package" module but it's actually an action plugin
# and it's not clear how to invoke one of those. This is about the same anyway:
- result = self.execute_module(
+ result = self.execute_module_with_retries(
self.get_var("ansible_pkg_mgr", default="yum"),
{"name": self.dependencies, "state": "present"},
)
diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py
index a86180b00..21355c2f0 100644
--- a/roles/openshift_health_checker/openshift_checks/package_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/package_availability.py
@@ -26,7 +26,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
packages.update(self.node_packages(rpm_prefix))
args = {"packages": sorted(set(packages))}
- return self.execute_module("check_yum_update", args)
+ return self.execute_module_with_retries("check_yum_update", args)
@staticmethod
def master_packages(rpm_prefix):
diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py
index 1e9aecbe0..8464e8a5e 100644
--- a/roles/openshift_health_checker/openshift_checks/package_update.py
+++ b/roles/openshift_health_checker/openshift_checks/package_update.py
@@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck):
def run(self):
args = {"packages": []}
- return self.execute_module("check_yum_update", args)
+ return self.execute_module_with_retries("check_yum_update", args)
diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py
index 0b795b6c4..d4aec3ed8 100644
--- a/roles/openshift_health_checker/openshift_checks/package_version.py
+++ b/roles/openshift_health_checker/openshift_checks/package_version.py
@@ -76,7 +76,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
],
}
- return self.execute_module("aos_version", args)
+ return self.execute_module_with_retries("aos_version", args)
def get_required_ovs_version(self):
"""Return the correct Open vSwitch version(s) for the current OpenShift version."""
diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py
index c109ebd24..58864da21 100644
--- a/roles/openshift_health_checker/test/action_plugin_test.py
+++ b/roles/openshift_health_checker/test/action_plugin_test.py
@@ -110,11 +110,16 @@ def test_action_plugin_skip_non_active_checks(plugin, task_vars, monkeypatch):
assert not skipped(result)
-def test_action_plugin_skip_disabled_checks(plugin, task_vars, monkeypatch):
+@pytest.mark.parametrize('to_disable', [
+ 'fake_check',
+ ['fake_check', 'spam'],
+ '*,spam,eggs',
+])
+def test_action_plugin_skip_disabled_checks(to_disable, plugin, task_vars, monkeypatch):
checks = [fake_check('fake_check', is_active=True)]
monkeypatch.setattr('openshift_checks.OpenShiftCheck.subclasses', classmethod(lambda cls: checks))
- task_vars['openshift_disable_check'] = 'fake_check'
+ task_vars['openshift_disable_check'] = to_disable
result = plugin.run(tmp=None, task_vars=task_vars)
assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Disabled by user request")
diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py
index 8d0a53df9..6a7c16c7e 100644
--- a/roles/openshift_health_checker/test/docker_image_availability_test.py
+++ b/roles/openshift_health_checker/test/docker_image_availability_test.py
@@ -3,6 +3,23 @@ import pytest
from openshift_checks.docker_image_availability import DockerImageAvailability
+@pytest.fixture()
+def task_vars():
+ return dict(
+ openshift=dict(
+ common=dict(
+ service_type='origin',
+ is_containerized=False,
+ is_atomic=False,
+ ),
+ docker=dict(),
+ ),
+ openshift_deployment_type='origin',
+ openshift_image_tag='',
+ group_names=['nodes', 'masters'],
+ )
+
+
@pytest.mark.parametrize('deployment_type, is_containerized, group_names, expect_active', [
("origin", True, [], True),
("openshift-enterprise", True, [], True),
@@ -15,12 +32,10 @@ from openshift_checks.docker_image_availability import DockerImageAvailability
("origin", False, ["nodes", "masters"], True),
("openshift-enterprise", False, ["etcd"], False),
])
-def test_is_active(deployment_type, is_containerized, group_names, expect_active):
- task_vars = dict(
- openshift=dict(common=dict(is_containerized=is_containerized)),
- openshift_deployment_type=deployment_type,
- group_names=group_names,
- )
+def test_is_active(task_vars, deployment_type, is_containerized, group_names, expect_active):
+ task_vars['openshift_deployment_type'] = deployment_type
+ task_vars['openshift']['common']['is_containerized'] = is_containerized
+ task_vars['group_names'] = group_names
assert DockerImageAvailability(None, task_vars).is_active() == expect_active
@@ -30,10 +45,10 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active
(True, False),
(False, True),
])
-def test_all_images_available_locally(is_containerized, is_atomic):
+def test_all_images_available_locally(task_vars, is_containerized, is_atomic):
def execute_module(module_name, module_args, *_):
if module_name == "yum":
- return {"changed": True}
+ return {}
assert module_name == "docker_image_facts"
assert 'name' in module_args
@@ -42,19 +57,9 @@ def test_all_images_available_locally(is_containerized, is_atomic):
'images': [module_args['name']],
}
- result = DockerImageAvailability(execute_module, task_vars=dict(
- openshift=dict(
- common=dict(
- service_type='origin',
- is_containerized=is_containerized,
- is_atomic=is_atomic,
- ),
- docker=dict(additional_registries=["docker.io"]),
- ),
- openshift_deployment_type='origin',
- openshift_image_tag='3.4',
- group_names=['nodes', 'masters'],
- )).run()
+ task_vars['openshift']['common']['is_containerized'] = is_containerized
+ task_vars['openshift']['common']['is_atomic'] = is_atomic
+ result = DockerImageAvailability(execute_module, task_vars).run()
assert not result.get('failed', False)
@@ -63,53 +68,36 @@ def test_all_images_available_locally(is_containerized, is_atomic):
False,
True,
])
-def test_all_images_available_remotely(available_locally):
+def test_all_images_available_remotely(task_vars, available_locally):
def execute_module(module_name, *_):
if module_name == 'docker_image_facts':
return {'images': [], 'failed': available_locally}
- return {'changed': False}
+ return {}
- result = DockerImageAvailability(execute_module, task_vars=dict(
- openshift=dict(
- common=dict(
- service_type='origin',
- is_containerized=False,
- is_atomic=False,
- ),
- docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]),
- ),
- openshift_deployment_type='origin',
- openshift_image_tag='v3.4',
- group_names=['nodes', 'masters'],
- )).run()
+ task_vars['openshift']['docker']['additional_registries'] = ["docker.io", "registry.access.redhat.com"]
+ task_vars['openshift_image_tag'] = 'v3.4'
+ check = DockerImageAvailability(execute_module, task_vars)
+ check._module_retry_interval = 0
+ result = check.run()
assert not result.get('failed', False)
-def test_all_images_unavailable():
- def execute_module(module_name=None, *_):
- if module_name == "command":
- return {
- 'failed': True,
- }
+def test_all_images_unavailable(task_vars):
+ def execute_module(module_name=None, *args):
+ if module_name == "wait_for":
+ return {}
+ elif module_name == "command":
+ return {'failed': True}
- return {
- 'changed': False,
- }
+ return {} # docker_image_facts failure
- actual = DockerImageAvailability(execute_module, task_vars=dict(
- openshift=dict(
- common=dict(
- service_type='origin',
- is_containerized=False,
- is_atomic=False,
- ),
- docker=dict(additional_registries=["docker.io"]),
- ),
- openshift_deployment_type="openshift-enterprise",
- openshift_image_tag='latest',
- group_names=['nodes', 'masters'],
- )).run()
+ task_vars['openshift']['docker']['additional_registries'] = ["docker.io"]
+ task_vars['openshift_deployment_type'] = "openshift-enterprise"
+ task_vars['openshift_image_tag'] = 'latest'
+ check = DockerImageAvailability(execute_module, task_vars)
+ check._module_retry_interval = 0
+ actual = check.run()
assert actual['failed']
assert "required Docker images are not available" in actual['msg']
@@ -125,62 +113,63 @@ def test_all_images_unavailable():
["dependencies can be installed via `yum`"]
),
])
-def test_skopeo_update_failure(message, extra_words):
+def test_skopeo_update_failure(task_vars, message, extra_words):
def execute_module(module_name=None, *_):
if module_name == "yum":
return {
"failed": True,
"msg": message,
- "changed": False,
}
- return {'changed': False}
+ return {}
- actual = DockerImageAvailability(execute_module, task_vars=dict(
- openshift=dict(
- common=dict(
- service_type='origin',
- is_containerized=False,
- is_atomic=False,
- ),
- docker=dict(additional_registries=["unknown.io"]),
- ),
- openshift_deployment_type="openshift-enterprise",
- openshift_image_tag='',
- group_names=['nodes', 'masters'],
- )).run()
+ task_vars['openshift']['docker']['additional_registries'] = ["unknown.io"]
+ task_vars['openshift_deployment_type'] = "openshift-enterprise"
+ check = DockerImageAvailability(execute_module, task_vars)
+ check._module_retry_interval = 0
+ actual = check.run()
assert actual["failed"]
for word in extra_words:
assert word in actual["msg"]
-@pytest.mark.parametrize("deployment_type,registries", [
- ("origin", ["unknown.io"]),
- ("openshift-enterprise", ["registry.access.redhat.com"]),
- ("openshift-enterprise", []),
-])
-def test_registry_availability(deployment_type, registries):
+@pytest.mark.parametrize(
+ "image, registries, connection_test_failed, skopeo_failed, "
+ "expect_success, expect_registries_reached", [
+ (
+ "spam/eggs:v1", ["test.reg"],
+ True, True,
+ False,
+ {"test.reg": False},
+ ),
+ (
+ "spam/eggs:v1", ["test.reg"],
+ False, True,
+ False,
+ {"test.reg": True},
+ ),
+ (
+ "eggs.reg/spam/eggs:v1", ["test.reg"],
+ False, False,
+ True,
+ {"eggs.reg": True},
+ ),
+ ])
+def test_registry_availability(image, registries, connection_test_failed, skopeo_failed,
+ expect_success, expect_registries_reached):
def execute_module(module_name=None, *_):
- return {
- 'changed': False,
- }
+ if module_name == "wait_for":
+ return dict(msg="msg", failed=connection_test_failed)
+ elif module_name == "command":
+ return dict(msg="msg", failed=skopeo_failed)
- actual = DockerImageAvailability(execute_module, task_vars=dict(
- openshift=dict(
- common=dict(
- service_type='origin',
- is_containerized=False,
- is_atomic=False,
- ),
- docker=dict(additional_registries=registries),
- ),
- openshift_deployment_type=deployment_type,
- openshift_image_tag='',
- group_names=['nodes', 'masters'],
- )).run()
+ check = DockerImageAvailability(execute_module, task_vars())
+ check._module_retry_interval = 0
- assert not actual.get("failed", False)
+ available = check.is_available_skopeo_image(image, registries)
+ assert available == expect_success
+ assert expect_registries_reached == check.reachable_registries
@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [
@@ -257,7 +246,7 @@ def test_required_images(deployment_type, is_containerized, groups, oreg_url, ex
openshift_image_tag='vtest',
)
- assert expected == DockerImageAvailability("DUMMY", task_vars).required_images()
+ assert expected == DockerImageAvailability(task_vars=task_vars).required_images()
def test_containerized_etcd():
@@ -271,4 +260,4 @@ def test_containerized_etcd():
group_names=['etcd'],
)
expected = set(['registry.access.redhat.com/rhel7/etcd'])
- assert expected == DockerImageAvailability("DUMMY", task_vars).required_images()
+ assert expected == DockerImageAvailability(task_vars=task_vars).required_images()
diff --git a/roles/openshift_health_checker/test/package_availability_test.py b/roles/openshift_health_checker/test/package_availability_test.py
index 1fe648b75..8aa87ca59 100644
--- a/roles/openshift_health_checker/test/package_availability_test.py
+++ b/roles/openshift_health_checker/test/package_availability_test.py
@@ -56,7 +56,7 @@ def test_package_availability(task_vars, must_have_packages, must_not_have_packa
assert 'packages' in module_args
assert set(module_args['packages']).issuperset(must_have_packages)
assert not set(module_args['packages']).intersection(must_not_have_packages)
- return return_value
+ return {'foo': return_value}
result = PackageAvailability(execute_module, task_vars).run()
- assert result is return_value
+ assert result['foo'] is return_value
diff --git a/roles/openshift_health_checker/test/package_update_test.py b/roles/openshift_health_checker/test/package_update_test.py
index 06489b0d7..7d9035a36 100644
--- a/roles/openshift_health_checker/test/package_update_test.py
+++ b/roles/openshift_health_checker/test/package_update_test.py
@@ -9,7 +9,7 @@ def test_package_update():
assert 'packages' in module_args
# empty list of packages means "generic check if 'yum update' will work"
assert module_args['packages'] == []
- return return_value
+ return {'foo': return_value}
result = PackageUpdate(execute_module).run()
- assert result is return_value
+ assert result['foo'] is return_value
diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py
index e871f39f0..8564cd4db 100644
--- a/roles/openshift_health_checker/test/package_version_test.py
+++ b/roles/openshift_health_checker/test/package_version_test.py
@@ -52,7 +52,7 @@ def test_invalid_openshift_release_format():
])
def test_package_version(openshift_release):
- return_value = object()
+ return_value = {"foo": object()}
def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *_):
assert module_name == 'aos_version'
@@ -66,7 +66,7 @@ def test_package_version(openshift_release):
check = PackageVersion(execute_module, task_vars_for(openshift_release, 'origin'))
result = check.run()
- assert result is return_value
+ assert result == return_value
@pytest.mark.parametrize('deployment_type,openshift_release,expected_docker_version', [
@@ -79,7 +79,7 @@ def test_package_version(openshift_release):
])
def test_docker_package_version(deployment_type, openshift_release, expected_docker_version):
- return_value = object()
+ return_value = {"foo": object()}
def execute_module(module_name=None, module_args=None, *_):
assert module_name == 'aos_version'
@@ -93,7 +93,7 @@ def test_docker_package_version(deployment_type, openshift_release, expected_doc
check = PackageVersion(execute_module, task_vars_for(openshift_release, deployment_type))
result = check.run()
- assert result is return_value
+ assert result == return_value
@pytest.mark.parametrize('group_names,is_containerized,is_active', [