Diffstat (limited to 'roles/openshift_health_checker')
-rw-r--r-- roles/openshift_health_checker/action_plugins/openshift_health_check.py | 5
-rw-r--r-- roles/openshift_health_checker/callback_plugins/zz_failure_summary.py | 3
-rwxr-xr-x roles/openshift_health_checker/library/aos_version.py | 28
-rw-r--r-- roles/openshift_health_checker/openshift_checks/disk_availability.py | 120
-rw-r--r-- roles/openshift_health_checker/openshift_checks/docker_image_availability.py | 153
-rw-r--r-- roles/openshift_health_checker/openshift_checks/docker_storage.py | 183
-rw-r--r-- roles/openshift_health_checker/openshift_checks/logging/kibana.py | 2
-rw-r--r-- roles/openshift_health_checker/openshift_checks/logging/logging.py | 6
-rw-r--r-- roles/openshift_health_checker/openshift_checks/mixins.py | 7
-rw-r--r-- roles/openshift_health_checker/openshift_checks/ovs_version.py | 2
-rw-r--r-- roles/openshift_health_checker/openshift_checks/package_availability.py | 5
-rw-r--r-- roles/openshift_health_checker/openshift_checks/package_update.py | 2
-rw-r--r-- roles/openshift_health_checker/openshift_checks/package_version.py | 6
-rw-r--r-- roles/openshift_health_checker/test/action_plugin_test.py | 12
-rw-r--r-- roles/openshift_health_checker/test/aos_version_test.py | 137
-rw-r--r-- roles/openshift_health_checker/test/disk_availability_test.py | 27
-rw-r--r-- roles/openshift_health_checker/test/docker_image_availability_test.py | 113
-rw-r--r-- roles/openshift_health_checker/test/docker_storage_test.py | 113
-rw-r--r-- roles/openshift_health_checker/test/kibana_test.py | 2
-rw-r--r-- roles/openshift_health_checker/test/logging_check_test.py | 32
-rw-r--r-- roles/openshift_health_checker/test/package_version_test.py | 30
21 files changed, 705 insertions, 283 deletions
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
index a62e4331e..0390dc82e 100644
--- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py
+++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py
@@ -38,14 +38,13 @@ class ActionModule(ActionBase):
try:
known_checks = self.load_known_checks()
+ args = self._task.args
+ resolved_checks = resolve_checks(args.get("checks", []), known_checks.values())
except OpenShiftCheckException as e:
result["failed"] = True
result["msg"] = str(e)
return result
- args = self._task.args
- resolved_checks = resolve_checks(args.get("checks", []), known_checks.values())
-
result["checks"] = check_results = {}
user_disabled_checks = [
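
Aside: the effect of this move is that an unknown check name requested by the user now yields a failed task result instead of an unhandled exception (the new test added below asserts the failure message mentions "unknown" and "name"). A minimal standalone sketch of the pattern, with illustrative names:

    # Sketch (illustrative names): resolving requested checks inside the same
    # try/except that loads them turns an unknown check name into a normal
    # failed-task result.
    class OpenShiftCheckException(Exception):
        pass

    def resolve_checks(requested, known_names):
        unknown = set(requested) - set(known_names)
        if unknown:
            raise OpenShiftCheckException(
                "unknown check name(s): " + ", ".join(sorted(unknown)))
        return set(requested)

    def run(requested, known_names):
        try:
            resolved = resolve_checks(requested, known_names)
        except OpenShiftCheckException as e:
            return {"failed": True, "msg": str(e)}
        return {"checks": {name: {} for name in resolved}}

    assert run(["no_such_check"], [])["failed"]
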
diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
index 64c29a8d9..443b76ea1 100644
--- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
+++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py
@@ -39,7 +39,8 @@ class CallbackModule(CallbackBase):
def v2_runner_on_failed(self, result, ignore_errors=False):
super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors)
- self.__failures.append(dict(result=result, ignore_errors=ignore_errors))
+ if not ignore_errors:
+ self.__failures.append(dict(result=result, ignore_errors=ignore_errors))
def v2_playbook_on_stats(self, stats):
super(CallbackModule, self).v2_playbook_on_stats(stats)
diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py
index 4c205e48c..4f43ee751 100755
--- a/roles/openshift_health_checker/library/aos_version.py
+++ b/roles/openshift_health_checker/library/aos_version.py
@@ -19,6 +19,10 @@ the inventory, the version comparison checks just pass.
'''
from ansible.module_utils.basic import AnsibleModule
+# NOTE: because of the dependency on yum (Python 2-only), this module does not
+# work under Python 3. But since we run unit tests against both Python 2 and
+# Python 3, we use six for cross compatibility in this module alone:
+from ansible.module_utils.six import string_types
IMPORT_EXCEPTION = None
try:
@@ -122,12 +126,15 @@ def _check_precise_version_found(pkgs, expected_pkgs_dict):
for pkg in pkgs:
if pkg.name not in expected_pkgs_dict:
continue
- # does the version match, to the precision requested?
- # and, is it strictly greater, at the precision requested?
- expected_pkg_version = expected_pkgs_dict[pkg.name]["version"]
- match_version = '.'.join(pkg.version.split('.')[:expected_pkg_version.count('.') + 1])
- if match_version == expected_pkg_version:
- pkgs_precise_version_found.add(pkg.name)
+ expected_pkg_versions = expected_pkgs_dict[pkg.name]["version"]
+ if isinstance(expected_pkg_versions, string_types):
+ expected_pkg_versions = [expected_pkg_versions]
+ for expected_pkg_version in expected_pkg_versions:
+ # does the version match, to the precision requested?
+ # and, is it strictly greater, at the precision requested?
+ match_version = '.'.join(pkg.version.split('.')[:expected_pkg_version.count('.') + 1])
+ if match_version == expected_pkg_version:
+ pkgs_precise_version_found.add(pkg.name)
not_found = []
for name, pkg in expected_pkgs_dict.items():
@@ -157,8 +164,13 @@ def _check_higher_version_found(pkgs, expected_pkgs_dict):
for pkg in pkgs:
if pkg.name not in expected_pkg_names:
continue
- expected_pkg_version = expected_pkgs_dict[pkg.name]["version"]
- req_release_arr = [int(segment) for segment in expected_pkg_version.split(".")]
+ expected_pkg_versions = expected_pkgs_dict[pkg.name]["version"]
+ if isinstance(expected_pkg_versions, string_types):
+ expected_pkg_versions = [expected_pkg_versions]
+ # NOTE: the list of versions is assumed to be sorted so that the highest
+ # desirable version is the last.
+ highest_desirable_version = expected_pkg_versions[-1]
+ req_release_arr = [int(segment) for segment in highest_desirable_version.split(".")]
version = [int(segment) for segment in pkg.version.split(".")]
too_high = version[:len(req_release_arr)] > req_release_arr
higher_than_seen = version > higher_version_for_pkg.get(pkg.name, [])
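
For illustration, the matching rule above extracted into a standalone sketch: a package version matches if it equals any expected version when truncated to that expected version's precision, and the expected value may now be a single string or a list.

    # Standalone sketch of the precise-version match with list support.
    def matches_expected(pkg_version, expected_versions):
        if isinstance(expected_versions, str):  # the module uses six.string_types
            expected_versions = [expected_versions]
        for expected in expected_versions:
            # compare only to the precision requested, so "2.6" matches "2.6.1"
            match_version = '.'.join(pkg_version.split('.')[:expected.count('.') + 1])
            if match_version == expected:
                return True
        return False

    assert matches_expected('2.7.0', ['2.6', '2.7'])      # either OVS version is fine
    assert not matches_expected('2.8.1', ['2.6', '2.7'])  # too high at this precision
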
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py
index 962148cb8..e93e81efa 100644
--- a/roles/openshift_health_checker/openshift_checks/disk_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py
@@ -1,9 +1,12 @@
-# pylint: disable=missing-docstring
+"""Check that there is enough disk space in predefined paths."""
+
+import os.path
+import tempfile
+
from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
-from openshift_checks.mixins import NotContainerizedMixin
-class DiskAvailability(NotContainerizedMixin, OpenShiftCheck):
+class DiskAvailability(OpenShiftCheck):
"""Check that recommended disk space is available before a first-time install."""
name = "disk_availability"
@@ -12,56 +15,101 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck):
# Values taken from the official installation documentation:
# https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements
recommended_disk_space_bytes = {
- "masters": 40 * 10**9,
- "nodes": 15 * 10**9,
- "etcd": 20 * 10**9,
+ '/var': {
+ 'masters': 40 * 10**9,
+ 'nodes': 15 * 10**9,
+ 'etcd': 20 * 10**9,
+ },
+ # Used to copy client binaries into,
+ # see roles/openshift_cli/library/openshift_container_binary_sync.py.
+ '/usr/local/bin': {
+ 'masters': 1 * 10**9,
+ 'nodes': 1 * 10**9,
+ 'etcd': 1 * 10**9,
+ },
+ # Used as temporary storage in several cases.
+ tempfile.gettempdir(): {
+ 'masters': 1 * 10**9,
+ 'nodes': 1 * 10**9,
+ 'etcd': 1 * 10**9,
+ },
}
@classmethod
def is_active(cls, task_vars):
"""Skip hosts that do not have recommended disk space requirements."""
group_names = get_var(task_vars, "group_names", default=[])
- has_disk_space_recommendation = bool(set(group_names).intersection(cls.recommended_disk_space_bytes))
+ active_groups = set()
+ for recommendation in cls.recommended_disk_space_bytes.values():
+ active_groups.update(recommendation.keys())
+ has_disk_space_recommendation = bool(active_groups.intersection(group_names))
return super(DiskAvailability, cls).is_active(task_vars) and has_disk_space_recommendation
def run(self, tmp, task_vars):
group_names = get_var(task_vars, "group_names")
ansible_mounts = get_var(task_vars, "ansible_mounts")
- free_bytes = self.openshift_available_disk(ansible_mounts)
-
- recommended_min = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names)
- configured_min = int(get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)) * 10**9
- min_free_bytes = configured_min or recommended_min
-
- if free_bytes < min_free_bytes:
- return {
- 'failed': True,
- 'msg': (
- 'Available disk space ({:.1f} GB) for the volume containing '
- '"/var" is below minimum recommended space ({:.1f} GB)'
- ).format(float(free_bytes) / 10**9, float(min_free_bytes) / 10**9)
+ ansible_mounts = {mount['mount']: mount for mount in ansible_mounts}
+
+ user_config = get_var(task_vars, "openshift_check_min_host_disk_gb", default={})
+ try:
+ # For backwards-compatibility, if openshift_check_min_host_disk_gb
+ # is a number, then it overrides the required config for '/var'.
+ number = float(user_config)
+ user_config = {
+ '/var': {
+ 'masters': number,
+ 'nodes': number,
+ 'etcd': number,
+ },
}
+ except TypeError:
+ # If it is not a number, then it should be a nested dict.
+ pass
+
+ # TODO: as suggested in
+ # https://github.com/openshift/openshift-ansible/pull/4436#discussion_r122180021,
+ # maybe we could support checking disk availability in paths that are
+ # not part of the official recommendation but present in the user
+ # configuration.
+ for path, recommendation in self.recommended_disk_space_bytes.items():
+ free_bytes = self.free_bytes(path, ansible_mounts)
+ recommended_bytes = max(recommendation.get(name, 0) for name in group_names)
+
+ config = user_config.get(path, {})
+ # NOTE: the user config is in GB, but we compare bytes, thus the
+ # conversion.
+ config_bytes = max(config.get(name, 0) for name in group_names) * 10**9
+ recommended_bytes = config_bytes or recommended_bytes
+
+ if free_bytes < recommended_bytes:
+ free_gb = float(free_bytes) / 10**9
+ recommended_gb = float(recommended_bytes) / 10**9
+ return {
+ 'failed': True,
+ 'msg': (
+ 'Available disk space in "{}" ({:.1f} GB) '
+ 'is below minimum recommended ({:.1f} GB)'
+ ).format(path, free_gb, recommended_gb)
+ }
return {}
@staticmethod
- def openshift_available_disk(ansible_mounts):
- """Determine the available disk space for an OpenShift installation.
-
- ansible_mounts should be a list of dicts like the 'setup' Ansible module
- returns.
- """
- # priority list in descending order
- supported_mnt_paths = ["/var", "/"]
- available_mnts = {mnt.get("mount"): mnt for mnt in ansible_mounts}
+ def free_bytes(path, ansible_mounts):
+ """Return the size available in path based on ansible_mounts."""
+ mount_point = path
+ # arbitrary value to prevent an infinite loop, in the unlikely case that '/'
+ # is not in ansible_mounts.
+ max_depth = 32
+ while mount_point not in ansible_mounts and max_depth > 0:
+ mount_point = os.path.dirname(mount_point)
+ max_depth -= 1
try:
- for path in supported_mnt_paths:
- if path in available_mnts:
- return available_mnts[path]["size_available"]
+ free_bytes = ansible_mounts[mount_point]['size_available']
except KeyError:
- pass
+ known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(ansible_mounts)) or 'none'
+ msg = 'Unable to determine disk availability for "{}". Known mount points: {}.'
+ raise OpenShiftCheckException(msg.format(path, known_mounts))
- paths = ''.join(sorted(available_mnts)) or 'none'
- msg = "Unable to determine available disk space. Paths mounted: {}.".format(paths)
- raise OpenShiftCheckException(msg)
+ return free_bytes
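
Taken together, the new logic normalizes the (possibly scalar) user configuration and resolves each checked path to its backing mount. A condensed sketch, with the depth-limit loop simplified to stop at '/':

    import os.path

    def normalize_user_config(user_config):
        # Backwards compatibility: a bare number still applies to '/var' only.
        try:
            number = float(user_config)
            return {'/var': {'masters': number, 'nodes': number, 'etcd': number}}
        except TypeError:
            return user_config  # already a nested dict of path -> group -> GB

    def free_bytes(path, mounts_by_mountpoint):
        # Walk up the path until we hit a known mount point.
        mount_point = path
        while mount_point not in mounts_by_mountpoint and mount_point != '/':
            mount_point = os.path.dirname(mount_point)
        return mounts_by_mountpoint[mount_point]['size_available']

    mounts = {'/': {'size_available': 10 * 10**9}}
    assert free_bytes('/usr/local/bin', mounts) == 10 * 10**9
    assert normalize_user_config(20)['/var']['nodes'] == 20.0
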
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 27e6fe383..bde81ad2c 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -1,8 +1,24 @@
-# pylint: disable=missing-docstring
+"""Check that required Docker images are available."""
+
from openshift_checks import OpenShiftCheck, get_var
from openshift_checks.mixins import DockerHostMixin
+NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"]
+DEPLOYMENT_IMAGE_INFO = {
+ "origin": {
+ "namespace": "openshift",
+ "name": "origin",
+ "registry_console_image": "cockpit/kubernetes",
+ },
+ "openshift-enterprise": {
+ "namespace": "openshift3",
+ "name": "ose",
+ "registry_console_image": "registry.access.redhat.com/openshift3/registry-console",
+ },
+}
+
+
class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
"""Check that required Docker images are available.
@@ -13,25 +29,13 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
name = "docker_image_availability"
tags = ["preflight"]
-
dependencies = ["skopeo", "python-docker-py"]
- deployment_image_info = {
- "origin": {
- "namespace": "openshift",
- "name": "origin",
- },
- "openshift-enterprise": {
- "namespace": "openshift3",
- "name": "ose",
- },
- }
-
@classmethod
def is_active(cls, task_vars):
"""Skip hosts with unsupported deployment types."""
deployment_type = get_var(task_vars, "openshift_deployment_type")
- has_valid_deployment_type = deployment_type in cls.deployment_image_info
+ has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO
return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type
@@ -70,51 +74,56 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
return {"changed": changed}
- def required_images(self, task_vars):
- deployment_type = get_var(task_vars, "openshift_deployment_type")
- image_info = self.deployment_image_info[deployment_type]
-
- openshift_release = get_var(task_vars, "openshift_release", default="latest")
- openshift_image_tag = get_var(task_vars, "openshift_image_tag")
- is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
-
- images = set(self.required_docker_images(
- image_info["namespace"],
- image_info["name"],
- ["registry-console"] if "enterprise" in deployment_type else [], # include enterprise-only image names
- openshift_release,
- is_containerized,
- ))
-
- # append images with qualified image tags to our list of required images.
- # these are images with a (v0.0.0.0) tag, rather than a standard release
- # format tag (v0.0). We want to check this set in both containerized and
- # non-containerized installations.
- images.update(
- self.required_qualified_docker_images(
- image_info["namespace"],
- image_info["name"],
- openshift_image_tag,
- ),
- )
-
- return images
-
@staticmethod
- def required_docker_images(namespace, name, additional_image_names, version, is_containerized):
- if is_containerized:
- return ["{}/{}:{}".format(namespace, name, version)] if name else []
-
- # include additional non-containerized images specific to the current deployment type
- return ["{}/{}:{}".format(namespace, img_name, version) for img_name in additional_image_names]
-
- @staticmethod
- def required_qualified_docker_images(namespace, name, version):
- # pylint: disable=invalid-name
- return [
- "{}/{}-{}:{}".format(namespace, name, suffix, version)
- for suffix in ["haproxy-router", "docker-registry", "deployer", "pod"]
- ]
+ def required_images(task_vars):
+ """
+ Determine which images we expect to need for this host.
+ Returns: a set of required images like 'openshift/origin:v3.6'
+
+ The thorny issue of determining the image names from the variables is under consideration
+ via https://github.com/openshift/openshift-ansible/issues/4415
+
+ For now we operate as follows:
+ * For containerized components (master, node, ...) we look at the deployment type and
+ use openshift/origin or openshift3/ose as the base for those component images. The
+ version is openshift_image_tag as determined by the openshift_version role.
+ * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if
+ it is defined; otherwise we again use the base that depends on the deployment type.
+ The registry is not included in constructed image names; it may appear in oreg_url or in the etcd image.
+ """
+ required = set()
+ deployment_type = get_var(task_vars, "openshift_deployment_type")
+ host_groups = get_var(task_vars, "group_names")
+ # containerized etcd may not have openshift_image_tag, see bz 1466622
+ image_tag = get_var(task_vars, "openshift_image_tag", default="latest")
+ image_info = DEPLOYMENT_IMAGE_INFO[deployment_type]
+ if not image_info:
+ return required
+
+ # template for images that run on top of OpenShift
+ image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}")
+ image_url = get_var(task_vars, "oreg_url", default="") or image_url
+ if 'nodes' in host_groups:
+ for suffix in NODE_IMAGE_SUFFIXES:
+ required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag))
+ # The registry-console is for some reason not prefixed with ose- like the other components.
+ # Nor is it versioned the same, so just look for latest.
+ # Also a completely different name is used for Origin.
+ required.add(image_info["registry_console_image"])
+
+ # images for containerized components
+ if get_var(task_vars, "openshift", "common", "is_containerized"):
+ components = set()
+ if 'nodes' in host_groups:
+ components.update(["node", "openvswitch"])
+ if 'masters' in host_groups: # name is "origin" or "ose"
+ components.add(image_info["name"])
+ for component in components:
+ required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag))
+ if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise
+ required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag
+
+ return required
def local_images(self, images, task_vars):
"""Filter a list of images and return those available locally."""
@@ -124,7 +133,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
]
def is_image_local(self, image, task_vars):
- result = self.module_executor("docker_image_facts", {"name": image}, task_vars)
+ """Check if image is already in local docker index."""
+ result = self.execute_module("docker_image_facts", {"name": image}, task_vars=task_vars)
if result.get("failed", False):
return False
@@ -132,6 +142,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
@staticmethod
def known_docker_registries(task_vars):
+ """Build a list of docker registries available according to inventory vars."""
docker_facts = get_var(task_vars, "openshift", "docker")
regs = set(docker_facts["additional_registries"])
@@ -147,17 +158,21 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
"""Inspect existing images using Skopeo and return all images successfully inspected."""
return [
image for image in images
- if any(self.is_available_skopeo_image(image, registry, task_vars) for registry in registries)
+ if self.is_available_skopeo_image(image, registries, task_vars)
]
- def is_available_skopeo_image(self, image, registry, task_vars):
- """Uses Skopeo to determine if required image exists in a given registry."""
+ def is_available_skopeo_image(self, image, registries, task_vars):
+ """Use Skopeo to determine if required image exists in known registry(s)."""
+
+ # if the image already includes a registry, just use that
+ if image.count("/") > 1:
+ registry, image = image.split("/", 1)
+ registries = [registry]
- cmd_str = "skopeo inspect docker://{registry}/{image}".format(
- registry=registry,
- image=image,
- )
+ for registry in registries:
+ args = {"_raw_params": "skopeo inspect --tls-verify=false docker://{}/{}".format(registry, image)}
+ result = self.execute_module("command", args, task_vars=task_vars)
+ if result.get("rc", 0) == 0 and not result.get("failed"):
+ return True
- args = {"_raw_params": cmd_str}
- result = self.module_executor("command", args, task_vars)
- return not result.get("failed", False) and result.get("rc", 0) == 0
+ return False
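
To make the template handling above concrete, a small sketch (the namespace, name, and tag values are assumptions for the example) of how node image names are derived from either oreg_url or the deployment defaults:

    NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"]

    def node_images(oreg_url, namespace="openshift", name="origin", tag="v3.6"):
        # oreg_url, if set, supplies the ${component}/${version} template.
        template = oreg_url or "{}/{}-${{component}}:${{version}}".format(namespace, name)
        return {
            template.replace("${component}", suffix).replace("${version}", tag)
            for suffix in NODE_IMAGE_SUFFIXES
        }

    assert "openshift/origin-pod:v3.6" in node_images(None)
    assert "foo.io/openshift3/ose-pod:f13ac45" in node_images(
        "foo.io/openshift3/ose-${component}:f13ac45")
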
diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py
index 7f1751b36..d2227d244 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_storage.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py
@@ -1,5 +1,6 @@
"""Check Docker storage driver and usage."""
import json
+import os.path
import re
from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
from openshift_checks.mixins import DockerHostMixin
@@ -17,13 +18,30 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
tags = ["pre-install", "health", "preflight"]
dependencies = ["python-docker-py"]
- storage_drivers = ["devicemapper", "overlay2"]
+ storage_drivers = ["devicemapper", "overlay", "overlay2"]
max_thinpool_data_usage_percent = 90.0
max_thinpool_meta_usage_percent = 90.0
+ max_overlay_usage_percent = 90.0
+
+ # TODO(lmeyer): mention these in the output when check fails
+ configuration_variables = [
+ (
+ "max_thinpool_data_usage_percent",
+ "For 'devicemapper' storage driver, usage threshold percentage for data. "
+ "Format: float. Default: {:.1f}".format(max_thinpool_data_usage_percent),
+ ),
+ (
+ "max_thinpool_meta_usage_percent",
+ "For 'devicemapper' storage driver, usage threshold percentage for metadata. "
+ "Format: float. Default: {:.1f}".format(max_thinpool_meta_usage_percent),
+ ),
+ (
+ "max_overlay_usage_percent",
+ "For 'overlay' or 'overlay2' storage driver, usage threshold percentage. "
+ "Format: float. Default: {:.1f}".format(max_overlay_usage_percent),
+ ),
+ ]
- # pylint: disable=too-many-return-statements
- # Reason: permanent stylistic exception;
- # it is clearer to return on failures and there are just many ways to fail here.
def run(self, tmp, task_vars):
msg, failed, changed = self.ensure_dependencies(task_vars)
if failed:
@@ -34,17 +52,17 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
}
# attempt to get the docker info hash from the API
- info = self.execute_module("docker_info", {}, task_vars)
- if info.get("failed"):
+ docker_info = self.execute_module("docker_info", {}, task_vars=task_vars)
+ if docker_info.get("failed"):
return {"failed": True, "changed": changed,
"msg": "Failed to query Docker API. Is docker running on this host?"}
- if not info.get("info"): # this would be very strange
+ if not docker_info.get("info"): # this would be very strange
return {"failed": True, "changed": changed,
- "msg": "Docker API query missing info:\n{}".format(json.dumps(info))}
- info = info["info"]
+ "msg": "Docker API query missing info:\n{}".format(json.dumps(docker_info))}
+ docker_info = docker_info["info"]
# check if the storage driver we saw is valid
- driver = info.get("Driver", "[NONE]")
+ driver = docker_info.get("Driver", "[NONE]")
if driver not in self.storage_drivers:
msg = (
"Detected unsupported Docker storage driver '{driver}'.\n"
@@ -53,26 +71,34 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
return {"failed": True, "changed": changed, "msg": msg}
# driver status info is a list of tuples; convert to dict and validate based on driver
- driver_status = {item[0]: item[1] for item in info.get("DriverStatus", [])}
+ driver_status = {item[0]: item[1] for item in docker_info.get("DriverStatus", [])}
+
+ result = {}
+
if driver == "devicemapper":
- if driver_status.get("Data loop file"):
- msg = (
- "Use of loopback devices with the Docker devicemapper storage driver\n"
- "(the default storage configuration) is unsupported in production.\n"
- "Please use docker-storage-setup to configure a backing storage volume.\n"
- "See http://red.ht/2rNperO for further information."
- )
- return {"failed": True, "changed": changed, "msg": msg}
- result = self._check_dm_usage(driver_status, task_vars)
- result['changed'] = result.get('changed', False) or changed
- return result
+ result = self.check_devicemapper_support(driver_status, task_vars)
- # TODO(lmeyer): determine how to check usage for overlay2
+ if driver in ['overlay', 'overlay2']:
+ result = self.check_overlay_support(docker_info, driver_status, task_vars)
- return {"changed": changed}
+ result['changed'] = result.get('changed', False) or changed
+ return result
- def _check_dm_usage(self, driver_status, task_vars):
- """
+ def check_devicemapper_support(self, driver_status, task_vars):
+ """Check if dm storage driver is supported as configured. Return: result dict."""
+ if driver_status.get("Data loop file"):
+ msg = (
+ "Use of loopback devices with the Docker devicemapper storage driver\n"
+ "(the default storage configuration) is unsupported in production.\n"
+ "Please use docker-storage-setup to configure a backing storage volume.\n"
+ "See http://red.ht/2rNperO for further information."
+ )
+ return {"failed": True, "msg": msg}
+ result = self.check_dm_usage(driver_status, task_vars)
+ return result
+
+ def check_dm_usage(self, driver_status, task_vars):
+ """Check usage thresholds for Docker dm storage driver. Return: result dict.
Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool
implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures
devicemapper storage. The LV is "thin" because it does not use all available storage
@@ -83,7 +109,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
could run out of space first; so we check both.
"""
vals = dict(
- vg_free=self._get_vg_free(driver_status.get("Pool Name"), task_vars),
+ vg_free=self.get_vg_free(driver_status.get("Pool Name"), task_vars),
data_used=driver_status.get("Data Space Used"),
data_total=driver_status.get("Data Space Total"),
metadata_used=driver_status.get("Metadata Space Used"),
@@ -93,7 +119,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
# convert all human-readable strings to bytes
for key, value in vals.copy().items():
try:
- vals[key + "_bytes"] = self._convert_to_bytes(value)
+ vals[key + "_bytes"] = self.convert_to_bytes(value)
except ValueError as err: # unlikely to hit this from API info, but just to be safe
return {
"failed": True,
@@ -131,10 +157,12 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."])
return vals
- def _get_vg_free(self, pool, task_vars):
- # Determine which VG to examine according to the pool name, the only indicator currently
- # available from the Docker API driver info. We assume a name that looks like
- # "vg--name-docker--pool"; vg and lv names with inner hyphens doubled, joined by a hyphen.
+ def get_vg_free(self, pool, task_vars):
+ """Determine which VG to examine according to the pool name. Return: size vgs reports.
+ Pool name is the only indicator currently available from the Docker API driver info.
+ We assume a name that looks like "vg--name-docker--pool";
+ vg and lv names with inner hyphens doubled, joined by a hyphen.
+ """
match = re.match(r'((?:[^-]|--)+)-(?!-)', pool) # matches up to the first single hyphen
if not match: # unlikely, but... be clear if we assumed wrong
raise OpenShiftCheckException(
@@ -143,10 +171,10 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
"so the available storage in the VG cannot be determined.".format(pool)
)
vg_name = match.groups()[0].replace("--", "-")
- vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name
+ vgs_cmd = "/sbin/vgs --noheadings -o vg_free --units g --select vg_name=" + vg_name
# should return free space like " 12.00g" if the VG exists; empty if it does not
- ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars)
+ ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars)
if ret.get("failed") or ret.get("rc", 0) != 0:
raise OpenShiftCheckException(
"Is LVM installed? Failed to run /sbin/vgs "
@@ -163,7 +191,8 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
return size
@staticmethod
- def _convert_to_bytes(string):
+ def convert_to_bytes(string):
+ """Convert string like "10.3 G" to bytes (binary units assumed). Return: float bytes."""
units = dict(
b=1,
k=1024,
@@ -183,3 +212,87 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck):
raise ValueError("Cannot convert to a byte size: " + string)
return float(number) * multiplier
+
+ def check_overlay_support(self, docker_info, driver_status, task_vars):
+ """Check if overlay storage driver is supported for this host. Return: result dict."""
+ # check for xfs as backing store
+ backing_fs = driver_status.get("Backing Filesystem", "[NONE]")
+ if backing_fs != "xfs":
+ msg = (
+ "Docker storage drivers 'overlay' and 'overlay2' are only supported with\n"
+ "'xfs' as the backing storage, but this host's storage is type '{fs}'."
+ ).format(fs=backing_fs)
+ return {"failed": True, "msg": msg}
+
+ # check support for OS and kernel version
+ o_s = docker_info.get("OperatingSystem", "[NONE]")
+ if "Red Hat Enterprise Linux" in o_s or "CentOS" in o_s:
+ # keep it simple, only check enterprise kernel versions; assume everyone else is good
+ kernel = docker_info.get("KernelVersion", "[NONE]")
+ kernel_arr = [int(num) for num in re.findall(r'\d+', kernel)]
+ if kernel_arr < [3, 10, 0, 514]: # rhel < 7.3
+ msg = (
+ "Docker storage drivers 'overlay' and 'overlay2' are only supported beginning with\n"
+ "kernel version 3.10.0-514; but Docker reports kernel version {version}."
+ ).format(version=kernel)
+ return {"failed": True, "msg": msg}
+ # NOTE: we could check for --selinux-enabled here but docker won't even start with
+ # that option until it's supported in the kernel so we don't need to.
+
+ return self.check_overlay_usage(docker_info, task_vars)
+
+ def check_overlay_usage(self, docker_info, task_vars):
+ """Check disk usage on OverlayFS backing store volume. Return: result dict."""
+ path = docker_info.get("DockerRootDir", "/var/lib/docker") + "/" + docker_info["Driver"]
+
+ threshold = get_var(task_vars, "max_overlay_usage_percent", default=self.max_overlay_usage_percent)
+ try:
+ threshold = float(threshold)
+ except ValueError:
+ return {
+ "failed": True,
+ "msg": "Specified 'max_overlay_usage_percent' is not a percentage: {}".format(threshold),
+ }
+
+ mount = self.find_ansible_mount(path, get_var(task_vars, "ansible_mounts"))
+ try:
+ free_bytes = mount['size_available']
+ total_bytes = mount['size_total']
+ usage = 100.0 * (total_bytes - free_bytes) / total_bytes
+ except (KeyError, ZeroDivisionError):
+ return {
+ "failed": True,
+ "msg": "The ansible_mount found for path {} is invalid.\n"
+ "This is likely to be an Ansible bug. The record was:\n"
+ "{}".format(path, json.dumps(mount, indent=2)),
+ }
+
+ if usage > threshold:
+ return {
+ "failed": True,
+ "msg": (
+ "For Docker OverlayFS mount point {path},\n"
+ "usage percentage {pct:.1f} is higher than threshold {thresh:.1f}."
+ ).format(path=mount["mount"], pct=usage, thresh=threshold)
+ }
+
+ return {}
+
+ # TODO(lmeyer): migrate to base class
+ @staticmethod
+ def find_ansible_mount(path, ansible_mounts):
+ """Return the mount point for path from ansible_mounts."""
+
+ mount_for_path = {mount['mount']: mount for mount in ansible_mounts}
+ mount_point = path
+ while mount_point not in mount_for_path:
+ if mount_point in ["/", ""]: # "/" not in ansible_mounts???
+ break
+ mount_point = os.path.dirname(mount_point)
+
+ try:
+ return mount_for_path[mount_point]
+ except KeyError:
+ known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path)) or 'none'
+ msg = 'Unable to determine mount point for path "{}". Known mount points: {}.'
+ raise OpenShiftCheckException(msg.format(path, known_mounts))
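
Two of the trickier bits above, restated as a standalone sketch: the pool-name parsing in get_vg_free (inner hyphens in VG/LV names are doubled) and the list comparison used for the RHEL/CentOS kernel gate in check_overlay_support:

    import re

    def vg_name_from_pool(pool):
        # Matches up to the first single hyphen; "docker--vg-docker--pool"
        # means VG "docker-vg", LV "docker-pool".
        match = re.match(r'((?:[^-]|--)+)-(?!-)', pool)
        if not match:
            raise ValueError("unexpected pool name: " + pool)
        return match.groups()[0].replace("--", "-")

    assert vg_name_from_pool("docker--vg-docker--pool") == "docker-vg"

    # Kernel gate: numeric components compared lexicographically as lists.
    kernel = "3.10.0-327.22.2.el7.x86_64"
    kernel_arr = [int(num) for num in re.findall(r'\d+', kernel)]
    assert kernel_arr < [3, 10, 0, 514]  # pre-RHEL-7.3 kernel, overlay unsupported
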
diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
index 442f407b1..551e8dfa0 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py
@@ -62,7 +62,7 @@ class Kibana(LoggingCheck):
# TODO(lmeyer): give users option to validate certs
status_code=302,
)
- result = self.execute_module('uri', args, task_vars)
+ result = self.execute_module('uri', args, None, task_vars)
if result.get('failed'):
return result['msg']
return None
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py
index 05b4d300c..6e951e82c 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/logging.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py
@@ -54,12 +54,12 @@ class LoggingCheck(OpenShiftCheck):
"""Returns: list of pods not in a ready and running state"""
return [
pod for pod in pods
- if any(
+ if not pod.get("status", {}).get("containerStatuses") or any(
container['ready'] is False
for container in pod['status']['containerStatuses']
) or not any(
condition['type'] == 'Ready' and condition['status'] == 'True'
- for condition in pod['status']['conditions']
+ for condition in pod['status'].get('conditions', [])
)
]
@@ -78,7 +78,7 @@ class LoggingCheck(OpenShiftCheck):
"extra_args": list(extra_args) if extra_args else [],
}
- result = execute_module("ocutil", args, task_vars)
+ result = execute_module("ocutil", args, None, task_vars)
if result.get("failed"):
msg = (
'Unexpected error using `oc` to validate the logging stack components.\n'
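
A compact sketch of the hardened readiness predicate above: a pod without containerStatuses (for example, one not yet scheduled) is now reported as not running, and a missing conditions list no longer raises KeyError:

    def is_not_running(pod):
        status = pod.get("status", {})
        if not status.get("containerStatuses"):
            return True
        if any(container["ready"] is False for container in status["containerStatuses"]):
            return True
        return not any(
            condition["type"] == "Ready" and condition["status"] == "True"
            for condition in status.get("conditions", [])
        )

    # Scheduled-looking pod with no containerStatuses: flagged as not running.
    assert is_not_running({"status": {"conditions": [{"type": "Ready", "status": "True"}]}})
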
diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py
index 7f3d78cc4..2cb2e21aa 100644
--- a/roles/openshift_health_checker/openshift_checks/mixins.py
+++ b/roles/openshift_health_checker/openshift_checks/mixins.py
@@ -40,8 +40,11 @@ class DockerHostMixin(object):
# NOTE: we would use the "package" module but it's actually an action plugin
# and it's not clear how to invoke one of those. This is about the same anyway:
- pkg_manager = get_var(task_vars, "ansible_pkg_mgr", default="yum")
- result = self.module_executor(pkg_manager, {"name": self.dependencies, "state": "present"}, task_vars)
+ result = self.execute_module(
+ get_var(task_vars, "ansible_pkg_mgr", default="yum"),
+ {"name": self.dependencies, "state": "present"},
+ task_vars=task_vars,
+ )
msg = result.get("msg", "")
if result.get("failed"):
if "No package matching" in msg:
diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py
index 1e45ae3af..2dd045f1f 100644
--- a/roles/openshift_health_checker/openshift_checks/ovs_version.py
+++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py
@@ -43,7 +43,7 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck):
},
],
}
- return self.execute_module("rpm_version", args, task_vars)
+ return self.execute_module("rpm_version", args, task_vars=task_vars)
def get_required_ovs_version(self, task_vars):
"""Return the correct Open vSwitch version for the current OpenShift version"""
diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py
index a7eb720fd..0dd2b1286 100644
--- a/roles/openshift_health_checker/openshift_checks/package_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/package_availability.py
@@ -25,7 +25,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
packages.update(self.node_packages(rpm_prefix))
args = {"packages": sorted(set(packages))}
- return self.execute_module("check_yum_update", args, tmp, task_vars)
+ return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars)
@staticmethod
def master_packages(rpm_prefix):
@@ -36,8 +36,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
"bash-completion",
"cockpit-bridge",
"cockpit-docker",
- "cockpit-kubernetes",
- "cockpit-shell",
+ "cockpit-system",
"cockpit-ws",
"etcd",
"httpd-tools",
diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py
index fd0c0a755..f432380c6 100644
--- a/roles/openshift_health_checker/openshift_checks/package_update.py
+++ b/roles/openshift_health_checker/openshift_checks/package_update.py
@@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck):
def run(self, tmp, task_vars):
args = {"packages": []}
- return self.execute_module("check_yum_update", args, tmp, task_vars)
+ return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars)
diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py
index 2e737818b..204752bd0 100644
--- a/roles/openshift_health_checker/openshift_checks/package_version.py
+++ b/roles/openshift_health_checker/openshift_checks/package_version.py
@@ -10,8 +10,8 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
tags = ["preflight"]
openshift_to_ovs_version = {
- "3.6": "2.6",
- "3.5": "2.6",
+ "3.6": ["2.6", "2.7"],
+ "3.5": ["2.6", "2.7"],
"3.4": "2.4",
}
@@ -71,7 +71,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
],
}
- return self.execute_module("aos_version", args, tmp, task_vars)
+ return self.execute_module("aos_version", args, tmp=tmp, task_vars=task_vars)
def get_required_ovs_version(self, task_vars):
"""Return the correct Open vSwitch version for the current OpenShift version.
diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py
index 6ebf0ebb2..9383b233c 100644
--- a/roles/openshift_health_checker/test/action_plugin_test.py
+++ b/roles/openshift_health_checker/test/action_plugin_test.py
@@ -59,7 +59,7 @@ def failed(result, msg_has=None):
if msg_has is not None:
assert 'msg' in result
for term in msg_has:
- assert term in result['msg']
+ assert term.lower() in result['msg'].lower()
return result.get('failed', False)
@@ -178,6 +178,16 @@ def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch):
assert not skipped(result)
+def test_action_plugin_resolve_checks_exception(plugin, task_vars, monkeypatch):
+ monkeypatch.setattr(plugin, 'load_known_checks', lambda: {})
+
+ result = plugin.run(tmp=None, task_vars=task_vars)
+
+ assert failed(result, msg_has=['unknown', 'name'])
+ assert not changed(result)
+ assert not skipped(result)
+
+
@pytest.mark.parametrize('names,all_checks,expected', [
([], [], set()),
(
diff --git a/roles/openshift_health_checker/test/aos_version_test.py b/roles/openshift_health_checker/test/aos_version_test.py
index 697805dd2..4100f6c70 100644
--- a/roles/openshift_health_checker/test/aos_version_test.py
+++ b/roles/openshift_health_checker/test/aos_version_test.py
@@ -18,7 +18,43 @@ expected_pkgs = {
}
-@pytest.mark.parametrize('pkgs, expect_not_found', [
+@pytest.mark.parametrize('pkgs,expected_pkgs_dict', [
+ (
+ # all found
+ [Package('spam', '3.2.1'), Package('eggs', '3.2.1')],
+ expected_pkgs,
+ ),
+ (
+ # found with more specific version
+ [Package('spam', '3.2.1'), Package('eggs', '3.2.1.5')],
+ expected_pkgs,
+ ),
+ (
+ [Package('ovs', '2.6'), Package('ovs', '2.4')],
+ {
+ "ovs": {
+ "name": "ovs",
+ "version": ["2.6", "2.7"],
+ "check_multi": False,
+ }
+ },
+ ),
+ (
+ [Package('ovs', '2.7')],
+ {
+ "ovs": {
+ "name": "ovs",
+ "version": ["2.6", "2.7"],
+ "check_multi": False,
+ }
+ },
+ ),
+])
+def test_check_precise_version_found(pkgs, expected_pkgs_dict):
+ aos_version._check_precise_version_found(pkgs, expected_pkgs_dict)
+
+
+@pytest.mark.parametrize('pkgs,expect_not_found', [
(
[],
{
@@ -55,14 +91,6 @@ expected_pkgs = {
}, # not the right version
),
(
- [Package('spam', '3.2.1'), Package('eggs', '3.2.1')],
- {}, # all found
- ),
- (
- [Package('spam', '3.2.1'), Package('eggs', '3.2.1.5')],
- {}, # found with more specific version
- ),
- (
[Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5')],
{
"spam": {
@@ -73,64 +101,86 @@ expected_pkgs = {
}, # eggs found with multiple versions
),
])
-def test_check_pkgs_for_precise_version(pkgs, expect_not_found):
- if expect_not_found:
- with pytest.raises(aos_version.PreciseVersionNotFound) as e:
- aos_version._check_precise_version_found(pkgs, expected_pkgs)
-
- assert list(expect_not_found.values()) == e.value.problem_pkgs
- else:
+def test_check_precise_version_found_fail(pkgs, expect_not_found):
+ with pytest.raises(aos_version.PreciseVersionNotFound) as e:
aos_version._check_precise_version_found(pkgs, expected_pkgs)
+ assert list(expect_not_found.values()) == e.value.problem_pkgs
-@pytest.mark.parametrize('pkgs, expect_higher', [
+@pytest.mark.parametrize('pkgs,expected_pkgs_dict', [
(
[],
- [],
+ expected_pkgs,
),
(
+ # more precise but not strictly higher
[Package('spam', '3.2.1.9')],
- [], # more precise but not strictly higher
+ expected_pkgs,
),
(
+ [Package('ovs', '2.7')],
+ {
+ "ovs": {
+ "name": "ovs",
+ "version": ["2.6", "2.7"],
+ "check_multi": False,
+ }
+ },
+ ),
+])
+def test_check_higher_version_found(pkgs, expected_pkgs_dict):
+ aos_version._check_higher_version_found(pkgs, expected_pkgs_dict)
+
+
+@pytest.mark.parametrize('pkgs,expected_pkgs_dict,expect_higher', [
+ (
[Package('spam', '3.3')],
+ expected_pkgs,
['spam-3.3'], # lower precision, but higher
),
(
[Package('spam', '3.2.1'), Package('eggs', '3.3.2')],
+ expected_pkgs,
['eggs-3.3.2'], # one too high
),
(
[Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5'), Package('eggs', '3.4')],
+ expected_pkgs,
['eggs-3.4'], # multiple versions, one is higher
),
(
[Package('eggs', '3.2.1'), Package('eggs', '3.4'), Package('eggs', '3.3')],
+ expected_pkgs,
['eggs-3.4'], # multiple versions, two are higher
),
+ (
+ [Package('ovs', '2.8')],
+ {
+ "ovs": {
+ "name": "ovs",
+ "version": ["2.6", "2.7"],
+ "check_multi": False,
+ }
+ },
+ ['ovs-2.8'],
+ ),
])
-def test_check_pkgs_for_greater_version(pkgs, expect_higher):
- if expect_higher:
- with pytest.raises(aos_version.FoundHigherVersion) as e:
- aos_version._check_higher_version_found(pkgs, expected_pkgs)
- assert set(expect_higher) == set(e.value.problem_pkgs)
- else:
- aos_version._check_higher_version_found(pkgs, expected_pkgs)
+def test_check_higher_version_found_fail(pkgs, expected_pkgs_dict, expect_higher):
+ with pytest.raises(aos_version.FoundHigherVersion) as e:
+ aos_version._check_higher_version_found(pkgs, expected_pkgs_dict)
+ assert set(expect_higher) == set(e.value.problem_pkgs)
-@pytest.mark.parametrize('pkgs, expect_to_flag_pkgs', [
- (
- [],
- [],
- ),
- (
- [Package('spam', '3.2.1')],
- [],
- ),
- (
- [Package('spam', '3.2.1'), Package('eggs', '3.2.2')],
- [],
- ),
+@pytest.mark.parametrize('pkgs', [
+ [],
+ [Package('spam', '3.2.1')],
+ [Package('spam', '3.2.1'), Package('eggs', '3.2.2')],
+])
+def test_check_multi_minor_release(pkgs):
+ aos_version._check_multi_minor_release(pkgs, expected_pkgs)
+
+
+@pytest.mark.parametrize('pkgs,expect_to_flag_pkgs', [
(
[Package('spam', '3.2.1'), Package('spam', '3.3.2')],
['spam'],
@@ -140,10 +190,7 @@ def test_check_pkgs_for_greater_version(pkgs, expect_higher):
['eggs'],
),
])
-def test_check_pkgs_for_multi_release(pkgs, expect_to_flag_pkgs):
- if expect_to_flag_pkgs:
- with pytest.raises(aos_version.FoundMultiRelease) as e:
- aos_version._check_multi_minor_release(pkgs, expected_pkgs)
- assert set(expect_to_flag_pkgs) == set(e.value.problem_pkgs)
- else:
+def test_check_multi_minor_release_fail(pkgs, expect_to_flag_pkgs):
+ with pytest.raises(aos_version.FoundMultiRelease) as e:
aos_version._check_multi_minor_release(pkgs, expected_pkgs)
+ assert set(expect_to_flag_pkgs) == set(e.value.problem_pkgs)
diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py
index b353fa610..945b9eafc 100644
--- a/roles/openshift_health_checker/test/disk_availability_test.py
+++ b/roles/openshift_health_checker/test/disk_availability_test.py
@@ -3,22 +3,19 @@ import pytest
from openshift_checks.disk_availability import DiskAvailability, OpenShiftCheckException
-@pytest.mark.parametrize('group_names,is_containerized,is_active', [
- (['masters'], False, True),
- # ensure check is skipped on containerized installs
- (['masters'], True, False),
- (['nodes'], False, True),
- (['etcd'], False, True),
- (['masters', 'nodes'], False, True),
- (['masters', 'etcd'], False, True),
- ([], False, False),
- (['lb'], False, False),
- (['nfs'], False, False),
+@pytest.mark.parametrize('group_names,is_active', [
+ (['masters'], True),
+ (['nodes'], True),
+ (['etcd'], True),
+ (['masters', 'nodes'], True),
+ (['masters', 'etcd'], True),
+ ([], False),
+ (['lb'], False),
+ (['nfs'], False),
])
-def test_is_active(group_names, is_containerized, is_active):
+def test_is_active(group_names, is_active):
task_vars = dict(
group_names=group_names,
- openshift=dict(common=dict(is_containerized=is_containerized)),
)
assert DiskAvailability.is_active(task_vars=task_vars) == is_active
@@ -38,7 +35,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
with pytest.raises(OpenShiftCheckException) as excinfo:
check.run(tmp=None, task_vars=task_vars)
- for word in 'determine available disk'.split() + extra_words:
+ for word in 'determine disk availability'.split() + extra_words:
assert word in str(excinfo.value)
@@ -81,7 +78,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):
[{
# not enough space on / ...
'mount': '/',
- 'size_available': 0,
+ 'size_available': 2 * 10**9,
}, {
# ... but enough on /var
'mount': '/var',
diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py
index 197c65f51..3b9e097fb 100644
--- a/roles/openshift_health_checker/test/docker_image_availability_test.py
+++ b/roles/openshift_health_checker/test/docker_image_availability_test.py
@@ -31,15 +31,15 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active
(False, True),
])
def test_all_images_available_locally(is_containerized, is_atomic):
- def execute_module(module_name, args, task_vars):
+ def execute_module(module_name, module_args, task_vars):
if module_name == "yum":
return {"changed": True}
assert module_name == "docker_image_facts"
- assert 'name' in args
- assert args['name']
+ assert 'name' in module_args
+ assert module_args['name']
return {
- 'images': [args['name']],
+ 'images': [module_args['name']],
}
result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict(
@@ -52,8 +52,8 @@ def test_all_images_available_locally(is_containerized, is_atomic):
docker=dict(additional_registries=["docker.io"]),
),
openshift_deployment_type='origin',
- openshift_release='v3.4',
openshift_image_tag='3.4',
+ group_names=['nodes', 'masters'],
))
assert not result.get('failed', False)
@@ -64,7 +64,7 @@ def test_all_images_available_locally(is_containerized, is_atomic):
True,
])
def test_all_images_available_remotely(available_locally):
- def execute_module(module_name, args, task_vars):
+ def execute_module(module_name, module_args, task_vars):
if module_name == 'docker_image_facts':
return {'images': [], 'failed': available_locally}
return {'changed': False}
@@ -79,8 +79,8 @@ def test_all_images_available_remotely(available_locally):
docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]),
),
openshift_deployment_type='origin',
- openshift_release='3.4',
openshift_image_tag='v3.4',
+ group_names=['nodes', 'masters'],
))
assert not result.get('failed', False)
@@ -108,8 +108,8 @@ def test_all_images_unavailable():
docker=dict(additional_registries=["docker.io"]),
),
openshift_deployment_type="openshift-enterprise",
- openshift_release=None,
- openshift_image_tag='latest'
+ openshift_image_tag='latest',
+ group_names=['nodes', 'masters'],
))
assert actual['failed']
@@ -147,8 +147,8 @@ def test_skopeo_update_failure(message, extra_words):
docker=dict(additional_registries=["unknown.io"]),
),
openshift_deployment_type="openshift-enterprise",
- openshift_release='',
openshift_image_tag='',
+ group_names=['nodes', 'masters'],
))
assert actual["failed"]
@@ -177,8 +177,99 @@ def test_registry_availability(deployment_type, registries):
docker=dict(additional_registries=registries),
),
openshift_deployment_type=deployment_type,
- openshift_release='',
openshift_image_tag='',
+ group_names=['nodes', 'masters'],
))
assert not actual.get("failed", False)
+
+
+@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [
+ ( # standard set of stuff required on nodes
+ "origin", False, ['nodes'], None,
+ set([
+ 'openshift/origin-pod:vtest',
+ 'openshift/origin-deployer:vtest',
+ 'openshift/origin-docker-registry:vtest',
+ 'openshift/origin-haproxy-router:vtest',
+ 'cockpit/kubernetes', # origin version of registry-console
+ ])
+ ),
+ ( # set a different URL for images
+ "origin", False, ['nodes'], 'foo.io/openshift/origin-${component}:${version}',
+ set([
+ 'foo.io/openshift/origin-pod:vtest',
+ 'foo.io/openshift/origin-deployer:vtest',
+ 'foo.io/openshift/origin-docker-registry:vtest',
+ 'foo.io/openshift/origin-haproxy-router:vtest',
+ 'cockpit/kubernetes', # AFAICS this is not built from the URL
+ ])
+ ),
+ (
+ "origin", True, ['nodes', 'masters', 'etcd'], None,
+ set([
+ # images running on top of openshift
+ 'openshift/origin-pod:vtest',
+ 'openshift/origin-deployer:vtest',
+ 'openshift/origin-docker-registry:vtest',
+ 'openshift/origin-haproxy-router:vtest',
+ 'cockpit/kubernetes',
+ # containerized component images
+ 'openshift/origin:vtest',
+ 'openshift/node:vtest',
+ 'openshift/openvswitch:vtest',
+ 'registry.access.redhat.com/rhel7/etcd',
+ ])
+ ),
+ ( # enterprise images
+ "openshift-enterprise", True, ['nodes'], 'foo.io/openshift3/ose-${component}:f13ac45',
+ set([
+ 'foo.io/openshift3/ose-pod:f13ac45',
+ 'foo.io/openshift3/ose-deployer:f13ac45',
+ 'foo.io/openshift3/ose-docker-registry:f13ac45',
+ 'foo.io/openshift3/ose-haproxy-router:f13ac45',
+ # registry-console is not constructed/versioned the same as the others.
+ 'registry.access.redhat.com/openshift3/registry-console',
+ # containerized images aren't built from oreg_url
+ 'openshift3/node:vtest',
+ 'openshift3/openvswitch:vtest',
+ ])
+ ),
+ (
+ "openshift-enterprise", True, ['etcd', 'lb'], 'foo.io/openshift3/ose-${component}:f13ac45',
+ set([
+ 'registry.access.redhat.com/rhel7/etcd',
+ # lb does not yet come in a containerized version
+ ])
+ ),
+
+])
+def test_required_images(deployment_type, is_containerized, groups, oreg_url, expected):
+ task_vars = dict(
+ openshift=dict(
+ common=dict(
+ is_containerized=is_containerized,
+ is_atomic=False,
+ ),
+ ),
+ openshift_deployment_type=deployment_type,
+ group_names=groups,
+ oreg_url=oreg_url,
+ openshift_image_tag='vtest',
+ )
+
+ assert expected == DockerImageAvailability("DUMMY").required_images(task_vars)
+
+
+def test_containerized_etcd():
+ task_vars = dict(
+ openshift=dict(
+ common=dict(
+ is_containerized=True,
+ ),
+ ),
+ openshift_deployment_type="origin",
+ group_names=['etcd'],
+ )
+ expected = set(['registry.access.redhat.com/rhel7/etcd'])
+ assert expected == DockerImageAvailability("DUMMY").required_images(task_vars)
diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py
index 292a323db..99c529054 100644
--- a/roles/openshift_health_checker/test/docker_storage_test.py
+++ b/roles/openshift_health_checker/test/docker_storage_test.py
@@ -23,7 +23,8 @@ def test_is_active(is_containerized, group_names, is_active):
assert DockerStorage.is_active(task_vars=task_vars) == is_active
-non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
+def non_atomic_task_vars():
+ return {"openshift": {"common": {"is_atomic": False}}}
@pytest.mark.parametrize('docker_info, failed, expect_msg', [
@@ -56,7 +57,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
(
dict(info={
"Driver": "overlay2",
- "DriverStatus": []
+ "DriverStatus": [("Backing Filesystem", "xfs")],
}),
False,
[],
@@ -64,9 +65,30 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
(
dict(info={
"Driver": "overlay",
+ "DriverStatus": [("Backing Filesystem", "btrfs")],
}),
True,
- ["unsupported Docker storage driver"],
+ ["storage is type 'btrfs'", "only supported with\n'xfs'"],
+ ),
+ (
+ dict(info={
+ "Driver": "overlay2",
+ "DriverStatus": [("Backing Filesystem", "xfs")],
+ "OperatingSystem": "Red Hat Enterprise Linux Server release 7.2 (Maipo)",
+ "KernelVersion": "3.10.0-327.22.2.el7.x86_64",
+ }),
+ True,
+ ["Docker reports kernel version 3.10.0-327"],
+ ),
+ (
+ dict(info={
+ "Driver": "overlay",
+ "DriverStatus": [("Backing Filesystem", "xfs")],
+ "OperatingSystem": "CentOS",
+ "KernelVersion": "3.10.0-514",
+ }),
+ False,
+ [],
),
(
dict(info={
@@ -77,7 +99,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
),
])
def test_check_storage_driver(docker_info, failed, expect_msg):
- def execute_module(module_name, args, tmp=None, task_vars=None):
+ def execute_module(module_name, module_args, tmp=None, task_vars=None):
if module_name == "yum":
return {}
if module_name != "docker_info":
@@ -85,8 +107,9 @@ def test_check_storage_driver(docker_info, failed, expect_msg):
return docker_info
check = dummy_check(execute_module=execute_module)
- check._check_dm_usage = lambda status, task_vars: dict() # stub out for this test
- result = check.run(tmp=None, task_vars=non_atomic_task_vars)
+ check.check_dm_usage = lambda status, task_vars: dict() # stub out for this test
+ check.check_overlay_usage = lambda info, task_vars: dict() # stub out for this test
+ result = check.run(tmp=None, task_vars=non_atomic_task_vars())
if failed:
assert result["failed"]
@@ -146,8 +169,8 @@ not_enough_space = {
])
def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg):
check = dummy_check()
- check._get_vg_free = lambda pool, task_vars: vg_free
- result = check._check_dm_usage(driver_status, task_vars)
+ check.get_vg_free = lambda pool, task_vars: vg_free
+ result = check.check_dm_usage(driver_status, task_vars)
result_success = not result.get("failed")
assert result_success is success
@@ -187,7 +210,7 @@ def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg):
)
])
def test_vg_free(pool, command_returns, raises, returns):
- def execute_module(module_name, args, tmp=None, task_vars=None):
+ def execute_module(module_name, module_args, tmp=None, task_vars=None):
if module_name != "command":
raise ValueError("not expecting module " + module_name)
return command_returns
@@ -195,10 +218,10 @@ def test_vg_free(pool, command_returns, raises, returns):
check = dummy_check(execute_module=execute_module)
if raises:
with pytest.raises(OpenShiftCheckException) as err:
- check._get_vg_free(pool, {})
+ check.get_vg_free(pool, {})
assert raises in str(err.value)
else:
- ret = check._get_vg_free(pool, {})
+ ret = check.get_vg_free(pool, {})
assert ret == returns
@@ -209,7 +232,7 @@ def test_vg_free(pool, command_returns, raises, returns):
("12g", 12.0 * 1024**3),
])
def test_convert_to_bytes(string, expect_bytes):
- got = DockerStorage._convert_to_bytes(string)
+ got = DockerStorage.convert_to_bytes(string)
assert got == expect_bytes
@@ -219,6 +242,70 @@ def test_convert_to_bytes(string, expect_bytes):
])
def test_convert_to_bytes_error(string):
with pytest.raises(ValueError) as err:
- DockerStorage._convert_to_bytes(string)
+ DockerStorage.convert_to_bytes(string)
assert "Cannot convert" in str(err.value)
assert string in str(err.value)
+
+
+ansible_mounts_enough = [{
+ 'mount': '/var/lib/docker',
+ 'size_available': 50 * 10**9,
+ 'size_total': 50 * 10**9,
+}]
+ansible_mounts_not_enough = [{
+ 'mount': '/var/lib/docker',
+ 'size_available': 0,
+ 'size_total': 50 * 10**9,
+}]
+ansible_mounts_missing_fields = [dict(mount='/var/lib/docker')]
+ansible_mounts_zero_size = [{
+ 'mount': '/var/lib/docker',
+ 'size_available': 0,
+ 'size_total': 0,
+}]
+
+
+@pytest.mark.parametrize('ansible_mounts, threshold, expect_fail, expect_msg', [
+ (
+ ansible_mounts_enough,
+ None,
+ False,
+ [],
+ ),
+ (
+ ansible_mounts_not_enough,
+ None,
+ True,
+ ["usage percentage", "higher than threshold"],
+ ),
+ (
+ ansible_mounts_not_enough,
+ "bogus percent",
+ True,
+ ["is not a percentage"],
+ ),
+ (
+ ansible_mounts_missing_fields,
+ None,
+ True,
+ ["Ansible bug"],
+ ),
+ (
+ ansible_mounts_zero_size,
+ None,
+ True,
+ ["Ansible bug"],
+ ),
+])
+def test_overlay_usage(ansible_mounts, threshold, expect_fail, expect_msg):
+ check = dummy_check()
+ task_vars = non_atomic_task_vars()
+ task_vars["ansible_mounts"] = ansible_mounts
+ if threshold is not None:
+ task_vars["max_overlay_usage_percent"] = threshold
+ docker_info = dict(DockerRootDir="/var/lib/docker", Driver="overlay")
+ result = check.check_overlay_usage(docker_info, task_vars)
+
+ assert expect_fail == bool(result.get("failed"))
+ for msg in expect_msg:
+ assert msg in result["msg"]
diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py
index 19140a1b6..40a5d19d8 100644
--- a/roles/openshift_health_checker/test/kibana_test.py
+++ b/roles/openshift_health_checker/test/kibana_test.py
@@ -169,7 +169,7 @@ def test_get_kibana_url(route, expect_url, expect_error):
),
])
def test_verify_url_internal_failure(exec_result, expect):
- check = Kibana(execute_module=lambda module_name, args, task_vars: dict(failed=True, msg=exec_result))
+ check = Kibana(execute_module=lambda module_name, args, tmp, task_vars: dict(failed=True, msg=exec_result))
check._get_kibana_url = lambda task_vars: ('url', None)
error = check._check_kibana_route({})
diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py
index b6db34fe3..128b76b12 100644
--- a/roles/openshift_health_checker/test/logging_check_test.py
+++ b/roles/openshift_health_checker/test/logging_check_test.py
@@ -50,6 +50,16 @@ plain_kibana_pod = {
}
}
+plain_kibana_pod_no_containerstatus = {
+ "metadata": {
+ "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"},
+ "name": "logging-kibana-1",
+ },
+ "status": {
+ "conditions": [{"status": "True", "type": "Ready"}],
+ }
+}
+
fluentd_pod_node1 = {
"metadata": {
"labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"},
@@ -80,7 +90,7 @@ plain_curator_pod = {
("Permission denied", "Unexpected error using `oc`"),
])
def test_oc_failure(problem, expect):
- def execute_module(module_name, args, task_vars):
+ def execute_module(module_name, args, tmp, task_vars):
if module_name == "ocutil":
return dict(failed=True, result=problem)
return dict(changed=False)
@@ -135,3 +145,23 @@ def test_get_pods_for_component(pod_output, expect_pods, expect_error):
{}
)
assert_error(error, expect_error)
+
+
+@pytest.mark.parametrize('name, pods, expected_pods', [
+ (
+ 'test single pod found, scheduled, but no containerStatuses field',
+ [plain_kibana_pod_no_containerstatus],
+ [plain_kibana_pod_no_containerstatus],
+ ),
+ (
+ 'set of pods has at least one pod with containerStatuses (scheduled); should still fail',
+ [plain_kibana_pod_no_containerstatus, plain_kibana_pod],
+ [plain_kibana_pod_no_containerstatus],
+ ),
+
+], ids=lambda argvals: argvals[0])
+def test_get_not_running_pods_no_container_status(name, pods, expected_pods):
+ check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: '')
+ result = check.not_running_pods(pods)
+
+ assert result == expected_pods
diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py
index 91eace512..1bb6371ae 100644
--- a/roles/openshift_health_checker/test/package_version_test.py
+++ b/roles/openshift_health_checker/test/package_version_test.py
@@ -72,36 +72,6 @@ def test_package_version(openshift_release):
assert result is return_value
-@pytest.mark.parametrize('deployment_type,openshift_release,expected_ovs_version', [
- ("openshift-enterprise", "3.5", "2.6"),
- ("origin", "3.6", "2.6"),
- ("openshift-enterprise", "3.4", "2.4"),
- ("origin", "3.3", "2.4"),
-])
-def test_ovs_package_version(deployment_type, openshift_release, expected_ovs_version):
- task_vars = dict(
- openshift=dict(common=dict(service_type='origin')),
- openshift_release=openshift_release,
- openshift_image_tag='v' + openshift_release,
- openshift_deployment_type=deployment_type,
- )
- return_value = object()
-
- def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):
- assert module_name == 'aos_version'
- assert "package_list" in module_args
-
- for pkg in module_args["package_list"]:
- if pkg["name"] == "openvswitch":
- assert pkg["version"] == expected_ovs_version
-
- return return_value
-
- check = PackageVersion(execute_module=execute_module)
- result = check.run(tmp=None, task_vars=task_vars)
- assert result is return_value
-
-
@pytest.mark.parametrize('deployment_type,openshift_release,expected_docker_version', [
("origin", "3.5", "1.12"),
("openshift-enterprise", "3.4", "1.12"),